[zbin] Use LZMA compression

LZMA provides significantly better compression (by ~15%) than the current NRV2B algorithm. We use a raw LZMA stream (aka LZMA1) to avoid the need for code to parse the LZMA2 block headers. We use parameters {lc=2,lp=0,pb=0} to reduce the stack space required by the decompressor to acceptable levels (around 8kB). Using lc=3 or pb=2 would give marginally better compression, but at the cost of substantially increasing the required stack space. The build process now requires the liblzma headers to be present on the build system, since we do not include a copy of an LZMA compressor within the iPXE source tree. The decompressor is written from scratch (based on XZ Embedded) and is entirely self-contained within the iPXE source. The branch-call-jump (BCJ) filter used to improve the compressibility is specific to iPXE. We choose not to use liblzma's built-in BCJ filter since the algorithm is complex and undocumented. Our BCJ filter achieves approximately the same results (on typical iPXE binaries) with a substantially simpler algorithm. Signed-off-by: Michael Brown <mcb30@ipxe.org>
author: Michael Brown 2015-02-21 15:29:53 +0100
committer: Michael Brown 2015-02-25 15:06:13 +0100
commit: 5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd (patch)
tree: da203cb65fc868590ca5395ae1795aabe88d3c85 /src/util
parent: [prefix] Use .bss16 as temporary stack space for calls to install_block (diff)
download: ipxe-5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd.tar.gz
ipxe-5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd.tar.xz
ipxe-5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd.zip
1 files changed, 86 insertions, 10 deletions
diff --git a/src/util/zbin.c b/src/util/zbin.c
index 3b7cf95b..1862a382 100644
--- a/src/util/zbin.c
+++ b/src/util/zbin.c
@@ -1,13 +1,21 @@
+#include <stdint.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
 #include <sys/stat.h>
-
-#define ENCODE
-#define VERBOSE
-#include "nrv2b.c"
-FILE *infile, *outfile;
+#include <lzma.h>
 
 #define DEBUG 0
 
+/* LZMA filter choices.  Must match those used by unlzma.S */
+#define LZMA_LC 2
+#define LZMA_LP 0
+#define LZMA_PB 0
+
+/* LZMA preset choice.  This is a policy decision */
+#define LZMA_PRESET ( LZMA_PRESET_DEFAULT | LZMA_PRESET_EXTREME )
+
 struct input_file {
 	void *buf;
 	size_t len;
@@ -177,13 +185,75 @@ static int process_zinfo_copy ( struct input_file *input,
 	return 0;
 }
 
+#define OPCODE_CALL 0xe8
+#define OPCODE_JMP 0xe9
+
+static void bcj_filter ( void *data, size_t len ) {
+	struct {
+		uint8_t opcode;
+		int32_t target;
+	} __attribute__ (( packed )) *jump;
+	ssize_t limit = ( len - sizeof ( *jump ) );
+	ssize_t offset;
+
+	/* liblzma does include an x86 BCJ filter, but it's hideously
+	 * convoluted and undocumented.  This BCJ filter is
+	 * substantially simpler and achieves the same compression (at
+	 * the cost of requiring the decompressor to know the size of
+	 * the decompressed data, which we already have in iPXE).
+	 */
+	for ( offset = 0 ; offset <= limit ; offset++ ) {
+		jump = ( data + offset );
+
+		/* Skip instructions that are not followed by a rel32 address */
+		if ( ( jump->opcode != OPCODE_CALL ) &&
+		     ( jump->opcode != OPCODE_JMP ) )
+			continue;
+
+		/* Convert rel32 address to an absolute address.  To
+		 * avoid false positives (which damage the compression
+		 * ratio), we should check that the jump target is
+		 * within the range [0,limit).
+		 *
+		 * Some output values would then end up being mapped
+		 * from two distinct input values, making the
+		 * transformation irreversible.  To solve this, we
+		 * transform such values back into the part of the
+		 * range which would otherwise correspond to no input
+		 * values.
+		 */
+		if ( ( jump->target >= -offset ) &&
+		     ( jump->target < ( limit - offset ) ) ) {
+			/* Convert relative addresses in the range
+			 * [-offset,limit-offset) to absolute
+			 * addresses in the range [0,limit).
+			 */
+			jump->target += offset;
+		} else if ( ( jump->target >= ( limit - offset ) ) &&
+			    ( jump->target < limit ) ) {
+			/* Convert positive numbers in the range
+			 * [limit-offset,limit) to negative numbers in
+			 * the range [-offset,0).
+			 */
+			jump->target -= limit;
+		}
+		offset += sizeof ( jump->target );
+	};
+}
+
 static int process_zinfo_pack ( struct input_file *input,
 				struct output_file *output,
 				union zinfo_record *zinfo ) {
 	struct zinfo_pack *pack = &zinfo->pack;
 	size_t offset = pack->offset;
 	size_t len = pack->len;
-	unsigned long packed_len;
+	size_t packed_len = 0;
+	size_t remaining = ( output->max_len - output->len );
+	lzma_options_lzma options;
+	const lzma_filter filters[] = {
+		{ .id = LZMA_FILTER_LZMA1, .options = &options },
+		{ .id = LZMA_VLI_UNKNOWN }
+	};
 
 	if ( ( offset + len ) > input->len ) {
 		fprintf ( stderr, "Input buffer overrun on pack\n" );
@@ -196,9 +266,15 @@ static int process_zinfo_pack ( struct input_file *input,
 		return -1;
 	}
 
-	if ( ucl_nrv2b_99_compress ( ( input->buf + offset ), len,
-				     ( output->buf + output->len ),
-				     &packed_len, 0 ) != UCL_E_OK ) {
+	bcj_filter ( ( input->buf + offset ), len );
+
+	lzma_lzma_preset ( &options, LZMA_PRESET );
+	options.lc = LZMA_LC;
+	options.lp = LZMA_LP;
+	options.pb = LZMA_PB;
+	if ( lzma_raw_buffer_encode ( filters, NULL, ( input->buf + offset ),
+				      len, ( output->buf + output->len ),
+				      &packed_len, remaining ) != LZMA_OK ) {
 		fprintf ( stderr, "Compression failure\n" );
 		return -1;
 	}
@@ -206,7 +282,7 @@ static int process_zinfo_pack ( struct input_file *input,
 	if ( DEBUG ) {
 		fprintf ( stderr, "PACK [%#zx,%#zx) to [%#zx,%#zx)\n",
 			  offset, ( offset + len ), output->len,
-			  ( size_t )( output->len + packed_len ) );
+			  ( output->len + packed_len ) );
 	}
 
 	output->len += packed_len;
author	Michael Brown	2015-02-21 15:29:53 +0100
committer	Michael Brown	2015-02-25 15:06:13 +0100
commit	5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd (patch)
tree	da203cb65fc868590ca5395ae1795aabe88d3c85 /src/util
parent	[prefix] Use .bss16 as temporary stack space for calls to install_block (diff)
download	ipxe-5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd.tar.gz ipxe-5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd.tar.xz ipxe-5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd.zip