summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMichael Brown2015-02-21 15:29:53 +0100
committerMichael Brown2015-02-25 15:06:13 +0100
commit5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd (patch)
treeda203cb65fc868590ca5395ae1795aabe88d3c85 /src
parent[prefix] Use .bss16 as temporary stack space for calls to install_block (diff)
downloadipxe-5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd.tar.gz
ipxe-5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd.tar.xz
ipxe-5350b65a3ce727f20e9d9fa7b9c2c0af52cfb7bd.zip
[zbin] Use LZMA compression
LZMA provides significantly better compression (by ~15%) than the current NRV2B algorithm. We use a raw LZMA stream (aka LZMA1) to avoid the need for code to parse the LZMA2 block headers. We use parameters {lc=2,lp=0,pb=0} to reduce the stack space required by the decompressor to acceptable levels (around 8kB). Using lc=3 or pb=2 would give marginally better compression, but at the cost of substantially increasing the required stack space. The build process now requires the liblzma headers to be present on the build system, since we do not include a copy of an LZMA compressor within the iPXE source tree. The decompressor is written from scratch (based on XZ Embedded) and is entirely self-contained within the iPXE source. The branch-call-jump (BCJ) filter used to improve the compressibility is specific to iPXE. We choose not to use liblzma's built-in BCJ filter since the algorithm is complex and undocumented. Our BCJ filter achieves approximately the same results (on typical iPXE binaries) with a substantially simpler algorithm. Signed-off-by: Michael Brown <mcb30@ipxe.org>
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.housekeeping11
-rw-r--r--src/arch/i386/prefix/unlzma.S905
-rw-r--r--src/arch/i386/prefix/unlzma16.S9
-rw-r--r--src/util/zbin.c96
4 files changed, 1004 insertions, 17 deletions
diff --git a/src/Makefile.housekeeping b/src/Makefile.housekeeping
index bb2b0cb4..274945ae 100644
--- a/src/Makefile.housekeeping
+++ b/src/Makefile.housekeeping
@@ -1220,15 +1220,12 @@ endif # defined(BIN)
#
# The compression utilities
#
-$(NRV2B) : util/nrv2b.c $(MAKEDEPS)
- $(QM)$(ECHO) " [HOSTCC] $@"
- $(Q)$(HOST_CC) $(HOST_CFLAGS) -DENCODE -DDECODE -DMAIN -DVERBOSE \
- -DNDEBUG -DBITSIZE=32 -DENDIAN=0 -o $@ $<
-CLEANUP += $(NRV2B)
-$(ZBIN) : util/zbin.c util/nrv2b.c $(MAKEDEPS)
+ZBIN_LDFLAGS := -llzma
+
+$(ZBIN) : util/zbin.c $(MAKEDEPS)
$(QM)$(ECHO) " [HOSTCC] $@"
- $(Q)$(HOST_CC) $(HOST_CFLAGS) -o $@ $<
+ $(Q)$(HOST_CC) $(HOST_CFLAGS) $< $(ZBIN_LDFLAGS) -o $@
CLEANUP += $(ZBIN)
###############################################################################
diff --git a/src/arch/i386/prefix/unlzma.S b/src/arch/i386/prefix/unlzma.S
new file mode 100644
index 00000000..f6c74286
--- /dev/null
+++ b/src/arch/i386/prefix/unlzma.S
@@ -0,0 +1,905 @@
+/*
+ * Copyright (C) 2015 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+/****************************************************************************
+ *
+ * This file provides the decompress() and decompress16() functions
+ * which can be called in order to decompress an LZMA-compressed
+ * image. The code is modelled on the public-domain "XZ Embedded"
+ * implementation as used by the Linux kernel. Symbol names are
+ * chosen to match the XZ Embedded implementation where possible, for
+ * ease of reference.
+ *
+ * This code is optimised for size rather than speed, since the amount
+ * of data to be decompressed is trivially small by modern standards.
+ *
+ * The same basic assembly code is used to compile both decompress()
+ * and decompress16().
+ *
+ * Note that these functions require large amounts of stack space.
+ *
+ ****************************************************************************
+ */
+
+ .text
+ .arch i586
+ .section ".prefix.lib", "ax", @progbits
+
+#ifdef CODE16
+#define ADDR16
+#define ADDR32 addr32
+#define decompress decompress16
+ .code16
+#else /* CODE16 */
+#define ADDR16 addr16
+#define ADDR32
+ .code32
+#endif /* CODE16 */
+
+/****************************************************************************
+ * Debugging (via 0xe9 debug port)
+ ****************************************************************************
+ */
+
+#define DEBUG 0
+
+#if DEBUG
+ .macro print_character, char
+ pushw %ax
+ movb $\char, %al
+ outb %al, $0xe9
+ popw %ax
+ .endm
+
+ .macro print_hex_nibble
+ cmpb $10, %al
+ sbb $0x69, %al
+ das
+ outb %al, $0xe9
+ .endm
+
+ .macro print_hex_byte, reg
+ pushfl
+ pushw %ax
+ movb \reg, %al
+ pushw %ax
+ shrb $4, %al
+ print_hex_nibble
+ popw %ax
+ andb $0x0f, %al
+ print_hex_nibble
+ popw %ax
+ popfl
+ .endm
+
+ .macro print_hex_word, reg
+ pushw %ax
+ movw \reg, %ax
+ print_hex_byte %ah
+ print_hex_byte %al
+ popw %ax
+ .endm
+
+ .macro print_hex_dword, reg
+ pushl %eax
+ movl \reg, %eax
+ rorl $16, %eax
+ print_hex_word %ax
+ rorl $16, %eax
+ print_hex_word %ax
+ popl %eax
+ .endm
+#else
+ .macro print_character, char
+ .endm
+ .macro print_hex_byte, reg
+ .endm
+ .macro print_hex_word, reg
+ .endm
+ .macro print_hex_dword, reg
+ .endm
+#endif
+
+/****************************************************************************
+ * LZMA parameters and data structures
+ ****************************************************************************
+ */
+
+/* LZMA decompressor states (as used in XZ Embedded) */
+#define STATE_LIT_LIT 0x00
+#define STATE_MATCH_LIT_LIT 0x01
+#define STATE_REP_LIT_LIT 0x02
+#define STATE_SHORTREP_LIT_LIT 0x03
+#define STATE_MATCH_LIT 0x04
+#define STATE_REP_LIT 0x05
+#define STATE_SHORTREP_LIT 0x06
+#define STATE_LIT_MATCH 0x07
+#define STATE_LIT_LONGREP 0x08
+#define STATE_LIT_SHORTREP 0x09
+#define STATE_NONLIT_MATCH 0x0a
+#define STATE_NONLIT_REP 0x0b
+
+/* LZMA maximum decompressor state in which most recent symbol was a literal */
+#define STATE_LIT_MAX 0x06
+
+/* LZMA number of literal context bits ("lc=" parameter) */
+#define LZMA_LC 2
+
+ .struct 0
+lzma_len_dec:
+choice: .word 0
+choice2: .word 0
+low: .rept ( 1 << 3 )
+ .word 0
+ .endr
+mid: .rept ( 1 << 3 )
+ .word 0
+ .endr
+high: .rept ( 1 << 8 )
+ .word 0
+ .endr
+ .equ sizeof__lzma_len_dec, . - lzma_len_dec
+ .previous
+
+ .struct 0
+lzma_dec:
+in_start: .long 0
+out_start: .long 0
+rc_code: .long 0
+rc_range: .long 0
+len: .word 0
+reps:
+rep0: .long 0
+rep1: .long 0
+rep2: .long 0
+rep3: .long 0
+probs:
+is_match: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+is_rep: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+is_rep0: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+is_rep1: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+is_rep2: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+is_rep0_long: .word 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+dist_slot: .rept ( 4 * ( 1 << 6 ) )
+ .word 0
+ .endr
+dist_special: .rept ( ( 1 << ( 14 / 2 ) ) - 14 )
+ .word 0
+ .endr
+dist_align: .rept ( 1 << 4 )
+ .word 0
+ .endr
+match_len_dec: .space sizeof__lzma_len_dec
+rep_len_dec: .space sizeof__lzma_len_dec
+literal: .rept ( ( 1 << LZMA_LC ) * 0x300 )
+ .word 0
+ .endr
+ .align 4
+ .equ sizeof__lzma_dec, . - lzma_dec
+ .previous
+
+/*****************************************************************************
+ * Normalise range encoder
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %eax : current range
+ * Corrupts:
+ * %eax
+ *****************************************************************************
+ */
+rc_normalise:
+ /* Check if rc_range is less than 1<<24 */
+ testb $0xff, (rc_range+3)(%ebp)
+ jnz 1f
+ /* If it is, shift in a new byte from the compressed input data */
+ shll $8, rc_range(%ebp)
+ shll $8, rc_code(%ebp)
+ ADDR32 lodsb
+ movb %al, (rc_code+0)(%ebp)
+1: /* Return current range */
+ movl rc_range(%ebp), %eax
+ ret
+ .size rc_normalise, . - rc_normalise
+
+/*****************************************************************************
+ * Decode single range-encoded bit using a probability estimate
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %ebx : probability estimate pointer (offset from %ebp)
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * CF : decoded bit
+ * ZF : inverse of decoded bit
+ * Corrupts:
+ * none
+ *****************************************************************************
+ */
+rc_bit:
+ /* Preserve registers */
+ pushl %eax
+ pushl %edx
+ /* Perform normalisation */
+ call rc_normalise
+ /* Calculate bound in %eax and probability estimate in %dx */
+ shrl $11, %eax
+ movzwl (%ebp,%ebx), %edx
+ mul %edx /* will zero %edx */
+ movw (%ebp,%ebx), %dx
+ /* Compare code against bound */
+ cmpl %eax, rc_code(%ebp)
+ jae 2f
+1: /* Code is less than bound */
+ movl %eax, rc_range(%ebp)
+ negw %dx
+ addw $(1<<11), %dx
+ shrw $5, %dx
+ addw %dx, (%ebp,%ebx)
+ xorw %ax, %ax /* Clear CF, set ZF */
+ jmp 99f
+2: /* Code is greater than or equal to bound */
+ subl %eax, rc_range(%ebp)
+ subl %eax, rc_code(%ebp)
+ shrw $5, %dx
+ subw %dx, (%ebp,%ebx)
+ incw %dx /* Clear ZF (%dx is 11-bit; can never wrap) */
+ stc /* Set CF */
+99: /* Restore registers and return */
+ popl %edx
+ popl %eax
+ ret
+ .size rc_bit, . - rc_bit
+
+/*****************************************************************************
+ * Decode MSB-first bittree
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %ebx : probability estimate set pointer (offset from %ebp)
+ * %cx : number of bits to decode
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %eax : decoded bittree
+ * Corrupts:
+ * none
+ *****************************************************************************
+ */
+rc_bittree:
+ /* Preserve registers */
+ pushl %edi
+ pushw %cx
+ movl %ebx, %edi
+ /* Initialise registers */
+ movl $1, %eax
+1: /* Decode bit */
+ leaw (%edi,%eax,2), %bx /* high word always zero anyway */
+ call rc_bit
+ rclw %ax
+ ADDR16 loop 1b
+ /* Restore registers, clear unwanted high bit of result, and return */
+ movl %edi, %ebx
+ popw %cx
+ popl %edi
+ btrw %cx, %ax
+ ret
+ .size rc_bittree, . - rc_bittree
+
+/*****************************************************************************
+ * Decode LSB-first bittree
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %ebx : probability estimate set pointer (offset from %ebp)
+ * %cx : number of bits to decode
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %eax : decoded bittree
+ * Corrupts:
+ * none
+ *****************************************************************************
+ */
+rc_bittree_reverse:
+ /* Preserve registers */
+ pushw %cx
+ /* Decode bittree */
+ call rc_bittree
+1: /* Reverse result */
+ rcrb %al
+ rclb %ah
+ ADDR16 loop 1b
+ shrw $8, %ax
+ /* Restore registers and return */
+ popw %cx
+ ret
+ .size rc_bittree_reverse, . - rc_bittree_reverse
+
+/*****************************************************************************
+ * Decode MSB-first bittree with optional match byte
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %ebx : probability estimate set pointer (offset from %ebp)
+ * %cl : match byte
+ * %ch : 1 to use match byte, 0 to ignore match byte
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %eax : decoded bittree
+ * Corrupts:
+ * none
+ *****************************************************************************
+ */
+rc_bittree_match:
+ /* Preserve registers */
+ pushl %edi
+ pushw %cx
+ pushw %dx
+ movl %ebx, %edi
+ /* Initialise registers */
+ movl $1, %eax
+1: /* Decode bit */
+ rolb $1, %cl
+ movw %cx, %dx
+ andb %dh, %dl /* match_bit in %dl */
+ movw %dx, %bx
+ addb %bl, %bh
+ xorb %bl, %bl
+ addw %ax, %bx /* offset + match_bit + symbol */
+ leaw (%edi,%ebx,2), %bx /* high word always zero anyway */
+ call rc_bit
+ rclw %ax
+ movb %al, %dh
+ notb %dh
+ xorb %dh, %dl
+ andb %dl, %ch /* offset &= ( match_bit ^ bit ) */
+ testb %ah, %ah
+ jz 1b
+ /* Restore registers, clear unwanted high bit of result, and return */
+ movl %edi, %ebx
+ popw %dx
+ popw %cx
+ popl %edi
+ xorb %ah, %ah
+ ret
+ .size rc_bittree_match, . - rc_bittree_match
+
+/*****************************************************************************
+ * Decode direct bits (no probability estimates)
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %cx : number of bits to decode
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %eax : decoded bits
+ * Corrupts:
+ * none
+ *****************************************************************************
+ */
+rc_direct:
+ /* Preserve registers */
+ pushl %ebx
+ pushw %cx
+ pushl %edx
+ /* Initialise registers */
+ xorl %edx, %edx
+1: /* Perform normalisation */
+ call rc_normalise
+ /* Decode bit */
+ shrl $1, %eax
+ movl %eax, rc_range(%ebp)
+ movl rc_code(%ebp), %ebx
+ subl %eax, %ebx
+ js 2f
+ movl %ebx, rc_code(%ebp)
+2: rcll %ebx
+ rcll %edx
+ xorb $1, %dl
+ ADDR16 loop 1b
+ /* Restore registers and return */
+ movl %edx, %eax
+ popl %edx
+ popw %cx
+ popl %ebx
+ ret
+ .size rc_direct, . - rc_direct
+
+/*****************************************************************************
+ * Decode an LZMA literal
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %es:%edi : uncompressed output data pointer
+ * %edx : LZMA state
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %es:%edi : uncompressed output data pointer (updated)
+ * %edx : LZMA state
+ * CF : end of payload marker found (always zero)
+ * Corrupts:
+ * %eax
+ * %ebx
+ * %ecx
+ *****************************************************************************
+ *
+ * Literals are coded as an eight-bit tree, using a match byte if the
+ * previous symbol was not a literal.
+ *
+ */
+lzma_literal:
+ /* Get most recent output byte, if available */
+ xorl %ebx, %ebx
+ cmpl %esi, in_start(%ebp)
+ je 1f
+ movb %es:-1(%edi), %bh
+1: /* Locate probability estimate set */
+ shrb $( 8 - LZMA_LC ), %bh
+ shlb $1, %bh
+ leaw literal(%ebx,%ebx,2), %bx
+ /* Get match byte, if applicable */
+ xorw %cx, %cx
+ cmpb $STATE_LIT_MAX, %dl
+ jbe 1f
+ movl rep0(%ebp), %eax
+ notl %eax
+ movb %es:(%edi,%eax), %cl
+ movb $1, %ch
+1: /* Decode bittree */
+ call rc_bittree_match
+ /* Store output byte */
+ ADDR32 stosb
+ print_hex_byte %al
+ print_character ' '
+ /* Update LZMA state */
+ subb $3, %dl
+ jns 1f
+ xorb %dl, %dl
+1: cmpb $7, %dl
+ jb 1f
+ subb $3, %dl
+1: /* Clear CF and return */
+ clc
+ ret
+ .size lzma_literal, . - lzma_literal
+
+/*****************************************************************************
+ * Decode an LZMA length
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %ebx : length parameter pointer (offset from %ebp)
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * Corrupts:
+ * %ebx
+ *****************************************************************************
+ *
+ * Lengths are encoded as:
+ *
+ * "0" + 3 bits : lengths 2-9 ("low")
+ * "10" + 3 bits : lengths 10-17 ("mid")
+ * "11" + 8 bits : lengths 18-273 ("high")
+ */
+lzma_len:
+ /* Preserve registers */
+ pushl %eax
+ pushl %ecx
+ pushl %edi
+ movl %ebx, %edi
+ /* Start by assuming three bits and a base length of 2 */
+ movw $3, %cx
+ movw $2, len(%ebp)
+ /* Check low-length choice bit */
+ leal choice(%edi), %ebx
+ call rc_bit
+ leal low(%edi), %ebx
+ jz 1f
+ /* Check high-length choice bit */
+ leal choice2(%edi), %ebx
+ call rc_bit
+ leal mid(%edi), %ebx
+ movb $10, len(%ebp)
+ jz 1f
+ leal high(%edi), %ebx
+ movb $8, %cl
+ movb $18, len(%ebp)
+1: /* Get encoded length */
+ call rc_bittree
+ addw %ax, len(%ebp)
+ /* Restore registers and return */
+ movl %edi, %ebx
+ popl %edi
+ popl %ecx
+ popl %eax
+ ret
+ .size lzma_len, . - lzma_len
+
+/*****************************************************************************
+ * Copy (possibly repeated) matched data
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %es:%edi : uncompressed output data pointer
+ * %cl : repeated match distance index (for repeated matches)
+ * %eax : match distance (for non-repeated matches)
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %es:%edi : uncompressed output data pointer
+ * CF : match distance is out of range
+ * Corrupts:
+ * %eax
+ * %ebx
+ * %ecx
+ *****************************************************************************
+ */
+match: /* Update repeated match list */
+ print_character '['
+ movl $3, %ecx
+ jmp 1f
+match_rep:
+ print_character '['
+ print_character 'R'
+ print_hex_byte %cl
+ print_character '='
+ movzbl %cl, %ecx
+ movl reps(%ebp,%ecx,4), %eax
+ jcxz 2f
+1: movl (reps-4)(%ebp,%ecx,4), %ebx
+ movl %ebx, reps(%ebp,%ecx,4)
+ loop 1b
+ movl %eax, rep0(%ebp)
+2: /* Preserve registers */
+ pushl %esi
+ /* Get stored match length */
+ movzwl len(%ebp), %ecx
+ print_hex_dword %eax
+ print_character '+'
+ print_hex_word %cx
+ print_character ']'
+ print_character ' '
+ /* Abort with CF set if match distance is out of range */
+ movl out_start(%ebp), %esi
+ negl %esi
+ leal -1(%edi,%esi), %esi
+ cmpl %eax, %esi
+ jc 99f
+ /* Perform copy */
+ notl %eax
+ leal (%edi,%eax), %esi
+ ADDR32 es rep movsb
+99: /* Restore registers and return */
+ popl %esi
+ ret
+ .size match, . - match
+
+/*****************************************************************************
+ * Decode an LZMA match
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %es:%edi : uncompressed output data pointer
+ * %edx : LZMA state
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %es:%edi : uncompressed output data pointer
+ * %edx : LZMA state
+ * CF : end of payload marker found
+ * Corrupts:
+ * %eax
+ * %ebx
+ * %ecx
+ *****************************************************************************
+ *
+ * Matches are encoded as an LZMA length followed by a 6-bit "distance
+ * slot" code, 0-26 fixed-probability bits, and 0-5 context encoded
+ * bits.
+ */
+lzma_match:
+ /* Preserve registers */
+ pushl %edi
+ /* Update LZMA state */
+ cmpb $STATE_LIT_MAX, %dl
+ movb $STATE_LIT_MATCH, %dl
+ jbe 1f
+ movb $STATE_NONLIT_MATCH, %dl
+1: /* Decode length */
+ movl $match_len_dec, %ebx
+ call lzma_len
+ /* Decode distance slot */
+ movw len(%ebp), %bx
+ subw $2, %bx
+ cmpw $4, %bx
+ jb 1f
+ movw $3, %bx
+1: shlw $7, %bx
+ addw $dist_slot, %bx
+ movw $6, %cx
+ call rc_bittree
+ /* Distance slots 0-3 are literal distances */
+ cmpb $4, %al
+ jb 99f
+ /* Determine initial bits: 10/11 for even/odd distance codes */
+ movl %eax, %edi
+ andw $1, %di
+ orw $2, %di
+ /* Determine number of context-encoded bits */
+ movw %ax, %cx
+ shrb $1, %cl
+ decb %cl
+ /* Select context to be used in absence of fixed-probability bits */
+ movl %edi, %ebx
+ shlw %cl, %bx
+ subw %ax, %bx
+ leaw (dist_special-2)(%ebx,%ebx), %bx
+ /* Decode fixed-probability bits, if any */
+ cmpb $6, %cl
+ jb 1f
+ subb $4, %cl
+ shll %cl, %edi
+ call rc_direct
+ orl %eax, %edi
+ /* Select context to be used in presence of fixed-probability bits */
+ movb $4, %cl
+ movl $dist_align, %ebx
+1: /* Decode context-encoded bits */
+ shll %cl, %edi
+ call rc_bittree_reverse
+ orl %edi, %eax
+99: /* Restore registers and tail-call */
+ popl %edi
+ jmp match
+ .size lzma_match, . - lzma_match
+
+/*****************************************************************************
+ * Decode an LZMA repeated match
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %es:%edi : uncompressed output data pointer
+ * %edx : LZMA state
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %es:%edi : uncompressed output data pointer
+ * %edx : LZMA state
+ * CF : end of payload marker found
+ * Corrupts:
+ * %eax
+ * %ebx
+ * %ecx
+ *****************************************************************************
+ *
+ * Repeated matches are encoded as:
+ *
+ * "00" : shortrep0 (implicit length 1)
+ * "01" + len : longrep0
+ * "10" + len : longrep1
+ * "110" + len : longrep2
+ * "111" + len : longrep3
+ */
+lzma_rep_match:
+ /* Initially assume longrep0 */
+ movw $(STATE_LIT_LONGREP << 8), %cx
+ /* Get is_rep0 bit */
+ leal is_rep0(,%edx,2), %ebx
+ call rc_bit
+ jnz 1f
+ /* Get is_rep0_long bit */
+ leal is_rep0_long(,%edx,2), %ebx
+ call rc_bit
+ jnz 98f
+ movw $1, len(%ebp)
+ movb $STATE_LIT_SHORTREP, %ch
+ jmp 99f
+1: /* Get is_rep1 bit */
+ incb %cl
+ leal is_rep1(,%edx,2), %ebx
+ call rc_bit
+ jz 98f
+ /* Get is_rep2 bit */
+ incb %cl
+ leal is_rep2(,%edx,2), %ebx
+ call rc_bit
+ adcb $0, %cl
+98: /* Decode length */
+ movl $rep_len_dec, %ebx
+ call lzma_len
+99: /* Update LZMA state */
+ cmpb $STATE_LIT_MAX, %dl
+ movb %ch, %dl
+ jbe 1f
+ movb $STATE_NONLIT_REP, %dl
+1: /* Tail call */
+ jmp match_rep
+ .size lzma_match, . - lzma_match
+
+/*****************************************************************************
+ * Decode one LZMA symbol
+ *
+ * Parameters:
+ * %ss:%ebp : LZMA parameter block
+ * %ds:%esi : compressed input data pointer
+ * %es:%edi : uncompressed output data pointer
+ * %edx : LZMA state
+ * Returns:
+ * %ds:%esi : compressed input data pointer (possibly updated)
+ * %es:%edi : uncompressed output data pointer (updated)
+ * %edx : LZMA state
+ * CF : end of payload marker found
+ * Corrupts:
+ * %eax
+ * %ebx
+ * %ecx
+ *****************************************************************************
+ */
+lzma_decode:
+ /* Get is_match bit */
+ leal is_match(,%edx,2), %ebx
+ call rc_bit
+ jz lzma_literal
+ /* Get is_rep bit */
+ leal is_rep(,%edx,2), %ebx
+ call rc_bit
+ jz lzma_match
+ jmp lzma_rep_match
+ .size lzma_decode, . - lzma_decode
+
+/****************************************************************************
+ * Undo effect of branch-call-jump (BCJ) filter
+ *
+ * Parameters:
+ * %es:%esi : start of uncompressed output data (note %es)
+ * %es:%edi : end of uncompressed output data
+ * Returns:
+ * Corrupts:
+ * %eax
+ * %ebx
+ * %ecx
+ * %edx
+ * %esi
+ *****************************************************************************
+ */
+bcj_filter:
+ /* Store (negative) start of data in %edx */
+ movl %esi, %edx
+ negl %edx
+ /* Calculate limit in %ecx */
+ leal -5(%edi,%edx), %ecx
+1: /* Calculate offset in %ebx */
+ leal (%esi,%edx), %ebx
+ /* Check for end of data */
+ cmpl %ecx, %ebx
+ ja 99f
+ /* Check for an opcode which would be followed by a rel32 address */
+ ADDR32 es lodsb
+ andb $0xfe, %al
+ cmpb $0xe8, %al
+ jne 1b
+ /* Get current jump target value in %eax */
+ ADDR32 es lodsl
+ /* Convert absolute addresses in the range [0,limit) back to
+ * relative addresses in the range [-offset,limit-offset).
+ */
+ cmpl %ecx, %eax
+ jae 2f
+ subl %ebx,%es:-4(%esi)
+2: /* Convert negative numbers in the range [-offset,0) back to
+ * positive numbers in the range [limit-offset,limit).
+ */
+ notl %eax /* Range is now [0,offset) */
+ cmpl %ebx, %eax
+ jae 1b
+ addl %ecx,%es:-4(%esi)
+ jmp 1b
+99: /* Return */
+ ret
+ .size bcj_filter, . - bcj_filter
+
+/****************************************************************************
+ * decompress (real-mode or 16/32-bit protected-mode near call)
+ *
+ * Decompress data
+ *
+ * Parameters (passed via registers):
+ * %ds:%esi : Start of compressed input data
+ * %es:%edi : Start of output buffer
+ * Returns:
+ * %ds:%esi - End of compressed input data
+ * %es:%edi - End of decompressed output data
+ * All other registers are preserved
+ *
+ * NOTE: It would be possible to build a smaller version of the
+ * decompression code for -DKEEP_IT_REAL by using 16-bit registers
+ * where possible.
+ ****************************************************************************
+ */
+ .globl decompress
+decompress:
+ /* Preserve registers */
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ pushl %ebp
+ /* Allocate parameter block */
+ subl $sizeof__lzma_dec, %esp
+ movl %esp, %ebp
+ /* Zero parameter block and set all probabilities to 0.5 */
+ pushl %edi
+ pushw %es
+ pushw %ss
+ popw %es
+ movl %ebp, %edi
+ xorl %eax, %eax
+ movl $( sizeof__lzma_dec / 4 ), %ecx
+ ADDR32 rep stosl
+ leal probs(%ebp), %edi
+ movw $( ( 1 << 11 ) / 2 ), %ax
+ movl $( ( sizeof__lzma_dec - probs ) / 2 ), %ecx
+ ADDR32 rep stosw
+ popw %es
+ popl %edi
+ /* Initialise remaining parameters */
+ movl %esi, in_start(%ebp)
+ movl %edi, out_start(%ebp)
+ print_character '\n'
+ ADDR32 lodsb /* discard initial byte */
+ print_hex_byte %al
+ ADDR32 lodsl
+ bswapl %eax
+ print_hex_dword %eax
+ print_character '\n'
+ movl %eax, rc_code(%ebp)
+ decl rc_range(%ebp)
+ movl $STATE_LIT_LIT, %edx
+1: /* Decompress until we reach end of buffer */
+ call lzma_decode
+ jnc 1b
+ print_character '\n'
+ /* Undo BCJ filter */
+ pushl %esi
+ movl out_start(%ebp), %esi
+ call bcj_filter
+ popl %esi
+ /* Restore registers and return */
+ addl $sizeof__lzma_dec, %esp
+ popl %ebp
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+ ret
+
+ /* Specify minimum amount of stack space required */
+ .globl _min_decompress_stack
+ .equ _min_decompress_stack, ( sizeof__lzma_dec + 512 /* margin */ )
diff --git a/src/arch/i386/prefix/unlzma16.S b/src/arch/i386/prefix/unlzma16.S
new file mode 100644
index 00000000..f58b0f73
--- /dev/null
+++ b/src/arch/i386/prefix/unlzma16.S
@@ -0,0 +1,9 @@
+/*
+ * 16-bit version of the decompressor
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER )
+
+#define CODE16
+#include "unlzma.S"
diff --git a/src/util/zbin.c b/src/util/zbin.c
index 3b7cf95b..1862a382 100644
--- a/src/util/zbin.c
+++ b/src/util/zbin.c
@@ -1,13 +1,21 @@
+#include <stdint.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
#include <sys/stat.h>
-
-#define ENCODE
-#define VERBOSE
-#include "nrv2b.c"
-FILE *infile, *outfile;
+#include <lzma.h>
#define DEBUG 0
+/* LZMA filter choices. Must match those used by unlzma.S */
+#define LZMA_LC 2
+#define LZMA_LP 0
+#define LZMA_PB 0
+
+/* LZMA preset choice. This is a policy decision */
+#define LZMA_PRESET ( LZMA_PRESET_DEFAULT | LZMA_PRESET_EXTREME )
+
struct input_file {
void *buf;
size_t len;
@@ -177,13 +185,75 @@ static int process_zinfo_copy ( struct input_file *input,
return 0;
}
+#define OPCODE_CALL 0xe8
+#define OPCODE_JMP 0xe9
+
+static void bcj_filter ( void *data, size_t len ) {
+ struct {
+ uint8_t opcode;
+ int32_t target;
+ } __attribute__ (( packed )) *jump;
+ ssize_t limit = ( len - sizeof ( *jump ) );
+ ssize_t offset;
+
+ /* liblzma does include an x86 BCJ filter, but it's hideously
+ * convoluted and undocumented. This BCJ filter is
+ * substantially simpler and achieves the same compression (at
+ * the cost of requiring the decompressor to know the size of
+ * the decompressed data, which we already have in iPXE).
+ */
+ for ( offset = 0 ; offset <= limit ; offset++ ) {
+ jump = ( data + offset );
+
+ /* Skip instructions that are not followed by a rel32 address */
+ if ( ( jump->opcode != OPCODE_CALL ) &&
+ ( jump->opcode != OPCODE_JMP ) )
+ continue;
+
+ /* Convert rel32 address to an absolute address. To
+ * avoid false positives (which damage the compression
+ * ratio), we should check that the jump target is
+ * within the range [0,limit).
+ *
+ * Some output values would then end up being mapped
+ * from two distinct input values, making the
+ * transformation irreversible. To solve this, we
+ * transform such values back into the part of the
+ * range which would otherwise correspond to no input
+ * values.
+ */
+ if ( ( jump->target >= -offset ) &&
+ ( jump->target < ( limit - offset ) ) ) {
+ /* Convert relative addresses in the range
+ * [-offset,limit-offset) to absolute
+ * addresses in the range [0,limit).
+ */
+ jump->target += offset;
+ } else if ( ( jump->target >= ( limit - offset ) ) &&
+ ( jump->target < limit ) ) {
+ /* Convert positive numbers in the range
+ * [limit-offset,limit) to negative numbers in
+ * the range [-offset,0).
+ */
+ jump->target -= limit;
+ }
+ offset += sizeof ( jump->target );
+ };
+}
+
static int process_zinfo_pack ( struct input_file *input,
struct output_file *output,
union zinfo_record *zinfo ) {
struct zinfo_pack *pack = &zinfo->pack;
size_t offset = pack->offset;
size_t len = pack->len;
- unsigned long packed_len;
+ size_t packed_len = 0;
+ size_t remaining = ( output->max_len - output->len );
+ lzma_options_lzma options;
+ const lzma_filter filters[] = {
+ { .id = LZMA_FILTER_LZMA1, .options = &options },
+ { .id = LZMA_VLI_UNKNOWN }
+ };
if ( ( offset + len ) > input->len ) {
fprintf ( stderr, "Input buffer overrun on pack\n" );
@@ -196,9 +266,15 @@ static int process_zinfo_pack ( struct input_file *input,
return -1;
}
- if ( ucl_nrv2b_99_compress ( ( input->buf + offset ), len,
- ( output->buf + output->len ),
- &packed_len, 0 ) != UCL_E_OK ) {
+ bcj_filter ( ( input->buf + offset ), len );
+
+ lzma_lzma_preset ( &options, LZMA_PRESET );
+ options.lc = LZMA_LC;
+ options.lp = LZMA_LP;
+ options.pb = LZMA_PB;
+ if ( lzma_raw_buffer_encode ( filters, NULL, ( input->buf + offset ),
+ len, ( output->buf + output->len ),
+ &packed_len, remaining ) != LZMA_OK ) {
fprintf ( stderr, "Compression failure\n" );
return -1;
}
@@ -206,7 +282,7 @@ static int process_zinfo_pack ( struct input_file *input,
if ( DEBUG ) {
fprintf ( stderr, "PACK [%#zx,%#zx) to [%#zx,%#zx)\n",
offset, ( offset + len ), output->len,
- ( size_t )( output->len + packed_len ) );
+ ( output->len + packed_len ) );
}
output->len += packed_len;