-/*--
- This file is a part of bzip2 and/or libbzip2, a program and
- library for lossless, block-sorting data compression.
-
- Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- 2. The origin of this software must not be misrepresented; you must
- not claim that you wrote the original software. If you use this
- software in a product, an acknowledgment in the product
- documentation would be appreciated but is not required.
-
- 3. Altered source versions must be plainly marked as such, and must
- not be misrepresented as being the original software.
-
- 4. The name of the author may not be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
- THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- Julian Seward, Cambridge, UK.
- jseward@acm.org
- bzip2/libbzip2 version 1.0 of 21 March 2000
-
- This program is based on (at least) the work of:
- Mike Burrows
- David Wheeler
- Peter Fenwick
- Alistair Moffat
- Radford Neal
- Ian H. Witten
- Robert Sedgewick
- Jon L. Bentley
-
- For more information on these sources, see the manual.
---*/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <getopt.h>
-#include <unistd.h>
-#include <busybox.h>
-
-//#define TRUE 1
-//#define FALSE 0
-
-#define MTFA_SIZE 4096
-#define MTFL_SIZE 16
-#define BZ_N_GROUPS 6
-#define BZ_G_SIZE 50
-#define BZ_MAX_ALPHA_SIZE 258
-
-#define BZ_OK 0
-#define BZ_STREAM_END 4
-#define BZ_SEQUENCE_ERROR (-1)
-#define BZ_DATA_ERROR (-4)
-#define BZ_DATA_ERROR_MAGIC (-5)
-#define BZ_IO_ERROR (-6)
-#define BZ_UNEXPECTED_EOF (-7)
-
-#define BZ_RUNA 0
-#define BZ_RUNB 1
-
-#define BZ_MAX_UNUSED 5000
-#define FILE_NAME_LEN 1034
-/*-- states for decompression. --*/
-
-#define BZ_X_IDLE 1
-#define BZ_X_OUTPUT 2
-
-#define BZ_X_MAGIC_1 10
-#define BZ_X_MAGIC_2 11
-#define BZ_X_MAGIC_3 12
-#define BZ_X_MAGIC_4 13
-#define BZ_X_BLKHDR_1 14
-#define BZ_X_BLKHDR_2 15
-#define BZ_X_BLKHDR_3 16
-#define BZ_X_BLKHDR_4 17
-#define BZ_X_BLKHDR_5 18
-#define BZ_X_BLKHDR_6 19
-#define BZ_X_BCRC_1 20
-#define BZ_X_BCRC_2 21
-#define BZ_X_BCRC_3 22
-#define BZ_X_BCRC_4 23
-#define BZ_X_RANDBIT 24
-#define BZ_X_ORIGPTR_1 25
-#define BZ_X_ORIGPTR_2 26
-#define BZ_X_ORIGPTR_3 27
-#define BZ_X_MAPPING_1 28
-#define BZ_X_MAPPING_2 29
-#define BZ_X_SELECTOR_1 30
-#define BZ_X_SELECTOR_2 31
-#define BZ_X_SELECTOR_3 32
-#define BZ_X_CODING_1 33
-#define BZ_X_CODING_2 34
-#define BZ_X_CODING_3 35
-#define BZ_X_MTF_1 36
-#define BZ_X_MTF_2 37
-#define BZ_X_MTF_3 38
-#define BZ_X_MTF_4 39
-#define BZ_X_MTF_5 40
-#define BZ_X_MTF_6 41
-#define BZ_X_ENDHDR_2 42
-#define BZ_X_ENDHDR_3 43
-#define BZ_X_ENDHDR_4 44
-#define BZ_X_ENDHDR_5 45
-#define BZ_X_ENDHDR_6 46
-#define BZ_X_CCRC_1 47
-#define BZ_X_CCRC_2 48
-#define BZ_X_CCRC_3 49
-#define BZ_X_CCRC_4 50
-
-#define BZ_MAX_CODE_LEN 23
-#define OM_TEST 3
+/* vi: set sw=4 ts=4: */
+/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
+
+ Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
+ which also acknowledges contributions by Mike Burrows, David Wheeler,
+ Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
+ Robert Sedgewick, and Jon L. Bentley.
+
+ Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
+*/
+
+/*
+ Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).
+
+ More efficient reading of Huffman codes, a streamlined read_bunzip()
+ function, and various other tweaks. In (limited) tests, approximately
+ 20% faster than bzcat on x86 and about 10% faster on arm.
+
+ Note that about 2/3 of the time is spent in read_unzip() reversing
+ the Burrows-Wheeler transformation. Much of that time is delay
+ resulting from cache misses.
+
+ I would ask that anyone benefiting from this work, especially those
+ using it in commercial products, consider making a donation to my local
+ non-profit hospice organization (www.hospiceacadiana.com) in the name of
+ the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003.
+
+ Manuel
+ */
+
+#include "libbb.h"
+#include "unarchive.h"
+
+/* Constants for Huffman coding */
+#define MAX_GROUPS 6
+#define GROUP_SIZE 50 /* 64 would have been more efficient */
+#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
+#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
+#define SYMBOL_RUNA 0
+#define SYMBOL_RUNB 1
+
+/* Status return values */
+#define RETVAL_OK 0
+#define RETVAL_LAST_BLOCK (-1)
+#define RETVAL_NOT_BZIP_DATA (-2)
+#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
+#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4)
+#define RETVAL_DATA_ERROR (-5)
+#define RETVAL_OUT_OF_MEMORY (-6)
+#define RETVAL_OBSOLETE_INPUT (-7)
+
+/* Other housekeeping constants */
+#define IOBUF_SIZE 4096
+
+/* This is what we know about each Huffman coding group */
+struct group_data {
+ /* We have an extra slot at the end of limit[] for a sentinal value. */
+ int limit[MAX_HUFCODE_BITS+1],base[MAX_HUFCODE_BITS],permute[MAX_SYMBOLS];
+ int minLen, maxLen;
+};
+
+/* Structure holding all the housekeeping data, including IO buffers and
+ memory that persists between calls to bunzip */
typedef struct {
- char *next_in;
- unsigned int avail_in;
+ /* State for interrupting output loop */
- char *next_out;
- unsigned int avail_out;
+ int writeCopies,writePos,writeRunCountdown,writeCount,writeCurrent;
- void *state;
+ /* I/O tracking data (file handles, buffers, positions, etc.) */
-} bz_stream;
+ int in_fd,out_fd,inbufCount,inbufPos /*,outbufPos*/;
+ unsigned char *inbuf /*,*outbuf*/;
+ unsigned int inbufBitCount, inbufBits;
-typedef struct {
- bz_stream strm;
- FILE *handle;
- unsigned char initialisedOk;
- char buf[BZ_MAX_UNUSED];
- int lastErr;
- int bufN;
-} bzFile;
-
-/*-- Structure holding all the decompression-side stuff. --*/
-typedef struct {
- /* pointer back to the struct bz_stream */
- bz_stream* strm;
-
- /* state indicator for this stream */
- int state;
-
- /* for doing the final run-length decoding */
- unsigned char state_out_ch;
- int state_out_len;
- unsigned char blockRandomised;
- int rNToGo;
- int rTPos;
-
- /* the buffer for bit stream reading */
- unsigned int bsBuff;
- int bsLive;
-
- /* misc administratium */
- int blockSize100k;
- int currBlockNo;
-
- /* for undoing the Burrows-Wheeler transform */
- int origPtr;
- unsigned int tPos;
- int k0;
- int unzftab[256];
- int nblock_used;
- int cftab[257];
- int cftabCopy[257];
-
- /* for undoing the Burrows-Wheeler transform (FAST) */
- unsigned int *tt;
-
- /* stored and calculated CRCs */
- unsigned int storedBlockCRC;
- unsigned int storedCombinedCRC;
- unsigned int calculatedBlockCRC;
- unsigned int calculatedCombinedCRC;
-
- /* map of bytes used in block */
- int nInUse;
- unsigned char inUse[256];
- unsigned char inUse16[16];
- unsigned char seqToUnseq[256];
-
- /* for decoding the MTF values */
- unsigned char mtfa [MTFA_SIZE];
- unsigned char selector [2 + (900000 / BZ_G_SIZE)];
- unsigned char selectorMtf[2 + (900000 / BZ_G_SIZE)];
- unsigned char len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int mtfbase[256 / MTFL_SIZE];
-
- int limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int minLens[BZ_N_GROUPS];
-
- /* save area for scalars in the main decompress code */
- int save_i;
- int save_j;
- int save_t;
- int save_alphaSize;
- int save_nGroups;
- int save_nSelectors;
- int save_EOB;
- int save_groupNo;
- int save_groupPos;
- int save_nextSym;
- int save_nblockMAX;
- int save_nblock;
- int save_es;
- int save_N;
- int save_curr;
- int save_zt;
- int save_zn;
- int save_zvec;
- int save_zj;
- int save_gSel;
- int save_gMinlen;
- int *save_gLimit;
- int *save_gBase;
- int *save_gPerm;
-} DState;
-
-int BZ2_rNums[512];
-char inName[FILE_NAME_LEN];
-char outName[FILE_NAME_LEN];
-int srcMode;
-int opMode;
-unsigned char deleteOutputOnInterrupt;
-FILE *outputHandleJustInCase;
-int numFileNames;
-int numFilesProcessed;
-int exitValue;
-
-const unsigned int BZ2_crc32Table[256] = {
-
- /*-- Ugly, innit? --*/
-
- 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
- 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
- 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
- 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
- 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
- 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
- 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
- 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
- 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
- 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
- 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
- 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
- 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
- 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
- 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
- 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
- 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
- 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
- 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
- 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
- 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
- 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
- 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
- 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
- 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
- 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
- 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
- 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
- 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
- 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
- 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
- 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
- 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
- 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
- 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
- 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
- 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
- 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
- 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
- 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
- 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
- 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
- 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
- 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
- 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
- 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
- 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
- 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
- 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
- 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
- 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
- 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
- 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
- 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
- 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
- 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
- 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
- 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
- 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
- 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
- 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
- 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
- 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
- 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
-};
+ /* The CRC values stored in the block header and calculated from the data */
+
+ uint32_t headerCRC, totalCRC, writeCRC;
+ uint32_t *crc32Table;
+ /* Intermediate buffer and its size (in bytes) */
+
+ unsigned int *dbuf, dbufSize;
+
+ /* These things are a bit too big to go on the stack */
+
+ unsigned char selectors[32768]; /* nSelectors=15 bits */
+ struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
-static void bz_rand_udp_mask(DState *s)
+ /* For I/O error handling */
+
+ jmp_buf jmpbuf;
+} bunzip_data;
+
+/* Return the next nnn bits of input. All reads from the compressed input
+ are done through this function. All reads are big endian */
+
+static unsigned int get_bits(bunzip_data *bd, char bits_wanted)
{
- if (s->rNToGo == 0) {
- s->rNToGo = BZ2_rNums[s->rTPos];
- s->rTPos++;
- if (s->rTPos == 512) {
- s->rTPos = 0;
+ unsigned int bits=0;
+
+ /* If we need to get more data from the byte buffer, do so. (Loop getting
+ one byte at a time to enforce endianness and avoid unaligned access.) */
+
+ while (bd->inbufBitCount<bits_wanted) {
+
+ /* If we need to read more data from file into byte buffer, do so */
+
+ if(bd->inbufPos==bd->inbufCount) {
+ if((bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE)) <= 0)
+ longjmp(bd->jmpbuf,RETVAL_UNEXPECTED_INPUT_EOF);
+ bd->inbufPos=0;
}
- }
- s->rNToGo--;
-}
-static unsigned char myfeof(FILE *f)
-{
- int c = fgetc(f);
- if (c == EOF) {
- return(TRUE);
- }
- ungetc(c, f);
- return(FALSE);
-}
+ /* Avoid 32-bit overflow (dump bit buffer to top of output) */
-static void BZ2_hbCreateDecodeTables(int *limit, int *base, int *perm, unsigned char *length, int minLen, int maxLen, int alphaSize )
-{
- int pp, i, j, vec;
-
- pp = 0;
- for (i = minLen; i <= maxLen; i++) {
- for (j = 0; j < alphaSize; j++) {
- if (length[j] == i) {
- perm[pp] = j;
- pp++;
- }
+ if(bd->inbufBitCount>=24) {
+ bits=bd->inbufBits&((1<<bd->inbufBitCount)-1);
+ bits_wanted-=bd->inbufBitCount;
+ bits<<=bits_wanted;
+ bd->inbufBitCount=0;
}
- }
- for (i = 0; i < BZ_MAX_CODE_LEN; i++) {
- base[i] = 0;
- }
+ /* Grab next 8 bits of input from buffer. */
- for (i = 0; i < alphaSize; i++) {
- base[length[i]+1]++;
+ bd->inbufBits=(bd->inbufBits<<8)|bd->inbuf[bd->inbufPos++];
+ bd->inbufBitCount+=8;
}
- for (i = 1; i < BZ_MAX_CODE_LEN; i++) {
- base[i] += base[i-1];
- }
+ /* Calculate result */
- for (i = 0; i < BZ_MAX_CODE_LEN; i++) {
- limit[i] = 0;
- }
- vec = 0;
+ bd->inbufBitCount-=bits_wanted;
+ bits|=(bd->inbufBits>>bd->inbufBitCount)&((1<<bits_wanted)-1);
- for (i = minLen; i <= maxLen; i++) {
- vec += (base[i+1] - base[i]);
- limit[i] = vec-1;
- vec <<= 1;
- }
- for (i = minLen + 1; i <= maxLen; i++) {
- base[i] = ((limit[i-1] + 1) << 1) - base[i];
- }
+ return bits;
}
+/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
-static int get_bits(DState *s, int *vvv, char nnn)
+static int get_next_block(bunzip_data *bd)
{
- while (1) {
- if (s->bsLive >= nnn) {
- *vvv = (s->bsBuff >> (s->bsLive-nnn)) & ((1 << nnn)-1);
- s->bsLive -= nnn;
- break;
+ struct group_data *hufGroup;
+ int dbufCount,nextSym,dbufSize,groupCount,*base,*limit,selector,
+ i,j,k,t,runPos,symCount,symTotal,nSelectors,byteCount[256];
+ unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
+ unsigned int *dbuf,origPtr;
+
+ dbuf=bd->dbuf;
+ dbufSize=bd->dbufSize;
+ selectors=bd->selectors;
+
+ /* Reset longjmp I/O error handling */
+
+ i=setjmp(bd->jmpbuf);
+ if (i) return i;
+
+ /* Read in header signature and CRC, then validate signature.
+ (last block signature means CRC is for whole file, return now) */
+
+ i = get_bits(bd,24);
+ j = get_bits(bd,24);
+ bd->headerCRC=get_bits(bd,32);
+ if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK;
+ if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA;
+
+ /* We can add support for blockRandomised if anybody complains. There was
+ some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
+ it didn't actually work. */
+
+ if (get_bits(bd,1)) return RETVAL_OBSOLETE_INPUT;
+ if ((origPtr=get_bits(bd,24)) > dbufSize) return RETVAL_DATA_ERROR;
+
+ /* mapping table: if some byte values are never used (encoding things
+ like ascii text), the compression code removes the gaps to have fewer
+ symbols to deal with, and writes a sparse bitfield indicating which
+ values were present. We make a translation table to convert the symbols
+ back to the corresponding bytes. */
+
+ t=get_bits(bd, 16);
+ symTotal=0;
+ for (i=0;i<16;i++) {
+ if(t&(1<<(15-i))) {
+ k=get_bits(bd,16);
+ for (j=0;j<16;j++)
+ if(k&(1<<(15-j))) symToByte[symTotal++]=(16*i)+j;
}
- if (s->strm->avail_in == 0) {
- return(FALSE);
- }
- s->bsBuff = (s->bsBuff << 8) | ((unsigned int) (*((unsigned char*)(s->strm->next_in))));
- s->bsLive += 8;
- s->strm->next_in++;
- s->strm->avail_in--;
}
- return(TRUE);
-}
-static int bz_get_fast(DState *s)
-{
- int cccc;
- s->tPos = s->tt[s->tPos];
- cccc = (unsigned char)(s->tPos & 0xff);
- s->tPos >>= 8;
- return(cccc);
-}
+ /* How many different Huffman coding groups does this block use? */
-/*---------------------------------------------------*/
-static inline int BZ2_decompress(DState *s)
-{
- int uc = 0;
- int retVal;
- int minLen, maxLen;
-
- /* stuff that needs to be saved/restored */
- int i;
- int j;
- int t;
- int alphaSize;
- int nGroups;
- int nSelectors;
- int EOB;
- int groupNo;
- int groupPos;
- int nextSym;
- int nblockMAX;
- int nblock;
- int es;
- int N;
- int curr;
- int zt;
- int zn;
- int zvec;
- int zj;
- int gSel;
- int gMinlen;
- int *gLimit;
- int *gBase;
- int *gPerm;
- int switch_val;
-
- int get_mtf_val_init(void)
- {
- if (groupPos == 0) {
- groupNo++;
- if (groupNo >= nSelectors) {
- retVal = BZ_DATA_ERROR;
- return(FALSE);
- }
- groupPos = BZ_G_SIZE;
- gSel = s->selector[groupNo];
- gMinlen = s->minLens[gSel];
- gLimit = &(s->limit[gSel][0]);
- gPerm = &(s->perm[gSel][0]);
- gBase = &(s->base[gSel][0]);
- }
- groupPos--;
- zn = gMinlen;
- return(TRUE);
- }
+ groupCount=get_bits(bd,3);
+ if (groupCount<2 || groupCount>MAX_GROUPS) return RETVAL_DATA_ERROR;
- if (s->state == BZ_X_MAGIC_1) {
- /*initialise the save area*/
- s->save_i = 0;
- s->save_j = 0;
- s->save_t = 0;
- s->save_alphaSize = 0;
- s->save_nGroups = 0;
- s->save_nSelectors = 0;
- s->save_EOB = 0;
- s->save_groupNo = 0;
- s->save_groupPos = 0;
- s->save_nextSym = 0;
- s->save_nblockMAX = 0;
- s->save_nblock = 0;
- s->save_es = 0;
- s->save_N = 0;
- s->save_curr = 0;
- s->save_zt = 0;
- s->save_zn = 0;
- s->save_zvec = 0;
- s->save_zj = 0;
- s->save_gSel = 0;
- s->save_gMinlen = 0;
- s->save_gLimit = NULL;
- s->save_gBase = NULL;
- s->save_gPerm = NULL;
- }
+ /* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding
+ group. Read in the group selector list, which is stored as MTF encoded
+ bit runs. (MTF=Move To Front, as each value is used it's moved to the
+ start of the list.) */
- /*restore from the save area*/
- i = s->save_i;
- j = s->save_j;
- t = s->save_t;
- alphaSize = s->save_alphaSize;
- nGroups = s->save_nGroups;
- nSelectors = s->save_nSelectors;
- EOB = s->save_EOB;
- groupNo = s->save_groupNo;
- groupPos = s->save_groupPos;
- nextSym = s->save_nextSym;
- nblockMAX = s->save_nblockMAX;
- nblock = s->save_nblock;
- es = s->save_es;
- N = s->save_N;
- curr = s->save_curr;
- zt = s->save_zt;
- zn = s->save_zn;
- zvec = s->save_zvec;
- zj = s->save_zj;
- gSel = s->save_gSel;
- gMinlen = s->save_gMinlen;
- gLimit = s->save_gLimit;
- gBase = s->save_gBase;
- gPerm = s->save_gPerm;
-
- retVal = BZ_OK;
- switch_val = s->state;
- switch (switch_val) {
- case BZ_X_MAGIC_1:
- s->state = BZ_X_MAGIC_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 'B') {
- retVal = BZ_DATA_ERROR_MAGIC;
- goto save_state_and_return;
- }
+ if(!(nSelectors=get_bits(bd, 15))) return RETVAL_DATA_ERROR;
+ for (i=0; i<groupCount; i++) mtfSymbol[i] = i;
+ for (i=0; i<nSelectors; i++) {
- case BZ_X_MAGIC_2:
- s->state = BZ_X_MAGIC_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 'Z') {
- retVal = BZ_DATA_ERROR_MAGIC;
- goto save_state_and_return;
- }
+ /* Get next value */
- case BZ_X_MAGIC_3:
- s->state = BZ_X_MAGIC_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 'h') {
- retVal = BZ_DATA_ERROR_MAGIC;
- goto save_state_and_return;
- }
+ for (j=0;get_bits(bd,1);j++) if (j>=groupCount) return RETVAL_DATA_ERROR;
- case BZ_X_MAGIC_4:
- s->state = BZ_X_MAGIC_4;
- if (! get_bits(s, &s->blockSize100k, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if ((s->blockSize100k < '1') || (s->blockSize100k > '9')) {
- retVal = BZ_DATA_ERROR_MAGIC;
- goto save_state_and_return;
- }
- s->blockSize100k -= '0';
+ /* Decode MTF to get the next selector */
- s->tt = xmalloc(s->blockSize100k * 100000 * sizeof(int));
+ uc = mtfSymbol[j];
+ for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1];
+ mtfSymbol[0]=selectors[i]=uc;
+ }
- case BZ_X_BLKHDR_1:
- s->state = BZ_X_BLKHDR_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
+ /* Read the Huffman coding tables for each group, which code for symTotal
+ literal symbols, plus two run symbols (RUNA, RUNB) */
- if (uc == 0x17) {
- goto endhdr_2;
- }
- if (uc != 0x31) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ symCount=symTotal+2;
+ for (j=0; j<groupCount; j++) {
+ unsigned char length[MAX_SYMBOLS],temp[MAX_HUFCODE_BITS+1];
+ int minLen, maxLen, pp;
- case BZ_X_BLKHDR_2:
- s->state = BZ_X_BLKHDR_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x41) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ /* Read Huffman code lengths for each symbol. They're stored in
+ a way similar to mtf; record a starting value for the first symbol,
+ and an offset from the previous value for everys symbol after that.
+ (Subtracting 1 before the loop and then adding it back at the end is
+ an optimization that makes the test inside the loop simpler: symbol
+ length 0 becomes negative, so an unsigned inequality catches it.) */
- case BZ_X_BLKHDR_3:
- s->state = BZ_X_BLKHDR_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x59) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ t=get_bits(bd, 5)-1;
+ for (i = 0; i < symCount; i++) {
+ for (;;) {
+ if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
+ return RETVAL_DATA_ERROR;
- case BZ_X_BLKHDR_4:
- s->state = BZ_X_BLKHDR_4;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x26) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ /* If first bit is 0, stop. Else second bit indicates whether
+ to increment or decrement the value. Optimization: grab 2
+ bits and unget the second if the first was 0. */
- case BZ_X_BLKHDR_5:
- s->state = BZ_X_BLKHDR_5;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x53) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ k = get_bits(bd,2);
+ if (k < 2) {
+ bd->inbufBitCount++;
+ break;
+ }
- case BZ_X_BLKHDR_6:
- s->state = BZ_X_BLKHDR_6;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x59) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
+ /* Add one if second bit 1, else subtract 1. Avoids if/else */
+
+ t+=(((k+1)&2)-1);
}
- s->currBlockNo++;
- s->storedBlockCRC = 0;
+ /* Correct for the initial -1, to get the final symbol length */
- case BZ_X_BCRC_1:
- s->state = BZ_X_BCRC_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
+ length[i]=t+1;
+ }
- case BZ_X_BCRC_2:
- s->state = BZ_X_BCRC_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
+ /* Find largest and smallest lengths in this group */
- case BZ_X_BCRC_3:
- s->state = BZ_X_BCRC_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
+ minLen=maxLen=length[0];
+ for (i = 1; i < symCount; i++) {
+ if(length[i] > maxLen) maxLen = length[i];
+ else if(length[i] < minLen) minLen = length[i];
+ }
- case BZ_X_BCRC_4:
- s->state = BZ_X_BCRC_4;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedBlockCRC = (s->storedBlockCRC << 8) | ((unsigned int)uc);
-
- case BZ_X_RANDBIT:
- s->state = BZ_X_RANDBIT;
- {
- int tmp = s->blockRandomised;
- const int ret = get_bits(s, &tmp, 1);
- s->blockRandomised = tmp;
- if (! ret) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- }
+ /* Calculate permute[], base[], and limit[] tables from length[].
+ *
+ * permute[] is the lookup table for converting Huffman coded symbols
+ * into decoded symbols. base[] is the amount to subtract from the
+ * value of a Huffman symbol of a given length when using permute[].
+ *
+ * limit[] indicates the largest numerical value a symbol with a given
+ * number of bits can have. This is how the Huffman codes can vary in
+ * length: each code with a value>limit[length] needs another bit.
+ */
+
+ hufGroup=bd->groups+j;
+ hufGroup->minLen = minLen;
+ hufGroup->maxLen = maxLen;
+
+ /* Note that minLen can't be smaller than 1, so we adjust the base
+ and limit array pointers so we're not always wasting the first
+ entry. We do this again when using them (during symbol decoding).*/
+
+ base=hufGroup->base-1;
+ limit=hufGroup->limit-1;
+
+ /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */
+
+ pp=0;
+ for (i=minLen;i<=maxLen;i++) {
+ temp[i]=limit[i]=0;
+ for (t=0;t<symCount;t++)
+ if(length[t]==i) hufGroup->permute[pp++] = t;
+ }
- s->origPtr = 0;
+ /* Count symbols coded for at each bit length */
- case BZ_X_ORIGPTR_1:
- s->state = BZ_X_ORIGPTR_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->origPtr = (s->origPtr << 8) | ((int)uc);
+ for (i=0;i<symCount;i++) temp[length[i]]++;
- case BZ_X_ORIGPTR_2:
- s->state = BZ_X_ORIGPTR_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->origPtr = (s->origPtr << 8) | ((int)uc);
+ /* Calculate limit[] (the largest symbol-coding value at each bit
+ * length, which is (previous limit<<1)+symbols at this level), and
+ * base[] (number of symbols to ignore at each bit length, which is
+ * limit minus the cumulative count of symbols coded for already). */
- case BZ_X_ORIGPTR_3:
- s->state = BZ_X_ORIGPTR_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->origPtr = (s->origPtr << 8) | ((int)uc);
+ pp=t=0;
+ for (i=minLen; i<maxLen; i++) {
+ pp+=temp[i];
- if (s->origPtr < 0) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- if (s->origPtr > 10 + 100000*s->blockSize100k) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ /* We read the largest possible symbol size and then unget bits
+ after determining how many we need, and those extra bits could
+ be set to anything. (They're noise from future symbols.) At
+ each level we're really only interested in the first few bits,
+ so here we set all the trailing to-be-ignored bits to 1 so they
+ don't affect the value>limit[length] comparison. */
- /*--- Receive the mapping table ---*/
- case BZ_X_MAPPING_1:
- for (i = 0; i < 16; i++) {
- s->state = BZ_X_MAPPING_1;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 1) {
- s->inUse16[i] = TRUE;
- } else {
- s->inUse16[i] = FALSE;
- }
- }
+ limit[i]= (pp << (maxLen - i)) - 1;
+ pp<<=1;
+ base[i+1]=pp-(t+=temp[i]);
+ }
+ limit[maxLen+1] = INT_MAX; /* Sentinal value for reading next sym. */
+ limit[maxLen]=pp+temp[maxLen]-1;
+ base[minLen]=0;
+ }
- for (i = 0; i < 256; i++) {
- s->inUse[i] = FALSE;
- }
+ /* We've finished reading and digesting the block header. Now read this
+ block's Huffman coded symbols from the file and undo the Huffman coding
+ and run length encoding, saving the result into dbuf[dbufCount++]=uc */
- for (i = 0; i < 16; i++) {
- if (s->inUse16[i]) {
- for (j = 0; j < 16; j++) {
- case BZ_X_MAPPING_2:
- s->state = BZ_X_MAPPING_2;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 1) {
- s->inUse[i * 16 + j] = TRUE;
- }
- }
- }
- }
+ /* Initialize symbol occurrence counters and symbol Move To Front table */
- s->nInUse = 0;
- for (i = 0; i < 256; i++) {
- if (s->inUse[i]) {
- s->seqToUnseq[s->nInUse] = i;
- s->nInUse++;
- }
- }
- if (s->nInUse == 0) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- alphaSize = s->nInUse+2;
-
- /*--- Now the selectors ---*/
- case BZ_X_SELECTOR_1:
- s->state = BZ_X_SELECTOR_1;
- if (! get_bits(s, &nGroups, 3)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (nGroups < 2 || nGroups > 6) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ for (i=0;i<256;i++) {
+ byteCount[i] = 0;
+ mtfSymbol[i]=(unsigned char)i;
+ }
- case BZ_X_SELECTOR_2:
- s->state = BZ_X_SELECTOR_2;
- if (! get_bits(s, &nSelectors, 15)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (nSelectors < 1) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ /* Loop through compressed symbols. */
+ runPos=dbufCount=selector=0;
+ for (;;) {
+ /* fetch next Huffman coding group from list. */
- for (i = 0; i < nSelectors; i++) {
- j = 0;
- while (1) {
- case BZ_X_SELECTOR_3:
- s->state = BZ_X_SELECTOR_3;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 0) {
- break;
- }
- j++;
- if (j >= nGroups) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- }
- s->selectorMtf[i] = j;
- }
+ symCount=GROUP_SIZE-1;
+ if(selector>=nSelectors) return RETVAL_DATA_ERROR;
+ hufGroup=bd->groups+selectors[selector++];
+ base=hufGroup->base-1;
+ limit=hufGroup->limit-1;
+continue_this_group:
- /*--- Undo the MTF values for the selectors. ---*/
- {
- unsigned char pos[BZ_N_GROUPS], tmp, v;
- for (v = 0; v < nGroups; v++) {
- pos[v] = v;
- }
- for (i = 0; i < nSelectors; i++) {
- v = s->selectorMtf[i];
- tmp = pos[v];
- while (v > 0) {
- pos[v] = pos[v-1];
- v--;
- }
- pos[0] = tmp;
- s->selector[i] = tmp;
- }
- }
+ /* Read next Huffman-coded symbol. */
- /*--- Now the coding tables ---*/
- for (t = 0; t < nGroups; t++) {
- case BZ_X_CODING_1:
- s->state = BZ_X_CODING_1;
- if (! get_bits(s, &curr, 5)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- for (i = 0; i < alphaSize; i++) {
- while (TRUE) {
- if (curr < 1 || curr > 20) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
-
- case BZ_X_CODING_2:
- s->state = BZ_X_CODING_2;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 0) {
- break;
- }
-
- case BZ_X_CODING_3:
- s->state = BZ_X_CODING_3;
- if (! get_bits(s, &uc, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc == 0) {
- curr++;
- } else {
- curr--;
- }
- }
- s->len[t][i] = curr;
- }
- }
+ /* Note: It is far cheaper to read maxLen bits and back up than it is
+ to read minLen bits and then an additional bit at a time, testing
+ as we go. Because there is a trailing last block (with file CRC),
+ there is no danger of the overread causing an unexpected EOF for a
+ valid compressed file. As a further optimization, we do the read
+ inline (falling back to a call to get_bits if the buffer runs
+ dry). The following (up to got_huff_bits:) is equivalent to
+ j=get_bits(bd,hufGroup->maxLen);
+ */
- /*--- Create the Huffman decoding tables ---*/
- for (t = 0; t < nGroups; t++) {
- minLen = 32;
- maxLen = 0;
- for (i = 0; i < alphaSize; i++) {
- if (s->len[t][i] > maxLen) {
- maxLen = s->len[t][i];
- }
- if (s->len[t][i] < minLen) {
- minLen = s->len[t][i];
- }
+ while (bd->inbufBitCount<hufGroup->maxLen) {
+ if(bd->inbufPos==bd->inbufCount) {
+ j = get_bits(bd,hufGroup->maxLen);
+ goto got_huff_bits;
}
+ bd->inbufBits=(bd->inbufBits<<8)|bd->inbuf[bd->inbufPos++];
+ bd->inbufBitCount+=8;
+ };
+ bd->inbufBitCount-=hufGroup->maxLen;
+ j = (bd->inbufBits>>bd->inbufBitCount)&((1<<hufGroup->maxLen)-1);
- BZ2_hbCreateDecodeTables (
- &(s->limit[t][0]),
- &(s->base[t][0]),
- &(s->perm[t][0]),
- &(s->len[t][0]),
- minLen, maxLen, alphaSize
- );
+got_huff_bits:
+ /* Figure how how many bits are in next symbol and unget extras */
- s->minLens[t] = minLen;
- }
+ i=hufGroup->minLen;
+ while (j>limit[i]) ++i;
+ bd->inbufBitCount += (hufGroup->maxLen - i);
- /*--- Now the MTF values ---*/
+ /* Huffman decode value to get nextSym (with bounds checking) */
- EOB = s->nInUse+1;
- nblockMAX = 100000 * s->blockSize100k;
- groupNo = -1;
- groupPos = 0;
+ if ((i > hufGroup->maxLen)
+ || (((unsigned)(j=(j>>(hufGroup->maxLen-i))-base[i]))
+ >= MAX_SYMBOLS))
+ return RETVAL_DATA_ERROR;
+ nextSym = hufGroup->permute[j];
- for (i = 0; i <= 255; i++) {
- s->unzftab[i] = 0;
- }
- /*-- MTF init --*/
- {
- int ii, jj, kk;
- kk = MTFA_SIZE-1;
- for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
- for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
- s->mtfa[kk] = (unsigned char)(ii * MTFL_SIZE + jj);
- kk--;
- }
- s->mtfbase[ii] = kk + 1;
- }
- }
- /*-- end MTF init --*/
+ /* We have now decoded the symbol, which indicates either a new literal
+ byte, or a repeated run of the most recent literal byte. First,
+ check if nextSym indicates a repeated run, and if so loop collecting
+ how many times to repeat the last literal. */
- nblock = 0;
+ if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
- if (! get_mtf_val_init()) {
- goto save_state_and_return;
- }
- case BZ_X_MTF_1:
- s->state = BZ_X_MTF_1;
- if (! get_bits(s, &zvec, zn)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- while (1) {
- if (zn > 20 /* the longest code */) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- if (zvec <= gLimit[zn]) {
- break;
- }
- zn++;
-
- case BZ_X_MTF_2:
- s->state = BZ_X_MTF_2;
- if (! get_bits(s, &zj, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- zvec = (zvec << 1) | zj;
- }
- if (zvec - gBase[zn] < 0 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- nextSym = gPerm[zvec - gBase[zn]];
+ /* If this is the start of a new run, zero out counter */
- while (1) {
- if (nextSym == EOB) {
- break;
+ if(!runPos) {
+ runPos = 1;
+ t = 0;
}
- if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
- es = -1;
- N = 1;
- do {
- if (nextSym == BZ_RUNA) {
- es = es + (0+1) * N;
- } else {
- if (nextSym == BZ_RUNB) {
- es = es + (1+1) * N;
- }
- }
- N = N * 2;
- if (! get_mtf_val_init()) {
- goto save_state_and_return;
- }
- case BZ_X_MTF_3:
- s->state = BZ_X_MTF_3;
- if (! get_bits(s, &zvec, zn)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- while (1) {
- if (zn > 20 /* the longest code */) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- if (zvec <= gLimit[zn]) {
- break;
- }
- zn++;
-
- case BZ_X_MTF_4:
- s->state = BZ_X_MTF_4;
- if (! get_bits(s, &zj, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- zvec = (zvec << 1) | zj;
- }
- if (zvec - gBase[zn] < 0 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
-
- }
- nextSym = gPerm[zvec - gBase[zn]];
- }
- while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
+ /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at
+ each bit position, add 1 or 2 instead. For example,
+ 1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2.
+ You can make any bit pattern that way using 1 less symbol than
+ the basic or 0/1 method (except all bits 0, which would use no
+ symbols, but a run of length 0 doesn't mean anything in this
+ context). Thus space is saved. */
- es++;
- uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
- s->unzftab[uc] += es;
+ t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */
+ if(runPos < dbufSize) runPos <<= 1;
+ goto end_of_huffman_loop;
+ }
- while (es > 0) {
- if (nblock >= nblockMAX) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- s->tt[nblock] = (unsigned int)uc;
- nblock++;
- es--;
- }
- continue;
- } else {
- if (nblock >= nblockMAX) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- /*-- uc = MTF ( nextSym-1 ) --*/
- {
- int ii, jj, kk, pp, lno, off;
- unsigned int nn;
- nn = (unsigned int)(nextSym - 1);
-
- if (nn < MTFL_SIZE) {
- /* avoid general-case expense */
- pp = s->mtfbase[0];
- uc = s->mtfa[pp+nn];
- while (nn > 3) {
- int z = pp+nn;
- s->mtfa[(z) ] = s->mtfa[(z)-1];
- s->mtfa[(z)-1] = s->mtfa[(z)-2];
- s->mtfa[(z)-2] = s->mtfa[(z)-3];
- s->mtfa[(z)-3] = s->mtfa[(z)-4];
- nn -= 4;
- }
- while (nn > 0) {
- s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
- }
- s->mtfa[pp] = uc;
- } else {
- /* general case */
- lno = nn / MTFL_SIZE;
- off = nn % MTFL_SIZE;
- pp = s->mtfbase[lno] + off;
- uc = s->mtfa[pp];
- while (pp > s->mtfbase[lno]) {
- s->mtfa[pp] = s->mtfa[pp-1];
- pp--;
- }
- s->mtfbase[lno]++;
- while (lno > 0) {
- s->mtfbase[lno]--;
- s->mtfa[s->mtfbase[lno]] = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
- lno--;
- }
- s->mtfbase[0]--;
- s->mtfa[s->mtfbase[0]] = uc;
- if (s->mtfbase[0] == 0) {
- kk = MTFA_SIZE-1;
- for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
- for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
- s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
- kk--;
- }
- s->mtfbase[ii] = kk + 1;
- }
- }
- }
- }
- /*-- end uc = MTF ( nextSym-1 ) --*/
+ /* When we hit the first non-run symbol after a run, we now know
+ how many times to repeat the last literal, so append that many
+ copies to our buffer of decoded symbols (dbuf) now. (The last
+ literal used is the one at the head of the mtfSymbol array.) */
- s->unzftab[s->seqToUnseq[uc]]++;
- s->tt[nblock] = (unsigned int)(s->seqToUnseq[uc]);
- nblock++;
+ if(runPos) {
+ runPos=0;
+ if(dbufCount+t>=dbufSize) return RETVAL_DATA_ERROR;
- if (! get_mtf_val_init()) {
- goto save_state_and_return;
- }
- case BZ_X_MTF_5:
- s->state = BZ_X_MTF_5;
- if (! get_bits(s, &zvec, zn)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- while (1) {
- if (zn > 20 /* the longest code */) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- if (zvec <= gLimit[zn]) {
- break;
- }
- zn++;
-
- case BZ_X_MTF_6:
- s->state = BZ_X_MTF_6;
- if (! get_bits(s, &zj, 1)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- zvec = (zvec << 1) | zj;
- }
- if (zvec - gBase[zn] < 0 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- nextSym = gPerm[zvec - gBase[zn]];
- continue;
+ uc = symToByte[mtfSymbol[0]];
+ byteCount[uc] += t;
+ while (t--) dbuf[dbufCount++]=uc;
}
- }
- /* Now we know what nblock is, we can do a better sanity
- check on s->origPtr.
- */
- if (s->origPtr < 0 || s->origPtr >= nblock) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- s->state_out_len = 0;
- s->state_out_ch = 0;
- s->calculatedBlockCRC = 0xffffffffL;
- s->state = BZ_X_OUTPUT;
-
- /*-- Set up cftab to facilitate generation of T^(-1) --*/
- s->cftab[0] = 0;
- for (i = 1; i <= 256; i++) {
- s->cftab[i] = s->unzftab[i-1];
- }
- for (i = 1; i <= 256; i++) {
- s->cftab[i] += s->cftab[i-1];
- }
+ /* Is this the terminating symbol? */
- /*-- compute the T^(-1) vector --*/
- for (i = 0; i < nblock; i++) {
- uc = (unsigned char)(s->tt[i] & 0xff);
- s->tt[s->cftab[uc]] |= (i << 8);
- s->cftab[uc]++;
- }
+ if(nextSym>symTotal) break;
- s->tPos = s->tt[s->origPtr] >> 8;
- s->nblock_used = 0;
- if (s->blockRandomised) {
- s->rNToGo = 0;
- s->rTPos = 0;
- s->k0 = bz_get_fast(s);
+ /* At this point, nextSym indicates a new literal character. Subtract
+ one to get the position in the MTF array at which this literal is
+ currently to be found. (Note that the result can't be -1 or 0,
+ because 0 and 1 are RUNA and RUNB. But another instance of the
+ first symbol in the mtf array, position 0, would have been handled
+ as part of a run above. Therefore 1 unused mtf position minus
+ 2 non-literal nextSym values equals -1.) */
- s->nblock_used++;
- bz_rand_udp_mask(s);
- s->k0 ^= ((s->rNToGo == 1) ? 1 : 0);
- } else {
- s->k0 = bz_get_fast(s);
- s->nblock_used++;
- }
+ if(dbufCount>=dbufSize) return RETVAL_DATA_ERROR;
+ i = nextSym - 1;
+ uc = mtfSymbol[i];
- retVal = BZ_OK;
- goto save_state_and_return;
+ /* Adjust the MTF array. Since we typically expect to move only a
+ * small number of symbols, and are bound by 256 in any case, using
+ * memmove here would typically be bigger and slower due to function
+ * call overhead and other assorted setup costs. */
-endhdr_2:
- case BZ_X_ENDHDR_2:
- s->state = BZ_X_ENDHDR_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x72) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ do {
+ mtfSymbol[i] = mtfSymbol[i-1];
+ } while (--i);
+ mtfSymbol[0] = uc;
+ uc=symToByte[uc];
- case BZ_X_ENDHDR_3:
- s->state = BZ_X_ENDHDR_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x45) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ /* We have our literal byte. Save it into dbuf. */
- case BZ_X_ENDHDR_4:
- s->state = BZ_X_ENDHDR_4;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x38) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ byteCount[uc]++;
+ dbuf[dbufCount++] = (unsigned int)uc;
- case BZ_X_ENDHDR_5:
- s->state = BZ_X_ENDHDR_5;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x50) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
+ /* Skip group initialization if we're not done with this group. Done
+ * this way to avoid compiler warning. */
- case BZ_X_ENDHDR_6:
- s->state = BZ_X_ENDHDR_6;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- if (uc != 0x90) {
- retVal = BZ_DATA_ERROR;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = 0;
+end_of_huffman_loop:
+ if(symCount--) goto continue_this_group;
+ }
- case BZ_X_CCRC_1:
- s->state = BZ_X_CCRC_1;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
- case BZ_X_CCRC_2:
- s->state = BZ_X_CCRC_2;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
+ /* At this point, we've read all the Huffman-coded symbols (and repeated
+ runs) for this block from the input stream, and decoded them into the
+ intermediate buffer. There are dbufCount many decoded bytes in dbuf[].
+ Now undo the Burrows-Wheeler transform on dbuf.
+ See http://dogma.net/markn/articles/bwt/bwt.htm
+ */
- case BZ_X_CCRC_3:
- s->state = BZ_X_CCRC_3;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
+ /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
- case BZ_X_CCRC_4:
- s->state = BZ_X_CCRC_4;
- if (! get_bits(s, &uc, 8)) {
- retVal = BZ_OK;
- goto save_state_and_return;
- }
- s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((unsigned int)uc);
+ j=0;
+ for (i=0;i<256;i++) {
+ k=j+byteCount[i];
+ byteCount[i] = j;
+ j=k;
+ }
- s->state = BZ_X_IDLE;
- retVal = BZ_STREAM_END;
- goto save_state_and_return;
+ /* Figure out what order dbuf would be in if we sorted it. */
+ for (i=0;i<dbufCount;i++) {
+ uc=(unsigned char)(dbuf[i] & 0xff);
+ dbuf[byteCount[uc]] |= (i << 8);
+ byteCount[uc]++;
}
-save_state_and_return:
- s->save_i = i;
- s->save_j = j;
- s->save_t = t;
- s->save_alphaSize = alphaSize;
- s->save_nGroups = nGroups;
- s->save_nSelectors = nSelectors;
- s->save_EOB = EOB;
- s->save_groupNo = groupNo;
- s->save_groupPos = groupPos;
- s->save_nextSym = nextSym;
- s->save_nblockMAX = nblockMAX;
- s->save_nblock = nblock;
- s->save_es = es;
- s->save_N = N;
- s->save_curr = curr;
- s->save_zt = zt;
- s->save_zn = zn;
- s->save_zvec = zvec;
- s->save_zj = zj;
- s->save_gSel = gSel;
- s->save_gMinlen = gMinlen;
- s->save_gLimit = gLimit;
- s->save_gBase = gBase;
- s->save_gPerm = gPerm;
-
- return retVal;
-}
-
-//int BZ2_bzDecompressInit(bz_stream* strm, int verbosity_level, int small)
-static inline int BZ2_bzDecompressInit(bz_stream* strm)
-{
- DState* s;
-
-// if (verbosity_level < 0 || verbosity_level > 4) {
-// return BZ_PARAM_ERROR;
-// }
- s = xmalloc(sizeof(DState));
- s->strm = strm;
- strm->state = s;
- s->state = BZ_X_MAGIC_1;
- s->bsLive = 0;
- s->bsBuff = 0;
- s->calculatedCombinedCRC = 0;
- s->tt = NULL;
- s->currBlockNo = 0;
-
- return BZ_OK;
-}
+ /* Decode first byte by hand to initialize "previous" byte. Note that it
+ doesn't get output, and if the first three characters are identical
+ it doesn't qualify as a run (hence writeRunCountdown=5). */
-static void bz_seterr(int eee, int *bzerror, bzFile **bzf)
-{
- if (bzerror != NULL) {
- *bzerror = eee;
- }
- if (*bzf != NULL) {
- (*bzf)->lastErr = eee;
+ if(dbufCount) {
+ if(origPtr>=dbufCount) return RETVAL_DATA_ERROR;
+ bd->writePos=dbuf[origPtr];
+ bd->writeCurrent=(unsigned char)(bd->writePos&0xff);
+ bd->writePos>>=8;
+ bd->writeRunCountdown=5;
}
+ bd->writeCount=dbufCount;
+
+ return RETVAL_OK;
}
-static void BZ2_bzReadClose(int *bzerror, void *b)
+/* Undo burrows-wheeler transform on intermediate buffer to produce output.
+ If start_bunzip was initialized with out_fd=-1, then up to len bytes of
+ data are written to outbuf. Return value is number of bytes written or
+ error (all errors are negative numbers). If out_fd!=-1, outbuf and len
+ are ignored, data is written to out_fd and return is RETVAL_OK or error.
+*/
+
+static int read_bunzip(bunzip_data *bd, char *outbuf, int len)
{
- bzFile* bzf = (bzFile*)b;
+ const unsigned int *dbuf;
+ int pos,current,previous,gotcount;
- bz_seterr(BZ_OK, bzerror, &bzf);
+ /* If last read was short due to end of file, return last block now */
+ if(bd->writeCount<0) return bd->writeCount;
- if (bzf->initialisedOk) {
- bz_stream *strm = &(bzf->strm);
- DState *s;
- if (strm == NULL) {
- return;
- }
- s = strm->state;
- if ((s == NULL) || (s->strm != strm)) {
- return;
- }
- free(s->tt);
- free(strm->state);
- strm->state = NULL;
- return;
- }
- free(bzf);
-}
+ gotcount = 0;
+ dbuf=bd->dbuf;
+ pos=bd->writePos;
+ current=bd->writeCurrent;
-static void unRLE_obuf_to_output_FAST(DState *s)
-{
- unsigned char k1;
-
- if (s->blockRandomised) {
- while (1) {
- /* try to finish existing run */
- while (1) {
- if (s->strm->avail_out == 0) {
- return;
- }
- if (s->state_out_len == 0) {
- break;
- }
- *((unsigned char *)(s->strm->next_out)) = s->state_out_ch;
- s->calculatedBlockCRC = (s->calculatedBlockCRC << 8) ^
- BZ2_crc32Table[(s->calculatedBlockCRC >> 24) ^
- ((unsigned char)s->state_out_ch)];
- s->state_out_len--;
- s->strm->next_out++;
- s->strm->avail_out--;
- }
-
- /* can a new run be started? */
- if (s->nblock_used == s->save_nblock+1) {
- return;
- }
- s->state_out_len = 1;
- s->state_out_ch = s->k0;
- k1 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- k1 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) {
- continue;
- }
- if (k1 != s->k0) {
- s->k0 = k1;
- continue;
- }
+ /* We will always have pending decoded data to write into the output
+ buffer unless this is the very first call (in which case we haven't
+ Huffman-decoded a block into the intermediate buffer yet). */
- s->state_out_len = 2;
- k1 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- k1 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) {
- continue;
- }
- if (k1 != s->k0) {
- s->k0 = k1;
- continue;
- }
- s->state_out_len = 3;
- k1 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- k1 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- if (s->nblock_used == s->save_nblock+1) {
- continue;
- }
- if (k1 != s->k0) {
- s->k0 = k1;
- continue;
- }
+ if (bd->writeCopies) {
- k1 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- k1 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- s->state_out_len = ((int)k1) + 4;
- s->k0 = bz_get_fast(s);
- bz_rand_udp_mask(s);
- s->k0 ^= ((s->rNToGo == 1) ? 1 : 0);
- s->nblock_used++;
- }
- } else {
- /* restore */
- unsigned int c_calculatedBlockCRC = s->calculatedBlockCRC;
- unsigned char c_state_out_ch = s->state_out_ch;
- int c_state_out_len = s->state_out_len;
- int c_nblock_used = s->nblock_used;
- int c_k0 = s->k0;
- unsigned int *c_tt = s->tt;
- unsigned int c_tPos = s->tPos;
- char *cs_next_out = s->strm->next_out;
- unsigned int cs_avail_out = s->strm->avail_out;
- /* end restore */
-
- int s_save_nblockPP = s->save_nblock+1;
-
- while (1) {
- /* try to finish existing run */
- if (c_state_out_len > 0) {
- while (TRUE) {
- if (cs_avail_out == 0) {
- goto return_notr;
- }
- if (c_state_out_len == 1) {
- break;
- }
- *((unsigned char *)(cs_next_out)) = c_state_out_ch;
- c_calculatedBlockCRC = (c_calculatedBlockCRC << 8) ^
- BZ2_crc32Table[(c_calculatedBlockCRC >> 24) ^
- ((unsigned char)c_state_out_ch)];
- c_state_out_len--;
- cs_next_out++;
- cs_avail_out--;
- }
-s_state_out_len_eq_one:
- {
- if (cs_avail_out == 0) {
- c_state_out_len = 1;
- goto return_notr;
- }
- *((unsigned char *)(cs_next_out)) = c_state_out_ch;
- c_calculatedBlockCRC = (c_calculatedBlockCRC << 8) ^
- BZ2_crc32Table[(c_calculatedBlockCRC >> 24) ^
- ((unsigned char)c_state_out_ch)];
- cs_next_out++;
- cs_avail_out--;
- }
- }
- /* can a new run be started? */
- if (c_nblock_used == s_save_nblockPP) {
- c_state_out_len = 0; goto return_notr;
- }
- c_state_out_ch = c_k0;
- c_tPos = c_tt[c_tPos];
- k1 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+ /* Inside the loop, writeCopies means extra copies (beyond 1) */
- c_nblock_used++;
+ --bd->writeCopies;
- if (k1 != c_k0) {
- c_k0 = k1;
- goto s_state_out_len_eq_one;
- }
+ /* Loop outputting bytes */
- if (c_nblock_used == s_save_nblockPP) {
- goto s_state_out_len_eq_one;
- }
+ for (;;) {
- c_state_out_len = 2;
- c_tPos = c_tt[c_tPos];
- k1 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+ /* If the output buffer is full, snapshot state and return */
- c_nblock_used++;
- if (c_nblock_used == s_save_nblockPP) {
- continue;
- }
- if (k1 != c_k0) {
- c_k0 = k1;
- continue;
+ if(gotcount >= len) {
+ bd->writePos=pos;
+ bd->writeCurrent=current;
+ bd->writeCopies++;
+ return len;
}
- c_state_out_len = 3;
- c_tPos = c_tt[c_tPos];
- k1 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+ /* Write next byte into output buffer, updating CRC */
- c_nblock_used++;
- if (c_nblock_used == s_save_nblockPP) {
- continue;
- }
- if (k1 != c_k0) {
- c_k0 = k1;
+ outbuf[gotcount++] = current;
+ bd->writeCRC=(((bd->writeCRC)<<8)
+ ^bd->crc32Table[((bd->writeCRC)>>24)^current]);
+
+ /* Loop now if we're outputting multiple copies of this byte */
+
+ if (bd->writeCopies) {
+ --bd->writeCopies;
continue;
}
-
- c_tPos = c_tt[c_tPos];
- k1 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+decode_next_byte:
+ if (!bd->writeCount--) break;
+ /* Follow sequence vector to undo Burrows-Wheeler transform */
+ previous=current;
+ pos=dbuf[pos];
+ current=pos&0xff;
+ pos>>=8;
- c_nblock_used++;
- c_state_out_len = ((int)k1) + 4;
+ /* After 3 consecutive copies of the same byte, the 4th is a repeat
+ count. We count down from 4 instead
+ * of counting up because testing for non-zero is faster */
- c_tPos = c_tt[c_tPos];
- c_k0 = (unsigned char)(c_tPos & 0xff);
- c_tPos >>= 8;
+ if(--bd->writeRunCountdown) {
+ if(current!=previous) bd->writeRunCountdown=4;
+ } else {
- c_nblock_used++;
- }
+ /* We have a repeated run, this byte indicates the count */
-return_notr:
-
- /* save */
- s->calculatedBlockCRC = c_calculatedBlockCRC;
- s->state_out_ch = c_state_out_ch;
- s->state_out_len = c_state_out_len;
- s->nblock_used = c_nblock_used;
- s->k0 = c_k0;
- s->tt = c_tt;
- s->tPos = c_tPos;
- s->strm->next_out = cs_next_out;
- s->strm->avail_out = cs_avail_out;
- /* end save */
- }
-}
-static inline
-int BZ2_bzDecompress(bz_stream *strm)
-{
- DState* s;
- s = strm->state;
+ bd->writeCopies=current;
+ current=previous;
+ bd->writeRunCountdown=5;
- while (1) {
- if (s->state == BZ_X_IDLE) {
- return BZ_SEQUENCE_ERROR;
- }
- if (s->state == BZ_X_OUTPUT) {
- unRLE_obuf_to_output_FAST(s);
- if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {
- s->calculatedBlockCRC = ~(s->calculatedBlockCRC);
- if (s->calculatedBlockCRC != s->storedBlockCRC) {
- return BZ_DATA_ERROR;
- }
- s->calculatedCombinedCRC = (s->calculatedCombinedCRC << 1) | (s->calculatedCombinedCRC >> 31);
- s->calculatedCombinedCRC ^= s->calculatedBlockCRC;
- s->state = BZ_X_BLKHDR_1;
- } else {
- return BZ_OK;
+ /* Sometimes there are just 3 bytes (run length 0) */
+
+ if(!bd->writeCopies) goto decode_next_byte;
+
+ /* Subtract the 1 copy we'd output anyway to get extras */
+
+ --bd->writeCopies;
}
}
- if (s->state >= BZ_X_MAGIC_1) {
- int r = BZ2_decompress(s);
- if (r == BZ_STREAM_END) {
- if (s->calculatedCombinedCRC != s->storedCombinedCRC) {
- return BZ_DATA_ERROR;
- }
- return r;
- }
- if (s->state != BZ_X_OUTPUT) {
- return r;
- }
+
+ /* Decompression of this block completed successfully */
+
+ bd->writeCRC=~bd->writeCRC;
+ bd->totalCRC=((bd->totalCRC<<1) | (bd->totalCRC>>31)) ^ bd->writeCRC;
+
+ /* If this block had a CRC error, force file level CRC error. */
+
+ if(bd->writeCRC!=bd->headerCRC) {
+ bd->totalCRC=bd->headerCRC+1;
+ return RETVAL_LAST_BLOCK;
}
}
- return(0); /*NOTREACHED*/
+ /* Refill the intermediate buffer by Huffman-decoding next block of input */
+ /* (previous is just a convenient unused temp variable here) */
+
+ previous=get_next_block(bd);
+ if(previous) {
+ bd->writeCount=previous;
+ return (previous!=RETVAL_LAST_BLOCK) ? previous : gotcount;
+ }
+ bd->writeCRC=~0;
+ pos=bd->writePos;
+ current=bd->writeCurrent;
+ goto decode_next_byte;
}
-static inline int BZ2_bzRead(int *bzerror, void *b, void *buf, int len)
+/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
+ a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
+ ignored, and data is read from file handle into temporary buffer. */
+
+static int start_bunzip(bunzip_data **bdp, int in_fd, unsigned char *inbuf,
+ int len)
{
- int n, ret;
- bzFile *bzf = (bzFile*)b;
+ bunzip_data *bd;
+ unsigned int i;
+ const unsigned int BZh0=(((unsigned int)'B')<<24)+(((unsigned int)'Z')<<16)
+ +(((unsigned int)'h')<<8)+(unsigned int)'0';
- bz_seterr(BZ_OK, bzerror, &bzf);
+ /* Figure out how much data to allocate */
- if (len == 0) {
- bz_seterr(BZ_OK, bzerror, &bzf);
- return 0;
- }
+ i=sizeof(bunzip_data);
+ if(in_fd!=-1) i+=IOBUF_SIZE;
- bzf->strm.avail_out = len;
- bzf->strm.next_out = buf;
+ /* Allocate bunzip_data. Most fields initialize to zero. */
- while (1) {
- if (ferror(bzf->handle)) {
- bz_seterr(BZ_IO_ERROR, bzerror, &bzf);
- return 0;
- }
- if ((bzf->strm.avail_in == 0) && !myfeof(bzf->handle)) {
- n = fread(bzf->buf, sizeof(unsigned char), BZ_MAX_UNUSED, bzf->handle);
- if (ferror(bzf->handle)) {
- bz_seterr(BZ_IO_ERROR, bzerror, &bzf);
- return 0;
- }
- bzf->bufN = n;
- bzf->strm.avail_in = bzf->bufN;
- bzf->strm.next_in = bzf->buf;
- }
+ bd=*bdp=xzalloc(i);
- ret = BZ2_bzDecompress(&(bzf->strm));
+ /* Setup input buffer */
- if ((ret != BZ_OK) && (ret != BZ_STREAM_END)) {
- bz_seterr(ret, bzerror, &bzf);
- return 0;
- }
+ if(-1==(bd->in_fd=in_fd)) {
+ bd->inbuf=inbuf;
+ bd->inbufCount=len;
+ } else bd->inbuf=(unsigned char *)(bd+1);
- if ((ret == BZ_OK) && myfeof(bzf->handle) &&
- (bzf->strm.avail_in == 0) && (bzf->strm.avail_out > 0)) {
- bz_seterr(BZ_UNEXPECTED_EOF, bzerror, &bzf);
- return(0);
- }
+ /* Init the CRC32 table (big endian) */
- if (ret == BZ_STREAM_END) {
- bz_seterr(BZ_STREAM_END, bzerror, &bzf);
- return(len - bzf->strm.avail_out);
- }
- if (bzf->strm.avail_out == 0) {
- bz_seterr(BZ_OK, bzerror, &bzf);
- return(len);
- }
- }
- return(0); /*not reached*/
-}
+ bd->crc32Table = crc32_filltable(1);
-static inline void *BZ2_bzReadOpen(int *bzerror, FILE *f, void *unused, int nUnused)
-{
- bzFile *bzf = xmalloc(sizeof(bzFile));
- int ret;
+ /* Setup for I/O error handling via longjmp */
- bz_seterr(BZ_OK, bzerror, &bzf);
+ i=setjmp(bd->jmpbuf);
+ if(i) return i;
- bzf->initialisedOk = FALSE;
- bzf->handle = f;
- bzf->bufN = 0;
+ /* Ensure that file starts with "BZh['1'-'9']." */
- ret = BZ2_bzDecompressInit(&(bzf->strm));
- if (ret != BZ_OK) {
- bz_seterr(ret, bzerror, &bzf);
- free(bzf);
- return NULL;
- }
+ i = get_bits(bd,32);
+ if (((unsigned int)(i-BZh0-1)) >= 9) return RETVAL_NOT_BZIP_DATA;
- while (nUnused > 0) {
- bzf->buf[bzf->bufN] = *((unsigned char *)(unused)); bzf->bufN++;
- unused = ((void *)( 1 + ((unsigned char *)(unused)) ));
- nUnused--;
- }
- bzf->strm.avail_in = bzf->bufN;
- bzf->strm.next_in = bzf->buf;
+ /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
+ uncompressed data. Allocate intermediate buffer for block. */
- bzf->initialisedOk = TRUE;
- return bzf;
+ bd->dbufSize=100000*(i-BZh0);
+
+ bd->dbuf=xmalloc(bd->dbufSize * sizeof(int));
+ return RETVAL_OK;
}
-extern unsigned char uncompressStream(FILE *zStream, FILE *stream)
+/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip data,
+ not end of file.) */
+
+USE_DESKTOP(long long) int
+uncompressStream(int src_fd, int dst_fd)
{
- unsigned char unused[BZ_MAX_UNUSED];
- unsigned char *unusedTmp;
- unsigned char obuf[5000];
- bzFile *bzf = NULL;
- int bzerr_dummy;
- int bzerr;
- int nread;
- int nUnused;
- int streamNo;
- int ret;
+ USE_DESKTOP(long long total_written = 0;)
+ char *outbuf;
+ bunzip_data *bd;
int i;
- nUnused = 0;
- streamNo = 0;
-
- if (ferror(stream)) {
- goto errhandler_io;
- }
- if (ferror(zStream)) {
- goto errhandler_io;
- }
-
- while(1) {
- bzf = BZ2_bzReadOpen(&bzerr, zStream, unused, nUnused);
- if (bzf == NULL || bzerr != BZ_OK) {
- goto errhandler;
- }
- streamNo++;
-
- while (bzerr == BZ_OK) {
- nread = BZ2_bzRead(&bzerr, bzf, obuf, 5000);
- if (bzerr == BZ_DATA_ERROR_MAGIC) {
- goto errhandler;
- }
- if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0) {
- fwrite(obuf, sizeof(unsigned char), nread, stream);
- }
- if (ferror(stream)) {
- goto errhandler_io;
+ outbuf=xmalloc(IOBUF_SIZE);
+ i=start_bunzip(&bd,src_fd,0,0);
+ if(!i) {
+ for (;;) {
+ if((i=read_bunzip(bd,outbuf,IOBUF_SIZE)) <= 0) break;
+ if(i!=write(dst_fd,outbuf,i)) {
+ i=RETVAL_UNEXPECTED_OUTPUT_EOF;
+ break;
}
- }
- if (bzerr != BZ_STREAM_END) {
- goto errhandler;
- }
- nUnused = bzf->strm.avail_in;
- unusedTmp = bzf->strm.next_in;
- bz_seterr(BZ_OK, &bzerr, &bzf);
- for (i = 0; i < nUnused; i++) {
- unused[i] = unusedTmp[i];
- }
- BZ2_bzReadClose(&bzerr, bzf);
- if ((nUnused == 0) && myfeof(zStream)) {
- break;
+ USE_DESKTOP(total_written += i;)
}
}
- if (ferror(zStream)) {
- goto errhandler_io;
- }
- ret = fclose(zStream);
- if (ret == EOF) {
- goto errhandler_io;
- }
- if (ferror(stream)) {
- goto errhandler_io;
- }
- ret = fflush(stream);
- if (ret != 0) {
- goto errhandler_io;
- }
- if (stream != stdout) {
- ret = fclose(stream);
- if (ret == EOF) {
- goto errhandler_io;
+ /* Check CRC and release memory */
+
+ if(i==RETVAL_LAST_BLOCK) {
+ if (bd->headerCRC!=bd->totalCRC) {
+ bb_error_msg("data integrity error when decompressing");
+ } else {
+ i=RETVAL_OK;
}
+ } else if (i==RETVAL_UNEXPECTED_OUTPUT_EOF) {
+ bb_error_msg("compressed file ends unexpectedly");
+ } else {
+ bb_error_msg("decompression failed");
}
- return TRUE;
-
-errhandler:
- BZ2_bzReadClose ( &bzerr_dummy, bzf );
- switch (bzerr) {
- case BZ_IO_ERROR:
-errhandler_io:
- error_msg("\n%s: I/O or other error, bailing out. "
- "Possible reason follows.\n", applet_name);
- perror(applet_name);
- exit(1);
- case BZ_DATA_ERROR:
- error_msg("\n%s: Data integrity error when decompressing.\n", applet_name);
- exit(2);
- case BZ_UNEXPECTED_EOF:
- error_msg("\n%s: Compressed file ends unexpectedly;\n\t"
- "perhaps it is corrupted? *Possible* reason follows.\n", applet_name);
- perror(applet_name);
- exit(2);
- case BZ_DATA_ERROR_MAGIC:
- if (zStream != stdin) {
- fclose(zStream);
- }
- if (stream != stdout) {
- fclose(stream);
- }
- if (streamNo == 1) {
- return FALSE;
- } else {
- return TRUE;
- }
- }
+ free(bd->dbuf);
+ free(bd);
+ free(outbuf);
+
+ return i ? i : USE_DESKTOP(total_written) + 0;
+}
+
+#ifdef TESTING
+
+static char * const bunzip_errors[]={NULL,"Bad file checksum","Not bzip data",
+ "Unexpected input EOF","Unexpected output EOF","Data error",
+ "Out of memory","Obsolete (pre 0.9.5) bzip format not supported."};
+
+/* Dumb little test thing, decompress stdin to stdout */
+int main(int argc, char *argv[])
+{
+ int i=uncompressStream(0,1);
+ char c;
- return(TRUE); /*notreached*/
+ if(i<0) fprintf(stderr,"%s\n", bunzip_errors[-i]);
+ else if(read(0,&c,1)) fprintf(stderr,"Trailing garbage ignored\n");
+ return -i;
}
+#endif