archival/libarchive/decompress_bunzip2.c

   1 /* vi: set sw=4 ts=4: */
   2 /* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
   3
   4    Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
   5    which also acknowledges contributions by Mike Burrows, David Wheeler,
   6    Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
   7    Robert Sedgewick, and Jon L. Bentley.
   8
   9    Licensed under GPLv2 or later, see file LICENSE in this source tree.
  10 */
  11
  12 /*
  13         Size and speed optimizations by Manuel Novoa III  (mjn3@codepoet.org).
  14
  15         More efficient reading of Huffman codes, a streamlined read_bunzip()
  16         function, and various other tweaks.  In (limited) tests, approximately
  17         20% faster than bzcat on x86 and about 10% faster on arm.
  18
  19         Note that about 2/3 of the time is spent in read_bunzip() reversing
  20         the Burrows-Wheeler transformation.  Much of that time is delay
  21         resulting from cache misses.
  22
  23         (2010 update by vda: profiled "bzcat <84mbyte.bz2 >/dev/null"
  24         on x86-64 CPU with L2 > 1M: get_next_block is hotter than read_bunzip:
  25         %time seconds   calls function
  26         71.01   12.69     444 get_next_block
  27         28.65    5.12   93065 read_bunzip
  28         00.22    0.04 7736490 get_bits
  29         00.11    0.02      47 dealloc_bunzip
  30         00.00    0.00   93018 full_write
  31         ...)
  32
  33
  34         I would ask that anyone benefiting from this work, especially those
  35         using it in commercial products, consider making a donation to my local
  36         non-profit hospice organization (www.hospiceacadiana.com) in the name of
  37         the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003.
  38
  39         Manuel
  40  */
  41
  42 #include "libbb.h"
  43 #include "bb_archive.h"
  44
  45 #if 0
  46 # define dbg(...) bb_error_msg(__VA_ARGS__)
  47 #else
  48 # define dbg(...) ((void)0)
  49 #endif
  50
  51 /* Constants for Huffman coding */
  52 #define MAX_GROUPS          6
  53 #define GROUP_SIZE          50      /* 64 would have been more efficient */
  54 #define MAX_HUFCODE_BITS    20      /* Longest Huffman code allowed */
  55 #define MAX_SYMBOLS         258     /* 256 literals + RUNA + RUNB */
  56 #define SYMBOL_RUNA         0
  57 #define SYMBOL_RUNB         1
  58
  59 /* Status return values */
  60 #define RETVAL_OK                       0
  61 #define RETVAL_LAST_BLOCK               (dbg("%d", __LINE__), -1)
  62 #define RETVAL_NOT_BZIP_DATA            (dbg("%d", __LINE__), -2)
  63 #define RETVAL_UNEXPECTED_INPUT_EOF     (dbg("%d", __LINE__), -3)
  64 #define RETVAL_SHORT_WRITE              (dbg("%d", __LINE__), -4)
  65 #define RETVAL_DATA_ERROR               (dbg("%d", __LINE__), -5)
  66 #define RETVAL_OUT_OF_MEMORY            (dbg("%d", __LINE__), -6)
  67 #define RETVAL_OBSOLETE_INPUT           (dbg("%d", __LINE__), -7)
  68
  69 /* Other housekeeping constants */
  70 #define IOBUF_SIZE          4096
  71
  72 /* This is what we know about each Huffman coding group */
  73 struct group_data {
  74         /* We have an extra slot at the end of limit[] for a sentinel value. */
  75         int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS];
  76         int minLen, maxLen;
  77 };
  78
  79 /* Structure holding all the housekeeping data, including IO buffers and
  80  * memory that persists between calls to bunzip
  81  * Found the most used member:
  82  *  cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \
  83  *  | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER
  84  * and moved it (inbufBitCount) to offset 0.
  85  */
  86 struct bunzip_data {
  87         /* I/O tracking data (file handles, buffers, positions, etc.) */
  88         unsigned inbufBitCount, inbufBits;
  89         int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/;
  90         uint8_t *inbuf /*,*outbuf*/;
  91
  92         /* State for interrupting output loop */
  93         int writeCopies, writePos, writeRunCountdown, writeCount;
  94         int writeCurrent; /* actually a uint8_t */
  95
  96         /* The CRC values stored in the block header and calculated from the data */
  97         uint32_t headerCRC, totalCRC, writeCRC;
  98
  99         /* Intermediate buffer and its size (in bytes) */
 100         uint32_t *dbuf;
 101         unsigned dbufSize;
 102
 103         /* For I/O error handling */
 104         jmp_buf jmpbuf;
 105
 106         /* Big things go last (register-relative addressing can be larger for big offsets) */
 107         uint32_t crc32Table[256];
 108         uint8_t selectors[32768];  /* nSelectors=15 bits */
 109         struct group_data groups[MAX_GROUPS];  /* Huffman coding tables */
 110 };
 111 /* typedef struct bunzip_data bunzip_data; -- done in .h file */
 112
 113
 114 /* Return the next nnn bits of input.  All reads from the compressed input
 115    are done through this function.  All reads are big endian */
 116 static unsigned get_bits(bunzip_data *bd, int bits_wanted)
 117 {
 118         unsigned bits = 0;
 119         /* Cache bd->inbufBitCount in a CPU register (hopefully): */
 120         int bit_count = bd->inbufBitCount;
 121
 122         /* If we need to get more data from the byte buffer, do so.  (Loop getting
 123            one byte at a time to enforce endianness and avoid unaligned access.) */
 124         while (bit_count < bits_wanted) {
 125
 126                 /* If we need to read more data from file into byte buffer, do so */
 127                 if (bd->inbufPos == bd->inbufCount) {
 128                         /* if "no input fd" case: in_fd == -1, read fails, we jump */
 129                         bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
 130                         if (bd->inbufCount <= 0)
 131                                 longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
 132                         bd->inbufPos = 0;
 133                 }
 134
 135                 /* Avoid 32-bit overflow (dump bit buffer to top of output) */
 136                 if (bit_count >= 24) {
 137                         bits = bd->inbufBits & ((1 << bit_count) - 1);
 138                         bits_wanted -= bit_count;
 139                         bits <<= bits_wanted;
 140                         bit_count = 0;
 141                 }
 142
 143                 /* Grab next 8 bits of input from buffer. */
 144                 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
 145                 bit_count += 8;
 146         }
 147
 148         /* Calculate result */
 149         bit_count -= bits_wanted;
 150         bd->inbufBitCount = bit_count;
 151         bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1);
 152
 153         return bits;
 154 }
 155
 156 /* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
 157 static int get_next_block(bunzip_data *bd)
 158 {
 159         struct group_data *hufGroup;
 160         int dbufCount, dbufSize, groupCount, *base, *limit, selector,
 161                 i, j, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
 162         int runCnt = runCnt; /* for compiler */
 163         uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
 164         uint32_t *dbuf;
 165         unsigned origPtr;
 166
 167         dbuf = bd->dbuf;
 168         dbufSize = bd->dbufSize;
 169         selectors = bd->selectors;
 170
 171 /* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */
 172 #if 0
 173         /* Reset longjmp I/O error handling */
 174         i = setjmp(bd->jmpbuf);
 175         if (i) return i;
 176 #endif
 177
 178         /* Read in header signature and CRC, then validate signature.
 179            (last block signature means CRC is for whole file, return now) */
 180         i = get_bits(bd, 24);
 181         j = get_bits(bd, 24);
 182         bd->headerCRC = get_bits(bd, 32);
 183         if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK;
 184         if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA;
 185
 186         /* We can add support for blockRandomised if anybody complains.  There was
 187            some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
 188            it didn't actually work. */
 189         if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT;
 190         origPtr = get_bits(bd, 24);
 191         if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR;
 192
 193         /* mapping table: if some byte values are never used (encoding things
 194            like ascii text), the compression code removes the gaps to have fewer
 195            symbols to deal with, and writes a sparse bitfield indicating which
 196            values were present.  We make a translation table to convert the symbols
 197            back to the corresponding bytes. */
 198         symTotal = 0;
 199         i = 0;
 200         t = get_bits(bd, 16);
 201         do {
 202                 if (t & (1 << 15)) {
 203                         unsigned inner_map = get_bits(bd, 16);
 204                         do {
 205                                 if (inner_map & (1 << 15))
 206                                         symToByte[symTotal++] = i;
 207                                 inner_map <<= 1;
 208                                 i++;
 209                         } while (i & 15);
 210                         i -= 16;
 211                 }
 212                 t <<= 1;
 213                 i += 16;
 214         } while (i < 256);
 215
 216         /* How many different Huffman coding groups does this block use? */
 217         groupCount = get_bits(bd, 3);
 218         if (groupCount < 2 || groupCount > MAX_GROUPS)
 219                 return RETVAL_DATA_ERROR;
 220
 221         /* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding
 222            group.  Read in the group selector list, which is stored as MTF encoded
 223            bit runs.  (MTF=Move To Front, as each value is used it's moved to the
 224            start of the list.) */
 225         for (i = 0; i < groupCount; i++)
 226                 mtfSymbol[i] = i;
 227         nSelectors = get_bits(bd, 15);
 228         if (!nSelectors)
 229                 return RETVAL_DATA_ERROR;
 230         for (i = 0; i < nSelectors; i++) {
 231                 uint8_t tmp_byte;
 232                 /* Get next value */
 233                 int n = 0;
 234                 while (get_bits(bd, 1)) {
 235                         if (n >= groupCount) return RETVAL_DATA_ERROR;
 236                         n++;
 237                 }
 238                 /* Decode MTF to get the next selector */
 239                 tmp_byte = mtfSymbol[n];
 240                 while (--n >= 0)
 241                         mtfSymbol[n + 1] = mtfSymbol[n];
 242                 mtfSymbol[0] = selectors[i] = tmp_byte;
 243         }
 244
 245         /* Read the Huffman coding tables for each group, which code for symTotal
 246            literal symbols, plus two run symbols (RUNA, RUNB) */
 247         symCount = symTotal + 2;
 248         for (j = 0; j < groupCount; j++) {
 249                 uint8_t length[MAX_SYMBOLS];
 250                 /* 8 bits is ALMOST enough for temp[], see below */
 251                 unsigned temp[MAX_HUFCODE_BITS+1];
 252                 int minLen, maxLen, pp, len_m1;
 253
 254                 /* Read Huffman code lengths for each symbol.  They're stored in
 255                    a way similar to mtf; record a starting value for the first symbol,
 256                    and an offset from the previous value for every symbol after that.
 257                    (Subtracting 1 before the loop and then adding it back at the end is
 258                    an optimization that makes the test inside the loop simpler: symbol
 259                    length 0 becomes negative, so an unsigned inequality catches it.) */
 260                 len_m1 = get_bits(bd, 5) - 1;
 261                 for (i = 0; i < symCount; i++) {
 262                         for (;;) {
 263                                 int two_bits;
 264                                 if ((unsigned)len_m1 > (MAX_HUFCODE_BITS-1))
 265                                         return RETVAL_DATA_ERROR;
 266
 267                                 /* If first bit is 0, stop.  Else second bit indicates whether
 268                                    to increment or decrement the value.  Optimization: grab 2
 269                                    bits and unget the second if the first was 0. */
 270                                 two_bits = get_bits(bd, 2);
 271                                 if (two_bits < 2) {
 272                                         bd->inbufBitCount++;
 273                                         break;
 274                                 }
 275
 276                                 /* Add one if second bit 1, else subtract 1.  Avoids if/else */
 277                                 len_m1 += (((two_bits+1) & 2) - 1);
 278                         }
 279
 280                         /* Correct for the initial -1, to get the final symbol length */
 281                         length[i] = len_m1 + 1;
 282                 }
 283
 284                 /* Find largest and smallest lengths in this group */
 285                 minLen = maxLen = length[0];
 286                 for (i = 1; i < symCount; i++) {
 287                         if (length[i] > maxLen) maxLen = length[i];
 288                         else if (length[i] < minLen) minLen = length[i];
 289                 }
 290
 291                 /* Calculate permute[], base[], and limit[] tables from length[].
 292                  *
 293                  * permute[] is the lookup table for converting Huffman coded symbols
 294                  * into decoded symbols.  base[] is the amount to subtract from the
 295                  * value of a Huffman symbol of a given length when using permute[].
 296                  *
 297                  * limit[] indicates the largest numerical value a symbol with a given
 298                  * number of bits can have.  This is how the Huffman codes can vary in
 299                  * length: each code with a value>limit[length] needs another bit.
 300                  */
 301                 hufGroup = bd->groups + j;
 302                 hufGroup->minLen = minLen;
 303                 hufGroup->maxLen = maxLen;
 304
 305                 /* Note that minLen can't be smaller than 1, so we adjust the base
 306                    and limit array pointers so we're not always wasting the first
 307                    entry.  We do this again when using them (during symbol decoding). */
 308                 base = hufGroup->base - 1;
 309                 limit = hufGroup->limit - 1;
 310
 311                 /* Calculate permute[].  Concurently, initialize temp[] and limit[]. */
 312                 pp = 0;
 313                 for (i = minLen; i <= maxLen; i++) {
 314                         int k;
 315                         temp[i] = limit[i] = 0;
 316                         for (k = 0; k < symCount; k++)
 317                                 if (length[k] == i)
 318                                         hufGroup->permute[pp++] = k;
 319                 }
 320
 321                 /* Count symbols coded for at each bit length */
 322                 /* NB: in pathological cases, temp[8] can end ip being 256.
 323                  * That's why uint8_t is too small for temp[]. */
 324                 for (i = 0; i < symCount; i++) temp[length[i]]++;
 325
 326                 /* Calculate limit[] (the largest symbol-coding value at each bit
 327                  * length, which is (previous limit<<1)+symbols at this level), and
 328                  * base[] (number of symbols to ignore at each bit length, which is
 329                  * limit minus the cumulative count of symbols coded for already). */
 330                 pp = t = 0;
 331                 for (i = minLen; i < maxLen;) {
 332                         unsigned temp_i = temp[i];
 333
 334                         pp += temp_i;
 335
 336                         /* We read the largest possible symbol size and then unget bits
 337                            after determining how many we need, and those extra bits could
 338                            be set to anything.  (They're noise from future symbols.)  At
 339                            each level we're really only interested in the first few bits,
 340                            so here we set all the trailing to-be-ignored bits to 1 so they
 341                            don't affect the value>limit[length] comparison. */
 342                         limit[i] = (pp << (maxLen - i)) - 1;
 343                         pp <<= 1;
 344                         t += temp_i;
 345                         base[++i] = pp - t;
 346                 }
 347                 limit[maxLen] = pp + temp[maxLen] - 1;
 348                 limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */
 349                 base[minLen] = 0;
 350         }
 351
 352         /* We've finished reading and digesting the block header.  Now read this
 353            block's Huffman coded symbols from the file and undo the Huffman coding
 354            and run length encoding, saving the result into dbuf[dbufCount++] = uc */
 355
 356         /* Initialize symbol occurrence counters and symbol Move To Front table */
 357         /*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */
 358         for (i = 0; i < 256; i++) {
 359                 byteCount[i] = 0;
 360                 mtfSymbol[i] = (uint8_t)i;
 361         }
 362
 363         /* Loop through compressed symbols. */
 364
 365         runPos = dbufCount = selector = 0;
 366         for (;;) {
 367                 int nextSym;
 368
 369                 /* Fetch next Huffman coding group from list. */
 370                 symCount = GROUP_SIZE - 1;
 371                 if (selector >= nSelectors) return RETVAL_DATA_ERROR;
 372                 hufGroup = bd->groups + selectors[selector++];
 373                 base = hufGroup->base - 1;
 374                 limit = hufGroup->limit - 1;
 375
 376  continue_this_group:
 377                 /* Read next Huffman-coded symbol. */
 378
 379                 /* Note: It is far cheaper to read maxLen bits and back up than it is
 380                    to read minLen bits and then add additional bit at a time, testing
 381                    as we go.  Because there is a trailing last block (with file CRC),
 382                    there is no danger of the overread causing an unexpected EOF for a
 383                    valid compressed file.
 384                  */
 385                 if (1) {
 386                         /* As a further optimization, we do the read inline
 387                            (falling back to a call to get_bits if the buffer runs dry).
 388                          */
 389                         int new_cnt;
 390                         while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) {
 391                                 /* bd->inbufBitCount < hufGroup->maxLen */
 392                                 if (bd->inbufPos == bd->inbufCount) {
 393                                         nextSym = get_bits(bd, hufGroup->maxLen);
 394                                         goto got_huff_bits;
 395                                 }
 396                                 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
 397                                 bd->inbufBitCount += 8;
 398                         };
 399                         bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */
 400                         nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1);
 401  got_huff_bits: ;
 402                 } else { /* unoptimized equivalent */
 403                         nextSym = get_bits(bd, hufGroup->maxLen);
 404                 }
 405                 /* Figure how many bits are in next symbol and unget extras */
 406                 i = hufGroup->minLen;
 407                 while (nextSym > limit[i]) ++i;
 408                 j = hufGroup->maxLen - i;
 409                 if (j < 0)
 410                         return RETVAL_DATA_ERROR;
 411                 bd->inbufBitCount += j;
 412
 413                 /* Huffman decode value to get nextSym (with bounds checking) */
 414                 nextSym = (nextSym >> j) - base[i];
 415                 if ((unsigned)nextSym >= MAX_SYMBOLS)
 416                         return RETVAL_DATA_ERROR;
 417                 nextSym = hufGroup->permute[nextSym];
 418
 419                 /* We have now decoded the symbol, which indicates either a new literal
 420                    byte, or a repeated run of the most recent literal byte.  First,
 421                    check if nextSym indicates a repeated run, and if so loop collecting
 422                    how many times to repeat the last literal. */
 423                 if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */
 424
 425                         /* If this is the start of a new run, zero out counter */
 426                         if (runPos == 0) {
 427                                 runPos = 1;
 428                                 runCnt = 0;
 429                         }
 430
 431                         /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at
 432                            each bit position, add 1 or 2 instead.  For example,
 433                            1011 is 1<<0 + 1<<1 + 2<<2.  1010 is 2<<0 + 2<<1 + 1<<2.
 434                            You can make any bit pattern that way using 1 less symbol than
 435                            the basic or 0/1 method (except all bits 0, which would use no
 436                            symbols, but a run of length 0 doesn't mean anything in this
 437                            context).  Thus space is saved. */
 438                         runCnt += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */
 439                         if (runPos < dbufSize) runPos <<= 1;
 440                         goto end_of_huffman_loop;
 441                 }
 442
 443                 /* When we hit the first non-run symbol after a run, we now know
 444                    how many times to repeat the last literal, so append that many
 445                    copies to our buffer of decoded symbols (dbuf) now.  (The last
 446                    literal used is the one at the head of the mtfSymbol array.) */
 447                 if (runPos != 0) {
 448                         uint8_t tmp_byte;
 449                         if (dbufCount + runCnt > dbufSize) {
 450                                 dbg("dbufCount:%d+runCnt:%d %d > dbufSize:%d RETVAL_DATA_ERROR",
 451                                                 dbufCount, runCnt, dbufCount + runCnt, dbufSize);
 452                                 return RETVAL_DATA_ERROR;
 453                         }
 454                         tmp_byte = symToByte[mtfSymbol[0]];
 455                         byteCount[tmp_byte] += runCnt;
 456                         while (--runCnt >= 0) dbuf[dbufCount++] = (uint32_t)tmp_byte;
 457                         runPos = 0;
 458                 }
 459
 460                 /* Is this the terminating symbol? */
 461                 if (nextSym > symTotal) break;
 462
 463                 /* At this point, nextSym indicates a new literal character.  Subtract
 464                    one to get the position in the MTF array at which this literal is
 465                    currently to be found.  (Note that the result can't be -1 or 0,
 466                    because 0 and 1 are RUNA and RUNB.  But another instance of the
 467                    first symbol in the mtf array, position 0, would have been handled
 468                    as part of a run above.  Therefore 1 unused mtf position minus
 469                    2 non-literal nextSym values equals -1.) */
 470                 if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR;
 471                 i = nextSym - 1;
 472                 uc = mtfSymbol[i];
 473
 474                 /* Adjust the MTF array.  Since we typically expect to move only a
 475                  * small number of symbols, and are bound by 256 in any case, using
 476                  * memmove here would typically be bigger and slower due to function
 477                  * call overhead and other assorted setup costs. */
 478                 do {
 479                         mtfSymbol[i] = mtfSymbol[i-1];
 480                 } while (--i);
 481                 mtfSymbol[0] = uc;
 482                 uc = symToByte[uc];
 483
 484                 /* We have our literal byte.  Save it into dbuf. */
 485                 byteCount[uc]++;
 486                 dbuf[dbufCount++] = (uint32_t)uc;
 487
 488                 /* Skip group initialization if we're not done with this group.  Done
 489                  * this way to avoid compiler warning. */
 490  end_of_huffman_loop:
 491                 if (--symCount >= 0) goto continue_this_group;
 492         }
 493
 494         /* At this point, we've read all the Huffman-coded symbols (and repeated
 495            runs) for this block from the input stream, and decoded them into the
 496            intermediate buffer.  There are dbufCount many decoded bytes in dbuf[].
 497            Now undo the Burrows-Wheeler transform on dbuf.
 498            See http://dogma.net/markn/articles/bwt/bwt.htm
 499          */
 500
 501         /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
 502         j = 0;
 503         for (i = 0; i < 256; i++) {
 504                 int tmp_count = j + byteCount[i];
 505                 byteCount[i] = j;
 506                 j = tmp_count;
 507         }
 508
 509         /* Figure out what order dbuf would be in if we sorted it. */
 510         for (i = 0; i < dbufCount; i++) {
 511                 uint8_t tmp_byte = (uint8_t)dbuf[i];
 512                 int tmp_count = byteCount[tmp_byte];
 513                 dbuf[tmp_count] |= (i << 8);
 514                 byteCount[tmp_byte] = tmp_count + 1;
 515         }
 516
 517         /* Decode first byte by hand to initialize "previous" byte.  Note that it
 518            doesn't get output, and if the first three characters are identical
 519            it doesn't qualify as a run (hence writeRunCountdown=5). */
 520         if (dbufCount) {
 521                 uint32_t tmp;
 522                 if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
 523                 tmp = dbuf[origPtr];
 524                 bd->writeCurrent = (uint8_t)tmp;
 525                 bd->writePos = (tmp >> 8);
 526                 bd->writeRunCountdown = 5;
 527         }
 528         bd->writeCount = dbufCount;
 529
 530         return RETVAL_OK;
 531 }
 532
 533 /* Undo Burrows-Wheeler transform on intermediate buffer to produce output.
 534    If start_bunzip was initialized with out_fd=-1, then up to len bytes of
 535    data are written to outbuf.  Return value is number of bytes written or
 536    error (all errors are negative numbers).  If out_fd!=-1, outbuf and len
 537    are ignored, data is written to out_fd and return is RETVAL_OK or error.
 538
 539    NB: read_bunzip returns < 0 on error, or the number of *unfilled* bytes
 540    in outbuf. IOW: on EOF returns len ("all bytes are not filled"), not 0.
 541    (Why? This allows to get rid of one local variable)
 542 */
 543 int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
 544 {
 545         const uint32_t *dbuf;
 546         int pos, current, previous;
 547         uint32_t CRC;
 548
 549         /* If we already have error/end indicator, return it */
 550         if (bd->writeCount < 0)
 551                 return bd->writeCount;
 552
 553         dbuf = bd->dbuf;
 554
 555         /* Register-cached state (hopefully): */
 556         pos = bd->writePos;
 557         current = bd->writeCurrent;
 558         CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */
 559
 560         /* We will always have pending decoded data to write into the output
 561            buffer unless this is the very first call (in which case we haven't
 562            Huffman-decoded a block into the intermediate buffer yet). */
 563         if (bd->writeCopies) {
 564
 565  dec_writeCopies:
 566                 /* Inside the loop, writeCopies means extra copies (beyond 1) */
 567                 --bd->writeCopies;
 568
 569                 /* Loop outputting bytes */
 570                 for (;;) {
 571
 572                         /* If the output buffer is full, save cached state and return */
 573                         if (--len < 0) {
 574                                 /* Unlikely branch.
 575                                  * Use of "goto" instead of keeping code here
 576                                  * helps compiler to realize this. */
 577                                 goto outbuf_full;
 578                         }
 579
 580                         /* Write next byte into output buffer, updating CRC */
 581                         *outbuf++ = current;
 582                         CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current];
 583
 584                         /* Loop now if we're outputting multiple copies of this byte */
 585                         if (bd->writeCopies) {
 586                                 /* Unlikely branch */
 587                                 /*--bd->writeCopies;*/
 588                                 /*continue;*/
 589                                 /* Same, but (ab)using other existing --writeCopies operation
 590                                  * (and this if() compiles into just test+branch pair): */
 591                                 goto dec_writeCopies;
 592                         }
 593  decode_next_byte:
 594                         if (--bd->writeCount < 0)
 595                                 break; /* input block is fully consumed, need next one */
 596
 597                         /* Follow sequence vector to undo Burrows-Wheeler transform */
 598                         previous = current;
 599                         pos = dbuf[pos];
 600                         current = (uint8_t)pos;
 601                         pos >>= 8;
 602
 603                         /* After 3 consecutive copies of the same byte, the 4th
 604                          * is a repeat count.  We count down from 4 instead
 605                          * of counting up because testing for non-zero is faster */
 606                         if (--bd->writeRunCountdown != 0) {
 607                                 if (current != previous)
 608                                         bd->writeRunCountdown = 4;
 609                         } else {
 610                                 /* Unlikely branch */
 611                                 /* We have a repeated run, this byte indicates the count */
 612                                 bd->writeCopies = current;
 613                                 current = previous;
 614                                 bd->writeRunCountdown = 5;
 615
 616                                 /* Sometimes there are just 3 bytes (run length 0) */
 617                                 if (!bd->writeCopies) goto decode_next_byte;
 618
 619                                 /* Subtract the 1 copy we'd output anyway to get extras */
 620                                 --bd->writeCopies;
 621                         }
 622                 } /* for(;;) */
 623
 624                 /* Decompression of this input block completed successfully */
 625                 bd->writeCRC = CRC = ~CRC;
 626                 bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC;
 627
 628                 /* If this block had a CRC error, force file level CRC error */
 629                 if (CRC != bd->headerCRC) {
 630                         bd->totalCRC = bd->headerCRC + 1;
 631                         return RETVAL_LAST_BLOCK;
 632                 }
 633         }
 634
 635         /* Refill the intermediate buffer by Huffman-decoding next block of input */
 636         {
 637                 int r = get_next_block(bd);
 638                 if (r) { /* error/end */
 639                         bd->writeCount = r;
 640                         return (r != RETVAL_LAST_BLOCK) ? r : len;
 641                 }
 642         }
 643
 644         CRC = ~0;
 645         pos = bd->writePos;
 646         current = bd->writeCurrent;
 647         goto decode_next_byte;
 648
 649  outbuf_full:
 650         /* Output buffer is full, save cached state and return */
 651         bd->writePos = pos;
 652         bd->writeCurrent = current;
 653         bd->writeCRC = CRC;
 654
 655         bd->writeCopies++;
 656
 657         return 0;
 658 }
 659
 660 /* Allocate the structure, read file header.  If in_fd==-1, inbuf must contain
 661    a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
 662    ignored, and data is read from file handle into temporary buffer. */
 663
 664 /* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
 665    should work for NOFORK applets too, we must be extremely careful to not leak
 666    any allocations! */
 667 int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
 668                 const void *inbuf, int len)
 669 {
 670         bunzip_data *bd;
 671         unsigned i;
 672         enum {
 673                 BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0',
 674                 h0 = ('h' << 8) + '0',
 675         };
 676
 677         /* Figure out how much data to allocate */
 678         i = sizeof(bunzip_data);
 679         if (in_fd != -1) i += IOBUF_SIZE;
 680
 681         /* Allocate bunzip_data.  Most fields initialize to zero. */
 682         bd = *bdp = xzalloc(i);
 683
 684         /* Setup input buffer */
 685         bd->in_fd = in_fd;
 686         if (-1 == in_fd) {
 687                 /* in this case, bd->inbuf is read-only */
 688                 bd->inbuf = (void*)inbuf; /* cast away const-ness */
 689         } else {
 690                 bd->inbuf = (uint8_t*)(bd + 1);
 691                 memcpy(bd->inbuf, inbuf, len);
 692         }
 693         bd->inbufCount = len;
 694
 695         /* Init the CRC32 table (big endian) */
 696         crc32_filltable(bd->crc32Table, 1);
 697
 698         /* Setup for I/O error handling via longjmp */
 699         i = setjmp(bd->jmpbuf);
 700         if (i) return i;
 701
 702         /* Ensure that file starts with "BZh['1'-'9']." */
 703         /* Update: now caller verifies 1st two bytes, makes .gz/.bz2
 704          * integration easier */
 705         /* was: */
 706         /* i = get_bits(bd, 32); */
 707         /* if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; */
 708         i = get_bits(bd, 16);
 709         if ((unsigned)(i - h0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA;
 710
 711         /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of
 712            uncompressed data.  Allocate intermediate buffer for block. */
 713         /* bd->dbufSize = 100000 * (i - BZh0); */
 714         bd->dbufSize = 100000 * (i - h0);
 715
 716         /* Cannot use xmalloc - may leak bd in NOFORK case! */
 717         bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0]));
 718         if (!bd->dbuf) {
 719                 free(bd);
 720                 xfunc_die();
 721         }
 722         return RETVAL_OK;
 723 }
 724
 725 void FAST_FUNC dealloc_bunzip(bunzip_data *bd)
 726 {
 727         free(bd->dbuf);
 728         free(bd);
 729 }
 730
 731
 732 /* Decompress src_fd to dst_fd.  Stops at end of bzip data, not end of file. */
 733 IF_DESKTOP(long long) int FAST_FUNC
 734 unpack_bz2_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd)
 735 {
 736         IF_DESKTOP(long long total_written = 0;)
 737         bunzip_data *bd;
 738         char *outbuf;
 739         int i;
 740         unsigned len;
 741
 742         if (check_signature16(aux, src_fd, BZIP2_MAGIC))
 743                 return -1;
 744
 745         outbuf = xmalloc(IOBUF_SIZE);
 746         len = 0;
 747         while (1) { /* "Process one BZ... stream" loop */
 748
 749                 i = start_bunzip(&bd, src_fd, outbuf + 2, len);
 750
 751                 if (i == 0) {
 752                         while (1) { /* "Produce some output bytes" loop */
 753                                 i = read_bunzip(bd, outbuf, IOBUF_SIZE);
 754                                 if (i < 0) /* error? */
 755                                         break;
 756                                 i = IOBUF_SIZE - i; /* number of bytes produced */
 757                                 if (i == 0) /* EOF? */
 758                                         break;
 759                                 if (i != full_write(dst_fd, outbuf, i)) {
 760                                         bb_error_msg("short write");
 761                                         i = RETVAL_SHORT_WRITE;
 762                                         goto release_mem;
 763                                 }
 764                                 IF_DESKTOP(total_written += i;)
 765                         }
 766                 }
 767
 768                 if (i != RETVAL_LAST_BLOCK
 769                 /* Observed case when i == RETVAL_OK:
 770                  * "bzcat z.bz2", where "z.bz2" is a bzipped zero-length file
 771                  * (to be exact, z.bz2 is exactly these 14 bytes:
 772                  * 42 5a 68 39 17 72 45 38  50 90 00 00 00 00).
 773                  */
 774                  && i != RETVAL_OK
 775                 ) {
 776                         bb_error_msg("bunzip error %d", i);
 777                         break;
 778                 }
 779                 if (bd->headerCRC != bd->totalCRC) {
 780                         bb_error_msg("CRC error");
 781                         break;
 782                 }
 783
 784                 /* Successfully unpacked one BZ stream */
 785                 i = RETVAL_OK;
 786
 787                 /* Do we have "BZ..." after last processed byte?
 788                  * pbzip2 (parallelized bzip2) produces such files.
 789                  */
 790                 len = bd->inbufCount - bd->inbufPos;
 791                 memcpy(outbuf, &bd->inbuf[bd->inbufPos], len);
 792                 if (len < 2) {
 793                         if (safe_read(src_fd, outbuf + len, 2 - len) != 2 - len)
 794                                 break;
 795                         len = 2;
 796                 }
 797                 if (*(uint16_t*)outbuf != BZIP2_MAGIC) /* "BZ"? */
 798                         break;
 799                 dealloc_bunzip(bd);
 800                 len -= 2;
 801         }
 802
 803  release_mem:
 804         dealloc_bunzip(bd);
 805         free(outbuf);
 806
 807         return i ? i : IF_DESKTOP(total_written) + 0;
 808 }
 809
 810 #ifdef TESTING
 811
 812 static char *const bunzip_errors[] = {
 813         NULL, "Bad file checksum", "Not bzip data",
 814         "Unexpected input EOF", "Unexpected output EOF", "Data error",
 815         "Out of memory", "Obsolete (pre 0.9.5) bzip format not supported"
 816 };
 817
 818 /* Dumb little test thing, decompress stdin to stdout */
 819 int main(int argc, char **argv)
 820 {
 821         char c;
 822
 823         int i = unpack_bz2_stream(0, 1);
 824         if (i < 0)
 825                 fprintf(stderr, "%s\n", bunzip_errors[-i]);
 826         else if (read(STDIN_FILENO, &c, 1))
 827                 fprintf(stderr, "Trailing garbage ignored\n");
 828         return -i;
 829 }
 830 #endif