bzip2: fix two crashes on corrupted archives
authorDenys Vlasenko <vda.linux@googlemail.com>
Sun, 8 Apr 2018 18:02:01 +0000 (20:02 +0200)
committerDenys Vlasenko <vda.linux@googlemail.com>
Sun, 8 Apr 2018 18:05:04 +0000 (20:05 +0200)
As it turns out, longjmp'ing into freed stack is not healthy...

function                                             old     new   delta
unpack_usage_messages                                  -      97     +97
unpack_bz2_stream                                    369     409     +40
get_next_block                                      1667    1677     +10
get_bits                                             156     155      -1
start_bunzip                                         212     183     -29
bb_show_usage                                        181     120     -61
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 2/3 up/down: 147/-91)            Total: 56 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
archival/libarchive/decompress_bunzip2.c
archival/libarchive/decompress_gunzip.c
coreutils/test.c
include/bb_archive.h
libbb/appletlib.c
miscutils/bbconfig.c
shell/ash.c
testsuite/bunzip2.tests
testsuite/bz2_issue_11.bz2 [new file with mode: 0644]
testsuite/bz2_issue_12.bz2 [new file with mode: 0644]

index bec89edd3a4df217e8d64833f0fb346835aec6b3..7ef4e035f32ff80ed159726453119678b6edc25e 100644 (file)
@@ -100,7 +100,7 @@ struct bunzip_data {
        unsigned dbufSize;
 
        /* For I/O error handling */
-       jmp_buf jmpbuf;
+       jmp_buf *jmpbuf;
 
        /* Big things go last (register-relative addressing can be larger for big offsets) */
        uint32_t crc32Table[256];
@@ -127,7 +127,7 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
                        /* if "no input fd" case: in_fd == -1, read fails, we jump */
                        bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
                        if (bd->inbufCount <= 0)
-                               longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
+                               longjmp(*bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
                        bd->inbufPos = 0;
                }
 
@@ -151,12 +151,12 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
 
        return bits;
 }
+//#define get_bits(bd, n) (dbg("%d:get_bits()", __LINE__), get_bits(bd, n))
 
 /* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
 static int get_next_block(bunzip_data *bd)
 {
-       struct group_data *hufGroup;
-       int groupCount, *base, *limit, selector,
+       int groupCount, selector,
                i, j, symCount, symTotal, nSelectors, byteCount[256];
        uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
        uint32_t *dbuf;
@@ -179,15 +179,19 @@ static int get_next_block(bunzip_data *bd)
        i = get_bits(bd, 24);
        j = get_bits(bd, 24);
        bd->headerCRC = get_bits(bd, 32);
-       if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK;
-       if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA;
+       if ((i == 0x177245) && (j == 0x385090))
+               return RETVAL_LAST_BLOCK;
+       if ((i != 0x314159) || (j != 0x265359))
+               return RETVAL_NOT_BZIP_DATA;
 
        /* We can add support for blockRandomised if anybody complains.  There was
           some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
           it didn't actually work. */
-       if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT;
+       if (get_bits(bd, 1))
+               return RETVAL_OBSOLETE_INPUT;
        origPtr = get_bits(bd, 24);
-       if (origPtr > bd->dbufSize) return RETVAL_DATA_ERROR;
+       if (origPtr > bd->dbufSize)
+               return RETVAL_DATA_ERROR;
 
        /* mapping table: if some byte values are never used (encoding things
           like ascii text), the compression code removes the gaps to have fewer
@@ -231,13 +235,21 @@ static int get_next_block(bunzip_data *bd)
                /* Get next value */
                int n = 0;
                while (get_bits(bd, 1)) {
-                       if (n >= groupCount) return RETVAL_DATA_ERROR;
+                       if (n >= groupCount)
+                               return RETVAL_DATA_ERROR;
                        n++;
                }
                /* Decode MTF to get the next selector */
                tmp_byte = mtfSymbol[n];
                while (--n >= 0)
                        mtfSymbol[n + 1] = mtfSymbol[n];
+//We catch it later, in the second loop where we use selectors[i].
+//Maybe this is a better place, though?
+//             if (tmp_byte >= groupCount) {
+//                     dbg("%d: selectors[%d]:%d groupCount:%d",
+//                                     __LINE__, i, tmp_byte, groupCount);
+//                     return RETVAL_DATA_ERROR;
+//             }
                mtfSymbol[0] = selectors[i] = tmp_byte;
        }
 
@@ -248,6 +260,8 @@ static int get_next_block(bunzip_data *bd)
                uint8_t length[MAX_SYMBOLS];
                /* 8 bits is ALMOST enough for temp[], see below */
                unsigned temp[MAX_HUFCODE_BITS+1];
+               struct group_data *hufGroup;
+               int *base, *limit;
                int minLen, maxLen, pp, len_m1;
 
                /* Read Huffman code lengths for each symbol.  They're stored in
@@ -283,8 +297,10 @@ static int get_next_block(bunzip_data *bd)
                /* Find largest and smallest lengths in this group */
                minLen = maxLen = length[0];
                for (i = 1; i < symCount; i++) {
-                       if (length[i] > maxLen) maxLen = length[i];
-                       else if (length[i] < minLen) minLen = length[i];
+                       if (length[i] > maxLen)
+                               maxLen = length[i];
+                       else if (length[i] < minLen)
+                               minLen = length[i];
                }
 
                /* Calculate permute[], base[], and limit[] tables from length[].
@@ -320,7 +336,8 @@ static int get_next_block(bunzip_data *bd)
                /* Count symbols coded for at each bit length */
                /* NB: in pathological cases, temp[8] can end ip being 256.
                 * That's why uint8_t is too small for temp[]. */
-               for (i = 0; i < symCount; i++) temp[length[i]]++;
+               for (i = 0; i < symCount; i++)
+                       temp[length[i]]++;
 
                /* Calculate limit[] (the largest symbol-coding value at each bit
                 * length, which is (previous limit<<1)+symbols at this level), and
@@ -363,12 +380,22 @@ static int get_next_block(bunzip_data *bd)
 
        runPos = dbufCount = selector = 0;
        for (;;) {
+               struct group_data *hufGroup;
+               int *base, *limit;
                int nextSym;
+               uint8_t ngrp;
 
                /* Fetch next Huffman coding group from list. */
                symCount = GROUP_SIZE - 1;
-               if (selector >= nSelectors) return RETVAL_DATA_ERROR;
-               hufGroup = bd->groups + selectors[selector++];
+               if (selector >= nSelectors)
+                       return RETVAL_DATA_ERROR;
+               ngrp = selectors[selector++];
+               if (ngrp >= groupCount) {
+                       dbg("%d selectors[%d]:%d groupCount:%d",
+                               __LINE__, selector-1, ngrp, groupCount);
+                       return RETVAL_DATA_ERROR;
+               }
+               hufGroup = bd->groups + ngrp;
                base = hufGroup->base - 1;
                limit = hufGroup->limit - 1;
 
@@ -403,7 +430,8 @@ static int get_next_block(bunzip_data *bd)
                }
                /* Figure how many bits are in next symbol and unget extras */
                i = hufGroup->minLen;
-               while (nextSym > limit[i]) ++i;
+               while (nextSym > limit[i])
+                       ++i;
                j = hufGroup->maxLen - i;
                if (j < 0)
                        return RETVAL_DATA_ERROR;
@@ -671,7 +699,10 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
 /* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
    should work for NOFORK applets too, we must be extremely careful to not leak
    any allocations! */
-int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
+int FAST_FUNC start_bunzip(
+               void *jmpbuf,
+               bunzip_data **bdp,
+               int in_fd,
                const void *inbuf, int len)
 {
        bunzip_data *bd;
@@ -683,11 +714,14 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
 
        /* Figure out how much data to allocate */
        i = sizeof(bunzip_data);
-       if (in_fd != -1) i += IOBUF_SIZE;
+       if (in_fd != -1)
+               i += IOBUF_SIZE;
 
        /* Allocate bunzip_data.  Most fields initialize to zero. */
        bd = *bdp = xzalloc(i);
 
+       bd->jmpbuf = jmpbuf;
+
        /* Setup input buffer */
        bd->in_fd = in_fd;
        if (-1 == in_fd) {
@@ -702,10 +736,6 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
        /* Init the CRC32 table (big endian) */
        crc32_filltable(bd->crc32Table, 1);
 
-       /* Setup for I/O error handling via longjmp */
-       i = setjmp(bd->jmpbuf);
-       if (i) return i;
-
        /* Ensure that file starts with "BZh['1'-'9']." */
        /* Update: now caller verifies 1st two bytes, makes .gz/.bz2
         * integration easier */
@@ -752,8 +782,12 @@ unpack_bz2_stream(transformer_state_t *xstate)
        outbuf = xmalloc(IOBUF_SIZE);
        len = 0;
        while (1) { /* "Process one BZ... stream" loop */
+               jmp_buf jmpbuf;
 
-               i = start_bunzip(&bd, xstate->src_fd, outbuf + 2, len);
+               /* Setup for I/O error handling via longjmp */
+               i = setjmp(jmpbuf);
+               if (i == 0)
+                       i = start_bunzip(&jmpbuf, &bd, xstate->src_fd, outbuf + 2, len);
 
                if (i == 0) {
                        while (1) { /* "Produce some output bytes" loop */
index 9a58d10d4e0dd5905cd5c9051c68088c2d803436..7f9046b82b98d881b66b9537b681dc9a9e5bc691 100644 (file)
@@ -32,7 +32,6 @@
  *
  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  */
-#include <setjmp.h>
 #include "libbb.h"
 #include "bb_archive.h"
 
index a8286525a87155a4dfcbd7cfb20cea062f711b47..824ce3b5a06c56cf0527e83696c7b69d587d12b2 100644 (file)
@@ -76,7 +76,6 @@
 //usage:       "1\n"
 
 #include "libbb.h"
-#include <setjmp.h>
 
 /* This is a NOFORK applet. Be very careful! */
 
index a5c61e95b51185278f0e84aa8850bff01fac6d55..b437f1920f1c7853a25f6d9e371870bf8e9997a4 100644 (file)
@@ -210,7 +210,7 @@ const llist_t *find_list_entry2(const llist_t *list, const char *filename) FAST_
 
 /* A bit of bunzip2 internals are exposed for compressed help support: */
 typedef struct bunzip_data bunzip_data;
-int start_bunzip(bunzip_data **bdp, int in_fd, const void *inbuf, int len) FAST_FUNC;
+int start_bunzip(void *, bunzip_data **bdp, int in_fd, const void *inbuf, int len) FAST_FUNC;
 /* NB: read_bunzip returns < 0 on error, or the number of *unfilled* bytes
  * in outbuf. IOW: on EOF returns len ("all bytes are not filled"), not 0: */
 int read_bunzip(bunzip_data *bd, char *outbuf, int len) FAST_FUNC;
index 022455da4f9cc04e818b8e36b831baadbc4a6633..769b7881ce403749bfc88165caa419eadf0dcd67 100644 (file)
@@ -102,14 +102,21 @@ static const char *unpack_usage_messages(void)
        char *outbuf = NULL;
        bunzip_data *bd;
        int i;
+       jmp_buf jmpbuf;
 
-       i = start_bunzip(&bd,
+       /* Setup for I/O error handling via longjmp */
+       i = setjmp(jmpbuf);
+       if (i == 0) {
+               i = start_bunzip(&jmpbuf,
+                       &bd,
                        /* src_fd: */ -1,
                        /* inbuf:  */ packed_usage,
-                       /* len:    */ sizeof(packed_usage));
-       /* read_bunzip can longjmp to start_bunzip, and ultimately
-        * end up here with i != 0 on read data errors! Not trivial */
-       if (!i) {
+                       /* len:    */ sizeof(packed_usage)
+               );
+       }
+       /* read_bunzip can longjmp and end up here with i != 0
+        * on read data errors! Not trivial */
+       if (i == 0) {
                /* Cannot use xmalloc: will leak bd in NOFORK case! */
                outbuf = malloc_or_warn(sizeof(UNPACKED_USAGE));
                if (outbuf)
index 9ab57876e562bb401e22ebc1c2f6823ab38c1a86..50134954848389344464e73dd1a26668093a9af2 100644 (file)
@@ -44,13 +44,22 @@ int bbconfig_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
 {
 #if ENABLE_FEATURE_COMPRESS_BBCONFIG
        bunzip_data *bd;
-       int i = start_bunzip(&bd,
+       int i;
+       jmp_buf jmpbuf;
+
+       /* Setup for I/O error handling via longjmp */
+       i = setjmp(jmpbuf);
+       if (i == 0) {
+               i = start_bunzip(&jmpbuf,
+                       &bd,
                        /* src_fd: */ -1,
                        /* inbuf:  */ bbconfig_config_bz2,
-                       /* len:    */ sizeof(bbconfig_config_bz2));
-       /* read_bunzip can longjmp to start_bunzip, and ultimately
-        * end up here with i != 0 on read data errors! Not trivial */
-       if (!i) {
+                       /* len:    */ sizeof(bbconfig_config_bz2)
+               );
+       }
+       /* read_bunzip can longjmp and end up here with i != 0
+        * on read data errors! Not trivial */
+       if (i == 0) {
                /* Cannot use xmalloc: will leak bd in NOFORK case! */
                char *outbuf = malloc_or_warn(sizeof(bbconfig_config));
                if (outbuf) {
index 56fba4a57382bfdc8d72104f3fa1eb51564b79c1..24958c0fceb3208ab265129aa17d7d73a4000838 100644 (file)
 
 #define JOBS ENABLE_ASH_JOB_CONTROL
 
-#include <setjmp.h>
 #include <fnmatch.h>
 #include <sys/times.h>
 #include <sys/utsname.h> /* for setting $HOSTNAME */
index fcfce1a31eea1a2ec9f7d3bb336dcdbb45f4ff13..edb3327488c2e6d18b8f8677c05a5c8c0703232a 100755 (executable)
@@ -552,6 +552,22 @@ if test "${0##*/}" = "bunzip2.tests"; then
        echo "FAIL: $unpack: pbzip_4m_zeros file"
        FAILCOUNT=$((FAILCOUNT + 1))
     fi
+
+    errout="`${bb}bunzip2 <bz2_issue_11.bz2 2>&1 >/dev/null`"
+    if test x"$errout:$?" = x"bunzip2: bunzip error -5:1"; then
+       echo "PASS: $unpack: bz2_issue_11.bz2 corrupted example"
+    else
+       echo "FAIL: $unpack: bz2_issue_11.bz2 corrupted example"
+       FAILCOUNT=$((FAILCOUNT + 1))
+    fi
+
+    errout="`${bb}bunzip2 <bz2_issue_12.bz2 2>&1 >/dev/null`"
+    if test x"$errout:$?" = x"bunzip2: bunzip error -3:1"; then
+       echo "PASS: $unpack: bz2_issue_12.bz2 corrupted example"
+    else
+       echo "FAIL: $unpack: bz2_issue_12.bz2 corrupted example"
+       FAILCOUNT=$((FAILCOUNT + 1))
+    fi
 fi
 
 exit $((FAILCOUNT <= 255 ? FAILCOUNT : 255))
diff --git a/testsuite/bz2_issue_11.bz2 b/testsuite/bz2_issue_11.bz2
new file mode 100644 (file)
index 0000000..62b2520
Binary files /dev/null and b/testsuite/bz2_issue_11.bz2 differ
diff --git a/testsuite/bz2_issue_12.bz2 b/testsuite/bz2_issue_12.bz2
new file mode 100644 (file)
index 0000000..4215f08
Binary files /dev/null and b/testsuite/bz2_issue_12.bz2 differ