From ee72302ac5e3b0b2217f616ab316d3c89e5a1f4c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 8 Jan 2017 14:14:19 +0100 Subject: [PATCH] unzip: do not use CDF.extra_len, read local file header. Closes 9536 While at it, shorten many field and variable names. function old new delta unzip_main 2334 2376 +42 Signed-off-by: Denys Vlasenko --- archival/unzip.c | 236 ++++++++++++++++++++++-------------------- testsuite/unzip.tests | 4 +- 2 files changed, 125 insertions(+), 115 deletions(-) diff --git a/archival/unzip.c b/archival/unzip.c index 98a71c09d..921493591 100644 --- a/archival/unzip.c +++ b/archival/unzip.c @@ -62,8 +62,8 @@ enum { #if BB_BIG_ENDIAN ZIP_FILEHEADER_MAGIC = 0x504b0304, - ZIP_CDF_MAGIC = 0x504b0102, /* central directory's file header */ - ZIP_CDE_MAGIC = 0x504b0506, /* "end of central directory" record */ + ZIP_CDF_MAGIC = 0x504b0102, /* CDF item */ + ZIP_CDE_MAGIC = 0x504b0506, /* End of CDF */ ZIP_DD_MAGIC = 0x504b0708, #else ZIP_FILEHEADER_MAGIC = 0x04034b50, @@ -91,16 +91,16 @@ typedef union { /* filename follows (not NUL terminated) */ /* extra field follows */ /* data follows */ - } formatted PACKED; + } fmt PACKED; } zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */ -#define FIX_ENDIANNESS_ZIP(zip_header) \ +#define FIX_ENDIANNESS_ZIP(zip) \ do { if (BB_BIG_ENDIAN) { \ - (zip_header).formatted.crc32 = SWAP_LE32((zip_header).formatted.crc32 ); \ - (zip_header).formatted.cmpsize = SWAP_LE32((zip_header).formatted.cmpsize ); \ - (zip_header).formatted.ucmpsize = SWAP_LE32((zip_header).formatted.ucmpsize ); \ - (zip_header).formatted.filename_len = SWAP_LE16((zip_header).formatted.filename_len); \ - (zip_header).formatted.extra_len = SWAP_LE16((zip_header).formatted.extra_len ); \ + (zip).fmt.crc32 = SWAP_LE32((zip).fmt.crc32 ); \ + (zip).fmt.cmpsize = SWAP_LE32((zip).fmt.cmpsize ); \ + (zip).fmt.ucmpsize = SWAP_LE32((zip).fmt.ucmpsize ); \ + (zip).fmt.filename_len = SWAP_LE16((zip).fmt.filename_len); \ + (zip).fmt.extra_len = SWAP_LE16((zip).fmt.extra_len ); \ }} while (0) #define CDF_HEADER_LEN 42 @@ -118,39 +118,39 @@ typedef union { uint32_t crc32; /* 12-15 */ uint32_t cmpsize; /* 16-19 */ uint32_t ucmpsize; /* 20-23 */ - uint16_t file_name_length; /* 24-25 */ - uint16_t extra_field_length; /* 26-27 */ + uint16_t filename_len; /* 24-25 */ + uint16_t extra_len; /* 26-27 */ uint16_t file_comment_length; /* 28-29 */ uint16_t disk_number_start; /* 30-31 */ - uint16_t internal_file_attributes; /* 32-33 */ - uint32_t external_file_attributes PACKED; /* 34-37 */ + uint16_t internal_attributes; /* 32-33 */ + uint32_t external_attributes PACKED; /* 34-37 */ uint32_t relative_offset_of_local_header PACKED; /* 38-41 */ /* filename follows (not NUL terminated) */ /* extra field follows */ - /* comment follows */ - } formatted PACKED; + /* file comment follows */ + } fmt PACKED; } cdf_header_t; -#define FIX_ENDIANNESS_CDF(cdf_header) \ +#define FIX_ENDIANNESS_CDF(cdf) \ do { if (BB_BIG_ENDIAN) { \ - (cdf_header).formatted.version_made_by = SWAP_LE16((cdf_header).formatted.version_made_by); \ - (cdf_header).formatted.version_needed = SWAP_LE16((cdf_header).formatted.version_needed); \ - (cdf_header).formatted.method = SWAP_LE16((cdf_header).formatted.method ); \ - (cdf_header).formatted.modtime = SWAP_LE16((cdf_header).formatted.modtime ); \ - (cdf_header).formatted.moddate = SWAP_LE16((cdf_header).formatted.moddate ); \ - (cdf_header).formatted.crc32 = SWAP_LE32((cdf_header).formatted.crc32 ); \ - (cdf_header).formatted.cmpsize = SWAP_LE32((cdf_header).formatted.cmpsize ); \ - (cdf_header).formatted.ucmpsize = SWAP_LE32((cdf_header).formatted.ucmpsize ); \ - (cdf_header).formatted.file_name_length = SWAP_LE16((cdf_header).formatted.file_name_length); \ - (cdf_header).formatted.extra_field_length = SWAP_LE16((cdf_header).formatted.extra_field_length); \ - (cdf_header).formatted.file_comment_length = SWAP_LE16((cdf_header).formatted.file_comment_length); \ - (cdf_header).formatted.external_file_attributes = SWAP_LE32((cdf_header).formatted.external_file_attributes); \ + (cdf).fmt.version_made_by = SWAP_LE16((cdf).fmt.version_made_by); \ + (cdf).fmt.version_needed = SWAP_LE16((cdf).fmt.version_needed); \ + (cdf).fmt.method = SWAP_LE16((cdf).fmt.method ); \ + (cdf).fmt.modtime = SWAP_LE16((cdf).fmt.modtime ); \ + (cdf).fmt.moddate = SWAP_LE16((cdf).fmt.moddate ); \ + (cdf).fmt.crc32 = SWAP_LE32((cdf).fmt.crc32 ); \ + (cdf).fmt.cmpsize = SWAP_LE32((cdf).fmt.cmpsize ); \ + (cdf).fmt.ucmpsize = SWAP_LE32((cdf).fmt.ucmpsize ); \ + (cdf).fmt.filename_len = SWAP_LE16((cdf).fmt.filename_len); \ + (cdf).fmt.extra_len = SWAP_LE16((cdf).fmt.extra_len ); \ + (cdf).fmt.file_comment_length = SWAP_LE16((cdf).fmt.file_comment_length); \ + (cdf).fmt.external_attributes = SWAP_LE32((cdf).fmt.external_attributes); \ }} while (0) -#define CDE_HEADER_LEN 16 +#define CDE_LEN 16 typedef union { - uint8_t raw[CDE_HEADER_LEN]; + uint8_t raw[CDE_LEN]; struct { /* uint32_t signature; 50 4b 05 06 */ uint16_t this_disk_no; @@ -159,14 +159,14 @@ typedef union { uint16_t cdf_entries_total; uint32_t cdf_size; uint32_t cdf_offset; - /* uint16_t file_comment_length; */ - /* .ZIP file comment (variable size) */ - } formatted PACKED; -} cde_header_t; + /* uint16_t archive_comment_length; */ + /* archive comment follows */ + } fmt PACKED; +} cde_t; -#define FIX_ENDIANNESS_CDE(cde_header) \ +#define FIX_ENDIANNESS_CDE(cde) \ do { if (BB_BIG_ENDIAN) { \ - (cde_header).formatted.cdf_offset = SWAP_LE32((cde_header).formatted.cdf_offset); \ + (cde).fmt.cdf_offset = SWAP_LE32((cde).fmt.cdf_offset); \ }} while (0) struct BUG { @@ -175,13 +175,13 @@ struct BUG { * even though the elements are all in the right place. */ char BUG_zip_header_must_be_26_bytes[ - offsetof(zip_header_t, formatted.extra_len) + 2 + offsetof(zip_header_t, fmt.extra_len) + 2 == ZIP_HEADER_LEN ? 1 : -1]; char BUG_cdf_header_must_be_42_bytes[ - offsetof(cdf_header_t, formatted.relative_offset_of_local_header) + 4 + offsetof(cdf_header_t, fmt.relative_offset_of_local_header) + 4 == CDF_HEADER_LEN ? 1 : -1]; - char BUG_cde_header_must_be_16_bytes[ - sizeof(cde_header_t) == CDE_HEADER_LEN ? 1 : -1]; + char BUG_cde_must_be_16_bytes[ + sizeof(cde_t) == CDE_LEN ? 1 : -1]; }; @@ -207,7 +207,7 @@ enum { zip_fd = 3 }; /* NB: does not preserve file position! */ static uint32_t find_cdf_offset(void) { - cde_header_t cde_header; + cde_t cde; unsigned char *buf; unsigned char *p; off_t end; @@ -228,7 +228,7 @@ static uint32_t find_cdf_offset(void) found = BAD_CDF_OFFSET; p = buf; - while (p <= buf + PEEK_FROM_END - CDE_HEADER_LEN - 4) { + while (p <= buf + PEEK_FROM_END - CDE_LEN - 4) { if (*p != 'P') { p++; continue; @@ -240,19 +240,19 @@ static uint32_t find_cdf_offset(void) if (*++p != 6) continue; /* we found CDE! */ - memcpy(cde_header.raw, p + 1, CDE_HEADER_LEN); - FIX_ENDIANNESS_CDE(cde_header); + memcpy(cde.raw, p + 1, CDE_LEN); + FIX_ENDIANNESS_CDE(cde); /* * I've seen .ZIP files with seemingly valid CDEs * where cdf_offset points past EOF - ?? * This check ignores such CDEs: */ - if (cde_header.formatted.cdf_offset < end + (p - buf)) { - found = cde_header.formatted.cdf_offset; + if (cde.fmt.cdf_offset < end + (p - buf)) { + found = cde.fmt.cdf_offset; dbg("Possible cdf_offset:0x%x at 0x%"OFF_FMT"x", (unsigned)found, end + (p-3 - buf)); dbg(" cdf_offset+cdf_size:0x%x", - (unsigned)(found + SWAP_LE32(cde_header.formatted.cdf_size))); + (unsigned)(found + SWAP_LE32(cde.fmt.cdf_size))); /* * We do not "break" here because only the last CDE is valid. * I've seen a .zip archive which contained a .zip file, @@ -266,7 +266,7 @@ static uint32_t find_cdf_offset(void) return found; }; -static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr) +static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf) { uint32_t magic; @@ -276,23 +276,25 @@ static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr) dbg("Reading CDF at 0x%x", (unsigned)cdf_offset); xlseek(zip_fd, cdf_offset, SEEK_SET); xread(zip_fd, &magic, 4); - /* Central Directory End? */ + /* Central Directory End? Assume CDF has ended. + * (more correct method is to use cde.cdf_entries_total counter) + */ if (magic == ZIP_CDE_MAGIC) { dbg("got ZIP_CDE_MAGIC"); return 0; /* EOF */ } - xread(zip_fd, cdf_ptr->raw, CDF_HEADER_LEN); + xread(zip_fd, cdf->raw, CDF_HEADER_LEN); - FIX_ENDIANNESS_CDF(*cdf_ptr); - dbg(" file_name_length:%u extra_field_length:%u file_comment_length:%u", - (unsigned)cdf_ptr->formatted.file_name_length, - (unsigned)cdf_ptr->formatted.extra_field_length, - (unsigned)cdf_ptr->formatted.file_comment_length + FIX_ENDIANNESS_CDF(*cdf); + dbg(" filename_len:%u extra_len:%u file_comment_length:%u", + (unsigned)cdf->fmt.filename_len, + (unsigned)cdf->fmt.extra_len, + (unsigned)cdf->fmt.file_comment_length ); cdf_offset += 4 + CDF_HEADER_LEN - + cdf_ptr->formatted.file_name_length - + cdf_ptr->formatted.extra_field_length - + cdf_ptr->formatted.file_comment_length; + + cdf->fmt.filename_len + + cdf->fmt.extra_len + + cdf->fmt.file_comment_length; return cdf_offset; }; @@ -315,28 +317,28 @@ static void unzip_create_leading_dirs(const char *fn) free(name); } -static void unzip_extract(zip_header_t *zip_header, int dst_fd) +static void unzip_extract(zip_header_t *zip, int dst_fd) { - if (zip_header->formatted.method == 0) { + if (zip->fmt.method == 0) { /* Method 0 - stored (not compressed) */ - off_t size = zip_header->formatted.ucmpsize; + off_t size = zip->fmt.ucmpsize; if (size) bb_copyfd_exact_size(zip_fd, dst_fd, size); } else { /* Method 8 - inflate */ transformer_state_t xstate; init_transformer_state(&xstate); - xstate.bytes_in = zip_header->formatted.cmpsize; + xstate.bytes_in = zip->fmt.cmpsize; xstate.src_fd = zip_fd; xstate.dst_fd = dst_fd; if (inflate_unzip(&xstate) < 0) bb_error_msg_and_die("inflate error"); /* Validate decompression - crc */ - if (zip_header->formatted.crc32 != (xstate.crc32 ^ 0xffffffffL)) { + if (zip->fmt.crc32 != (xstate.crc32 ^ 0xffffffffL)) { bb_error_msg_and_die("crc error"); } /* Validate decompression - size */ - if (zip_header->formatted.ucmpsize != xstate.bytes_out) { + if (zip->fmt.ucmpsize != xstate.bytes_out) { /* Don't die. Who knows, maybe len calculation * was botched somewhere. After all, crc matched! */ bb_error_msg("bad length"); @@ -563,7 +565,7 @@ int unzip_main(int argc, char **argv) total_entries = 0; cdf_offset = find_cdf_offset(); /* try to seek to the end, find CDE and CDF start */ while (1) { - zip_header_t zip_header; + zip_header_t zip; mode_t dir_mode = 0777; #if ENABLE_FEATURE_UNZIP_CDF mode_t file_mode = 0666; @@ -589,7 +591,7 @@ int unzip_main(int argc, char **argv) /* Check magic number */ xread(zip_fd, &magic, 4); - /* Central directory? It's at the end, so exit */ + /* CDF item? Assume there are no more files, exit */ if (magic == ZIP_CDF_MAGIC) { dbg("got ZIP_CDF_MAGIC"); break; @@ -605,71 +607,74 @@ int unzip_main(int argc, char **argv) bb_error_msg_and_die("invalid zip magic %08X", (int)magic); dbg("got ZIP_FILEHEADER_MAGIC"); - xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN); - FIX_ENDIANNESS_ZIP(zip_header); - if ((zip_header.formatted.method != 0) - && (zip_header.formatted.method != 8) + xread(zip_fd, zip.raw, ZIP_HEADER_LEN); + FIX_ENDIANNESS_ZIP(zip); + if ((zip.fmt.method != 0) + && (zip.fmt.method != 8) ) { /* TODO? method 12: bzip2, method 14: LZMA */ - bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method); + bb_error_msg_and_die("unsupported method %d", zip.fmt.method); } - if (zip_header.formatted.zip_flags & SWAP_LE16(0x0009)) { + if (zip.fmt.zip_flags & SWAP_LE16(0x0009)) { bb_error_msg_and_die("zip flags 1 and 8 are not supported"); } } #if ENABLE_FEATURE_UNZIP_CDF else { /* cdf_offset is valid (and we know the file is seekable) */ - cdf_header_t cdf_header; - cdf_offset = read_next_cdf(cdf_offset, &cdf_header); + cdf_header_t cdf; + cdf_offset = read_next_cdf(cdf_offset, &cdf); if (cdf_offset == 0) /* EOF? */ break; -# if 0 +# if 1 xlseek(zip_fd, - SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4, + SWAP_LE32(cdf.fmt.relative_offset_of_local_header) + 4, SEEK_SET); - xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN); - FIX_ENDIANNESS_ZIP(zip_header); - if (zip_header.formatted.zip_flags & SWAP_LE16(0x0008)) { + xread(zip_fd, zip.raw, ZIP_HEADER_LEN); + FIX_ENDIANNESS_ZIP(zip); + if (zip.fmt.zip_flags & SWAP_LE16(0x0008)) { /* 0x0008 - streaming. [u]cmpsize can be reliably gotten * only from Central Directory. */ - zip_header.formatted.crc32 = cdf_header.formatted.crc32; - zip_header.formatted.cmpsize = cdf_header.formatted.cmpsize; - zip_header.formatted.ucmpsize = cdf_header.formatted.ucmpsize; + zip.fmt.crc32 = cdf.fmt.crc32; + zip.fmt.cmpsize = cdf.fmt.cmpsize; + zip.fmt.ucmpsize = cdf.fmt.ucmpsize; } # else - /* CDF has the same data as local header, no need to read the latter */ - memcpy(&zip_header.formatted.version, - &cdf_header.formatted.version_needed, ZIP_HEADER_LEN); + /* CDF has the same data as local header, no need to read the latter... + * ...not really. An archive was seen with cdf.extra_len == 6 but + * zip.extra_len == 0. + */ + memcpy(&zip.fmt.version, + &cdf.fmt.version_needed, ZIP_HEADER_LEN); xlseek(zip_fd, - SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4 + ZIP_HEADER_LEN, + SWAP_LE32(cdf.fmt.relative_offset_of_local_header) + 4 + ZIP_HEADER_LEN, SEEK_SET); # endif - if ((cdf_header.formatted.version_made_by >> 8) == 3) { + if ((cdf.fmt.version_made_by >> 8) == 3) { /* This archive is created on Unix */ - dir_mode = file_mode = (cdf_header.formatted.external_file_attributes >> 16); + dir_mode = file_mode = (cdf.fmt.external_attributes >> 16); } } #endif - if (zip_header.formatted.zip_flags & SWAP_LE16(0x0001)) { + if (zip.fmt.zip_flags & SWAP_LE16(0x0001)) { /* 0x0001 - encrypted */ bb_error_msg_and_die("zip flag 1 (encryption) is not supported"); } dbg("File cmpsize:0x%x extra_len:0x%x ucmpsize:0x%x", - (unsigned)zip_header.formatted.cmpsize, - (unsigned)zip_header.formatted.extra_len, - (unsigned)zip_header.formatted.ucmpsize + (unsigned)zip.fmt.cmpsize, + (unsigned)zip.fmt.extra_len, + (unsigned)zip.fmt.ucmpsize ); /* Read filename */ free(dst_fn); - dst_fn = xzalloc(zip_header.formatted.filename_len + 1); - xread(zip_fd, dst_fn, zip_header.formatted.filename_len); + dst_fn = xzalloc(zip.fmt.filename_len + 1); + xread(zip_fd, dst_fn, zip.fmt.filename_len); /* Skip extra header bytes */ - unzip_skip(zip_header.formatted.extra_len); + unzip_skip(zip.fmt.extra_len); /* Guard against "/abspath", "/../" and similar attacks */ overlapping_strcpy(dst_fn, strip_unsafe_prefix(dst_fn)); @@ -684,32 +689,32 @@ int unzip_main(int argc, char **argv) /* List entry */ char dtbuf[sizeof("mm-dd-yyyy hh:mm")]; sprintf(dtbuf, "%02u-%02u-%04u %02u:%02u", - (zip_header.formatted.moddate >> 5) & 0xf, // mm: 0x01e0 - (zip_header.formatted.moddate) & 0x1f, // dd: 0x001f - (zip_header.formatted.moddate >> 9) + 1980, // yy: 0xfe00 - (zip_header.formatted.modtime >> 11), // hh: 0xf800 - (zip_header.formatted.modtime >> 5) & 0x3f // mm: 0x07e0 - // seconds/2 are not shown, encoded in ----------- 0x001f + (zip.fmt.moddate >> 5) & 0xf, // mm: 0x01e0 + (zip.fmt.moddate) & 0x1f, // dd: 0x001f + (zip.fmt.moddate >> 9) + 1980, // yy: 0xfe00 + (zip.fmt.modtime >> 11), // hh: 0xf800 + (zip.fmt.modtime >> 5) & 0x3f // mm: 0x07e0 + // seconds/2 not shown, encoded in -- 0x001f ); if (!verbose) { // " Length Date Time Name\n" // "--------- ---------- ----- ----" printf( "%9u " "%s " "%s\n", - (unsigned)zip_header.formatted.ucmpsize, + (unsigned)zip.fmt.ucmpsize, dtbuf, dst_fn); } else { - unsigned long percents = zip_header.formatted.ucmpsize - zip_header.formatted.cmpsize; + unsigned long percents = zip.fmt.ucmpsize - zip.fmt.cmpsize; if ((int32_t)percents < 0) percents = 0; /* happens if ucmpsize < cmpsize */ percents = percents * 100; - if (zip_header.formatted.ucmpsize) - percents /= zip_header.formatted.ucmpsize; + if (zip.fmt.ucmpsize) + percents /= zip.fmt.ucmpsize; // " Length Method Size Cmpr Date Time CRC-32 Name\n" // "-------- ------ ------- ---- ---------- ----- -------- ----" printf( "%8u %s" "%9u%4u%% " "%s " "%08x " "%s\n", - (unsigned)zip_header.formatted.ucmpsize, - zip_header.formatted.method == 0 ? "Stored" : "Defl:N", /* Defl is method 8 */ + (unsigned)zip.fmt.ucmpsize, + zip.fmt.method == 0 ? "Stored" : "Defl:N", /* Defl is method 8 */ /* TODO: show other methods? * 1 - Shrunk * 2 - Reduced with compression factor 1 @@ -722,15 +727,16 @@ int unzip_main(int argc, char **argv) * 10 - PKWARE Data Compression Library Imploding * 11 - Reserved by PKWARE * 12 - BZIP2 + * 14 - LZMA */ - (unsigned)zip_header.formatted.cmpsize, + (unsigned)zip.fmt.cmpsize, (unsigned)percents, dtbuf, - zip_header.formatted.crc32, + zip.fmt.crc32, dst_fn); - total_size += zip_header.formatted.cmpsize; + total_size += zip.fmt.cmpsize; } - total_usize += zip_header.formatted.ucmpsize; + total_usize += zip.fmt.ucmpsize; i = 'n'; } else if (dst_fd == STDOUT_FILENO) { /* Extracting to STDOUT */ @@ -798,9 +804,11 @@ int unzip_main(int argc, char **argv) #endif case -1: /* Unzip */ if (!quiet) { - printf(" inflating: %s\n", dst_fn); + printf(/* zip.fmt.method == 0 + ? " extracting: %s\n" + : */ " inflating: %s\n", dst_fn); } - unzip_extract(&zip_header, dst_fd); + unzip_extract(&zip, dst_fd); if (dst_fd != STDOUT_FILENO) { /* closing STDOUT is potentially bad for future business */ close(dst_fd); @@ -811,7 +819,7 @@ int unzip_main(int argc, char **argv) overwrite = O_NEVER; case 'n': /* Skip entry data */ - unzip_skip(zip_header.formatted.cmpsize); + unzip_skip(zip.fmt.cmpsize); break; case 'r': diff --git a/testsuite/unzip.tests b/testsuite/unzip.tests index d9c45242c..2e4becdb8 100755 --- a/testsuite/unzip.tests +++ b/testsuite/unzip.tests @@ -34,7 +34,9 @@ rm foo.zip optional FEATURE_UNZIP_CDF testing "unzip (bad archive)" "uudecode; unzip bad.zip 2>&1; echo \$?" \ "Archive: bad.zip -unzip: short read + inflating: ]3j½r«IK-%Ix +unzip: corrupted data +unzip: inflate error 1 " \ "" "\ -- 2.25.1