pointless whitespace/comment fixes, no code changes
[oweals/busybox.git] / archival / unzip.c
index 8462822f1a1529fc1f5277ba6ea9e79fc60f344c..7b47a8ab8cbfa8f358cc58476864c8002e404b5c 100644 (file)
  */
 
 /* TODO
- * Endian issues
  * Zip64 + other methods
- * Improve handling of zip format, ie.
- * - deferred CRC, comp. & uncomp. lengths (zip header flags bit 3)
- * - unix file permissions, etc.
- * - central directory
  */
 
 #include "libbb.h"
@@ -31,12 +26,12 @@ enum {
 #if BB_BIG_ENDIAN
        ZIP_FILEHEADER_MAGIC = 0x504b0304,
        ZIP_CDS_MAGIC        = 0x504b0102,
-       ZIP_CDS_END_MAGIC    = 0x504b0506,
+       ZIP_CDE_MAGIC        = 0x504b0506,
        ZIP_DD_MAGIC         = 0x504b0708,
 #else
        ZIP_FILEHEADER_MAGIC = 0x04034b50,
        ZIP_CDS_MAGIC        = 0x02014b50,
-       ZIP_CDS_END_MAGIC    = 0x06054b50,
+       ZIP_CDE_MAGIC        = 0x06054b50,
        ZIP_DD_MAGIC         = 0x08074b50,
 #endif
 };
@@ -46,18 +41,18 @@ enum {
 typedef union {
        uint8_t raw[ZIP_HEADER_LEN];
        struct {
-               uint16_t version;                       /* 0-1 */
-               uint16_t flags;                         /* 2-3 */
-               uint16_t method;                        /* 4-5 */
-               uint16_t modtime;                       /* 6-7 */
-               uint16_t moddate;                       /* 8-9 */
-               uint32_t crc32 ATTRIBUTE_PACKED;        /* 10-13 */
-               uint32_t cmpsize ATTRIBUTE_PACKED;      /* 14-17 */
-               uint32_t ucmpsize ATTRIBUTE_PACKED;     /* 18-21 */
-               uint16_t filename_len;                  /* 22-23 */
-               uint16_t extra_len;                     /* 24-25 */
-       } formatted ATTRIBUTE_PACKED;
-} zip_header_t;
+               uint16_t version;               /* 0-1 */
+               uint16_t flags;                 /* 2-3 */
+               uint16_t method;                /* 4-5 */
+               uint16_t modtime;               /* 6-7 */
+               uint16_t moddate;               /* 8-9 */
+               uint32_t crc32 PACKED;          /* 10-13 */
+               uint32_t cmpsize PACKED;        /* 14-17 */
+               uint32_t ucmpsize PACKED;       /* 18-21 */
+               uint16_t filename_len;          /* 22-23 */
+               uint16_t extra_len;             /* 24-25 */
+       } formatted PACKED;
+} zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */
 
 /* Check the offset of the last element, not the length.  This leniency
  * allows for poor packing, whereby the overall struct may be too long,
@@ -65,11 +60,11 @@ typedef union {
  */
 struct BUG_zip_header_must_be_26_bytes {
        char BUG_zip_header_must_be_26_bytes[
-               offsetof(zip_header_t, formatted.extra_len) + 2 ==
-                       ZIP_HEADER_LEN ? 1 : -1];
+               offsetof(zip_header_t, formatted.extra_len) + 2
+                       == ZIP_HEADER_LEN ? 1 : -1];
 };
 
-#define FIX_ENDIANNESS(zip_header) do { \
+#define FIX_ENDIANNESS_ZIP(zip_header) do { \
        (zip_header).formatted.version      = SWAP_LE16((zip_header).formatted.version     ); \
        (zip_header).formatted.flags        = SWAP_LE16((zip_header).formatted.flags       ); \
        (zip_header).formatted.method       = SWAP_LE16((zip_header).formatted.method      ); \
@@ -82,9 +77,138 @@ struct BUG_zip_header_must_be_26_bytes {
        (zip_header).formatted.extra_len    = SWAP_LE16((zip_header).formatted.extra_len   ); \
 } while (0)
 
-static void unzip_skip(int fd, off_t skip)
+#define CDS_HEADER_LEN 42
+
+typedef union {
+       uint8_t raw[CDS_HEADER_LEN];
+       struct {
+               /* uint32_t signature; 50 4b 01 02 */
+               uint16_t version_made_by;       /* 0-1 */
+               uint16_t version_needed;        /* 2-3 */
+               uint16_t cds_flags;             /* 4-5 */
+               uint16_t method;                /* 6-7 */
+               uint16_t mtime;                 /* 8-9 */
+               uint16_t mdate;                 /* 10-11 */
+               uint32_t crc32;                 /* 12-15 */
+               uint32_t cmpsize;               /* 16-19 */
+               uint32_t ucmpsize;              /* 20-23 */
+               uint16_t file_name_length;      /* 24-25 */
+               uint16_t extra_field_length;    /* 26-27 */
+               uint16_t file_comment_length;   /* 28-29 */
+               uint16_t disk_number_start;     /* 30-31 */
+               uint16_t internal_file_attributes; /* 32-33 */
+               uint32_t external_file_attributes PACKED; /* 34-37 */
+               uint32_t relative_offset_of_local_header PACKED; /* 38-41 */
+       } formatted PACKED;
+} cds_header_t;
+
+struct BUG_cds_header_must_be_42_bytes {
+       char BUG_cds_header_must_be_42_bytes[
+               offsetof(cds_header_t, formatted.relative_offset_of_local_header) + 4
+                       == CDS_HEADER_LEN ? 1 : -1];
+};
+
+#define FIX_ENDIANNESS_CDS(cds_header) do { \
+       (cds_header).formatted.crc32        = SWAP_LE32((cds_header).formatted.crc32       ); \
+       (cds_header).formatted.cmpsize      = SWAP_LE32((cds_header).formatted.cmpsize     ); \
+       (cds_header).formatted.ucmpsize     = SWAP_LE32((cds_header).formatted.ucmpsize    ); \
+       (cds_header).formatted.file_name_length = SWAP_LE16((cds_header).formatted.file_name_length); \
+       (cds_header).formatted.extra_field_length = SWAP_LE16((cds_header).formatted.extra_field_length); \
+       (cds_header).formatted.file_comment_length = SWAP_LE16((cds_header).formatted.file_comment_length); \
+} while (0)
+
+#define CDE_HEADER_LEN 16
+
+typedef union {
+       uint8_t raw[CDE_HEADER_LEN];
+       struct {
+               /* uint32_t signature; 50 4b 05 06 */
+               uint16_t this_disk_no;
+               uint16_t disk_with_cds_no;
+               uint16_t cds_entries_on_this_disk;
+               uint16_t cds_entries_total;
+               uint32_t cds_size;
+               uint32_t cds_offset;
+               /* uint16_t file_comment_length; */
+               /* .ZIP file comment (variable size) */
+       } formatted PACKED;
+} cde_header_t;
+
+struct BUG_cde_header_must_be_16_bytes {
+       char BUG_cde_header_must_be_16_bytes[
+               sizeof(cde_header_t) == CDE_HEADER_LEN ? 1 : -1];
+};
+
+#define FIX_ENDIANNESS_CDE(cde_header) do { \
+       (cde_header).formatted.cds_offset = SWAP_LE32((cde_header).formatted.cds_offset); \
+} while (0)
+
+enum { zip_fd = 3 };
+
+
+#if ENABLE_DESKTOP
+/* NB: does not preserve file position! */
+static uint32_t find_cds_offset(void)
+{
+       unsigned char buf[1024];
+       cde_header_t cde_header;
+       unsigned char *p;
+       off_t end;
+
+       end = xlseek(zip_fd, 0, SEEK_END);
+       if (end < 1024)
+               end = 1024;
+       end -= 1024;
+       xlseek(zip_fd, end, SEEK_SET);
+       full_read(zip_fd, buf, 1024);
+
+       p = buf;
+       while (p <= buf + 1024 - CDE_HEADER_LEN - 4) {
+               if (*p != 'P') {
+                       p++;
+                       continue;
+               }
+               if (*++p != 'K')
+                       continue;
+               if (*++p != 5)
+                       continue;
+               if (*++p != 6)
+                       continue;
+               /* we found CDE! */
+               memcpy(cde_header.raw, p + 1, CDE_HEADER_LEN);
+               FIX_ENDIANNESS_CDE(cde_header);
+               return cde_header.formatted.cds_offset;
+       }
+       bb_error_msg_and_die("can't find file table");
+};
+
+static uint32_t read_next_cds(int count_m1, uint32_t cds_offset, cds_header_t *cds_ptr)
+{
+       off_t org;
+
+       org = xlseek(zip_fd, 0, SEEK_CUR);
+
+       if (!cds_offset)
+               cds_offset = find_cds_offset();
+
+       while (count_m1-- >= 0) {
+               xlseek(zip_fd, cds_offset + 4, SEEK_SET);
+               xread(zip_fd, cds_ptr->raw, CDS_HEADER_LEN);
+               FIX_ENDIANNESS_CDS(*cds_ptr);
+               cds_offset += 4 + CDS_HEADER_LEN
+                       + cds_ptr->formatted.file_name_length
+                       + cds_ptr->formatted.extra_field_length
+                       + cds_ptr->formatted.file_comment_length;
+       }
+
+       xlseek(zip_fd, org, SEEK_SET);
+       return cds_offset;
+};
+#endif
+
+static void unzip_skip(off_t skip)
 {
-       bb_copyfd_exact_size(fd, -1, skip);
+       bb_copyfd_exact_size(zip_fd, -1, skip);
 }
 
 static void unzip_create_leading_dirs(const char *fn)
@@ -97,17 +221,17 @@ static void unzip_create_leading_dirs(const char *fn)
        free(name);
 }
 
-static void unzip_extract(zip_header_t *zip_header, int src_fd, int dst_fd)
+static void unzip_extract(zip_header_t *zip_header, int dst_fd)
 {
        if (zip_header->formatted.method == 0) {
                /* Method 0 - stored (not compressed) */
                off_t size = zip_header->formatted.ucmpsize;
                if (size)
-                       bb_copyfd_exact_size(src_fd, dst_fd, size);
+                       bb_copyfd_exact_size(zip_fd, dst_fd, size);
        } else {
                /* Method 8 - inflate */
                inflate_unzip_result res;
-               if (inflate_unzip(&res, zip_header->formatted.cmpsize, src_fd, dst_fd) < 0)
+               if (inflate_unzip(&res, zip_header->formatted.cmpsize, zip_fd, dst_fd) < 0)
                        bb_error_msg_and_die("inflate error");
                /* Validate decompression - crc */
                if (zip_header->formatted.crc32 != (res.crc ^ 0xffffffffL)) {
@@ -131,9 +255,12 @@ int unzip_main(int argc, char **argv)
        smallint verbose = 1;
        smallint listing = 0;
        smallint overwrite = O_PROMPT;
+#if ENABLE_DESKTOP
+       uint32_t cds_offset;
+       unsigned cds_entries;
+#endif
        unsigned total_size;
        unsigned total_entries;
-       int src_fd = -1;
        int dst_fd = -1;
        char *src_fn = NULL;
        char *dst_fn = NULL;
@@ -221,13 +348,14 @@ int unzip_main(int argc, char **argv)
 
        /* Open input file */
        if (LONE_DASH(src_fn)) {
-               src_fd = STDIN_FILENO;
+               xdup2(STDIN_FILENO, zip_fd);
                /* Cannot use prompt mode since zip data is arriving on STDIN */
                if (overwrite == O_PROMPT)
                        overwrite = O_NEVER;
        } else {
                static const char extn[][5] = {"", ".zip", ".ZIP"};
                int orig_src_fn_len = strlen(src_fn);
+               int src_fd = -1;
 
                for (i = 0; (i < 3) && (src_fd == -1); i++) {
                        strcpy(src_fn + orig_src_fn_len, extn[i]);
@@ -237,6 +365,7 @@ int unzip_main(int argc, char **argv)
                        src_fn[orig_src_fn_len] = '\0';
                        bb_error_msg_and_die("can't open %s, %s.zip, %s.ZIP", src_fn, src_fn, src_fn);
                }
+               xmove_fd(src_fd, zip_fd);
        }
 
        /* Change dir if necessary */
@@ -253,30 +382,63 @@ int unzip_main(int argc, char **argv)
 
        total_size = 0;
        total_entries = 0;
+#if ENABLE_DESKTOP
+       cds_entries = 0;
+       cds_offset = 0;
+#endif
        while (1) {
                uint32_t magic;
 
                /* Check magic number */
-               xread(src_fd, &magic, 4);
+               xread(zip_fd, &magic, 4);
+               /* Central directory? It's at the end, so exit */
                if (magic == ZIP_CDS_MAGIC)
                        break;
+#if ENABLE_DESKTOP
+               /* Data descriptor? It was a streaming file, go on */
+               if (magic == ZIP_DD_MAGIC) {
+                       /* skip over duplicate crc32, cmpsize and ucmpsize */
+                       unzip_skip(3 * 4);
+                       continue;
+               }
+#endif
                if (magic != ZIP_FILEHEADER_MAGIC)
-                       bb_error_msg_and_die("invalid zip magic %08X", magic);
+                       bb_error_msg_and_die("invalid zip magic %08X", (int)magic);
 
                /* Read the file header */
-               xread(src_fd, zip_header.raw, ZIP_HEADER_LEN);
-               FIX_ENDIANNESS(zip_header);
+               xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN);
+               FIX_ENDIANNESS_ZIP(zip_header);
                if ((zip_header.formatted.method != 0) && (zip_header.formatted.method != 8)) {
                        bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method);
                }
+#if !ENABLE_DESKTOP
+               if (zip_header.formatted.flags & 0x0009) {
+                       bb_error_msg_and_die("zip flags 1 and 8 are not supported");
+               }
+#else
+               if (zip_header.formatted.flags & 0x0001) {
+                       /* 0x0001 - encrypted */
+                       bb_error_msg_and_die("zip flag 1 (encryption) is not supported");
+               }
+               if (zip_header.formatted.flags & 0x0008) {
+                       cds_header_t cds_header;
+                       /* 0x0008 - streaming. [u]cmpsize can be reliably gotten
+                        * only from Central Directory. See unzip_doc.txt */
+                       cds_offset = read_next_cds(total_entries - cds_entries, cds_offset, &cds_header);
+                       cds_entries = total_entries + 1;
+                       zip_header.formatted.crc32    = cds_header.formatted.crc32;
+                       zip_header.formatted.cmpsize  = cds_header.formatted.cmpsize;
+                       zip_header.formatted.ucmpsize = cds_header.formatted.ucmpsize;
+               }
+#endif
 
                /* Read filename */
                free(dst_fn);
                dst_fn = xzalloc(zip_header.formatted.filename_len + 1);
-               xread(src_fd, dst_fn, zip_header.formatted.filename_len);
+               xread(zip_fd, dst_fn, zip_header.formatted.filename_len);
 
                /* Skip extra header bytes */
-               unzip_skip(src_fd, zip_header.formatted.extra_len);
+               unzip_skip(zip_header.formatted.extra_len);
 
                /* Filter zip entries */
                if (find_list_entry(zreject, dst_fn)
@@ -297,7 +459,6 @@ int unzip_main(int argc, char **argv)
                                           (dostime & 0x000007e0) >> 5,
                                           dst_fn);
                                        total_size += zip_header.formatted.ucmpsize;
-                                       total_entries++;
                                } else {
                                        /* short listing -- filenames only */
                                        puts(dst_fn);
@@ -308,7 +469,7 @@ int unzip_main(int argc, char **argv)
                        } else if (last_char_is(dst_fn, '/')) { /* Extract directory */
                                if (stat(dst_fn, &stat_buf) == -1) {
                                        if (errno != ENOENT) {
-                                               bb_perror_msg_and_die("cannot stat '%s'",dst_fn);
+                                               bb_perror_msg_and_die("can't stat '%s'", dst_fn);
                                        }
                                        if (verbose) {
                                                printf("   creating: %s\n", dst_fn);
@@ -325,10 +486,10 @@ int unzip_main(int argc, char **argv)
                                i = 'n';
 
                        } else {  /* Extract file */
_check_file:
+ check_file:
                                if (stat(dst_fn, &stat_buf) == -1) { /* File does not exist */
                                        if (errno != ENOENT) {
-                                               bb_perror_msg_and_die("cannot stat '%s'",dst_fn);
+                                               bb_perror_msg_and_die("can't stat '%s'", dst_fn);
                                        }
                                        i = 'y';
                                } else { /* File already exists */
@@ -340,12 +501,12 @@ int unzip_main(int argc, char **argv)
                                                } else {
                                                        printf("replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ", dst_fn);
                                                        if (!fgets(key_buf, sizeof(key_buf), stdin)) {
-                                                               bb_perror_msg_and_die("cannot read input");
+                                                               bb_perror_msg_and_die("can't read input");
                                                        }
                                                        i = key_buf[0];
                                                }
                                        } else { /* File is not regular file */
-                                               bb_error_msg_and_die("'%s' exists but is not regular file",dst_fn);
+                                               bb_error_msg_and_die("'%s' exists but is not regular file", dst_fn);
                                        }
                                }
                        }
@@ -361,7 +522,7 @@ int unzip_main(int argc, char **argv)
                        if (verbose) {
                                printf("  inflating: %s\n", dst_fn);
                        }
-                       unzip_extract(&zip_header, src_fd, dst_fd);
+                       unzip_extract(&zip_header, dst_fd);
                        if (dst_fd != STDOUT_FILENO) {
                                /* closing STDOUT is potentially bad for future business */
                                close(dst_fd);
@@ -372,30 +533,26 @@ int unzip_main(int argc, char **argv)
                        overwrite = O_NEVER;
                case 'n':
                        /* Skip entry data */
-                       unzip_skip(src_fd, zip_header.formatted.cmpsize);
+                       unzip_skip(zip_header.formatted.cmpsize);
                        break;
 
                case 'r':
                        /* Prompt for new name */
                        printf("new name: ");
                        if (!fgets(key_buf, sizeof(key_buf), stdin)) {
-                               bb_perror_msg_and_die("cannot read input");
+                               bb_perror_msg_and_die("can't read input");
                        }
                        free(dst_fn);
                        dst_fn = xstrdup(key_buf);
                        chomp(dst_fn);
-                       goto _check_file;
+                       goto check_file;
 
                default:
                        printf("error: invalid response [%c]\n",(char)i);
-                       goto _check_file;
+                       goto check_file;
                }
 
-               /* Data descriptor section */
-               if (zip_header.formatted.flags & 4) {
-                       /* skip over duplicate crc, compressed size and uncompressed size */
-                       unzip_skip(src_fd, 12);
-               }
+               total_entries++;
        }
 
        if (listing && verbose) {