9214935916ca300af24856ce3cb1fd7c306455f1
[oweals/busybox.git] / archival / unzip.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * Mini unzip implementation for busybox
4  *
5  * Copyright (C) 2004 by Ed Clark
6  *
7  * Loosely based on original busybox unzip applet by Laurence Anderson.
8  * All options and features should work in this version.
9  *
10  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
11  */
12 /* For reference see
13  * http://www.pkware.com/company/standards/appnote/
14  * http://www.info-zip.org/pub/infozip/doc/appnote-iz-latest.zip
15  *
16  * TODO
17  * Zip64 + other methods
18  */
19 //config:config UNZIP
20 //config:       bool "unzip"
21 //config:       default y
22 //config:       help
23 //config:         unzip will list or extract files from a ZIP archive,
24 //config:         commonly found on DOS/WIN systems. The default behavior
25 //config:         (with no options) is to extract the archive into the
26 //config:         current directory.
27 //config:
28 //config:config FEATURE_UNZIP_CDF
29 //config:       bool "Read and use Central Directory data"
30 //config:       default y
31 //config:       depends on UNZIP
32 //config:       help
33 //config:         If you know that you only need to deal with simple
34 //config:         ZIP files without deleted/updated files, SFX archives etc,
35 //config:         you can reduce code size by unselecting this option.
36 //config:         To support less trivial ZIPs, say Y.
37
38 //applet:IF_UNZIP(APPLET(unzip, BB_DIR_USR_BIN, BB_SUID_DROP))
39 //kbuild:lib-$(CONFIG_UNZIP) += unzip.o
40
41 //usage:#define unzip_trivial_usage
42 //usage:       "[-lnopq] FILE[.zip] [FILE]... [-x FILE...] [-d DIR]"
43 //usage:#define unzip_full_usage "\n\n"
44 //usage:       "Extract FILEs from ZIP archive\n"
45 //usage:     "\n        -l      List contents (with -q for short form)"
46 //usage:     "\n        -n      Never overwrite files (default: ask)"
47 //usage:     "\n        -o      Overwrite"
48 //usage:     "\n        -p      Print to stdout"
49 //usage:     "\n        -q      Quiet"
50 //usage:     "\n        -x FILE Exclude FILEs"
51 //usage:     "\n        -d DIR  Extract into DIR"
52
53 #include "libbb.h"
54 #include "bb_archive.h"
55
56 #if 0
57 # define dbg(...) bb_error_msg(__VA_ARGS__)
58 #else
59 # define dbg(...) ((void)0)
60 #endif
61
/*
 * Four-byte record signatures ("PK" plus two type bytes).
 *
 * The code reads the signature bytes straight into a native uint32_t and
 * compares without swapping, so each constant is spelled the way those
 * bytes look on the host: byte-reversed on little-endian machines.
 */
enum {
#if !BB_BIG_ENDIAN
        ZIP_FILEHEADER_MAGIC = 0x04034b50, /* local file header */
        ZIP_CDF_MAGIC        = 0x02014b50, /* CDF item (central directory entry) */
        ZIP_CDE_MAGIC        = 0x06054b50, /* End of CDF */
        ZIP_DD_MAGIC         = 0x08074b50, /* data descriptor */
#else
        ZIP_FILEHEADER_MAGIC = 0x504b0304, /* local file header */
        ZIP_CDF_MAGIC        = 0x504b0102, /* CDF item (central directory entry) */
        ZIP_CDE_MAGIC        = 0x504b0506, /* End of CDF */
        ZIP_DD_MAGIC         = 0x504b0708, /* data descriptor */
#endif
};
75
/* Size in bytes of the local file header, not counting the 4-byte signature */
#define ZIP_HEADER_LEN 26

/*
 * Local file header as it is stored in the archive. The 4-byte signature
 * is not part of this structure: callers read (or seek past) it separately.
 *
 * "raw" is the buffer the header bytes are read into; "fmt" overlays the
 * same bytes with named fields. The numbers in the field comments are
 * byte offsets within raw[]. The 32-bit fields start at offset 10, which
 * is not 4-byte aligned - presumably why they carry PACKED individually.
 *
 * Right after reading, fields hold on-disk (little-endian) values;
 * see FIX_ENDIANNESS_ZIP() for which of them are converted to host order.
 */
typedef union {
        uint8_t raw[ZIP_HEADER_LEN];
        struct {
                uint16_t version;               /* 0-1 */
                uint16_t zip_flags;             /* 2-3 */
                uint16_t method;                /* 4-5 */
                uint16_t modtime;               /* 6-7 */
                uint16_t moddate;               /* 8-9 */
                uint32_t crc32 PACKED;          /* 10-13 */
                uint32_t cmpsize PACKED;        /* 14-17 */
                uint32_t ucmpsize PACKED;       /* 18-21 */
                uint16_t filename_len;          /* 22-23 */
                uint16_t extra_len;             /* 24-25 */
                /* filename follows (not NUL terminated) */
                /* extra field follows */
                /* data follows */
        } fmt PACKED;
} zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */
96
/*
 * Convert the fields of a freshly read zip_header_t which this applet
 * interprets numerically from on-disk (little-endian) to host byte order.
 * Does nothing on little-endian hosts.
 *
 * method, modtime and moddate must be converted too: method is compared
 * with 0 and 8, and the date/time bit fields are decoded for listings.
 *
 * zip_flags is deliberately left in on-disk order: it is always tested
 * against SWAP_LE16(mask). version is not converted either.
 */
#define FIX_ENDIANNESS_ZIP(zip) \
do { if (BB_BIG_ENDIAN) { \
        (zip).fmt.method        = SWAP_LE16((zip).fmt.method      ); \
        (zip).fmt.modtime       = SWAP_LE16((zip).fmt.modtime     ); \
        (zip).fmt.moddate       = SWAP_LE16((zip).fmt.moddate     ); \
        (zip).fmt.crc32         = SWAP_LE32((zip).fmt.crc32       ); \
        (zip).fmt.cmpsize       = SWAP_LE32((zip).fmt.cmpsize     ); \
        (zip).fmt.ucmpsize      = SWAP_LE32((zip).fmt.ucmpsize    ); \
        (zip).fmt.filename_len  = SWAP_LE16((zip).fmt.filename_len); \
        (zip).fmt.extra_len     = SWAP_LE16((zip).fmt.extra_len   ); \
}} while (0)
105
/* Size in bytes of a central directory entry header, not counting the 4-byte signature */
#define CDF_HEADER_LEN 42

/*
 * Central directory file header (CDF item) as stored in the archive,
 * without its 4-byte signature, which the reader consumes separately.
 *
 * "raw" receives the bytes read from the file; "fmt" names the fields.
 * The numbers in the field comments are byte offsets within raw[].
 *
 * Fields used elsewhere in this file:
 *  - version_made_by: a high byte of 3 is taken to mean "created on Unix";
 *  - external_attributes: for Unix-made archives its upper 16 bits are
 *    used as the file/directory mode;
 *  - relative_offset_of_local_header: where the matching local file
 *    header starts. Not converted by FIX_ENDIANNESS_CDF(); users apply
 *    SWAP_LE32() themselves.
 */
typedef union {
        uint8_t raw[CDF_HEADER_LEN];
        struct {
                /* uint32_t signature; 50 4b 01 02 */
                uint16_t version_made_by;       /* 0-1 */
                uint16_t version_needed;        /* 2-3 */
                uint16_t cdf_flags;             /* 4-5 */
                uint16_t method;                /* 6-7 */
                uint16_t modtime;               /* 8-9 */
                uint16_t moddate;               /* 10-11 */
                uint32_t crc32;                 /* 12-15 */
                uint32_t cmpsize;               /* 16-19 */
                uint32_t ucmpsize;              /* 20-23 */
                uint16_t filename_len;          /* 24-25 */
                uint16_t extra_len;             /* 26-27 */
                uint16_t file_comment_length;   /* 28-29 */
                uint16_t disk_number_start;     /* 30-31 */
                uint16_t internal_attributes;   /* 32-33 */
                uint32_t external_attributes PACKED; /* 34-37 */
                uint32_t relative_offset_of_local_header PACKED; /* 38-41 */
                /* filename follows (not NUL terminated) */
                /* extra field follows */
                /* file comment follows */
        } fmt PACKED;
} cdf_header_t;
133
/*
 * Bring a freshly read cdf_header_t into host byte order.
 * Does nothing on little-endian hosts.
 *
 * Left exactly as read from disk: cdf_flags, disk_number_start,
 * internal_attributes and relative_offset_of_local_header.
 */
#define FIX_ENDIANNESS_CDF(cdf) \
do { if (BB_BIG_ENDIAN) { \
        /* 32-bit fields */ \
        (cdf).fmt.crc32               = SWAP_LE32((cdf).fmt.crc32); \
        (cdf).fmt.cmpsize             = SWAP_LE32((cdf).fmt.cmpsize); \
        (cdf).fmt.ucmpsize            = SWAP_LE32((cdf).fmt.ucmpsize); \
        (cdf).fmt.external_attributes = SWAP_LE32((cdf).fmt.external_attributes); \
        /* 16-bit fields */ \
        (cdf).fmt.version_made_by     = SWAP_LE16((cdf).fmt.version_made_by); \
        (cdf).fmt.version_needed      = SWAP_LE16((cdf).fmt.version_needed); \
        (cdf).fmt.method              = SWAP_LE16((cdf).fmt.method); \
        (cdf).fmt.modtime             = SWAP_LE16((cdf).fmt.modtime); \
        (cdf).fmt.moddate             = SWAP_LE16((cdf).fmt.moddate); \
        (cdf).fmt.filename_len        = SWAP_LE16((cdf).fmt.filename_len); \
        (cdf).fmt.extra_len           = SWAP_LE16((cdf).fmt.extra_len); \
        (cdf).fmt.file_comment_length = SWAP_LE16((cdf).fmt.file_comment_length); \
}} while (0)
149
/* Size in bytes of the part of the "End of Central Directory" record described by cde_t */
#define CDE_LEN 16

/*
 * "End of Central Directory" (CDE) record as stored in the archive,
 * without the leading 4-byte signature and without the trailing
 * archive_comment_length field and comment (see the commented-out members).
 *
 * "raw" receives the bytes copied from the file; "fmt" names the fields.
 * Only cdf_offset is converted to host order by FIX_ENDIANNESS_CDE();
 * it is the file offset at which reading of CDF items starts.
 */
typedef union {
        uint8_t raw[CDE_LEN];
        struct {
                /* uint32_t signature; 50 4b 05 06 */
                uint16_t this_disk_no;
                uint16_t disk_with_cdf_no;
                uint16_t cdf_entries_on_this_disk;
                uint16_t cdf_entries_total;
                uint32_t cdf_size;
                uint32_t cdf_offset;
                /* uint16_t archive_comment_length; */
                /* archive comment follows */
        } fmt PACKED;
} cde_t;
166
/*
 * Convert cdf_offset of a freshly read cde_t to host byte order.
 * Does nothing on little-endian hosts. All other fields (including
 * cdf_size) are left as read from disk.
 */
#define FIX_ENDIANNESS_CDE(cde) \
do { \
        if (BB_BIG_ENDIAN) \
                (cde).fmt.cdf_offset = SWAP_LE32((cde).fmt.cdf_offset); \
} while (0)
171
172 struct BUG {
173         /* Check the offset of the last element, not the length.  This leniency
174          * allows for poor packing, whereby the overall struct may be too long,
175          * even though the elements are all in the right place.
176          */
177         char BUG_zip_header_must_be_26_bytes[
178                 offsetof(zip_header_t, fmt.extra_len) + 2
179                         == ZIP_HEADER_LEN ? 1 : -1];
180         char BUG_cdf_header_must_be_42_bytes[
181                 offsetof(cdf_header_t, fmt.relative_offset_of_local_header) + 4
182                         == CDF_HEADER_LEN ? 1 : -1];
183         char BUG_cde_must_be_16_bytes[
184                 sizeof(cde_t) == CDE_LEN ? 1 : -1];
185 };
186
187
/*
 * Fixed descriptor number through which the archive is accessed:
 * unzip_main() dup2()s stdin or moves the opened file onto it,
 * and the helpers in this file read and seek this descriptor.
 */
enum { zip_fd = 3 };
189
190
191 /* This value means that we failed to find CDF */
192 #define BAD_CDF_OFFSET ((uint32_t)0xffffffff)
193
194 #if !ENABLE_FEATURE_UNZIP_CDF
195
196 # define find_cdf_offset() BAD_CDF_OFFSET
197
198 #else
199 /* Seen in the wild:
200  * Self-extracting PRO2K3XP_32.exe contains 19078464 byte zip archive,
201  * where CDE was nearly 48 kbytes before EOF.
202  * (Surprisingly, it also apparently has *another* CDE structure
203  * closer to the end, with bogus cdf_offset).
204  * To make extraction work, bumped PEEK_FROM_END from 16k to 64k.
205  */
206 #define PEEK_FROM_END (64*1024)
207 /* NB: does not preserve file position! */
208 static uint32_t find_cdf_offset(void)
209 {
210         cde_t cde;
211         unsigned char *buf;
212         unsigned char *p;
213         off_t end;
214         uint32_t found;
215
216         end = lseek(zip_fd, 0, SEEK_END);
217         if (end == (off_t) -1)
218                 return BAD_CDF_OFFSET;
219
220         end -= PEEK_FROM_END;
221         if (end < 0)
222                 end = 0;
223
224         dbg("Looking for cdf_offset starting from 0x%"OFF_FMT"x", end);
225         xlseek(zip_fd, end, SEEK_SET);
226         buf = xzalloc(PEEK_FROM_END);
227         full_read(zip_fd, buf, PEEK_FROM_END);
228
229         found = BAD_CDF_OFFSET;
230         p = buf;
231         while (p <= buf + PEEK_FROM_END - CDE_LEN - 4) {
232                 if (*p != 'P') {
233                         p++;
234                         continue;
235                 }
236                 if (*++p != 'K')
237                         continue;
238                 if (*++p != 5)
239                         continue;
240                 if (*++p != 6)
241                         continue;
242                 /* we found CDE! */
243                 memcpy(cde.raw, p + 1, CDE_LEN);
244                 FIX_ENDIANNESS_CDE(cde);
245                 /*
246                  * I've seen .ZIP files with seemingly valid CDEs
247                  * where cdf_offset points past EOF - ??
248                  * This check ignores such CDEs:
249                  */
250                 if (cde.fmt.cdf_offset < end + (p - buf)) {
251                         found = cde.fmt.cdf_offset;
252                         dbg("Possible cdf_offset:0x%x at 0x%"OFF_FMT"x",
253                                 (unsigned)found, end + (p-3 - buf));
254                         dbg("  cdf_offset+cdf_size:0x%x",
255                                 (unsigned)(found + SWAP_LE32(cde.fmt.cdf_size)));
256                         /*
257                          * We do not "break" here because only the last CDE is valid.
258                          * I've seen a .zip archive which contained a .zip file,
259                          * uncompressed, and taking the first CDE was using
260                          * the CDE inside that file!
261                          */
262                 }
263         }
264         free(buf);
265         dbg("Found cdf_offset:0x%x", (unsigned)found);
266         return found;
267 };
268
269 static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf)
270 {
271         uint32_t magic;
272
273         if (cdf_offset == BAD_CDF_OFFSET)
274                 return cdf_offset;
275
276         dbg("Reading CDF at 0x%x", (unsigned)cdf_offset);
277         xlseek(zip_fd, cdf_offset, SEEK_SET);
278         xread(zip_fd, &magic, 4);
279         /* Central Directory End? Assume CDF has ended.
280          * (more correct method is to use cde.cdf_entries_total counter)
281          */
282         if (magic == ZIP_CDE_MAGIC) {
283                 dbg("got ZIP_CDE_MAGIC");
284                 return 0; /* EOF */
285         }
286         xread(zip_fd, cdf->raw, CDF_HEADER_LEN);
287
288         FIX_ENDIANNESS_CDF(*cdf);
289         dbg("  filename_len:%u extra_len:%u file_comment_length:%u",
290                 (unsigned)cdf->fmt.filename_len,
291                 (unsigned)cdf->fmt.extra_len,
292                 (unsigned)cdf->fmt.file_comment_length
293         );
294         cdf_offset += 4 + CDF_HEADER_LEN
295                 + cdf->fmt.filename_len
296                 + cdf->fmt.extra_len
297                 + cdf->fmt.file_comment_length;
298
299         return cdf_offset;
300 };
301 #endif
302
303 static void unzip_skip(off_t skip)
304 {
305         if (skip != 0)
306                 if (lseek(zip_fd, skip, SEEK_CUR) == (off_t)-1)
307                         bb_copyfd_exact_size(zip_fd, -1, skip);
308 }
309
310 static void unzip_create_leading_dirs(const char *fn)
311 {
312         /* Create all leading directories */
313         char *name = xstrdup(fn);
314         if (bb_make_directory(dirname(name), 0777, FILEUTILS_RECUR)) {
315                 xfunc_die(); /* bb_make_directory is noisy */
316         }
317         free(name);
318 }
319
320 static void unzip_extract(zip_header_t *zip, int dst_fd)
321 {
322         if (zip->fmt.method == 0) {
323                 /* Method 0 - stored (not compressed) */
324                 off_t size = zip->fmt.ucmpsize;
325                 if (size)
326                         bb_copyfd_exact_size(zip_fd, dst_fd, size);
327         } else {
328                 /* Method 8 - inflate */
329                 transformer_state_t xstate;
330                 init_transformer_state(&xstate);
331                 xstate.bytes_in = zip->fmt.cmpsize;
332                 xstate.src_fd = zip_fd;
333                 xstate.dst_fd = dst_fd;
334                 if (inflate_unzip(&xstate) < 0)
335                         bb_error_msg_and_die("inflate error");
336                 /* Validate decompression - crc */
337                 if (zip->fmt.crc32 != (xstate.crc32 ^ 0xffffffffL)) {
338                         bb_error_msg_and_die("crc error");
339                 }
340                 /* Validate decompression - size */
341                 if (zip->fmt.ucmpsize != xstate.bytes_out) {
342                         /* Don't die. Who knows, maybe len calculation
343                          * was botched somewhere. After all, crc matched! */
344                         bb_error_msg("bad length");
345                 }
346         }
347         /* TODO? method 12: bzip2, method 14: LZMA */
348 }
349
/*
 * Flush pending output, then read one line of at most 79 characters
 * from stdin into buf80, which must have room for 80 bytes.
 * Terminates the program if nothing can be read.
 */
static void my_fgets80(char *buf80)
{
        char *line;

        fflush_all();
        line = fgets(buf80, 80, stdin);
        if (line == NULL)
                bb_perror_msg_and_die("can't read standard input");
}
357
358 int unzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
359 int unzip_main(int argc, char **argv)
360 {
361         enum { O_PROMPT, O_NEVER, O_ALWAYS };
362
363         smallint quiet = 0;
364         IF_NOT_FEATURE_UNZIP_CDF(const) smallint verbose = 0;
365         smallint listing = 0;
366         smallint overwrite = O_PROMPT;
367         smallint x_opt_seen;
368         uint32_t cdf_offset;
369         unsigned long total_usize;
370         unsigned long total_size;
371         unsigned total_entries;
372         int dst_fd = -1;
373         char *src_fn = NULL;
374         char *dst_fn = NULL;
375         llist_t *zaccept = NULL;
376         llist_t *zreject = NULL;
377         char *base_dir = NULL;
378         int i, opt;
379         char key_buf[80]; /* must match size used by my_fgets80 */
380         struct stat stat_buf;
381
382 /* -q, -l and -v: UnZip 5.52 of 28 February 2005, by Info-ZIP:
383  *
384  * # /usr/bin/unzip -qq -v decompress_unlzma.i.zip
385  *   204372  Defl:N    35278  83%  09-06-09 14:23  0d056252  decompress_unlzma.i
386  * # /usr/bin/unzip -q -v decompress_unlzma.i.zip
387  *  Length   Method    Size  Ratio   Date   Time   CRC-32    Name
388  * --------  ------  ------- -----   ----   ----   ------    ----
389  *   204372  Defl:N    35278  83%  09-06-09 14:23  0d056252  decompress_unlzma.i
390  * --------          -------  ---                            -------
391  *   204372            35278  83%                            1 file
392  * # /usr/bin/unzip -v decompress_unlzma.i.zip
393  * Archive:  decompress_unlzma.i.zip
394  *  Length   Method    Size  Ratio   Date   Time   CRC-32    Name
395  * --------  ------  ------- -----   ----   ----   ------    ----
396  *   204372  Defl:N    35278  83%  09-06-09 14:23  0d056252  decompress_unlzma.i
397  * --------          -------  ---                            -------
398  *   204372            35278  83%                            1 file
399  * # unzip -v decompress_unlzma.i.zip
400  * Archive:  decompress_unlzma.i.zip
401  *   Length     Date   Time    Name
402  *  --------    ----   ----    ----
403  *    204372  09-06-09 14:23   decompress_unlzma.i
404  *  --------                   -------
405  *    204372                   1 files
406  * # /usr/bin/unzip -l -qq decompress_unlzma.i.zip
407  *    204372  09-06-09 14:23   decompress_unlzma.i
408  * # /usr/bin/unzip -l -q decompress_unlzma.i.zip
409  *   Length     Date   Time    Name
410  *  --------    ----   ----    ----
411  *    204372  09-06-09 14:23   decompress_unlzma.i
412  *  --------                   -------
413  *    204372                   1 file
414  * # /usr/bin/unzip -l decompress_unlzma.i.zip
415  * Archive:  decompress_unlzma.i.zip
416  *   Length     Date   Time    Name
417  *  --------    ----   ----    ----
418  *    204372  09-06-09 14:23   decompress_unlzma.i
419  *  --------                   -------
420  *    204372                   1 file
421  */
422
423         x_opt_seen = 0;
424         /* '-' makes getopt return 1 for non-options */
425         while ((opt = getopt(argc, argv, "-d:lnopqxv")) != -1) {
426                 switch (opt) {
427                 case 'd':  /* Extract to base directory */
428                         base_dir = optarg;
429                         break;
430
431                 case 'l': /* List */
432                         listing = 1;
433                         break;
434
435                 case 'n': /* Never overwrite existing files */
436                         overwrite = O_NEVER;
437                         break;
438
439                 case 'o': /* Always overwrite existing files */
440                         overwrite = O_ALWAYS;
441                         break;
442
443                 case 'p': /* Extract files to stdout and fall through to set verbosity */
444                         dst_fd = STDOUT_FILENO;
445
446                 case 'q': /* Be quiet */
447                         quiet++;
448                         break;
449
450                 case 'v': /* Verbose list */
451                         IF_FEATURE_UNZIP_CDF(verbose++;)
452                         listing = 1;
453                         break;
454
455                 case 'x':
456                         x_opt_seen = 1;
457                         break;
458
459                 case 1:
460                         if (!src_fn) {
461                                 /* The zip file */
462                                 /* +5: space for ".zip" and NUL */
463                                 src_fn = xmalloc(strlen(optarg) + 5);
464                                 strcpy(src_fn, optarg);
465                         } else if (!x_opt_seen) {
466                                 /* Include files */
467                                 llist_add_to(&zaccept, optarg);
468                         } else {
469                                 /* Exclude files */
470                                 llist_add_to(&zreject, optarg);
471                         }
472                         break;
473
474                 default:
475                         bb_show_usage();
476                 }
477         }
478
479 #ifndef __GLIBC__
480         /*
481          * This code is needed for non-GNU getopt
482          * which doesn't understand "-" in option string.
483          * The -x option won't work properly in this case:
484          * "unzip a.zip q -x w e" will be interpreted as
485          * "unzip a.zip q w e -x" = "unzip a.zip q w e"
486          */
487         argv += optind;
488         if (argv[0]) {
489                 /* +5: space for ".zip" and NUL */
490                 src_fn = xmalloc(strlen(argv[0]) + 5);
491                 strcpy(src_fn, argv[0]);
492                 while (*++argv)
493                         llist_add_to(&zaccept, *argv);
494         }
495 #endif
496
497         if (!src_fn) {
498                 bb_show_usage();
499         }
500
501         /* Open input file */
502         if (LONE_DASH(src_fn)) {
503                 xdup2(STDIN_FILENO, zip_fd);
504                 /* Cannot use prompt mode since zip data is arriving on STDIN */
505                 if (overwrite == O_PROMPT)
506                         overwrite = O_NEVER;
507         } else {
508                 static const char extn[][5] ALIGN1 = { ".zip", ".ZIP" };
509                 char *ext = src_fn + strlen(src_fn);
510                 int src_fd;
511
512                 i = 0;
513                 for (;;) {
514                         src_fd = open(src_fn, O_RDONLY);
515                         if (src_fd >= 0)
516                                 break;
517                         if (++i > 2) {
518                                 *ext = '\0';
519                                 bb_error_msg_and_die("can't open %s[.zip]", src_fn);
520                         }
521                         strcpy(ext, extn[i - 1]);
522                 }
523                 xmove_fd(src_fd, zip_fd);
524         }
525
526         /* Change dir if necessary */
527         if (base_dir)
528                 xchdir(base_dir);
529
530         if (quiet <= 1) { /* not -qq */
531                 if (quiet == 0)
532                         printf("Archive:  %s\n", src_fn);
533                 if (listing) {
534                         puts(verbose ?
535                                 " Length   Method    Size  Cmpr    Date    Time   CRC-32   Name\n"
536                                 "--------  ------  ------- ---- ---------- ----- --------  ----"
537                                 :
538                                 "  Length      Date    Time    Name\n"
539                                 "---------  ---------- -----   ----"
540                                 );
541                 }
542         }
543
544 /* Example of an archive with one 0-byte long file named 'z'
545  * created by Zip 2.31 on Unix:
546  * 0000 [50 4b]03 04 0a 00 00 00 00 00 42 1a b8 3c 00 00 |PK........B..<..|
547  *       sig........ vneed flags compr mtime mdate crc32>
548  * 0010  00 00 00 00 00 00 00 00 00 00 01 00 15 00 7a 55 |..............zU|
549  *      >..... csize...... usize...... fnlen exlen fn ex>
550  * 0020  54 09 00 03 cc d3 f9 4b cc d3 f9 4b 55 78 04 00 |T......K...KUx..|
551  *      >tra_field......................................
552  * 0030  00 00 00 00[50 4b]01 02 17 03 0a 00 00 00 00 00 |....PK..........|
553  *       ........... sig........ vmade vneed flags compr
554  * 0040  42 1a b8 3c 00 00 00 00 00 00 00 00 00 00 00 00 |B..<............|
555  *       mtime mdate crc32...... csize...... usize......
556  * 0050  01 00 0d 00 00 00 00 00 00 00 00 00 a4 81 00 00 |................|
557  *       fnlen exlen clen. dnum. iattr eattr...... relofs> (eattr = rw-r--r--)
558  * 0060  00 00 7a 55 54 05 00 03 cc d3 f9 4b 55 78 00 00 |..zUT......KUx..|
559  *      >..... fn extra_field...........................
560  * 0070 [50 4b]05 06 00 00 00 00 01 00 01 00 3c 00 00 00 |PK..........<...|
561  * 0080  34 00 00 00 00 00                               |4.....|
562  */
563         total_usize = 0;
564         total_size = 0;
565         total_entries = 0;
566         cdf_offset = find_cdf_offset(); /* try to seek to the end, find CDE and CDF start */
567         while (1) {
568                 zip_header_t zip;
569                 mode_t dir_mode = 0777;
570 #if ENABLE_FEATURE_UNZIP_CDF
571                 mode_t file_mode = 0666;
572 #endif
573
574                 if (!ENABLE_FEATURE_UNZIP_CDF || cdf_offset == BAD_CDF_OFFSET) {
575                         /* Normally happens when input is unseekable.
576                          *
577                          * Valid ZIP file has Central Directory at the end
578                          * with central directory file headers (CDFs).
579                          * After it, there is a Central Directory End structure.
580                          * CDFs identify what files are in the ZIP and where
581                          * they are located. This allows ZIP readers to load
582                          * the list of files without reading the entire ZIP archive.
583                          * ZIP files may be appended to, only files specified in
584                          * the CD are valid. Scanning for local file headers is
585                          * not a correct algorithm.
586                          *
587                          * We try to do the above, and resort to "linear" reading
588                          * of ZIP file only if seek failed or CDE wasn't found.
589                          */
590                         uint32_t magic;
591
592                         /* Check magic number */
593                         xread(zip_fd, &magic, 4);
594                         /* CDF item? Assume there are no more files, exit */
595                         if (magic == ZIP_CDF_MAGIC) {
596                                 dbg("got ZIP_CDF_MAGIC");
597                                 break;
598                         }
599                         /* Data descriptor? It was a streaming file, go on */
600                         if (magic == ZIP_DD_MAGIC) {
601                                 dbg("got ZIP_DD_MAGIC");
602                                 /* skip over duplicate crc32, cmpsize and ucmpsize */
603                                 unzip_skip(3 * 4);
604                                 continue;
605                         }
606                         if (magic != ZIP_FILEHEADER_MAGIC)
607                                 bb_error_msg_and_die("invalid zip magic %08X", (int)magic);
608                         dbg("got ZIP_FILEHEADER_MAGIC");
609
610                         xread(zip_fd, zip.raw, ZIP_HEADER_LEN);
611                         FIX_ENDIANNESS_ZIP(zip);
612                         if ((zip.fmt.method != 0)
613                          && (zip.fmt.method != 8)
614                         ) {
615                                 /* TODO? method 12: bzip2, method 14: LZMA */
616                                 bb_error_msg_and_die("unsupported method %d", zip.fmt.method);
617                         }
618                         if (zip.fmt.zip_flags & SWAP_LE16(0x0009)) {
619                                 bb_error_msg_and_die("zip flags 1 and 8 are not supported");
620                         }
621                 }
622 #if ENABLE_FEATURE_UNZIP_CDF
623                 else {
624                         /* cdf_offset is valid (and we know the file is seekable) */
625                         cdf_header_t cdf;
626                         cdf_offset = read_next_cdf(cdf_offset, &cdf);
627                         if (cdf_offset == 0) /* EOF? */
628                                 break;
629 # if 1
630                         xlseek(zip_fd,
631                                 SWAP_LE32(cdf.fmt.relative_offset_of_local_header) + 4,
632                                 SEEK_SET);
633                         xread(zip_fd, zip.raw, ZIP_HEADER_LEN);
634                         FIX_ENDIANNESS_ZIP(zip);
635                         if (zip.fmt.zip_flags & SWAP_LE16(0x0008)) {
636                                 /* 0x0008 - streaming. [u]cmpsize can be reliably gotten
637                                  * only from Central Directory.
638                                  */
639                                 zip.fmt.crc32    = cdf.fmt.crc32;
640                                 zip.fmt.cmpsize  = cdf.fmt.cmpsize;
641                                 zip.fmt.ucmpsize = cdf.fmt.ucmpsize;
642                         }
643 # else
644                         /* CDF has the same data as local header, no need to read the latter...
645                          * ...not really. An archive was seen with cdf.extra_len == 6 but
646                          * zip.extra_len == 0.
647                          */
648                         memcpy(&zip.fmt.version,
649                                 &cdf.fmt.version_needed, ZIP_HEADER_LEN);
650                         xlseek(zip_fd,
651                                 SWAP_LE32(cdf.fmt.relative_offset_of_local_header) + 4 + ZIP_HEADER_LEN,
652                                 SEEK_SET);
653 # endif
654                         if ((cdf.fmt.version_made_by >> 8) == 3) {
655                                 /* This archive is created on Unix */
656                                 dir_mode = file_mode = (cdf.fmt.external_attributes >> 16);
657                         }
658                 }
659 #endif
660
661                 if (zip.fmt.zip_flags & SWAP_LE16(0x0001)) {
662                         /* 0x0001 - encrypted */
663                         bb_error_msg_and_die("zip flag 1 (encryption) is not supported");
664                 }
665                 dbg("File cmpsize:0x%x extra_len:0x%x ucmpsize:0x%x",
666                         (unsigned)zip.fmt.cmpsize,
667                         (unsigned)zip.fmt.extra_len,
668                         (unsigned)zip.fmt.ucmpsize
669                 );
670
671                 /* Read filename */
672                 free(dst_fn);
673                 dst_fn = xzalloc(zip.fmt.filename_len + 1);
674                 xread(zip_fd, dst_fn, zip.fmt.filename_len);
675
676                 /* Skip extra header bytes */
677                 unzip_skip(zip.fmt.extra_len);
678
679                 /* Guard against "/abspath", "/../" and similar attacks */
680                 overlapping_strcpy(dst_fn, strip_unsafe_prefix(dst_fn));
681
682                 /* Filter zip entries */
683                 if (find_list_entry(zreject, dst_fn)
684                  || (zaccept && !find_list_entry(zaccept, dst_fn))
685                 ) { /* Skip entry */
686                         i = 'n';
687                 } else {
688                         if (listing) {
689                                 /* List entry */
690                                 char dtbuf[sizeof("mm-dd-yyyy hh:mm")];
691                                 sprintf(dtbuf, "%02u-%02u-%04u %02u:%02u",
692                                         (zip.fmt.moddate >> 5) & 0xf,  // mm: 0x01e0
693                                         (zip.fmt.moddate)      & 0x1f, // dd: 0x001f
694                                         (zip.fmt.moddate >> 9) + 1980, // yy: 0xfe00
695                                         (zip.fmt.modtime >> 11),       // hh: 0xf800
696                                         (zip.fmt.modtime >> 5) & 0x3f  // mm: 0x07e0
697                                         // seconds/2 not shown, encoded in -- 0x001f
698                                 );
699                                 if (!verbose) {
700                                         //      "  Length      Date    Time    Name\n"
701                                         //      "---------  ---------- -----   ----"
702                                         printf(       "%9u  " "%s   "         "%s\n",
703                                                 (unsigned)zip.fmt.ucmpsize,
704                                                 dtbuf,
705                                                 dst_fn);
706                                 } else {
707                                         unsigned long percents = zip.fmt.ucmpsize - zip.fmt.cmpsize;
708                                         if ((int32_t)percents < 0)
709                                                 percents = 0; /* happens if ucmpsize < cmpsize */
710                                         percents = percents * 100;
711                                         if (zip.fmt.ucmpsize)
712                                                 percents /= zip.fmt.ucmpsize;
713                                         //      " Length   Method    Size  Cmpr    Date    Time   CRC-32   Name\n"
714                                         //      "--------  ------  ------- ---- ---------- ----- --------  ----"
715                                         printf(      "%8u  %s"        "%9u%4u%% " "%s "         "%08x  "  "%s\n",
716                                                 (unsigned)zip.fmt.ucmpsize,
717                                                 zip.fmt.method == 0 ? "Stored" : "Defl:N", /* Defl is method 8 */
718 /* TODO: show other methods?
719  *  1 - Shrunk
720  *  2 - Reduced with compression factor 1
721  *  3 - Reduced with compression factor 2
722  *  4 - Reduced with compression factor 3
723  *  5 - Reduced with compression factor 4
724  *  6 - Imploded
725  *  7 - Reserved for Tokenizing compression algorithm
726  *  9 - Deflate64
727  * 10 - PKWARE Data Compression Library Imploding
728  * 11 - Reserved by PKWARE
729  * 12 - BZIP2
730  * 14 - LZMA
731  */
732                                                 (unsigned)zip.fmt.cmpsize,
733                                                 (unsigned)percents,
734                                                 dtbuf,
735                                                 zip.fmt.crc32,
736                                                 dst_fn);
737                                         total_size += zip.fmt.cmpsize;
738                                 }
739                                 total_usize += zip.fmt.ucmpsize;
740                                 i = 'n';
741                         } else if (dst_fd == STDOUT_FILENO) {
742                                 /* Extracting to STDOUT */
743                                 i = -1;
744                         } else if (last_char_is(dst_fn, '/')) {
745                                 /* Extract directory */
746                                 if (stat(dst_fn, &stat_buf) == -1) {
747                                         if (errno != ENOENT) {
748                                                 bb_perror_msg_and_die("can't stat '%s'", dst_fn);
749                                         }
750                                         if (!quiet) {
751                                                 printf("   creating: %s\n", dst_fn);
752                                         }
753                                         unzip_create_leading_dirs(dst_fn);
754                                         if (bb_make_directory(dst_fn, dir_mode, FILEUTILS_IGNORE_CHMOD_ERR)) {
755                                                 xfunc_die();
756                                         }
757                                 } else {
758                                         if (!S_ISDIR(stat_buf.st_mode)) {
759                                                 bb_error_msg_and_die("'%s' exists but is not a %s",
760                                                         dst_fn, "directory");
761                                         }
762                                 }
763                                 i = 'n';
764                         } else {
765                                 /* Extract file */
766  check_file:
767                                 if (stat(dst_fn, &stat_buf) == -1) {
768                                         /* File does not exist */
769                                         if (errno != ENOENT) {
770                                                 bb_perror_msg_and_die("can't stat '%s'", dst_fn);
771                                         }
772                                         i = 'y';
773                                 } else {
774                                         /* File already exists */
775                                         if (overwrite == O_NEVER) {
776                                                 i = 'n';
777                                         } else if (S_ISREG(stat_buf.st_mode)) {
778                                                 /* File is regular file */
779                                                 if (overwrite == O_ALWAYS) {
780                                                         i = 'y';
781                                                 } else {
782                                                         printf("replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ", dst_fn);
783                                                         my_fgets80(key_buf);
784                                                         i = key_buf[0];
785                                                 }
786                                         } else {
787                                                 /* File is not regular file */
788                                                 bb_error_msg_and_die("'%s' exists but is not a %s",
789                                                         dst_fn, "regular file");
790                                         }
791                                 }
792                         }
793                 }
794
795                 switch (i) {
796                 case 'A':
797                         overwrite = O_ALWAYS;
798                 case 'y': /* Open file and fall into unzip */
799                         unzip_create_leading_dirs(dst_fn);
800 #if ENABLE_FEATURE_UNZIP_CDF
801                         dst_fd = xopen3(dst_fn, O_WRONLY | O_CREAT | O_TRUNC, file_mode);
802 #else
803                         dst_fd = xopen(dst_fn, O_WRONLY | O_CREAT | O_TRUNC);
804 #endif
805                 case -1: /* Unzip */
806                         if (!quiet) {
807                                 printf(/* zip.fmt.method == 0
808                                         ? " extracting: %s\n"
809                                         : */ "  inflating: %s\n", dst_fn);
810                         }
811                         unzip_extract(&zip, dst_fd);
812                         if (dst_fd != STDOUT_FILENO) {
813                                 /* closing STDOUT is potentially bad for future business */
814                                 close(dst_fd);
815                         }
816                         break;
817
818                 case 'N':
819                         overwrite = O_NEVER;
820                 case 'n':
821                         /* Skip entry data */
822                         unzip_skip(zip.fmt.cmpsize);
823                         break;
824
825                 case 'r':
826                         /* Prompt for new name */
827                         printf("new name: ");
828                         my_fgets80(key_buf);
829                         free(dst_fn);
830                         dst_fn = xstrdup(key_buf);
831                         chomp(dst_fn);
832                         goto check_file;
833
834                 default:
835                         printf("error: invalid response [%c]\n", (char)i);
836                         goto check_file;
837                 }
838
839                 total_entries++;
840         }
841
842         if (listing && quiet <= 1) {
843                 if (!verbose) {
844                         //      "  Length      Date    Time    Name\n"
845                         //      "---------  ---------- -----   ----"
846                         printf( " --------%21s"               "-------\n"
847                                      "%9lu%21s"               "%u files\n",
848                                 "",
849                                 total_usize, "", total_entries);
850                 } else {
851                         unsigned long percents = total_usize - total_size;
852                         if ((long)percents < 0)
853                                 percents = 0; /* happens if usize < size */
854                         percents = percents * 100;
855                         if (total_usize)
856                                 percents /= total_usize;
857                         //      " Length   Method    Size  Cmpr    Date    Time   CRC-32   Name\n"
858                         //      "--------  ------  ------- ---- ---------- ----- --------  ----"
859                         printf( "--------          ------- ----%28s"                      "----\n"
860                                 "%8lu"              "%17lu%4u%%%28s"                      "%u files\n",
861                                 "",
862                                 total_usize, total_size, (unsigned)percents, "",
863                                 total_entries);
864                 }
865         }
866
867         return 0;
868 }