unzip: remove now-pointless lseek which returns current position
[oweals/busybox.git] / archival / unzip.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * Mini unzip implementation for busybox
4  *
5  * Copyright (C) 2004 by Ed Clark
6  *
7  * Loosely based on original busybox unzip applet by Laurence Anderson.
8  * All options and features should work in this version.
9  *
10  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
11  */
12 /* For reference see
13  * http://www.pkware.com/company/standards/appnote/
14  * http://www.info-zip.org/pub/infozip/doc/appnote-iz-latest.zip
15  *
16  * TODO
17  * Zip64 + other methods
18  */
19 //config:config UNZIP
20 //config:       bool "unzip"
21 //config:       default y
22 //config:       help
23 //config:         unzip will list or extract files from a ZIP archive,
24 //config:         commonly found on DOS/WIN systems. The default behavior
25 //config:         (with no options) is to extract the archive into the
26 //config:         current directory.
27 //config:
28 //config:config FEATURE_UNZIP_CDF
29 //config:       bool "Read and use Central Directory data"
30 //config:       default y
31 //config:       depends on UNZIP
32 //config:       help
33 //config:         If you know that you only need to deal with simple
34 //config:         ZIP files without deleted/updated files, SFX archives etc,
35 //config:         you can reduce code size by unselecting this option.
36 //config:         To support less trivial ZIPs, say Y.
37
38 //applet:IF_UNZIP(APPLET(unzip, BB_DIR_USR_BIN, BB_SUID_DROP))
39 //kbuild:lib-$(CONFIG_UNZIP) += unzip.o
40
41 //usage:#define unzip_trivial_usage
42 //usage:       "[-lnopq] FILE[.zip] [FILE]... [-x FILE...] [-d DIR]"
43 //usage:#define unzip_full_usage "\n\n"
44 //usage:       "Extract FILEs from ZIP archive\n"
45 //usage:     "\n        -l      List contents (with -q for short form)"
46 //usage:     "\n        -n      Never overwrite files (default: ask)"
47 //usage:     "\n        -o      Overwrite"
48 //usage:     "\n        -p      Print to stdout"
49 //usage:     "\n        -q      Quiet"
50 //usage:     "\n        -x FILE Exclude FILEs"
51 //usage:     "\n        -d DIR  Extract into DIR"
52
53 #include "libbb.h"
54 #include "bb_archive.h"
55
/* Debug tracing switch: change "#if 0" to "#if 1" to make dbg() calls
 * print through bb_error_msg(); otherwise dbg() expands to a no-op
 * expression and its arguments are not evaluated.
 */
#if 0
# define dbg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg(...) ((void)0)
#endif
61
/* Record signatures ("PK" plus two type bytes), spelled the way they look
 * after the four on-disk bytes have been read straight into a uint32_t
 * on the host, so that no byte swapping is needed before comparing.
 */
enum {
#if !BB_BIG_ENDIAN
        ZIP_FILEHEADER_MAGIC = 0x04034b50, /* local file header */
        ZIP_CDF_MAGIC        = 0x02014b50, /* central directory's file header */
        ZIP_CDE_MAGIC        = 0x06054b50, /* "end of central directory" record */
        ZIP_DD_MAGIC         = 0x08074b50, /* data descriptor */
#else
        ZIP_FILEHEADER_MAGIC = 0x504b0304, /* local file header */
        ZIP_CDF_MAGIC        = 0x504b0102, /* central directory's file header */
        ZIP_CDE_MAGIC        = 0x504b0506, /* "end of central directory" record */
        ZIP_DD_MAGIC         = 0x504b0708, /* data descriptor */
#endif
};
75
/* Size in bytes of the fixed part of a local file header,
 * not counting the 4-byte signature which is read separately.
 */
#define ZIP_HEADER_LEN 26

/* Local file header.
 * 'raw' is the byte buffer the header is read into; 'formatted' names
 * the fields located at the byte offsets given in the comments.
 * Fields hold on-disk (little-endian) values until FIX_ENDIANNESS_ZIP
 * is applied.
 */
typedef union {
        uint8_t raw[ZIP_HEADER_LEN];
        struct {
                uint16_t version;               /* 0-1 */
                uint16_t zip_flags;             /* 2-3 */
                uint16_t method;                /* 4-5 */
                uint16_t modtime;               /* 6-7 */
                uint16_t moddate;               /* 8-9 */
                /* the 32-bit fields are not naturally aligned, hence PACKED */
                uint32_t crc32 PACKED;          /* 10-13 */
                uint32_t cmpsize PACKED;        /* 14-17 */
                uint32_t ucmpsize PACKED;       /* 18-21 */
                uint16_t filename_len;          /* 22-23 */
                uint16_t extra_len;             /* 24-25 */
                /* filename follows (not NUL terminated) */
                /* extra field follows */
                /* data follows */
        } formatted PACKED;
} zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */
96
/* Convert the local-header fields that the rest of the code uses as
 * native integers from on-disk little-endian order.
 * Does nothing unless BB_BIG_ENDIAN is true.
 *
 * 'method', 'modtime' and 'moddate' are converted as well (as
 * FIX_ENDIANNESS_CDF does), because they are compared and printed
 * as native values.
 * 'zip_flags' is deliberately left in disk order: it is tested
 * against SWAP_LE16() masks. 'version' is not converted either.
 */
#define FIX_ENDIANNESS_ZIP(zip_header) \
do { if (BB_BIG_ENDIAN) { \
        (zip_header).formatted.method       = SWAP_LE16((zip_header).formatted.method      ); \
        (zip_header).formatted.modtime      = SWAP_LE16((zip_header).formatted.modtime     ); \
        (zip_header).formatted.moddate      = SWAP_LE16((zip_header).formatted.moddate     ); \
        (zip_header).formatted.crc32        = SWAP_LE32((zip_header).formatted.crc32       ); \
        (zip_header).formatted.cmpsize      = SWAP_LE32((zip_header).formatted.cmpsize     ); \
        (zip_header).formatted.ucmpsize     = SWAP_LE32((zip_header).formatted.ucmpsize    ); \
        (zip_header).formatted.filename_len = SWAP_LE16((zip_header).formatted.filename_len); \
        (zip_header).formatted.extra_len    = SWAP_LE16((zip_header).formatted.extra_len   ); \
}} while (0)
105
/* Size in bytes of the fixed part of a central directory file header,
 * not counting the 4-byte signature which is read separately.
 */
#define CDF_HEADER_LEN 42

/* Central directory file header (CDF).
 * 'raw' is the byte buffer the header is read into; 'formatted' names
 * the fields located at the byte offsets given in the comments.
 * Bytes 2-27 (version_needed .. extra_field_length) have the same
 * layout as the whole of zip_header_t.
 */
typedef union {
        uint8_t raw[CDF_HEADER_LEN];
        struct {
                /* uint32_t signature; 50 4b 01 02 */
                uint16_t version_made_by;       /* 0-1 */
                uint16_t version_needed;        /* 2-3 */
                uint16_t cdf_flags;             /* 4-5 */
                uint16_t method;                /* 6-7 */
                uint16_t modtime;               /* 8-9 */
                uint16_t moddate;               /* 10-11 */
                uint32_t crc32;                 /* 12-15 */
                uint32_t cmpsize;               /* 16-19 */
                uint32_t ucmpsize;              /* 20-23 */
                uint16_t file_name_length;      /* 24-25 */
                uint16_t extra_field_length;    /* 26-27 */
                uint16_t file_comment_length;   /* 28-29 */
                uint16_t disk_number_start;     /* 30-31 */
                uint16_t internal_file_attributes; /* 32-33 */
                /* the two fields below are not naturally aligned, hence PACKED */
                uint32_t external_file_attributes PACKED; /* 34-37 */
                uint32_t relative_offset_of_local_header PACKED; /* 38-41 */
                /* filename follows (not NUL terminated) */
                /* extra field follows */
                /* comment follows */
        } formatted PACKED;
} cdf_header_t;
133
/* Convert central directory header fields from on-disk little-endian
 * order to host order. Does nothing unless BB_BIG_ENDIAN is true.
 *
 * Not converted here: cdf_flags, disk_number_start,
 * internal_file_attributes and relative_offset_of_local_header.
 * Code that uses relative_offset_of_local_header applies SWAP_LE32()
 * itself, and flag bits are tested against SWAP_LE16() masks.
 */
#define FIX_ENDIANNESS_CDF(cdf_header) \
do { if (BB_BIG_ENDIAN) { \
        (cdf_header).formatted.version_made_by = SWAP_LE16((cdf_header).formatted.version_made_by); \
        (cdf_header).formatted.version_needed = SWAP_LE16((cdf_header).formatted.version_needed); \
        (cdf_header).formatted.method       = SWAP_LE16((cdf_header).formatted.method      ); \
        (cdf_header).formatted.modtime      = SWAP_LE16((cdf_header).formatted.modtime     ); \
        (cdf_header).formatted.moddate      = SWAP_LE16((cdf_header).formatted.moddate     ); \
        (cdf_header).formatted.crc32        = SWAP_LE32((cdf_header).formatted.crc32       ); \
        (cdf_header).formatted.cmpsize      = SWAP_LE32((cdf_header).formatted.cmpsize     ); \
        (cdf_header).formatted.ucmpsize     = SWAP_LE32((cdf_header).formatted.ucmpsize    ); \
        (cdf_header).formatted.file_name_length = SWAP_LE16((cdf_header).formatted.file_name_length); \
        (cdf_header).formatted.extra_field_length = SWAP_LE16((cdf_header).formatted.extra_field_length); \
        (cdf_header).formatted.file_comment_length = SWAP_LE16((cdf_header).formatted.file_comment_length); \
        (cdf_header).formatted.external_file_attributes = SWAP_LE32((cdf_header).formatted.external_file_attributes); \
}} while (0)
149
/* Size in bytes of the part of the "end of central directory" record
 * described by cde_header_t: the 4-byte signature before it and the
 * comment length/comment after it are not included.
 */
#define CDE_HEADER_LEN 16

/* "End of central directory" record (CDE).
 * 'raw' is the byte buffer the record is copied into; 'formatted'
 * names the fields. Values are in on-disk order until
 * FIX_ENDIANNESS_CDE is applied (which converts cdf_offset only).
 */
typedef union {
        uint8_t raw[CDE_HEADER_LEN];
        struct {
                /* uint32_t signature; 50 4b 05 06 */
                uint16_t this_disk_no;             /* 0-1 */
                uint16_t disk_with_cdf_no;         /* 2-3 */
                uint16_t cdf_entries_on_this_disk; /* 4-5 */
                uint16_t cdf_entries_total;        /* 6-7 */
                uint32_t cdf_size;                 /* 8-11 */
                uint32_t cdf_offset;               /* 12-15 */
                /* uint16_t file_comment_length; */
                /* .ZIP file comment (variable size) */
        } formatted PACKED;
} cde_header_t;
166
/* On big-endian hosts, convert cdf_offset from on-disk little-endian
 * order to host order. All other CDE fields are left in disk order.
 */
#define FIX_ENDIANNESS_CDE(cde_header) \
do { \
        if (BB_BIG_ENDIAN) \
                (cde_header).formatted.cdf_offset = \
                        SWAP_LE32((cde_header).formatted.cdf_offset); \
} while (0)
171
/* Compile-time layout checks for the on-disk header unions.
 * Each member is an array whose size evaluates to -1 (which does not
 * compile) when the corresponding layout condition is false, and to 1
 * otherwise. The struct exists only for these checks.
 */
struct BUG {
        /* Check the offset of the last element, not the length.  This leniency
         * allows for poor packing, whereby the overall struct may be too long,
         * even though the elements are all in the right place.
         */
        char BUG_zip_header_must_be_26_bytes[
                offsetof(zip_header_t, formatted.extra_len) + 2
                        == ZIP_HEADER_LEN ? 1 : -1];
        char BUG_cdf_header_must_be_42_bytes[
                offsetof(cdf_header_t, formatted.relative_offset_of_local_header) + 4
                        == CDF_HEADER_LEN ? 1 : -1];
        /* Unlike the two checks above, this one tests the total size */
        char BUG_cde_header_must_be_16_bytes[
                sizeof(cde_header_t) == CDE_HEADER_LEN ? 1 : -1];
};
186
187
/* Fixed descriptor number through which the archive is always accessed:
 * the input (stdin or the opened file) is moved/duplicated onto it.
 */
enum { zip_fd = 3 };
189
190
191 /* This value means that we failed to find CDF */
192 #define BAD_CDF_OFFSET ((uint32_t)0xffffffff)
193
194 #if !ENABLE_FEATURE_UNZIP_CDF
195
196 # define find_cdf_offset() BAD_CDF_OFFSET
197
198 #else
199 /* Seen in the wild:
200  * Self-extracting PRO2K3XP_32.exe contains 19078464 byte zip archive,
201  * where CDE was nearly 48 kbytes before EOF.
202  * (Surprisingly, it also apparently has *another* CDE structure
203  * closer to the end, with bogus cdf_offset).
204  * To make extraction work, bumped PEEK_FROM_END from 16k to 64k.
205  */
206 #define PEEK_FROM_END (64*1024)
207 /* NB: does not preserve file position! */
208 static uint32_t find_cdf_offset(void)
209 {
210         cde_header_t cde_header;
211         unsigned char *buf;
212         unsigned char *p;
213         off_t end;
214         uint32_t found;
215
216         end = lseek(zip_fd, 0, SEEK_END);
217         if (end == (off_t) -1)
218                 return BAD_CDF_OFFSET;
219
220         end -= PEEK_FROM_END;
221         if (end < 0)
222                 end = 0;
223
224         dbg("Looking for cdf_offset starting from 0x%"OFF_FMT"x", end);
225         xlseek(zip_fd, end, SEEK_SET);
226         buf = xzalloc(PEEK_FROM_END);
227         full_read(zip_fd, buf, PEEK_FROM_END);
228
229         found = BAD_CDF_OFFSET;
230         p = buf;
231         while (p <= buf + PEEK_FROM_END - CDE_HEADER_LEN - 4) {
232                 if (*p != 'P') {
233                         p++;
234                         continue;
235                 }
236                 if (*++p != 'K')
237                         continue;
238                 if (*++p != 5)
239                         continue;
240                 if (*++p != 6)
241                         continue;
242                 /* we found CDE! */
243                 memcpy(cde_header.raw, p + 1, CDE_HEADER_LEN);
244                 FIX_ENDIANNESS_CDE(cde_header);
245                 /*
246                  * I've seen .ZIP files with seemingly valid CDEs
247                  * where cdf_offset points past EOF - ??
248                  * This check ignores such CDEs:
249                  */
250                 if (cde_header.formatted.cdf_offset < end + (p - buf)) {
251                         found = cde_header.formatted.cdf_offset;
252                         dbg("Possible cdf_offset:0x%x at 0x%"OFF_FMT"x",
253                                 (unsigned)found, end + (p-3 - buf));
254                         dbg("  cdf_offset+cdf_size:0x%x",
255                                 (unsigned)(found + SWAP_LE32(cde_header.formatted.cdf_size)));
256                         /*
257                          * We do not "break" here because only the last CDE is valid.
258                          * I've seen a .zip archive which contained a .zip file,
259                          * uncompressed, and taking the first CDE was using
260                          * the CDE inside that file!
261                          */
262                 }
263         }
264         free(buf);
265         dbg("Found cdf_offset:0x%x", (unsigned)found);
266         return found;
267 };
268
269 static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr)
270 {
271         uint32_t magic;
272
273         if (cdf_offset == BAD_CDF_OFFSET)
274                 return cdf_offset;
275
276         dbg("Reading CDF at 0x%x", (unsigned)cdf_offset);
277         xlseek(zip_fd, cdf_offset, SEEK_SET);
278         xread(zip_fd, &magic, 4);
279         /* Central Directory End? */
280         if (magic == ZIP_CDE_MAGIC) {
281                 dbg("got ZIP_CDE_MAGIC");
282                 return 0; /* EOF */
283         }
284         xread(zip_fd, cdf_ptr->raw, CDF_HEADER_LEN);
285
286         FIX_ENDIANNESS_CDF(*cdf_ptr);
287         dbg("  file_name_length:%u extra_field_length:%u file_comment_length:%u",
288                 (unsigned)cdf_ptr->formatted.file_name_length,
289                 (unsigned)cdf_ptr->formatted.extra_field_length,
290                 (unsigned)cdf_ptr->formatted.file_comment_length
291         );
292         cdf_offset += 4 + CDF_HEADER_LEN
293                 + cdf_ptr->formatted.file_name_length
294                 + cdf_ptr->formatted.extra_field_length
295                 + cdf_ptr->formatted.file_comment_length;
296
297         return cdf_offset;
298 };
299 #endif
300
301 static void unzip_skip(off_t skip)
302 {
303         if (skip != 0)
304                 if (lseek(zip_fd, skip, SEEK_CUR) == (off_t)-1)
305                         bb_copyfd_exact_size(zip_fd, -1, skip);
306 }
307
308 static void unzip_create_leading_dirs(const char *fn)
309 {
310         /* Create all leading directories */
311         char *name = xstrdup(fn);
312         if (bb_make_directory(dirname(name), 0777, FILEUTILS_RECUR)) {
313                 xfunc_die(); /* bb_make_directory is noisy */
314         }
315         free(name);
316 }
317
318 static void unzip_extract(zip_header_t *zip_header, int dst_fd)
319 {
320         if (zip_header->formatted.method == 0) {
321                 /* Method 0 - stored (not compressed) */
322                 off_t size = zip_header->formatted.ucmpsize;
323                 if (size)
324                         bb_copyfd_exact_size(zip_fd, dst_fd, size);
325         } else {
326                 /* Method 8 - inflate */
327                 transformer_state_t xstate;
328                 init_transformer_state(&xstate);
329                 xstate.bytes_in = zip_header->formatted.cmpsize;
330                 xstate.src_fd = zip_fd;
331                 xstate.dst_fd = dst_fd;
332                 if (inflate_unzip(&xstate) < 0)
333                         bb_error_msg_and_die("inflate error");
334                 /* Validate decompression - crc */
335                 if (zip_header->formatted.crc32 != (xstate.crc32 ^ 0xffffffffL)) {
336                         bb_error_msg_and_die("crc error");
337                 }
338                 /* Validate decompression - size */
339                 if (zip_header->formatted.ucmpsize != xstate.bytes_out) {
340                         /* Don't die. Who knows, maybe len calculation
341                          * was botched somewhere. After all, crc matched! */
342                         bb_error_msg("bad length");
343                 }
344         }
345         /* TODO? method 12: bzip2, method 14: LZMA */
346 }
347
/*
 * Read one line of at most 79 characters from stdin into buf80,
 * which must be at least 80 bytes long.
 * Pending output is flushed first; exits with an error message
 * if nothing can be read.
 */
static void my_fgets80(char *buf80)
{
        fflush_all();

        if (fgets(buf80, 80, stdin) != NULL)
                return;

        bb_perror_msg_and_die("can't read standard input");
}
355
356 int unzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
357 int unzip_main(int argc, char **argv)
358 {
359         enum { O_PROMPT, O_NEVER, O_ALWAYS };
360
361         smallint quiet = 0;
362         IF_NOT_FEATURE_UNZIP_CDF(const) smallint verbose = 0;
363         smallint listing = 0;
364         smallint overwrite = O_PROMPT;
365         smallint x_opt_seen;
366         uint32_t cdf_offset;
367         unsigned long total_usize;
368         unsigned long total_size;
369         unsigned total_entries;
370         int dst_fd = -1;
371         char *src_fn = NULL;
372         char *dst_fn = NULL;
373         llist_t *zaccept = NULL;
374         llist_t *zreject = NULL;
375         char *base_dir = NULL;
376         int i, opt;
377         char key_buf[80]; /* must match size used by my_fgets80 */
378         struct stat stat_buf;
379
380 /* -q, -l and -v: UnZip 5.52 of 28 February 2005, by Info-ZIP:
381  *
382  * # /usr/bin/unzip -qq -v decompress_unlzma.i.zip
383  *   204372  Defl:N    35278  83%  09-06-09 14:23  0d056252  decompress_unlzma.i
384  * # /usr/bin/unzip -q -v decompress_unlzma.i.zip
385  *  Length   Method    Size  Ratio   Date   Time   CRC-32    Name
386  * --------  ------  ------- -----   ----   ----   ------    ----
387  *   204372  Defl:N    35278  83%  09-06-09 14:23  0d056252  decompress_unlzma.i
388  * --------          -------  ---                            -------
389  *   204372            35278  83%                            1 file
390  * # /usr/bin/unzip -v decompress_unlzma.i.zip
391  * Archive:  decompress_unlzma.i.zip
392  *  Length   Method    Size  Ratio   Date   Time   CRC-32    Name
393  * --------  ------  ------- -----   ----   ----   ------    ----
394  *   204372  Defl:N    35278  83%  09-06-09 14:23  0d056252  decompress_unlzma.i
395  * --------          -------  ---                            -------
396  *   204372            35278  83%                            1 file
397  * # unzip -v decompress_unlzma.i.zip
398  * Archive:  decompress_unlzma.i.zip
399  *   Length     Date   Time    Name
400  *  --------    ----   ----    ----
401  *    204372  09-06-09 14:23   decompress_unlzma.i
402  *  --------                   -------
403  *    204372                   1 files
404  * # /usr/bin/unzip -l -qq decompress_unlzma.i.zip
405  *    204372  09-06-09 14:23   decompress_unlzma.i
406  * # /usr/bin/unzip -l -q decompress_unlzma.i.zip
407  *   Length     Date   Time    Name
408  *  --------    ----   ----    ----
409  *    204372  09-06-09 14:23   decompress_unlzma.i
410  *  --------                   -------
411  *    204372                   1 file
412  * # /usr/bin/unzip -l decompress_unlzma.i.zip
413  * Archive:  decompress_unlzma.i.zip
414  *   Length     Date   Time    Name
415  *  --------    ----   ----    ----
416  *    204372  09-06-09 14:23   decompress_unlzma.i
417  *  --------                   -------
418  *    204372                   1 file
419  */
420
421         x_opt_seen = 0;
422         /* '-' makes getopt return 1 for non-options */
423         while ((opt = getopt(argc, argv, "-d:lnopqxv")) != -1) {
424                 switch (opt) {
425                 case 'd':  /* Extract to base directory */
426                         base_dir = optarg;
427                         break;
428
429                 case 'l': /* List */
430                         listing = 1;
431                         break;
432
433                 case 'n': /* Never overwrite existing files */
434                         overwrite = O_NEVER;
435                         break;
436
437                 case 'o': /* Always overwrite existing files */
438                         overwrite = O_ALWAYS;
439                         break;
440
441                 case 'p': /* Extract files to stdout and fall through to set verbosity */
442                         dst_fd = STDOUT_FILENO;
443
444                 case 'q': /* Be quiet */
445                         quiet++;
446                         break;
447
448                 case 'v': /* Verbose list */
449                         IF_FEATURE_UNZIP_CDF(verbose++;)
450                         listing = 1;
451                         break;
452
453                 case 'x':
454                         x_opt_seen = 1;
455                         break;
456
457                 case 1:
458                         if (!src_fn) {
459                                 /* The zip file */
460                                 /* +5: space for ".zip" and NUL */
461                                 src_fn = xmalloc(strlen(optarg) + 5);
462                                 strcpy(src_fn, optarg);
463                         } else if (!x_opt_seen) {
464                                 /* Include files */
465                                 llist_add_to(&zaccept, optarg);
466                         } else {
467                                 /* Exclude files */
468                                 llist_add_to(&zreject, optarg);
469                         }
470                         break;
471
472                 default:
473                         bb_show_usage();
474                 }
475         }
476
477 #ifndef __GLIBC__
478         /*
479          * This code is needed for non-GNU getopt
480          * which doesn't understand "-" in option string.
481          * The -x option won't work properly in this case:
482          * "unzip a.zip q -x w e" will be interpreted as
483          * "unzip a.zip q w e -x" = "unzip a.zip q w e"
484          */
485         argv += optind;
486         if (argv[0]) {
487                 /* +5: space for ".zip" and NUL */
488                 src_fn = xmalloc(strlen(argv[0]) + 5);
489                 strcpy(src_fn, argv[0]);
490                 while (*++argv)
491                         llist_add_to(&zaccept, *argv);
492         }
493 #endif
494
495         if (!src_fn) {
496                 bb_show_usage();
497         }
498
499         /* Open input file */
500         if (LONE_DASH(src_fn)) {
501                 xdup2(STDIN_FILENO, zip_fd);
502                 /* Cannot use prompt mode since zip data is arriving on STDIN */
503                 if (overwrite == O_PROMPT)
504                         overwrite = O_NEVER;
505         } else {
506                 static const char extn[][5] ALIGN1 = { ".zip", ".ZIP" };
507                 char *ext = src_fn + strlen(src_fn);
508                 int src_fd;
509
510                 i = 0;
511                 for (;;) {
512                         src_fd = open(src_fn, O_RDONLY);
513                         if (src_fd >= 0)
514                                 break;
515                         if (++i > 2) {
516                                 *ext = '\0';
517                                 bb_error_msg_and_die("can't open %s[.zip]", src_fn);
518                         }
519                         strcpy(ext, extn[i - 1]);
520                 }
521                 xmove_fd(src_fd, zip_fd);
522         }
523
524         /* Change dir if necessary */
525         if (base_dir)
526                 xchdir(base_dir);
527
528         if (quiet <= 1) { /* not -qq */
529                 if (quiet == 0)
530                         printf("Archive:  %s\n", src_fn);
531                 if (listing) {
532                         puts(verbose ?
533                                 " Length   Method    Size  Cmpr    Date    Time   CRC-32   Name\n"
534                                 "--------  ------  ------- ---- ---------- ----- --------  ----"
535                                 :
536                                 "  Length      Date    Time    Name\n"
537                                 "---------  ---------- -----   ----"
538                                 );
539                 }
540         }
541
542 /* Example of an archive with one 0-byte long file named 'z'
543  * created by Zip 2.31 on Unix:
544  * 0000 [50 4b]03 04 0a 00 00 00 00 00 42 1a b8 3c 00 00 |PK........B..<..|
545  *       sig........ vneed flags compr mtime mdate crc32>
546  * 0010  00 00 00 00 00 00 00 00 00 00 01 00 15 00 7a 55 |..............zU|
547  *      >..... csize...... usize...... fnlen exlen fn ex>
548  * 0020  54 09 00 03 cc d3 f9 4b cc d3 f9 4b 55 78 04 00 |T......K...KUx..|
549  *      >tra_field......................................
550  * 0030  00 00 00 00[50 4b]01 02 17 03 0a 00 00 00 00 00 |....PK..........|
551  *       ........... sig........ vmade vneed flags compr
552  * 0040  42 1a b8 3c 00 00 00 00 00 00 00 00 00 00 00 00 |B..<............|
553  *       mtime mdate crc32...... csize...... usize......
554  * 0050  01 00 0d 00 00 00 00 00 00 00 00 00 a4 81 00 00 |................|
555  *       fnlen exlen clen. dnum. iattr eattr...... relofs> (eattr = rw-r--r--)
556  * 0060  00 00 7a 55 54 05 00 03 cc d3 f9 4b 55 78 00 00 |..zUT......KUx..|
557  *      >..... fn extra_field...........................
558  * 0070 [50 4b]05 06 00 00 00 00 01 00 01 00 3c 00 00 00 |PK..........<...|
559  * 0080  34 00 00 00 00 00                               |4.....|
560  */
561         total_usize = 0;
562         total_size = 0;
563         total_entries = 0;
564         cdf_offset = find_cdf_offset(); /* try to seek to the end, find CDE and CDF start */
565         while (1) {
566                 zip_header_t zip_header;
567                 mode_t dir_mode = 0777;
568 #if ENABLE_FEATURE_UNZIP_CDF
569                 mode_t file_mode = 0666;
570 #endif
571
572                 if (!ENABLE_FEATURE_UNZIP_CDF || cdf_offset == BAD_CDF_OFFSET) {
573                         /* Normally happens when input is unseekable.
574                          *
575                          * Valid ZIP file has Central Directory at the end
576                          * with central directory file headers (CDFs).
577                          * After it, there is a Central Directory End structure.
578                          * CDFs identify what files are in the ZIP and where
579                          * they are located. This allows ZIP readers to load
580                          * the list of files without reading the entire ZIP archive.
581                          * ZIP files may be appended to, only files specified in
582                          * the CD are valid. Scanning for local file headers is
583                          * not a correct algorithm.
584                          *
585                          * We try to do the above, and resort to "linear" reading
586                          * of ZIP file only if seek failed or CDE wasn't found.
587                          */
588                         uint32_t magic;
589
590                         /* Check magic number */
591                         xread(zip_fd, &magic, 4);
592                         /* Central directory? It's at the end, so exit */
593                         if (magic == ZIP_CDF_MAGIC) {
594                                 dbg("got ZIP_CDF_MAGIC");
595                                 break;
596                         }
597                         /* Data descriptor? It was a streaming file, go on */
598                         if (magic == ZIP_DD_MAGIC) {
599                                 dbg("got ZIP_DD_MAGIC");
600                                 /* skip over duplicate crc32, cmpsize and ucmpsize */
601                                 unzip_skip(3 * 4);
602                                 continue;
603                         }
604                         if (magic != ZIP_FILEHEADER_MAGIC)
605                                 bb_error_msg_and_die("invalid zip magic %08X", (int)magic);
606                         dbg("got ZIP_FILEHEADER_MAGIC");
607
608                         xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN);
609                         FIX_ENDIANNESS_ZIP(zip_header);
610                         if ((zip_header.formatted.method != 0)
611                          && (zip_header.formatted.method != 8)
612                         ) {
613                                 /* TODO? method 12: bzip2, method 14: LZMA */
614                                 bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method);
615                         }
616                         if (zip_header.formatted.zip_flags & SWAP_LE16(0x0009)) {
617                                 bb_error_msg_and_die("zip flags 1 and 8 are not supported");
618                         }
619                 }
620 #if ENABLE_FEATURE_UNZIP_CDF
621                 else {
622                         /* cdf_offset is valid (and we know the file is seekable) */
623                         cdf_header_t cdf_header;
624                         cdf_offset = read_next_cdf(cdf_offset, &cdf_header);
625                         if (cdf_offset == 0) /* EOF? */
626                                 break;
627 # if 0
628                         xlseek(zip_fd,
629                                 SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4,
630                                 SEEK_SET);
631                         xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN);
632                         FIX_ENDIANNESS_ZIP(zip_header);
633                         if (zip_header.formatted.zip_flags & SWAP_LE16(0x0008)) {
634                                 /* 0x0008 - streaming. [u]cmpsize can be reliably gotten
635                                  * only from Central Directory.
636                                  */
637                                 zip_header.formatted.crc32    = cdf_header.formatted.crc32;
638                                 zip_header.formatted.cmpsize  = cdf_header.formatted.cmpsize;
639                                 zip_header.formatted.ucmpsize = cdf_header.formatted.ucmpsize;
640                         }
641 # else
642                         /* CDF has the same data as local header, no need to read the latter */
643                         memcpy(&zip_header.formatted.version,
644                                 &cdf_header.formatted.version_needed, ZIP_HEADER_LEN);
645                         xlseek(zip_fd,
646                                 SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4 + ZIP_HEADER_LEN,
647                                 SEEK_SET);
648 # endif
649                         if ((cdf_header.formatted.version_made_by >> 8) == 3) {
650                                 /* This archive is created on Unix */
651                                 dir_mode = file_mode = (cdf_header.formatted.external_file_attributes >> 16);
652                         }
653                 }
654 #endif
655
656                 if (zip_header.formatted.zip_flags & SWAP_LE16(0x0001)) {
657                         /* 0x0001 - encrypted */
658                         bb_error_msg_and_die("zip flag 1 (encryption) is not supported");
659                 }
660                 dbg("File cmpsize:0x%x extra_len:0x%x ucmpsize:0x%x",
661                         (unsigned)zip_header.formatted.cmpsize,
662                         (unsigned)zip_header.formatted.extra_len,
663                         (unsigned)zip_header.formatted.ucmpsize
664                 );
665
666                 /* Read filename */
667                 free(dst_fn);
668                 dst_fn = xzalloc(zip_header.formatted.filename_len + 1);
669                 xread(zip_fd, dst_fn, zip_header.formatted.filename_len);
670
671                 /* Skip extra header bytes */
672                 unzip_skip(zip_header.formatted.extra_len);
673
674                 /* Guard against "/abspath", "/../" and similar attacks */
675                 overlapping_strcpy(dst_fn, strip_unsafe_prefix(dst_fn));
676
677                 /* Filter zip entries */
678                 if (find_list_entry(zreject, dst_fn)
679                  || (zaccept && !find_list_entry(zaccept, dst_fn))
680                 ) { /* Skip entry */
681                         i = 'n';
682                 } else {
683                         if (listing) {
684                                 /* List entry */
685                                 char dtbuf[sizeof("mm-dd-yyyy hh:mm")];
686                                 sprintf(dtbuf, "%02u-%02u-%04u %02u:%02u",
687                                         (zip_header.formatted.moddate >> 5) & 0xf,  // mm: 0x01e0
688                                         (zip_header.formatted.moddate)      & 0x1f, // dd: 0x001f
689                                         (zip_header.formatted.moddate >> 9) + 1980, // yy: 0xfe00
690                                         (zip_header.formatted.modtime >> 11),       // hh: 0xf800
691                                         (zip_header.formatted.modtime >> 5) & 0x3f  // mm: 0x07e0
692                                         // seconds/2 are not shown, encoded in ----------- 0x001f
693                                 );
694                                 if (!verbose) {
695                                         //      "  Length      Date    Time    Name\n"
696                                         //      "---------  ---------- -----   ----"
697                                         printf(       "%9u  " "%s   "         "%s\n",
698                                                 (unsigned)zip_header.formatted.ucmpsize,
699                                                 dtbuf,
700                                                 dst_fn);
701                                 } else {
702                                         unsigned long percents = zip_header.formatted.ucmpsize - zip_header.formatted.cmpsize;
703                                         if ((int32_t)percents < 0)
704                                                 percents = 0; /* happens if ucmpsize < cmpsize */
705                                         percents = percents * 100;
706                                         if (zip_header.formatted.ucmpsize)
707                                                 percents /= zip_header.formatted.ucmpsize;
708                                         //      " Length   Method    Size  Cmpr    Date    Time   CRC-32   Name\n"
709                                         //      "--------  ------  ------- ---- ---------- ----- --------  ----"
710                                         printf(      "%8u  %s"        "%9u%4u%% " "%s "         "%08x  "  "%s\n",
711                                                 (unsigned)zip_header.formatted.ucmpsize,
712                                                 zip_header.formatted.method == 0 ? "Stored" : "Defl:N", /* Defl is method 8 */
713 /* TODO: show other methods?
714  *  1 - Shrunk
715  *  2 - Reduced with compression factor 1
716  *  3 - Reduced with compression factor 2
717  *  4 - Reduced with compression factor 3
718  *  5 - Reduced with compression factor 4
719  *  6 - Imploded
720  *  7 - Reserved for Tokenizing compression algorithm
721  *  9 - Deflate64
722  * 10 - PKWARE Data Compression Library Imploding
723  * 11 - Reserved by PKWARE
724  * 12 - BZIP2
725  */
726                                                 (unsigned)zip_header.formatted.cmpsize,
727                                                 (unsigned)percents,
728                                                 dtbuf,
729                                                 zip_header.formatted.crc32,
730                                                 dst_fn);
731                                         total_size += zip_header.formatted.cmpsize;
732                                 }
733                                 total_usize += zip_header.formatted.ucmpsize;
734                                 i = 'n';
735                         } else if (dst_fd == STDOUT_FILENO) {
736                                 /* Extracting to STDOUT */
737                                 i = -1;
738                         } else if (last_char_is(dst_fn, '/')) {
739                                 /* Extract directory */
740                                 if (stat(dst_fn, &stat_buf) == -1) {
741                                         if (errno != ENOENT) {
742                                                 bb_perror_msg_and_die("can't stat '%s'", dst_fn);
743                                         }
744                                         if (!quiet) {
745                                                 printf("   creating: %s\n", dst_fn);
746                                         }
747                                         unzip_create_leading_dirs(dst_fn);
748                                         if (bb_make_directory(dst_fn, dir_mode, FILEUTILS_IGNORE_CHMOD_ERR)) {
749                                                 xfunc_die();
750                                         }
751                                 } else {
752                                         if (!S_ISDIR(stat_buf.st_mode)) {
753                                                 bb_error_msg_and_die("'%s' exists but is not a %s",
754                                                         dst_fn, "directory");
755                                         }
756                                 }
757                                 i = 'n';
758                         } else {
759                                 /* Extract file */
760  check_file:
761                                 if (stat(dst_fn, &stat_buf) == -1) {
762                                         /* File does not exist */
763                                         if (errno != ENOENT) {
764                                                 bb_perror_msg_and_die("can't stat '%s'", dst_fn);
765                                         }
766                                         i = 'y';
767                                 } else {
768                                         /* File already exists */
769                                         if (overwrite == O_NEVER) {
770                                                 i = 'n';
771                                         } else if (S_ISREG(stat_buf.st_mode)) {
772                                                 /* File is regular file */
773                                                 if (overwrite == O_ALWAYS) {
774                                                         i = 'y';
775                                                 } else {
776                                                         printf("replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ", dst_fn);
777                                                         my_fgets80(key_buf);
778                                                         i = key_buf[0];
779                                                 }
780                                         } else {
781                                                 /* File is not regular file */
782                                                 bb_error_msg_and_die("'%s' exists but is not a %s",
783                                                         dst_fn, "regular file");
784                                         }
785                                 }
786                         }
787                 }
788
789                 switch (i) {
790                 case 'A':
791                         overwrite = O_ALWAYS;
792                 case 'y': /* Open file and fall into unzip */
793                         unzip_create_leading_dirs(dst_fn);
794 #if ENABLE_FEATURE_UNZIP_CDF
795                         dst_fd = xopen3(dst_fn, O_WRONLY | O_CREAT | O_TRUNC, file_mode);
796 #else
797                         dst_fd = xopen(dst_fn, O_WRONLY | O_CREAT | O_TRUNC);
798 #endif
799                 case -1: /* Unzip */
800                         if (!quiet) {
801                                 printf("  inflating: %s\n", dst_fn);
802                         }
803                         unzip_extract(&zip_header, dst_fd);
804                         if (dst_fd != STDOUT_FILENO) {
805                                 /* closing STDOUT is potentially bad for future business */
806                                 close(dst_fd);
807                         }
808                         break;
809
810                 case 'N':
811                         overwrite = O_NEVER;
812                 case 'n':
813                         /* Skip entry data */
814                         unzip_skip(zip_header.formatted.cmpsize);
815                         break;
816
817                 case 'r':
818                         /* Prompt for new name */
819                         printf("new name: ");
820                         my_fgets80(key_buf);
821                         free(dst_fn);
822                         dst_fn = xstrdup(key_buf);
823                         chomp(dst_fn);
824                         goto check_file;
825
826                 default:
827                         printf("error: invalid response [%c]\n", (char)i);
828                         goto check_file;
829                 }
830
831                 total_entries++;
832         }
833
834         if (listing && quiet <= 1) {
835                 if (!verbose) {
836                         //      "  Length      Date    Time    Name\n"
837                         //      "---------  ---------- -----   ----"
838                         printf( " --------%21s"               "-------\n"
839                                      "%9lu%21s"               "%u files\n",
840                                 "",
841                                 total_usize, "", total_entries);
842                 } else {
843                         unsigned long percents = total_usize - total_size;
844                         if ((long)percents < 0)
845                                 percents = 0; /* happens if usize < size */
846                         percents = percents * 100;
847                         if (total_usize)
848                                 percents /= total_usize;
849                         //      " Length   Method    Size  Cmpr    Date    Time   CRC-32   Name\n"
850                         //      "--------  ------  ------- ---- ---------- ----- --------  ----"
851                         printf( "--------          ------- ----%28s"                      "----\n"
852                                 "%8lu"              "%17lu%4u%%%28s"                      "%u files\n",
853                                 "",
854                                 total_usize, total_size, (unsigned)percents, "",
855                                 total_entries);
856                 }
857         }
858
859         return 0;
860 }