Implement suggestion from Adam Slattery, (don't default to killing closing bug #1190.
[oweals/busybox.git] / libbb / unarchive.c
1 /*
2  *  Copyright (C) 2000 by Glenn McGrath
3  *  Copyright (C) 2001 by Laurence Anderson
4  *      
5  *  Based on previous work by busybox developers and others.
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU Library General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20  */
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <utime.h>
28 #include "libbb.h"
29
30 extern void seek_sub_file(FILE *src_stream, const int count);
31 extern char *extract_archive(FILE *src_stream, FILE *out_stream, const file_header_t *file_entry,
32  const int function, const char *prefix);
33
34
/* Byte position within the archive being processed; the reader functions in
 * this file add to it as they consume input.  Storage is emitted only when
 * L_archive_offset is defined, otherwise just the declaration is visible. */
#ifndef L_archive_offset
extern off_t archive_offset;
#else
off_t archive_offset;
#endif
40
41 #ifdef L_seek_sub_file
42 void seek_sub_file(FILE *src_stream, const int count)
43 {
44         int i;
45         /* Try to fseek as faster */
46         archive_offset += count;
47         if (fseek(src_stream, count, SEEK_CUR) != 0 && errno == ESPIPE) {
48         for (i = 0; i < count; i++) {
49                 fgetc(src_stream);
50                 }
51         }
52         return;
53 }
54 #endif  
55
56
57
58 #ifdef L_extract_archive
59 /* Extract the data postioned at src_stream to either filesystem, stdout or 
60  * buffer depending on the value of 'function' which is defined in libbb.h 
61  *
62  * prefix doesnt have to be just a directory, it may prefix the filename as well.
63  *
64  * e.g. '/var/lib/dpkg/info/dpkg.' will extract all files to the base bath 
65  * '/var/lib/dpkg/info/' and all files/dirs created in that dir will have 
66  * 'dpkg.' as their prefix
67  *
68  * For this reason if prefix does point to a dir then it must end with a
69  * trailing '/' or else the last dir will be assumed to be the file prefix 
70  */
71 char *extract_archive(FILE *src_stream, FILE *out_stream, const file_header_t *file_entry,
72  const int function, const char *prefix)
73 {
74         FILE *dst_stream = NULL;
75         char *full_name = NULL;
76         char *buffer = NULL;
77         struct utimbuf t;
78
79         /* prefix doesnt have to be a proper path it may prepend 
80          * the filename as well */
81         if (prefix != NULL) {
82                 /* strip leading '/' in filename to extract as prefix may not be dir */
83                 /* Cant use concat_path_file here as prefix might not be a directory */
84                 char *path = file_entry->name;
85                 if (*path == '/') {
86                         path++;
87                 }
88                 full_name = xmalloc(strlen(prefix) + strlen(path) + 1);
89                 strcpy(full_name, prefix);
90                 strcat(full_name, path);
91         } else {
92                 full_name = file_entry->name;
93         }
94
95         if (function & extract_to_stdout) {
96                 if (S_ISREG(file_entry->mode)) {
97                         copy_file_chunk(src_stream, out_stream, file_entry->size);                      
98                         archive_offset += file_entry->size;
99                 }
100         }
101         else if (function & extract_one_to_buffer) { 
102                 if (S_ISREG(file_entry->mode)) {
103                         buffer = (char *) xmalloc(file_entry->size + 1);
104                         fread(buffer, 1, file_entry->size, src_stream);
105                         archive_offset += file_entry->size;
106                         return(buffer);
107                 }
108         }
109         else if (function & extract_all_to_fs) {
110                 struct stat oldfile;
111                 int stat_res;
112                 stat_res = lstat (full_name, &oldfile);
113                 if (stat_res == 0) { /* The file already exists */
114                         if ((function & extract_unconditional) || (oldfile.st_mtime < file_entry->mtime)) {
115                                 if (!S_ISDIR(oldfile.st_mode)) {
116                                         unlink(full_name); /* Directories might not be empty etc */
117                                 }
118                         } else {
119                                 error_msg("%s not created: newer or same age file exists", file_entry->name);
120                                         seek_sub_file(src_stream, file_entry->size);
121                                 return (NULL);
122                         }
123                 }
124                 if (function & extract_create_leading_dirs) { /* Create leading directories with default umask */
125                         char *parent = dirname(full_name);
126                         if (make_directory (parent, -1, FILEUTILS_RECUR) != 0) {
127                                 error_msg("couldn't create leading directories");
128                         }
129                         free (parent);
130                 }
131                 switch(file_entry->mode & S_IFMT) {
132                         case S_IFREG:
133                                 if (file_entry->link_name) { /* Found a cpio hard link */
134                                         if (link(file_entry->link_name, full_name) != 0) {
135                                                 perror_msg("Cannot link from %s to '%s'",
136                                                         file_entry->name, file_entry->link_name);
137                                         }
138                                 } else {
139                                         if ((dst_stream = wfopen(full_name, "w")) == NULL) {
140                                                 seek_sub_file(src_stream, file_entry->size);
141                                                 return NULL;
142                                         }
143                                         archive_offset += file_entry->size;
144                                         copy_file_chunk(src_stream, dst_stream, file_entry->size);                      
145                                         fclose(dst_stream);
146                                 }
147                                 break;
148                         case S_IFDIR:
149                                 if (stat_res != 0) {
150                                         if (mkdir(full_name, file_entry->mode) < 0) {
151                                                 perror_msg("extract_archive: ");
152                                         }
153                                 }
154                                 break;
155                         case S_IFLNK:
156                                 if (symlink(file_entry->link_name, full_name) < 0) {
157                                         perror_msg("Cannot create symlink from %s to '%s'", file_entry->name, file_entry->link_name); 
158                                         return NULL;
159                                 }
160                                 break;
161                         case S_IFSOCK:
162                         case S_IFBLK:
163                         case S_IFCHR:
164                         case S_IFIFO:
165                                 if (mknod(full_name, file_entry->mode, file_entry->device) == -1) {
166                                         perror_msg("Cannot create node %s", file_entry->name);
167                                         return NULL;
168                                 }
169                                 break;
170                 }
171
172                 /* Changing a symlink's properties normally changes the properties of the 
173                  * file pointed to, so dont try and change the date or mode, lchown does
174                  * does the right thing, but isnt available in older versions of libc */
175                 if (S_ISLNK(file_entry->mode)) {
176 #if (__GLIBC__ >= 2) && (__GLIBC_MINOR__ >= 1)
177                         lchown(full_name, file_entry->uid, file_entry->gid);
178 #endif
179                 } else {
180                         if (function & extract_preserve_date) {
181                                 t.actime = file_entry->mtime;
182                                 t.modtime = file_entry->mtime;
183                                 utime(full_name, &t);
184                         }
185                         chmod(full_name, file_entry->mode);
186                         chown(full_name, file_entry->uid, file_entry->gid);
187                 }
188         } else {
189                 /* If we arent extracting data we have to skip it, 
190                  * if data size is 0 then then just do it anyway
191                  * (saves testing for it) */
192                 seek_sub_file(src_stream, file_entry->size);
193         }
194
195         /* extract_list and extract_verbose_list can be used in conjunction
196          * with one of the above four extraction functions, so do this seperately */
197         if (function & extract_verbose_list) {
198                 fprintf(out_stream, "%s %d/%d %8d %s ", mode_string(file_entry->mode), 
199                         file_entry->uid, file_entry->gid,
200                         (int) file_entry->size, time_string(file_entry->mtime));
201         }
202         if ((function & extract_list) || (function & extract_verbose_list)){
203                 /* fputs doesnt add a trailing \n, so use fprintf */
204                 fprintf(out_stream, "%s\n", file_entry->name);
205         }
206
207         free(full_name);
208
209         return(NULL); /* Maybe we should say if failed */
210 }
211 #endif
212
213 #ifdef L_unarchive
214 char *unarchive(FILE *src_stream, file_header_t *(*get_headers)(FILE *),
215         const int extract_function, const char *prefix, char **extract_names)
216 {
217         file_header_t *file_entry;
218         int found;
219         int i;
220         char *buffer = NULL;
221
222         archive_offset = 0;
223         while ((file_entry = get_headers(src_stream)) != NULL) {
224                 found = FALSE;
225                 if (extract_names[0] != NULL) {
226                         for(i = 0; extract_names[i] != 0; i++) {
227                                 if (strcmp(extract_names[i], file_entry->name) == 0) {
228                                         found = TRUE;
229                                 }
230                         }
231                         if (!found) {
232                                 /* seek past the data entry */
233                                 seek_sub_file(src_stream, file_entry->size);
234                                 continue;
235                         }
236                 }
237                 buffer = extract_archive(src_stream, stdout, file_entry, extract_function, prefix);
238         }
239         return(buffer);
240 }
241 #endif
242
243 #ifdef L_get_header_ar
244 file_header_t *get_header_ar(FILE *src_stream)
245 {
246         file_header_t *typed;
247         union {
248                 char raw[60];
249                 struct {
250                         char name[16];
251                         char date[12];
252                         char uid[6];
253                         char gid[6];
254                         char mode[8];
255                         char size[10];
256                         char magic[2];
257                 } formated;
258         } ar;
259         static char *ar_long_names;
260
261         if (fread(ar.raw, 1, 60, src_stream) != 60) {
262                 free (ar_long_names);
263                 return(NULL);
264         }
265         archive_offset += 60;
266         /* align the headers based on the header magic */
267         if ((ar.formated.magic[0] != '`') || (ar.formated.magic[1] != '\n')) {
268                 /* some version of ar, have an extra '\n' after each data entry,
269                  * this puts the next header out by 1 */
270                 if (ar.formated.magic[1] != '`') {
271                         error_msg("Invalid magic");
272                         return(NULL);
273                 }
274                 /* read the next char out of what would be the data section,
275                  * if its a '\n' then it is a valid header offset by 1*/
276                 archive_offset++;
277                 if (fgetc(src_stream) != '\n') {
278                         error_msg("Invalid magic");
279                         return(NULL);
280                 }
281                 /* fix up the header, we started reading 1 byte too early */
282                 /* raw_header[60] wont be '\n' as it should, but it doesnt matter */
283                 memmove(ar.raw, &ar.raw[1], 59);
284         }
285                 
286         typed = (file_header_t *) xcalloc(1, sizeof(file_header_t));
287
288         typed->size = (size_t) atoi(ar.formated.size);
289         /* long filenames have '/' as the first character */
290         if (ar.formated.name[0] == '/') {
291                 if (ar.formated.name[1] == '/') {
292                         /* If the second char is a '/' then this entries data section
293                          * stores long filename for multiple entries, they are stored
294                          * in static variable long_names for use in future entries */
295                         ar_long_names = (char *) xrealloc(ar_long_names, typed->size);
296                         fread(ar_long_names, 1, typed->size, src_stream);
297                         archive_offset += typed->size;
298                         /* This ar entries data section only contained filenames for other records
299                          * they are stored in the static ar_long_names for future reference */
300                         return (get_header_ar(src_stream)); /* Return next header */
301                 } else if (ar.formated.name[1] == ' ') {
302                         /* This is the index of symbols in the file for compilers */
303                         seek_sub_file(src_stream, typed->size);
304                         return (get_header_ar(src_stream)); /* Return next header */
305                 } else {
306                         /* The number after the '/' indicates the offset in the ar data section
307                         (saved in variable long_name) that conatains the real filename */
308                         if (!ar_long_names) {
309                                 error_msg("Cannot resolve long file name");
310                                 return (NULL);
311                         }
312                         typed->name = xstrdup(ar_long_names + atoi(&ar.formated.name[1]));
313                 }
314         } else {
315                 /* short filenames */
316                 typed->name = xcalloc(1, 16);
317                 strncpy(typed->name, ar.formated.name, 16);
318         }
319         typed->name[strcspn(typed->name, " /")]='\0';
320
321         /* convert the rest of the now valid char header to its typed struct */ 
322         parse_mode(ar.formated.mode, &typed->mode);
323         typed->mtime = atoi(ar.formated.date);
324         typed->uid = atoi(ar.formated.uid);
325         typed->gid = atoi(ar.formated.gid);
326
327         return(typed);
328 }
329 #endif
330
331 #ifdef L_get_header_cpio
332 struct hardlinks {
333         file_header_t *entry;
334         int inode;
335         struct hardlinks *next;
336 };
337
338 file_header_t *get_header_cpio(FILE *src_stream)
339 {
340         file_header_t *cpio_entry = NULL;
341         char cpio_header[110];
342         int namesize;
343         char dummy[16];
344         int major, minor, nlink, inode;
345         static struct hardlinks *saved_hardlinks = NULL;
346         static int pending_hardlinks = 0;
347
348         if (pending_hardlinks) { /* Deal with any pending hardlinks */
349                 struct hardlinks *tmp = saved_hardlinks, *oldtmp = NULL;
350                 while (tmp) {
351                         if (tmp->entry->link_name) { /* Found a hardlink ready to be extracted */
352                                 cpio_entry = tmp->entry;
353                                 if (oldtmp) oldtmp->next = tmp->next; /* Remove item from linked list */
354                                 else saved_hardlinks = tmp->next;
355                                 free(tmp);
356                                 return (cpio_entry);
357                         }
358                         oldtmp = tmp;
359                         tmp = tmp->next;
360                 }
361                 pending_hardlinks = 0; /* No more pending hardlinks, read next file entry */
362         }
363   
364         /* There can be padding before archive header */
365         seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4);
366         if (fread(cpio_header, 1, 110, src_stream) == 110) {
367                 archive_offset += 110;
368                 if (strncmp(cpio_header, "07070", 5) != 0) {
369                         error_msg("Unsupported format or invalid magic");
370                         return(NULL);
371                 }
372                 switch (cpio_header[5]) {
373                         case '2': /* "crc" header format */
374                                 /* Doesnt do the crc check yet */
375                         case '1': /* "newc" header format */
376                                 cpio_entry = (file_header_t *) xcalloc(1, sizeof(file_header_t));
377                                 sscanf(cpio_header, "%6c%8x%8x%8x%8x%8x%8lx%8lx%16c%8x%8x%8x%8c",
378                                         dummy, &inode, &cpio_entry->mode, &cpio_entry->uid, &cpio_entry->gid,
379                                         &nlink, &cpio_entry->mtime, &cpio_entry->size,
380                                         dummy, &major, &minor, &namesize, dummy);
381
382                                 cpio_entry->name = (char *) xcalloc(1, namesize);
383                                 fread(cpio_entry->name, 1, namesize, src_stream); /* Read in filename */
384                                 archive_offset += namesize;
385                                 /* Skip padding before file contents */
386                                 seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4);
387                                 if (strcmp(cpio_entry->name, "TRAILER!!!") == 0) {
388                                         printf("%d blocks\n", (int) (archive_offset % 512 ? (archive_offset / 512) + 1 : archive_offset / 512)); /* Always round up */
389                                         if (saved_hardlinks) { /* Bummer - we still have unresolved hardlinks */
390                                                 struct hardlinks *tmp = saved_hardlinks, *oldtmp = NULL;
391                                                 while (tmp) {
392                                                         error_msg("%s not created: cannot resolve hardlink", tmp->entry->name);
393                                                         oldtmp = tmp;
394                                                         tmp = tmp->next;
395                                                         free (oldtmp->entry->name);
396                                                         free (oldtmp->entry);
397                                                         free (oldtmp);
398                                                 }
399                                                 saved_hardlinks = NULL;
400                                                 pending_hardlinks = 0;
401                                         }
402                                         return(NULL);
403                                 }
404
405                                 if (S_ISLNK(cpio_entry->mode)) {
406                                         cpio_entry->link_name = (char *) xcalloc(1, cpio_entry->size + 1);
407                                         fread(cpio_entry->link_name, 1, cpio_entry->size, src_stream);
408                                         archive_offset += cpio_entry->size;
409                                         cpio_entry->size = 0; /* Stop possiable seeks in future */
410                                 }
411                                 if (nlink > 1 && !S_ISDIR(cpio_entry->mode)) {
412                                         if (cpio_entry->size == 0) { /* Put file on a linked list for later */
413                                                 struct hardlinks *new = xmalloc(sizeof(struct hardlinks));
414                                                 new->next = saved_hardlinks;
415                                                 new->inode = inode;
416                                                 new->entry = cpio_entry;
417                                                 saved_hardlinks = new;
418                                         return(get_header_cpio(src_stream)); /* Recurse to next file */
419                                         } else { /* Found the file with data in */
420                                                 struct hardlinks *tmp = saved_hardlinks;
421                                                 pending_hardlinks = 1;
422                                                 while (tmp) {
423                                                         if (tmp->inode == inode) {
424                                                                 tmp->entry->link_name = xstrdup(cpio_entry->name);
425                                                                 nlink--;
426                                                         }
427                                                         tmp = tmp->next;
428                                                 }
429                                                 if (nlink > 1) error_msg("error resolving hardlink: did you create the archive with GNU cpio 2.0-2.2?");
430                                         }
431                                 }
432                                 cpio_entry->device = (major << 8) | minor;
433                                 break;
434                         default:
435                                 error_msg("Unsupported format");
436                                 return(NULL);
437                 }
438                 if (ferror(src_stream) || feof(src_stream)) {
439                         perror_msg("Stream error");
440                         return(NULL);
441                 }
442         }
443         return(cpio_entry);
444 }
445 #endif
446
447 #ifdef L_get_header_tar
448 file_header_t *get_header_tar(FILE *tar_stream)
449 {
450         union {
451                 unsigned char raw[512];
452                 struct {
453                         char name[100];         /*   0-99 */
454                         char mode[8];           /* 100-107 */
455                         char uid[8];            /* 108-115 */
456                         char gid[8];            /* 116-123 */
457                         char size[12];          /* 124-135 */
458                         char mtime[12];         /* 136-147 */
459                         char chksum[8];         /* 148-155 */
460                         char typeflag;          /* 156-156 */
461                         char linkname[100];     /* 157-256 */
462                         char magic[6];          /* 257-262 */
463                         char version[2];        /* 263-264 */
464                         char uname[32];         /* 265-296 */
465                         char gname[32];         /* 297-328 */
466                         char devmajor[8];       /* 329-336 */
467                         char devminor[8];       /* 337-344 */
468                         char prefix[155];       /* 345-499 */
469                         char padding[12];       /* 500-512 */
470                 } formated;
471         } tar;
472         file_header_t *tar_entry = NULL;
473         long i;
474         long sum = 0;
475
476         if (archive_offset % 512 != 0) {
477                 seek_sub_file(tar_stream, 512 - (archive_offset % 512));
478         }
479
480         if (fread(tar.raw, 1, 512, tar_stream) != 512) {
481                 error_msg("Couldnt read header");
482                 return(NULL);
483         }
484         archive_offset += 512;
485
486         /* Check header has valid magic, unfortunately some tar files
487          * have empty (0'ed) tar entries at the end, which will
488          * cause this to fail, so fail silently for now
489          */
490         if (strncmp(tar.formated.magic, "ustar", 5) != 0) {
491                 return(NULL);
492         }
493
494         /* Do checksum on headers */
495         for (i =  0; i < 148 ; i++) {
496                 sum += tar.raw[i];
497         }
498         sum += ' ' * 8;
499         for (i =  156; i < 512 ; i++) {
500                 sum += tar.raw[i];
501         }
502         if (sum != strtol(tar.formated.chksum, NULL, 8)) {
503                 error_msg("Invalid tar header checksum");
504                 return(NULL);
505         }
506
507         /* convert to type'ed variables */
508         tar_entry = xcalloc(1, sizeof(file_header_t));
509         tar_entry->name = xstrdup(tar.formated.name);
510
511         parse_mode(tar.formated.mode, &tar_entry->mode);
512         tar_entry->uid   = strtol(tar.formated.uid, NULL, 8);
513         tar_entry->gid   = strtol(tar.formated.gid, NULL, 8);
514         tar_entry->size  = strtol(tar.formated.size, NULL, 8);
515         tar_entry->mtime = strtol(tar.formated.mtime, NULL, 8);
516         tar_entry->link_name  = strlen(tar.formated.linkname) ? 
517             xstrdup(tar.formated.linkname) : NULL;
518         tar_entry->device = (strtol(tar.formated.devmajor, NULL, 8) << 8) +
519                 strtol(tar.formated.devminor, NULL, 8);
520
521         return(tar_entry);
522 }
523 #endif
524
525 #ifdef L_deb_extract
526 char *deb_extract(const char *package_filename, FILE *out_stream, 
527         const int extract_function, const char *prefix, const char *filename)
528 {
529         FILE *deb_stream;
530         FILE *uncompressed_stream = NULL;
531         file_header_t *ar_header = NULL;
532         char *output_buffer = NULL;
533         char *ared_file = NULL;
534         char ar_magic[8];
535         char **file_list;
536         int gunzip_pid;
537
538         file_list = malloc(sizeof(char *));
539         file_list[0] = xstrdup(filename);
540         file_list[1] = NULL;
541
542         if (extract_function & extract_control_tar_gz) {
543                 ared_file = xstrdup("control.tar.gz");
544         }
545         else if (extract_function & extract_data_tar_gz) {              
546                 ared_file = xstrdup("data.tar.gz");
547         }
548
549         /* open the debian package to be worked on */
550         deb_stream = wfopen(package_filename, "r");
551
552         /* check ar magic */
553         fread(ar_magic, 1, 8, deb_stream);
554         if (strncmp(ar_magic,"!<arch>",7) != 0) {
555                 error_msg_and_die("invalid magic");
556         }
557         archive_offset = 8;
558
559         while ((ar_header = get_header_ar(deb_stream)) != NULL) {
560                 if (strcmp(ared_file, ar_header->name) == 0) {
561                         /* open a stream of decompressed data */
562                         uncompressed_stream = gz_open(deb_stream, &gunzip_pid);
563                         archive_offset = 0;
564                         output_buffer = unarchive(uncompressed_stream, get_header_tar, extract_function, prefix, file_list);
565                 }
566                 seek_sub_file(deb_stream, ar_header->size);
567         }
568         gz_close(gunzip_pid);
569         fclose(deb_stream);
570         fclose(uncompressed_stream);
571         free(ared_file);
572         return(output_buffer);
573 }
574 #endif