Remove debugging statement.
[oweals/busybox.git] / libbb / unarchive.c
1 /*
2  *  Copyright (C) 2000 by Glenn McGrath
3  *  Copyright (C) 2001 by Laurence Anderson
4  *      
5  *  Based on previous work by busybox developers and others.
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU Library General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20  */
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <utime.h>
28 #include "libbb.h"
29
30 extern void seek_sub_file(FILE *src_stream, const int count);
31 extern char *extract_archive(FILE *src_stream, FILE *out_stream, const file_header_t *file_entry,
32  const int function, const char *prefix);
33
34
35 #ifdef L_archive_offset
36 off_t archive_offset;
37 #else
38 extern off_t archive_offset;
39 #endif  
40
41 #ifdef L_seek_sub_file
42 void seek_sub_file(FILE *src_stream, const int count)
43 {
44         int i;
45         /* Try to fseek as faster */
46         archive_offset += count;
47         if (fseek(src_stream, count, SEEK_CUR) != 0 && errno == ESPIPE) {
48         for (i = 0; i < count; i++) {
49                 fgetc(src_stream);
50                 }
51         }
52         return;
53 }
54 #endif  
55
56
57
58 #ifdef L_extract_archive
59 /* Extract the data postioned at src_stream to either filesystem, stdout or 
60  * buffer depending on the value of 'function' which is defined in libbb.h 
61  *
62  * prefix doesnt have to be just a directory, it may prefix the filename as well.
63  *
64  * e.g. '/var/lib/dpkg/info/dpkg.' will extract all files to the base bath 
65  * '/var/lib/dpkg/info/' and all files/dirs created in that dir will have 
66  * 'dpkg.' as their prefix
67  *
68  * For this reason if prefix does point to a dir then it must end with a
69  * trailing '/' or else the last dir will be assumed to be the file prefix 
70  */
71 char *extract_archive(FILE *src_stream, FILE *out_stream, const file_header_t *file_entry,
72  const int function, const char *prefix)
73 {
74         FILE *dst_stream = NULL;
75         char *full_name = NULL;
76         char *buffer = NULL;
77         struct utimbuf t;
78
79         /* prefix doesnt have to be a proper path it may prepend 
80          * the filename as well */
81         if (prefix != NULL) {
82                 /* strip leading '/' in filename to extract as prefix may not be dir */
83                 /* Cant use concat_path_file here as prefix might not be a directory */
84                 char *path = file_entry->name;
85                 if (*path == '/') {
86                         path++;
87                 }
88                 full_name = xmalloc(strlen(prefix) + strlen(path) + 1);
89                 strcpy(full_name, prefix);
90                 strcat(full_name, path);
91         } else {
92                 full_name = file_entry->name;
93         }
94
95         if (function & extract_to_stdout) {
96                 if (S_ISREG(file_entry->mode)) {
97                         copy_file_chunk(src_stream, out_stream, file_entry->size);                      
98                         archive_offset += file_entry->size;
99                 }
100         }
101         else if (function & extract_one_to_buffer) { 
102                 if (S_ISREG(file_entry->mode)) {
103                         buffer = (char *) xmalloc(file_entry->size + 1);
104                         fread(buffer, 1, file_entry->size, src_stream);
105                         archive_offset += file_entry->size;
106                         return(buffer);
107                 }
108         }
109         else if (function & extract_all_to_fs) {
110                 struct stat oldfile;
111                 int stat_res;
112                 stat_res = lstat (full_name, &oldfile);
113                 if (stat_res == 0) { /* The file already exists */
114                         if ((function & extract_unconditional) || (oldfile.st_mtime < file_entry->mtime)) {
115                                 if (!S_ISDIR(oldfile.st_mode)) {
116                                         unlink(full_name); /* Directories might not be empty etc */
117                                 }
118                         } else {
119                                 if ((function & extract_quiet) != extract_quiet) {
120                                         error_msg("%s not created: newer or same age file exists", file_entry->name);
121                                 }
122                                 seek_sub_file(src_stream, file_entry->size);
123                                 return (NULL);
124                         }
125                 }
126                 if (function & extract_create_leading_dirs) { /* Create leading directories with default umask */
127                         char *parent = dirname(full_name);
128                         if (make_directory (parent, -1, FILEUTILS_RECUR) != 0) {
129                                 if ((function & extract_quiet) != extract_quiet) {
130                                         error_msg("couldn't create leading directories");
131                                 }
132                         }
133                         free (parent);
134                 }
135                 switch(file_entry->mode & S_IFMT) {
136                         case S_IFREG:
137                                 if (file_entry->link_name) { /* Found a cpio hard link */
138                                         if (link(file_entry->link_name, full_name) != 0) {
139                                                 if ((function & extract_quiet) != extract_quiet) {
140                                                         perror_msg("Cannot link from %s to '%s'",
141                                                                 file_entry->name, file_entry->link_name);
142                                                 }
143                                         }
144                                 } else {
145                                         if ((dst_stream = wfopen(full_name, "w")) == NULL) {
146                                                 seek_sub_file(src_stream, file_entry->size);
147                                                 return NULL;
148                                         }
149                                         archive_offset += file_entry->size;
150                                         copy_file_chunk(src_stream, dst_stream, file_entry->size);                      
151                                         fclose(dst_stream);
152                                 }
153                                 break;
154                         case S_IFDIR:
155                                 if (stat_res != 0) {
156                                         if (mkdir(full_name, file_entry->mode) < 0) {
157                                                 if ((function & extract_quiet) != extract_quiet) {
158                                                         perror_msg("extract_archive: ");
159                                                 }
160                                         }
161                                 }
162                                 break;
163                         case S_IFLNK:
164                                 if (symlink(file_entry->link_name, full_name) < 0) {
165                                         if ((function & extract_quiet) != extract_quiet) {
166                                                 perror_msg("Cannot create symlink from %s to '%s'", file_entry->name, file_entry->link_name);
167                                         }
168                                         return NULL;
169                                 }
170                                 break;
171                         case S_IFSOCK:
172                         case S_IFBLK:
173                         case S_IFCHR:
174                         case S_IFIFO:
175                                 if (mknod(full_name, file_entry->mode, file_entry->device) == -1) {
176                                         if ((function & extract_quiet) != extract_quiet) {
177                                                 perror_msg("Cannot create node %s", file_entry->name);
178                                         }
179                                         return NULL;
180                                 }
181                                 break;
182                 }
183
184                 /* Changing a symlink's properties normally changes the properties of the 
185                  * file pointed to, so dont try and change the date or mode, lchown does
186                  * does the right thing, but isnt available in older versions of libc */
187                 if (S_ISLNK(file_entry->mode)) {
188 #if (__GLIBC__ >= 2) && (__GLIBC_MINOR__ >= 1)
189                         lchown(full_name, file_entry->uid, file_entry->gid);
190 #endif
191                 } else {
192                         if (function & extract_preserve_date) {
193                                 t.actime = file_entry->mtime;
194                                 t.modtime = file_entry->mtime;
195                                 utime(full_name, &t);
196                         }
197                         chmod(full_name, file_entry->mode);
198                         chown(full_name, file_entry->uid, file_entry->gid);
199                 }
200         } else {
201                 /* If we arent extracting data we have to skip it, 
202                  * if data size is 0 then then just do it anyway
203                  * (saves testing for it) */
204                 seek_sub_file(src_stream, file_entry->size);
205         }
206
207         /* extract_list and extract_verbose_list can be used in conjunction
208          * with one of the above four extraction functions, so do this seperately */
209         if (function & extract_verbose_list) {
210                 fprintf(out_stream, "%s %d/%d %8d %s ", mode_string(file_entry->mode), 
211                         file_entry->uid, file_entry->gid,
212                         (int) file_entry->size, time_string(file_entry->mtime));
213         }
214         if ((function & extract_list) || (function & extract_verbose_list)){
215                 /* fputs doesnt add a trailing \n, so use fprintf */
216                 fprintf(out_stream, "%s\n", file_entry->name);
217         }
218
219         free(full_name);
220
221         return(NULL); /* Maybe we should say if failed */
222 }
223 #endif
224
225 #ifdef L_unarchive
226 char *unarchive(FILE *src_stream, FILE *out_stream, file_header_t *(*get_headers)(FILE *),
227         const int extract_function, const char *prefix, char **extract_names)
228 {
229         file_header_t *file_entry;
230         int found;
231         int i;
232         char *buffer = NULL;
233
234         if (extract_names == NULL) {
235                 return(NULL);
236         }
237         archive_offset = 0;
238         while ((file_entry = get_headers(src_stream)) != NULL) {
239                 found = FALSE;
240                 if (extract_names[0] != NULL) {
241                         for(i = 0; extract_names[i] != 0; i++) {
242                                 if (strcmp(extract_names[i], file_entry->name) == 0) {
243                                         found = TRUE;
244                                 }
245                         }
246                         if (!found) {
247                                 /* seek past the data entry */
248                                 seek_sub_file(src_stream, file_entry->size);
249                                 continue;
250                         }
251                 }
252                 buffer = extract_archive(src_stream, out_stream, file_entry, extract_function, prefix);
253         }
254         return(buffer);
255 }
256 #endif
257
258 #ifdef L_get_header_ar
259 file_header_t *get_header_ar(FILE *src_stream)
260 {
261         file_header_t *typed;
262         union {
263                 char raw[60];
264                 struct {
265                         char name[16];
266                         char date[12];
267                         char uid[6];
268                         char gid[6];
269                         char mode[8];
270                         char size[10];
271                         char magic[2];
272                 } formated;
273         } ar;
274         static char *ar_long_names;
275
276         if (fread(ar.raw, 1, 60, src_stream) != 60) {
277                 return(NULL);
278         }
279         archive_offset += 60;
280         /* align the headers based on the header magic */
281         if ((ar.formated.magic[0] != '`') || (ar.formated.magic[1] != '\n')) {
282                 /* some version of ar, have an extra '\n' after each data entry,
283                  * this puts the next header out by 1 */
284                 if (ar.formated.magic[1] != '`') {
285                         error_msg("Invalid magic");
286                         return(NULL);
287                 }
288                 /* read the next char out of what would be the data section,
289                  * if its a '\n' then it is a valid header offset by 1*/
290                 archive_offset++;
291                 if (fgetc(src_stream) != '\n') {
292                         error_msg("Invalid magic");
293                         return(NULL);
294                 }
295                 /* fix up the header, we started reading 1 byte too early */
296                 /* raw_header[60] wont be '\n' as it should, but it doesnt matter */
297                 memmove(ar.raw, &ar.raw[1], 59);
298         }
299                 
300         typed = (file_header_t *) xcalloc(1, sizeof(file_header_t));
301
302         typed->size = (size_t) atoi(ar.formated.size);
303         /* long filenames have '/' as the first character */
304         if (ar.formated.name[0] == '/') {
305                 if (ar.formated.name[1] == '/') {
306                         /* If the second char is a '/' then this entries data section
307                          * stores long filename for multiple entries, they are stored
308                          * in static variable long_names for use in future entries */
309                         ar_long_names = (char *) xrealloc(ar_long_names, typed->size);
310                         fread(ar_long_names, 1, typed->size, src_stream);
311                         archive_offset += typed->size;
312                         /* This ar entries data section only contained filenames for other records
313                          * they are stored in the static ar_long_names for future reference */
314                         return (get_header_ar(src_stream)); /* Return next header */
315                 } else if (ar.formated.name[1] == ' ') {
316                         /* This is the index of symbols in the file for compilers */
317                         seek_sub_file(src_stream, typed->size);
318                         return (get_header_ar(src_stream)); /* Return next header */
319                 } else {
320                         /* The number after the '/' indicates the offset in the ar data section
321                         (saved in variable long_name) that conatains the real filename */
322                         if (!ar_long_names) {
323                                 error_msg("Cannot resolve long file name");
324                                 return (NULL);
325                         }
326                         typed->name = xstrdup(ar_long_names + atoi(&ar.formated.name[1]));
327                 }
328         } else {
329                 /* short filenames */
330                 typed->name = xcalloc(1, 16);
331                 strncpy(typed->name, ar.formated.name, 16);
332         }
333         typed->name[strcspn(typed->name, " /")]='\0';
334
335         /* convert the rest of the now valid char header to its typed struct */ 
336         parse_mode(ar.formated.mode, &typed->mode);
337         typed->mtime = atoi(ar.formated.date);
338         typed->uid = atoi(ar.formated.uid);
339         typed->gid = atoi(ar.formated.gid);
340
341         return(typed);
342 }
343 #endif
344
345 #ifdef L_get_header_cpio
346 struct hardlinks {
347         file_header_t *entry;
348         int inode;
349         struct hardlinks *next;
350 };
351
352 file_header_t *get_header_cpio(FILE *src_stream)
353 {
354         file_header_t *cpio_entry = NULL;
355         char cpio_header[110];
356         int namesize;
357         char dummy[16];
358         int major, minor, nlink, inode;
359         static struct hardlinks *saved_hardlinks = NULL;
360         static int pending_hardlinks = 0;
361
362         if (pending_hardlinks) { /* Deal with any pending hardlinks */
363                 struct hardlinks *tmp = saved_hardlinks, *oldtmp = NULL;
364                 while (tmp) {
365                         if (tmp->entry->link_name) { /* Found a hardlink ready to be extracted */
366                                 cpio_entry = tmp->entry;
367                                 if (oldtmp) oldtmp->next = tmp->next; /* Remove item from linked list */
368                                 else saved_hardlinks = tmp->next;
369                                 free(tmp);
370                                 return (cpio_entry);
371                         }
372                         oldtmp = tmp;
373                         tmp = tmp->next;
374                 }
375                 pending_hardlinks = 0; /* No more pending hardlinks, read next file entry */
376         }
377   
378         /* There can be padding before archive header */
379         seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4);
380         if (fread(cpio_header, 1, 110, src_stream) == 110) {
381                 archive_offset += 110;
382                 if (strncmp(cpio_header, "07070", 5) != 0) {
383                         error_msg("Unsupported format or invalid magic");
384                         return(NULL);
385                 }
386                 switch (cpio_header[5]) {
387                         case '2': /* "crc" header format */
388                                 /* Doesnt do the crc check yet */
389                         case '1': /* "newc" header format */
390                                 cpio_entry = (file_header_t *) xcalloc(1, sizeof(file_header_t));
391                                 sscanf(cpio_header, "%6c%8x%8x%8x%8x%8x%8lx%8lx%16c%8x%8x%8x%8c",
392                                         dummy, &inode, &cpio_entry->mode, &cpio_entry->uid, &cpio_entry->gid,
393                                         &nlink, &cpio_entry->mtime, &cpio_entry->size,
394                                         dummy, &major, &minor, &namesize, dummy);
395
396                                 cpio_entry->name = (char *) xcalloc(1, namesize);
397                                 fread(cpio_entry->name, 1, namesize, src_stream); /* Read in filename */
398                                 archive_offset += namesize;
399                                 /* Skip padding before file contents */
400                                 seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4);
401                                 if (strcmp(cpio_entry->name, "TRAILER!!!") == 0) {
402                                         printf("%d blocks\n", (int) (archive_offset % 512 ? (archive_offset / 512) + 1 : archive_offset / 512)); /* Always round up */
403                                         if (saved_hardlinks) { /* Bummer - we still have unresolved hardlinks */
404                                                 struct hardlinks *tmp = saved_hardlinks, *oldtmp = NULL;
405                                                 while (tmp) {
406                                                         error_msg("%s not created: cannot resolve hardlink", tmp->entry->name);
407                                                         oldtmp = tmp;
408                                                         tmp = tmp->next;
409                                                         free (oldtmp->entry->name);
410                                                         free (oldtmp->entry);
411                                                         free (oldtmp);
412                                                 }
413                                                 saved_hardlinks = NULL;
414                                                 pending_hardlinks = 0;
415                                         }
416                                         return(NULL);
417                                 }
418
419                                 if (S_ISLNK(cpio_entry->mode)) {
420                                         cpio_entry->link_name = (char *) xcalloc(1, cpio_entry->size + 1);
421                                         fread(cpio_entry->link_name, 1, cpio_entry->size, src_stream);
422                                         archive_offset += cpio_entry->size;
423                                         cpio_entry->size = 0; /* Stop possiable seeks in future */
424                                 }
425                                 if (nlink > 1 && !S_ISDIR(cpio_entry->mode)) {
426                                         if (cpio_entry->size == 0) { /* Put file on a linked list for later */
427                                                 struct hardlinks *new = xmalloc(sizeof(struct hardlinks));
428                                                 new->next = saved_hardlinks;
429                                                 new->inode = inode;
430                                                 new->entry = cpio_entry;
431                                                 saved_hardlinks = new;
432                                         return(get_header_cpio(src_stream)); /* Recurse to next file */
433                                         } else { /* Found the file with data in */
434                                                 struct hardlinks *tmp = saved_hardlinks;
435                                                 pending_hardlinks = 1;
436                                                 while (tmp) {
437                                                         if (tmp->inode == inode) {
438                                                                 tmp->entry->link_name = xstrdup(cpio_entry->name);
439                                                                 nlink--;
440                                                         }
441                                                         tmp = tmp->next;
442                                                 }
443                                                 if (nlink > 1) error_msg("error resolving hardlink: did you create the archive with GNU cpio 2.0-2.2?");
444                                         }
445                                 }
446                                 cpio_entry->device = (major << 8) | minor;
447                                 break;
448                         default:
449                                 error_msg("Unsupported format");
450                                 return(NULL);
451                 }
452                 if (ferror(src_stream) || feof(src_stream)) {
453                         perror_msg("Stream error");
454                         return(NULL);
455                 }
456         }
457         return(cpio_entry);
458 }
459 #endif
460
461 #ifdef L_get_header_tar
462 file_header_t *get_header_tar(FILE *tar_stream)
463 {
464         union {
465                 unsigned char raw[512];
466                 struct {
467                         char name[100];         /*   0-99 */
468                         char mode[8];           /* 100-107 */
469                         char uid[8];            /* 108-115 */
470                         char gid[8];            /* 116-123 */
471                         char size[12];          /* 124-135 */
472                         char mtime[12];         /* 136-147 */
473                         char chksum[8];         /* 148-155 */
474                         char typeflag;          /* 156-156 */
475                         char linkname[100];     /* 157-256 */
476                         char magic[6];          /* 257-262 */
477                         char version[2];        /* 263-264 */
478                         char uname[32];         /* 265-296 */
479                         char gname[32];         /* 297-328 */
480                         char devmajor[8];       /* 329-336 */
481                         char devminor[8];       /* 337-344 */
482                         char prefix[155];       /* 345-499 */
483                         char padding[12];       /* 500-512 */
484                 } formated;
485         } tar;
486         file_header_t *tar_entry = NULL;
487         long i;
488         long sum = 0;
489
490         if (archive_offset % 512 != 0) {
491                 seek_sub_file(tar_stream, 512 - (archive_offset % 512));
492         }
493
494         if (fread(tar.raw, 1, 512, tar_stream) != 512) {
495                 error_msg("Couldnt read header");
496                 return(NULL);
497         }
498         archive_offset += 512;
499
500         /* Check header has valid magic, unfortunately some tar files
501          * have empty (0'ed) tar entries at the end, which will
502          * cause this to fail, so fail silently for now
503          */
504         if (strncmp(tar.formated.magic, "ustar", 5) != 0) {
505                 return(NULL);
506         }
507
508         /* Do checksum on headers */
509         for (i =  0; i < 148 ; i++) {
510                 sum += tar.raw[i];
511         }
512         sum += ' ' * 8;
513         for (i =  156; i < 512 ; i++) {
514                 sum += tar.raw[i];
515         }
516         if (sum != strtol(tar.formated.chksum, NULL, 8)) {
517                 error_msg("Invalid tar header checksum");
518                 return(NULL);
519         }
520
521         /* convert to type'ed variables */
522         tar_entry = xcalloc(1, sizeof(file_header_t));
523         tar_entry->name = xstrdup(tar.formated.name);
524
525         parse_mode(tar.formated.mode, &tar_entry->mode);
526         tar_entry->uid   = strtol(tar.formated.uid, NULL, 8);
527         tar_entry->gid   = strtol(tar.formated.gid, NULL, 8);
528         tar_entry->size  = strtol(tar.formated.size, NULL, 8);
529         tar_entry->mtime = strtol(tar.formated.mtime, NULL, 8);
530         tar_entry->link_name  = strlen(tar.formated.linkname) ? 
531             xstrdup(tar.formated.linkname) : NULL;
532         tar_entry->device = (strtol(tar.formated.devmajor, NULL, 8) << 8) +
533                 strtol(tar.formated.devminor, NULL, 8);
534
535         return(tar_entry);
536 }
537 #endif
538
539 #ifdef L_deb_extract
540 char *deb_extract(const char *package_filename, FILE *out_stream, 
541         const int extract_function, const char *prefix, const char *filename)
542 {
543         FILE *deb_stream;
544         FILE *uncompressed_stream = NULL;
545         file_header_t *ar_header = NULL;
546         char *output_buffer = NULL;
547         char *ared_file = NULL;
548         char ar_magic[8];
549         char **file_list;
550         int gunzip_pid;
551
552         if (filename == NULL) {
553                 file_list = xmalloc(sizeof(char *));
554                 file_list[0] = NULL;
555         } else {
556                 file_list = xmalloc(sizeof(char *) * 3);
557                 file_list[0] = xstrdup(filename);
558                 file_list[1] = NULL;
559         }
560         
561         if (extract_function & extract_control_tar_gz) {
562                 ared_file = xstrdup("control.tar.gz");
563         }
564         else if (extract_function & extract_data_tar_gz) {              
565                 ared_file = xstrdup("data.tar.gz");
566         }
567
568         /* open the debian package to be worked on */
569         deb_stream = wfopen(package_filename, "r");
570
571         /* check ar magic */
572         fread(ar_magic, 1, 8, deb_stream);
573         if (strncmp(ar_magic,"!<arch>",7) != 0) {
574                 error_msg_and_die("invalid magic");
575         }
576         archive_offset = 8;
577
578         while ((ar_header = get_header_ar(deb_stream)) != NULL) {
579                 if (strcmp(ared_file, ar_header->name) == 0) {
580                         /* open a stream of decompressed data */
581                         uncompressed_stream = gz_open(deb_stream, &gunzip_pid);
582                         archive_offset = 0;
583                         output_buffer = unarchive(uncompressed_stream, out_stream, get_header_tar, extract_function, prefix, file_list);
584                 }
585                 seek_sub_file(deb_stream, ar_header->size);
586         }
587         gz_close(gunzip_pid);
588         fclose(deb_stream);
589         fclose(uncompressed_stream);
590         free(ared_file);
591         return(output_buffer);
592 }
593 #endif