Some libc5 cleanups
[oweals/busybox.git] / libbb / unarchive.c
1 /*
2  *  Copyright (C) 2000 by Glenn McGrath
3  *  Copyright (C) 2001 by Laurence Anderson
4  *      
5  *  Based on previous work by busybox developers and others.
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU Library General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20  */
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <utime.h>
28 #include "libbb.h"
29
30 extern void seek_sub_file(FILE *src_stream, const int count);
31 extern char *extract_archive(FILE *src_stream, FILE *out_stream, const file_header_t *file_entry,
32  const int function, const char *prefix);
33
34
35 #ifdef L_archive_offset
36 off_t archive_offset;
37 #else
38 extern off_t archive_offset;
39 #endif  
40
41 #ifdef L_seek_sub_file
42 void seek_sub_file(FILE *src_stream, const int count)
43 {
44         int i;
45         /* Try to fseek as faster */
46         archive_offset += count;
47         if (fseek(src_stream, count, SEEK_CUR) != 0 && errno == ESPIPE) {
48         for (i = 0; i < count; i++) {
49                 fgetc(src_stream);
50                 }
51         }
52         return;
53 }
54 #endif  
55
56
57
58 #ifdef L_extract_archive
59 /* Extract the data postioned at src_stream to either filesystem, stdout or 
60  * buffer depending on the value of 'function' which is defined in libbb.h 
61  *
62  * prefix doesnt have to be just a directory, it may prefix the filename as well.
63  *
64  * e.g. '/var/lib/dpkg/info/dpkg.' will extract all files to the base bath 
65  * '/var/lib/dpkg/info/' and all files/dirs created in that dir will have 
66  * 'dpkg.' as their prefix
67  *
68  * For this reason if prefix does point to a dir then it must end with a
69  * trailing '/' or else the last dir will be assumed to be the file prefix 
70  */
71 char *extract_archive(FILE *src_stream, FILE *out_stream, const file_header_t *file_entry,
72  const int function, const char *prefix)
73 {
74         FILE *dst_stream = NULL;
75         char *full_name = NULL;
76         char *buffer = NULL;
77         struct utimbuf t;
78
79         /* prefix doesnt have to be a proper path it may prepend 
80          * the filename as well */
81         if (prefix != NULL) {
82                 /* strip leading '/' in filename to extract as prefix may not be dir */
83                 /* Cant use concat_path_file here as prefix might not be a directory */
84                 char *path = file_entry->name;
85                 if (strncmp("./", path, 2) == 0) {
86                         path += 2;
87                         if (strlen(path) == 0) {
88                                 return(NULL);
89                         }
90                 }
91                 full_name = xmalloc(strlen(prefix) + strlen(path) + 1);
92                 strcpy(full_name, prefix);
93                 strcat(full_name, path);
94         } else {
95                 full_name = file_entry->name;
96         }
97         if (function & extract_to_stdout) {
98                 if (S_ISREG(file_entry->mode)) {
99                         copy_file_chunk(src_stream, out_stream, file_entry->size);                      
100                         archive_offset += file_entry->size;
101                 }
102         }
103         else if (function & extract_one_to_buffer) { 
104                 if (S_ISREG(file_entry->mode)) {
105                         buffer = (char *) xmalloc(file_entry->size + 1);
106                         fread(buffer, 1, file_entry->size, src_stream);
107                         buffer[file_entry->size] = '\0';
108                         archive_offset += file_entry->size;
109                         return(buffer);
110                 }
111         }
112         else if (function & extract_all_to_fs) {
113                 struct stat oldfile;
114                 int stat_res;
115                 stat_res = lstat (full_name, &oldfile);
116                 if (stat_res == 0) { /* The file already exists */
117                         if ((function & extract_unconditional) || (oldfile.st_mtime < file_entry->mtime)) {
118                                 if (!S_ISDIR(oldfile.st_mode)) {
119                                         unlink(full_name); /* Directories might not be empty etc */
120                                 }
121                         } else {
122                                 if ((function & extract_quiet) != extract_quiet) {
123                                         error_msg("%s not created: newer or same age file exists", file_entry->name);
124                                 }
125                                 seek_sub_file(src_stream, file_entry->size);
126                                 return (NULL);
127                         }
128                 }
129                 if (function & extract_create_leading_dirs) { /* Create leading directories with default umask */
130                         char *parent = dirname(full_name);
131                         if (make_directory (parent, -1, FILEUTILS_RECUR) != 0) {
132                                 if ((function & extract_quiet) != extract_quiet) {
133                                         error_msg("couldn't create leading directories");
134                                 }
135                         }
136                         free (parent);
137                 }
138                 switch(file_entry->mode & S_IFMT) {
139                         case S_IFREG:
140                                 if (file_entry->link_name) { /* Found a cpio hard link */
141                                         if (link(file_entry->link_name, full_name) != 0) {
142                                                 if ((function & extract_quiet) != extract_quiet) {
143                                                         perror_msg("Cannot link from %s to '%s'",
144                                                                 file_entry->name, file_entry->link_name);
145                                                 }
146                                         }
147                                 } else {
148                                         if ((dst_stream = wfopen(full_name, "w")) == NULL) {
149                                                 seek_sub_file(src_stream, file_entry->size);
150                                                 return NULL;
151                                         }
152                                         archive_offset += file_entry->size;
153                                         copy_file_chunk(src_stream, dst_stream, file_entry->size);                      
154                                         fclose(dst_stream);
155                                 }
156                                 break;
157                         case S_IFDIR:
158                                 if (stat_res != 0) {
159                                         if (mkdir(full_name, file_entry->mode) < 0) {
160                                                 if ((function & extract_quiet) != extract_quiet) {
161                                                         perror_msg("extract_archive: ");
162                                                 }
163                                         }
164                                 }
165                                 break;
166                         case S_IFLNK:
167                                 if (symlink(file_entry->link_name, full_name) < 0) {
168                                         if ((function & extract_quiet) != extract_quiet) {
169                                                 perror_msg("Cannot create symlink from %s to '%s'", file_entry->name, file_entry->link_name);
170                                         }
171                                         return NULL;
172                                 }
173                                 break;
174                         case S_IFSOCK:
175                         case S_IFBLK:
176                         case S_IFCHR:
177                         case S_IFIFO:
178                                 if (mknod(full_name, file_entry->mode, file_entry->device) == -1) {
179                                         if ((function & extract_quiet) != extract_quiet) {
180                                                 perror_msg("Cannot create node %s", file_entry->name);
181                                         }
182                                         return NULL;
183                                 }
184                                 break;
185                 }
186
187                 /* Changing a symlink's properties normally changes the properties of the 
188                  * file pointed to, so dont try and change the date or mode, lchown does
189                  * does the right thing, but isnt available in older versions of libc */
190                 if (S_ISLNK(file_entry->mode)) {
191 #if (__GLIBC__ > 2) && (__GLIBC_MINOR__ > 1)
192                         lchown(full_name, file_entry->uid, file_entry->gid);
193 #endif
194                 } else {
195                         if (function & extract_preserve_date) {
196                                 t.actime = file_entry->mtime;
197                                 t.modtime = file_entry->mtime;
198                                 utime(full_name, &t);
199                         }
200                         chmod(full_name, file_entry->mode);
201                         chown(full_name, file_entry->uid, file_entry->gid);
202                 }
203         } else {
204                 /* If we arent extracting data we have to skip it, 
205                  * if data size is 0 then then just do it anyway
206                  * (saves testing for it) */
207                 seek_sub_file(src_stream, file_entry->size);
208         }
209
210         /* extract_list and extract_verbose_list can be used in conjunction
211          * with one of the above four extraction functions, so do this seperately */
212         if (function & extract_verbose_list) {
213                 fprintf(out_stream, "%s %d/%d %8d %s ", mode_string(file_entry->mode), 
214                         file_entry->uid, file_entry->gid,
215                         (int) file_entry->size, time_string(file_entry->mtime));
216         }
217         if ((function & extract_list) || (function & extract_verbose_list)){
218                 /* fputs doesnt add a trailing \n, so use fprintf */
219                 fprintf(out_stream, "%s\n", file_entry->name);
220         }
221
222         free(full_name);
223
224         return(NULL); /* Maybe we should say if failed */
225 }
226 #endif
227
228 #ifdef L_unarchive
229 char *unarchive(FILE *src_stream, FILE *out_stream, file_header_t *(*get_headers)(FILE *),
230         const int extract_function, const char *prefix, char **extract_names)
231 {
232         file_header_t *file_entry;
233         int found;
234         int i;
235         char *buffer = NULL;
236
237         archive_offset = 0;
238         while ((file_entry = get_headers(src_stream)) != NULL) {
239                 found = FALSE;
240                 if (extract_names == NULL) {
241                         found = TRUE;
242                 } else {
243                         for(i = 0; extract_names[i] != 0; i++) {
244                                 if (strcmp(extract_names[i], file_entry->name) == 0) {
245                                         found = TRUE;
246                                 }
247                         }
248                 }
249
250                 if (found) {
251                         buffer = extract_archive(src_stream, out_stream, file_entry, extract_function, prefix);
252                 } else {
253                         /* seek past the data entry */
254                         seek_sub_file(src_stream, file_entry->size);
255                 }
256         }
257         return(buffer);
258 }
259 #endif
260
261 #ifdef L_get_header_ar
262 file_header_t *get_header_ar(FILE *src_stream)
263 {
264         file_header_t *typed;
265         union {
266                 char raw[60];
267                 struct {
268                         char name[16];
269                         char date[12];
270                         char uid[6];
271                         char gid[6];
272                         char mode[8];
273                         char size[10];
274                         char magic[2];
275                 } formated;
276         } ar;
277         static char *ar_long_names;
278
279         if (fread(ar.raw, 1, 60, src_stream) != 60) {
280                 return(NULL);
281         }
282         archive_offset += 60;
283         /* align the headers based on the header magic */
284         if ((ar.formated.magic[0] != '`') || (ar.formated.magic[1] != '\n')) {
285                 /* some version of ar, have an extra '\n' after each data entry,
286                  * this puts the next header out by 1 */
287                 if (ar.formated.magic[1] != '`') {
288                         error_msg("Invalid magic");
289                         return(NULL);
290                 }
291                 /* read the next char out of what would be the data section,
292                  * if its a '\n' then it is a valid header offset by 1*/
293                 archive_offset++;
294                 if (fgetc(src_stream) != '\n') {
295                         error_msg("Invalid magic");
296                         return(NULL);
297                 }
298                 /* fix up the header, we started reading 1 byte too early */
299                 /* raw_header[60] wont be '\n' as it should, but it doesnt matter */
300                 memmove(ar.raw, &ar.raw[1], 59);
301         }
302                 
303         typed = (file_header_t *) xcalloc(1, sizeof(file_header_t));
304
305         typed->size = (size_t) atoi(ar.formated.size);
306         /* long filenames have '/' as the first character */
307         if (ar.formated.name[0] == '/') {
308                 if (ar.formated.name[1] == '/') {
309                         /* If the second char is a '/' then this entries data section
310                          * stores long filename for multiple entries, they are stored
311                          * in static variable long_names for use in future entries */
312                         ar_long_names = (char *) xrealloc(ar_long_names, typed->size);
313                         fread(ar_long_names, 1, typed->size, src_stream);
314                         archive_offset += typed->size;
315                         /* This ar entries data section only contained filenames for other records
316                          * they are stored in the static ar_long_names for future reference */
317                         return (get_header_ar(src_stream)); /* Return next header */
318                 } else if (ar.formated.name[1] == ' ') {
319                         /* This is the index of symbols in the file for compilers */
320                         seek_sub_file(src_stream, typed->size);
321                         return (get_header_ar(src_stream)); /* Return next header */
322                 } else {
323                         /* The number after the '/' indicates the offset in the ar data section
324                         (saved in variable long_name) that conatains the real filename */
325                         if (!ar_long_names) {
326                                 error_msg("Cannot resolve long file name");
327                                 return (NULL);
328                         }
329                         typed->name = xstrdup(ar_long_names + atoi(&ar.formated.name[1]));
330                 }
331         } else {
332                 /* short filenames */
333                 typed->name = xcalloc(1, 16);
334                 strncpy(typed->name, ar.formated.name, 16);
335         }
336         typed->name[strcspn(typed->name, " /")]='\0';
337
338         /* convert the rest of the now valid char header to its typed struct */ 
339         parse_mode(ar.formated.mode, &typed->mode);
340         typed->mtime = atoi(ar.formated.date);
341         typed->uid = atoi(ar.formated.uid);
342         typed->gid = atoi(ar.formated.gid);
343
344         return(typed);
345 }
346 #endif
347
348 #ifdef L_get_header_cpio
349 struct hardlinks {
350         file_header_t *entry;
351         int inode;
352         struct hardlinks *next;
353 };
354
355 file_header_t *get_header_cpio(FILE *src_stream)
356 {
357         file_header_t *cpio_entry = NULL;
358         char cpio_header[110];
359         int namesize;
360         char dummy[16];
361         int major, minor, nlink, inode;
362         static struct hardlinks *saved_hardlinks = NULL;
363         static int pending_hardlinks = 0;
364
365         if (pending_hardlinks) { /* Deal with any pending hardlinks */
366                 struct hardlinks *tmp = saved_hardlinks, *oldtmp = NULL;
367                 while (tmp) {
368                         if (tmp->entry->link_name) { /* Found a hardlink ready to be extracted */
369                                 cpio_entry = tmp->entry;
370                                 if (oldtmp) oldtmp->next = tmp->next; /* Remove item from linked list */
371                                 else saved_hardlinks = tmp->next;
372                                 free(tmp);
373                                 return (cpio_entry);
374                         }
375                         oldtmp = tmp;
376                         tmp = tmp->next;
377                 }
378                 pending_hardlinks = 0; /* No more pending hardlinks, read next file entry */
379         }
380   
381         /* There can be padding before archive header */
382         seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4);
383         if (fread(cpio_header, 1, 110, src_stream) == 110) {
384                 archive_offset += 110;
385                 if (strncmp(cpio_header, "07070", 5) != 0) {
386                         error_msg("Unsupported format or invalid magic");
387                         return(NULL);
388                 }
389                 switch (cpio_header[5]) {
390                         case '2': /* "crc" header format */
391                                 /* Doesnt do the crc check yet */
392                         case '1': /* "newc" header format */
393                                 cpio_entry = (file_header_t *) xcalloc(1, sizeof(file_header_t));
394                                 sscanf(cpio_header, "%6c%8x%8x%8x%8x%8x%8lx%8lx%16c%8x%8x%8x%8c",
395                                         dummy, &inode, (unsigned int*)&cpio_entry->mode, 
396                                         (unsigned int*)&cpio_entry->uid, (unsigned int*)&cpio_entry->gid,
397                                         &nlink, &cpio_entry->mtime, &cpio_entry->size,
398                                         dummy, &major, &minor, &namesize, dummy);
399
400                                 cpio_entry->name = (char *) xcalloc(1, namesize);
401                                 fread(cpio_entry->name, 1, namesize, src_stream); /* Read in filename */
402                                 archive_offset += namesize;
403                                 /* Skip padding before file contents */
404                                 seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4);
405                                 if (strcmp(cpio_entry->name, "TRAILER!!!") == 0) {
406                                         printf("%d blocks\n", (int) (archive_offset % 512 ? (archive_offset / 512) + 1 : archive_offset / 512)); /* Always round up */
407                                         if (saved_hardlinks) { /* Bummer - we still have unresolved hardlinks */
408                                                 struct hardlinks *tmp = saved_hardlinks, *oldtmp = NULL;
409                                                 while (tmp) {
410                                                         error_msg("%s not created: cannot resolve hardlink", tmp->entry->name);
411                                                         oldtmp = tmp;
412                                                         tmp = tmp->next;
413                                                         free (oldtmp->entry->name);
414                                                         free (oldtmp->entry);
415                                                         free (oldtmp);
416                                                 }
417                                                 saved_hardlinks = NULL;
418                                                 pending_hardlinks = 0;
419                                         }
420                                         return(NULL);
421                                 }
422
423                                 if (S_ISLNK(cpio_entry->mode)) {
424                                         cpio_entry->link_name = (char *) xcalloc(1, cpio_entry->size + 1);
425                                         fread(cpio_entry->link_name, 1, cpio_entry->size, src_stream);
426                                         archive_offset += cpio_entry->size;
427                                         cpio_entry->size = 0; /* Stop possiable seeks in future */
428                                 }
429                                 if (nlink > 1 && !S_ISDIR(cpio_entry->mode)) {
430                                         if (cpio_entry->size == 0) { /* Put file on a linked list for later */
431                                                 struct hardlinks *new = xmalloc(sizeof(struct hardlinks));
432                                                 new->next = saved_hardlinks;
433                                                 new->inode = inode;
434                                                 new->entry = cpio_entry;
435                                                 saved_hardlinks = new;
436                                         return(get_header_cpio(src_stream)); /* Recurse to next file */
437                                         } else { /* Found the file with data in */
438                                                 struct hardlinks *tmp = saved_hardlinks;
439                                                 pending_hardlinks = 1;
440                                                 while (tmp) {
441                                                         if (tmp->inode == inode) {
442                                                                 tmp->entry->link_name = xstrdup(cpio_entry->name);
443                                                                 nlink--;
444                                                         }
445                                                         tmp = tmp->next;
446                                                 }
447                                                 if (nlink > 1) error_msg("error resolving hardlink: did you create the archive with GNU cpio 2.0-2.2?");
448                                         }
449                                 }
450                                 cpio_entry->device = (major << 8) | minor;
451                                 break;
452                         default:
453                                 error_msg("Unsupported format");
454                                 return(NULL);
455                 }
456                 if (ferror(src_stream) || feof(src_stream)) {
457                         perror_msg("Stream error");
458                         return(NULL);
459                 }
460         }
461         return(cpio_entry);
462 }
463 #endif
464
465 #ifdef L_get_header_tar
466 file_header_t *get_header_tar(FILE *tar_stream)
467 {
468         union {
469                 unsigned char raw[512];
470                 struct {
471                         char name[100];         /*   0-99 */
472                         char mode[8];           /* 100-107 */
473                         char uid[8];            /* 108-115 */
474                         char gid[8];            /* 116-123 */
475                         char size[12];          /* 124-135 */
476                         char mtime[12];         /* 136-147 */
477                         char chksum[8];         /* 148-155 */
478                         char typeflag;          /* 156-156 */
479                         char linkname[100];     /* 157-256 */
480                         char magic[6];          /* 257-262 */
481                         char version[2];        /* 263-264 */
482                         char uname[32];         /* 265-296 */
483                         char gname[32];         /* 297-328 */
484                         char devmajor[8];       /* 329-336 */
485                         char devminor[8];       /* 337-344 */
486                         char prefix[155];       /* 345-499 */
487                         char padding[12];       /* 500-512 */
488                 } formated;
489         } tar;
490         file_header_t *tar_entry = NULL;
491         long i;
492         long sum = 0;
493
494         if (archive_offset % 512 != 0) {
495                 seek_sub_file(tar_stream, 512 - (archive_offset % 512));
496         }
497
498         if (fread(tar.raw, 1, 512, tar_stream) != 512) {
499                 /* Unfortunatly its common for tar files to have all sorts of
500                  * trailing garbage, fail silently */
501 //              error_msg("Couldnt read header");
502                 return(NULL);
503         }
504         archive_offset += 512;
505
506         /* Check header has valid magic, unfortunately some tar files
507          * have empty (0'ed) tar entries at the end, which will
508          * cause this to fail, so fail silently for now
509          */
510         if (strncmp(tar.formated.magic, "ustar", 5) != 0) {
511                 return(NULL);
512         }
513
514         /* Do checksum on headers */
515         for (i =  0; i < 148 ; i++) {
516                 sum += tar.raw[i];
517         }
518         sum += ' ' * 8;
519         for (i =  156; i < 512 ; i++) {
520                 sum += tar.raw[i];
521         }
522         if (sum != strtol(tar.formated.chksum, NULL, 8)) {
523                 error_msg("Invalid tar header checksum");
524                 return(NULL);
525         }
526
527         /* convert to type'ed variables */
528         tar_entry = xcalloc(1, sizeof(file_header_t));
529         tar_entry->name = xstrdup(tar.formated.name);
530
531         parse_mode(tar.formated.mode, &tar_entry->mode);
532         tar_entry->uid   = strtol(tar.formated.uid, NULL, 8);
533         tar_entry->gid   = strtol(tar.formated.gid, NULL, 8);
534         tar_entry->size  = strtol(tar.formated.size, NULL, 8);
535         tar_entry->mtime = strtol(tar.formated.mtime, NULL, 8);
536         tar_entry->link_name  = strlen(tar.formated.linkname) ? 
537             xstrdup(tar.formated.linkname) : NULL;
538         tar_entry->device = (strtol(tar.formated.devmajor, NULL, 8) << 8) +
539                 strtol(tar.formated.devminor, NULL, 8);
540
541         return(tar_entry);
542 }
543 #endif
544
545 #ifdef L_deb_extract
546 char *deb_extract(const char *package_filename, FILE *out_stream, 
547         const int extract_function, const char *prefix, const char *filename)
548 {
549         FILE *deb_stream;
550         FILE *uncompressed_stream = NULL;
551         file_header_t *ar_header = NULL;
552         char **file_list = NULL;
553         char *output_buffer = NULL;
554         char *ared_file = NULL;
555         char ar_magic[8];
556         int gunzip_pid;
557
558         if (filename != NULL) {
559                 file_list = xmalloc(sizeof(char *) * 2);
560                 file_list[0] = xstrdup(filename);
561                 file_list[1] = NULL;
562         }
563         
564         if (extract_function & extract_control_tar_gz) {
565                 ared_file = xstrdup("control.tar.gz");
566         }
567         else if (extract_function & extract_data_tar_gz) {              
568                 ared_file = xstrdup("data.tar.gz");
569         }
570
571         /* open the debian package to be worked on */
572         deb_stream = wfopen(package_filename, "r");
573         if (deb_stream == NULL) {
574                 return(NULL);
575         }
576         /* set the buffer size */
577         setvbuf(deb_stream, NULL, _IOFBF, 0x8000);
578
579         /* check ar magic */
580         fread(ar_magic, 1, 8, deb_stream);
581         if (strncmp(ar_magic,"!<arch>",7) != 0) {
582                 error_msg_and_die("invalid magic");
583         }
584         archive_offset = 8;
585
586         while ((ar_header = get_header_ar(deb_stream)) != NULL) {
587                 if (strcmp(ared_file, ar_header->name) == 0) {
588                         /* open a stream of decompressed data */
589                         uncompressed_stream = gz_open(deb_stream, &gunzip_pid);
590                         archive_offset = 0;
591                         output_buffer = unarchive(uncompressed_stream, out_stream, get_header_tar, extract_function, prefix, file_list);
592                 }
593                 seek_sub_file(deb_stream, ar_header->size);
594         }
595         gz_close(gunzip_pid);
596         fclose(deb_stream);
597         fclose(uncompressed_stream);
598         free(ared_file);
599         return(output_buffer);
600 }
601 #endif