Fix a memory leak if parent directory creation failed.
[oweals/busybox.git] / libbb / unarchive.c
1 /*
2  *  Copyright (C) 2000 by Glenn McGrath
3  *  Copyright (C) 2001 by Laurence Anderson
4  *      
5  *  Based on previous work by busybox developers and others.
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU Library General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20  */
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <utime.h>
28 #include "libbb.h"
29
30 extern void seek_sub_file(FILE *src_stream, const int count);
31 extern char *extract_archive(FILE *src_stream, FILE *out_stream, const file_header_t *file_entry,
32  const int function, const char *prefix);
33
34
35 #ifdef L_archive_offset
36 off_t archive_offset;
37 #else
38 extern off_t archive_offset;
39 #endif  
40
41 #ifdef L_seek_sub_file
42 void seek_sub_file(FILE *src_stream, const int count)
43 {
44         int i;
45         /* Try to fseek as faster */
46         archive_offset += count;
47         if (fseek(src_stream, count, SEEK_CUR) != 0 && errno == ESPIPE) {
48         for (i = 0; i < count; i++) {
49                 fgetc(src_stream);
50                 }
51         }
52         return;
53 }
54 #endif  
55
56
57
58 #ifdef L_extract_archive
59 /* Extract the data postioned at src_stream to either filesystem, stdout or 
60  * buffer depending on the value of 'function' which is defined in libbb.h 
61  *
62  * prefix doesnt have to be just a directory, it may prefix the filename as well.
63  *
64  * e.g. '/var/lib/dpkg/info/dpkg.' will extract all files to the base bath 
65  * '/var/lib/dpkg/info/' and all files/dirs created in that dir will have 
66  * 'dpkg.' as their prefix
67  *
68  * For this reason if prefix does point to a dir then it must end with a
69  * trailing '/' or else the last dir will be assumed to be the file prefix 
70  */
71 char *extract_archive(FILE *src_stream, FILE *out_stream, const file_header_t *file_entry,
72  const int function, const char *prefix)
73 {
74         FILE *dst_stream = NULL;
75         char *full_name = NULL;
76         char *buffer = NULL;
77         struct utimbuf t;
78
79         /* prefix doesnt have to be a proper path it may prepend 
80          * the filename as well */
81         if (prefix != NULL) {
82                 /* strip leading '/' in filename to extract as prefix may not be dir */
83                 /* Cant use concat_path_file here as prefix might not be a directory */
84                 char *path = file_entry->name;
85                 if (strncmp("./", path, 2) == 0) {
86                         path += 2;
87                         if (strlen(path) == 0) {
88                                 return(NULL);
89                         }
90                 }
91                 full_name = xmalloc(strlen(prefix) + strlen(path) + 1);
92                 strcpy(full_name, prefix);
93                 strcat(full_name, path);
94         } else {
95                 full_name = file_entry->name;
96         }
97         if (function & extract_to_stdout) {
98                 if (S_ISREG(file_entry->mode)) {
99                         copy_file_chunk(src_stream, out_stream, file_entry->size);                      
100                         archive_offset += file_entry->size;
101                 }
102         }
103         else if (function & extract_one_to_buffer) { 
104                 if (S_ISREG(file_entry->mode)) {
105                         buffer = (char *) xmalloc(file_entry->size + 1);
106                         fread(buffer, 1, file_entry->size, src_stream);
107                         buffer[file_entry->size] = '\0';
108                         archive_offset += file_entry->size;
109                         return(buffer);
110                 }
111         }
112         else if (function & extract_all_to_fs) {
113                 struct stat oldfile;
114                 int stat_res;
115                 stat_res = lstat (full_name, &oldfile);
116                 if (stat_res == 0) { /* The file already exists */
117                         if ((function & extract_unconditional) || (oldfile.st_mtime < file_entry->mtime)) {
118                                 if (!S_ISDIR(oldfile.st_mode)) {
119                                         unlink(full_name); /* Directories might not be empty etc */
120                                 }
121                         } else {
122                                 if ((function & extract_quiet) != extract_quiet) {
123                                         error_msg("%s not created: newer or same age file exists", file_entry->name);
124                                 }
125                                 seek_sub_file(src_stream, file_entry->size);
126                                 return (NULL);
127                         }
128                 }
129                 if (function & extract_create_leading_dirs) { /* Create leading directories with default umask */
130                         char *parent = dirname(full_name);
131                         if (make_directory (parent, -1, FILEUTILS_RECUR) != 0) {
132                                 if ((function & extract_quiet) != extract_quiet) {
133                                         error_msg("couldn't create leading directories");
134                                 }
135                         }
136                         free (parent);
137                 }
138                 switch(file_entry->mode & S_IFMT) {
139                         case S_IFREG:
140                                 if (file_entry->link_name) { /* Found a cpio hard link */
141                                         if (link(file_entry->link_name, full_name) != 0) {
142                                                 if ((function & extract_quiet) != extract_quiet) {
143                                                         perror_msg("Cannot link from %s to '%s'",
144                                                                 file_entry->name, file_entry->link_name);
145                                                 }
146                                         }
147                                 } else {
148                                         if ((dst_stream = wfopen(full_name, "w")) == NULL) {
149                                                 seek_sub_file(src_stream, file_entry->size);
150                                                 return NULL;
151                                         }
152                                         archive_offset += file_entry->size;
153                                         copy_file_chunk(src_stream, dst_stream, file_entry->size);                      
154                                         fclose(dst_stream);
155                                 }
156                                 break;
157                         case S_IFDIR:
158                                 if (stat_res != 0) {
159                                         if (mkdir(full_name, file_entry->mode) < 0) {
160                                                 if ((function & extract_quiet) != extract_quiet) {
161                                                         perror_msg("extract_archive: ");
162                                                 }
163                                         }
164                                 }
165                                 break;
166                         case S_IFLNK:
167                                 if (symlink(file_entry->link_name, full_name) < 0) {
168                                         if ((function & extract_quiet) != extract_quiet) {
169                                                 perror_msg("Cannot create symlink from %s to '%s'", file_entry->name, file_entry->link_name);
170                                         }
171                                         return NULL;
172                                 }
173                                 break;
174                         case S_IFSOCK:
175                         case S_IFBLK:
176                         case S_IFCHR:
177                         case S_IFIFO:
178                                 if (mknod(full_name, file_entry->mode, file_entry->device) == -1) {
179                                         if ((function & extract_quiet) != extract_quiet) {
180                                                 perror_msg("Cannot create node %s", file_entry->name);
181                                         }
182                                         return NULL;
183                                 }
184                                 break;
185                 }
186
187                 /* Changing a symlink's properties normally changes the properties of the 
188                  * file pointed to, so dont try and change the date or mode, lchown does
189                  * does the right thing, but isnt available in older versions of libc */
190                 if (S_ISLNK(file_entry->mode)) {
191 #if (__GLIBC__ > 2) && (__GLIBC_MINOR__ > 1)
192                         lchown(full_name, file_entry->uid, file_entry->gid);
193 #endif
194                 } else {
195                         if (function & extract_preserve_date) {
196                                 t.actime = file_entry->mtime;
197                                 t.modtime = file_entry->mtime;
198                                 utime(full_name, &t);
199                         }
200                         chmod(full_name, file_entry->mode);
201                         chown(full_name, file_entry->uid, file_entry->gid);
202                 }
203         } else {
204                 /* If we arent extracting data we have to skip it, 
205                  * if data size is 0 then then just do it anyway
206                  * (saves testing for it) */
207                 seek_sub_file(src_stream, file_entry->size);
208         }
209
210         /* extract_list and extract_verbose_list can be used in conjunction
211          * with one of the above four extraction functions, so do this seperately */
212         if (function & extract_verbose_list) {
213                 fprintf(out_stream, "%s %d/%d %8d %s ", mode_string(file_entry->mode), 
214                         file_entry->uid, file_entry->gid,
215                         (int) file_entry->size, time_string(file_entry->mtime));
216         }
217         if ((function & extract_list) || (function & extract_verbose_list)){
218                 /* fputs doesnt add a trailing \n, so use fprintf */
219                 fprintf(out_stream, "%s\n", file_entry->name);
220         }
221
222         free(full_name);
223
224         return(NULL); /* Maybe we should say if failed */
225 }
226 #endif
227
228 #ifdef L_unarchive
229 char *unarchive(FILE *src_stream, FILE *out_stream, file_header_t *(*get_headers)(FILE *),
230         const int extract_function, const char *prefix, char **extract_names)
231 {
232         file_header_t *file_entry;
233         int extract_flag;
234         int i;
235         char *buffer = NULL;
236
237         archive_offset = 0;
238         while ((file_entry = get_headers(src_stream)) != NULL) {
239                 extract_flag = TRUE;
240                 if (extract_names != NULL) {
241                         int found_flag = FALSE;
242                         for(i = 0; extract_names[i] != 0; i++) {
243                                 if (strcmp(extract_names[i], file_entry->name) == 0) {
244                                         found_flag = TRUE;
245                                         break;
246                                 }
247                         }
248                         if (extract_function & extract_exclude_list) {
249                                 if (found_flag == TRUE) {
250                                         extract_flag = FALSE;
251                                 }
252                         } else {
253                                 /* If its not found in the include list dont extract it */
254                                 if (found_flag == FALSE) {
255                                         extract_flag = FALSE;
256                                 }
257                         }
258
259                 }
260
261                 if (extract_flag == TRUE) {
262                         buffer = extract_archive(src_stream, out_stream, file_entry, extract_function, prefix);
263                 } else {
264                         /* seek past the data entry */
265                         seek_sub_file(src_stream, file_entry->size);
266                 }
267         }
268         return(buffer);
269 }
270 #endif
271
272 #ifdef L_get_header_ar
273 file_header_t *get_header_ar(FILE *src_stream)
274 {
275         file_header_t *typed;
276         union {
277                 char raw[60];
278                 struct {
279                         char name[16];
280                         char date[12];
281                         char uid[6];
282                         char gid[6];
283                         char mode[8];
284                         char size[10];
285                         char magic[2];
286                 } formated;
287         } ar;
288         static char *ar_long_names;
289
290         if (fread(ar.raw, 1, 60, src_stream) != 60) {
291                 return(NULL);
292         }
293         archive_offset += 60;
294         /* align the headers based on the header magic */
295         if ((ar.formated.magic[0] != '`') || (ar.formated.magic[1] != '\n')) {
296                 /* some version of ar, have an extra '\n' after each data entry,
297                  * this puts the next header out by 1 */
298                 if (ar.formated.magic[1] != '`') {
299                         error_msg("Invalid magic");
300                         return(NULL);
301                 }
302                 /* read the next char out of what would be the data section,
303                  * if its a '\n' then it is a valid header offset by 1*/
304                 archive_offset++;
305                 if (fgetc(src_stream) != '\n') {
306                         error_msg("Invalid magic");
307                         return(NULL);
308                 }
309                 /* fix up the header, we started reading 1 byte too early */
310                 /* raw_header[60] wont be '\n' as it should, but it doesnt matter */
311                 memmove(ar.raw, &ar.raw[1], 59);
312         }
313                 
314         typed = (file_header_t *) xcalloc(1, sizeof(file_header_t));
315
316         typed->size = (size_t) atoi(ar.formated.size);
317         /* long filenames have '/' as the first character */
318         if (ar.formated.name[0] == '/') {
319                 if (ar.formated.name[1] == '/') {
320                         /* If the second char is a '/' then this entries data section
321                          * stores long filename for multiple entries, they are stored
322                          * in static variable long_names for use in future entries */
323                         ar_long_names = (char *) xrealloc(ar_long_names, typed->size);
324                         fread(ar_long_names, 1, typed->size, src_stream);
325                         archive_offset += typed->size;
326                         /* This ar entries data section only contained filenames for other records
327                          * they are stored in the static ar_long_names for future reference */
328                         return (get_header_ar(src_stream)); /* Return next header */
329                 } else if (ar.formated.name[1] == ' ') {
330                         /* This is the index of symbols in the file for compilers */
331                         seek_sub_file(src_stream, typed->size);
332                         return (get_header_ar(src_stream)); /* Return next header */
333                 } else {
334                         /* The number after the '/' indicates the offset in the ar data section
335                         (saved in variable long_name) that conatains the real filename */
336                         if (!ar_long_names) {
337                                 error_msg("Cannot resolve long file name");
338                                 return (NULL);
339                         }
340                         typed->name = xstrdup(ar_long_names + atoi(&ar.formated.name[1]));
341                 }
342         } else {
343                 /* short filenames */
344                 typed->name = xcalloc(1, 16);
345                 strncpy(typed->name, ar.formated.name, 16);
346         }
347         typed->name[strcspn(typed->name, " /")]='\0';
348
349         /* convert the rest of the now valid char header to its typed struct */ 
350         parse_mode(ar.formated.mode, &typed->mode);
351         typed->mtime = atoi(ar.formated.date);
352         typed->uid = atoi(ar.formated.uid);
353         typed->gid = atoi(ar.formated.gid);
354
355         return(typed);
356 }
357 #endif
358
359 #ifdef L_get_header_cpio
360 struct hardlinks {
361         file_header_t *entry;
362         int inode;
363         struct hardlinks *next;
364 };
365
366 file_header_t *get_header_cpio(FILE *src_stream)
367 {
368         file_header_t *cpio_entry = NULL;
369         char cpio_header[110];
370         int namesize;
371         char dummy[16];
372         int major, minor, nlink, inode;
373         static struct hardlinks *saved_hardlinks = NULL;
374         static int pending_hardlinks = 0;
375
376         if (pending_hardlinks) { /* Deal with any pending hardlinks */
377                 struct hardlinks *tmp = saved_hardlinks, *oldtmp = NULL;
378                 while (tmp) {
379                         if (tmp->entry->link_name) { /* Found a hardlink ready to be extracted */
380                                 cpio_entry = tmp->entry;
381                                 if (oldtmp) oldtmp->next = tmp->next; /* Remove item from linked list */
382                                 else saved_hardlinks = tmp->next;
383                                 free(tmp);
384                                 return (cpio_entry);
385                         }
386                         oldtmp = tmp;
387                         tmp = tmp->next;
388                 }
389                 pending_hardlinks = 0; /* No more pending hardlinks, read next file entry */
390         }
391   
392         /* There can be padding before archive header */
393         seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4);
394         if (fread(cpio_header, 1, 110, src_stream) == 110) {
395                 archive_offset += 110;
396                 if (strncmp(cpio_header, "07070", 5) != 0) {
397                         error_msg("Unsupported format or invalid magic");
398                         return(NULL);
399                 }
400                 switch (cpio_header[5]) {
401                         case '2': /* "crc" header format */
402                                 /* Doesnt do the crc check yet */
403                         case '1': /* "newc" header format */
404                                 cpio_entry = (file_header_t *) xcalloc(1, sizeof(file_header_t));
405                                 sscanf(cpio_header, "%6c%8x%8x%8x%8x%8x%8lx%8lx%16c%8x%8x%8x%8c",
406                                         dummy, &inode, (unsigned int*)&cpio_entry->mode, 
407                                         (unsigned int*)&cpio_entry->uid, (unsigned int*)&cpio_entry->gid,
408                                         &nlink, &cpio_entry->mtime, &cpio_entry->size,
409                                         dummy, &major, &minor, &namesize, dummy);
410
411                                 cpio_entry->name = (char *) xcalloc(1, namesize);
412                                 fread(cpio_entry->name, 1, namesize, src_stream); /* Read in filename */
413                                 archive_offset += namesize;
414                                 /* Skip padding before file contents */
415                                 seek_sub_file(src_stream, (4 - (archive_offset % 4)) % 4);
416                                 if (strcmp(cpio_entry->name, "TRAILER!!!") == 0) {
417                                         printf("%d blocks\n", (int) (archive_offset % 512 ? (archive_offset / 512) + 1 : archive_offset / 512)); /* Always round up */
418                                         if (saved_hardlinks) { /* Bummer - we still have unresolved hardlinks */
419                                                 struct hardlinks *tmp = saved_hardlinks, *oldtmp = NULL;
420                                                 while (tmp) {
421                                                         error_msg("%s not created: cannot resolve hardlink", tmp->entry->name);
422                                                         oldtmp = tmp;
423                                                         tmp = tmp->next;
424                                                         free (oldtmp->entry->name);
425                                                         free (oldtmp->entry);
426                                                         free (oldtmp);
427                                                 }
428                                                 saved_hardlinks = NULL;
429                                                 pending_hardlinks = 0;
430                                         }
431                                         return(NULL);
432                                 }
433
434                                 if (S_ISLNK(cpio_entry->mode)) {
435                                         cpio_entry->link_name = (char *) xcalloc(1, cpio_entry->size + 1);
436                                         fread(cpio_entry->link_name, 1, cpio_entry->size, src_stream);
437                                         archive_offset += cpio_entry->size;
438                                         cpio_entry->size = 0; /* Stop possiable seeks in future */
439                                 }
440                                 if (nlink > 1 && !S_ISDIR(cpio_entry->mode)) {
441                                         if (cpio_entry->size == 0) { /* Put file on a linked list for later */
442                                                 struct hardlinks *new = xmalloc(sizeof(struct hardlinks));
443                                                 new->next = saved_hardlinks;
444                                                 new->inode = inode;
445                                                 new->entry = cpio_entry;
446                                                 saved_hardlinks = new;
447                                         return(get_header_cpio(src_stream)); /* Recurse to next file */
448                                         } else { /* Found the file with data in */
449                                                 struct hardlinks *tmp = saved_hardlinks;
450                                                 pending_hardlinks = 1;
451                                                 while (tmp) {
452                                                         if (tmp->inode == inode) {
453                                                                 tmp->entry->link_name = xstrdup(cpio_entry->name);
454                                                                 nlink--;
455                                                         }
456                                                         tmp = tmp->next;
457                                                 }
458                                                 if (nlink > 1) error_msg("error resolving hardlink: did you create the archive with GNU cpio 2.0-2.2?");
459                                         }
460                                 }
461                                 cpio_entry->device = (major << 8) | minor;
462                                 break;
463                         default:
464                                 error_msg("Unsupported format");
465                                 return(NULL);
466                 }
467                 if (ferror(src_stream) || feof(src_stream)) {
468                         perror_msg("Stream error");
469                         return(NULL);
470                 }
471         }
472         return(cpio_entry);
473 }
474 #endif
475
476 #ifdef L_get_header_tar
477 file_header_t *get_header_tar(FILE *tar_stream)
478 {
479         union {
480                 unsigned char raw[512];
481                 struct {
482                         char name[100];         /*   0-99 */
483                         char mode[8];           /* 100-107 */
484                         char uid[8];            /* 108-115 */
485                         char gid[8];            /* 116-123 */
486                         char size[12];          /* 124-135 */
487                         char mtime[12];         /* 136-147 */
488                         char chksum[8];         /* 148-155 */
489                         char typeflag;          /* 156-156 */
490                         char linkname[100];     /* 157-256 */
491                         char magic[6];          /* 257-262 */
492                         char version[2];        /* 263-264 */
493                         char uname[32];         /* 265-296 */
494                         char gname[32];         /* 297-328 */
495                         char devmajor[8];       /* 329-336 */
496                         char devminor[8];       /* 337-344 */
497                         char prefix[155];       /* 345-499 */
498                         char padding[12];       /* 500-512 */
499                 } formated;
500         } tar;
501         file_header_t *tar_entry = NULL;
502         long i;
503         long sum = 0;
504
505         if (archive_offset % 512 != 0) {
506                 seek_sub_file(tar_stream, 512 - (archive_offset % 512));
507         }
508
509         if (fread(tar.raw, 1, 512, tar_stream) != 512) {
510                 /* Unfortunatly its common for tar files to have all sorts of
511                  * trailing garbage, fail silently */
512 //              error_msg("Couldnt read header");
513                 return(NULL);
514         }
515         archive_offset += 512;
516
517         /* Check header has valid magic, unfortunately some tar files
518          * have empty (0'ed) tar entries at the end, which will
519          * cause this to fail, so fail silently for now
520          */
521         if (strncmp(tar.formated.magic, "ustar", 5) != 0) {
522                 return(NULL);
523         }
524
525         /* Do checksum on headers */
526         for (i =  0; i < 148 ; i++) {
527                 sum += tar.raw[i];
528         }
529         sum += ' ' * 8;
530         for (i =  156; i < 512 ; i++) {
531                 sum += tar.raw[i];
532         }
533         if (sum != strtol(tar.formated.chksum, NULL, 8)) {
534                 error_msg("Invalid tar header checksum");
535                 return(NULL);
536         }
537
538         /* convert to type'ed variables */
539         tar_entry = xcalloc(1, sizeof(file_header_t));
540         tar_entry->name = xstrdup(tar.formated.name);
541
542         parse_mode(tar.formated.mode, &tar_entry->mode);
543         tar_entry->uid   = strtol(tar.formated.uid, NULL, 8);
544         tar_entry->gid   = strtol(tar.formated.gid, NULL, 8);
545         tar_entry->size  = strtol(tar.formated.size, NULL, 8);
546         tar_entry->mtime = strtol(tar.formated.mtime, NULL, 8);
547         tar_entry->link_name  = strlen(tar.formated.linkname) ? 
548             xstrdup(tar.formated.linkname) : NULL;
549         tar_entry->device = (strtol(tar.formated.devmajor, NULL, 8) << 8) +
550                 strtol(tar.formated.devminor, NULL, 8);
551
552         return(tar_entry);
553 }
554 #endif
555
556 #ifdef L_deb_extract
557 char *deb_extract(const char *package_filename, FILE *out_stream, 
558         const int extract_function, const char *prefix, const char *filename)
559 {
560         FILE *deb_stream;
561         FILE *uncompressed_stream = NULL;
562         file_header_t *ar_header = NULL;
563         char **file_list = NULL;
564         char *output_buffer = NULL;
565         char *ared_file = NULL;
566         char ar_magic[8];
567         int gunzip_pid;
568
569         if (filename != NULL) {
570                 file_list = xmalloc(sizeof(char *) * 2);
571                 file_list[0] = xstrdup(filename);
572                 file_list[1] = NULL;
573         }
574         
575         if (extract_function & extract_control_tar_gz) {
576                 ared_file = xstrdup("control.tar.gz");
577         }
578         else if (extract_function & extract_data_tar_gz) {              
579                 ared_file = xstrdup("data.tar.gz");
580         }
581
582         /* open the debian package to be worked on */
583         deb_stream = wfopen(package_filename, "r");
584         if (deb_stream == NULL) {
585                 return(NULL);
586         }
587         /* set the buffer size */
588         setvbuf(deb_stream, NULL, _IOFBF, 0x8000);
589
590         /* check ar magic */
591         fread(ar_magic, 1, 8, deb_stream);
592         if (strncmp(ar_magic,"!<arch>",7) != 0) {
593                 error_msg_and_die("invalid magic");
594         }
595         archive_offset = 8;
596
597         while ((ar_header = get_header_ar(deb_stream)) != NULL) {
598                 if (strcmp(ared_file, ar_header->name) == 0) {
599                         /* open a stream of decompressed data */
600                         uncompressed_stream = gz_open(deb_stream, &gunzip_pid);
601                         archive_offset = 0;
602                         output_buffer = unarchive(uncompressed_stream, out_stream, get_header_tar, extract_function, prefix, file_list);
603                 }
604                 seek_sub_file(deb_stream, ar_header->size);
605         }
606         gz_close(gunzip_pid);
607         fclose(deb_stream);
608         fclose(uncompressed_stream);
609         free(ared_file);
610         return(output_buffer);
611 }
612 #endif