less: document -S flag and make it independently configurable
[oweals/busybox.git] / archival / libarchive / get_header_tar.c
index 2e03327926fe0d4494dbb4a1fb05b8a883264a3f..fb68673b900fdf82a69b42fcd55cce4ee2e07c34 100644 (file)
  */
 
 #include "libbb.h"
-#include "archive.h"
+#include "bb_archive.h"
 
 typedef uint32_t aliased_uint32_t FIX_ALIASING;
 typedef off_t    aliased_off_t    FIX_ALIASING;
 
-
 /* NB: _DESTROYS_ str[len] character! */
 static unsigned long long getOctal(char *str, int len)
 {
@@ -50,34 +49,31 @@ static unsigned long long getOctal(char *str, int len)
                 *
                 * NB: tarballs with NEGATIVE unix times encoded that way were seen!
                 */
-               v = first;
-               /* Sign-extend using 6th bit: */
-               v <<= sizeof(unsigned long long)*8 - 7;
-               v = (long long)v >> (sizeof(unsigned long long)*8 - 7);
+               /* Sign-extend 7bit 'first' to 64bit 'v' (that is, using 6th bit as sign): */
+               first <<= 1;
+               first >>= 1; /* now 7th bit = 6th bit */
+               v = first;   /* sign-extend 8 bits to 64 */
                while (--len != 0)
-                       v = (v << 8) + (unsigned char) *str++;
+                       v = (v << 8) + (uint8_t) *++str;
        }
        return v;
 }
 #define GET_OCTAL(a) getOctal((a), sizeof(a))
 
-#if ENABLE_FEATURE_TAR_SELINUX
-/* Scan a PAX header for SELinux contexts, via "RHT.security.selinux" keyword.
- * This is what Red Hat's patched version of tar uses.
- */
-# define SELINUX_CONTEXT_KEYWORD "RHT.security.selinux"
-static char *get_selinux_sctx_from_pax_hdr(archive_handle_t *archive_handle, unsigned sz)
+/* "global" is 0 or 1 */
+static void process_pax_hdr(archive_handle_t *archive_handle, unsigned sz, int global)
 {
        char *buf, *p;
-       char *result;
+       unsigned blk_sz;
+
+       blk_sz = (sz + 511) & (~511);
+       p = buf = xmalloc(blk_sz + 1);
+       xread(archive_handle->src_fd, buf, blk_sz);
+       archive_handle->offset += blk_sz;
 
-       p = buf = xmalloc(sz + 1);
        /* prevent bb_strtou from running off the buffer */
        buf[sz] = '\0';
-       xread(archive_handle->src_fd, buf, sz);
-       archive_handle->offset += sz;
 
-       result = NULL;
        while (sz != 0) {
                char *end, *value;
                unsigned len;
@@ -89,7 +85,9 @@ static char *get_selinux_sctx_from_pax_hdr(archive_handle_t *archive_handle, uns
                 */
                p += len;
                sz -= len;
-               if ((int)sz < 0
+               if (
+               /** (int)sz < 0 - not good enough for huge malicious VALUE of 2^32-1 */
+                   (int)(sz|len) < 0 /* this works */
                 || len == 0
                 || errno != EINVAL
                 || *end != ' '
@@ -104,19 +102,33 @@ static char *get_selinux_sctx_from_pax_hdr(archive_handle_t *archive_handle, uns
                 * (we do not bother to check that it *was* a newline)
                 */
                p[-1] = '\0';
-               /* Is it selinux security context? */
                value = end + 1;
-               if (strncmp(value, SELINUX_CONTEXT_KEYWORD"=", sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1) == 0) {
+
+#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
+               if (!global && is_prefixed_with(value, "path=")) {
+                       value += sizeof("path=") - 1;
+                       free(archive_handle->tar__longname);
+                       archive_handle->tar__longname = xstrdup(value);
+                       continue;
+               }
+#endif
+
+#if ENABLE_FEATURE_TAR_SELINUX
+               /* Scan for SELinux contexts, via "RHT.security.selinux" keyword.
+                * This is what Red Hat's patched version of tar uses.
+                */
+# define SELINUX_CONTEXT_KEYWORD "RHT.security.selinux"
+               if (is_prefixed_with(value, SELINUX_CONTEXT_KEYWORD"=")) {
                        value += sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1;
-                       result = xstrdup(value);
-                       break;
+                       free(archive_handle->tar__sctx[global]);
+                       archive_handle->tar__sctx[global] = xstrdup(value);
+                       continue;
                }
+#endif
        }
 
        free(buf);
-       return result;
 }
-#endif
 
 char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 {
@@ -158,13 +170,13 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
         * the message and we don't check whether we indeed
         * saw zero block directly before this. */
        if (i == 0) {
-               xfunc_error_retval = 0;
- short_read:
-               bb_error_msg_and_die("short read");
+               bb_error_msg("short read");
+               /* this merely signals end of archive, not exit(1): */
+               return EXIT_FAILURE;
        }
        if (i != 512) {
                IF_FEATURE_TAR_AUTODETECT(goto autodetect;)
-               goto short_read;
+               bb_error_msg_and_die("short read");
        }
 
 #else
@@ -181,57 +193,32 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
                         */
                        while (full_read(archive_handle->src_fd, &tar, 512) == 512)
                                continue;
-                       return EXIT_FAILURE;
+                       return EXIT_FAILURE; /* "end of archive" */
                }
                archive_handle->tar__end = 1;
-               return EXIT_SUCCESS;
+               return EXIT_SUCCESS; /* "decoded one header" */
        }
        archive_handle->tar__end = 0;
 
        /* Check header has valid magic, "ustar" is for the proper tar,
         * five NULs are for the old tar format  */
-       if (strncmp(tar.magic, "ustar", 5) != 0
+       if (!is_prefixed_with(tar.magic, "ustar")
         && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY
             || memcmp(tar.magic, "\0\0\0\0", 5) != 0)
        ) {
 #if ENABLE_FEATURE_TAR_AUTODETECT
-               char FAST_FUNC (*get_header_ptr)(archive_handle_t *);
-               uint16_t magic2;
-
  autodetect:
-               magic2 = *(bb__aliased_uint16_t*)tar.name;
-               /* tar gz/bz autodetect: check for gz/bz2 magic.
-                * If we see the magic, and it is the very first block,
-                * we can switch to get_header_tar_gz/bz2/lzma().
-                * Needs seekable fd. I wish recv(MSG_PEEK) works
-                * on any fd... */
-# if ENABLE_FEATURE_SEAMLESS_GZ
-               if (magic2 == GZIP_MAGIC) {
-                       get_header_ptr = get_header_tar_gz;
-               } else
-# endif
-# if ENABLE_FEATURE_SEAMLESS_BZ2
-               if (magic2 == BZIP2_MAGIC
-                && tar.name[2] == 'h' && isdigit(tar.name[3])
-               ) { /* bzip2 */
-                       get_header_ptr = get_header_tar_bz2;
-               } else
-# endif
-# if ENABLE_FEATURE_SEAMLESS_XZ
-               //TODO: if (magic2 == XZ_MAGIC1)...
-               //else
-# endif
-                       goto err;
                /* Two different causes for lseek() != 0:
                 * unseekable fd (would like to support that too, but...),
                 * or not first block (false positive, it's not .gz/.bz2!) */
                if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0)
                        goto err;
-               while (get_header_ptr(archive_handle) == EXIT_SUCCESS)
-                       continue;
-               return EXIT_FAILURE;
+               if (setup_unzip_on_fd(archive_handle->src_fd, /*fail_if_not_compressed:*/ 0) != 0)
  err:
-#endif /* FEATURE_TAR_AUTODETECT */
+                       bb_error_msg_and_die("invalid tar magic");
+               archive_handle->offset = 0;
+               goto again_after_align;
+#endif
                bb_error_msg_and_die("invalid tar magic");
        }
 
@@ -319,10 +306,20 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
        /* Set bits 12-15 of the files mode */
        /* (typeflag was not trashed because chksum does not use getOctal) */
        switch (tar.typeflag) {
-       /* busybox identifies hard links as being regular files with 0 size and a link name */
-       case '1':
+       case '1': /* hardlink */
+               /* we mark hardlinks as regular files with zero size and a link name */
                file_header->mode |= S_IFREG;
-               break;
+               /* on size of link fields from star(4)
+                * ... For tar archives written by pre POSIX.1-1988
+                * implementations, the size field usually contains the size of
+                * the file and needs to be ignored as no data may follow this
+                * header type.  For POSIX.1- 1988 compliant archives, the size
+                * field needs to be 0.  For POSIX.1-2001 compliant archives,
+                * the size field may be non zero, indicating that file data is
+                * included in the archive.
+                * i.e; always assume this is zero for safety.
+                */
+               goto size0;
        case '7':
        /* case 0: */
        case '0':
@@ -353,7 +350,14 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
        case '6':
                file_header->mode |= S_IFIFO;
                goto size0;
+       case 'g':       /* pax global header */
+       case 'x': {     /* pax extended header */
+               if ((uoff_t)file_header->size > 0xfffff) /* paranoia */
+                       goto skip_ext_hdr;
+               process_pax_hdr(archive_handle, file_header->size, (tar.typeflag == 'g'));
+               goto again_after_align;
 #if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
+/* See http://www.gnu.org/software/tar/manual/html_node/Extensions.html */
        case 'L':
                /* free: paranoia: tar with several consecutive longnames */
                free(p_longname);
@@ -373,18 +377,19 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
                archive_handle->offset += file_header->size;
                /* return get_header_tar(archive_handle); */
                goto again;
-       case 'D':       /* GNU dump dir */
-       case 'M':       /* Continuation of multi volume archive */
-       case 'N':       /* Old GNU for names > 100 characters */
-       case 'S':       /* Sparse file */
-       case 'V':       /* Volume header */
+/*
+ *     case 'S':       // Sparse file
+ * Was seen in the wild. Not supported (yet?).
+ * See https://www.gnu.org/software/tar/manual/html_section/tar_92.html
+ * for the format. (An "Old GNU Format" was seen, not PAX formats).
+ */
+//     case 'D':       /* GNU dump dir */
+//     case 'M':       /* Continuation of multi volume archive */
+//     case 'N':       /* Old GNU for names > 100 characters */
+//     case 'V':       /* Volume header */
 #endif
-#if !ENABLE_FEATURE_TAR_SELINUX
-       case 'g':       /* pax global header */
-       case 'x':       /* pax extended header */
-#else
+       }
  skip_ext_hdr:
-#endif
        {
                off_t sz;
                bb_error_msg("warning: skipping header '%c'", tar.typeflag);
@@ -396,18 +401,6 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
                /* return get_header_tar(archive_handle); */
                goto again_after_align;
        }
-#if ENABLE_FEATURE_TAR_SELINUX
-       case 'g':       /* pax global header */
-       case 'x': {     /* pax extended header */
-               char **pp;
-               if ((uoff_t)file_header->size > 0xfffff) /* paranoia */
-                       goto skip_ext_hdr;
-               pp = (tar.typeflag == 'g') ? &archive_handle->tar__global_sctx : &archive_handle->tar__next_file_sctx;
-               free(*pp);
-               *pp = get_selinux_sctx_from_pax_hdr(archive_handle, file_header->size);
-               goto again;
-       }
-#endif
        default:
                bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag);
        }
@@ -422,12 +415,9 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
                p_linkname = NULL;
        }
 #endif
-       if (strncmp(file_header->name, "/../"+1, 3) == 0
-        || strstr(file_header->name, "/../")
-       ) {
-               bb_error_msg_and_die("name with '..' encountered: '%s'",
-                               file_header->name);
-       }
+
+       /* Everything up to and including last ".." component is stripped */
+       overlapping_strcpy(file_header->name, strip_unsafe_prefix(file_header->name));
 
        /* Strip trailing '/' in directories */
        /* Must be done after mode is set as '/' is used to check if it's a directory */
@@ -440,9 +430,11 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
                if (cp)
                        *cp = '\0';
                archive_handle->action_data(archive_handle);
-               if (archive_handle->accept || archive_handle->reject)
+               if (archive_handle->accept || archive_handle->reject
+                || (archive_handle->ah_flags & ARCHIVE_REMEMBER_NAMES)
+               ) {
                        llist_add_to(&archive_handle->passed, file_header->name);
-               else /* Caller isn't interested in list of unpacked files */
+               else /* Caller isn't interested in list of unpacked files */
                        free(file_header->name);
        } else {
                data_skip(archive_handle);
@@ -457,5 +449,5 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
        free(file_header->tar__uname);
        free(file_header->tar__gname);
 #endif
-       return EXIT_SUCCESS;
+       return EXIT_SUCCESS; /* "decoded one header" */
 }