tar: add support for --strip-components=N
author Denys Vlasenko <vda.linux@googlemail.com>
Wed, 21 Oct 2015 23:07:13 +0000 (01:07 +0200)
committer Denys Vlasenko <vda.linux@googlemail.com>
Wed, 21 Oct 2015 23:07:13 +0000 (01:07 +0200)
function                                             old     new   delta
data_extract_all                                     882     995    +113
tar_longopts                                         290     309     +19
tar_main                                             938     942      +4
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 136/0)             Total: 136 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
archival/libarchive/data_extract_all.c
archival/libarchive/get_header_tar.c
archival/tar.c
include/bb_archive.h
testsuite/tar.tests

index 45776dcbe9daa3c5d97430f5d337949d489f9e31..bd51d2ad3dd93499794c31e8c74802734a907203 100644 (file)
@@ -8,9 +8,17 @@
 
 void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 {
+
        file_header_t *file_header = archive_handle->file_header;
        int dst_fd;
        int res;
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+       char *dst_name;
+       char *dst_link;
+#else
+# define dst_name (file_header->name)
+# define dst_link (file_header->link_target)
+#endif
 
 #if ENABLE_FEATURE_TAR_SELINUX
        char *sctx = archive_handle->tar__sctx[PAX_NEXT_FILE];
@@ -23,11 +31,47 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
        }
 #endif
 
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+       dst_name = file_header->name;
+       dst_link = file_header->link_target;
+       if (archive_handle->tar__strip_components) {
+               unsigned n = archive_handle->tar__strip_components;
+               do {
+                       dst_name = strchr(dst_name, '/');
+                       if (!dst_name || dst_name[1] == '\0') {
+                               data_skip(archive_handle);
+                               return;
+                       }
+                       dst_name++;
+                       /*
+                        * Link target is shortened only for hardlinks:
+                        * softlinks restored unchanged.
+                        */
+                       if (S_ISREG(file_header->mode)
+                        && file_header->size == 0
+                        && dst_link
+                       ) {
+// GNU tar 1.26 does not check that we reached end of link name:
+// if "dir/hardlink" is hardlinked to "file",
+// tar xvf a.tar --strip-components=1 says:
+//  tar: hardlink: Cannot hard link to '': No such file or directory
+// and continues processing. We silently skip such entries.
+                               dst_link = strchr(dst_link, '/');
+                               if (!dst_link || dst_link[1] == '\0') {
+                                       data_skip(archive_handle);
+                                       return;
+                               }
+                               dst_link++;
+                       }
+               } while (--n != 0);
+       }
+#endif
+
        if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) {
-               char *slash = strrchr(file_header->name, '/');
+               char *slash = strrchr(dst_name, '/');
                if (slash) {
                        *slash = '\0';
-                       bb_make_directory(file_header->name, -1, FILEUTILS_RECUR);
+                       bb_make_directory(dst_name, -1, FILEUTILS_RECUR);
                        *slash = '/';
                }
        }
@@ -38,8 +82,8 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
                        /* Is it hardlink?
                         * We encode hard links as regular files of size 0 with a symlink */
                        if (S_ISREG(file_header->mode)
-                        && file_header->link_target
                         && file_header->size == 0
+                        && dst_link
                        ) {
                                /* Ugly special case:
                                 * tar cf t.tar hardlink1 hardlink2 hardlink1
@@ -48,22 +92,22 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
                                 * hardlink2 -> hardlink1
                                 * hardlink1 -> hardlink1 <== !!!
                                 */
-                               if (strcmp(file_header->link_target, file_header->name) == 0)
+                               if (strcmp(dst_link, dst_name) == 0)
                                        goto ret;
                        }
                        /* Proceed with deleting */
-                       if (unlink(file_header->name) == -1
+                       if (unlink(dst_name) == -1
                         && errno != ENOENT
                        ) {
                                bb_perror_msg_and_die("can't remove old file %s",
-                                               file_header->name);
+                                               dst_name);
                        }
                }
        }
        else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) {
                /* Remove the existing entry if its older than the extracted entry */
                struct stat existing_sb;
-               if (lstat(file_header->name, &existing_sb) == -1) {
+               if (lstat(dst_name, &existing_sb) == -1) {
                        if (errno != ENOENT) {
                                bb_perror_msg_and_die("can't stat old file");
                        }
@@ -73,30 +117,30 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
                         && !S_ISDIR(file_header->mode)
                        ) {
                                bb_error_msg("%s not created: newer or "
-                                       "same age file exists", file_header->name);
+                                       "same age file exists", dst_name);
                        }
                        data_skip(archive_handle);
                        goto ret;
                }
-               else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) {
+               else if ((unlink(dst_name) == -1) && (errno != EISDIR)) {
                        bb_perror_msg_and_die("can't remove old file %s",
-                                       file_header->name);
+                                       dst_name);
                }
        }
 
        /* Handle hard links separately
         * We encode hard links as regular files of size 0 with a symlink */
        if (S_ISREG(file_header->mode)
-        && file_header->link_target
         && file_header->size == 0
+        && dst_link
        ) {
-               /* hard link */
-               res = link(file_header->link_target, file_header->name);
-               if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
+               /* Hard link */
+               res = link(dst_link, dst_name);
+               if (res != 0 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
                        bb_perror_msg("can't create %slink "
                                        "from %s to %s", "hard",
-                                       file_header->name,
-                                       file_header->link_target);
+                                       dst_name,
+                                       dst_link);
                }
                /* Hardlinks have no separate mode/ownership, skip chown/chmod */
                goto ret;
@@ -106,17 +150,17 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
        switch (file_header->mode & S_IFMT) {
        case S_IFREG: {
                /* Regular file */
-               char *dst_name;
+               char *dst_nameN;
                int flags = O_WRONLY | O_CREAT | O_EXCL;
                if (archive_handle->ah_flags & ARCHIVE_O_TRUNC)
                        flags = O_WRONLY | O_CREAT | O_TRUNC;
-               dst_name = file_header->name;
+               dst_nameN = dst_name;
 #ifdef ARCHIVE_REPLACE_VIA_RENAME
                if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME)
                        /* rpm-style temp file name */
-                       dst_name = xasprintf("%s;%x", dst_name, (int)getpid());
+                       dst_nameN = xasprintf("%s;%x", dst_name, (int)getpid());
 #endif
-               dst_fd = xopen3(dst_name,
+               dst_fd = xopen3(dst_nameN,
                        flags,
                        file_header->mode
                        );
@@ -124,32 +168,32 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
                close(dst_fd);
 #ifdef ARCHIVE_REPLACE_VIA_RENAME
                if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME) {
-                       xrename(dst_name, file_header->name);
-                       free(dst_name);
+                       xrename(dst_nameN, dst_name);
+                       free(dst_nameN);
                }
 #endif
                break;
        }
        case S_IFDIR:
-               res = mkdir(file_header->name, file_header->mode);
+               res = mkdir(dst_name, file_header->mode);
                if ((res == -1)
                 && (errno != EISDIR) /* btw, Linux doesn't return this */
                 && (errno != EEXIST)
                 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
                ) {
-                       bb_perror_msg("can't make dir %s", file_header->name);
+                       bb_perror_msg("can't make dir %s", dst_name);
                }
                break;
        case S_IFLNK:
                /* Symlink */
 //TODO: what if file_header->link_target == NULL (say, corrupted tarball?)
-               res = symlink(file_header->link_target, file_header->name);
-               if ((res == -1)
+               res = symlink(file_header->link_target, dst_name);
+               if (res != 0
                 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
                ) {
                        bb_perror_msg("can't create %slink "
                                "from %s to %s", "sym",
-                               file_header->name,
+                               dst_name,
                                file_header->link_target);
                }
                break;
@@ -157,11 +201,11 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
        case S_IFBLK:
        case S_IFCHR:
        case S_IFIFO:
-               res = mknod(file_header->name, file_header->mode, file_header->device);
+               res = mknod(dst_name, file_header->mode, file_header->device);
                if ((res == -1)
                 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
                ) {
-                       bb_perror_msg("can't create node %s", file_header->name);
+                       bb_perror_msg("can't create node %s", dst_name);
                }
                break;
        default:
@@ -186,20 +230,20 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
                        }
 #endif
                        /* GNU tar 1.15.1 uses chown, not lchown */
-                       chown(file_header->name, uid, gid);
+                       chown(dst_name, uid, gid);
                }
                /* uclibc has no lchmod, glibc is even stranger -
                 * it has lchmod which seems to do nothing!
                 * so we use chmod... */
                if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) {
-                       chmod(file_header->name, file_header->mode);
+                       chmod(dst_name, file_header->mode);
                }
                if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) {
                        struct timeval t[2];
 
                        t[1].tv_sec = t[0].tv_sec = file_header->mtime;
                        t[1].tv_usec = t[0].tv_usec = 0;
-                       utimes(file_header->name, t);
+                       utimes(dst_name, t);
                }
        }
 
index fb68673b900fdf82a69b42fcd55cce4ee2e07c34..ac2be726f205fb726d618fc4910020d31ab30d66 100644 (file)
@@ -418,6 +418,7 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 
        /* Everything up to and including last ".." component is stripped */
        overlapping_strcpy(file_header->name, strip_unsafe_prefix(file_header->name));
+//TODO: do the same for file_header->link_target?
 
        /* Strip trailing '/' in directories */
        /* Must be done after mode is set as '/' is used to check if it's a directory */
index aa03ba9901767379351bfe36f94e08069dd5a62a..566ba34f68718cf7b3e7c7af6448c7acebb0b37d 100644 (file)
 # define FNM_LEADING_DIR 0
 #endif
 
-
-//#define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__)
-#define DBG(...) ((void)0)
+#if 0
+# define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__)
+#else
+# define DBG(...) ((void)0)
+#endif
+#define DBG_OPTION_PARSING 0
 
 
 #define block_buf bb_common_bufsiz1
@@ -855,6 +858,7 @@ enum {
        IF_FEATURE_SEAMLESS_Z(   OPTBIT_COMPRESS    ,)
        IF_FEATURE_TAR_NOPRESERVE_TIME(OPTBIT_NOPRESERVE_TIME,)
 #if ENABLE_FEATURE_TAR_LONG_OPTIONS
+       OPTBIT_STRIP_COMPONENTS,
        OPTBIT_NORECURSION,
        IF_FEATURE_TAR_TO_COMMAND(OPTBIT_2COMMAND   ,)
        OPTBIT_NUMERIC_OWNER,
@@ -879,12 +883,13 @@ enum {
        OPT_GZIP         = IF_FEATURE_SEAMLESS_GZ(  (1 << OPTBIT_GZIP        )) + 0, // z
        OPT_XZ           = IF_FEATURE_SEAMLESS_XZ(  (1 << OPTBIT_XZ          )) + 0, // J
        OPT_COMPRESS     = IF_FEATURE_SEAMLESS_Z(   (1 << OPTBIT_COMPRESS    )) + 0, // Z
-       OPT_NOPRESERVE_TIME = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m
-       OPT_NORECURSION     = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION    )) + 0, // no-recursion
-       OPT_2COMMAND        = IF_FEATURE_TAR_TO_COMMAND(  (1 << OPTBIT_2COMMAND       )) + 0, // to-command
-       OPT_NUMERIC_OWNER   = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER  )) + 0, // numeric-owner
-       OPT_NOPRESERVE_PERM = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions
-       OPT_OVERWRITE       = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE      )) + 0, // overwrite
+       OPT_NOPRESERVE_TIME  = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m
+       OPT_STRIP_COMPONENTS = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_STRIP_COMPONENTS)) + 0, // strip-components
+       OPT_NORECURSION      = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION    )) + 0, // no-recursion
+       OPT_2COMMAND         = IF_FEATURE_TAR_TO_COMMAND(  (1 << OPTBIT_2COMMAND       )) + 0, // to-command
+       OPT_NUMERIC_OWNER    = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER  )) + 0, // numeric-owner
+       OPT_NOPRESERVE_PERM  = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions
+       OPT_OVERWRITE        = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE      )) + 0, // overwrite
 
        OPT_ANY_COMPRESS = (OPT_BZIP2 | OPT_LZMA | OPT_GZIP | OPT_XZ | OPT_COMPRESS),
 };
@@ -928,6 +933,7 @@ static const char tar_longopts[] ALIGN1 =
 # if ENABLE_FEATURE_TAR_NOPRESERVE_TIME
        "touch\0"               No_argument       "m"
 # endif
+       "strip-components\0"    Required_argument "\xf9"
        "no-recursion\0"        No_argument       "\xfa"
 # if ENABLE_FEATURE_TAR_TO_COMMAND
        "to-command\0"          Required_argument "\xfb"
@@ -973,11 +979,15 @@ int tar_main(int argc UNUSED_PARAM, char **argv)
                "tt:vv:" // count -t,-v
                IF_FEATURE_TAR_FROM("X::T::") // cumulative lists
 #if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM
-               "\xff::" // cumulative lists for --exclude
+               "\xff::" // --exclude=PATTERN is a list
 #endif
                IF_FEATURE_TAR_CREATE("c:") "t:x:" // at least one of these is reqd
                IF_FEATURE_TAR_CREATE("c--tx:t--cx:x--ct") // mutually exclusive
-               IF_NOT_FEATURE_TAR_CREATE("t--x:x--t"); // mutually exclusive
+               IF_NOT_FEATURE_TAR_CREATE("t--x:x--t") // mutually exclusive
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+               ":\xf9+" // --strip-components=NUM
+#endif
+       ;
 #if ENABLE_FEATURE_TAR_LONG_OPTIONS
        applet_long_options = tar_longopts;
 #endif
@@ -1018,10 +1028,14 @@ int tar_main(int argc UNUSED_PARAM, char **argv)
                IF_FEATURE_SEAMLESS_XZ(  "J"   )
                IF_FEATURE_SEAMLESS_Z(   "Z"   )
                IF_FEATURE_TAR_NOPRESERVE_TIME("m")
+               IF_FEATURE_TAR_LONG_OPTIONS("\xf9:") // --strip-components
                , &base_dir // -C dir
                , &tar_filename // -f filename
                IF_FEATURE_TAR_FROM(, &(tar_handle->accept)) // T
                IF_FEATURE_TAR_FROM(, &(tar_handle->reject)) // X
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+               , &tar_handle->tar__strip_components // --strip-components
+#endif
                IF_FEATURE_TAR_TO_COMMAND(, &(tar_handle->tar__to_command)) // --to-command
 #if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM
                , &excludes // --exclude
@@ -1029,11 +1043,49 @@ int tar_main(int argc UNUSED_PARAM, char **argv)
                , &verboseFlag // combined count for -t and -v
                , &verboseFlag // combined count for -t and -v
                );
-       //bb_error_msg("opt:%08x", opt);
+#if DBG_OPTION_PARSING
+       bb_error_msg("opt: 0x%08x", opt);
+# define showopt(o) bb_error_msg("opt & %s(%x): %x", #o, o, opt & o);
+       showopt(OPT_TEST            );
+       showopt(OPT_EXTRACT         );
+       showopt(OPT_BASEDIR         );
+       showopt(OPT_TARNAME         );
+       showopt(OPT_2STDOUT         );
+       showopt(OPT_NOPRESERVE_OWNER);
+       showopt(OPT_P               );
+       showopt(OPT_VERBOSE         );
+       showopt(OPT_KEEP_OLD        );
+       showopt(OPT_CREATE          );
+       showopt(OPT_DEREFERENCE     );
+       showopt(OPT_BZIP2           );
+       showopt(OPT_LZMA            );
+       showopt(OPT_INCLUDE_FROM    );
+       showopt(OPT_EXCLUDE_FROM    );
+       showopt(OPT_GZIP            );
+       showopt(OPT_XZ              );
+       showopt(OPT_COMPRESS        );
+       showopt(OPT_NOPRESERVE_TIME );
+       showopt(OPT_STRIP_COMPONENTS);
+       showopt(OPT_NORECURSION     );
+       showopt(OPT_2COMMAND        );
+       showopt(OPT_NUMERIC_OWNER   );
+       showopt(OPT_NOPRESERVE_PERM );
+       showopt(OPT_OVERWRITE       );
+       showopt(OPT_ANY_COMPRESS    );
+       bb_error_msg("base_dir:'%s'", base_dir);
+       bb_error_msg("tar_filename:'%s'", tar_filename);
+       bb_error_msg("verboseFlag:%d", verboseFlag);
+       bb_error_msg("tar_handle->tar__to_command:'%s'", tar_handle->tar__to_command);
+       bb_error_msg("tar_handle->tar__strip_components:%u", tar_handle->tar__strip_components);
+       return 0;
+# undef showopt
+#endif
        argv += optind;
 
-       if (verboseFlag) tar_handle->action_header = header_verbose_list;
-       if (verboseFlag == 1) tar_handle->action_header = header_list;
+       if (verboseFlag)
+               tar_handle->action_header = header_verbose_list;
+       if (verboseFlag == 1)
+               tar_handle->action_header = header_list;
 
        if (opt & OPT_EXTRACT)
                tar_handle->action_data = data_extract_all;
index 2329d025d4dc79e05477fa863a5276062f329a41..10969b56780d0466e98c75da0c87f1581e057731 100644 (file)
@@ -77,6 +77,9 @@ typedef struct archive_handle_t {
        off_t offset;
 
        /* Archiver specific. Can make it a union if it ever gets big */
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+       unsigned tar__strip_components;
+#endif
 #define PAX_NEXT_FILE 0
 #define PAX_GLOBAL    1
 #if ENABLE_TAR || ENABLE_DPKG || ENABLE_DPKG_DEB
index 4929f4e499a851583db0af0f3aaf032e37270981..383a4646c7eab2456116aa1f44256647ca112804 100755 (executable)
@@ -53,6 +53,15 @@ dd if=/dev/zero bs=512 count=20 2>/dev/null | tar xvf - 2>&1; echo $?
 "" ""
 SKIP=
 
+# "tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input":
+# GNU tar 1.26 records as hardlinks:
+#  input_hard2 -> input_hard1
+#  input_hard1 -> input_hard1 (!!!)
+#  input_dir/file -> input_dir/file
+#  input -> input
+# As of 1.24.0, we don't record last two: for them, nlink==1
+# and we check for "hardlink"ness only files with nlink!=1
+# We also don't use "hrw-r--r--" notation for hardlinks in "tar tv" listing.
 optional FEATURE_TAR_CREATE FEATURE_LS_SORTFILES
 testing "tar hardlinks and repeated files" '\
 rm -rf input_* test.tar 2>/dev/null
@@ -64,6 +73,7 @@ chmod -R 644 *
 chmod    755 input_dir
 tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input
 tar tvf test.tar | sed "s/.*[0-9] input/input/"
+rm -rf input_dir
 tar xf test.tar 2>&1
 echo Ok: $?
 ls -l . input_dir/* | grep input_ | sed "s/\\(^[^ ]*\\) .* input/\\1 input/"