From 6c563e370d0f2f3cf36f3b274e8fe1392ca7125f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 22 Oct 2015 01:07:13 +0200 Subject: [PATCH] tar: add support for --strip-components=N function old new delta data_extract_all 882 995 +113 tar_longopts 290 309 +19 tar_main 938 942 +4 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/0 up/down: 136/0) Total: 136 bytes Signed-off-by: Denys Vlasenko --- archival/libarchive/data_extract_all.c | 108 +++++++++++++++++-------- archival/libarchive/get_header_tar.c | 1 + archival/tar.c | 80 ++++++++++++++---- include/bb_archive.h | 3 + testsuite/tar.tests | 10 +++ 5 files changed, 156 insertions(+), 46 deletions(-) diff --git a/archival/libarchive/data_extract_all.c b/archival/libarchive/data_extract_all.c index 45776dcbe..bd51d2ad3 100644 --- a/archival/libarchive/data_extract_all.c +++ b/archival/libarchive/data_extract_all.c @@ -8,9 +8,17 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) { + file_header_t *file_header = archive_handle->file_header; int dst_fd; int res; +#if ENABLE_FEATURE_TAR_LONG_OPTIONS + char *dst_name; + char *dst_link; +#else +# define dst_name (file_header->name) +# define dst_link (file_header->link_target) +#endif #if ENABLE_FEATURE_TAR_SELINUX char *sctx = archive_handle->tar__sctx[PAX_NEXT_FILE]; @@ -23,11 +31,47 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) } #endif +#if ENABLE_FEATURE_TAR_LONG_OPTIONS + dst_name = file_header->name; + dst_link = file_header->link_target; + if (archive_handle->tar__strip_components) { + unsigned n = archive_handle->tar__strip_components; + do { + dst_name = strchr(dst_name, '/'); + if (!dst_name || dst_name[1] == '\0') { + data_skip(archive_handle); + return; + } + dst_name++; + /* + * Link target is shortened only for hardlinks: + * softlinks restored unchanged. 
+ */ + if (S_ISREG(file_header->mode) + && file_header->size == 0 + && dst_link + ) { +// GNU tar 1.26 does not check that we reached end of link name: +// if "dir/hardlink" is hardlinked to "file", +// tar xvf a.tar --strip-components=1 says: +// tar: hardlink: Cannot hard link to '': No such file or directory +// and continues processing. We silently skip such entries. + dst_link = strchr(dst_link, '/'); + if (!dst_link || dst_link[1] == '\0') { + data_skip(archive_handle); + return; + } + dst_link++; + } + } while (--n != 0); + } +#endif + if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) { - char *slash = strrchr(file_header->name, '/'); + char *slash = strrchr(dst_name, '/'); if (slash) { *slash = '\0'; - bb_make_directory(file_header->name, -1, FILEUTILS_RECUR); + bb_make_directory(dst_name, -1, FILEUTILS_RECUR); *slash = '/'; } } @@ -38,8 +82,8 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) /* Is it hardlink? * We encode hard links as regular files of size 0 with a symlink */ if (S_ISREG(file_header->mode) - && file_header->link_target && file_header->size == 0 + && dst_link ) { /* Ugly special case: * tar cf t.tar hardlink1 hardlink2 hardlink1 @@ -48,22 +92,22 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) * hardlink2 -> hardlink1 * hardlink1 -> hardlink1 <== !!! 
*/ - if (strcmp(file_header->link_target, file_header->name) == 0) + if (strcmp(dst_link, dst_name) == 0) goto ret; } /* Proceed with deleting */ - if (unlink(file_header->name) == -1 + if (unlink(dst_name) == -1 && errno != ENOENT ) { bb_perror_msg_and_die("can't remove old file %s", - file_header->name); + dst_name); } } } else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) { /* Remove the existing entry if its older than the extracted entry */ struct stat existing_sb; - if (lstat(file_header->name, &existing_sb) == -1) { + if (lstat(dst_name, &existing_sb) == -1) { if (errno != ENOENT) { bb_perror_msg_and_die("can't stat old file"); } @@ -73,30 +117,30 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) && !S_ISDIR(file_header->mode) ) { bb_error_msg("%s not created: newer or " - "same age file exists", file_header->name); + "same age file exists", dst_name); } data_skip(archive_handle); goto ret; } - else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) { + else if ((unlink(dst_name) == -1) && (errno != EISDIR)) { bb_perror_msg_and_die("can't remove old file %s", - file_header->name); + dst_name); } } /* Handle hard links separately * We encode hard links as regular files of size 0 with a symlink */ if (S_ISREG(file_header->mode) - && file_header->link_target && file_header->size == 0 + && dst_link ) { - /* hard link */ - res = link(file_header->link_target, file_header->name); - if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) { + /* Hard link */ + res = link(dst_link, dst_name); + if (res != 0 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) { bb_perror_msg("can't create %slink " "from %s to %s", "hard", - file_header->name, - file_header->link_target); + dst_name, + dst_link); } /* Hardlinks have no separate mode/ownership, skip chown/chmod */ goto ret; @@ -106,17 +150,17 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) switch (file_header->mode & S_IFMT) { case S_IFREG: { 
/* Regular file */ - char *dst_name; + char *dst_nameN; int flags = O_WRONLY | O_CREAT | O_EXCL; if (archive_handle->ah_flags & ARCHIVE_O_TRUNC) flags = O_WRONLY | O_CREAT | O_TRUNC; - dst_name = file_header->name; + dst_nameN = dst_name; #ifdef ARCHIVE_REPLACE_VIA_RENAME if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME) /* rpm-style temp file name */ - dst_name = xasprintf("%s;%x", dst_name, (int)getpid()); + dst_nameN = xasprintf("%s;%x", dst_name, (int)getpid()); #endif - dst_fd = xopen3(dst_name, + dst_fd = xopen3(dst_nameN, flags, file_header->mode ); @@ -124,32 +168,32 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) close(dst_fd); #ifdef ARCHIVE_REPLACE_VIA_RENAME if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME) { - xrename(dst_name, file_header->name); - free(dst_name); + xrename(dst_nameN, dst_name); + free(dst_nameN); } #endif break; } case S_IFDIR: - res = mkdir(file_header->name, file_header->mode); + res = mkdir(dst_name, file_header->mode); if ((res == -1) && (errno != EISDIR) /* btw, Linux doesn't return this */ && (errno != EEXIST) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) ) { - bb_perror_msg("can't make dir %s", file_header->name); + bb_perror_msg("can't make dir %s", dst_name); } break; case S_IFLNK: /* Symlink */ //TODO: what if file_header->link_target == NULL (say, corrupted tarball?) 
- res = symlink(file_header->link_target, file_header->name); - if ((res == -1) + res = symlink(file_header->link_target, dst_name); + if (res != 0 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) ) { bb_perror_msg("can't create %slink " "from %s to %s", "sym", - file_header->name, + dst_name, file_header->link_target); } break; @@ -157,11 +201,11 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) case S_IFBLK: case S_IFCHR: case S_IFIFO: - res = mknod(file_header->name, file_header->mode, file_header->device); + res = mknod(dst_name, file_header->mode, file_header->device); if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) ) { - bb_perror_msg("can't create node %s", file_header->name); + bb_perror_msg("can't create node %s", dst_name); } break; default: @@ -186,20 +230,20 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) } #endif /* GNU tar 1.15.1 uses chown, not lchown */ - chown(file_header->name, uid, gid); + chown(dst_name, uid, gid); } /* uclibc has no lchmod, glibc is even stranger - * it has lchmod which seems to do nothing! * so we use chmod... */ if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) { - chmod(file_header->name, file_header->mode); + chmod(dst_name, file_header->mode); } if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) { struct timeval t[2]; t[1].tv_sec = t[0].tv_sec = file_header->mtime; t[1].tv_usec = t[0].tv_usec = 0; - utimes(file_header->name, t); + utimes(dst_name, t); } } diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c index fb68673b9..ac2be726f 100644 --- a/archival/libarchive/get_header_tar.c +++ b/archival/libarchive/get_header_tar.c @@ -418,6 +418,7 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle) /* Everything up to and including last ".." component is stripped */ overlapping_strcpy(file_header->name, strip_unsafe_prefix(file_header->name)); +//TODO: do the same for file_header->link_target? 
/* Strip trailing '/' in directories */ /* Must be done after mode is set as '/' is used to check if it's a directory */ diff --git a/archival/tar.c b/archival/tar.c index aa03ba990..566ba34f6 100644 --- a/archival/tar.c +++ b/archival/tar.c @@ -152,9 +152,12 @@ # define FNM_LEADING_DIR 0 #endif - -//#define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__) -#define DBG(...) ((void)0) +#if 0 +# define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__) +#else +# define DBG(...) ((void)0) +#endif +#define DBG_OPTION_PARSING 0 #define block_buf bb_common_bufsiz1 @@ -855,6 +858,7 @@ enum { IF_FEATURE_SEAMLESS_Z( OPTBIT_COMPRESS ,) IF_FEATURE_TAR_NOPRESERVE_TIME(OPTBIT_NOPRESERVE_TIME,) #if ENABLE_FEATURE_TAR_LONG_OPTIONS + OPTBIT_STRIP_COMPONENTS, OPTBIT_NORECURSION, IF_FEATURE_TAR_TO_COMMAND(OPTBIT_2COMMAND ,) OPTBIT_NUMERIC_OWNER, @@ -879,12 +883,13 @@ enum { OPT_GZIP = IF_FEATURE_SEAMLESS_GZ( (1 << OPTBIT_GZIP )) + 0, // z OPT_XZ = IF_FEATURE_SEAMLESS_XZ( (1 << OPTBIT_XZ )) + 0, // J OPT_COMPRESS = IF_FEATURE_SEAMLESS_Z( (1 << OPTBIT_COMPRESS )) + 0, // Z - OPT_NOPRESERVE_TIME = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m - OPT_NORECURSION = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION )) + 0, // no-recursion - OPT_2COMMAND = IF_FEATURE_TAR_TO_COMMAND( (1 << OPTBIT_2COMMAND )) + 0, // to-command - OPT_NUMERIC_OWNER = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER )) + 0, // numeric-owner - OPT_NOPRESERVE_PERM = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions - OPT_OVERWRITE = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE )) + 0, // overwrite + OPT_NOPRESERVE_TIME = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m + OPT_STRIP_COMPONENTS = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_STRIP_COMPONENTS)) + 0, // strip-components + OPT_NORECURSION = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION )) + 0, // no-recursion + 
OPT_2COMMAND = IF_FEATURE_TAR_TO_COMMAND( (1 << OPTBIT_2COMMAND )) + 0, // to-command + OPT_NUMERIC_OWNER = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER )) + 0, // numeric-owner + OPT_NOPRESERVE_PERM = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions + OPT_OVERWRITE = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE )) + 0, // overwrite OPT_ANY_COMPRESS = (OPT_BZIP2 | OPT_LZMA | OPT_GZIP | OPT_XZ | OPT_COMPRESS), }; @@ -928,6 +933,7 @@ static const char tar_longopts[] ALIGN1 = # if ENABLE_FEATURE_TAR_NOPRESERVE_TIME "touch\0" No_argument "m" # endif + "strip-components\0" Required_argument "\xf9" "no-recursion\0" No_argument "\xfa" # if ENABLE_FEATURE_TAR_TO_COMMAND "to-command\0" Required_argument "\xfb" @@ -973,11 +979,15 @@ int tar_main(int argc UNUSED_PARAM, char **argv) "tt:vv:" // count -t,-v IF_FEATURE_TAR_FROM("X::T::") // cumulative lists #if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM - "\xff::" // cumulative lists for --exclude + "\xff::" // --exclude=PATTERN is a list #endif IF_FEATURE_TAR_CREATE("c:") "t:x:" // at least one of these is reqd IF_FEATURE_TAR_CREATE("c--tx:t--cx:x--ct") // mutually exclusive - IF_NOT_FEATURE_TAR_CREATE("t--x:x--t"); // mutually exclusive + IF_NOT_FEATURE_TAR_CREATE("t--x:x--t") // mutually exclusive +#if ENABLE_FEATURE_TAR_LONG_OPTIONS + ":\xf9+" // --strip-components=NUM +#endif + ; #if ENABLE_FEATURE_TAR_LONG_OPTIONS applet_long_options = tar_longopts; #endif @@ -1018,10 +1028,14 @@ int tar_main(int argc UNUSED_PARAM, char **argv) IF_FEATURE_SEAMLESS_XZ( "J" ) IF_FEATURE_SEAMLESS_Z( "Z" ) IF_FEATURE_TAR_NOPRESERVE_TIME("m") + IF_FEATURE_TAR_LONG_OPTIONS("\xf9:") // --strip-components , &base_dir // -C dir , &tar_filename // -f filename IF_FEATURE_TAR_FROM(, &(tar_handle->accept)) // T IF_FEATURE_TAR_FROM(, &(tar_handle->reject)) // X +#if ENABLE_FEATURE_TAR_LONG_OPTIONS + , &tar_handle->tar__strip_components // --strip-components +#endif 
IF_FEATURE_TAR_TO_COMMAND(, &(tar_handle->tar__to_command)) // --to-command #if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM , &excludes // --exclude @@ -1029,11 +1043,49 @@ int tar_main(int argc UNUSED_PARAM, char **argv) , &verboseFlag // combined count for -t and -v , &verboseFlag // combined count for -t and -v ); - //bb_error_msg("opt:%08x", opt); +#if DBG_OPTION_PARSING + bb_error_msg("opt: 0x%08x", opt); +# define showopt(o) bb_error_msg("opt & %s(%x): %x", #o, o, opt & o); + showopt(OPT_TEST ); + showopt(OPT_EXTRACT ); + showopt(OPT_BASEDIR ); + showopt(OPT_TARNAME ); + showopt(OPT_2STDOUT ); + showopt(OPT_NOPRESERVE_OWNER); + showopt(OPT_P ); + showopt(OPT_VERBOSE ); + showopt(OPT_KEEP_OLD ); + showopt(OPT_CREATE ); + showopt(OPT_DEREFERENCE ); + showopt(OPT_BZIP2 ); + showopt(OPT_LZMA ); + showopt(OPT_INCLUDE_FROM ); + showopt(OPT_EXCLUDE_FROM ); + showopt(OPT_GZIP ); + showopt(OPT_XZ ); + showopt(OPT_COMPRESS ); + showopt(OPT_NOPRESERVE_TIME ); + showopt(OPT_STRIP_COMPONENTS); + showopt(OPT_NORECURSION ); + showopt(OPT_2COMMAND ); + showopt(OPT_NUMERIC_OWNER ); + showopt(OPT_NOPRESERVE_PERM ); + showopt(OPT_OVERWRITE ); + showopt(OPT_ANY_COMPRESS ); + bb_error_msg("base_dir:'%s'", base_dir); + bb_error_msg("tar_filename:'%s'", tar_filename); + bb_error_msg("verboseFlag:%d", verboseFlag); + bb_error_msg("tar_handle->tar__to_command:'%s'", tar_handle->tar__to_command); + bb_error_msg("tar_handle->tar__strip_components:%u", tar_handle->tar__strip_components); + return 0; +# undef showopt +#endif argv += optind; - if (verboseFlag) tar_handle->action_header = header_verbose_list; - if (verboseFlag == 1) tar_handle->action_header = header_list; + if (verboseFlag) + tar_handle->action_header = header_verbose_list; + if (verboseFlag == 1) + tar_handle->action_header = header_list; if (opt & OPT_EXTRACT) tar_handle->action_data = data_extract_all; diff --git a/include/bb_archive.h b/include/bb_archive.h index 2329d025d..10969b567 100644 --- 
a/include/bb_archive.h +++ b/include/bb_archive.h @@ -77,6 +77,9 @@ typedef struct archive_handle_t { off_t offset; /* Archiver specific. Can make it a union if it ever gets big */ +#if ENABLE_FEATURE_TAR_LONG_OPTIONS + unsigned tar__strip_components; +#endif #define PAX_NEXT_FILE 0 #define PAX_GLOBAL 1 #if ENABLE_TAR || ENABLE_DPKG || ENABLE_DPKG_DEB diff --git a/testsuite/tar.tests b/testsuite/tar.tests index 4929f4e49..383a4646c 100755 --- a/testsuite/tar.tests +++ b/testsuite/tar.tests @@ -53,6 +53,15 @@ dd if=/dev/zero bs=512 count=20 2>/dev/null | tar xvf - 2>&1; echo $? "" "" SKIP= +# "tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input": +# GNU tar 1.26 records these as hardlinks: +# input_hard2 -> input_hard1 +# input_hard1 -> input_hard1 (!!!) +# input_dir/file -> input_dir/file +# input -> input +# As of 1.24.0, we don't record the last two: for them, nlink==1 +# and we check for "hardlink"ness only on files with nlink!=1 +# We also don't use "hrw-r--r--" notation for hardlinks in "tar tv" listing. optional FEATURE_TAR_CREATE FEATURE_LS_SORTFILES testing "tar hardlinks and repeated files" '\ rm -rf input_* test.tar 2>/dev/null @@ -64,6 +73,7 @@ chmod -R 644 * chmod 755 input_dir tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input tar tvf test.tar | sed "s/.*[0-9] input/input/" +rm -rf input_dir tar xf test.tar 2>&1 echo Ok: $? ls -l . input_dir/* | grep input_ | sed "s/\\(^[^ ]*\\) .* input/\\1 input/" -- 2.25.1