From f94c9bf288290c9f4e5a7c2745922abd600e88ca Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 29 Nov 2009 07:45:33 +0100 Subject: [PATCH] tar: fix bug 673 (misdetection of repeated dir as hardlink). +92 bytes While at it, remove many superfluous ops on unpack: mkdir("."), lots of umask() calls. Can remove more by caching username->uid. Signed-off-by: Denys Vlasenko --- archival/libunarchive/data_extract_all.c | 17 +-- archival/tar.c | 126 +++++++++++++---------- libbb/make_directory.c | 63 +++++++++--- 3 files changed, 129 insertions(+), 77 deletions(-) diff --git a/archival/libunarchive/data_extract_all.c b/archival/libunarchive/data_extract_all.c index 2fcddc404..1100410e6 100644 --- a/archival/libunarchive/data_extract_all.c +++ b/archival/libunarchive/data_extract_all.c @@ -13,9 +13,12 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) int res; if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) { - char *name = xstrdup(file_header->name); - bb_make_directory(dirname(name), -1, FILEUTILS_RECUR); - free(name); + char *slash = strrchr(file_header->name, '/'); + if (slash) { + *slash = '\0'; + bb_make_directory(file_header->name, -1, FILEUTILS_RECUR); + *slash = '/'; + } } if (archive_handle->ah_flags & ARCHIVE_UNLINK_OLD) { @@ -52,8 +55,9 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) /* Handle hard links separately * We identified hard links as regular files of size 0 with a symlink */ - if (S_ISREG(file_header->mode) && (file_header->link_target) - && (file_header->size == 0) + if (S_ISREG(file_header->mode) + && file_header->link_target + && file_header->size == 0 ) { /* hard link */ res = link(file_header->link_target, file_header->name); @@ -121,6 +125,7 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) gid_t gid = file_header->gid; if (file_header->uname) { +//TODO: cache last name/id pair? struct passwd *pwd = getpwnam(file_header->uname); if (pwd) uid = pwd->pw_uid; } @@ -128,7 +133,7 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) struct group *grp = getgrnam(file_header->gname); if (grp) gid = grp->gr_gid; } - /* GNU tar 1.15.1 use chown, not lchown */ + /* GNU tar 1.15.1 uses chown, not lchown */ chown(file_header->name, uid, gid); } else #endif diff --git a/archival/tar.c b/archival/tar.c index 95982372d..699022ee5 100644 --- a/archival/tar.c +++ b/archival/tar.c @@ -26,13 +26,16 @@ #include #include "libbb.h" #include "unarchive.h" - /* FIXME: Stop using this non-standard feature */ #ifndef FNM_LEADING_DIR -#define FNM_LEADING_DIR 0 +# define FNM_LEADING_DIR 0 #endif +//#define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__) +#define DBG(...) ((void)0) + + #define block_buf bb_common_bufsiz1 @@ -52,8 +55,7 @@ /* POSIX tar Header Block, from POSIX 1003.1-1990 */ #define NAME_SIZE 100 #define NAME_SIZE_STR "100" -typedef struct TarHeader TarHeader; -struct TarHeader { /* byte offset */ +typedef struct TarHeader { /* byte offset */ char name[NAME_SIZE]; /* 0-99 */ char mode[8]; /* 100-107 */ char uid[8]; /* 108-115 */ @@ -75,39 +77,38 @@ struct TarHeader { /* byte offset */ char devminor[8]; /* 337-344 */ char prefix[155]; /* 345-499 */ char padding[12]; /* 500-512 (pad to exactly TAR_BLOCK_SIZE) */ -}; +} TarHeader; /* ** writeTarFile(), writeFileToTarball(), and writeTarHeader() are ** the only functions that deal with the HardLinkInfo structure. ** Even these functions use the xxxHardLinkInfo() functions. */ -typedef struct HardLinkInfo HardLinkInfo; -struct HardLinkInfo { - HardLinkInfo *next; /* Next entry in list */ - dev_t dev; /* Device number */ - ino_t ino; /* Inode number */ - short linkCount; /* (Hard) Link Count */ - char name[1]; /* Start of filename (must be last) */ -}; +typedef struct HardLinkInfo { + struct HardLinkInfo *next; /* Next entry in list */ + dev_t dev; /* Device number */ + ino_t ino; /* Inode number */ +// short linkCount; /* (Hard) Link Count */ + char name[1]; /* Start of filename (must be last) */ +} HardLinkInfo; /* Some info to be carried along when creating a new tarball */ -typedef struct TarBallInfo TarBallInfo; -struct TarBallInfo { +typedef struct TarBallInfo { int tarFd; /* Open-for-write file descriptor * for the tarball */ - struct stat statBuf; /* Stat info for the tarball, letting - * us know the inode and device that the - * tarball lives, so we can avoid trying - * to include the tarball into itself */ int verboseFlag; /* Whether to print extra stuff or not */ const llist_t *excludeList; /* List of files to not include */ HardLinkInfo *hlInfoHead; /* Hard Link Tracking Information */ HardLinkInfo *hlInfo; /* Hard Link Info for the current file */ -}; +//TODO: save only st_dev + st_ino + struct stat tarFileStatBuf; /* Stat info for the tarball, letting + * us know the inode and device that the + * tarball lives, so we can avoid trying + * to include the tarball into itself */ +} TarBallInfo; /* A nice enum with all the possible tar file content types */ -enum TarFileType { +enum { REGTYPE = '0', /* regular file */ REGTYPE0 = '\0', /* regular file (ancient bug compat) */ LNKTYPE = '1', /* hard link */ @@ -120,7 +121,6 @@ enum TarFileType { GNULONGLINK = 'K', /* GNU long (>100 chars) link name */ GNULONGNAME = 'L', /* GNU long (>100 chars) file name */ }; -typedef enum TarFileType TarFileType; /* Might be faster (and bigger) if the dev/ino were stored in numeric order;) */ static void addHardLinkInfo(HardLinkInfo **hlInfoHeadPtr, @@ -135,7 +135,8 @@ static void addHardLinkInfo(HardLinkInfo **hlInfoHeadPtr, *hlInfoHeadPtr = hlInfo; hlInfo->dev = statbuf->st_dev; hlInfo->ino = statbuf->st_ino; - hlInfo->linkCount = statbuf->st_nlink; +// hlInfo->linkCount = statbuf->st_nlink; +//TODO: "normalize" the name so that "./name/" == "name" etc? strcpy(hlInfo->name, fileName); } @@ -156,11 +157,18 @@ static void freeHardLinkInfo(HardLinkInfo **hlInfoHeadPtr) } /* Might be faster (and bigger) if the dev/ino were stored in numeric order;) */ -static HardLinkInfo *findHardLinkInfo(HardLinkInfo *hlInfo, struct stat *statbuf) +static HardLinkInfo *findHardLinkInfo(HardLinkInfo *hlInfo, struct stat *statbuf, const char *filename) { while (hlInfo) { - if ((statbuf->st_ino == hlInfo->ino) && (statbuf->st_dev == hlInfo->dev)) + if ((statbuf->st_ino == hlInfo->ino) + && (statbuf->st_dev == hlInfo->dev) + /* Name must NOT match. Think "tar cf t.tar file file" + * (same file tarred twice) */ + && strcmp(filename, hlInfo->name) != 0 + ) { + DBG("found hardlink:'%s'", hlInfo->name); break; + } hlInfo = hlInfo->next; } return hlInfo; @@ -171,7 +179,7 @@ static HardLinkInfo *findHardLinkInfo(HardLinkInfo *hlInfo, struct stat *statbuf * Stores low-order bits only if whole value does not fit. */ static void putOctal(char *cp, int len, off_t value) { - char tempBuffer[sizeof(off_t)*3+1]; + char tempBuffer[sizeof(off_t)*3 + 1]; char *tempString = tempBuffer; int width; @@ -376,16 +384,19 @@ static int exclude_file(const llist_t *excluded_files, const char *file) while (excluded_files) { if (excluded_files->data[0] == '/') { if (fnmatch(excluded_files->data, file, - FNM_PATHNAME | FNM_LEADING_DIR) == 0) + FNM_PATHNAME | FNM_LEADING_DIR) == 0) return 1; } else { const char *p; for (p = file; p[0] != '\0'; p++) { - if ((p == file || p[-1] == '/') && p[0] != '/' && - fnmatch(excluded_files->data, p, - FNM_PATHNAME | FNM_LEADING_DIR) == 0) + if ((p == file || p[-1] == '/') + && p[0] != '/' + && fnmatch(excluded_files->data, p, + FNM_PATHNAME | FNM_LEADING_DIR) == 0 + ) { return 1; + } } } excluded_files = excluded_files->link; @@ -394,7 +405,7 @@ static int exclude_file(const llist_t *excluded_files, const char *file) return 0; } #else -#define exclude_file(excluded_files, file) 0 +# define exclude_file(excluded_files, file) 0 #endif static int FAST_FUNC writeFileToTarball(const char *fileName, struct stat *statbuf, @@ -404,6 +415,8 @@ static int FAST_FUNC writeFileToTarball(const char *fileName, struct stat *statb const char *header_name; int inputFileFd = -1; + DBG("writeFileToTarball('%s')", fileName); + /* Strip leading '/' (must be before memorizing hardlink's name) */ header_name = fileName; while (header_name[0] == '/') { @@ -433,17 +446,20 @@ static int FAST_FUNC writeFileToTarball(const char *fileName, struct stat *statb * by adding the file information to the HardLinkInfo linked list. */ tbInfo->hlInfo = NULL; - if (statbuf->st_nlink > 1) { - tbInfo->hlInfo = findHardLinkInfo(tbInfo->hlInfoHead, statbuf); - if (tbInfo->hlInfo == NULL) + if (!S_ISDIR(statbuf->st_mode) && statbuf->st_nlink > 1) { + DBG("'%s': st_nlink > 1", header_name); + tbInfo->hlInfo = findHardLinkInfo(tbInfo->hlInfoHead, statbuf, header_name); + if (tbInfo->hlInfo == NULL) { + DBG("'%s': addHardLinkInfo", header_name); addHardLinkInfo(&tbInfo->hlInfoHead, statbuf, header_name); + } } /* It is a bad idea to store the archive we are in the process of creating, * so check the device and inode to be sure that this particular file isn't * the new tarball */ - if (tbInfo->statBuf.st_dev == statbuf->st_dev - && tbInfo->statBuf.st_ino == statbuf->st_ino + if (tbInfo->tarFileStatBuf.st_dev == statbuf->st_dev + && tbInfo->tarFileStatBuf.st_ino == statbuf->st_ino ) { bb_error_msg("%s: file is the archive; skipping", fileName); return TRUE; @@ -505,37 +521,37 @@ static int FAST_FUNC writeFileToTarball(const char *fileName, struct stat *statb } #if ENABLE_FEATURE_SEAMLESS_GZ || ENABLE_FEATURE_SEAMLESS_BZ2 -#if !(ENABLE_FEATURE_SEAMLESS_GZ && ENABLE_FEATURE_SEAMLESS_BZ2) -#define vfork_compressor(tar_fd, gzip) vfork_compressor(tar_fd) -#endif +# if !(ENABLE_FEATURE_SEAMLESS_GZ && ENABLE_FEATURE_SEAMLESS_BZ2) +# define vfork_compressor(tar_fd, gzip) vfork_compressor(tar_fd) +# endif /* Don't inline: vfork scares gcc and pessimizes code */ static void NOINLINE vfork_compressor(int tar_fd, int gzip) { pid_t gzipPid; -#if ENABLE_FEATURE_SEAMLESS_GZ && ENABLE_FEATURE_SEAMLESS_BZ2 +# if ENABLE_FEATURE_SEAMLESS_GZ && ENABLE_FEATURE_SEAMLESS_BZ2 const char *zip_exec = (gzip == 1) ? "gzip" : "bzip2"; -#elif ENABLE_FEATURE_SEAMLESS_GZ +# elif ENABLE_FEATURE_SEAMLESS_GZ const char *zip_exec = "gzip"; -#else /* only ENABLE_FEATURE_SEAMLESS_BZ2 */ +# else /* only ENABLE_FEATURE_SEAMLESS_BZ2 */ const char *zip_exec = "bzip2"; -#endif +# endif // On Linux, vfork never unpauses parent early, although standard // allows for that. Do we want to waste bytes checking for it? -#define WAIT_FOR_CHILD 0 +# define WAIT_FOR_CHILD 0 volatile int vfork_exec_errno = 0; struct fd_pair gzipDataPipe; -#if WAIT_FOR_CHILD +# if WAIT_FOR_CHILD struct fd_pair gzipStatusPipe; xpiped_pair(gzipStatusPipe); -#endif +# endif xpiped_pair(gzipDataPipe); signal(SIGPIPE, SIG_IGN); /* we only want EPIPE on errors */ -#if defined(__GNUC__) && __GNUC__ +# if defined(__GNUC__) && __GNUC__ /* Avoid vfork clobbering */ (void) &zip_exec; -#endif +# endif gzipPid = vfork(); if (gzipPid < 0) @@ -545,12 +561,12 @@ static void NOINLINE vfork_compressor(int tar_fd, int gzip) /* child */ /* NB: close _first_, then move fds! */ close(gzipDataPipe.wr); -#if WAIT_FOR_CHILD +# if WAIT_FOR_CHILD close(gzipStatusPipe.rd); /* gzipStatusPipe.wr will close only on exec - * parent waits for this close to happen */ fcntl(gzipStatusPipe.wr, F_SETFD, FD_CLOEXEC); -#endif +# endif xmove_fd(gzipDataPipe.rd, 0); xmove_fd(tar_fd, 1); /* exec gzip/bzip2 program/applet */ @@ -562,7 +578,7 @@ static void NOINLINE vfork_compressor(int tar_fd, int gzip) /* parent */ xmove_fd(gzipDataPipe.wr, tar_fd); close(gzipDataPipe.rd); -#if WAIT_FOR_CHILD +# if WAIT_FOR_CHILD close(gzipStatusPipe.wr); while (1) { char buf; @@ -574,7 +590,7 @@ static void NOINLINE vfork_compressor(int tar_fd, int gzip) continue; /* try it again */ } close(gzipStatusPipe.rd); -#endif +# endif if (vfork_exec_errno) { errno = vfork_exec_errno; bb_perror_msg_and_die("can't execute '%s'", zip_exec); @@ -597,7 +613,7 @@ static NOINLINE int writeTarFile(int tar_fd, int verboseFlag, /* Store the stat info for the tarball's file, so * can avoid including the tarball into itself.... */ - if (fstat(tbInfo.tarFd, &tbInfo.statBuf) < 0) + if (fstat(tbInfo.tarFd, &tbInfo.tarFileStatBuf) < 0) bb_perror_msg_and_die("can't stat tar file"); #if ENABLE_FEATURE_SEAMLESS_GZ || ENABLE_FEATURE_SEAMLESS_BZ2 @@ -675,7 +691,7 @@ static llist_t *append_file_list_to_list(llist_t *list) return newlist; } #else -#define append_file_list_to_list(x) 0 +# define append_file_list_to_list(x) 0 #endif #if ENABLE_FEATURE_SEAMLESS_Z @@ -700,7 +716,7 @@ static char FAST_FUNC get_header_tar_Z(archive_handle_t *archive_handle) return EXIT_FAILURE; } #else -#define get_header_tar_Z NULL +# define get_header_tar_Z NULL #endif #ifdef CHECK_FOR_CHILD_EXITCODE diff --git a/libbb/make_directory.c b/libbb/make_directory.c index a4ad59975..4486eb1ed 100644 --- a/libbb/make_directory.c +++ b/libbb/make_directory.c @@ -28,53 +28,76 @@ int FAST_FUNC bb_make_directory(char *path, long mode, int flags) { - mode_t mask; + mode_t cur_mask; + mode_t org_mask; const char *fail_msg; - char *s = path; + char *s; char c; struct stat st; - mask = umask(0); - umask(mask & ~0300); /* Ensure intermediate dirs are wx */ + /* Happens on bb_make_directory(dirname("no_slashes"),...) */ + if (LONE_CHAR(path, '.')) + return 0; + org_mask = cur_mask = (mode_t)-1L; + s = path; while (1) { c = '\0'; - if (flags & FILEUTILS_RECUR) { /* Get the parent. */ - /* Bypass leading non-'/'s and then subsequent '/'s. */ + if (flags & FILEUTILS_RECUR) { /* Get the parent */ + /* Bypass leading non-'/'s and then subsequent '/'s */ while (*s) { if (*s == '/') { do { ++s; } while (*s == '/'); c = *s; /* Save the current char */ - *s = '\0'; /* and replace it with nul. */ + *s = '\0'; /* and replace it with nul */ break; } ++s; } } - if (!c) /* Last component uses orig umask */ - umask(mask); + if (c != '\0') { + /* Intermediate dirs: must have wx for user */ + if (cur_mask == (mode_t)-1L) { /* wasn't done yet? */ + mode_t new_mask; + org_mask = umask(0); + cur_mask = 0; + /* Clear u=wx in umask - this ensures + * they won't be cleared on mkdir */ + new_mask = (org_mask & ~(mode_t)0300); + //bb_error_msg("org_mask:%o cur_mask:%o", org_mask, new_mask); + if (new_mask != cur_mask) { + cur_mask = new_mask; + umask(new_mask); + } + } + } else { + /* Last component: uses original umask */ + //bb_error_msg("1 org_mask:%o", org_mask); + if (org_mask != cur_mask) { + cur_mask = org_mask; + umask(org_mask); + } + } if (mkdir(path, 0777) < 0) { /* If we failed for any other reason than the directory - * already exists, output a diagnostic and return -1. */ + * already exists, output a diagnostic and return -1 */ if (errno != EEXIST || !(flags & FILEUTILS_RECUR) || ((stat(path, &st) < 0) || !S_ISDIR(st.st_mode)) ) { fail_msg = "create"; - umask(mask); break; } /* Since the directory exists, don't attempt to change * permissions if it was the full target. Note that * this is not an error condition. */ if (!c) { - umask(mask); - return 0; + goto ret0; } } @@ -86,13 +109,21 @@ int FAST_FUNC bb_make_directory(char *path, long mode, int flags) fail_msg = "set permissions of"; break; } - return 0; + goto ret0; } - /* Remove any inserted nul from the path (recursive mode). */ + /* Remove any inserted nul from the path (recursive mode) */ *s = c; } /* while (1) */ bb_perror_msg("can't %s directory '%s'", fail_msg, path); - return -1; + flags = -1; + goto ret; + ret0: + flags = 0; + ret: + //bb_error_msg("2 org_mask:%o", org_mask); + if (org_mask != cur_mask) + umask(org_mask); + return flags; } -- 2.25.1