unshare: new applet
author Bartosz Golaszewski <bartekgola@gmail.com>
Fri, 1 Apr 2016 17:41:13 +0000 (19:41 +0200)
committer Denys Vlasenko <vda.linux@googlemail.com>
Fri, 1 Apr 2016 17:44:39 +0000 (19:44 +0200)
function                                             old     new   delta
unshare_main                                           -     873    +873
.rodata                                           154444  155131    +687
packed_usage                                       30329   30520    +191
unshare_longopts                                       -     106    +106
mount_namespaces                                       -      99     +99
mount_or_die                                           -      51     +51
ns_list                                                -      48     +48
wait_for_exitstatus                                    -      41     +41
opt_str                                                -      17     +17
applet_names                                        2510    2518      +8
applet_main                                         2912    2920      +8
applet_suid                                           91      92      +1
applet_install_loc                                   182     183      +1
------------------------------------------------------------------------------
(add/remove: 8/0 grow/shrink: 6/0 up/down: 2131/0)           Total: 2131 bytes
   text    data     bss     dec     hex filename
 826110    4070    9080  839260   cce5c busybox_old
 827961    4078    9080  841119   cd59f busybox_unstripped

Signed-off-by: Bartosz Golaszewski <bartekgola@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
util-linux/unshare.c [new file with mode: 0644]

diff --git a/util-linux/unshare.c b/util-linux/unshare.c
new file mode 100644 (file)
index 0000000..f1a9cdf
--- /dev/null
@@ -0,0 +1,380 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * Mini unshare implementation for busybox.
+ *
+ * Copyright (C) 2016 by Bartosz Golaszewski <bartekgola@gmail.com>
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
+ */
+
+//config:config UNSHARE
+//config:      bool "unshare"
+//config:      default y
+//config:      depends on LONG_OPTS && !NOMMU
+//config:      select PLATFORM_LINUX
+//config:      help
+//config:        Run program with some namespaces unshared from parent.
+
+// depends on LONG_OPTS: it is awkward to exclude code which handles --propagation
+// and --setgroups based on LONG_OPTS, so instead applet requires LONG_OPTS.
+// depends on !NOMMU: we need fork()
+
+//applet:IF_UNSHARE(APPLET(unshare, BB_DIR_USR_BIN, BB_SUID_DROP))
+
+//kbuild:lib-$(CONFIG_UNSHARE) += unshare.o
+
+//usage:#define unshare_trivial_usage
+//usage:       "[OPTIONS] [PROG [ARGS]]"
+//usage:#define unshare_full_usage "\n"
+//usage:     "\n       -m, --mount[=FILE]      Unshare mount namespace"
+//usage:     "\n       -u, --uts[=FILE]        Unshare UTS namespace (hostname etc.)"
+//usage:     "\n       -i, --ipc[=FILE]        Unshare System V IPC namespace"
+//usage:     "\n       -n, --net[=FILE]        Unshare network namespace"
+//usage:     "\n       -p, --pid[=FILE]        Unshare PID namespace"
+//usage:     "\n       -U, --user[=FILE]       Unshare user namespace"
+//usage:     "\n       -f, --fork              Fork before execing PROG"
+//usage:     "\n       -r, --map-root-user     Map current user to root (implies -U)"
+//usage:     "\n       --mount-proc[=DIR]      Mount /proc filesystem first (implies -m)"
+//usage:     "\n       --propagation slave|shared|private|unchanged"
+//usage:     "\n                               Modify mount propagation in mount namespace"
+//usage:     "\n       --setgroups allow|deny  Control the setgroups syscall in user namespaces"
+
+#include <sched.h>
+#include <sys/mount.h>
+#include "libbb.h"
+
+/*
+ * mount(2) wrapper: a non-zero result from mount() is reported through
+ * bb_perror_msg_and_die(). The filesystem-specific data argument is
+ * always NULL.
+ *
+ * The message deliberately leaves fstype out: callers in this file pass
+ * either NULL or "proc" (the latter only when mounting /proc), so it is
+ * easy to deduce from source and target.
+ */
+static void mount_or_die(const char *source, const char *target,
+                 const char *fstype, unsigned long mountflags)
+{
+       int rc = mount(source, target, fstype, mountflags, NULL);
+
+       if (rc == 0)
+               return;
+       bb_perror_msg_and_die("can't mount %s on %s (flags:0x%lx)",
+               source, target, mountflags);
+}
+
+// TODO: move to libbb
+/*
+ * Wait for process PID with safe_waitpid() and hand back the raw wait
+ * status, to be decoded by the caller with the WIFxxx()/Wxxx() macros.
+ * A negative result from safe_waitpid() is reported through
+ * bb_perror_msg_and_die().
+ */
+static int wait_for_exitstatus(pid_t pid)
+{
+       int wstatus;
+
+       if (safe_waitpid(pid, &wstatus, 0) < 0)
+               bb_perror_msg_and_die("waitpid");
+       return wstatus;
+}
+
+/*
+ * Procfs files of the calling process that unshare_main() writes to:
+ * "setgroups" receives "deny" (for --map-root-user) or the --setgroups
+ * argument, "uid_map" and "gid_map" receive the mapping lines built for
+ * --map-root-user.
+ */
+#define PATH_PROC_SETGROUPS    "/proc/self/setgroups"
+#define PATH_PROC_UIDMAP       "/proc/self/uid_map"
+#define PATH_PROC_GIDMAP       "/proc/self/gid_map"
+
+struct namespace_descr {
+       int flag;               /* CLONE_NEWxxx bit OR-ed into the unshare() flags */
+       const char nsfile4[4];  /* file name under /proc/PID/ns/. "user" fills
+                                * all 4 bytes, so a trailing NUL is not
+                                * guaranteed: always print with "%.4s". */
+};
+
+struct namespace_ctx {
+       char *path;     /* FILE argument of the matching long option, filled in
+                        * by getopt32(); left NULL (unshare_main() zeroes the
+                        * array first) when no FILE was given. When set,
+                        * mount_namespaces() bind-mounts the ns file onto it. */
+};
+
+enum { /* Option bits tested on the getopt32() result in unshare_main().
+        * Values follow the order of the option letters in opt_str. Bits 0..5
+        * also equal 1 << (index into ns_list[]): unshare_main() relies on
+        * that when it loops over NS_COUNT to build the unshare() flags. */
+       OPT_mount       = 1 << 0,
+       OPT_uts         = 1 << 1,
+       OPT_ipc         = 1 << 2,
+       OPT_network     = 1 << 3,
+       OPT_pid         = 1 << 4,
+       OPT_user        = 1 << 5, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
+       OPT_fork        = 1 << 6,
+       OPT_map_root    = 1 << 7,
+       OPT_mount_proc  = 1 << 8,
+       OPT_propagation = 1 << 9,
+       OPT_setgroups   = 1 << 10,
+};
+enum { /* Indexes shared by ns_list[] and by ns_ctx_list[] in unshare_main() */
+       NS_MNT_POS = 0,
+       NS_UTS_POS,
+       NS_IPC_POS,
+       NS_NET_POS,
+       NS_PID_POS,
+       NS_USR_POS, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
+       NS_COUNT,   /* number of namespace kinds handled; sizes ns_ctx_list[] */
+};
+static const struct namespace_descr ns_list[] = { /* entry order follows NS_xxx_POS */
+       { CLONE_NEWNS,   "mnt"  },
+       { CLONE_NEWUTS,  "uts"  },
+       { CLONE_NEWIPC,  "ipc"  },
+       { CLONE_NEWNET,  "net"  },
+       { CLONE_NEWPID,  "pid"  },
+       { CLONE_NEWUSER, "user" }, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
+};
+
+/*
+ * Upstream unshare doesn't support short options for --mount-proc,
+ * --propagation, --setgroups.
+ * Optional arguments (namespace mountpoints) exist only for long opts,
+ * we are forced to use "fake" letters for them.
+ * '+': stop at first non-option.
+ *
+ * "\xfd", "\xfe" and "\xff" are the fake letters of --mount-proc,
+ * --propagation and --setgroups; they are listed in opt_str itself.
+ * "\xf0".."\xf5" are the fake letters of the six namespace long options;
+ * they appear only in unshare_longopts and are tied to -m/-u/-i/-n/-p/-U
+ * through opt_complementary in unshare_main().
+ * The result pointers handed to getopt32() in unshare_main() are ordered
+ * accordingly: first the three for "\xfd", "\xfe", "\xff", then the six
+ * FILE pointers for "\xf0".."\xf5".
+ */
+static const char opt_str[] = "+muinpU""fr""\xfd::""\xfe:""\xff:";
+static const char unshare_longopts[] ALIGN1 =
+       "mount\0"               Optional_argument       "\xf0"
+       "uts\0"                 Optional_argument       "\xf1"
+       "ipc\0"                 Optional_argument       "\xf2"
+       "network\0"             Optional_argument       "\xf3"
+       "pid\0"                 Optional_argument       "\xf4"
+       "user\0"                Optional_argument       "\xf5"
+       "fork\0"                No_argument             "f"
+       "map-root-user\0"       No_argument             "r"
+       "mount-proc\0"          Optional_argument       "\xfd"
+       "propagation\0"         Required_argument       "\xfe"
+       "setgroups\0"           Required_argument       "\xff"
+;
+
+/*
+ * One literal, two uses:
+ * - read as a plain C string it stops at the first NUL and is "private",
+ *   which unshare_main() installs as the default --propagation value;
+ * - read as a NUL-separated word list it is the table that
+ *   parse_propagation() searches with index_in_strings().
+ */
+#define PRIVATE_STR   "private\0""unchanged\0""shared\0""slave\0"
+#define PRIVATE_UNCHANGED_SHARED_SLAVE   PRIVATE_STR
+
+/*
+ * Translate a --propagation argument into mount(2) flags.
+ *
+ * prop_str: one of "private", "unchanged", "shared", "slave".
+ * Returns MS_REC combined with the matching MS_xxx propagation flag,
+ * or 0 for "unchanged". Any other word is reported through
+ * bb_error_msg_and_die().
+ */
+static unsigned long parse_propagation(const char *prop_str)
+{
+       int word_idx = index_in_strings(PRIVATE_UNCHANGED_SHARED_SLAVE, prop_str);
+
+       if (word_idx < 0)
+               bb_error_msg_and_die("unrecognized: --%s=%s", "propagation", prop_str);
+
+       switch (word_idx) {
+       case 0: /* private */
+               return MS_REC | MS_PRIVATE;
+       case 1: /* unchanged */
+               return 0;
+       case 2: /* shared */
+               return MS_REC | MS_SHARED;
+       default: /* slave: the only word left in the list */
+               return MS_REC | MS_SLAVE;
+       }
+}
+
+/*
+ * Bind-mount the namespace files of process PID onto the requested FILEs.
+ *
+ * For every index below NS_COUNT whose ns_ctx_list[] entry has a non-NULL
+ * path, /proc/PID/ns/<name> (name taken from ns_list[]) is bind-mounted
+ * onto that path. A failing mount is handled by mount_or_die().
+ */
+static void mount_namespaces(pid_t pid, struct namespace_ctx *ns_ctx_list)
+{
+       int idx;
+
+       for (idx = 0; idx < NS_COUNT; idx++) {
+               /* Room for the fixed text, a 4-char ns name and a decimal pid */
+               char ns_file[sizeof("/proc/%u/ns/AAAA") + sizeof(int)*3];
+               const char *target = ns_ctx_list[idx].path;
+
+               if (target) {
+                       /* nsfile4 may lack a terminating NUL, hence "%.4s" */
+                       sprintf(ns_file, "/proc/%u/ns/%.4s",
+                               (unsigned)pid, ns_list[idx].nsfile4);
+                       mount_or_die(ns_file, target, NULL, MS_BIND);
+               }
+       }
+}
+
+/*
+ * unshare applet entry point.
+ *
+ * Steps, in order:
+ *  1. parse options (getopt32) and validate --setgroups;
+ *  2. collect the CLONE_NEWxxx flags and note whether any FILE was given;
+ *  3. if a FILE was given together with -m, fork a helper child that
+ *     bind-mounts the parent's namespace files from the old mount namespace;
+ *  4. unshare();
+ *  5. complete the bind mounts (wait for the helper, or do them directly);
+ *  6. with -f, fork and let the parent merely relay the child's status;
+ *  7. write setgroups/uid_map/gid_map for -r or --setgroups;
+ *  8. apply mount propagation for -m, mount proc for --mount-proc;
+ *  9. exec PROG, or run a login shell when no PROG was given.
+ *
+ * Returns only from the helper child (EXIT_SUCCESS), from the main process
+ * when the helper exited with a non-zero status (that status), and from
+ * the -f parent (child's exit status).
+ */
+int unshare_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int unshare_main(int argc UNUSED_PARAM, char **argv)
+{
+       int i;
+       unsigned int opts;
+       int unsflags;
+       uintptr_t need_mount;
+       const char *proc_mnt_target;
+       const char *prop_str;
+       const char *setgrp_str;
+       unsigned long prop_flags;
+       /* Sampled before unshare(): these are the IDs that -r maps to root */
+       uid_t reuid = geteuid();
+       gid_t regid = getegid();
+       struct fd_pair fdp;
+       pid_t child = child; /* for compiler */
+       struct namespace_ctx ns_ctx_list[NS_COUNT];
+
+       /* Defaults for options which may be absent from the command line */
+       memset(ns_ctx_list, 0, sizeof(ns_ctx_list));
+       proc_mnt_target = "/proc";
+       prop_str = PRIVATE_STR;
+       setgrp_str = NULL;
+
+       opt_complementary =
+               "\xf0""m" /* long opts (via their "fake chars") imply short opts */
+               ":\xf1""u"
+               ":\xf2""i"
+               ":\xf3""n"
+               ":\xf4""p"
+               ":\xf5""U"
+               ":rU"      /* --map-root-user or -r implies -U: the id maps
+                           * written below belong to a new user namespace */
+               ":\xfd""m" /* --mount-proc implies -m */
+       ;
+       applet_long_options = unshare_longopts;
+       opts = getopt32(argv, opt_str,
+                       &proc_mnt_target, &prop_str, &setgrp_str,
+                       &ns_ctx_list[NS_MNT_POS].path,
+                       &ns_ctx_list[NS_UTS_POS].path,
+                       &ns_ctx_list[NS_IPC_POS].path,
+                       &ns_ctx_list[NS_NET_POS].path,
+                       &ns_ctx_list[NS_PID_POS].path,
+                       &ns_ctx_list[NS_USR_POS].path
+       );
+       argv += optind;
+
+       if (setgrp_str) {
+               if (strcmp(setgrp_str, "allow") == 0) {
+                       if (opts & OPT_map_root) {
+                               bb_error_msg_and_die(
+                                       "--setgroups=allow and --map-root-user "
+                                       "are mutually exclusive"
+                               );
+                       }
+               } else {
+                       /* It's not "allow", must be "deny" */
+                       if (strcmp(setgrp_str, "deny") != 0)
+                               bb_error_msg_and_die("unrecognized: --%s=%s",
+                                       "setgroups", setgrp_str);
+               }
+       }
+
+       unsflags = 0;
+       need_mount = 0;
+       for (i = 0; i < NS_COUNT; i++) {
+               const struct namespace_descr *ns = &ns_list[i];
+               struct namespace_ctx *ns_ctx = &ns_ctx_list[i];
+
+               /* Option bits 0..5 line up with ns_list[] indexes */
+               if (opts & (1 << i))
+                       unsflags |= ns->flag;
+
+               need_mount |= (uintptr_t)(ns_ctx->path);
+       }
+       /* need_mount != 0 if at least one FILE was given */
+
+       prop_flags = MS_REC | MS_PRIVATE;
+       /* Silently ignore --propagation if --mount is not requested. */
+       if (opts & OPT_mount)
+               prop_flags = parse_propagation(prop_str);
+
+       /*
+        * Special case: if we were requested to unshare the mount namespace
+        * AND to make any namespace persistent (by bind mounting it) we need
+        * to spawn a child process which will wait for the parent to call
+        * unshare(), then mount parent's namespaces while still in the
+        * previous namespace.
+        */
+       fdp.wr = -1; /* stays -1 unless the helper child is created */
+       if (need_mount && (opts & OPT_mount)) {
+               /*
+                * Can't use getppid() in child, as we can be unsharing the
+                * pid namespace.
+                */
+               pid_t ppid = getpid();
+
+               xpiped_pair(fdp);
+
+               child = xfork();
+               if (child == 0) {
+                       /* Child */
+                       close(fdp.wr);
+
+                       /*
+                        * Wait until parent calls unshare(). The parent never
+                        * writes to the pipe, it only closes its write end,
+                        * so this read() is a pure synchronization point.
+                        */
+                       read(fdp.rd, ns_ctx_list, 1); /* ...using bogus buffer */
+                       /*close(fdp.rd);*/
+
+                       /* Mount parent's unshared namespaces. */
+                       mount_namespaces(ppid, ns_ctx_list);
+                       return EXIT_SUCCESS;
+               }
+               /* Parent continues */
+       }
+
+       if (unshare(unsflags) != 0)
+               bb_perror_msg_and_die("unshare(0x%x)", unsflags);
+
+       if (fdp.wr >= 0) {
+               close(fdp.wr); /* Release child */
+               /*close(fdp.rd);*/
+       }
+
+       if (need_mount) {
+               /* Wait for the child to finish mounting the namespaces. */
+               if (opts & OPT_mount) {
+                       int exit_status = wait_for_exitstatus(child);
+                       if (WIFEXITED(exit_status) &&
+                           WEXITSTATUS(exit_status) != EXIT_SUCCESS)
+                               return WEXITSTATUS(exit_status);
+               } else {
+                       /*
+                        * Regular way - we were requested to mount some other
+                        * namespaces: mount them after the call to unshare().
+                        */
+                       mount_namespaces(getpid(), ns_ctx_list);
+               }
+       }
+
+       /*
+        * When we're unsharing the pid namespace, it's not the process that
+        * calls unshare() that is put into the new namespace, but its first
+        * child. The user may want to use this option to spawn a new process
+        * that'll become PID 1 in this new namespace.
+        */
+       if (opts & OPT_fork) {
+               pid_t pid = xfork();
+               if (pid > 0) {
+                       /* Parent: mirror the child's fate (signal or status) */
+                       int exit_status = wait_for_exitstatus(pid);
+                       if (WIFSIGNALED(exit_status))
+                               kill_myself_with_sig(WTERMSIG(exit_status));
+                       return WEXITSTATUS(exit_status);
+               }
+               /* Child continues */
+       }
+
+       if (opts & OPT_map_root) {
+               char uidmap_buf[sizeof("0 %u 1") + sizeof(int)*3];
+
+               /*
+                * Since Linux 3.19 unprivileged writing of /proc/self/gid_map
+                * has been disabled unless /proc/self/setgroups is written
+                * first to permanently disable the ability to call setgroups
+                * in that user namespace.
+                */
+               xopen_xwrite_close(PATH_PROC_SETGROUPS, "deny");
+               /*
+                * Map line format is "ID-inside-ns ID-outside-ns length":
+                * id 0 (root) inside the new namespace corresponds to our
+                * original effective id outside of it.
+                */
+               sprintf(uidmap_buf, "0 %u 1", (unsigned)reuid);
+               xopen_xwrite_close(PATH_PROC_UIDMAP, uidmap_buf);
+               sprintf(uidmap_buf, "0 %u 1", (unsigned)regid);
+               xopen_xwrite_close(PATH_PROC_GIDMAP, uidmap_buf);
+       } else
+       if (setgrp_str) {
+               /* Write "allow" or "deny" */
+               xopen_xwrite_close(PATH_PROC_SETGROUPS, setgrp_str);
+       }
+
+       if (opts & OPT_mount) {
+               /* prop_flags may be 0 for --propagation unchanged */
+               mount_or_die("none", "/", NULL, prop_flags);
+       }
+
+       if (opts & OPT_mount_proc) {
+               /*
+                * When creating a new pid namespace, we might want the pid
+                * subdirectories in /proc to remain consistent with the new
+                * process IDs. Without --mount-proc the pids in /proc would
+                * still reflect the old pid namespace. This is why we make
+                * /proc private here and then do a fresh mount.
+                */
+               mount_or_die("none", proc_mnt_target, NULL, MS_PRIVATE | MS_REC);
+               mount_or_die("proc", proc_mnt_target, "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV);
+       }
+
+       if (argv[0]) {
+               BB_EXECVP_or_die(argv);
+       }
+       /* unshare from util-linux 2.27.1, despite not documenting it,
+        * runs a login shell (argv0="-sh") if no PROG is given
+        */
+       /* NOTE(review): getenv("SHELL") is NULL when SHELL is unset -
+        * confirm that run_shell() copes with a NULL shell argument.
+        */
+       run_shell(getenv("SHELL"), /*login:*/ 1, NULL, NULL);
+}