X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=jail%2Fjail.c;h=45906904451bee621108f7e5df3fb12930fe2538;hb=refs%2Fheads%2Fmaster;hp=40b7558188fe86a9af5f7cc9e36913a745a66963;hpb=bfd963313f155846b4f6269bbc76161f81e1e99f;p=oweals%2Fprocd.git diff --git a/jail/jail.c b/jail/jail.c index 40b7558..4590690 100644 --- a/jail/jail.c +++ b/jail/jail.c @@ -15,47 +15,56 @@ #include #include #include +#include #include #include -#include #include -#include +#include +#include #include #include #include #include #include +#include +#include -#include "elf.h" #include "capabilities.h" +#include "elf.h" +#include "fs.h" +#include "jail.h" +#include "log.h" -#include #include +#include #define STACK_SIZE (1024 * 1024) -#define OPT_ARGS "P:S:C:n:r:w:d:psulo" +#define OPT_ARGS "S:C:n:h:r:w:d:psulocU:G:NR:fFO:T:Ey" static struct { - char *path; char *name; + char *hostname; char **jail_argv; char *seccomp; char *capabilities; + char *user; + char *group; + char *extroot; + char *overlaydir; + char *tmpoverlaysize; + int no_new_privs; + int namespace; int procfs; int ronly; int sysfs; + int console; + int pw_uid; + int pw_gid; + int gr_gid; + int require_jail; } opts; -struct extra { - struct list_head list; - - const char *path; - const char *name; - int readonly; -}; - -static LIST_HEAD(extras); extern int pivot_root(const char *new_root, const char *put_old); @@ -63,6 +72,8 @@ int debug = 0; static char child_stack[STACK_SIZE]; +int console_fd; + static int mkdir_p(char *dir, mode_t mask) { char *l = strrchr(dir, '/'); @@ -83,120 +94,303 @@ static int mkdir_p(char *dir, mode_t mask) return 0; if (ret) - ERROR("mkdir failed on %s: %s\n", dir, strerror(errno)); + ERROR("mkdir(%s, %d) failed: %m\n", dir, mask); return ret; } -static int mount_bind(const char *root, const char *path, const char *name, int readonly, int error) +static int _mount_bind(const char *root, const char *path, const char *target, int readonly, int strict, int error) { - const char *p = path; struct stat s; - char old[256]; - char new[256]; + char new[PATH_MAX]; int fd; + int remount_flags = MS_BIND | MS_REMOUNT; - if (strstr(p, "local")) - p = "/lib"; - - snprintf(old, sizeof(old), "%s/%s", path, name); - snprintf(new, sizeof(new), "%s%s", root, p); - - mkdir_p(new, 0755); - - snprintf(new, sizeof(new), "%s%s/%s", root, p, name); - - if (stat(old, &s)) { - ERROR("%s does not exist\n", old); + if (stat(path, &s)) { + ERROR("stat(%s) failed: %m\n", path); return error; } + snprintf(new, sizeof(new), "%s%s", root, target?target:path); + if (S_ISDIR(s.st_mode)) { mkdir_p(new, 0755); } else { + mkdir_p(dirname(new), 0755); + snprintf(new, sizeof(new), "%s%s", root, target?target:path); fd = creat(new, 0644); if (fd == -1) { - ERROR("failed to create %s: %s\n", new, strerror(errno)); + ERROR("creat(%s) failed: %m\n", new); return -1; } close(fd); } - if (mount(old, new, NULL, MS_BIND, NULL)) { - ERROR("failed to mount -B %s %s: %s\n", old, new, strerror(errno)); + if (mount(path, new, NULL, MS_BIND, NULL)) { + ERROR("failed to mount -B %s %s: %m\n", path, new); return -1; } - if (readonly && mount(NULL, new, NULL, MS_BIND | MS_REMOUNT | MS_RDONLY, NULL)) { - ERROR("failed to remount ro %s: %s\n", new, strerror(errno)); + if (readonly) + remount_flags |= MS_RDONLY; + + if (strict) + remount_flags |= MS_NOEXEC | MS_NOSUID | MS_NODEV; + + if ((strict || readonly) && mount(NULL, new, NULL, remount_flags, NULL)) { + ERROR("failed to remount (%s%s%s) %s: %m\n", readonly?"ro":"rw", + (readonly && strict)?", ":"", strict?"strict":"", new); return -1; } - DEBUG("mount -B %s %s\n", old, new); + DEBUG("mount -B %s %s (%s%s%s)\n", path, new, + readonly?"ro":"rw", (readonly && strict)?", ":"", strict?"strict":""); return 0; } -static int build_jail_fs() +int mount_bind(const char *root, const char *path, int readonly, int error) { + return _mount_bind(root, path, NULL, readonly, 0, error); +} + +static int mount_overlay(char *jail_root, char *overlaydir) { + char *upperdir, *workdir, *optsstr; + const char mountoptsformat[] = "lowerdir=%s,upperdir=%s,workdir=%s"; + int ret = -1; + + if (asprintf(&upperdir, "%s%s", overlaydir, "/upper") < 0) + goto out; + + if (asprintf(&workdir, "%s%s", overlaydir, "/work") < 0) + goto upper_printf; + + if (asprintf(&optsstr, mountoptsformat, jail_root, upperdir, workdir) < 0) + goto work_printf; + + if (mkdir_p(upperdir, 0755) || mkdir_p(workdir, 0755)) + goto opts_printf; + + DEBUG("mount -t overlay %s %s (%s)\n", jail_root, jail_root, optsstr); + + if (mount(jail_root, jail_root, "overlay", MS_NOATIME, optsstr)) + goto opts_printf; + + ret = 0; + +opts_printf: + free(optsstr); +work_printf: + free(workdir); +upper_printf: + free(upperdir); +out: + return ret; +} + +static void pass_console(int console_fd) { - struct library *l; - struct extra *m; + struct ubus_context *ctx = ubus_connect(NULL); + static struct blob_buf req; + uint32_t id; + + if (!ctx) + return; - if (mount("tmpfs", opts.path, "tmpfs", MS_NOATIME, "mode=0755")) { - ERROR("tmpfs mount failed %s\n", strerror(errno)); + blob_buf_init(&req, 0); + blobmsg_add_string(&req, "name", opts.name); + + if (ubus_lookup_id(ctx, "container", &id) || + ubus_invoke_fd(ctx, id, "console_set", req.head, NULL, NULL, 3000, console_fd)) + INFO("ubus request failed\n"); + else + close(console_fd); + + blob_buf_free(&req); + ubus_free(ctx); +} + +static int create_dev_console(const char *jail_root) +{ + char *console_fname; + char dev_console_path[PATH_MAX]; + int slave_console_fd; + + /* Open UNIX/98 virtual console */ + console_fd = posix_openpt(O_RDWR | O_NOCTTY); + if (console_fd == -1) + return -1; + + console_fname = ptsname(console_fd); + DEBUG("got console fd %d and PTS client name %s\n", console_fd, console_fname); + if (!console_fname) + goto no_console; + + grantpt(console_fd); + unlockpt(console_fd); + + /* pass PTY master to procd */ + pass_console(console_fd); + + /* mount-bind PTY slave to /dev/console in jail */ + snprintf(dev_console_path, sizeof(dev_console_path), "%s/dev/console", jail_root); + close(creat(dev_console_path, 0620)); + + if (mount(console_fname, dev_console_path, NULL, MS_BIND, NULL)) + goto no_console; + + /* use PTY slave for stdio */ + slave_console_fd = open(console_fname, O_RDWR); /* | O_NOCTTY */ + dup2(slave_console_fd, 0); + dup2(slave_console_fd, 1); + dup2(slave_console_fd, 2); + close(slave_console_fd); + + INFO("using guest console %s\n", console_fname); + + return 0; + +no_console: + close(console_fd); + return 1; +} + +static int build_jail_fs(void) +{ + char jail_root[] = "/tmp/ujail-XXXXXX"; + char tmpovdir[] = "/tmp/ujail-overlay-XXXXXX"; + char tmpdevdir[] = "/tmp/ujail-XXXXXX/dev"; + char tmpdevptsdir[] = "/tmp/ujail-XXXXXX/dev/pts"; + char *overlaydir = NULL; + + if (mkdtemp(jail_root) == NULL) { + ERROR("mkdtemp(%s) failed: %m\n", jail_root); return -1; } - if (chdir(opts.path)) { - ERROR("failed to chdir() in the jail root\n"); + /* oldroot can't be MS_SHARED else pivot_root() fails */ + if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) { + ERROR("private mount failed %m\n"); return -1; } - avl_init(&libraries, avl_strcmp, false, NULL); - alloc_library_path("/lib64"); - alloc_library_path("/lib"); - alloc_library_path("/usr/lib"); - load_ldso_conf("/etc/ld.so.conf"); + if (opts.extroot) { + if (mount(opts.extroot, jail_root, NULL, MS_BIND, NULL)) { + ERROR("extroot mount failed %m\n"); + return -1; + } + } else { + if (mount("tmpfs", jail_root, "tmpfs", MS_NOATIME, "mode=0755")) { + ERROR("tmpfs mount failed %m\n"); + return -1; + } + } + + if (opts.tmpoverlaysize) { + char mountoptsstr[] = "mode=0755,size=XXXXXXXX"; - if (elf_load_deps(*opts.jail_argv)) { - ERROR("failed to load dependencies\n"); + snprintf(mountoptsstr, sizeof(mountoptsstr), + "mode=0755,size=%s", opts.tmpoverlaysize); + if (mkdtemp(tmpovdir) == NULL) { + ERROR("mkdtemp(%s) failed: %m\n", jail_root); + return -1; + } + if (mount("tmpfs", tmpovdir, "tmpfs", MS_NOATIME, + mountoptsstr)) { + ERROR("failed to mount tmpfs for overlay (size=%s)\n", opts.tmpoverlaysize); + return -1; + } + overlaydir = tmpovdir; + } + + if (opts.overlaydir) + overlaydir = opts.overlaydir; + + if (overlaydir) + mount_overlay(jail_root, overlaydir); + + if (chdir(jail_root)) { + ERROR("chdir(%s) (jail_root) failed: %m\n", jail_root); return -1; } - if (opts.seccomp && elf_load_deps("libpreload-seccomp.so")) { - ERROR("failed to load libpreload-seccomp.so\n"); + snprintf(tmpdevdir, sizeof(tmpdevdir), "%s/dev", jail_root); + mkdir_p(tmpdevdir, 0755); + if (mount(NULL, tmpdevdir, "tmpfs", MS_NOATIME | MS_NOEXEC | MS_NOSUID, "size=1M")) + return -1; + + snprintf(tmpdevptsdir, sizeof(tmpdevptsdir), "%s/dev/pts", jail_root); + mkdir_p(tmpdevptsdir, 0755); + if (mount(NULL, tmpdevptsdir, "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, NULL)) + return -1; + + if (opts.console) + create_dev_console(jail_root); + + if (mount_all(jail_root)) { + ERROR("mount_all() failed\n"); return -1; } - avl_for_each_element(&libraries, l, avl) - if (mount_bind(opts.path, l->path, l->name, 1, -1)) - return -1; + if (opts.namespace & CLONE_NEWNET) { + char hostdir[PATH_MAX], jailetc[PATH_MAX], jaillink[PATH_MAX]; + + snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name); + mkdir_p(hostdir, 0755); + _mount_bind(jail_root, hostdir, "/tmp/resolv.conf.d", 1, 1, -1); + snprintf(jailetc, PATH_MAX, "%s/etc", jail_root); + mkdir_p(jailetc, 0755); + snprintf(jaillink, PATH_MAX, "%s/etc/resolv.conf", jail_root); + if (overlaydir) + unlink(jaillink); + symlink("../tmp/resolv.conf.d/resolv.conf.auto", jaillink); + } - list_for_each_entry(m, &extras, list) - if (mount_bind(opts.path, m->path, m->name, m->readonly, 0)) - return -1; + char dirbuf[sizeof(jail_root) + 4]; + snprintf(dirbuf, sizeof(dirbuf), "%s/old", jail_root); + mkdir(dirbuf, 0755); - char *mpoint; - if (asprintf(&mpoint, "%s/old", opts.path) < 0) { - ERROR("failed to alloc pivot path: %s\n", strerror(errno)); + if (pivot_root(jail_root, dirbuf) == -1) { + ERROR("pivot_root(%s, %s) failed: %m\n", jail_root, dirbuf); return -1; } - mkdir_p(mpoint, 0755); - if (pivot_root(opts.path, mpoint) == -1) { - ERROR("pivot_root failed:%s\n", strerror(errno)); - free(mpoint); + if (chdir("/")) { + ERROR("chdir(/) (after pivot_root) failed: %m\n"); return -1; } - free(mpoint); + + snprintf(dirbuf, sizeof(dirbuf), "/old%s", jail_root); + umount2(dirbuf, MNT_DETACH); + rmdir(dirbuf); + if (opts.tmpoverlaysize) { + char tmpdirbuf[sizeof(tmpovdir) + 4]; + snprintf(tmpdirbuf, sizeof(tmpdirbuf), "/old%s", tmpovdir); + umount2(tmpdirbuf, MNT_DETACH); + rmdir(tmpdirbuf); + } + umount2("/old", MNT_DETACH); rmdir("/old"); + if (opts.procfs) { mkdir("/proc", 0755); - mount("proc", "/proc", "proc", MS_NOATIME, 0); + mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0); + /* + * make /proc/sys read-only while keeping read-write to + * /proc/sys/net if CLONE_NEWNET is set. + */ + if (opts.namespace & CLONE_NEWNET) + mount("/proc/sys/net", "/proc/self/net", NULL, MS_BIND, 0); + + mount("/proc/sys", "/proc/sys", NULL, MS_BIND, 0); + mount(NULL, "/proc/sys", NULL, MS_REMOUNT | MS_RDONLY, 0); + mount(NULL, "/proc", NULL, MS_REMOUNT, 0); + + if (opts.namespace & CLONE_NEWNET) + mount("/proc/self/net", "/proc/sys/net", NULL, MS_MOVE, 0); } if (opts.sysfs) { mkdir("/sys", 0755); - mount("sysfs", "/sys", "sysfs", MS_NOATIME, 0); + mount("sysfs", "/sys", "sysfs", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, 0); } if (opts.ronly) mount(NULL, "/", NULL, MS_RDONLY | MS_REMOUNT, 0); @@ -204,14 +398,108 @@ static int build_jail_fs() return 0; } +static int write_uid_gid_map(pid_t child_pid, bool gidmap, int id) +{ + int map_file; + char map_path[64]; + const char *map_format = "%d %d %d\n"; + if (snprintf(map_path, sizeof(map_path), "/proc/%d/%s", + child_pid, gidmap?"gid_map":"uid_map") < 0) + return -1; + + if ((map_file = open(map_path, O_WRONLY)) == -1) + return -1; + + if (dprintf(map_file, map_format, 0, id, 1) == -1) { + close(map_file); + return -1; + } + + close(map_file); + return 0; +} + +static int write_setgroups(pid_t child_pid, bool allow) +{ + int setgroups_file; + char setgroups_path[64]; + + if (snprintf(setgroups_path, sizeof(setgroups_path), "/proc/%d/setgroups", + child_pid) < 0) { + return -1; + } + + if ((setgroups_file = open(setgroups_path, O_WRONLY)) == -1) { + return -1; + } + + if (dprintf(setgroups_file, allow?"allow":"deny") == -1) { + close(setgroups_file); + return -1; + } + + close(setgroups_file); + return 0; +} + +static void get_jail_user(int *user, int *user_gid, int *gr_gid) +{ + struct passwd *p = NULL; + struct group *g = NULL; + + if (opts.user) { + p = getpwnam(opts.user); + if (!p) { + ERROR("failed to get uid/gid for user %s: %d (%s)\n", + opts.user, errno, strerror(errno)); + exit(EXIT_FAILURE); + } + *user = p->pw_uid; + *user_gid = p->pw_gid; + } else { + *user = -1; + *user_gid = -1; + } + + if (opts.group) { + g = getgrnam(opts.group); + if (!g) { + ERROR("failed to get gid for group %s: %m\n", opts.group); + exit(EXIT_FAILURE); + } + *gr_gid = g->gr_gid; + } else { + *gr_gid = -1; + } +}; + +static void set_jail_user(int pw_uid, int user_gid, int gr_gid) +{ + if ((user_gid != -1) && initgroups(opts.user, user_gid)) { + ERROR("failed to initgroups() for user %s: %m\n", opts.user); + exit(EXIT_FAILURE); + } + + if ((gr_gid != -1) && setregid(gr_gid, gr_gid)) { + ERROR("failed to set group id %d: %m\n", gr_gid); + exit(EXIT_FAILURE); + } + + if ((pw_uid != -1) && setreuid(pw_uid, pw_uid)) { + ERROR("failed to set user id %d: %m\n", pw_uid); + exit(EXIT_FAILURE); + } +} + #define MAX_ENVP 8 static char** build_envp(const char *seccomp) { static char *envp[MAX_ENVP]; - static char preload_var[64]; - static char seccomp_var[64]; + static char preload_var[PATH_MAX]; + static char seccomp_var[PATH_MAX]; static char debug_var[] = "LD_DEBUG=all"; - char *preload_lib = find_lib("libpreload-seccomp.so"); + static char container_var[] = "container=ujail"; + const char *preload_lib = find_lib("libpreload-seccomp.so"); int count = 0; if (seccomp && !preload_lib) { @@ -224,6 +512,9 @@ static char** build_envp(const char *seccomp) snprintf(preload_var, sizeof(preload_var), "LD_PRELOAD=%s", preload_lib); envp[count++] = preload_var; } + + envp[count++] = container_var; + if (debug > 1) envp[count++] = debug_var; @@ -233,54 +524,126 @@ static char** build_envp(const char *seccomp) static void usage(void) { fprintf(stderr, "ujail -- \n"); - fprintf(stderr, " -P \tpath where the jail will be staged\n"); - fprintf(stderr, " -S \tseccomp filter\n"); + fprintf(stderr, " -d \tshow debug log (increase num to increase verbosity)\n"); + fprintf(stderr, " -S \tseccomp filter config\n"); fprintf(stderr, " -C \tcapabilities drop config\n"); + fprintf(stderr, " -c\t\tset PR_SET_NO_NEW_PRIVS\n"); fprintf(stderr, " -n \tthe name of the jail\n"); + fprintf(stderr, "namespace jail options:\n"); + fprintf(stderr, " -h \tchange the hostname of the jail\n"); + fprintf(stderr, " -N\t\tjail has network namespace\n"); + fprintf(stderr, " -f\t\tjail has user namespace\n"); + fprintf(stderr, " -F\t\tjail has cgroups namespace\n"); fprintf(stderr, " -r \treadonly files that should be staged\n"); fprintf(stderr, " -w \twriteable files that should be staged\n"); - fprintf(stderr, " -d \tshow debug log (increase num to increase verbosity)\n"); fprintf(stderr, " -p\t\tjail has /proc\n"); fprintf(stderr, " -s\t\tjail has /sys\n"); fprintf(stderr, " -l\t\tjail has /dev/log\n"); fprintf(stderr, " -u\t\tjail has a ubus socket\n"); + fprintf(stderr, " -U \tuser to run jailed process\n"); + fprintf(stderr, " -G \tgroup to run jailed process\n"); fprintf(stderr, " -o\t\tremont jail root (/) read only\n"); + fprintf(stderr, " -R \texternal jail rootfs (system container)\n"); + fprintf(stderr, " -O \tdirectory for r/w overlayfs\n"); + fprintf(stderr, " -T \tuse tmpfs r/w overlayfs with \n"); + fprintf(stderr, " -E\t\tfail if jail cannot be setup\n"); + fprintf(stderr, " -y\t\tprovide jail console\n"); fprintf(stderr, "\nWarning: by default root inside the jail is the same\n\ and he has the same powers as root outside the jail,\n\ thus he can escape the jail and/or break stuff.\n\ -Please use an appropriate seccomp/capabilities filter (-S/-C) to restrict his powers\n"); +Please use seccomp/capabilities (-S/-C) to restrict his powers\n\n\ +If you use none of the namespace jail options,\n\ +ujail will not use namespace/build a jail,\n\ +and will only drop capabilities/apply seccomp filter.\n\n"); } -static int spawn_jail(void *arg) +static int exec_jail(void *pipes_ptr) { - if (opts.name && sethostname(opts.name, strlen(opts.name))) { - ERROR("failed to sethostname: %s\n", strerror(errno)); + int *pipes = (int*)pipes_ptr; + char buf[1]; + int pw_uid, pw_gid, gr_gid; + + close(pipes[0]); + close(pipes[3]); + + buf[0] = 'i'; + if (write(pipes[1], buf, 1) < 1) { + ERROR("can't write to parent\n"); + exit(EXIT_FAILURE); + } + if (read(pipes[2], buf, 1) < 1) { + ERROR("can't read from parent\n"); + exit(EXIT_FAILURE); + } + if (buf[0] != 'O') { + ERROR("parent had an error, child exiting\n"); + exit(EXIT_FAILURE); + } + + close(pipes[1]); + close(pipes[2]); + + if (opts.namespace & CLONE_NEWUSER) { + if (setgid(0) < 0) { + ERROR("setgid\n"); + exit(EXIT_FAILURE); + } + if (setuid(0) < 0) { + ERROR("setuid\n"); + exit(EXIT_FAILURE); + } +// if (setgroups(0, NULL) < 0) { +// ERROR("setgroups\n"); +// exit(EXIT_FAILURE); +// } } - if (build_jail_fs()) { - ERROR("failed to build jail fs"); + if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0 + && sethostname(opts.hostname, strlen(opts.hostname))) { + ERROR("sethostname(%s) failed: %m\n", opts.hostname); exit(EXIT_FAILURE); } - char **envp = build_envp(opts.seccomp); - if (!envp) + if ((opts.namespace & CLONE_NEWNS) && build_jail_fs()) { + ERROR("failed to build jail fs\n"); exit(EXIT_FAILURE); + } if (opts.capabilities && drop_capabilities(opts.capabilities)) exit(EXIT_FAILURE); + if (opts.no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n"); + exit(EXIT_FAILURE); + } + + if (!(opts.namespace & CLONE_NEWUSER)) { + get_jail_user(&pw_uid, &pw_gid, &gr_gid); + set_jail_user(pw_uid, pw_gid, gr_gid); + } + + char **envp = build_envp(opts.seccomp); + if (!envp) + exit(EXIT_FAILURE); + INFO("exec-ing %s\n", *opts.jail_argv); execve(*opts.jail_argv, opts.jail_argv, envp); - //we get there only if execve fails - ERROR("failed to execve %s: %s\n", *opts.jail_argv, strerror(errno)); + /* we get there only if execve fails */ + ERROR("failed to execve %s: %m\n", *opts.jail_argv); exit(EXIT_FAILURE); } static int jail_running = 1; static int jail_return_code = 0; +static void jail_process_timeout_cb(struct uloop_timeout *t); +static struct uloop_timeout jail_process_timeout = { + .cb = jail_process_timeout_cb, +}; + static void jail_process_handler(struct uloop_process *c, int ret) { + uloop_timeout_cancel(&jail_process_timeout); if (WIFEXITED(ret)) { jail_return_code = WEXITSTATUS(ret); INFO("jail (%d) exited with exit: %d\n", c->pid, jail_return_code); @@ -296,38 +659,68 @@ static struct uloop_process jail_process = { .cb = jail_process_handler, }; -static void add_extra(char *name, int readonly) +static void jail_process_timeout_cb(struct uloop_timeout *t) { - struct extra *f; + DEBUG("jail process failed to stop, sending SIGKILL\n"); + kill(jail_process.pid, SIGKILL); +} + +static void jail_handle_signal(int signo) +{ + DEBUG("forwarding signal %d to the jailed process\n", signo); + kill(jail_process.pid, signo); +} + +static int netns_open_pid(const pid_t target_ns) +{ + char pid_net_path[PATH_MAX]; + + snprintf(pid_net_path, sizeof(pid_net_path), "/proc/%u/ns/net", target_ns); - if (*name != '/') { - ERROR("%s is not an absolute path\n", name); + return open(pid_net_path, O_RDONLY); +} + +static void netns_updown(pid_t pid, bool start) +{ + struct ubus_context *ctx = ubus_connect(NULL); + static struct blob_buf req; + uint32_t id; + + if (!ctx) return; - } - f = calloc(1, sizeof(struct extra)); + blob_buf_init(&req, 0); + blobmsg_add_string(&req, "jail", opts.name); + blobmsg_add_u32(&req, "pid", pid); + blobmsg_add_u8(&req, "start", start); - f->name = basename(name); - f->path = dirname(strdup(name)); - f->readonly = readonly; + if (ubus_lookup_id(ctx, "network", &id) || + ubus_invoke(ctx, id, "netns_updown", req.head, NULL, NULL, 3000)) + INFO("ubus request failed\n"); - list_add_tail(&f->list, &extras); + blob_buf_free(&req); + ubus_free(ctx); } int main(int argc, char **argv) { + sigset_t sigmask; uid_t uid = getuid(); char log[] = "/dev/log"; char ubus[] = "/var/run/ubus.sock"; - int ret = EXIT_SUCCESS; - int ch; + int ch, i; + int pipes[4]; + char sig_buf[1]; + int netns_fd; if (uid) { - ERROR("not root, aborting: %s\n", strerror(errno)); + ERROR("not root, aborting: %m\n"); return EXIT_FAILURE; } umask(022); + mount_list_init(); + init_library_search(); while ((ch = getopt(argc, argv, OPT_ARGS)) != -1) { switch (ch) { @@ -335,89 +728,235 @@ int main(int argc, char **argv) debug = atoi(optarg); break; case 'p': + opts.namespace |= CLONE_NEWNS; opts.procfs = 1; break; case 'o': + opts.namespace |= CLONE_NEWNS; opts.ronly = 1; break; + case 'f': + opts.namespace |= CLONE_NEWUSER; + break; + case 'F': + opts.namespace |= CLONE_NEWCGROUP; + break; + case 'R': + opts.extroot = optarg; + break; case 's': + opts.namespace |= CLONE_NEWNS; opts.sysfs = 1; break; case 'S': opts.seccomp = optarg; - add_extra(optarg, 1); + add_mount(optarg, 1, -1); break; case 'C': opts.capabilities = optarg; - add_extra(optarg, 1); break; - case 'P': - opts.path = optarg; + case 'c': + opts.no_new_privs = 1; break; case 'n': opts.name = optarg; break; + case 'N': + opts.namespace |= CLONE_NEWNET; + break; + case 'h': + opts.namespace |= CLONE_NEWUTS; + opts.hostname = optarg; + break; case 'r': - add_extra(optarg, 1); + opts.namespace |= CLONE_NEWNS; + add_path_and_deps(optarg, 1, 0, 0); break; case 'w': - add_extra(optarg, 0); + opts.namespace |= CLONE_NEWNS; + add_path_and_deps(optarg, 0, 0, 0); break; case 'u': - add_extra(ubus, 0); + opts.namespace |= CLONE_NEWNS; + add_mount(ubus, 0, -1); break; case 'l': - add_extra(log, 0); + opts.namespace |= CLONE_NEWNS; + add_mount(log, 0, -1); + break; + case 'U': + opts.user = optarg; + break; + case 'G': + opts.group = optarg; + break; + case 'O': + opts.overlaydir = optarg; + break; + case 'T': + opts.tmpoverlaysize = optarg; + break; + case 'E': + opts.require_jail = 1; + break; + case 'y': + opts.console = 1; break; } } - //no param found + if (opts.namespace) + opts.namespace |= CLONE_NEWIPC | CLONE_NEWPID; + + if (opts.tmpoverlaysize && strlen(opts.tmpoverlaysize) > 8) { + ERROR("size parameter too long: \"%s\"\n", opts.tmpoverlaysize); + return -1; + } + + /* no param found */ if (argc - optind < 1) { usage(); return EXIT_FAILURE; } + if (!(opts.namespace||opts.capabilities||opts.seccomp)) { + ERROR("Not using namespaces, capabilities or seccomp !!!\n\n"); + usage(); + return EXIT_FAILURE; + } + DEBUG("Using namespaces(0x%08x), capabilities(%d), seccomp(%d)\n", + opts.namespace, + opts.capabilities != 0, + opts.seccomp != 0); opts.jail_argv = &argv[optind]; + get_jail_user(&opts.pw_uid, &opts.pw_gid, &opts.gr_gid); + + if (!opts.extroot) { + if (opts.namespace && add_path_and_deps(*opts.jail_argv, 1, -1, 0)) { + ERROR("failed to load dependencies\n"); + return -1; + } + } + + if (opts.namespace && opts.seccomp && add_path_and_deps("libpreload-seccomp.so", 1, -1, 1)) { + ERROR("failed to load libpreload-seccomp.so\n"); + opts.seccomp = 0; + if (opts.require_jail) + return -1; + } + if (opts.name) prctl(PR_SET_NAME, opts.name, NULL, NULL, NULL); - if (!opts.path && asprintf(&opts.path, "/tmp/%s", basename(*opts.jail_argv)) == -1) { - ERROR("failed to asprintf root path: %s\n", strerror(errno)); - return EXIT_FAILURE; + uloop_init(); + + sigfillset(&sigmask); + for (i = 0; i < _NSIG; i++) { + struct sigaction s = { 0 }; + + if (!sigismember(&sigmask, i)) + continue; + if ((i == SIGCHLD) || (i == SIGPIPE) || (i == SIGSEGV)) + continue; + + s.sa_handler = jail_handle_signal; + sigaction(i, &s, NULL); } - if (mkdir(opts.path, 0755)) { - ERROR("unable to create root path: %s (%s)\n", opts.path, strerror(errno)); - return EXIT_FAILURE; + if (opts.namespace) { + if (opts.namespace & CLONE_NEWNS) { + add_mount("/dev/full", 0, -1); + add_mount("/dev/null", 0, -1); + add_mount("/dev/random", 0, -1); + add_mount("/dev/urandom", 0, -1); + add_mount("/dev/zero", 0, -1); + add_mount("/dev/ptmx", 0, -1); + add_mount("/dev/tty", 0, -1); + + if (!opts.extroot && (opts.user || opts.group)) { + add_mount("/etc/passwd", 0, -1); + add_mount("/etc/group", 0, -1); + } + +#if defined(__GLIBC__) + if (!opts.extroot) + add_mount("/etc/nsswitch.conf", 0, -1); +#endif + + if (!(opts.namespace & CLONE_NEWNET)) { + add_mount("/etc/resolv.conf", 0, -1); + } + } + + if (pipe(&pipes[0]) < 0 || pipe(&pipes[2]) < 0) + return -1; + + jail_process.pid = clone(exec_jail, child_stack + STACK_SIZE, SIGCHLD | opts.namespace, &pipes); + } else { + jail_process.pid = fork(); } - uloop_init(); - jail_process.pid = clone(spawn_jail, - child_stack + STACK_SIZE, - CLONE_NEWUTS | CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWIPC | SIGCHLD, argv); + if (jail_process.pid > 0) { + seteuid(0); + /* parent process */ + close(pipes[1]); + close(pipes[2]); + if (read(pipes[0], sig_buf, 1) < 1) { + ERROR("can't read from child\n"); + return -1; + } + close(pipes[0]); + if (opts.namespace & CLONE_NEWUSER) { + bool has_gr = (opts.gr_gid != -1); + if (write_setgroups(jail_process.pid, false)) { + ERROR("can't write setgroups\n"); + return -1; + } + if (opts.pw_uid != -1) { + write_uid_gid_map(jail_process.pid, 0, opts.pw_uid); + write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:opts.pw_gid); + } else { + write_uid_gid_map(jail_process.pid, 0, 65534); + write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:65534); + } + } - if (jail_process.pid != -1) { + if (opts.namespace & CLONE_NEWNET) { + if (!opts.name) { + ERROR("netns needs a named jail\n"); + return -1; + } + netns_fd = netns_open_pid(jail_process.pid); + netns_updown(jail_process.pid, true); + } + + sig_buf[0] = 'O'; + if (write(pipes[3], sig_buf, 1) < 0) { + ERROR("can't write to child\n"); + return -1; + } + close(pipes[3]); uloop_process_add(&jail_process); uloop_run(); - uloop_done(); if (jail_running) { + DEBUG("uloop interrupted, killing jail process\n"); kill(jail_process.pid, SIGTERM); - waitpid(jail_process.pid, NULL, 0); + uloop_timeout_set(&jail_process_timeout, 1000); + uloop_run(); + } + uloop_done(); + if (opts.namespace & CLONE_NEWNET) { + setns(netns_fd, CLONE_NEWNET); + netns_updown(getpid(), false); + close(netns_fd); } + return jail_return_code; + } else if (jail_process.pid == 0) { + /* fork child process */ + return exec_jail(NULL); } else { - ERROR("failed to spawn namespace: %s\n", strerror(errno)); - ret = EXIT_FAILURE; - } - - if (rmdir(opts.path)) { - ERROR("Unable to remove root path: %s (%s)\n", opts.path, strerror(errno)); - ret = EXIT_FAILURE; + ERROR("failed to clone/fork: %m\n"); + return EXIT_FAILURE; } - - if (ret) - return ret; - - return jail_return_code; }