From b2486a8922bf4977bd82c8190258e39de28c053b Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 6 Apr 2011 20:27:07 -0400 Subject: [PATCH] move rsyscall out of pthread_create module this is something of a tradeoff, as now set*id() functions, rather than pthread_create, are what pull in the code overhead for dealing with linux's refusal to implement proper POSIX thread-vs-process semantics. my motivations are: 1. it's cleaner this way, especially cleaner to optimize out the rsyscall locking overhead from pthread_create when it's not needed. 2. it's expected that only a tiny number of core system programs will ever use set*id() functions, whereas many programs may want to use threads, and making thread overhead tiny is an incentive for "light" programs to try threads. --- src/internal/libc.h | 3 +- src/internal/pthread_impl.h | 3 + src/thread/__rsyscall.c | 113 ++++++++++++++++++++++++++++++++++++ src/thread/pthread_create.c | 105 +++------------------------------ src/unistd/setgid.c | 3 +- src/unistd/setregid.c | 3 +- src/unistd/setresgid.c | 3 +- src/unistd/setresuid.c | 3 +- src/unistd/setreuid.c | 3 +- src/unistd/setuid.c | 3 +- 10 files changed, 133 insertions(+), 109 deletions(-) create mode 100644 src/thread/__rsyscall.c diff --git a/src/internal/libc.h b/src/internal/libc.h index be88dc04..c0039e77 100644 --- a/src/internal/libc.h +++ b/src/internal/libc.h @@ -10,7 +10,6 @@ struct __libc { void (*lock)(volatile int *); void (*lockfile)(FILE *); void (*fork_handler)(int); - int (*rsyscall)(int, long, long, long, long, long, long); int (*atexit)(void (*)(void)); void (*fini)(void); void (*ldso_fini)(void); @@ -48,6 +47,8 @@ void __lockfile(FILE *); #define CANCELPT_INHIBIT CANCELPT(2) #define CANCELPT_RESUME CANCELPT(-2) +int __rsyscall(int, long, long, long, long, long, long); + extern char **__environ; #define environ __environ diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 7ab6243d..0bcc54cc 100644 --- 
a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -86,6 +86,9 @@ int __timedwait(volatile int *, int, clockid_t, const struct timespec *, int);
 void __wait(volatile int *, volatile int *, int, int);
 void __wake(volatile int *, int, int);
 
+void __rsyscall_lock();
+void __rsyscall_unlock();
+
 #define DEFAULT_STACK_SIZE (16384-PAGE_SIZE)
 #define DEFAULT_GUARD_SIZE PAGE_SIZE
 
diff --git a/src/thread/__rsyscall.c b/src/thread/__rsyscall.c
new file mode 100644
index 00000000..923db2bf
--- /dev/null
+++ b/src/thread/__rsyscall.c
@@ -0,0 +1,113 @@
+#include "pthread_impl.h"
+
+/* "rsyscall" is a mechanism by which a thread can synchronously force all
+ * other threads to perform an arbitrary syscall. It is necessary to work
+ * around the non-conformant implementation of setuid() et al on Linux,
+ * which affect only the calling thread and not the whole process. This
+ * implementation performs some tricks with signal delivery to work around
+ * the fact that it does not keep any list of threads in userspace. */
+
+static struct {
+	volatile int lock, hold, blocks, cnt;
+	unsigned long arg[6];
+	int nr;
+	int err;
+	int init;
+} rs;
+
+static void rsyscall_handler(int sig, siginfo_t *si, void *ctx)
+{
+	struct pthread *self = __pthread_self();
+	long r;
+
+	if (!rs.hold || rs.cnt == libc.threads_minus_1) return;
+
+	/* Threads which have already decremented themselves from the
+	 * thread count must not increment rs.cnt or otherwise act. */
+	if (self->dead) {
+		sigfillset(&((ucontext_t *)ctx)->uc_sigmask);
+		return;
+	}
+
+	r = __syscall(rs.nr, rs.arg[0], rs.arg[1],
+		rs.arg[2], rs.arg[3], rs.arg[4], rs.arg[5]);
+	if (r < 0) rs.err=-r;
+
+	a_inc(&rs.cnt);
+	__wake(&rs.cnt, 1, 1);
+	while(rs.hold)
+		__wait(&rs.hold, 0, 1, 1);
+	a_dec(&rs.cnt);
+	if (!rs.cnt) __wake(&rs.cnt, 1, 1);
+}
+
+int __rsyscall(int nr, long a, long b, long c, long d, long e, long f)
+{
+	int i, ret;
+	sigset_t set = { 0 };
+	struct pthread *self;
+
+	if (!libc.threads_minus_1)
+		return syscall(nr, a, b, c, d, e, f);
+
+	self = __pthread_self();
+
+	LOCK(&rs.lock);
+	while ((i=rs.blocks))
+		__wait(&rs.blocks, 0, i, 1);
+
+	sigfillset(&set);
+	__libc_sigprocmask(SIG_BLOCK, &set, &set); /* set now holds the caller's old mask */
+
+	if (!rs.init) {
+		struct sigaction sa = {
+			.sa_flags = SA_SIGINFO | SA_RESTART, /* handler uses its ctx argument */
+			.sa_sigaction = rsyscall_handler
+		};
+		sigfillset(&sa.sa_mask);
+		__libc_sigaction(SIGSYSCALL, &sa, 0);
+		rs.init = 1; /* install the handler only once */
+	}
+
+	rs.nr = nr;
+	rs.arg[0] = a; rs.arg[1] = b;
+	rs.arg[2] = c; rs.arg[3] = d;
+	rs.arg[4] = e; rs.arg[5] = f;
+	rs.err = 0;
+	rs.cnt = 0;
+	rs.hold = 1;
+
+	/* Dispatch signals until all threads respond */
+	for (i=libc.threads_minus_1; i; i--)
+		sigqueue(self->pid, SIGSYSCALL, (union sigval){0});
+	while ((i=rs.cnt) < libc.threads_minus_1) {
+		sigqueue(self->pid, SIGSYSCALL, (union sigval){0});
+		__wait(&rs.cnt, 0, i, 1);
+	}
+
+	/* Restore the caller's mask; lingering signals are handled as no-op */
+	__libc_sigprocmask(SIG_SETMASK, &set, 0);
+
+	/* Resume other threads' signal handlers and wait for them */
+	rs.hold = 0;
+	__wake(&rs.hold, -1, 0);
+	while((i=rs.cnt)) __wait(&rs.cnt, 0, i, 1);
+
+	if (rs.err) errno = rs.err, ret = -1;
+	else ret = syscall(nr, a, b, c, d, e, f);
+
+	UNLOCK(&rs.lock);
+	return ret;
+}
+
+void __rsyscall_lock()
+{
+	a_inc(&rs.blocks);
+	while (rs.lock) __wait(&rs.lock, 0, 1, 1);
+}
+
+void __rsyscall_unlock()
+{
+	a_dec(&rs.blocks);
+	if (rs.lock) __wake(&rs.blocks, 1, 1);
+}
diff --git a/src/thread/pthread_create.c
b/src/thread/pthread_create.c index a9a08171..ec329f50 100644 --- a/src/thread/pthread_create.c +++ b/src/thread/pthread_create.c @@ -1,5 +1,11 @@ #include "pthread_impl.h" +static void dummy_0() +{ +} +weak_alias(dummy_0, __rsyscall_lock); +weak_alias(dummy_0, __rsyscall_unlock); + static void dummy_1(pthread_t self) { } @@ -72,101 +78,12 @@ static void cancelpt(int x) } } -/* "rsyscall" is a mechanism by which a thread can synchronously force all - * other threads to perform an arbitrary syscall. It is necessary to work - * around the non-conformant implementation of setuid() et al on Linux, - * which affect only the calling thread and not the whole process. This - * implementation performs some tricks with signal delivery to work around - * the fact that it does not keep any list of threads in userspace. */ - -static struct { - volatile int lock, hold, blocks, cnt; - unsigned long arg[6]; - int nr; - int err; -} rs; - -static void rsyscall_handler(int sig, siginfo_t *si, void *ctx) -{ - struct pthread *self = __pthread_self(); - long r; - - if (!rs.hold || rs.cnt == libc.threads_minus_1) return; - - /* Threads which have already decremented themselves from the - * thread count must not increment rs.cnt or otherwise act. 
*/ - if (self->dead) { - sigfillset(&((ucontext_t *)ctx)->uc_sigmask); - return; - } - - r = __syscall(rs.nr, rs.arg[0], rs.arg[1], - rs.arg[2], rs.arg[3], rs.arg[4], rs.arg[5]); - if (r < 0) rs.err=-r; - - a_inc(&rs.cnt); - __wake(&rs.cnt, 1, 1); - while(rs.hold) - __wait(&rs.hold, 0, 1, 1); - a_dec(&rs.cnt); - if (!rs.cnt) __wake(&rs.cnt, 1, 1); -} - -static int rsyscall(int nr, long a, long b, long c, long d, long e, long f) -{ - int i, ret; - sigset_t set = { 0 }; - struct pthread *self = __pthread_self(); - sigaddset(&set, SIGSYSCALL); - - LOCK(&rs.lock); - while ((i=rs.blocks)) - __wait(&rs.blocks, 0, i, 1); - - __libc_sigprocmask(SIG_BLOCK, &set, 0); - - rs.nr = nr; - rs.arg[0] = a; rs.arg[1] = b; - rs.arg[2] = c; rs.arg[3] = d; - rs.arg[4] = d; rs.arg[5] = f; - rs.err = 0; - rs.cnt = 0; - rs.hold = 1; - - /* Dispatch signals until all threads respond */ - for (i=libc.threads_minus_1; i; i--) - sigqueue(self->pid, SIGSYSCALL, (union sigval){0}); - while ((i=rs.cnt) < libc.threads_minus_1) { - sigqueue(self->pid, SIGSYSCALL, (union sigval){0}); - __wait(&rs.cnt, 0, i, 1); - } - - /* Handle any lingering signals with no-op */ - __libc_sigprocmask(SIG_UNBLOCK, &set, 0); - - /* Resume other threads' signal handlers and wait for them */ - rs.hold = 0; - __wake(&rs.hold, -1, 0); - while((i=rs.cnt)) __wait(&rs.cnt, 0, i, 1); - - if (rs.err) errno = rs.err, ret = -1; - else ret = syscall(nr, a, b, c, d, e, f); - - UNLOCK(&rs.lock); - return ret; -} - static void init_threads() { struct sigaction sa = { .sa_flags = SA_SIGINFO | SA_RESTART }; libc.lock = __lock; libc.lockfile = __lockfile; libc.cancelpt = cancelpt; - libc.rsyscall = rsyscall; - - sigfillset(&sa.sa_mask); - sa.sa_sigaction = rsyscall_handler; - __libc_sigaction(SIGSYSCALL, &sa, 0); sigemptyset(&sa.sa_mask); sa.sa_sigaction = cancel_handler; @@ -205,7 +122,7 @@ int pthread_create(pthread_t *res, const pthread_attr_t *attr, void *(*entry)(vo size_t size, guard; struct pthread *self = pthread_self(), 
*new; unsigned char *map, *stack, *tsd; - static const pthread_attr_t default_attr; + const pthread_attr_t default_attr = { 0 }; if (!self) return ENOSYS; if (!init && ++init) init_threads(); @@ -236,16 +153,12 @@ int pthread_create(pthread_t *res, const pthread_attr_t *attr, void *(*entry)(vo new->tlsdesc[1] = (uintptr_t)new; stack = (void *)((uintptr_t)new-1 & ~(uintptr_t)15); - /* We must synchronize new thread creation with rsyscall - * delivery. This looks to be the least expensive way: */ - a_inc(&rs.blocks); - while (rs.lock) __wait(&rs.lock, 0, 1, 1); + __rsyscall_lock(); a_inc(&libc.threads_minus_1); ret = __uniclone(stack, start, new); - a_dec(&rs.blocks); - if (rs.lock) __wake(&rs.blocks, 1, 1); + __rsyscall_unlock(); if (ret < 0) { a_dec(&libc.threads_minus_1); diff --git a/src/unistd/setgid.c b/src/unistd/setgid.c index e98a2982..b54d2b22 100644 --- a/src/unistd/setgid.c +++ b/src/unistd/setgid.c @@ -4,6 +4,5 @@ int setgid(gid_t gid) { - if (libc.rsyscall) return libc.rsyscall(__NR_setgid, gid, 0, 0, 0, 0, 0); - return syscall(SYS_setgid, gid); + return __rsyscall(__NR_setgid, gid, 0, 0, 0, 0, 0); } diff --git a/src/unistd/setregid.c b/src/unistd/setregid.c index ff2607dc..49c59858 100644 --- a/src/unistd/setregid.c +++ b/src/unistd/setregid.c @@ -4,6 +4,5 @@ int setregid(gid_t rgid, gid_t egid) { - if (libc.rsyscall) return libc.rsyscall(__NR_setregid, rgid, egid, 0, 0, 0, 0); - return syscall(SYS_setregid, rgid, egid); + return __rsyscall(__NR_setregid, rgid, egid, 0, 0, 0, 0); } diff --git a/src/unistd/setresgid.c b/src/unistd/setresgid.c index 3c85a828..2b0c96d8 100644 --- a/src/unistd/setresgid.c +++ b/src/unistd/setresgid.c @@ -5,6 +5,5 @@ int setresgid(gid_t rgid, gid_t egid, gid_t sgid) { - if (libc.rsyscall) return libc.rsyscall(__NR_setresgid, rgid, egid, sgid, 0, 0, 0); - return syscall(SYS_setresgid, rgid, egid, sgid); + return __rsyscall(__NR_setresgid, rgid, egid, sgid, 0, 0, 0); } diff --git a/src/unistd/setresuid.c 
b/src/unistd/setresuid.c index 376ce406..7fa6bc38 100644 --- a/src/unistd/setresuid.c +++ b/src/unistd/setresuid.c @@ -5,6 +5,5 @@ int setresuid(uid_t ruid, uid_t euid, uid_t suid) { - if (libc.rsyscall) return libc.rsyscall(__NR_setresuid, ruid, euid, suid, 0, 0, 0); - return syscall(SYS_setresuid, ruid, euid, suid); + return __rsyscall(__NR_setresuid, ruid, euid, suid, 0, 0, 0); } diff --git a/src/unistd/setreuid.c b/src/unistd/setreuid.c index 505e8bc1..d926454a 100644 --- a/src/unistd/setreuid.c +++ b/src/unistd/setreuid.c @@ -4,6 +4,5 @@ int setreuid(uid_t ruid, uid_t euid) { - if (libc.rsyscall) return libc.rsyscall(__NR_setreuid, ruid, euid, 0, 0, 0, 0); - return syscall(SYS_setreuid, ruid, euid); + return __rsyscall(__NR_setreuid, ruid, euid, 0, 0, 0, 0); } diff --git a/src/unistd/setuid.c b/src/unistd/setuid.c index 61e8be55..da6816de 100644 --- a/src/unistd/setuid.c +++ b/src/unistd/setuid.c @@ -4,6 +4,5 @@ int setuid(uid_t uid) { - if (libc.rsyscall) return libc.rsyscall(__NR_setuid, uid, 0, 0, 0, 0, 0); - return syscall(SYS_setuid, uid); + return __rsyscall(__NR_setuid, uid, 0, 0, 0, 0, 0); } -- 2.25.1