From 78a8ef47c4d92b7680c52a85f80a81e29da86bb9 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Thu, 15 Jan 2015 23:17:38 -0500
Subject: [PATCH] overhaul __synccall and fix AS-safety and other issues in set*id

multi-threaded set*id and setrlimit use the internal __synccall
function to work around the kernel's wrongful treatment of these
process properties as thread-local. the old implementation of
__synccall failed to be AS-safe, despite POSIX requiring setuid and
setgid to be AS-safe, and was not rigorous in assuring that all
threads were caught. in a worst case, threads late in the process of
exiting could retain permissions after setuid reported success, in
which case attacks to regain dropped permissions may have been
possible under the right conditions.

the new implementation of __synccall depends on the presence of
/proc/self/task and will fail if it can't be opened, but is able to
determine that it has caught all threads, and does not use any locks
except its own. it thereby achieves AS-safety simply by blocking
signals to preclude re-entry in the same thread.

with this commit, all known conformance and safety issues in set*id
functions should be fixed.
---
 src/thread/pthread_create.c |   3 +
 src/thread/synccall.c       | 180 +++++++++++++++++++++++++++---------
 2 files changed, 138 insertions(+), 45 deletions(-)

diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index 1a47ed15..64971d56 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -139,6 +139,8 @@ weak_alias(dummy, __pthread_tsd_size);
 static void *dummy_tsd[1] = { 0 };
 weak_alias(dummy_tsd, __pthread_tsd_main);
 
+volatile int __block_new_threads = 0;
+
 static FILE *volatile dummy_file = 0;
 weak_alias(dummy_file, __stdin_used);
 weak_alias(dummy_file, __stdout_used);
@@ -178,6 +180,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 	if (attrp && !c11) attr = *attrp;
 
 	__acquire_ptc();
+	if (__block_new_threads) __wait(&__block_new_threads, 0, 1, 1);
 
 	if (attr._a_stackaddr) {
 		size_t need = libc.tls_size + __pthread_tsd_size;
diff --git a/src/thread/synccall.c b/src/thread/synccall.c
index c4149904..47d070b4 100644
--- a/src/thread/synccall.c
+++ b/src/thread/synccall.c
@@ -1,88 +1,178 @@
 #include "pthread_impl.h"
 #include <semaphore.h>
 #include <unistd.h>
+#include <dirent.h>
+#include <string.h>
+#include <ctype.h>
+#include "futex.h"
+#include "atomic.h"
+#include "../dirent/__dirent.h"
 
 static struct chain {
 	struct chain *next;
-	sem_t sem, sem2;
-} *head, *cur;
+	int tid;
+	sem_t target_sem, caller_sem;
+} *volatile head;
+
+static int synccall_lock[2];
+static int target_tid;
 static void (*callback)(void *), *context;
-static int chainlen;
-static sem_t chainlock, chaindone;
+static volatile int dummy = 0;
+weak_alias(dummy, __block_new_threads);
 
-static void handler(int sig, siginfo_t *si, void *ctx)
+static void handler(int sig)
 {
 	struct chain ch;
 	int old_errno = errno;
 
-	if (chainlen == libc.threads_minus_1) return;
+	sem_init(&ch.target_sem, 0, 0);
+	sem_init(&ch.caller_sem, 0, 0);
 
-	sigqueue(getpid(), SIGSYNCCALL, (union sigval){0});
+	ch.tid = __syscall(SYS_gettid);
 
-	sem_init(&ch.sem, 0, 0);
-	sem_init(&ch.sem2, 0, 0);
+	do ch.next = head;
+	while (a_cas_p(&head, ch.next, &ch) != ch.next);
 
-	while (sem_wait(&chainlock));
-	ch.next = head;
-	head = &ch;
-	if (++chainlen == libc.threads_minus_1) sem_post(&chaindone);
-	sem_post(&chainlock);
+	if (a_cas(&target_tid, ch.tid, 0) == (ch.tid | 0x80000000))
+		__syscall(SYS_futex, &target_tid, FUTEX_UNLOCK_PI|FUTEX_PRIVATE);
 
-	while (sem_wait(&ch.sem));
+	sem_wait(&ch.target_sem);
 
 	callback(context);
 
-	sem_post(&ch.sem2);
-	while (sem_wait(&ch.sem));
+	sem_post(&ch.caller_sem);
+	sem_wait(&ch.target_sem);
 
 	errno = old_errno;
 }
 
 void __synccall(void (*func)(void *), void *ctx)
 {
-	struct sigaction sa;
-	struct chain *next;
 	sigset_t oldmask;
+	int cs, i, r, pid, self;;
+	DIR dir = {0};
+	struct dirent *de;
+	struct sigaction sa = { .sa_flags = 0, .sa_handler = handler };
+	struct chain *cp, *next;
+	struct timespec ts;
+
+	/* Blocking signals in two steps, first only app-level signals
+	 * before taking the lock, then all signals after taking the lock,
+	 * is necessary to achieve AS-safety. Blocking them all first would
+	 * deadlock if multiple threads called __synccall. Waiting to block
+	 * any until after the lock would allow re-entry in the same thread
+	 * with the lock already held. */
+	__block_app_sigs(&oldmask);
+	LOCK(synccall_lock);
+	__block_all_sigs(0);
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
 
-	if (!libc.threads_minus_1) {
-		func(ctx);
-		return;
-	}
-
-	__inhibit_ptc();
+	head = 0;
 
-	__block_all_sigs(&oldmask);
+	if (!libc.threaded) goto single_threaded;
 
-	sem_init(&chaindone, 0, 0);
-	sem_init(&chainlock, 0, 1);
-	chainlen = 0;
-	head = 0;
 	callback = func;
 	context = ctx;
 
-	sa.sa_flags = SA_SIGINFO | SA_RESTART;
-	sa.sa_sigaction = handler;
-	sigfillset(&sa.sa_mask);
+	/* This atomic store ensures that any signaled threads will see the
+	 * above stores, and prevents more than a bounded number of threads,
+	 * those already in pthread_create, from creating new threads until
+	 * the value is cleared to zero again. */
+	a_store(&__block_new_threads, 1);
+
+	/* Block even implementation-internal signals, so that nothing
+	 * interrupts the SIGSYNCCALL handlers. The main possible source
+	 * of trouble is asynchronous cancellation. */
+	memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
 	__libc_sigaction(SIGSYNCCALL, &sa, 0);
-	sigqueue(getpid(), SIGSYNCCALL, (union sigval){0});
 
-	while (sem_wait(&chaindone));
+	pid = __syscall(SYS_getpid);
+	self = __syscall(SYS_gettid);
+
+	/* Since opendir is not AS-safe, the DIR needs to be setup manually
+	 * in automatic storage. Thankfully this is easy. */
+	dir.fd = open("/proc/self/task", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+	if (dir.fd < 0) goto out;
+
+	/* Initially send one signal per counted thread. But since we can't
+	 * synchronize with thread creation/exit here, there could be too
+	 * few signals. This initial signaling is just an optimization, not
+	 * part of the logic. */
+	for (i=libc.threads_minus_1; i; i--)
+		__syscall(SYS_kill, pid, SIGSYNCCALL);
+
+	/* Loop scanning the kernel-provided thread list until it shows no
+	 * threads that have not already replied to the signal. */
+	for (;;) {
+		int miss_cnt = 0;
+		while ((de = readdir(&dir))) {
+			if (!isdigit(de->d_name[0])) continue;
+			int tid = atoi(de->d_name);
+			if (tid == self || !tid) continue;
+
+			/* Set the target thread as the PI futex owner before
+			 * checking if it's in the list of caught threads. If it
+			 * adds itself to the list after we check for it, then
+			 * it will see its own tid in the PI futex and perform
+			 * the unlock operation. */
+			a_store(&target_tid, tid);
+
+			/* Thread-already-caught is a success condition. */
+			for (cp = head; cp && cp->tid != tid; cp=cp->next);
+			if (cp) continue;
+
+			r = -__syscall(SYS_tgkill, pid, tid, SIGSYNCCALL);
+
+			/* Target thread exit is a success condition. */
+			if (r == ESRCH) continue;
+
+			/* The FUTEX_LOCK_PI operation is used to loan priority
+			 * to the target thread, which otherwise may be unable
+			 * to run. Timeout is necessary because there is a race
+			 * condition where the tid may be reused by a different
+			 * process. */
+			clock_gettime(CLOCK_REALTIME, &ts);
+			ts.tv_nsec += 10000000;
+			if (ts.tv_nsec >= 1000000000) {
+				ts.tv_sec++;
+				ts.tv_nsec -= 1000000000;
+			}
+			r = -__syscall(SYS_futex, &target_tid,
+				FUTEX_LOCK_PI|FUTEX_PRIVATE, 0, &ts);
+
+			/* Obtaining the lock means the thread responded. ESRCH
+			 * means the target thread exited, which is okay too. */
+			if (!r || r == ESRCH) continue;
+
+			miss_cnt++;
+		}
+		if (!miss_cnt) break;
+		rewinddir(&dir);
+	}
+	close(dir.fd);
+
+	/* Serialize execution of callback in caught threads. */
+	for (cp=head; cp; cp=cp->next) {
+		sem_post(&cp->target_sem);
+		sem_wait(&cp->caller_sem);
+	}
 
-	sa.sa_flags = 0;
 	sa.sa_handler = SIG_IGN;
 	__libc_sigaction(SIGSYNCCALL, &sa, 0);
 
-	for (cur=head; cur; cur=cur->next) {
-		sem_post(&cur->sem);
-		while (sem_wait(&cur->sem2));
-	}
+single_threaded:
 	func(ctx);
 
-	for (cur=head; cur; cur=next) {
-		next = cur->next;
-		sem_post(&cur->sem);
+	/* Only release the caught threads once all threads, including the
+	 * caller, have returned from the callback function. */
+	for (cp=head; cp; cp=next) {
+		next = cp->next;
+		sem_post(&cp->target_sem);
 	}
 
-	__restore_sigs(&oldmask);
+out:
+	a_store(&__block_new_threads, 0);
+	__wake(&__block_new_threads, -1, 1);
 
-	__release_ptc();
+	pthread_setcancelstate(cs, 0);
+	UNLOCK(synccall_lock);
+	__restore_sigs(&oldmask);
 }
-- 
2.25.1
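
Illustration (not part of the patch above): the approach the commit message describes boils down to scanning the kernel's thread list in /proc/self/task, signaling every thread that has not yet checked in, and rescanning until a full pass finds no uncaught thread. The standalone sketch below shows that loop in ordinary application code. It substitutes a spare realtime signal and a plain semaphore handshake for musl's internal SIGSYNCCALL, PI-futex priority loan, and new-thread blocking, and it skips the AS-safety and thread-exit race handling the real implementation provides; the names SYNC_SIG, sync_handler, and broadcast_to_all_threads are invented for this example, not musl internals.

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

#define SYNC_SIG (SIGRTMIN+1)   /* assumption: an otherwise unused RT signal */
#define MAX_THREADS 1024

static sem_t caught_sem;                  /* posted once per caught thread */
static _Atomic int caught[MAX_THREADS];   /* tids that already ran the handler */
static _Atomic int ncaught;

/* Runs in each signaled thread: record its tid, do the per-thread work,
 * then tell the caller this thread has been caught. */
static void sync_handler(int sig)
{
	(void)sig;
	int old_errno = errno;
	int tid = (int)syscall(SYS_gettid);
	int slot = atomic_fetch_add(&ncaught, 1);
	if (slot < MAX_THREADS) caught[slot] = tid;
	/* ...a per-thread operation (e.g. a setuid syscall) would go here... */
	sem_post(&caught_sem);
	errno = old_errno;
}

static int already_caught(int tid)
{
	int n = atomic_load(&ncaught);
	if (n > MAX_THREADS) n = MAX_THREADS;
	for (int i = 0; i < n; i++)
		if (atomic_load(&caught[i]) == tid) return 1;
	return 0;
}

/* Signal every other thread listed in /proc/self/task, and keep rescanning
 * until one full pass finds no thread that has not yet run sync_handler. */
static void broadcast_to_all_threads(void)
{
	int self = (int)syscall(SYS_gettid);
	pid_t pid = getpid();
	for (;;) {
		int uncaught = 0;
		DIR *d = opendir("/proc/self/task");
		if (!d) return;   /* the whole technique depends on /proc */
		struct dirent *de;
		while ((de = readdir(d))) {
			if (!isdigit((unsigned char)de->d_name[0])) continue;
			int tid = atoi(de->d_name);
			if (tid == self || already_caught(tid)) continue;
			uncaught++;
			/* Failure (ESRCH) just means the thread already exited. */
			if (syscall(SYS_tgkill, (int)pid, tid, SYNC_SIG) == 0)
				while (sem_wait(&caught_sem));
		}
		closedir(d);
		if (!uncaught) break;   /* a clean pass: every thread is caught */
	}
}

static void *worker(void *p)
{
	(void)p;
	for (;;) pause();   /* idle until a signal arrives */
	return 0;
}

int main(void)
{
	struct sigaction sa = { .sa_handler = sync_handler };
	sigfillset(&sa.sa_mask);   /* keep other signals out of the handler */
	sigaction(SYNC_SIG, &sa, 0);
	sem_init(&caught_sem, 0, 0);

	pthread_t t[3];
	for (int i = 0; i < 3; i++) pthread_create(&t[i], 0, worker, 0);
	sleep(1);   /* crude: give the workers time to start */

	broadcast_to_all_threads();
	printf("caught %d thread(s)\n", atomic_load(&ncaught));
	return 0;
}

Unlike the patched __synccall, this sketch may simply hang if a signaled thread exits before handling the signal; the PI-futex timeout and the thread-already-caught list in the real code exist precisely to make that case a success condition rather than a deadlock.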