From 8f11e6127fe93093f81a52b15bb1537edc3fc8af Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 15 Feb 2019 22:29:01 -0500 Subject: [PATCH] track all live threads in an AS-safe, fully-consistent linked list the hard problem here is unlinking threads from a list when they exit without creating a window of inconsistency where the kernel task for a thread still exists and is still executing instructions in userspace, but is not reflected in the list. the magic solution here is getting rid of per-thread exit futex addresses (set_tid_address), and instead using the exit futex to unlock the global thread list. since pthread_join can no longer see the thread enter a detach_state of EXITED (which depended on the exit futex address pointing to the detach_state), it must now observe the unlocking of the thread list lock before it can unmap the joined thread and return. it doesn't actually have to take the lock. for this, a __tl_sync primitive is offered, with a signature that will allow it to be enhanced for quick return even under contention on the lock, if needed. for now, the exiting thread always performs a futex wake on its detach_state. a future change could optimize this out except when there is already a joiner waiting. initial/dynamic variants of detached state no longer need to be tracked separately, since the futex address is always set to the global list lock, not a thread-local address that could become invalid on detached thread exit. all detached threads, however, must perform a second sigprocmask syscall to block implementation-internal signals, since locking the thread list with them already blocked is not permissible. the arch-independent C version of __unmapself no longer needs to take a lock or setup its own futex address to release the lock, since it must necessarily be called with the thread list lock already held, guaranteeing exclusive access to the temporary stack. changes to libc.threads_minus_1 no longer need to be atomic, since they are guarded by the thread list lock. it is largely vestigial at this point, and can be replaced with a cheaper boolean indicating whether the process is multithreaded at some point in the future. --- src/env/__init_tls.c | 5 +- src/internal/pthread_impl.h | 12 +++-- src/process/fork.c | 1 + src/thread/__unmapself.c | 5 -- src/thread/pthread_create.c | 103 +++++++++++++++++++++++++----------- src/thread/pthread_detach.c | 2 +- src/thread/pthread_join.c | 9 +++- 7 files changed, 94 insertions(+), 43 deletions(-) diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c index 842886f6..f1874f2a 100644 --- a/src/env/__init_tls.c +++ b/src/env/__init_tls.c @@ -8,6 +8,8 @@ #include "atomic.h" #include "syscall.h" +volatile int __thread_list_lock; + int __init_tp(void *p) { pthread_t td = p; @@ -16,9 +18,10 @@ int __init_tp(void *p) if (r < 0) return -1; if (!r) libc.can_do_threads = 1; td->detach_state = DT_JOINABLE; - td->tid = __syscall(SYS_set_tid_address, &td->detach_state); + td->tid = __syscall(SYS_set_tid_address, &__thread_list_lock); td->locale = &libc.global_locale; td->robust_list.head = &td->robust_list.head; + td->next = td->prev = td; return 0; } diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index c677f7f6..508b40b5 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -18,7 +18,7 @@ struct pthread { * internal (accessed via asm) ABI. Do not change. */ struct pthread *self; uintptr_t *dtv; - void *unused1, *unused2; + struct pthread *prev, *next; /* non-ABI */ uintptr_t sysinfo; uintptr_t canary, canary2; @@ -56,11 +56,9 @@ struct pthread { }; enum { - DT_EXITED = 0, - DT_EXITING, + DT_EXITING = 0, DT_JOINABLE, DT_DETACHED, - DT_DYNAMIC, }; struct __timer { @@ -173,6 +171,12 @@ hidden void __acquire_ptc(void); hidden void __release_ptc(void); hidden void __inhibit_ptc(void); +hidden void __tl_lock(void); +hidden void __tl_unlock(void); +hidden void __tl_sync(pthread_t); + +extern hidden volatile int __thread_list_lock; + extern hidden unsigned __default_stacksize; extern hidden unsigned __default_guardsize; diff --git a/src/process/fork.c b/src/process/fork.c index da074ae9..11286ef4 100644 --- a/src/process/fork.c +++ b/src/process/fork.c @@ -27,6 +27,7 @@ pid_t fork(void) self->tid = __syscall(SYS_gettid); self->robust_list.off = 0; self->robust_list.pending = 0; + self->next = self->prev = self; libc.threads_minus_1 = 0; } __restore_sigs(&set); diff --git a/src/thread/__unmapself.c b/src/thread/__unmapself.c index 1d3bee1d..31d94e67 100644 --- a/src/thread/__unmapself.c +++ b/src/thread/__unmapself.c @@ -4,7 +4,6 @@ /* cheat and reuse CRTJMP macro from dynlink code */ #include "dynlink.h" -static volatile int lock; static void *unmap_base; static size_t unmap_size; static char shared_stack[256]; @@ -17,12 +16,8 @@ static void do_unmap() void __unmapself(void *base, size_t size) { - int tid=__pthread_self()->tid; char *stack = shared_stack + sizeof shared_stack; stack -= (uintptr_t)stack % 16; - while (lock || a_cas(&lock, 0, tid)) - a_spin(); - __syscall(SYS_set_tid_address, &lock); unmap_base = base; unmap_size = size; CRTJMP(do_unmap, stack); diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c index 8761381a..03cdea0a 100644 --- a/src/thread/pthread_create.c +++ b/src/thread/pthread_create.c @@ -16,6 +16,30 @@ weak_alias(dummy_0, __pthread_tsd_run_dtors); weak_alias(dummy_0, __do_orphaned_stdio_locks); weak_alias(dummy_0, __dl_thread_cleanup); +void __tl_lock(void) +{ + if (!a_cas(&__thread_list_lock, 0, 1)) return; + do { + a_cas(&__thread_list_lock, 1, 2); + __futexwait(&__thread_list_lock, 2, 0); + } while (a_cas(&__thread_list_lock, 0, 2)); +} + +void __tl_unlock(void) +{ + if (a_swap(&__thread_list_lock, 0)==2) + __wake(&__thread_list_lock, 1, 0); +} + +void __tl_sync(pthread_t td) +{ + a_barrier(); + if (!__thread_list_lock) return; + a_cas(&__thread_list_lock, 1, 2); + __wait(&__thread_list_lock, 0, 2, 0); + __wake(&__thread_list_lock, 1, 0); +} + _Noreturn void __pthread_exit(void *result) { pthread_t self = __pthread_self(); @@ -40,24 +64,30 @@ _Noreturn void __pthread_exit(void *result) * joinable threads it's a valid usage that must be handled. */ LOCK(self->killlock); - /* Block all signals before decrementing the live thread count. - * This is important to ensure that dynamically allocated TLS - * is not under-allocated/over-committed, and possibly for other - * reasons as well. */ - __block_all_sigs(&set); - - /* It's impossible to determine whether this is "the last thread" - * until performing the atomic decrement, since multiple threads - * could exit at the same time. For the last thread, revert the - * decrement, restore the tid, and unblock signals to give the - * atexit handlers and stdio cleanup code a consistent state. */ - if (a_fetch_add(&libc.threads_minus_1, -1)==0) { - libc.threads_minus_1 = 0; - UNLOCK(self->killlock); + /* The thread list lock must be AS-safe, and thus requires + * application signals to be blocked before it can be taken. */ + __block_app_sigs(&set); + __tl_lock(); + + /* If this is the only thread in the list, don't proceed with + * termination of the thread, but restore the previous lock and + * signal state to prepare for exit to call atexit handlers. */ + if (self->next == self) { + __tl_unlock(); __restore_sigs(&set); + UNLOCK(self->killlock); exit(0); } + /* At this point we are committed to thread termination. Unlink + * the thread from the list. This change will not be visible + * until the lock is released, which only happens after SYS_exit + * has been called, via the exit futex address pointing at the lock. */ + libc.threads_minus_1--; + self->next->prev = self->prev; + self->prev->next = self->next; + self->prev = self->next = self; + /* Process robust list in userspace to handle non-pshared mutexes * and the detached thread case where the robust list head will * be invalid when the kernel would process it. */ @@ -84,15 +114,11 @@ _Noreturn void __pthread_exit(void *result) * call; the loser is responsible for freeing thread resources. */ int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING); - if (state>=DT_DETACHED && self->map_base) { - /* Detached threads must avoid the kernel clear_child_tid - * feature, since the virtual address will have been - * unmapped and possibly already reused by a new mapping - * at the time the kernel would perform the write. In - * the case of threads that started out detached, the - * initial clone flags are correct, but if the thread was - * detached later, we need to clear it here. */ - if (state == DT_DYNAMIC) __syscall(SYS_set_tid_address, 0); + if (state==DT_DETACHED && self->map_base) { + /* Detached threads must block even implementation-internal + * signals, since they will not have a stack in their last + * moments of existence. */ + __block_all_sigs(&set); /* Robust list will no longer be valid, and was already * processed above, so unregister it with the kernel. */ @@ -108,6 +134,9 @@ _Noreturn void __pthread_exit(void *result) __unmapself(self->map_base, self->map_size); } + /* Wake any joiner. */ + __wake(&self->detach_state, 1, 1); + /* After the kernel thread exits, its tid may be reused. Clear it * to prevent inadvertent use and inform functions that would use * it that it's no longer available. */ @@ -147,7 +176,7 @@ static int start(void *p) if (a_swap(args->perr, ret)==-2) __wake(args->perr, 1, 1); if (ret) { - self->detach_state = DT_DYNAMIC; + self->detach_state = DT_DETACHED; __pthread_exit(0); } } @@ -273,14 +302,15 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att new->locale = &libc.global_locale; if (attr._a_detach) { new->detach_state = DT_DETACHED; - flags -= CLONE_CHILD_CLEARTID; } else { new->detach_state = DT_JOINABLE; } new->robust_list.head = &new->robust_list.head; new->CANARY = self->CANARY; - /* Setup argument structure for the new thread on its stack. */ + /* Setup argument structure for the new thread on its stack. + * It's safe to access from the caller only until the thread + * list is unlocked. */ stack -= (uintptr_t)stack % sizeof(uintptr_t); stack -= sizeof(struct start_args); struct start_args *args = (void *)stack; @@ -294,6 +324,9 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att args->perr = 0; } + /* Application signals (but not the synccall signal) must be + * blocked before the thread list lock can be taken, to ensure + * that the lock is AS-safe. */ __block_app_sigs(&set); /* Ensure SIGCANCEL is unblocked in new thread. This requires @@ -303,14 +336,24 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att args->sig_mask[(SIGCANCEL-1)/8/sizeof(long)] &= ~(1UL<<((SIGCANCEL-1)%(8*sizeof(long)))); - a_inc(&libc.threads_minus_1); - ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &new->detach_state); - + __tl_lock(); + libc.threads_minus_1++; + ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &__thread_list_lock); + + /* If clone succeeded, new thread must be linked on the thread + * list before unlocking it, even if scheduling may still fail. */ + if (ret >= 0) { + new->next = self->next; + new->prev = self; + new->next->prev = new; + new->prev->next = new; + } + __tl_unlock(); __restore_sigs(&set); __release_ptc(); if (ret < 0) { - a_dec(&libc.threads_minus_1); + libc.threads_minus_1--; if (map) __munmap(map, size); return EAGAIN; } diff --git a/src/thread/pthread_detach.c b/src/thread/pthread_detach.c index 16b0552d..77772af2 100644 --- a/src/thread/pthread_detach.c +++ b/src/thread/pthread_detach.c @@ -5,7 +5,7 @@ static int __pthread_detach(pthread_t t) { /* If the cas fails, detach state is either already-detached * or exiting/exited, and pthread_join will trap or cleanup. */ - if (a_cas(&t->detach_state, DT_JOINABLE, DT_DYNAMIC) != DT_JOINABLE) + if (a_cas(&t->detach_state, DT_JOINABLE, DT_DETACHED) != DT_JOINABLE) return __pthread_join(t, 0); return 0; } diff --git a/src/thread/pthread_join.c b/src/thread/pthread_join.c index 54d81039..b8813e02 100644 --- a/src/thread/pthread_join.c +++ b/src/thread/pthread_join.c @@ -1,6 +1,11 @@ #include "pthread_impl.h" #include +static void dummy1(pthread_t t) +{ +} +weak_alias(dummy1, __tl_sync); + static int __pthread_timedjoin_np(pthread_t t, void **res, const struct timespec *at) { int state, cs, r = 0; @@ -9,11 +14,11 @@ static int __pthread_timedjoin_np(pthread_t t, void **res, const struct timespec if (cs == PTHREAD_CANCEL_ENABLE) __pthread_setcancelstate(cs, 0); while ((state = t->detach_state) && r != ETIMEDOUT && r != EINVAL) { if (state >= DT_DETACHED) a_crash(); - r = __timedwait_cp(&t->detach_state, state, CLOCK_REALTIME, at, 0); + r = __timedwait_cp(&t->detach_state, state, CLOCK_REALTIME, at, 1); } __pthread_setcancelstate(cs, 0); if (r == ETIMEDOUT || r == EINVAL) return r; - a_barrier(); + __tl_sync(t); if (res) *res = t->result; if (t->map_base) __munmap(t->map_base, t->map_size); return 0; -- 2.25.1