add membarrier syscall wrapper, refactor dynamic tls install to use it
author Rich Felker <dalias@aerifal.cx>
Fri, 22 Feb 2019 07:56:10 +0000 (02:56 -0500)
committer Rich Felker <dalias@aerifal.cx>
Fri, 22 Feb 2019 08:25:39 +0000 (03:25 -0500)
the motivation for this change is twofold. first, it gets the fallback
logic out of the dynamic linker, improving code readability and
organization. second, it provides application code that wants to use
the membarrier syscall, which depends on preregistration of intent
before the process becomes multithreaded unless unbounded latency is
acceptable, with a symbol that, when linked, ensures that this
registration happens.

include/sys/membarrier.h [new file with mode: 0644]
ldso/dynlink.c
src/include/sys/membarrier.h [new file with mode: 0644]
src/internal/pthread_impl.h
src/linux/membarrier.c [new file with mode: 0644]
src/thread/pthread_create.c

diff --git a/include/sys/membarrier.h b/include/sys/membarrier.h
new file mode 100644 (file)
index 0000000..10cb310
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef _SYS_MEMBARRIER_H
+#define _SYS_MEMBARRIER_H
+
+#define MEMBARRIER_CMD_QUERY 0 /* command values passed as the first argument of membarrier() */
+#define MEMBARRIER_CMD_GLOBAL 1
+#define MEMBARRIER_CMD_GLOBAL_EXPEDITED 2
+#define MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED 4
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED 8 /* the one command __membarrier can emulate without kernel support */
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED 16 /* issued once by __membarrier_init */
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE 32
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE 64
+
+#define MEMBARRIER_CMD_SHARED MEMBARRIER_CMD_GLOBAL /* second name for the same command value */
+
+int membarrier(int, int); /* arguments are (cmd, flags); weak alias of __membarrier */
+
+#endif
index c7d9dd393d01ad7fd311a17ea88e7fe0cab9c87f..025ed1b0c81eae20d5b01593393c17931b23d944 100644 (file)
@@ -18,6 +18,7 @@
 #include <ctype.h>
 #include <dlfcn.h>
 #include <semaphore.h>
+#include <sys/membarrier.h>
 #include "pthread_impl.h"
 #include "libc.h"
 #include "dynlink.h"
@@ -1351,18 +1352,6 @@ static void update_tls_size()
        tls_align);
 }
 
-void __dl_prepare_for_threads(void)
-{
-       /* MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED */
-       __syscall(SYS_membarrier, 1<<4, 0);
-}
-
-static sem_t barrier_sem;
-static void bcast_barrier(int s)
-{
-       sem_post(&barrier_sem);
-}
-
 static void install_new_tls(void)
 {
        sigset_t set;
@@ -1397,26 +1386,11 @@ static void install_new_tls(void)
        }
 
        /* Broadcast barrier to ensure contents of new dtv is visible
-        * if the new dtv pointer is. Use SYS_membarrier if it works,
-        * otherwise emulate with a signal. */
-
-       /* MEMBARRIER_CMD_PRIVATE_EXPEDITED */
-       if (__syscall(SYS_membarrier, 1<<3, 0)) {
-               sem_init(&barrier_sem, 0, 0);
-               struct sigaction sa = {
-                       .sa_flags = SA_RESTART,
-                       .sa_handler = bcast_barrier
-               };
-               memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
-               __libc_sigaction(SIGSYNCCALL, &sa, 0);  
-               for (td=self->next; td!=self; td=td->next)
-                       __syscall(SYS_tkill, td->tid, SIGSYNCCALL);
-               for (td=self->next; td!=self; td=td->next)
-                       sem_wait(&barrier_sem);
-               sa.sa_handler = SIG_IGN;
-               __libc_sigaction(SIGSYNCCALL, &sa, 0);
-               sem_destroy(&barrier_sem);
-       }
+        * if the new dtv pointer is. The __membarrier function has a
+        * fallback emulation using signals for kernels that lack the
+        * feature at the syscall level. */
+
+       __membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
 
        /* Install new dtv for each thread. */
        for (j=0, td=self; !j || td!=self; j++, td=td->next) {
diff --git a/src/include/sys/membarrier.h b/src/include/sys/membarrier.h
new file mode 100644 (file)
index 0000000..3654491
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef SYS_MEMBARRIER_H
+#define SYS_MEMBARRIER_H
+
+#include "../../../include/sys/membarrier.h" /* public header first; this wrapper only adds internal declarations */
+#include <features.h>
+
+hidden int __membarrier(int, int); /* internal (cmd, flags) entry point; membarrier is a weak alias for it */
+
+#endif
index de089967630dbaaad39a8e93e1907802a2538d22..9b001421200f59a0e276972ad7d40302583469b6 100644 (file)
@@ -130,7 +130,7 @@ hidden int __init_tp(void *);
 hidden void *__copy_tls(unsigned char *);
 hidden void __reset_tls();
 
-hidden void __dl_prepare_for_threads(void);
+hidden void __membarrier_init(void);
 hidden void __dl_thread_cleanup(void);
 hidden void __testcancel();
 hidden void __do_cleanup_push(struct __ptcb *);
diff --git a/src/linux/membarrier.c b/src/linux/membarrier.c
new file mode 100644 (file)
index 0000000..26d143e
--- /dev/null
@@ -0,0 +1,76 @@
+#include <sys/membarrier.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <string.h>
+#include "pthread_impl.h"
+#include "syscall.h"
+
+static void dummy_0(void) /* no-op; weak-alias target for __tl_lock and __tl_unlock below */
+{
+}
+
+static void dummy_1(pthread_t t) /* no-op taking a thread argument; weak-alias target for __tl_sync below */
+{
+}
+
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
+weak_alias(dummy_1, __tl_sync);
+
+static sem_t barrier_sem;
+
+static void bcast_barrier(int s) /* SIGSYNCCALL handler installed by the __membarrier fallback; s is unused */
+{
+       sem_post(&barrier_sem); /* one post per signalled thread; the initiator waits for each of them */
+}
+
+int __membarrier(int cmd, int flags) /* tries SYS_membarrier first; emulates PRIVATE_EXPEDITED with signals if that fails */
+{
+       int r = __syscall(SYS_membarrier, cmd, flags); /* raw result; nonzero selects the fallback for PRIVATE_EXPEDITED */
+       /* Emulate the private expedited command, which is needed by the
+        * dynamic linker for installation of dynamic TLS, for older
+        * kernels that lack the syscall. Unlike the syscall, this only
+        * synchronizes with threads of the process, not other processes
+        * sharing the VM, but such sharing is not a supported usage
+        * anyway. */
+       if (r && cmd == MEMBARRIER_CMD_PRIVATE_EXPEDITED && !flags) {
+               pthread_t self=__pthread_self(), td;
+               sigset_t set;
+               __block_app_sigs(&set); /* previous mask is saved in set and put back by __restore_sigs below */
+               __tl_lock(); /* presumably keeps the self->next thread ring stable while it is walked -- confirm */
+               sem_init(&barrier_sem, 0, 0);
+               struct sigaction sa = {
+                       .sa_flags = SA_RESTART,
+                       .sa_handler = bcast_barrier
+               };
+               memset(&sa.sa_mask, -1, sizeof sa.sa_mask); /* all bits set: every signal blocked while the handler runs */
+               __libc_sigaction(SIGSYNCCALL, &sa, 0);  /* NOTE(review): result is not checked -- confirm it cannot fail here */
+               for (td=self->next; td!=self; td=td->next) /* first pass: signal every thread except the caller */
+                       __syscall(SYS_tkill, td->tid, SIGSYNCCALL);
+               for (td=self->next; td!=self; td=td->next) /* second pass: wait for one sem_post per signalled thread */
+                       sem_wait(&barrier_sem);
+               sa.sa_handler = SIG_IGN; /* leave SIGSYNCCALL ignored once every thread has posted */
+               __libc_sigaction(SIGSYNCCALL, &sa, 0);
+               sem_destroy(&barrier_sem);
+               __tl_unlock();
+               __restore_sigs(&set);
+               return 0; /* the emulation path always reports success */
+       }
+       return __syscall_ret(r); /* presumably converts a kernel error code to -1 with errno set -- confirm */
+}
+
+void __membarrier_init(void) /* strong definition; pthread_create.c supplies a weak no-op and calls this before setting libc.threaded */
+{
+       /* If membarrier is linked, attempt to pre-register to be able to use
+        * the private expedited command before the process becomes multi-
+        * threaded, since registering later has bad, potentially unbounded
+        * latency. This syscall should be essentially free, and it's arguably
+        * a mistake in the API design that registration was even required.
+        * For other commands, registration may impose some cost, so it's left
+        * to the application to do so if desired. Unfortunately this means
+        * library code initialized after the process becomes multi-threaded
+        * cannot use these features without accepting registration latency. */
+       __syscall(SYS_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0); /* result ignored; if this fails, __membarrier still has its signal fallback */
+}
+
+weak_alias(__membarrier, membarrier);
index 54c035545003bf98c8d4d8197484a5295f8e2e10..7d4dc2ed5f4ad5c5a1bbdda806fada9e017f5992 100644 (file)
@@ -15,7 +15,7 @@ weak_alias(dummy_0, __release_ptc);
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 weak_alias(dummy_0, __do_orphaned_stdio_locks);
 weak_alias(dummy_0, __dl_thread_cleanup);
-weak_alias(dummy_0, __dl_prepare_for_threads);
+weak_alias(dummy_0, __membarrier_init);
 
 static int tl_lock_count;
 static int tl_lock_waiters;
@@ -246,7 +246,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
                init_file_lock(__stderr_used);
                __syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
                self->tsd = (void **)__pthread_tsd_main;
-               __dl_prepare_for_threads();
+               __membarrier_init();
                libc.threaded = 1;
        }
        if (attrp && !c11) attr = *attrp;