From 4750cf4202c29a895639b89099a7bdbe9ae422b6 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 24 Apr 2012 16:32:23 -0400 Subject: [PATCH] ditch the priority inheritance locks; use malloc's version of lock i did some testing trying to switch malloc to use the new internal lock with priority inheritance, and my malloc contention test got 20-100 times slower. if priority inheritance futexes are this slow, it's simply too high a price to pay for avoiding priority inversion. maybe we can consider them somewhere down the road once the kernel folks get their act together on this (and preferably don't link it to glibc's inefficient lock API)... as such, i've switched __lock to use malloc's implementation of lightweight locks, and updated all the users of the code to use an array with a waiter count for their locks. this should give optimal performance in the vast majority of cases, and it's simple. malloc is still using its own internal copy of the lock code because it seems to yield measurably better performance with -O3 when it's inlined (20% or more difference in the contention stress test). 
--- src/dirent/__dirent.h | 2 +- src/dirent/readdir.c | 5 +---- src/dirent/readdir_r.c | 6 +++--- src/dirent/rewinddir.c | 4 ++-- src/dirent/seekdir.c | 4 ++-- src/exit/atexit.c | 14 +++++++------- src/exit/exit.c | 4 ++-- src/internal/libc.h | 2 +- src/internal/stdio_impl.h | 4 ++-- src/malloc/lite_malloc.c | 8 ++++---- src/misc/syslog.c | 18 +++++++++--------- src/prng/random.c | 18 +++++++++--------- src/thread/__lock.c | 26 +++----------------------- src/thread/pthread_atfork.c | 10 +++++----- src/time/tzset.c | 6 +++--- 15 files changed, 54 insertions(+), 77 deletions(-) diff --git a/src/dirent/__dirent.h b/src/dirent/__dirent.h index 07b3ee68..38a27b06 100644 --- a/src/dirent/__dirent.h +++ b/src/dirent/__dirent.h @@ -1,9 +1,9 @@ struct __DIR_s { - int lock; int fd; off_t tell; int buf_pos; int buf_end; + int lock[2]; char buf[2048]; }; diff --git a/src/dirent/readdir.c b/src/dirent/readdir.c index 1aeb25a5..2d27d29a 100644 --- a/src/dirent/readdir.c +++ b/src/dirent/readdir.c @@ -16,10 +16,7 @@ struct dirent *readdir(DIR *dir) if (dir->buf_pos >= dir->buf_end) { int len = __getdents(dir->fd, (void *)dir->buf, sizeof dir->buf); - if (len < 0) { - dir->lock = 0; - return NULL; - } else if (len == 0) return 0; + if (len <= 0) return 0; dir->buf_end = len; dir->buf_pos = 0; } diff --git a/src/dirent/readdir_r.c b/src/dirent/readdir_r.c index 58f60325..d3d7c608 100644 --- a/src/dirent/readdir_r.c +++ b/src/dirent/readdir_r.c @@ -11,18 +11,18 @@ int readdir_r(DIR *dir, struct dirent *buf, struct dirent **result) int errno_save = errno; int ret; - LOCK(&dir->lock); + LOCK(dir->lock); errno = 0; de = readdir(dir); if ((ret = errno)) { - UNLOCK(&dir->lock); + UNLOCK(dir->lock); return ret; } errno = errno_save; if (de) memcpy(buf, de, de->d_reclen); else buf = NULL; - UNLOCK(&dir->lock); + UNLOCK(dir->lock); *result = buf; return 0; } diff --git a/src/dirent/rewinddir.c b/src/dirent/rewinddir.c index c6138f7c..f2053008 100644 --- a/src/dirent/rewinddir.c +++ 
b/src/dirent/rewinddir.c @@ -5,9 +5,9 @@ void rewinddir(DIR *dir) { - LOCK(&dir->lock); + LOCK(dir->lock); lseek(dir->fd, 0, SEEK_SET); dir->buf_pos = dir->buf_end = 0; dir->tell = 0; - UNLOCK(&dir->lock); + UNLOCK(dir->lock); } diff --git a/src/dirent/seekdir.c b/src/dirent/seekdir.c index 81a0e331..5be47d4a 100644 --- a/src/dirent/seekdir.c +++ b/src/dirent/seekdir.c @@ -5,8 +5,8 @@ void seekdir(DIR *dir, long off) { - LOCK(&dir->lock); + LOCK(dir->lock); dir->tell = lseek(dir->fd, off, SEEK_SET); dir->buf_pos = dir->buf_end = 0; - UNLOCK(&dir->lock); + UNLOCK(dir->lock); } diff --git a/src/exit/atexit.c b/src/exit/atexit.c index 9d9c2fbe..1b40cb9b 100644 --- a/src/exit/atexit.c +++ b/src/exit/atexit.c @@ -14,22 +14,22 @@ static struct fl void *a[COUNT]; } builtin, *head; -static int lock; +static int lock[2]; void __funcs_on_exit() { int i; void (*func)(void *), *arg; - LOCK(&lock); + LOCK(lock); for (; head; head=head->next) { for (i=COUNT-1; i>=0 && !head->f[i]; i--); if (i<0) continue; func = head->f[i]; arg = head->a[i]; head->f[i] = 0; - UNLOCK(&lock); + UNLOCK(lock); func(arg); - LOCK(&lock); + LOCK(lock); } } @@ -41,7 +41,7 @@ int __cxa_atexit(void (*func)(void *), void *arg, void *dso) { int i; - LOCK(&lock); + LOCK(lock); /* Defer initialization of head so it can be in BSS */ if (!head) head = &builtin; @@ -50,7 +50,7 @@ int __cxa_atexit(void (*func)(void *), void *arg, void *dso) if (head->f[COUNT-1]) { struct fl *new_fl = calloc(sizeof(struct fl), 1); if (!new_fl) { - UNLOCK(&lock); + UNLOCK(lock); return -1; } new_fl->next = head; @@ -62,7 +62,7 @@ int __cxa_atexit(void (*func)(void *), void *arg, void *dso) head->f[i] = func; head->a[i] = arg; - UNLOCK(&lock); + UNLOCK(lock); return 0; } diff --git a/src/exit/exit.c b/src/exit/exit.c index 1ff19dbe..ae557c09 100644 --- a/src/exit/exit.c +++ b/src/exit/exit.c @@ -13,10 +13,10 @@ weak_alias(dummy, __fflush_on_exit); void exit(int code) { - static int lock; + static int lock[2]; /* If more than one 
thread calls exit, hang until _Exit ends it all */ - LOCK(&lock); + LOCK(lock); /* Only do atexit & stdio flush if they were actually used */ __funcs_on_exit(); diff --git a/src/internal/libc.h b/src/internal/libc.h index d6797f90..78fca47f 100644 --- a/src/internal/libc.h +++ b/src/internal/libc.h @@ -15,7 +15,7 @@ struct __libc { volatile int threads_minus_1; int canceldisable; FILE *ofl_head; - int ofl_lock; + int ofl_lock[2]; }; diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h index af7aacc8..a1f31b3c 100644 --- a/src/internal/stdio_impl.h +++ b/src/internal/stdio_impl.h @@ -88,8 +88,8 @@ int __putc_unlocked(int, FILE *); FILE *__fdopen(int, const char *); -#define OFLLOCK() LOCK(&libc.ofl_lock) -#define OFLUNLOCK() UNLOCK(&libc.ofl_lock) +#define OFLLOCK() LOCK(libc.ofl_lock) +#define OFLUNLOCK() UNLOCK(libc.ofl_lock) #define feof(f) ((f)->flags & F_EOF) #define ferror(f) ((f)->flags & F_ERR) diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c index c8293908..673966a1 100644 --- a/src/malloc/lite_malloc.c +++ b/src/malloc/lite_malloc.c @@ -12,7 +12,7 @@ void *__simple_malloc(size_t n) { static uintptr_t cur, brk; uintptr_t base, new; - static int lock; + static int lock[2]; size_t align=1; if (!n) n++; @@ -22,7 +22,7 @@ void *__simple_malloc(size_t n) align += align; n = n + align - 1 & -align; - LOCK(&lock); + LOCK(lock); if (!cur) cur = brk = __brk(0)+16; base = cur + align-1 & -align; if (n > SIZE_MAX - PAGE_SIZE - base) goto fail; @@ -32,12 +32,12 @@ void *__simple_malloc(size_t n) brk = new; } cur = base+n; - UNLOCK(&lock); + UNLOCK(lock); return (void *)base; fail: - UNLOCK(&lock); + UNLOCK(lock); toobig: errno = ENOMEM; return 0; diff --git a/src/misc/syslog.c b/src/misc/syslog.c index cbe65209..a4f36dee 100644 --- a/src/misc/syslog.c +++ b/src/misc/syslog.c @@ -10,7 +10,7 @@ #include #include "libc.h" -static int lock; +static int lock[2]; static const char *log_ident; static int log_opt; static int log_facility = 
LOG_USER; @@ -36,10 +36,10 @@ void closelog(void) { int cs; pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); - LOCK(&lock); + LOCK(lock); close(log_fd); log_fd = -1; - UNLOCK(&lock); + UNLOCK(lock); pthread_setcancelstate(cs, 0); } @@ -59,9 +59,9 @@ void openlog(const char *ident, int opt, int facility) { int cs; pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); - LOCK(&lock); + LOCK(lock); __openlog(ident, opt, facility); - UNLOCK(&lock); + UNLOCK(lock); pthread_setcancelstate(cs, 0); } @@ -77,7 +77,7 @@ static void _vsyslog(int priority, const char *message, va_list ap) if (log_fd < 0) { __openlog(log_ident, log_opt | LOG_NDELAY, log_facility); if (log_fd < 0) { - UNLOCK(&lock); + UNLOCK(lock); return; } } @@ -98,7 +98,7 @@ static void _vsyslog(int priority, const char *message, va_list ap) sendto(log_fd, buf, l, 0, (void *)&log_addr, 11); } - UNLOCK(&lock); + UNLOCK(lock); } void __vsyslog(int priority, const char *message, va_list ap) @@ -106,9 +106,9 @@ void __vsyslog(int priority, const char *message, va_list ap) int cs; if (!(log_mask & LOG_MASK(priority&7)) || (priority&~0x3ff)) return; pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); - LOCK(&lock); + LOCK(lock); _vsyslog(priority, message, ap); - UNLOCK(&lock); + UNLOCK(lock); pthread_setcancelstate(cs, 0); } diff --git a/src/prng/random.c b/src/prng/random.c index cc5702ed..4ad62058 100644 --- a/src/prng/random.c +++ b/src/prng/random.c @@ -33,7 +33,7 @@ static int n = 31; static int i = 3; static int j = 0; static uint32_t *x = init+1; -static int lock; +static int lock[2]; static uint32_t lcg31(uint32_t x) { return (1103515245*x + 12345) & 0x7fffffff; @@ -74,9 +74,9 @@ static void __srandom(unsigned seed) { } void srandom(unsigned seed) { - LOCK(&lock); + LOCK(lock); __srandom(seed); - UNLOCK(&lock); + UNLOCK(lock); } char *initstate(unsigned seed, char *state, size_t size) { @@ -84,7 +84,7 @@ char *initstate(unsigned seed, char *state, size_t size) { if (size < 8) return 0; - LOCK(&lock); + 
LOCK(lock); old = savestate(); if (size < 32) n = 0; @@ -98,24 +98,24 @@ char *initstate(unsigned seed, char *state, size_t size) { n = 63; x = (uint32_t*)state + 1; __srandom(seed); - UNLOCK(&lock); + UNLOCK(lock); return old; } char *setstate(char *state) { void *old; - LOCK(&lock); + LOCK(lock); old = savestate(); loadstate((uint32_t*)state); - UNLOCK(&lock); + UNLOCK(lock); return old; } long random(void) { long k; - LOCK(&lock); + LOCK(lock); if (n == 0) { k = x[0] = lcg31(x[0]); goto end; @@ -127,6 +127,6 @@ long random(void) { if (++j == n) j = 0; end: - UNLOCK(&lock); + UNLOCK(lock); return k; } diff --git a/src/thread/__lock.c b/src/thread/__lock.c index 5ba5dc5e..2f345ae7 100644 --- a/src/thread/__lock.c +++ b/src/thread/__lock.c @@ -1,32 +1,12 @@ #include "pthread_impl.h" -void __lock_2(volatile int *l) -{ - if (!__syscall(SYS_futex, l, FUTEX_LOCK_PI, 0, 0)) - return; - int old, tid = __pthread_self()->tid|INT_MIN; - while ((old = a_cas(l, 0, tid))) { - a_cas(l, old, old|INT_MIN); - __syscall(SYS_futex, l, FUTEX_WAIT, old|INT_MIN, 0); - } -} - void __lock(volatile int *l) { - if (a_cas(l, 0, __pthread_self()->tid)) __lock_2(l); -} - -void __unlock_2(volatile int *l) -{ - if (__syscall(SYS_futex, l, FUTEX_UNLOCK_PI)) { - *l = 0; - __syscall(SYS_futex, l, FUTEX_WAKE, 1); - } + while (a_swap(l, 1)) __wait(l, l+1, 1, 1); } void __unlock(volatile int *l) { - int old = *l; - if (!(old & INT_MIN) && a_cas(l, old, 0)==old) return; - __unlock_2(l); + a_store(l, 0); + if (l[1]) __wake(l, 1, 1); } diff --git a/src/thread/pthread_atfork.c b/src/thread/pthread_atfork.c index a7a82016..95fce207 100644 --- a/src/thread/pthread_atfork.c +++ b/src/thread/pthread_atfork.c @@ -8,14 +8,14 @@ static struct atfork_funcs { struct atfork_funcs *prev, *next; } *funcs; -static int lock; +static int lock[2]; void __fork_handler(int who) { struct atfork_funcs *p; if (!funcs) return; if (who < 0) { - LOCK(&lock); + LOCK(lock); for (p=funcs; p; p = p->next) { if (p->prepare) 
p->prepare(); funcs = p; @@ -26,7 +26,7 @@ void __fork_handler(int who) else if (who && p->child) p->child(); funcs = p; } - UNLOCK(&lock); + UNLOCK(lock); } } @@ -35,7 +35,7 @@ int pthread_atfork(void (*prepare)(void), void (*parent)(void), void (*child)(vo struct atfork_funcs *new = malloc(sizeof *new); if (!new) return -1; - LOCK(&lock); + LOCK(lock); new->next = funcs; new->prev = 0; new->prepare = prepare; @@ -43,6 +43,6 @@ int pthread_atfork(void (*prepare)(void), void (*parent)(void), void (*child)(vo new->child = child; if (funcs) funcs->prev = new; funcs = new; - UNLOCK(&lock); + UNLOCK(lock); return 0; } diff --git a/src/time/tzset.c b/src/time/tzset.c index 0cd47cf2..7e836c2f 100644 --- a/src/time/tzset.c +++ b/src/time/tzset.c @@ -106,12 +106,12 @@ void tzset(void) void __tzset(void) { - static int lock, init; + static int lock[2], init; if (init) return; - LOCK(&lock); + LOCK(lock); if (!init) tzset(); init=1; - UNLOCK(&lock); + UNLOCK(lock); } static int is_leap(int year) -- 2.25.1