overhaul sem_open
authorRich Felker <dalias@aerifal.cx>
Sun, 30 Sep 2012 23:35:40 +0000 (19:35 -0400)
committerRich Felker <dalias@aerifal.cx>
Sun, 30 Sep 2012 23:35:40 +0000 (19:35 -0400)
this function was overly complicated and not even obviously correct.
avoid using openat/linkat just like in shm_open, and instead expand
pathname using code shared with shm_open. remove bogus (and dangerous,
with priorities) use of spinlocks.

this commit also heavily streamlines the code and ensures there are no
failure cases that can happen after a new semaphore has been created
in the filesystem, since that case is unreportable.

src/mman/shm_open.c
src/thread/sem_open.c

index a9be899b2b5e9718b936e792dc37fdc662ca9d7c..b23eac7fa1df901c42f091fd122e593941448b47 100644 (file)
@@ -7,7 +7,7 @@
 
 char *__strchrnul(const char *, int);
 
-static const char *mapname(const char *name, char *buf)
+char *__shm_mapname(const char *name, char *buf)
 {
        char *p;
        while (*name == '/') name++;
@@ -28,13 +28,13 @@ static const char *mapname(const char *name, char *buf)
 int shm_open(const char *name, int flag, mode_t mode)
 {
        char buf[NAME_MAX+10];
-       if (!(name = mapname(name, buf))) return -1;
+       if (!(name = __shm_mapname(name, buf))) return -1;
        return open(name, flag|O_NOFOLLOW|O_CLOEXEC|O_NONBLOCK, mode);
 }
 
 int shm_unlink(const char *name)
 {
        char buf[NAME_MAX+10];
-       if (!(name = mapname(name, buf))) return -1;
+       if (!(name = __shm_mapname(name, buf))) return -1;
        return unlink(name);
 }
index 2e900eb3a2dc0e393d9c712c6ac460d323a29609..08f98c25a3fa270be7235df68a0b1444905e7eb2 100644 (file)
 #include <sys/stat.h>
 #include <stdlib.h>
 #include <pthread.h>
+#include "libc.h"
+
+char *__shm_mapname(const char *, char *);
 
 static struct {
        ino_t ino;
        sem_t *sem;
        int refcnt;
 } *semtab;
+static int lock[2];
 
-static int semcnt;
-static pthread_spinlock_t lock;
-static pthread_once_t once;
-
-static void init()
-{
-       semtab = calloc(sizeof *semtab, SEM_NSEMS_MAX);
-}
-
-static sem_t *find_map(ino_t ino)
-{
-       int i;
-       for (i=0; i<SEM_NSEMS_MAX && semtab[i].ino != ino; i++);
-       if (i==SEM_NSEMS_MAX) return 0;
-       if (semtab[i].refcnt == INT_MAX) return (sem_t *)-1;
-       semtab[i].refcnt++;
-       return semtab[i].sem;
-}
+#define FLAGS (O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NONBLOCK)
 
 sem_t *sem_open(const char *name, int flags, ...)
 {
        va_list ap;
        mode_t mode;
        unsigned value;
-       int fd, tfd, dir;
+       int fd, i, e, slot, first=1, cnt;
        sem_t newsem;
        void *map;
        char tmp[64];
        struct timespec ts;
        struct stat st;
-       int i;
+       char buf[NAME_MAX+10];
 
-       while (*name=='/') name++;
-       if (strchr(name, '/')) {
-               errno = EINVAL;
+       if (!(name = __shm_mapname(name, buf)))
                return SEM_FAILED;
-       }
 
-       pthread_once(&once, init);
-       if (!semtab) {
-               errno = ENOMEM;
+       LOCK(lock);
+       /* Allocate table if we don't have one yet */
+       if (!semtab && !(semtab = calloc(sizeof *semtab, SEM_NSEMS_MAX))) {
+               UNLOCK(lock);
                return SEM_FAILED;
        }
 
-       if (flags & O_CREAT) {
-               va_start(ap, flags);
-               mode = va_arg(ap, mode_t) & 0666;
-               value = va_arg(ap, unsigned);
-               va_end(ap);
-               if (value > SEM_VALUE_MAX) {
-                       errno = EINVAL;
-                       return SEM_FAILED;
-               }
-               sem_init(&newsem, 1, value);
-               clock_gettime(CLOCK_REALTIME, &ts);
-               snprintf(tmp, sizeof(tmp), "/dev/shm/%p-%p-%d-%d",
-                       &name, name, (int)getpid(), (int)ts.tv_nsec);
-               tfd = open(tmp, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, mode);
-               if (tfd<0) return SEM_FAILED;
-               dir = open("/dev/shm", O_DIRECTORY|O_RDONLY|O_CLOEXEC);
-               if (dir<0 || write(tfd,&newsem,sizeof newsem)!=sizeof newsem) {
-                       if (dir >= 0) close(dir);
-                       close(tfd);
-                       unlink(tmp);
-                       return SEM_FAILED;
-               }
+       /* Reserve a slot in case this semaphore is not mapped yet;
+        * this is necessary because there is no way to handle
+        * failures after creation of the file. */
+       slot = -1;
+       for (cnt=i=0; i<SEM_NSEMS_MAX; i++) {
+               cnt += semtab[i].refcnt;
+               if (!semtab[i].sem && slot < 0) slot = i;
        }
+       /* Avoid possibility of overflow later */
+       if (cnt == INT_MAX || slot < 0) {
+               errno = EMFILE;
+               UNLOCK(lock);
+               return SEM_FAILED;
+       }
+       /* Dummy pointer to make a reservation */
+       semtab[slot].sem = (sem_t *)-1;
+       UNLOCK(lock);
 
-       flags &= ~O_ACCMODE;
-       flags |= O_RDWR;
+       flags &= (O_CREAT|O_EXCL);
 
-       pthread_spin_lock(&lock);
+       /* Early failure check for exclusive open; otherwise the case
+        * where the semaphore already exists is expensive. */
+       if (flags == (O_CREAT|O_EXCL) && access(name, F_OK) == 0) {
+               errno = EEXIST;
+               return SEM_FAILED;
+       }
 
        for (;;) {
-               if (!(flags & O_EXCL)) {
-                       fd = shm_open(name, flags&~O_CREAT, 0);
-                       if (fd >= 0 || errno != ENOENT) {
-                               if (flags & O_CREAT) {
-                                       close(dir);
-                                       close(tfd);
-                                       unlink(tmp);
-                               }
-                               if (fd >= 0 && fstat(fd, &st) < 0) {
+               /* If exclusive mode is not requested, try opening an
+                * existing file first and fall back to creation. */
+               if (flags != (O_CREAT|O_EXCL)) {
+                       fd = open(name, FLAGS);
+                       if (fd >= 0) {
+                               if ((map = mmap(0, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED ||
+                                   fstat(fd, &st) < 0) {
                                        close(fd);
-                                       fd = -1;
-                               }
-                               if (fd < 0) {
-                                       pthread_spin_unlock(&lock);
                                        return SEM_FAILED;
                                }
-                               if ((map = find_map(st.st_ino))) {
-                                       pthread_spin_unlock(&lock);
-                                       close(fd);
-                                       if (map == (sem_t *)-1)
-                                               return SEM_FAILED;
-                                       return map;
-                               }
+                               close(fd);
                                break;
                        }
+                       if (errno != ENOENT)
+                               return SEM_FAILED;
                }
-               if (!(flags & O_CREAT)) {
-                       pthread_spin_unlock(&lock);
+               if (!(flags & O_CREAT))
                        return SEM_FAILED;
+               if (first) {
+                       first = 0;
+                       va_start(ap, flags);
+                       mode = va_arg(ap, mode_t) & 0666;
+                       value = va_arg(ap, unsigned);
+                       va_end(ap);
+                       if (value > SEM_VALUE_MAX) {
+                               errno = EINVAL;
+                               return SEM_FAILED;
+                       }
+                       sem_init(&newsem, 1, value);
                }
-               if (!linkat(AT_FDCWD, tmp, dir, name, 0)) {
-                       fd = tfd;
-                       close(dir);
-                       unlink(tmp);
-                       break;
+               /* Create a temp file with the new semaphore contents
+                * and attempt to atomically link it as the new name */
+               clock_gettime(CLOCK_REALTIME, &ts);
+               snprintf(tmp, sizeof(tmp), "/dev/shm/tmp-%d", (int)ts.tv_nsec);
+               fd = open(tmp, O_CREAT|O_EXCL|FLAGS, mode);
+               if (fd < 0) {
+                       if (errno == EEXIST) continue;
+                       return SEM_FAILED;
                }
-               if ((flags & O_EXCL) || errno != EEXIST) {
-                       close(dir);
-                       close(tfd);
+               if (write(fd, &newsem, sizeof newsem) != sizeof newsem || fstat(fd, &st) < 0 ||
+                   (map = mmap(0, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+                       close(fd);
                        unlink(tmp);
                        return SEM_FAILED;
                }
-       }
-       if (fstat(fd, &st) < 0) {
-               pthread_spin_unlock(&lock);
                close(fd);
-               return SEM_FAILED;
-       }
-       if (semcnt == SEM_NSEMS_MAX) {
-               pthread_spin_unlock(&lock);
-               close(fd);
-               errno = EMFILE;
-               return SEM_FAILED;
+               if (link(tmp, name) == 0) break;
+               e = errno;
+               unlink(tmp);
+               /* Failure is only fatal when doing an exclusive open;
+                * otherwise, next iteration will try to open the
+                * existing file. */
+               if (e != EEXIST || flags == (O_CREAT|O_EXCL))
+                       return SEM_FAILED;
        }
-       for (i=0; i<SEM_NSEMS_MAX && semtab[i].sem; i++);
-       map = mmap(0, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
-       close(fd);
-       if (map == MAP_FAILED) {
-               pthread_spin_unlock(&lock);
-               return SEM_FAILED;
+
+       /* See if the newly mapped semaphore is already mapped. If
+        * so, unmap the new mapping and use the existing one. Otherwise,
+        * add it to the table of mapped semaphores. */
+       LOCK(lock);
+       for (i=0; i<SEM_NSEMS_MAX && semtab[i].ino != st.st_ino; i++);
+       if (i<SEM_NSEMS_MAX) {
+               munmap(map, sizeof(sem_t));
+               semtab[i].refcnt++;
+               UNLOCK(lock);
+               return semtab[i].sem;
        }
-       semtab[i].ino = st.st_ino;
-       semtab[i].sem = map;
-       semtab[i].refcnt = 1;
-       pthread_spin_unlock(&lock);
+       semtab[slot].refcnt = 1;
+       semtab[slot].sem = map;
+       semtab[slot].ino = st.st_ino;
+       UNLOCK(lock);
+
        return map;
 }
 
 int sem_close(sem_t *sem)
 {
        int i;
-       pthread_spin_lock(&lock);
+       LOCK(lock);
        for (i=0; i<SEM_NSEMS_MAX && semtab[i].sem != sem; i++);
        if (!--semtab[i].refcnt) {
                semtab[i].sem = 0;
                semtab[i].ino = 0;
        }
-       pthread_spin_unlock(&lock);
-       return munmap(sem, sizeof *sem);
+       UNLOCK(lock);
+       munmap(sem, sizeof *sem);
+       return 0;
 }