#include <sys/stat.h>
#include <stdlib.h>
#include <pthread.h>
+#include "libc.h"
+
+char *__shm_mapname(const char *, char *);
static struct {
ino_t ino;
sem_t *sem;
int refcnt;
} *semtab;
+static int lock[2];
-static int semcnt;
-static pthread_spinlock_t lock;
-static pthread_once_t once;
-
-static void init()
-{
- semtab = calloc(sizeof *semtab, SEM_NSEMS_MAX);
-}
-
-static sem_t *find_map(ino_t ino)
-{
- int i;
- for (i=0; i<SEM_NSEMS_MAX && semtab[i].ino != ino; i++);
- if (i==SEM_NSEMS_MAX) return 0;
- if (semtab[i].refcnt == INT_MAX) return (sem_t *)-1;
- semtab[i].refcnt++;
- return semtab[i].sem;
-}
+#define FLAGS (O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NONBLOCK)
sem_t *sem_open(const char *name, int flags, ...)
{
va_list ap;
mode_t mode;
unsigned value;
- int fd, tfd, dir;
+ int fd, i, e, slot, first=1, cnt;
sem_t newsem;
void *map;
char tmp[64];
struct timespec ts;
struct stat st;
- int i;
+ char buf[NAME_MAX+10];
- while (*name=='/') name++;
- if (strchr(name, '/')) {
- errno = EINVAL;
+ if (!(name = __shm_mapname(name, buf)))
return SEM_FAILED;
- }
- pthread_once(&once, init);
- if (!semtab) {
- errno = ENOMEM;
+ LOCK(lock);
+ /* Allocate table if we don't have one yet */
+ if (!semtab && !(semtab = calloc(sizeof *semtab, SEM_NSEMS_MAX))) {
+ UNLOCK(lock);
return SEM_FAILED;
}
- if (flags & O_CREAT) {
- va_start(ap, flags);
- mode = va_arg(ap, mode_t) & 0666;
- value = va_arg(ap, unsigned);
- va_end(ap);
- if (value > SEM_VALUE_MAX) {
- errno = EINVAL;
- return SEM_FAILED;
- }
- sem_init(&newsem, 1, value);
- clock_gettime(CLOCK_REALTIME, &ts);
- snprintf(tmp, sizeof(tmp), "/dev/shm/%p-%p-%d-%d",
- &name, name, (int)getpid(), (int)ts.tv_nsec);
- tfd = open(tmp, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, mode);
- if (tfd<0) return SEM_FAILED;
- dir = open("/dev/shm", O_DIRECTORY|O_RDONLY|O_CLOEXEC);
- if (dir<0 || write(tfd,&newsem,sizeof newsem)!=sizeof newsem) {
- if (dir >= 0) close(dir);
- close(tfd);
- unlink(tmp);
- return SEM_FAILED;
- }
+ /* Reserve a slot in case this semaphore is not mapped yet;
+ * this is necessary because there is no way to handle
+ * failures after creation of the file. */
+ slot = -1;
+ for (cnt=i=0; i<SEM_NSEMS_MAX; i++) {
+ cnt += semtab[i].refcnt;
+ if (!semtab[i].sem && slot < 0) slot = i;
}
+ /* Avoid possibility of overflow later */
+ if (cnt == INT_MAX || slot < 0) {
+ errno = EMFILE;
+ UNLOCK(lock);
+ return SEM_FAILED;
+ }
+ /* Dummy pointer to make a reservation */
+ semtab[slot].sem = (sem_t *)-1;
+ UNLOCK(lock);
- flags &= ~O_ACCMODE;
- flags |= O_RDWR;
+ flags &= (O_CREAT|O_EXCL);
- pthread_spin_lock(&lock);
+ /* Early failure check for exclusive open; otherwise the case
+ * where the semaphore already exists is expensive. */
+ if (flags == (O_CREAT|O_EXCL) && access(name, F_OK) == 0) {
+ errno = EEXIST;
+ return SEM_FAILED;
+ }
for (;;) {
- if (!(flags & O_EXCL)) {
- fd = shm_open(name, flags&~O_CREAT, 0);
- if (fd >= 0 || errno != ENOENT) {
- if (flags & O_CREAT) {
- close(dir);
- close(tfd);
- unlink(tmp);
- }
- if (fd >= 0 && fstat(fd, &st) < 0) {
+ /* If exclusive mode is not requested, try opening an
+ * existing file first and fall back to creation. */
+ if (flags != (O_CREAT|O_EXCL)) {
+ fd = open(name, FLAGS);
+ if (fd >= 0) {
+ if ((map = mmap(0, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED ||
+ fstat(fd, &st) < 0) {
close(fd);
- fd = -1;
- }
- if (fd < 0) {
- pthread_spin_unlock(&lock);
return SEM_FAILED;
}
- if ((map = find_map(st.st_ino))) {
- pthread_spin_unlock(&lock);
- close(fd);
- if (map == (sem_t *)-1)
- return SEM_FAILED;
- return map;
- }
+ close(fd);
break;
}
+ if (errno != ENOENT)
+ return SEM_FAILED;
}
- if (!(flags & O_CREAT)) {
- pthread_spin_unlock(&lock);
+ if (!(flags & O_CREAT))
return SEM_FAILED;
+ if (first) {
+ first = 0;
+ va_start(ap, flags);
+ mode = va_arg(ap, mode_t) & 0666;
+ value = va_arg(ap, unsigned);
+ va_end(ap);
+ if (value > SEM_VALUE_MAX) {
+ errno = EINVAL;
+ return SEM_FAILED;
+ }
+ sem_init(&newsem, 1, value);
}
- if (!linkat(AT_FDCWD, tmp, dir, name, 0)) {
- fd = tfd;
- close(dir);
- unlink(tmp);
- break;
+ /* Create a temp file with the new semaphore contents
+ * and attempt to atomically link it as the new name */
+ clock_gettime(CLOCK_REALTIME, &ts);
+ snprintf(tmp, sizeof(tmp), "/dev/shm/tmp-%d", (int)ts.tv_nsec);
+ fd = open(tmp, O_CREAT|O_EXCL|FLAGS, mode);
+ if (fd < 0) {
+ if (errno == EEXIST) continue;
+ return SEM_FAILED;
}
- if ((flags & O_EXCL) || errno != EEXIST) {
- close(dir);
- close(tfd);
+ if (write(fd, &newsem, sizeof newsem) != sizeof newsem || fstat(fd, &st) < 0 ||
+ (map = mmap(0, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+ close(fd);
unlink(tmp);
return SEM_FAILED;
}
- }
- if (fstat(fd, &st) < 0) {
- pthread_spin_unlock(&lock);
close(fd);
- return SEM_FAILED;
- }
- if (semcnt == SEM_NSEMS_MAX) {
- pthread_spin_unlock(&lock);
- close(fd);
- errno = EMFILE;
- return SEM_FAILED;
+ if (link(tmp, name) == 0) break;
+ e = errno;
+ unlink(tmp);
+ /* Failure is only fatal when doing an exclusive open;
+ * otherwise, next iteration will try to open the
+ * existing file. */
+ if (e != EEXIST || flags == (O_CREAT|O_EXCL))
+ return SEM_FAILED;
}
- for (i=0; i<SEM_NSEMS_MAX && semtab[i].sem; i++);
- map = mmap(0, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
- close(fd);
- if (map == MAP_FAILED) {
- pthread_spin_unlock(&lock);
- return SEM_FAILED;
+
+ /* See if the newly mapped semaphore is already mapped. If
+ * so, unmap the new mapping and use the existing one. Otherwise,
+ * add it to the table of mapped semaphores. */
+ LOCK(lock);
+ for (i=0; i<SEM_NSEMS_MAX && semtab[i].ino != st.st_ino; i++);
+ if (i<SEM_NSEMS_MAX) {
+ munmap(map, sizeof(sem_t));
+ semtab[i].refcnt++;
+ UNLOCK(lock);
+ return semtab[i].sem;
}
- semtab[i].ino = st.st_ino;
- semtab[i].sem = map;
- semtab[i].refcnt = 1;
- pthread_spin_unlock(&lock);
+ semtab[slot].refcnt = 1;
+ semtab[slot].sem = map;
+ semtab[slot].ino = st.st_ino;
+ UNLOCK(lock);
+
return map;
}
int sem_close(sem_t *sem)
{
int i;
- pthread_spin_lock(&lock);
+ LOCK(lock);
for (i=0; i<SEM_NSEMS_MAX && semtab[i].sem != sem; i++);
if (!--semtab[i].refcnt) {
semtab[i].sem = 0;
semtab[i].ino = 0;
}
- pthread_spin_unlock(&lock);
- return munmap(sem, sizeof *sem);
+ UNLOCK(lock);
+ munmap(sem, sizeof *sem);
+ return 0;
}