struct dso **deps;
void *tls_image;
size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
+ /* Storage reserved at load time so that every pre-existing thread
+ * can lazily obtain a copy of this DSO's TLS block and an extended
+ * DTV without allocating; consumed by __tls_get_addr. Both point
+ * into the same calloc'd region as the struct itself. */
+ void **new_dtv;
+ unsigned char *new_tls;
+ /* Next unclaimed reservation slot for each; threads claim a slot
+ * with a_fetch_add, so no lock is needed. */
+ int new_dtv_idx, new_tls_idx;
char *shortname;
char buf[];
};
struct dso *p, temp_dso = {0};
int fd;
struct stat st;
+ size_t alloc_size;
+ int n_th = 0;
/* Catch and block attempts to reload the implementation itself */
if (name[0]=='l' && name[1]=='i' && name[2]=='b') {
map = map_library(fd, &temp_dso);
close(fd);
if (!map) return 0;
- p = malloc(sizeof *p + strlen(pathname) + 1);
+
+ /* Allocate storage for the new DSO. When there is TLS, this
+ * storage must include a reservation for all pre-existing
+ * threads to obtain copies of both the new TLS, and an
+ * extended DTV capable of storing an additional slot for
+ * the newly-loaded DSO. */
+ alloc_size = sizeof *p + strlen(pathname) + 1;
+ if (runtime && temp_dso.tls_image) {
+ /* Worst-case per-thread reservation: the TLS image plus
+ * alignment slack, plus an extended DTV (tls_cnt+1 live
+ * slots after the increment below, with extra slots of
+ * slack for the length entry and base-pointer rounding). */
+ size_t per_th = temp_dso.tls_size + temp_dso.tls_align
+ + sizeof(void *) * (tls_cnt+3);
+ n_th = __libc.threads_minus_1 + 1;
+ /* On multiplication overflow, force calloc to fail rather
+ * than wrap to a short allocation. */
+ if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
+ else alloc_size += n_th * per_th;
+ }
+ /* calloc, not malloc: the TLS reservations must start out
+ * zeroed so only tls_len bytes of image need copying later. */
+ p = calloc(1, alloc_size);
if (!p) {
munmap(map, map_len);
return 0;
}
memcpy(p, &temp_dso, sizeof temp_dso);
decode_dyn(p);
- if (p->tls_image) {
- p->tls_id = ++tls_cnt;
- tls_size += p->tls_size + p->tls_align + 8*sizeof(size_t) - 1
- & -4*sizeof(size_t);
- }
p->dev = st.st_dev;
p->ino = st.st_ino;
p->refcnt = 1;
strcpy(p->name, pathname);
/* Add a shortname only if name arg was not an explicit pathname. */
if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
+ /* TLS setup is deferred until after the pathname is copied,
+ * because the per-thread reservations are carved out of the
+ * same allocation just past the name string. */
+ if (p->tls_image) {
+ p->tls_id = ++tls_cnt;
+ tls_size += p->tls_size + p->tls_align + 8*sizeof(size_t) - 1
+ & -4*sizeof(size_t);
+ /* Round up past the pathname to pointer alignment; the DTV
+ * reservations (tls_cnt+1 slots per thread) begin there,
+ * followed by the TLS reservations. */
+ p->new_dtv = (void *)(-sizeof(size_t) &
+ (uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
+ p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
+ }
tail->next = p;
p->prev = tail;
{
}
-void *__copy_tls(unsigned char *mem, size_t cnt)
+void *__copy_tls(unsigned char *mem)
{
struct dso *p;
void **dtv = (void *)mem;
- dtv[0] = (void *)cnt;
- mem = (void *)(dtv + cnt + 1);
+ /* The module count is now read from the global tls_cnt rather
+ * than passed in; dtv[0] records how many module slots this DTV
+ * covers, which __tls_get_addr compares against on each access. */
+ dtv[0] = (void *)tls_cnt;
+ mem = (void *)(dtv + tls_cnt + 1);
for (p=tail; p; p=p->prev) {
+ /* tls_id 0 means the DSO has no TLS segment; with the DTV
+ * sized from the global count, every nonzero id now fits. */
- if (p->tls_id-1 >= cnt) continue;
+ if (!p->tls_id) continue;
mem += -p->tls_len & (4*sizeof(size_t)-1);
mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
& (p->tls_align-1);
return mem;
}
-void *__tls_get_addr(size_t *p)
+void *__tls_get_addr(size_t *v)
{
pthread_t self = __pthread_self();
- if ((size_t)self->dtv[0] < p[0]) {
- // FIXME: obtain new DTV and TLS from the DSO
- a_crash();
+ /* Fast path: this thread's DTV already covers the module and the
+ * module's TLS has been installed. v[0] is the module id, v[1]
+ * the offset within its TLS block. */
+ if (self->dtv && v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]])
+ return (char *)self->dtv[v[0]]+v[1];
+
+ /* Block signals to make accessing new TLS async-signal-safe */
+ sigset_t set;
+ sigfillset(&set);
+ pthread_sigmask(SIG_BLOCK, &set, &set);
+ /* Re-check with signals blocked: a handler that ran between the
+ * fast path and the mask may already have installed this TLS. */
+ if (self->dtv && v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]]) {
+ pthread_sigmask(SIG_SETMASK, &set, 0);
+ return (char *)self->dtv[v[0]]+v[1];
+ }
+
+ /* This is safe without any locks held because, if the caller
+ * is able to request the Nth entry of the DTV, the DSO list
+ * must be valid at least that far out and it was synchronized
+ * at program startup or by an already-completed call to dlopen. */
+ struct dso *p;
+ for (p=head; p->tls_id != v[0]; p=p->next);
+
+ /* Get new DTV space from new DSO if needed. new_dtv is void **,
+ * so indexing is already in pointer-sized units: each claiming
+ * thread owns a reservation of v[0]+1 slots. (An extra factor of
+ * sizeof(void *) here would overrun the n_th*(tls_cnt+1)-slot
+ * reservation for the second and later threads.) */
+ if (!self->dtv || v[0] > (size_t)self->dtv[0]) {
+ void **newdtv = p->new_dtv +
+ (v[0]+1)*a_fetch_add(&p->new_dtv_idx,1);
+ if (self->dtv) memcpy(newdtv, self->dtv,
+ ((size_t)self->dtv[0]+1) * sizeof(void *));
+ newdtv[0] = (void *)v[0];
+ self->dtv = newdtv;
}
- return (char *)self->dtv[p[0]] + p[1];
+
+ /* Get new TLS memory from new DSO */
+ unsigned char *mem = p->new_tls +
+ (p->tls_size + p->tls_align) * a_fetch_add(&p->new_tls_idx,1);
+ mem += ((uintptr_t)p->tls_image - (uintptr_t)mem) & (p->tls_align-1);
+ self->dtv[v[0]] = mem;
+ /* Copy only the initialized image (tls_len bytes); the remainder
+ * of the block (tbss) must stay zero, which the calloc'd
+ * reservation already guarantees. Copying tls_size bytes would
+ * drag unrelated mapping bytes into the zero-initialized region. */
+ memcpy(mem, p->tls_image, p->tls_len);
+ pthread_sigmask(SIG_SETMASK, &set, 0);
+ return mem + v[1];
}
void *__dynlink(int argc, char **argv)
* to copy the TLS images again in case they had relocs. */
tls_size += sizeof(struct pthread) + 4*sizeof(size_t);
__libc.tls_size = tls_size;
- __libc.tls_cnt = tls_cnt;
+ /* NOTE(review): __copy_tls now reads the global tls_cnt directly,
+ * so the count is no longer mirrored in __libc — confirm no other
+ * readers of __libc.tls_cnt remain. */
if (tls_cnt) {
struct dso *p;
void *mem = mmap(0, __libc.tls_size, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
if (mem==MAP_FAILED ||
- !__install_initial_tls(__copy_tls(mem, tls_cnt))) {
+ !__install_initial_tls(__copy_tls(mem))) {
dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
argv[0], tls_size);
_exit(127);
/* The initial DTV is located at the base of the memory
* allocated for TLS. Repeat copying TLS to pick up relocs. */
- if (tls_cnt) __copy_tls((void *)__pthread_self()->dtv, tls_cnt);
+ if (tls_cnt) __copy_tls((void *)__pthread_self()->dtv);
if (ldso_fail) _exit(127);
if (ldd_mode) _exit(0);
void *dlopen(const char *file, int mode)
{
struct dso *volatile p, *orig_tail, *next;
+ size_t orig_tls_cnt;
size_t i;
int cs;
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
pthread_rwlock_wrlock(&lock);
+ /* Keep new threads from being created (and copying TLS) while the
+ * TLS bookkeeping is in an intermediate state; pairs with
+ * __acquire_ptc/__release_ptc in pthread_create. */
+ __inhibit_ptc();
+ /* Clear p before the first point rtld_fail can longjmp here, so
+ * the cleanup path can tell nothing was loaded yet. */
+ p = 0;
+ orig_tls_cnt = tls_cnt;
orig_tail = tail;
if (setjmp(rtld_fail)) {
/* Clean up anything new that was (partially) loaded */
- if (p->deps) for (i=0; p->deps[i]; i++)
+ if (p && p->deps) for (i=0; p->deps[i]; i++)
if (p->deps[i]->global < 0)
p->deps[i]->global = 0;
for (p=orig_tail->next; p; p=next) {
free(p->deps);
free(p);
}
+ /* Roll back the TLS accounting for the DSOs just discarded. */
+ tls_cnt = orig_tls_cnt;
+ tls_size = __libc.tls_size;
tail = orig_tail;
tail->next = 0;
p = 0;
p->global = 1;
}
+ /* Publish the enlarged TLS size so newly created threads reserve
+ * room for the newly loaded modules. */
+ __libc.tls_size = tls_size;
+
if (ssp_used) __init_ssp(auxv);
_dl_debug_state();
do_init_fini(tail);
end:
+ __release_ptc();
pthread_rwlock_unlock(&lock);
pthread_setcancelstate(cs, 0);
return p;
static void dummy_0()
{
}
+ /* Weak no-op defaults for the "prevent thread creation" hooks,
+ * replacing the old __synccall_lock/__synccall_unlock names;
+ * presumably overridden by strong definitions where the real
+ * locking lives — TODO confirm the strong definitions' location. */
-weak_alias(dummy_0, __synccall_lock);
-weak_alias(dummy_0, __synccall_unlock);
+weak_alias(dummy_0, __acquire_ptc);
+weak_alias(dummy_0, __release_ptc);
weak_alias(dummy_0, __pthread_tsd_run_dtors);
_Noreturn void pthread_exit(void *result)
if (f && f->lock<0) f->lock = 0;
}
+ /* Prototype updated to match the new one-argument __copy_tls. */
-void *__copy_tls(unsigned char *, size_t);
+void *__copy_tls(unsigned char *);
int pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict attr, void *(*entry)(void *), void *restrict arg)
{
struct pthread *self = pthread_self(), *new;
unsigned char *map, *stack, *tsd;
unsigned flags = 0x7d8f00;
- size_t tls_cnt = libc.tls_cnt;
- size_t tls_size = libc.tls_size;
if (!self) return ENOSYS;
if (!libc.threaded) {
libc.threaded = 1;
}
+ /* Hold off a concurrent dlopen (which grows libc.tls_size and the
+ * DSO list via __inhibit_ptc) from here until the new thread has
+ * copied its TLS; released after __clone below. This replaces the
+ * local snapshots of tls_cnt/tls_size removed above. */
+ __acquire_ptc();
+
if (attr && attr->_a_stackaddr) {
map = 0;
tsd = (void *)(attr->_a_stackaddr-__pthread_tsd_size & -16);
if (attr) {
guard = ROUND(attr->_a_guardsize + DEFAULT_GUARD_SIZE);
size = guard + ROUND(attr->_a_stacksize
- + DEFAULT_STACK_SIZE + tls_size);
+ + DEFAULT_STACK_SIZE + libc.tls_size);
}
size += __pthread_tsd_size;
if (guard) {
}
tsd = map + size - __pthread_tsd_size;
}
+ /* libc.tls_size is stable while the ptc lock is held, so reading
+ * it directly here is consistent with the sizing above. */
- new = __copy_tls(tsd - tls_size, tls_cnt);
+ new = __copy_tls(tsd - libc.tls_size);
new->map_base = map;
new->map_size = size;
new->pid = self->pid;
new->canary = self->canary ^ (uintptr_t)&new;
stack = (void *)new;
- __synccall_lock();
-
a_inc(&libc.threads_minus_1);
ret = __clone(start, stack, flags, new, &new->tid, new, &new->tid);
- __synccall_unlock();
+ __release_ptc();
if (ret < 0) {
a_dec(&libc.threads_minus_1);