From: Rich Felker
Date: Mon, 19 Dec 2016 00:38:53 +0000 (-0500)
Subject: rework arm atomic/tp backends to be thumb-compatible and fdpic-ready
X-Git-Tag: v1.1.16~6
X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=29237f7f5c09c436825a7a12b68ab4143b0ebd1f;p=oweals%2Fmusl.git

rework arm atomic/tp backends to be thumb-compatible and fdpic-ready

three problems are addressed:

- use of pc arithmetic, which was difficult if not impossible to make
  correct in thumb mode on all models, so that relative rather than
  absolute pointers to the backends could be used. this was designed
  back when there was no coherent model for the early stages of the
  dynamic linker before relocations, and is no longer necessary.

- assumption that data (the relative pointers to the backends) can be
  accessed at a constant displacement from the code. this will not be
  possible on future fdpic subarchs (for cortex-m), so move
  responsibility for loading the backend code address to the caller.

- hard-coded arm opcodes using the .word directive. instead, use the
  .arch directive to work around the assembler's refusal to assemble
  instructions not available (or in some cases, available but just
  considered deprecated) in the target isa level. the obscure v6t2
  arch is used for v6 code so as to (1) allow generation of thumb2
  output if -mthumb is active, and (2) avoid warnings/errors for mcr
  barriers that clang would produce if we just set arch to v7-a.

in addition, the __aeabi_read_tp function is moved out of the inner
workings and implemented as an asm wrapper around a C function, so
that asm code does not need to read global data. the asm wrapper
serves to satisfy the ABI calling convention requirements for this
function.
---

diff --git a/arch/arm/atomic_arch.h b/arch/arm/atomic_arch.h
index 706fa1f2..d6af84d0 100644
--- a/arch/arm/atomic_arch.h
+++ b/arch/arm/atomic_arch.h
@@ -1,5 +1,11 @@
-__attribute__((__visibility__("hidden")))
-extern const void *__arm_atomics[3]; /* gettp, cas, barrier */
+#if __ARM_ARCH_4__ || __ARM_ARCH_4T__ || __ARM_ARCH == 4
+#define BLX "mov lr,pc\n\tbx"
+#else
+#define BLX "blx"
+#endif
+
+extern uintptr_t __attribute__((__visibility__("hidden")))
+	__a_cas_ptr, __a_barrier_ptr;
 
 #if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
  || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
@@ -42,11 +48,12 @@ static inline int a_cas(volatile int *p, int t, int s)
 		register int r0 __asm__("r0") = t;
 		register int r1 __asm__("r1") = s;
 		register volatile int *r2 __asm__("r2") = p;
+		register uintptr_t r3 __asm__("r3") = __a_cas_ptr;
 		int old;
 		__asm__ __volatile__ (
-			"bl __a_cas"
-			: "+r"(r0) : "r"(r1), "r"(r2)
-			: "memory", "r3", "lr", "ip", "cc" );
+			BLX " r3"
+			: "+r"(r0), "+r"(r3) : "r"(r1), "r"(r2)
+			: "memory", "lr", "ip", "cc" );
 		if (!r0) return t;
 		if ((old=*p)!=t) return old;
 	}
@@ -58,8 +65,8 @@ static inline int a_cas(volatile int *p, int t, int s)
 #define a_barrier a_barrier
 static inline void a_barrier()
 {
-	__asm__ __volatile__("bl __a_barrier"
-		: : : "memory", "cc", "ip", "lr" );
+	register uintptr_t ip __asm__("ip") = __a_barrier_ptr;
+	__asm__ __volatile__( BLX " ip" : "+r"(ip) : : "memory", "cc", "lr" );
 }
 #endif
 
diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h
index 8b8a7fb6..197752ef 100644
--- a/arch/arm/pthread_arch.h
+++ b/arch/arm/pthread_arch.h
@@ -10,15 +10,17 @@ static inline pthread_t __pthread_self()
 
 #else
 
-static inline pthread_t __pthread_self()
-{
-#ifdef __clang__
-	char *p;
-	__asm__ __volatile__ ( "bl __a_gettp\n\tmov %0,r0" : "=r"(p) : : "cc", "r0", "lr" );
+#if __ARM_ARCH_4__ || __ARM_ARCH_4T__ || __ARM_ARCH == 4
+#define BLX "mov lr,pc\n\tbx"
 #else
-	register char *p __asm__("r0");
-	__asm__ __volatile__ ( "bl __a_gettp" : "=r"(p) : : "cc", "lr" );
+#define BLX "blx"
 #endif
+
+static inline pthread_t __pthread_self()
+{
+	extern uintptr_t __attribute__((__visibility__("hidden"))) __a_gettp_ptr;
+	register uintptr_t p __asm__("r0");
+	__asm__ __volatile__ ( BLX " %1" : "=r"(p) : "r"(__a_gettp_ptr) : "cc", "lr" );
 	return (void *)(p+8-sizeof(struct pthread));
 }
 
diff --git a/src/thread/arm/__aeabi_read_tp.s b/src/thread/arm/__aeabi_read_tp.s
new file mode 100644
index 00000000..9d0cd311
--- /dev/null
+++ b/src/thread/arm/__aeabi_read_tp.s
@@ -0,0 +1,8 @@
+.syntax unified
+.global __aeabi_read_tp
+.type __aeabi_read_tp,%function
+__aeabi_read_tp:
+	push {r1,r2,r3,lr}
+	bl __aeabi_read_tp_c
+	pop {r1,r2,r3,lr}
+	bx lr
diff --git a/src/thread/arm/__aeabi_read_tp_c.c b/src/thread/arm/__aeabi_read_tp_c.c
new file mode 100644
index 00000000..654bdc57
--- /dev/null
+++ b/src/thread/arm/__aeabi_read_tp_c.c
@@ -0,0 +1,8 @@
+#include "pthread_impl.h"
+#include <stdint.h>
+
+__attribute__((__visibility__("hidden")))
+void *__aeabi_read_tp_c(void)
+{
+	return (void *)((uintptr_t)__pthread_self()-8+sizeof(struct pthread));
+}
diff --git a/src/thread/arm/__set_thread_area.c b/src/thread/arm/__set_thread_area.c
index 61d02827..daf496c2 100644
--- a/src/thread/arm/__set_thread_area.c
+++ b/src/thread/arm/__set_thread_area.c
@@ -6,43 +6,47 @@
 #define HWCAP_TLS (1 << 15)
 
 extern const unsigned char __attribute__((__visibility__("hidden")))
-	__a_barrier_dummy[], __a_barrier_oldkuser[],
-	__a_barrier_v6[], __a_barrier_v7[],
-	__a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
-	__a_gettp_dummy[];
+	__a_barrier_oldkuser[], __a_barrier_v6[], __a_barrier_v7[],
+	__a_cas_v6[], __a_cas_v7[],
+	__a_gettp_cp15[];
 
 #define __a_barrier_kuser 0xffff0fa0
+#define __a_barrier_oldkuser (uintptr_t)__a_barrier_oldkuser
+#define __a_barrier_v6 (uintptr_t)__a_barrier_v6
+#define __a_barrier_v7 (uintptr_t)__a_barrier_v7
+
 #define __a_cas_kuser 0xffff0fc0
+#define __a_cas_v6 (uintptr_t)__a_cas_v6
+#define __a_cas_v7 (uintptr_t)__a_cas_v7
+
 #define __a_gettp_kuser 0xffff0fe0
+#define __a_gettp_cp15 (uintptr_t)__a_gettp_cp15
 
 extern uintptr_t __attribute__((__visibility__("hidden")))
 	__a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
 
-#define SET(op,ver) (__a_##op##_ptr = \
-	(uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
-
 int __set_thread_area(void *p)
 {
 #if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
 	if (__hwcap & HWCAP_TLS) {
 		size_t *aux;
-		SET(cas, v7);
-		SET(barrier, v7);
+		__a_cas_ptr = __a_cas_v7;
+		__a_barrier_ptr = __a_barrier_v7;
 		for (aux=libc.auxv; *aux; aux+=2) {
 			if (*aux != AT_PLATFORM) continue;
 			const char *s = (void *)aux[1];
 			if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break;
-			SET(cas, v6);
-			SET(barrier, v6);
+			__a_cas_ptr = __a_cas_v6;
+			__a_barrier_ptr = __a_barrier_v6;
 			break;
 		}
 	} else {
 		int ver = *(int *)0xffff0ffc;
-		SET(gettp, kuser);
-		SET(cas, kuser);
-		SET(barrier, kuser);
+		__a_gettp_ptr = __a_gettp_kuser;
+		__a_cas_ptr = __a_cas_kuser;
+		__a_barrier_ptr = __a_barrier_kuser;
 		if (ver < 2) a_crash();
-		if (ver < 3) SET(barrier, oldkuser);
+		if (ver < 3) __a_barrier_ptr = __a_barrier_oldkuser;
 	}
 #endif
 	return __syscall(0xf0005, p);
diff --git a/src/thread/arm/atomics.s b/src/thread/arm/atomics.s
index 673fc03b..202faa4a 100644
--- a/src/thread/arm/atomics.s
+++ b/src/thread/arm/atomics.s
@@ -1,20 +1,15 @@
 .syntax unified
 .text
 
-.global __a_barrier
-.hidden __a_barrier
-.type __a_barrier,%function
-__a_barrier:
-	ldr ip,1f
-	ldr ip,[pc,ip]
-	add pc,pc,ip
-1:	.word __a_barrier_ptr-1b
 .global __a_barrier_dummy
 .hidden __a_barrier_dummy
+.type __a_barrier_dummy,%function
 __a_barrier_dummy:
 	bx lr
+
 .global __a_barrier_oldkuser
 .hidden __a_barrier_oldkuser
+.type __a_barrier_oldkuser,%function
 __a_barrier_oldkuser:
 	push {r0,r1,r2,r3,ip,lr}
 	mov r1,r0
@@ -24,90 +19,88 @@ __a_barrier_oldkuser:
 	mov pc,ip
 	pop {r0,r1,r2,r3,ip,lr}
 	bx lr
+
 .global __a_barrier_v6
 .hidden __a_barrier_v6
+.type __a_barrier_v6,%function
 __a_barrier_v6:
+	.arch armv6t2
 	mcr p15,0,r0,c7,c10,5
 	bx lr
+
 .global __a_barrier_v7
 .hidden __a_barrier_v7
+.type __a_barrier_v7,%function
 __a_barrier_v7:
-	.word 0xf57ff05b /* dmb ish */
+	.arch armv7-a
+	dmb ish
 	bx lr
 
-.global __a_cas
-.hidden __a_cas
-.type __a_cas,%function
-__a_cas:
-	ldr ip,1f
-	ldr ip,[pc,ip]
-	add pc,pc,ip
-1:	.word __a_cas_ptr-1b
 .global __a_cas_dummy
 .hidden __a_cas_dummy
+.type __a_cas_dummy,%function
 __a_cas_dummy:
 	mov r3,r0
 	ldr r0,[r2]
 	subs r0,r3,r0
 	streq r1,[r2]
 	bx lr
+
 .global __a_cas_v6
 .hidden __a_cas_v6
+.type __a_cas_v6,%function
 __a_cas_v6:
+	.arch armv6t2
 	mov r3,r0
 	mcr p15,0,r0,c7,c10,5
-1:	.word 0xe1920f9f /* ldrex r0,[r2] */
+1:	ldrex r0,[r2]
 	subs r0,r3,r0
-	.word 0x01820f91 /* strexeq r0,r1,[r2] */
+	strexeq r0,r1,[r2]
 	teqeq r0,#1
 	beq 1b
 	mcr p15,0,r0,c7,c10,5
 	bx lr
+
 .global __a_cas_v7
 .hidden __a_cas_v7
+.type __a_cas_v7,%function
 __a_cas_v7:
+	.arch armv7-a
 	mov r3,r0
-	.word 0xf57ff05b /* dmb ish */
-1:	.word 0xe1920f9f /* ldrex r0,[r2] */
+	dmb ish
+1:	ldrex r0,[r2]
 	subs r0,r3,r0
-	.word 0x01820f91 /* strexeq r0,r1,[r2] */
+	strexeq r0,r1,[r2]
 	teqeq r0,#1
 	beq 1b
-	.word 0xf57ff05b /* dmb ish */
+	dmb ish
 	bx lr
 
-.global __aeabi_read_tp
-.type __aeabi_read_tp,%function
-__aeabi_read_tp:
-
-.global __a_gettp
-.hidden __a_gettp
-.type __a_gettp,%function
-__a_gettp:
-	ldr r0,1f
-	ldr r0,[pc,r0]
-	add pc,pc,r0
-1:	.word __a_gettp_ptr-1b
-.global __a_gettp_dummy
-.hidden __a_gettp_dummy
-__a_gettp_dummy:
+.global __a_gettp_cp15
+.hidden __a_gettp_cp15
+.type __a_gettp_cp15,%function
+__a_gettp_cp15:
 	mrc p15,0,r0,c13,c0,3
 	bx lr
 
+/* Tag this file with minimum ISA level so as not to affect linking. */
+.arch armv4t
+.eabi_attribute 6,2
+
 .data
 .align 2
 
 .global __a_barrier_ptr
 .hidden __a_barrier_ptr
 __a_barrier_ptr:
-	.word 0
+	.word __a_barrier_dummy
 
 .global __a_cas_ptr
 .hidden __a_cas_ptr
 __a_cas_ptr:
-	.word 0
+	.word __a_cas_dummy
 
 .global __a_gettp_ptr
 .hidden __a_gettp_ptr
 __a_gettp_ptr:
-	.word 0
+	.word __a_gettp_cp15