From: Rich Felker Date: Tue, 16 Jun 2015 14:28:30 +0000 (+0000) Subject: add support for sh2 interrupt-masking-based atomics to sh port X-Git-Tag: v1.1.11~36 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=f9d84554bae0fa17c9a1d724549c4408022228a5;p=oweals%2Fmusl.git add support for sh2 interrupt-masking-based atomics to sh port the sh2 target is being considered an ISA subset of sh3/sh4, in the sense that binaries built for sh2 are intended to be usable on later cpu models/kernels with mmu support. so rather than hard-coding sh2-specific atomics, the runtime atomic selection mechanism that was already in place has been extended to add sh2 atomics. at this time, the sh2 atomics are not SMP-compatible; since the ISA lacks actual atomic operations, the new code instead masks interrupts for the duration of the atomic operation, producing an atomic result on single-core. this is only possible because the kernel/hardware does not impose protections against userspace doing so. additional changes will be needed to support future SMP systems. care has been taken to avoid producing significant additional code size in the case where it's known at compile-time that the target is not sh2 and does not need sh2-specific code. 
--- diff --git a/arch/sh/src/__set_thread_area.c b/arch/sh/src/__set_thread_area.c new file mode 100644 index 00000000..1d3e0225 --- /dev/null +++ b/arch/sh/src/__set_thread_area.c @@ -0,0 +1,34 @@ +#include "pthread_impl.h" +#include "libc.h" +#include "sh_atomic.h" +#include + +/* Also perform sh-specific init */ + +#define CPU_HAS_LLSC 0x0040 + +__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu; + +int __set_thread_area(void *p) +{ + size_t *aux; + __asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" ); +#ifndef __SH4A__ + if (__hwcap & CPU_HAS_LLSC) { + __sh_atomic_model = SH_A_LLSC; + return 0; + } +#if !defined(__SH3__) && !defined(__SH4__) + for (aux=libc.auxv; *aux; aux+=2) { + if (*aux != AT_PLATFORM) continue; + const char *s = (void *)aux[1]; + if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break; + __sh_atomic_model = SH_A_IMASK; + __sh_nommu = 1; + return 0; + } +#endif + /* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */ +#endif + return 0; +} diff --git a/arch/sh/src/atomic.c b/arch/sh/src/atomic.c index f8c615f8..7fd73074 100644 --- a/arch/sh/src/atomic.c +++ b/arch/sh/src/atomic.c @@ -1,8 +1,26 @@ #ifndef __SH4A__ +#include "sh_atomic.h" #include "atomic.h" #include "libc.h" +static inline unsigned mask() +{ + unsigned sr; + __asm__ __volatile__ ( "\n" + " stc sr,r0 \n" + " mov r0,%0 \n" + " or #0xf0,r0 \n" + " ldc r0,sr \n" + : "=&r"(sr) : : "memory", "r0" ); + return sr; +} + +static inline void unmask(unsigned sr) +{ + __asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" ); +} + /* gusa is a hack in the kernel which lets you create a sequence of instructions * which will be restarted if the process is preempted in the middle of the * sequence. It will do for implementing atomics on non-smp systems. 
ABI is: @@ -25,11 +43,17 @@ " mov.l " new ", @" mem "\n" \ "1: mov r1, r15\n" -#define CPU_HAS_LLSC 0x0040 - int __sh_cas(volatile int *p, int t, int s) { - if (__hwcap & CPU_HAS_LLSC) return __sh_cas_llsc(p, t, s); + if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s); + + if (__sh_atomic_model == SH_A_IMASK) { + unsigned sr = mask(); + int old = *p; + if (old==t) *p = s; + unmask(sr); + return old; + } int old; __asm__ __volatile__( @@ -43,7 +67,15 @@ int __sh_cas(volatile int *p, int t, int s) int __sh_swap(volatile int *x, int v) { - if (__hwcap & CPU_HAS_LLSC) return __sh_swap_llsc(x, v); + if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v); + + if (__sh_atomic_model == SH_A_IMASK) { + unsigned sr = mask(); + int old = *x; + *x = v; + unmask(sr); + return old; + } int old; __asm__ __volatile__( @@ -55,7 +87,15 @@ int __sh_swap(volatile int *x, int v) int __sh_fetch_add(volatile int *x, int v) { - if (__hwcap & CPU_HAS_LLSC) return __sh_fetch_add_llsc(x, v); + if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v); + + if (__sh_atomic_model == SH_A_IMASK) { + unsigned sr = mask(); + int old = *x; + *x = old + v; + unmask(sr); + return old; + } int old, dummy; __asm__ __volatile__( @@ -69,7 +109,7 @@ int __sh_fetch_add(volatile int *x, int v) void __sh_store(volatile int *p, int x) { - if (__hwcap & CPU_HAS_LLSC) return __sh_store_llsc(p, x); + if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x); __asm__ __volatile__( " mov.l %1, @%0\n" : : "r"(p), "r"(x) : "memory"); @@ -77,7 +117,15 @@ void __sh_store(volatile int *p, int x) void __sh_and(volatile int *x, int v) { - if (__hwcap & CPU_HAS_LLSC) return __sh_and_llsc(x, v); + if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v); + + if (__sh_atomic_model == SH_A_IMASK) { + unsigned sr = mask(); + int old = *x; + *x = old & v; + unmask(sr); + return; + } int dummy; __asm__ __volatile__( @@ -89,7 +137,15 @@ void __sh_and(volatile int *x, int v) void 
__sh_or(volatile int *x, int v) { - if (__hwcap & CPU_HAS_LLSC) return __sh_or_llsc(x, v); + if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v); + + if (__sh_atomic_model == SH_A_IMASK) { + unsigned sr = mask(); + int old = *x; + *x = old | v; + unmask(sr); + return; + } int dummy; __asm__ __volatile__( diff --git a/arch/sh/src/sh_atomic.h b/arch/sh/src/sh_atomic.h new file mode 100644 index 00000000..054c2a32 --- /dev/null +++ b/arch/sh/src/sh_atomic.h @@ -0,0 +1,15 @@ +#ifndef _SH_ATOMIC_H +#define _SH_ATOMIC_H + +#define SH_A_GUSA 0 +#define SH_A_LLSC 1 +#define SH_A_CAS 2 +#if !defined(__SH3__) && !defined(__SH4__) +#define SH_A_IMASK 3 +#else +#define SH_A_IMASK -1LL /* unmatchable by unsigned int */ +#endif + +extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model; + +#endif diff --git a/src/thread/sh/__set_thread_area.s b/src/thread/sh/__set_thread_area.s index d9f11810..e69de29b 100644 --- a/src/thread/sh/__set_thread_area.s +++ b/src/thread/sh/__set_thread_area.s @@ -1,6 +0,0 @@ -.global __set_thread_area -.type __set_thread_area, @function -__set_thread_area: - ldc r4, gbr - rts - mov #0, r0