From: Andy Polyakov Date: Sun, 17 Jul 2011 17:40:29 +0000 (+0000) Subject: ARM assembler pack: add platform run-time detection. X-Git-Tag: OpenSSL-fips-2_0-rc1~255 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=87873f4328274fc64b4089de6deabf52e5b2d481;p=oweals%2Fopenssl.git ARM assembler pack: add platform run-time detection. --- diff --git a/Configure b/Configure index 46d5cb259b..8ae48e75f3 100755 --- a/Configure +++ b/Configure @@ -135,7 +135,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::::::"; my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::"; my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o"; -my $armv4_asm=":bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void"; +my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64"; my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::"; diff --git a/TABLE b/TABLE index 2e405ec230..b7c764a9b9 100644 --- a/TABLE +++ b/TABLE @@ -1032,7 +1032,7 @@ $thread_cflag = -D_REENTRANT $sys_id = $lflags = -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR -$cpuid_obj = +$cpuid_obj = armcap.o armv4cpuid.o $bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o $des_obj = $aes_obj = aes_cbc.o aes-armv4.o @@ -3688,7 +3688,7 @@ $thread_cflag = -D_REENTRANT $sys_id = $lflags = -ldl $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR -$cpuid_obj = +$cpuid_obj = armcap.o armv4cpuid.o $bn_obj = bn_asm.o armv4-mont.o armv4-gf2m.o $des_obj = $aes_obj = aes_cbc.o aes-armv4.o diff --git a/config b/config index e82868721e..32bec89db2 100755 --- a/config +++ b/config @@ -630,6 +630,7 @@ case "$GUESSOS" in options="$options -DB_ENDIAN -mschedule=$CPUSCHEDULE -march=$CPUARCH" OUT="linux-generic32" ;; armv[1-3]*-*-linux2) OUT="linux-generic32" ;; + armv[7-9]*-*-linux2) OUT="linux-armv4"; options="$options -march=armv7-a" ;; arm*-*-linux2) OUT="linux-armv4" ;; sh*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;; sh*-*-linux2) OUT="linux-generic32"; options="$options -DL_ENDIAN" ;; diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h index 82401add19..15027ed3de 100644 --- a/crypto/arm_arch.h +++ b/crypto/arm_arch.h @@ -18,7 +18,7 @@ */ # if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \ - defined(__ARM_ARCH_7EM) + defined(__ARM_ARCH_7EM__) # define __ARM_ARCH__ 7 # elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__) || \ @@ -39,5 +39,12 @@ #include #endif +#if !__ASSEMBLER__ +extern unsigned int OPENSSL_armcap_P; + +#define ARMV7_NEON (1<<0) +#define ARMV7_TICK (1<<1) +#endif + #endif #endif diff --git a/crypto/armcap.c b/crypto/armcap.c new file mode 100644 index 0000000000..74c2c57295 --- /dev/null +++ b/crypto/armcap.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include + +#include "arm_arch.h" + +unsigned int OPENSSL_armcap_P; + +static sigset_t all_masked; + +static sigjmp_buf ill_jmp; +static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } + +/* + * Following subroutines could have been inlined, but it's not all + * ARM compilers support inline assembler... + */ +void _armv7_neon_probe(void); +unsigned int _armv7_tick(void); + +unsigned int OPENSSL_rdtsc(void) + { + if (OPENSSL_armcap_P|ARMV7_TICK) + return _armv7_tick(); + else + return 0; + } + +void OPENSSL_cpuid_setup(void) + { + char *e; + struct sigaction ill_oact,ill_act; + sigset_t oset; + static int trigger=0; + + if (trigger) return; + trigger=1; + + if ((e=getenv("OPENSSL_armcap"))) + { + OPENSSL_armcap_P=strtoul(e,NULL,0); + return; + } + + sigfillset(&all_masked); + sigdelset(&all_masked,SIGILL); + sigdelset(&all_masked,SIGTRAP); + sigdelset(&all_masked,SIGFPE); + sigdelset(&all_masked,SIGBUS); + sigdelset(&all_masked,SIGSEGV); + + OPENSSL_armcap_P = 0; + + memset(&ill_act,0,sizeof(ill_act)); + ill_act.sa_handler = ill_handler; + ill_act.sa_mask = all_masked; + + sigprocmask(SIG_SETMASK,&ill_act.sa_mask,&oset); + sigaction(SIGILL,&ill_act,&ill_oact); + + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv7_neon_probe(); + OPENSSL_armcap_P |= ARMV7_NEON; + } + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv7_tick(); + OPENSSL_armcap_P |= ARMV7_TICK; + } + + sigaction (SIGILL,&ill_oact,NULL); + sigprocmask(SIG_SETMASK,&oset,NULL); + } diff --git a/crypto/armv4cpuid.S b/crypto/armv4cpuid.S new file mode 100644 index 0000000000..c9102ca2a5 --- /dev/null +++ b/crypto/armv4cpuid.S @@ -0,0 +1,154 @@ +#include "arm_arch.h" + +.text +.code 32 + +.align 5 +.global _armv7_neon_probe +.type _armv7_neon_probe,%function +_armv7_neon_probe: + .word 0xf26ee1fe @ vorr q15,q15,q15 + .word 0xe12fff1e @ bx lr +.size _armv7_neon_probe,.-_armv7_neon_probe + +.global _armv7_tick +.type _armv7_tick,%function +_armv7_tick: + mrc p15,0,r0,c9,c13,0 + .word 0xe12fff1e @ bx lr +.size _armv7_tick,.-_armv7_tick + +.global OPENSSL_atomic_add +.type OPENSSL_atomic_add,%function +OPENSSL_atomic_add: +#if __ARM_ARCH__>=6 +.Ladd: ldrex r2,[r0] + add r3,r2,r1 + strex r2,r3,[r0] + cmp r2,#0 + bne .Ladd + mov r0,r3 + .word 0xe12fff1e @ bx lr +#else + stmdb sp!,{r4-r6,lr} + ldr r2,.Lspinlock + adr r3,.Lspinlock + mov r4,r0 + mov r5,r1 + add r6,r3,r2 @ &spinlock + b .+8 +.Lspin: bl sched_yield + mov r0,#-1 + swp r0,r0,[r6] + cmp r0,#0 + bne .Lspin + + ldr r2,[r4] + add r2,r5 + str r2,[r4] + str r0,[r6] @ release spinlock + ldmia sp!,{r4-r6,lr} + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +#endif +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add + +.global OPENSSL_cleanse +.type OPENSSL_cleanse,%function +OPENSSL_cleanse: + eor ip,ip,ip + cmp r1,#7 + subhs r1,#4 + bhs .Lot + cmp r1,#0 + beq .Lcleanse_done +.Little: + strb ip,[r0],#1 + subs r1,#1 + bhi .Little + b .Lcleanse_done + +.Lot: tst r0,#3 + beq .Laligned + strb ip,[r0],#1 + sub r1,#1 + b .Lot +.Laligned: + str ip,[r0],#4 + subs r1,#4 + bhs .Laligned + adds r1,#4 + bne .Little +.Lcleanse_done: + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +.size OPENSSL_cleanse,.-OPENSSL_cleanse + +.global OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,%function +OPENSSL_wipe_cpu: + ldr r0,.LOPENSSL_armcap + adr r1,.LOPENSSL_armcap + ldr r0,[r1,r0] + eor r2,r2,r2 + eor r3,r3,r3 + eor ip,ip,ip + tst r0,#1 + beq .Lwipe_done + .word 0xf3000150 @ veor q0, q0, q0 + .word 0xf3022152 @ veor q1, q1, q1 + .word 0xf3044154 @ veor q2, q2, q2 + .word 0xf3066156 @ veor q3, q3, q3 + .word 0xf34001f0 @ veor q8, q8, q8 + .word 0xf34221f2 @ veor q9, q9, q9 + .word 0xf34441f4 @ veor q10, q10, q10 + .word 0xf34661f6 @ veor q11, q11, q11 + .word 0xf34881f8 @ veor q12, q12, q12 + .word 0xf34aa1fa @ veor q13, q13, q13 + .word 0xf34cc1fc @ veor q14, q14, q14 + .word 0xf34ee1fe @ veor q15, q15, q15 +.Lwipe_done: + mov r0,sp + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu + +.global OPENSSL_instrument_bus +.type OPENSSL_instrument_bus,%function +OPENSSL_instrument_bus: + eor r0,r0,r0 + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus + +.global OPENSSL_instrument_bus2 +.type OPENSSL_instrument_bus2,%function +OPENSSL_instrument_bus2: + eor r0,r0,r0 + tst lr,#1 + moveq pc,lr + .word 0xe12fff1e @ bx lr +.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 + +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-.LOPENSSL_armcap +#if __ARM_ARCH__>=6 +.align 5 +#else +.Lspinlock: +.word atomic_add_spinlock-.Lspinlock +.align 5 + +.data +.align 2 +atomic_add_spinlock: +.word 0 +#endif + +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl index 4fe9db9894..9928dae872 100644 --- a/crypto/bn/asm/armv4-gf2m.pl +++ b/crypto/bn/asm/armv4-gf2m.pl @@ -264,12 +264,12 @@ $code.=<<___; #if __ARM_ARCH__>=7 .align 5 .LOPENSSL_armcap: -.word OPENSSL_armcap-(.Lpic+8) +.word OPENSSL_armcap_P-(.Lpic+8) #endif -.asciz "GF2m Multiplication for ARMv4/NEON, CRYPTOGAMS by " +.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by " .align 5 -.comm OPENSSL_armcap,4,4 +.comm OPENSSL_armcap_P,4,4 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c index 8b9070a45b..2e42e71804 100644 --- a/crypto/modes/gcm128.c +++ b/crypto/modes/gcm128.c @@ -668,8 +668,6 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len # if __ARM_ARCH__>=7 # define GHASH_ASM_ARM # define GCM_FUNCREF_4BIT -extern unsigned int OPENSSL_armcap; - void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif @@ -715,7 +713,8 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) #elif TABLE_BITS==4 # if defined(GHASH_ASM_X86_OR_64) # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) - if (OPENSSL_ia32cap_P[1]&(1<<1)) { /* check PCLMULQDQ bit */ + if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */ + OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */ gcm_init_clmul(ctx->Htable,ctx->H.u); ctx->gmult = gcm_gmult_clmul; ctx->ghash = gcm_ghash_clmul; @@ -736,7 +735,7 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) ctx->ghash = gcm_ghash_4bit; # endif # elif defined(GHASH_ASM_ARM) - if (OPENSSL_armcap & 1) { + if (OPENSSL_armcap_P & ARMV7_NEON) { ctx->gmult = gcm_gmult_neon; ctx->ghash = gcm_ghash_neon; } else { diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl index 8ba56e3d16..7faf37b147 100644 --- a/crypto/sha/asm/sha512-armv4.pl +++ b/crypto/sha/asm/sha512-armv4.pl @@ -221,7 +221,7 @@ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 .LOPENSSL_armcap: -.word OPENSSL_armcap-sha512_block_data_order +.word OPENSSL_armcap_P-sha512_block_data_order .skip 32-4 .global sha512_block_data_order @@ -231,7 +231,7 @@ sha512_block_data_order: add $len,$inp,$len,lsl#7 @ len to point at the end of inp #if __ARM_ARCH__>=7 ldr r12,.LOPENSSL_armcap - ldr r12,[r3,r12] @ OPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P tst r12,#1 bne .LNEON #endif @@ -573,7 +573,7 @@ $code.=<<___; .size sha512_block_data_order,.-sha512_block_data_order .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " .align 2 -.comm OPENSSL_armcap,4,4 +.comm OPENSSL_armcap_P,4,4 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem;