# throw in -D[BL]_ENDIAN, whichever appropriate...
"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-# It's believed that majority of ARM toolchains predefine appropriate -march.
-# If you compiler does not, do complement config command line with one!
+
+#######################################################################
+# Note that -march is not among compiler options in below linux-armv4
+# target line. Not specifying one is intentional to give you choice to:
+#
+# a) rely on your compiler default by not specifying one;
+# b) specify your target platform explicitly for optimal performance,
+# e.g. -march=armv6 or -march=armv7-a;
+# c) build "universal" binary that targets *range* of platforms by
+# specifying minimum and maximum supported architecture;
+#
+# As for option c): it makes no sense to specify a maximum lower than
+# ARMv7, because ARMv7 is the minimum requirement for the run-time
+# switch between platform-specific code paths, and without the run-time
+# switch performance would be equivalent to that of the minimum. Secondly,
+# there are some natural limitations that you'd have to accept and
+# respect. Most notably you can *not* build "universal" binary for
+# big-endian platform. This is because ARMv7 processor always picks
+# instructions in little-endian order. Another similar limitation is
+# that -mthumb can't "cross" -march=armv6t2 boundary, because that's
+# where it became Thumb-2. Well, this limitation is a bit artificial,
+# because it's not really impossible, but it's deemed too tricky to
+# support. And of course you have to be sure that your binutils are
+# actually up to the task of handling maximum target platform. With all
+# this in mind here is an example of how to configure "universal" build:
+#
+# ./Configure linux-armv4 -march=armv6 -D__ARM_MAX_ARCH__=8
+#
"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# Configure script adds minimally required -march for assembly support,
$code=<<___;
#include "arm_arch.h"
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.text
___
-$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
-$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/);
+$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
+$code.=".arch armv7-a\n.fpu neon\n.code 32\n" if ($flavour !~ /64/);
+ #^^^^^^ this is done to simplify adoption by not depending
+ # on latest binutils.
# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
#endif
#ifdef __thumb__
# define adrl adr
#endif
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
.text
.syntax unified @ ARMv7-capable assembler is expected to handle this
#ifdef __thumb2__
.code 32
#endif
-.fpu neon
-
.type _bsaes_decrypt8,%function
.align 4
_bsaes_decrypt8:
# endif
#endif
+#if !defined(__ARM_MAX_ARCH__)
+# define __ARM_MAX_ARCH__ __ARM_ARCH__
+#endif
+
+#if __ARM_MAX_ARCH__<__ARM_ARCH__
+# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
+#elif __ARM_MAX_ARCH__!=__ARM_ARCH__
+# if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__)
+# error "can't build universal big-endian binary"
+# endif
+#endif
+
#if !__ASSEMBLER__
extern unsigned int OPENSSL_armcap_P;
#endif
#include "arm_arch.h"
-unsigned int OPENSSL_armcap_P;
+unsigned int OPENSSL_armcap_P=0;
+#if __ARM_MAX_ARCH__<7
+void OPENSSL_cpuid_setup(void) {}
+unsigned long OPENSSL_rdtsc(void) { return 0; }
+#else
static sigset_t all_masked;
static sigjmp_buf ill_jmp;
sigaction (SIGILL,&ill_oact,NULL);
sigprocmask(SIG_SETMASK,&oset,NULL);
}
+#endif
.text
.code 32
-@ Special note about using .byte directives to encode instructions.
-@ Initial reason for hand-coding instructions was to allow module to
-@ be compilable by legacy tool-chains. At later point it was pointed
-@ out that since ARMv7, instructions are always encoded in little-endian
-@ order, therefore one has to opt for endian-neutral presentation.
-@ Contemporary tool-chains offer .inst directive for this purpose,
-@ but not legacy ones. Therefore .byte. But there is an exception,
-@ namely ARMv7-R profile still allows for big-endian encoding even for
-@ instructions. This raises the question what if probe instructions
-@ appear executable to such processor operating in big-endian order?
-@ They have to be chosen in a way that avoids this problem. As failed
-@ NEON probe disables a number of other probes we have to ensure that
-@ only NEON probe instruction doesn't appear executable in big-endian
-@ order, therefore 'vorr q8,q8,q8', and not some other register. The
-@ only probe that is not bypassed on failed NEON probe is _armv7_tick,
-@ where you'll spot 'mov r0,r6' that serves this purpose. Basic idea is
-@ that if fetched in alternative byte oder instruction should crash to
-@ denote lack of probed capability...
-
-.align 5
-.global _armv7_neon_probe
-.type _armv7_neon_probe,%function
-_armv7_neon_probe:
- .byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv7_neon_probe,.-_armv7_neon_probe
-
-.global _armv7_tick
-.type _armv7_tick,%function
-_armv7_tick:
- .byte 0x06,0x00,0xa0,0xe1 @ mov r0,r6
- .byte 0x1e,0x0f,0x51,0xec @ mrrc p15,1,r0,r1,c14 @ CNTVCT
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
- nop
-.size _armv7_tick,.-_armv7_tick
-
-.global _armv8_aes_probe
-.type _armv8_aes_probe,%function
-_armv8_aes_probe:
- .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv8_aes_probe,.-_armv8_aes_probe
-
-.global _armv8_sha1_probe
-.type _armv8_sha1_probe,%function
-_armv8_sha1_probe:
- .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv8_sha1_probe,.-_armv8_sha1_probe
-
-.global _armv8_sha256_probe
-.type _armv8_sha256_probe,%function
-_armv8_sha256_probe:
- .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv8_sha256_probe,.-_armv8_sha256_probe
-.global _armv8_pmull_probe
-.type _armv8_pmull_probe,%function
-_armv8_pmull_probe:
- .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv8_pmull_probe,.-_armv8_pmull_probe
-
.align 5
.global OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function
#endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.align 5
+.global _armv7_neon_probe
+.type _armv7_neon_probe,%function
+_armv7_neon_probe:
+ vorr q0,q0,q0
+ bx lr
+.size _armv7_neon_probe,.-_armv7_neon_probe
+
+.global _armv7_tick
+.type _armv7_tick,%function
+_armv7_tick:
+ mrrc p15,1,r0,r1,c14 @ CNTVCT
+ bx lr
+.size _armv7_tick,.-_armv7_tick
+
+.global _armv8_aes_probe
+.type _armv8_aes_probe,%function
+_armv8_aes_probe:
+ .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
+ bx lr
+.size _armv8_aes_probe,.-_armv8_aes_probe
+
+.global _armv8_sha1_probe
+.type _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+ .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
+ bx lr
+.size _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.global _armv8_sha256_probe
+.type _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+ .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
+ bx lr
+.size _armv8_sha256_probe,.-_armv8_sha256_probe
+.global _armv8_pmull_probe
+.type _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+ .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
+ bx lr
+.size _armv8_pmull_probe,.-_armv8_pmull_probe
+#endif
+
.global OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,%function
OPENSSL_wipe_cpu:
+#if __ARM_MAX_ARCH__>=7
ldr r0,.LOPENSSL_armcap
adr r1,.LOPENSSL_armcap
ldr r0,[r1,r0]
+#endif
eor r2,r2,r2
eor r3,r3,r3
eor ip,ip,ip
+#if __ARM_MAX_ARCH__>=7
tst r0,#1
beq .Lwipe_done
- .byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0
- .byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1
- .byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2
- .byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3
- .byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8
- .byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9
- .byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10
- .byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11
- .byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12
- .byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13
- .byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14
- .byte 0xfe,0xe1,0x4e,0xf3 @ veor q14, q14, q14
+ veor q0, q0, q0
+ veor q1, q1, q1
+ veor q2, q2, q2
+ veor q3, q3, q3
+ veor q8, q8, q8
+ veor q9, q9, q9
+ veor q10, q10, q10
+ veor q11, q11, q11
+ veor q12, q12, q12
+ veor q13, q13, q13
+ veor q14, q14, q14
+ veor q15, q15, q15
.Lwipe_done:
+#endif
mov r0,sp
#if __ARM_ARCH__>=5
bx lr
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5
+#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.LOPENSSL_armcap
+#endif
#if __ARM_ARCH__>=6
.align 5
#else
.text
.code 32
-
-#if __ARM_ARCH__>=7
-.fpu neon
-#endif
___
################
# private interface to mul_1x1_ialu
# BN_ULONG a1,BN_ULONG a0,
# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
{
-my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
-my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
-
$code.=<<___;
.global bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,%function
.align 5
bn_GF2m_mul_2x2:
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
.Lpic: ldr r12,[pc,r12]
tst r12,#1
- beq .Lialu
+ bne .LNEON
+#endif
+___
+$ret="r10"; # reassigned 1st argument
+$code.=<<___;
+ stmdb sp!,{r4-r10,lr}
+ mov $ret,r0 @ reassign 1st argument
+ mov $b,r3 @ $b=b1
+ ldr r3,[sp,#32] @ load b0
+ mov $mask,#7<<2
+ sub sp,sp,#32 @ allocate tab[8]
+
+ bl mul_1x1_ialu @ a1·b1
+ str $lo,[$ret,#8]
+ str $hi,[$ret,#12]
+
+ eor $b,$b,r3 @ flip b0 and b1
+ eor $a,$a,r2 @ flip a0 and a1
+ eor r3,r3,$b
+ eor r2,r2,$a
+ eor $b,$b,r3
+ eor $a,$a,r2
+ bl mul_1x1_ialu @ a0·b0
+ str $lo,[$ret]
+ str $hi,[$ret,#4]
+ eor $a,$a,r2
+ eor $b,$b,r3
+ bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
+___
+@r=map("r$_",(6..9));
+$code.=<<___;
+ ldmia $ret,{@r[0]-@r[3]}
+ eor $lo,$lo,$hi
+ eor $hi,$hi,@r[1]
+ eor $lo,$lo,@r[0]
+ eor $hi,$hi,@r[2]
+ eor $lo,$lo,@r[3]
+ eor $hi,$hi,@r[3]
+ str $hi,[$ret,#8]
+ eor $lo,$lo,$hi
+ add sp,sp,#32 @ destroy tab[8]
+ str $lo,[$ret,#4]
+
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r10,pc}
+#else
+ ldmia sp!,{r4-r10,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+___
+}
+{
+my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
+my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.align 5
+.LNEON:
ldr r12, [sp] @ 5th argument
vmov.32 $a, r2, r1
vmov.32 $b, r12, r3
vst1.32 {$r}, [r0]
ret @ bx lr
-.align 4
-.Lialu:
#endif
___
}
-$ret="r10"; # reassigned 1st argument
$code.=<<___;
- stmdb sp!,{r4-r10,lr}
- mov $ret,r0 @ reassign 1st argument
- mov $b,r3 @ $b=b1
- ldr r3,[sp,#32] @ load b0
- mov $mask,#7<<2
- sub sp,sp,#32 @ allocate tab[8]
-
- bl mul_1x1_ialu @ a1·b1
- str $lo,[$ret,#8]
- str $hi,[$ret,#12]
-
- eor $b,$b,r3 @ flip b0 and b1
- eor $a,$a,r2 @ flip a0 and a1
- eor r3,r3,$b
- eor r2,r2,$a
- eor $b,$b,r3
- eor $a,$a,r2
- bl mul_1x1_ialu @ a0·b0
- str $lo,[$ret]
- str $hi,[$ret,#4]
-
- eor $a,$a,r2
- eor $b,$b,r3
- bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
-___
-@r=map("r$_",(6..9));
-$code.=<<___;
- ldmia $ret,{@r[0]-@r[3]}
- eor $lo,$lo,$hi
- eor $hi,$hi,@r[1]
- eor $lo,$lo,@r[0]
- eor $hi,$hi,@r[2]
- eor $lo,$lo,@r[3]
- eor $hi,$hi,@r[3]
- str $hi,[$ret,#8]
- eor $lo,$lo,$hi
- add sp,sp,#32 @ destroy tab[8]
- str $lo,[$ret,#4]
-
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r10,pc}
-#else
- ldmia sp!,{r4-r10,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- bx lr @ interoperable with Thumb ISA:-)
-#endif
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-(.Lpic+8)
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
+#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
+#endif
___
foreach (split("\n",$code)) {
.text
.code 32
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-bn_mul_mont
bn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,bn_mul_mont
my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
$code.=<<___;
.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
___
#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
#include "arm_arch.h"
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
# if defined(BSAES_ASM)
# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
# endif
}
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.global gcm_init_neon
# endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
# include "arm_arch.h"
-# if __ARM_ARCH__>=7
+# if __ARM_MAX_ARCH__>=7
# define GHASH_ASM_ARM
# define GCM_FUNCREF_4BIT
# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
.align 5
sha1_block_data_order:
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
sub r3,pc,#8 @ sha1_block_data_order
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
+#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha1_block_data_order
+#endif
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
___
}
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.type sha1_block_data_order_neon,%function
my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.type sha1_block_data_order_armv8,%function
.align 5
sha1_block_data_order_armv8:
___
}}}
$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
+#endif
___
{ my %opcode = (
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
.word 0 @ terminator
+#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha256_block_data_order
+#endif
.align 5
.global sha256_block_data_order
sha256_block_data_order:
sub r3,pc,#8 @ sha256_block_data_order
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#ARMV8_SHA256
}
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.type sha256_block_data_order_neon,%function
my $Ktbl="r3";
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.type sha256_block_data_order_armv8,%function
.align 5
sha256_block_data_order_armv8:
$code.=<<___;
.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+#if __ARM_MAX_ARCH__>=7
+@ capability word used by the run-time dispatch above
.comm OPENSSL_armcap_P,4,4
+#endif
___
{ my %opcode = (
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512
+#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha512_block_data_order
.skip 32-4
+#else
+.skip 32
+#endif
.global sha512_block_data_order
.type sha512_block_data_order,%function
sha512_block_data_order:
sub r3,pc,#8 @ sha512_block_data_order
add $len,$inp,$len,lsl#7 @ len to point at the end of inp
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#1
}
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.align 4
.size sha512_block_data_order,.-sha512_block_data_order
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
+#endif
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;