Remove inconsistency in ARM support.

author Andy Polyakov <appro@openssl.org>

Fri, 7 Nov 2014 21:48:22 +0000 (22:48 +0100)

committer Andy Polyakov <appro@openssl.org>

Sun, 4 Jan 2015 22:45:08 +0000 (23:45 +0100)
author Andy Polyakov <appro@openssl.org>
Fri, 7 Nov 2014 21:48:22 +0000 (22:48 +0100)
committer Andy Polyakov <appro@openssl.org>
Sun, 4 Jan 2015 22:45:08 +0000 (23:45 +0100)
diff --git a/Configure b/Configure

index 5c4a4600d44f3b1eeb2a11dba4726b53d224eed7..6246822eb000810c275379cbe994bdb1d9f75312 100755 (executable)
--- a/Configure
+++ b/Configure
@@ -350,8 +350,34 @@ my %table=(
  # throw in -D[BL]_ENDIAN, whichever appropriate...
  "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
  "linux-ppc",   "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-# It's believed that majority of ARM toolchains predefine appropriate -march.
-# If you compiler does not, do complement config command line with one!
+
+#######################################################################
+# Note that -march is not among compiler options in below linux-armv4
+# target line. Not specifying one is intentional to give you choice to:
+#
+# a) rely on your compiler default by not specifying one;
+# b) specify your target platform explicitly for optimal performance,
+#    e.g. -march=armv6 or -march=armv7-a;
+# c) build "universal" binary that targets *range* of platforms by
+#    specifying minimum and maximum supported architecture;
+#
+# As for option c): it makes no sense to specify a maximum lower than
+# ARMv7, because that is the minimum requirement for a run-time switch
+# between platform-specific code paths, and without a run-time switch
+# performance would be equivalent to that of the minimum. Secondly,
+# there are some natural limitations that you'd have to accept and
+# respect. Most notably you can *not* build a "universal" binary for a
+# big-endian platform. This is because an ARMv7 processor always fetches
+# instructions in little-endian order. Another similar limitation is
+# that -mthumb can't "cross" the -march=armv6t2 boundary, because that's
+# where it became Thumb-2. Well, this limitation is a bit artificial,
+# because it's not really impossible, but it's deemed too tricky to
+# support. And of course you have to be sure that your binutils are
+# actually up to the task of handling the maximum target platform. With
+# all this in mind, here is an example of configuring a "universal" build:
+#
+#       ./Configure linux-armv4 -march=armv6 -D__ARM_MAX_ARCH__=8
+#
  "linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
  "linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
  # Configure script adds minimally required -march for assembly support,
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl

index 923c7f62d5551d1e49079784b84f8db6e90688a4..1e93f86852b4033662d1c7c4d36ae3916341aea2 100755 (executable)
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -35,11 +35,13 @@ $prefix="aes_v8";
  $code=<<___;
  #include "arm_arch.h"
  
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
  .text
  ___
-$code.=".arch  armv8-a+crypto\n"       if ($flavour =~ /64/);
-$code.=".fpu   neon\n.code     32\n"   if ($flavour !~ /64/);
+$code.=".arch  armv8-a+crypto\n"                       if ($flavour =~ /64/);
+$code.=".arch  armv7-a\n.fpu   neon\n.code     32\n"   if ($flavour !~ /64/);
+               #^^^^^^ this is done to simplify adoption by not depending
+               #       on latest binutils.
  
  # Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
  # NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
diff --git a/crypto/aes/asm/bsaes-armv7.pl b/crypto/aes/asm/bsaes-armv7.pl

index f3d96d9325737fba399dc5e6613d223e03fcb7a1..fcc81d1a493374b3592c253ddd51cce3dd2fe888 100644 (file)
--- a/crypto/aes/asm/bsaes-armv7.pl
+++ b/crypto/aes/asm/bsaes-armv7.pl
@@ -702,13 +702,17 @@ $code.=<<___;
  # define BSAES_ASM_EXTENDED_KEY
  # define XTS_CHAIN_TWEAK
  # define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
  #endif
  
  #ifdef __thumb__
  # define adrl adr
  #endif
  
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
  .text
  .syntax        unified         @ ARMv7-capable assembler is expected to handle this
  #ifdef __thumb2__
@@ -717,8 +721,6 @@ $code.=<<___;
  .code   32
  #endif
  
-.fpu   neon
-
  .type  _bsaes_decrypt8,%function
  .align 4
  _bsaes_decrypt8:
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h

index d406c8c2295f5f52b334941637c340d185e52ba8..9a125d878b1fe1245b1f1e3f1641c6d7467fd053 100644 (file)
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -48,6 +48,18 @@
  # endif
  #endif
  
+#if !defined(__ARM_MAX_ARCH__)
+# define __ARM_MAX_ARCH__ __ARM_ARCH__
+#endif
+
+#if __ARM_MAX_ARCH__<__ARM_ARCH__
+# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
+#elif __ARM_MAX_ARCH__!=__ARM_ARCH__
+# if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__)
+#  error "can't build universal big-endian binary"
+# endif
+#endif
+
  #if !__ASSEMBLER__
  extern unsigned int OPENSSL_armcap_P;
  #endif
diff --git a/crypto/armcap.c b/crypto/armcap.c

index 7e46d07a3232c06cb359fbc64110c21680c80e84..24f7a0829a697eee2e8fe84bb7ccd3a5ee78ffe7 100644 (file)
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -7,8 +7,12 @@
  
  #include "arm_arch.h"
  
-unsigned int OPENSSL_armcap_P;
+unsigned int OPENSSL_armcap_P=0;
  
+#if __ARM_MAX_ARCH__<7
+void OPENSSL_cpuid_setup(void) {}
+unsigned long OPENSSL_rdtsc(void) { return 0; }
+#else
  static sigset_t all_masked;
  
  static sigjmp_buf ill_jmp;
@@ -155,3 +159,4 @@ void OPENSSL_cpuid_setup(void)
         sigaction (SIGILL,&ill_oact,NULL);
         sigprocmask(SIG_SETMASK,&oset,NULL);
         }
+#endif
diff --git a/crypto/armv4cpuid.S b/crypto/armv4cpuid.S

index 0059311303ac9f1dedebc5e3a7c920822eb1b8c7..65010ae4fe065c41b0b4bd7b570b65cc597cb7f4 100644 (file)
--- a/crypto/armv4cpuid.S
+++ b/crypto/armv4cpuid.S
@@ -3,69 +3,6 @@
  .text
  .code  32
  
-@ Special note about using .byte directives to encode instructions.
-@ Initial reason for hand-coding instructions was to allow module to
-@ be compilable by legacy tool-chains. At later point it was pointed
-@ out that since ARMv7, instructions are always encoded in little-endian
-@ order, therefore one has to opt for endian-neutral presentation.
-@ Contemporary tool-chains offer .inst directive for this purpose,
-@ but not legacy ones. Therefore .byte. But there is an exception,
-@ namely ARMv7-R profile still allows for big-endian encoding even for
-@ instructions. This raises the question what if probe instructions
-@ appear executable to such processor operating in big-endian order?
-@ They have to be chosen in a way that avoids this problem. As failed
-@ NEON probe disables a number of other probes we have to ensure that
-@ only NEON probe instruction doesn't appear executable in big-endian
-@ order, therefore 'vorr q8,q8,q8', and not some other register. The
-@ only probe that is not bypassed on failed NEON probe is _armv7_tick,
-@ where you'll spot 'mov r0,r6' that serves this purpose. Basic idea is
-@ that if fetched in alternative byte oder instruction should crash to
-@ denote lack of probed capability...
-
-.align 5
-.global        _armv7_neon_probe
-.type  _armv7_neon_probe,%function
-_armv7_neon_probe:
-       .byte   0xf0,0x01,0x60,0xf2     @ vorr  q8,q8,q8
-       .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
-.size  _armv7_neon_probe,.-_armv7_neon_probe
-
-.global        _armv7_tick
-.type  _armv7_tick,%function
-_armv7_tick:
-       .byte   0x06,0x00,0xa0,0xe1     @ mov   r0,r6
-       .byte   0x1e,0x0f,0x51,0xec     @ mrrc  p15,1,r0,r1,c14 @ CNTVCT
-       .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
-       nop
-.size  _armv7_tick,.-_armv7_tick
-
-.global        _armv8_aes_probe
-.type  _armv8_aes_probe,%function
-_armv8_aes_probe:
-       .byte   0x00,0x03,0xb0,0xf3     @ aese.8        q0,q0
-       .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
-.size  _armv8_aes_probe,.-_armv8_aes_probe
-
-.global        _armv8_sha1_probe
-.type  _armv8_sha1_probe,%function
-_armv8_sha1_probe:
-       .byte   0x40,0x0c,0x00,0xf2     @ sha1c.32      q0,q0,q0
-       .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
-.size  _armv8_sha1_probe,.-_armv8_sha1_probe
-
-.global        _armv8_sha256_probe
-.type  _armv8_sha256_probe,%function
-_armv8_sha256_probe:
-       .byte   0x40,0x0c,0x00,0xf3     @ sha256h.32    q0,q0,q0
-       .byte   0x1e,0xff,0x2f,0xe1     @ bx lr
-.size  _armv8_sha256_probe,.-_armv8_sha256_probe
-.global        _armv8_pmull_probe
-.type  _armv8_pmull_probe,%function
-_armv8_pmull_probe:
-       .byte   0x00,0x0e,0xa0,0xf2     @ vmull.p64     q0,d0,d0
-       .byte   0x1e,0xff,0x2f,0xe1     @ bx    lr
-.size  _armv8_pmull_probe,.-_armv8_pmull_probe
-
  .align 5
  .global        OPENSSL_atomic_add
  .type  OPENSSL_atomic_add,%function
@@ -139,30 +76,81 @@ OPENSSL_cleanse:
  #endif
  .size  OPENSSL_cleanse,.-OPENSSL_cleanse
  
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
+.align 5
+.global        _armv7_neon_probe
+.type  _armv7_neon_probe,%function
+_armv7_neon_probe:
+       vorr    q0,q0,q0
+       bx      lr
+.size  _armv7_neon_probe,.-_armv7_neon_probe
+
+.global        _armv7_tick
+.type  _armv7_tick,%function
+_armv7_tick:
+       mrrc    p15,1,r0,r1,c14         @ CNTVCT
+       bx      lr
+.size  _armv7_tick,.-_armv7_tick
+
+.global        _armv8_aes_probe
+.type  _armv8_aes_probe,%function
+_armv8_aes_probe:
+       .byte   0x00,0x03,0xb0,0xf3     @ aese.8        q0,q0
+       bx      lr
+.size  _armv8_aes_probe,.-_armv8_aes_probe
+
+.global        _armv8_sha1_probe
+.type  _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+       .byte   0x40,0x0c,0x00,0xf2     @ sha1c.32      q0,q0,q0
+       bx      lr
+.size  _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.global        _armv8_sha256_probe
+.type  _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+       .byte   0x40,0x0c,0x00,0xf3     @ sha256h.32    q0,q0,q0
+       bx      lr
+.size  _armv8_sha256_probe,.-_armv8_sha256_probe
+.global        _armv8_pmull_probe
+.type  _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+       .byte   0x00,0x0e,0xa0,0xf2     @ vmull.p64     q0,d0,d0
+       bx      lr
+.size  _armv8_pmull_probe,.-_armv8_pmull_probe
+#endif
+
  .global        OPENSSL_wipe_cpu
  .type  OPENSSL_wipe_cpu,%function
  OPENSSL_wipe_cpu:
+#if __ARM_MAX_ARCH__>=7
         ldr     r0,.LOPENSSL_armcap
         adr     r1,.LOPENSSL_armcap
         ldr     r0,[r1,r0]
+#endif
         eor     r2,r2,r2
         eor     r3,r3,r3
         eor     ip,ip,ip
+#if __ARM_MAX_ARCH__>=7
         tst     r0,#1
         beq     .Lwipe_done
-       .byte   0x50,0x01,0x00,0xf3     @ veor  q0, q0, q0
-       .byte   0x52,0x21,0x02,0xf3     @ veor  q1, q1, q1
-       .byte   0x54,0x41,0x04,0xf3     @ veor  q2, q2, q2
-       .byte   0x56,0x61,0x06,0xf3     @ veor  q3, q3, q3
-       .byte   0xf0,0x01,0x40,0xf3     @ veor  q8, q8, q8
-       .byte   0xf2,0x21,0x42,0xf3     @ veor  q9, q9, q9
-       .byte   0xf4,0x41,0x44,0xf3     @ veor  q10, q10, q10
-       .byte   0xf6,0x61,0x46,0xf3     @ veor  q11, q11, q11
-       .byte   0xf8,0x81,0x48,0xf3     @ veor  q12, q12, q12
-       .byte   0xfa,0xa1,0x4a,0xf3     @ veor  q13, q13, q13
-       .byte   0xfc,0xc1,0x4c,0xf3     @ veor  q14, q14, q14
-       .byte   0xfe,0xe1,0x4e,0xf3     @ veor  q14, q14, q14
+       veor    q0, q0, q0
+       veor    q1, q1, q1
+       veor    q2, q2, q2
+       veor    q3, q3, q3
+       veor    q8, q8, q8
+       veor    q9, q9, q9
+       veor    q10, q10, q10
+       veor    q11, q11, q11
+       veor    q12, q12, q12
+       veor    q13, q13, q13
+       veor    q14, q14, q14
+       veor    q15, q15, q15
  .Lwipe_done:
+#endif
         mov     r0,sp
  #if __ARM_ARCH__>=5
         bx      lr
@@ -200,8 +188,10 @@ OPENSSL_instrument_bus2:
  .size  OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
  
  .align 5
+#if __ARM_MAX_ARCH__>=7
  .LOPENSSL_armcap:
  .word  OPENSSL_armcap_P-.LOPENSSL_armcap
+#endif
  #if __ARM_ARCH__>=6
  .align 5
  #else
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl

index b781afbf89bebfad9a1d1c29c5491effffc66672..8f529c95cf0509d44cf0cce1730f4b235f6a643c 100644 (file)
--- a/crypto/bn/asm/armv4-gf2m.pl
+++ b/crypto/bn/asm/armv4-gf2m.pl
@@ -40,10 +40,6 @@ $code=<<___;
  
  .text
  .code  32
-
-#if __ARM_ARCH__>=7
-.fpu   neon
-#endif
  ___
  ################
  # private interface to mul_1x1_ialu
@@ -142,20 +138,80 @@ ___
  #      BN_ULONG a1,BN_ULONG a0,
  #      BN_ULONG b1,BN_ULONG b0);       # r[3..0]=a1a0·b1b0
  {
-my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
-my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
-
  $code.=<<___;
  .global        bn_GF2m_mul_2x2
  .type  bn_GF2m_mul_2x2,%function
  .align 5
  bn_GF2m_mul_2x2:
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
         ldr     r12,.LOPENSSL_armcap
  .Lpic: ldr     r12,[pc,r12]
         tst     r12,#1
-       beq     .Lialu
+       bne     .LNEON
+#endif
+___
+$ret="r10";    # reassigned 1st argument
+$code.=<<___;
+       stmdb   sp!,{r4-r10,lr}
+       mov     $ret,r0                 @ reassign 1st argument
+       mov     $b,r3                   @ $b=b1
+       ldr     r3,[sp,#32]             @ load b0
+       mov     $mask,#7<<2
+       sub     sp,sp,#32               @ allocate tab[8]
+
+       bl      mul_1x1_ialu            @ a1·b1
+       str     $lo,[$ret,#8]
+       str     $hi,[$ret,#12]
+
+       eor     $b,$b,r3                @ flip b0 and b1
+        eor    $a,$a,r2                @ flip a0 and a1
+       eor     r3,r3,$b
+        eor    r2,r2,$a
+       eor     $b,$b,r3
+        eor    $a,$a,r2
+       bl      mul_1x1_ialu            @ a0·b0
+       str     $lo,[$ret]
+       str     $hi,[$ret,#4]
  
+       eor     $a,$a,r2
+       eor     $b,$b,r3
+       bl      mul_1x1_ialu            @ (a1+a0)·(b1+b0)
+___
+@r=map("r$_",(6..9));
+$code.=<<___;
+       ldmia   $ret,{@r[0]-@r[3]}
+       eor     $lo,$lo,$hi
+       eor     $hi,$hi,@r[1]
+       eor     $lo,$lo,@r[0]
+       eor     $hi,$hi,@r[2]
+       eor     $lo,$lo,@r[3]
+       eor     $hi,$hi,@r[3]
+       str     $hi,[$ret,#8]
+       eor     $lo,$lo,$hi
+       add     sp,sp,#32               @ destroy tab[8]
+       str     $lo,[$ret,#4]
+
+#if __ARM_ARCH__>=5
+       ldmia   sp!,{r4-r10,pc}
+#else
+       ldmia   sp!,{r4-r10,lr}
+       tst     lr,#1
+       moveq   pc,lr                   @ be binary compatible with V4, yet
+       bx      lr                      @ interoperable with Thumb ISA:-)
+#endif
+___
+}
+{
+my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
+my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
+.align 5
+.LNEON:
         ldr             r12, [sp]               @ 5th argument
         vmov.32         $a, r2, r1
         vmov.32         $b, r12, r3
@@ -203,62 +259,12 @@ bn_GF2m_mul_2x2:
  
         vst1.32         {$r}, [r0]
         ret             @ bx lr
-.align 4
-.Lialu:
  #endif
  ___
  }
-$ret="r10";    # reassigned 1st argument
  $code.=<<___;
-       stmdb   sp!,{r4-r10,lr}
-       mov     $ret,r0                 @ reassign 1st argument
-       mov     $b,r3                   @ $b=b1
-       ldr     r3,[sp,#32]             @ load b0
-       mov     $mask,#7<<2
-       sub     sp,sp,#32               @ allocate tab[8]
-
-       bl      mul_1x1_ialu            @ a1·b1
-       str     $lo,[$ret,#8]
-       str     $hi,[$ret,#12]
-
-       eor     $b,$b,r3                @ flip b0 and b1
-        eor    $a,$a,r2                @ flip a0 and a1
-       eor     r3,r3,$b
-        eor    r2,r2,$a
-       eor     $b,$b,r3
-        eor    $a,$a,r2
-       bl      mul_1x1_ialu            @ a0·b0
-       str     $lo,[$ret]
-       str     $hi,[$ret,#4]
-
-       eor     $a,$a,r2
-       eor     $b,$b,r3
-       bl      mul_1x1_ialu            @ (a1+a0)·(b1+b0)
-___
-@r=map("r$_",(6..9));
-$code.=<<___;
-       ldmia   $ret,{@r[0]-@r[3]}
-       eor     $lo,$lo,$hi
-       eor     $hi,$hi,@r[1]
-       eor     $lo,$lo,@r[0]
-       eor     $hi,$hi,@r[2]
-       eor     $lo,$lo,@r[3]
-       eor     $hi,$hi,@r[3]
-       str     $hi,[$ret,#8]
-       eor     $lo,$lo,$hi
-       add     sp,sp,#32               @ destroy tab[8]
-       str     $lo,[$ret,#4]
-
-#if __ARM_ARCH__>=5
-       ldmia   sp!,{r4-r10,pc}
-#else
-       ldmia   sp!,{r4-r10,lr}
-       tst     lr,#1
-       moveq   pc,lr                   @ be binary compatible with V4, yet
-       bx      lr                      @ interoperable with Thumb ISA:-)
-#endif
  .size  bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
  .align 5
  .LOPENSSL_armcap:
  .word  OPENSSL_armcap_P-(.Lpic+8)
@@ -266,7 +272,9 @@ $code.=<<___;
  .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
  .align 5
  
+#if __ARM_MAX_ARCH__>=7
  .comm  OPENSSL_armcap_P,4,4
+#endif
  ___
  
  foreach (split("\n",$code)) {
diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl

index 72bad8e3083f984f33aac665af93fe61a1883429..1d330e9f8aa3111907aa2cca00c4a948c7501878 100644 (file)
--- a/crypto/bn/asm/armv4-mont.pl
+++ b/crypto/bn/asm/armv4-mont.pl
@@ -72,7 +72,7 @@ $code=<<___;
  .text
  .code  32
  
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
  .align 5
  .LOPENSSL_armcap:
  .word  OPENSSL_armcap_P-bn_mul_mont
@@ -85,7 +85,7 @@ $code=<<___;
  bn_mul_mont:
         ldr     ip,[sp,#4]              @ load num
         stmdb   sp!,{r0,r2}             @ sp points at argument block
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
         tst     ip,#7
         bne     .Lialu
         adr     r0,bn_mul_mont
@@ -256,7 +256,8 @@ my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
  my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
  
  $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
  .fpu   neon
  
  .type  bn_mul8x_mont_neon,%function
@@ -663,7 +664,7 @@ ___
  $code.=<<___;
  .asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
  .align 2
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
  .comm  OPENSSL_armcap_P,4,4
  #endif
  ___
diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c

index ba3d43b9fa9492a9dae19cde4375386624d553dc..ddd856ec71a1bfc9880d7c2b63a9e3acca984b52 100644 (file)
--- a/crypto/evp/e_aes.c
+++ b/crypto/evp/e_aes.c
@@ -1036,7 +1036,7 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
  
  #if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
  #include "arm_arch.h"
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
  # if defined(BSAES_ASM)
  #  define BSAES_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
  # endif
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl

index 0023bf994bf33306afa9e2760b3b55176fff7c08..77fbf34465db48fc011a4060d610e05d48f6dbb2 100644 (file)
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -365,7 +365,8 @@ ___
  }
  
  $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
  .fpu   neon
  
  .global        gcm_init_neon
diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c

index 261dc597a13b9bb6786fdf908c219d4ce631520e..4038d9c3642812d2ea0cbbb09e6402c39f67b2ff 100644 (file)
--- a/crypto/modes/gcm128.c
+++ b/crypto/modes/gcm128.c
@@ -675,7 +675,7 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
  #  endif
  # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
  #  include "arm_arch.h"
-#  if __ARM_ARCH__>=7
+#  if __ARM_MAX_ARCH__>=7
  #   define GHASH_ASM_ARM
  #   define GCM_FUNCREF_4BIT
  #   define PMULL_CAPABLE       (OPENSSL_armcap_P & ARMV8_PMULL)
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl

index 50bd07b331d60bf09b3630397d66625b92cd38c4..b2c30322c3515f19155c0e3fcd51b1ce31674738 100644 (file)
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -174,7 +174,7 @@ $code=<<___;
  
  .align 5
  sha1_block_data_order:
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
         sub     r3,pc,#8                @ sha1_block_data_order
         ldr     r12,.LOPENSSL_armcap
         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
@@ -264,8 +264,10 @@ $code.=<<___;
  .LK_20_39:     .word   0x6ed9eba1
  .LK_40_59:     .word   0x8f1bbcdc
  .LK_60_79:     .word   0xca62c1d6
+#if __ARM_MAX_ARCH__>=7
  .LOPENSSL_armcap:
  .word  OPENSSL_armcap_P-sha1_block_data_order
+#endif
  .asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
  .align 5
  ___
@@ -476,7 +478,8 @@ sub Xloop()
  }
  
  $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
  .fpu   neon
  
  .type  sha1_block_data_order_neon,%function
@@ -563,7 +566,7 @@ my @Kxx=map("q$_",(8..11));
  my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
  
  $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
  .type  sha1_block_data_order_armv8,%function
  .align 5
  sha1_block_data_order_armv8:
@@ -637,7 +640,9 @@ $code.=<<___;
  ___
  }}}
  $code.=<<___;
+#if __ARM_MAX_ARCH__>=7
  .comm  OPENSSL_armcap_P,4,4
+#endif
  ___
  
  {   my  %opcode = (
diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl

index 505ca8f350fa959eb614134e8d251962bda87f42..b0ae93633f71b00fd12116f58699d2d74d9d364c 100644 (file)
--- a/crypto/sha/asm/sha256-armv4.pl
+++ b/crypto/sha/asm/sha256-armv4.pl
@@ -177,8 +177,10 @@ K256:
  .word  0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  .size  K256,.-K256
  .word  0                               @ terminator
+#if __ARM_MAX_ARCH__>=7
  .LOPENSSL_armcap:
  .word  OPENSSL_armcap_P-sha256_block_data_order
+#endif
  .align 5
  
  .global        sha256_block_data_order
@@ -186,7 +188,7 @@ K256:
  sha256_block_data_order:
         sub     r3,pc,#8                @ sha256_block_data_order
         add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
         ldr     r12,.LOPENSSL_armcap
         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
         tst     r12,#ARMV8_SHA256
@@ -423,7 +425,8 @@ sub body_00_15 () {
  }
  
  $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
  .fpu   neon
  
  .type  sha256_block_data_order_neon,%function
@@ -545,7 +548,7 @@ my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
  my $Ktbl="r3";
  
  $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
  .type  sha256_block_data_order_armv8,%function
  .align 5
  sha256_block_data_order_armv8:
@@ -616,7 +619,9 @@ ___
  $code.=<<___;
  .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
  .align 2
+#if __ARM_MAX_ARCH__>=7
  .comm   OPENSSL_armcap_P,4,4
+#endif
  ___
  
  {   my  %opcode = (
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl

index 1d5275b91704cfb1af482c26d41502d1a86450fb..fb7dc506aca15c6c543a01306130558600431373 100644 (file)
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -237,16 +237,20 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
  WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
  WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
  .size  K512,.-K512
+#if __ARM_MAX_ARCH__>=7
  .LOPENSSL_armcap:
  .word  OPENSSL_armcap_P-sha512_block_data_order
  .skip  32-4
+#else
+.skip  32
+#endif
  
  .global        sha512_block_data_order
  .type  sha512_block_data_order,%function
  sha512_block_data_order:
         sub     r3,pc,#8                @ sha512_block_data_order
         add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
         ldr     r12,.LOPENSSL_armcap
         ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
         tst     r12,#1
@@ -551,7 +555,8 @@ ___
  }
  
  $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
  .fpu   neon
  
  .align 4
@@ -592,7 +597,9 @@ $code.=<<___;
  .size  sha512_block_data_order,.-sha512_block_data_order
  .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
  .align 2
+#if __ARM_MAX_ARCH__>=7
  .comm  OPENSSL_armcap_P,4,4
+#endif
  ___
  
  $code =~ s/\`([^\`]*)\`/eval $1/gem;
author	Andy Polyakov <appro@openssl.org>
	Fri, 7 Nov 2014 21:48:22 +0000 (22:48 +0100)
committer	Andy Polyakov <appro@openssl.org>
	Sun, 4 Jan 2015 22:45:08 +0000 (23:45 +0100)
Configure		patch \| blob \| history
crypto/aes/asm/aesv8-armx.pl		patch \| blob \| history
crypto/aes/asm/bsaes-armv7.pl		patch \| blob \| history
crypto/arm_arch.h		patch \| blob \| history
crypto/armcap.c		patch \| blob \| history
crypto/armv4cpuid.S		patch \| blob \| history
crypto/bn/asm/armv4-gf2m.pl		patch \| blob \| history
crypto/bn/asm/armv4-mont.pl		patch \| blob \| history
crypto/evp/e_aes.c		patch \| blob \| history
crypto/modes/asm/ghash-armv4.pl		patch \| blob \| history
crypto/modes/gcm128.c		patch \| blob \| history
crypto/sha/asm/sha1-armv4-large.pl		patch \| blob \| history
crypto/sha/asm/sha256-armv4.pl		patch \| blob \| history
crypto/sha/asm/sha512-armv4.pl		patch \| blob \| history