#endif
.text
-#if __ARM_ARCH__<7
-.code 32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
-# if defined(__thumb2__) && !defined(__APPLE__)
.thumb
-# else
+#else
.code 32
-# endif
#endif
.type AES_Te,%object
.type AES_encrypt,%function
.align 5
AES_encrypt:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
sub r3,pc,#8 @ AES_encrypt
#else
adr r3,AES_encrypt
.align 5
AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
sub r3,pc,#8 @ AES_set_encrypt_key
#else
adr r3,AES_set_encrypt_key
#endif
teq r0,#0
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM
#endif
moveq r0,#-1
beq .Labrt
teq r2,#0
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM
#endif
moveq r0,#-1
teq r1,#192
beq .Lok
teq r1,#256
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt ne @ Thumb2 thing, sanity check in ARM
#endif
movne r0,#-1
str $s2,[$key,#-16]
subs $rounds,$rounds,#1
str $s3,[$key,#-12]
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM
#endif
subeq r2,$key,#216
str $s2,[$key,#-24]
subs $rounds,$rounds,#1
str $s3,[$key,#-20]
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM
#endif
subeq r2,$key,#256
.type AES_decrypt,%function
.align 5
AES_decrypt:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
sub r3,pc,#8 @ AES_decrypt
#else
adr r3,AES_decrypt
#include "arm_arch.h"
.text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
.align 5
.global OPENSSL_atomic_add
OPENSSL_cleanse:
eor ip,ip,ip
cmp r1,#7
+#ifdef __thumb2__
+ itt hs
+#endif
subhs r1,r1,#4
bhs .Lot
cmp r1,#0
.global _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+ .byte 0xb0,0xff,0x00,0x03 @ aese.8 q0,q0
+#else
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
+#endif
bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
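+@ Each probe encodes the same instruction twice: the A32 form as one
+@ little-endian 32-bit word, the T32 form as two little-endian halfwords
+@ with the most significant halfword first and the 0xf2/0xf3 prefix
+@ replaced by its T32 counterpart 0xef/0xff.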
.global _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+ .byte 0x00,0xef,0x40,0x0c @ sha1c.32 q0,q0,q0
+#else
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
+#endif
bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.global _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+ .byte 0x00,0xff,0x40,0x0c @ sha256h.32 q0,q0,q0
+#else
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
+#endif
bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.global _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
+#if defined(__thumb2__) && !defined(__APPLE__)
+ .byte 0xa0,0xef,0x00,0x0e @ vmull.p64 q0,d0,d0
+#else
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
+#endif
bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
#endif
#include "arm_arch.h"
.text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
___
################
# private interface to mul_1x1_ialu
eor $hi,$hi,$t0,lsr#8
ldr $t0,[sp,$i0] @ tab[b >> 30 ]
+#ifdef __thumb2__
+ itt ne
+#endif
eorne $lo,$lo,$b,lsl#30
eorne $hi,$hi,$b,lsr#2
tst $a,#1<<31
eor $lo,$lo,$t1,lsl#27
eor $hi,$hi,$t1,lsr#5
+#ifdef __thumb2__
+ itt ne
+#endif
eorne $lo,$lo,$b,lsl#31
eorne $hi,$hi,$b,lsr#1
eor $lo,$lo,$t0,lsl#30
.align 5
bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
+ stmdb sp!,{r10,lr}
ldr r12,.LOPENSSL_armcap
-.Lpic: ldr r12,[pc,r12]
- tst r12,#1
+ adr r10,.LOPENSSL_armcap
+ ldr r12,[r12,r10]
+#ifdef __APPLE__
+ ldr r12,[r12]
+#endif
+ tst r12,#ARMV7_NEON
+ itt ne
+	ldrne	r10,[sp],#8		@ restore r10 and drop the saved lr on the NEON path
bne .LNEON
+ stmdb sp!,{r4-r9}
+#else
+ stmdb sp!,{r4-r10,lr}
#endif
___
$ret="r10"; # reassigned 1st argument
$code.=<<___;
- stmdb sp!,{r4-r10,lr}
mov $ret,r0 @ reassign 1st argument
mov $b,r3 @ $b=b1
+ sub r7,sp,#36
+ mov r8,sp
+ and r7,r7,#-32
ldr r3,[sp,#32] @ load b0
mov $mask,#7<<2
- sub sp,sp,#32 @ allocate tab[8]
+ mov sp,r7 @ allocate tab[8]
+ str r8,[r7,#32]
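+@ tab[8] now occupies a 32-byte aligned block at sp, with the caller's
+@ sp stashed at [sp,#32] and restored by the "destroy tab[8]" load below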
bl mul_1x1_ialu @ a1·b1
str $lo,[$ret,#8]
$code.=<<___;
ldmia $ret,{@r[0]-@r[3]}
eor $lo,$lo,$hi
+ ldr sp,[sp,#32] @ destroy tab[8]
eor $hi,$hi,@r[1]
eor $lo,$lo,@r[0]
eor $hi,$hi,@r[2]
eor $hi,$hi,@r[3]
str $hi,[$ret,#8]
eor $lo,$lo,$hi
- add sp,sp,#32 @ destroy tab[8]
str $lo,[$ret,#4]
#if __ARM_ARCH__>=5
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-(.Lpic+8)
+.word OPENSSL_armcap_P-.
#endif
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
#include "arm_arch.h"
.text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
#if __ARM_MAX_ARCH__>=7
.align 5
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
- adr r0,bn_mul_mont
+ adr r0,.Lbn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
#endif
cmp ip,#2
mov $num,ip @ load num
+#ifdef __thumb2__
+ ittt lt
+#endif
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
ldr $n0,[$_n0] @ restore n0
adc $nhi,$nhi,#0
str $nlo,[$num] @ tp[num-1]=
+ mov $tj,sp
str $nhi,[$num,#4] @ tp[num]=
\f
.Louter:
- sub $tj,$num,sp @ "original" $num-1 value
+ sub $tj,$num,$tj @ "original" $num-1 value
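+@ ($tj holds a copy of sp, since Thumb-2 does not take sp as the
+@ second operand of sub)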
sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
ldr $bi,[$tp,#4]! @ *(++bp)
sub $np,$np,$tj @ "rewind" np to &np[1]
str $nhi,[$num,#4] @ tp[num]=
cmp $tp,$tj
+#ifdef __thumb2__
+ itt ne
+#endif
+ movne $tj,sp
bne .Louter
\f
ldr $rp,[$_rp] @ pull rp
+ mov $aj,sp
add $num,$num,#4 @ $num to point at &tp[num]
- sub $aj,$num,sp @ "original" num value
+ sub $aj,$num,$aj @ "original" num value
mov $tp,sp @ "rewind" $tp
mov $ap,$tp @ "borrow" $ap
sub $np,$np,$aj @ "rewind" $np to &np[0]
cmp $tp,$num
bne .Lcopy
- add sp,$num,#4 @ skip over tp[num+1]
+ mov sp,$num
+ add sp,sp,#4 @ skip over tp[num+1]
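+@ (Thumb-2 has no "add sp,Rn,#imm" encoding, hence the mov/add pair)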
ldmia sp!,{r4-r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
stmdb sp!,{r4-r11}
vstmdb sp!,{d8-d15} @ ABI specification says so
ldmia ip,{r4-r5} @ load rest of parameter block
+	mov	ip,sp			@ save sp, restored at the end
sub $toutptr,sp,#16
vld1.32 {${Bi}[0]}, [$bptr,:32]!
bne .LNEON_sub
ldr r10, [$aptr] @ load top-most bit
+ mov r11,sp
veor q0,q0,q0
- sub r11,$bptr,sp @ this is num*4
+ sub r11,$bptr,r11 @ this is num*4
veor q1,q1,q1
mov $aptr,sp
sub $rptr,$rptr,r11 @ rewind $rptr
.LNEON_copy_n_zap:
ldmia $aptr!, {r4-r7}
ldmia $rptr, {r8-r11}
+ it cc
movcc r8, r4
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
+ itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
+ it cc
movcc r11,r7
ldmia $aptr, {r4-r7}
stmia $rptr!, {r8-r11}
sub $aptr,$aptr,#16
ldmia $rptr, {r8-r11}
+ it cc
movcc r8, r4
vst1.64 {q0-q1}, [$aptr,:256]! @ wipe
+ itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
+ it cc
movcc r11,r7
teq $aptr,$bptr @ preserves carry
stmia $rptr!, {r8-r11}
bne .LNEON_copy_n_zap
- sub sp,ip,#96
+ mov sp,ip
vldmia sp!,{d8-d15}
ldmia sp!,{r4-r11}
ret @ bx lr
#include "arm_arch.h"
.text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
___
########################################################################
# Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7
adcs $a6,$a6,$a6
mov $ff,#0
adcs $a7,$a7,$a7
+#ifdef __thumb2__
+ it cs
+#endif
movcs $ff,#-1 @ $ff = carry ? -1 : 0
b .Lreduce_by_sub
adcs $a6,$a6,$t2
mov $ff,#0
adcs $a7,$a7,$t3
+#ifdef __thumb2__
+ it cs
+#endif
movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry
ldr lr,[sp],#4 @ pop lr
adcs $a6,$a6,$a6
mov $ff,#0
adcs $a7,$a7,$a7
+#ifdef __thumb2__
+ it cs
+#endif
movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry
subs $a0,$a0,$ff @ subtract synthesized modulus, see
adcs $a6,$a6,$t2
mov $ff,#0
adcs $a7,$a7,$t3
+#ifdef __thumb2__
+ it cs
+#endif
movcs $ff,#-1 @ $ff = carry ? -1 : 0, "broadcast" carry
ldr lr,[sp],#4 @ pop lr
cmp $index,#0
mov $mask,#0
+#ifdef __thumb2__
+ itt ne
+#endif
subne $index,$index,#1
movne $mask,#-1
add $inp,$inp,$index,lsl#2
cmp $index,#0
mov $mask,#0
+#ifdef __thumb2__
+ itt ne
+#endif
subne $index,$index,#1
movne $mask,#-1
add $inp,$inp,$index
adcs $a6,$a6,$a6
mov $ff,#0
adcs $a7,$a7,$a7
+#ifdef __thumb2__
+ it cs
+#endif
movcs $ff,#-1 @ $ff = carry ? -1 : 0
subs $a0,$a0,$ff @ subtract synthesized modulus
stmia r3!,{r4-r11}
ldmia $b_ptr,{r4-r11}
cmp r12,#0
+#ifdef __thumb2__
+ it ne
+#endif
movne r12,#-1
stmia r3,{r4-r11}
str r12,[sp,#32*18+8] @ !in2infty
stmia r3!,{r4-r11}
ldmia $a_ptr,{r4-r11}
cmp r12,#0
+#ifdef __thumb2__
+ it ne
+#endif
movne r12,#-1
stmia r3,{r4-r11}
str r12,[sp,#32*18+4] @ !in1infty
stmia r3!,{r4-r11}
ldmia $a_ptr,{r4-r11}
cmp r12,#0
+#ifdef __thumb2__
+ it ne
+#endif
movne r12,#-1
stmia r3,{r4-r11}
str r12,[sp,#32*15+4] @ !in1infty
orr r12,r12,r11
stmia r3!,{r4-r11}
cmp r12,#0
+#ifdef __thumb2__
+ it ne
+#endif
movne r12,#-1
str r12,[sp,#32*15+8] @ !in2infty
#include "arm_arch.h"
.text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
#ifdef __APPLE__
#define ldrplb ldrbpl
.type rem_4bit_get,%function
rem_4bit_get:
- sub $rem_4bit,pc,#8
- sub $rem_4bit,$rem_4bit,#32 @ &rem_4bit
+#if defined(__thumb2__)
+ adr $rem_4bit,rem_4bit
+#else
+ sub $rem_4bit,pc,#8+32 @ &rem_4bit
+#endif
b .Lrem_4bit_got
nop
+ nop
.size rem_4bit_get,.-rem_4bit_get
.global gcm_ghash_4bit
.type gcm_ghash_4bit,%function
+.align 4
gcm_ghash_4bit:
- sub r12,pc,#8
+#if defined(__thumb2__)
+ adr r12,rem_4bit
+#else
+ sub r12,pc,#8+48 @ &rem_4bit
+#endif
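+@ the ARM-mode "sub r12,pc,#8+48" above assumes rem_4bit sits exactly
+@ 48 bytes before gcm_ghash_4bit; the extra nop in rem_4bit_get and the
+@ .align above preserve that distance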
add $len,$inp,$len @ $len to point at the end
stmdb sp!,{r3-r11,lr} @ save $len/end too
- sub r12,r12,#48 @ &rem_4bit
ldmia r12,{r4-r11} @ copy rem_4bit ...
stmdb sp!,{r4-r11} @ ... to stack
eor $Zlh,$Zlh,$Zhl,lsl#28
ldrh $Tll,[sp,$nlo] @ rem_4bit[rem]
eor $Zhl,$Thl,$Zhl,lsr#4
+#ifdef __thumb2__
+ it pl
+#endif
ldrplb $nlo,[$inp,$cnt]
eor $Zhl,$Zhl,$Zhh,lsl#28
eor $Zhh,$Thh,$Zhh,lsr#4
add $nhi,$nhi,$nhi
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
eor $Zll,$Tll,$Zll,lsr#4
+#ifdef __thumb2__
+ it pl
+#endif
ldrplb $Tll,[$Xi,$cnt]
eor $Zll,$Zll,$Zlh,lsl#28
eor $Zlh,$Tlh,$Zlh,lsr#4
eor $Zlh,$Zlh,$Zhl,lsl#28
eor $Zhl,$Thl,$Zhl,lsr#4
eor $Zhl,$Zhl,$Zhh,lsl#28
+#ifdef __thumb2__
+ it pl
+#endif
eorpl $nlo,$nlo,$Tll
eor $Zhh,$Thh,$Zhh,lsr#4
+#ifdef __thumb2__
+ itt pl
+#endif
andpl $nhi,$nlo,#0xf0
andpl $nlo,$nlo,#0x0f
eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem]
add $inp,$inp,#16
mov $nhi,$Zll
___
- &Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]");
+ &Zsmash("cmp\t$inp,$len","\n".
+ "#ifdef __thumb2__\n".
+ " it ne\n".
+ "#endif\n".
+ " ldrneb $nlo,[$inp,#15]");
$code.=<<___;
bne .Louter
eor $Zlh,$Zlh,$Zhl,lsl#28
ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem]
eor $Zhl,$Thl,$Zhl,lsr#4
+#ifdef __thumb2__
+ it pl
+#endif
ldrplb $nlo,[$Xi,$cnt]
eor $Zhl,$Zhl,$Zhh,lsl#28
eor $Zhh,$Thh,$Zhh,lsr#4
eor $Zhl,$Thl,$Zhl,lsr#4
eor $Zhl,$Zhl,$Zhh,lsl#28
eor $Zhh,$Thh,$Zhh,lsr#4
+#ifdef __thumb2__
+ itt pl
+#endif
andpl $nhi,$nlo,#0xf0
andpl $nlo,$nlo,#0x0f
eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem]
#include "arm_arch.h"
.text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
.global sha1_block_data_order
.type sha1_block_data_order,%function
.align 5
sha1_block_data_order:
#if __ARM_MAX_ARCH__>=7
- sub r3,pc,#8 @ sha1_block_data_order
+.Lsha1_block:
+ adr r3,.Lsha1_block
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__
&BODY_00_15(@V); unshift(@V,pop(@V));
}
$code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+ mov $t3,sp
+ teq $Xi,$t3
+#else
teq $Xi,sp
+#endif
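+@ (Thumb-2 does not allow sp as a teq operand, hence the copy via $t3)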
bne .L_00_15 @ [((11+4)*5+2)*3]
sub sp,sp,#25*4
___
&BODY_20_39(@V); unshift(@V,pop(@V));
}
$code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+ mov $t3,sp
+ teq $Xi,$t3
+#else
teq $Xi,sp @ preserve carry
+#endif
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
&BODY_40_59(@V); unshift(@V,pop(@V));
}
$code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+ mov $t3,sp
+ teq $Xi,$t3
+#else
teq $Xi,sp
+#endif
bne .L_40_59 @ [+((12+5)*5+2)*4]
ldr $K,.LK_60_79
.LK_60_79: .word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha1_block_data_order
+.word OPENSSL_armcap_P-.Lsha1_block
#endif
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
&teq ($inp,$len);
&sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX
+ &it ("eq");
&subeq ($inp,$inp,64); # reload last block to avoid SEGV
&vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!");
eval(shift(@insns));
@ dmb @ errata #451034 on early Cortex A8
@ vstmdb sp!,{d8-d15} @ ABI specification says so
mov $saved_sp,sp
- sub sp,sp,#64 @ alloca
+ sub $Xfer,sp,#64
adr $K_XX_XX,.LK_00_19
- bic sp,sp,#15 @ align for 128-bit stores
+ bic $Xfer,$Xfer,#15 @ align for 128-bit stores
ldmia $ctx,{$a,$b,$c,$d,$e} @ load context
- mov $Xfer,sp
+ mov sp,$Xfer @ alloca
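+@ sp is assigned last with a plain mov, since "bic sp,sp,#15" is not
+@ acceptable in Thumb-2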
vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned
veor $zero,$zero,$zero
add $b,$b,$t0
add $c,$c,$t1
add $d,$d,$Xfer
+ it eq
moveq sp,$saved_sp
add $e,$e,$Ki
+ it ne
ldrne $Ki,[sp]
stmia $ctx,{$a,$b,$c,$d,$e}
+ itt ne
addne $Xfer,sp,#3*16
bne .Loop_neon
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
+
+# if defined(__thumb2__) && !defined(__APPLE__)
+# define INST(a,b,c,d) .byte c,d|0xf,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d|0x10
+# endif
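+@ INST emits raw opcode bytes: A32 as one little-endian word, T32 as
+@ two little-endian halfwords with the most significant halfword first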
+
.type sha1_block_data_order_armv8,%function
.align 5
sha1_block_data_order_armv8:
# since ARMv7 instructions are always encoded little-endian.
# correct solution is to use .inst directive, but older
# assemblers don't implement it:-(
- sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+
+ # this fix-up provides Thumb encoding in conjunction with INST
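+	    # (for 0xf2-prefixed words bit 28 is cleared here and put back
+	    # by the A32 INST via d|0x10; the T32 INST ORs 0xf into the top
+	    # byte instead, giving the 0xef/0xff T32 prefix, and emits the
+	    # halfwords in T32 order)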
+ $word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000);
+ sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
$word&0xff,($word>>8)&0xff,
($word>>16)&0xff,($word>>24)&0xff,
$mnemonic,$arg;
#endif
.text
-#if __ARM_ARCH__<7
-.code 32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
-# if defined(__thumb2__) && !defined(__APPLE__)
-# define adrl adr
.thumb
-# else
+# define adrl adr
+#else
.code 32
-# endif
#endif
.type K256,%object
.type sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha256_block_data_order
#else
- adr r3,sha256_block_data_order
+ adr r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
#endif
.text
-#if __ARM_ARCH__<7 || defined(__APPLE__)
-.code 32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
-# ifdef __thumb2__
-# define adrl adr
.thumb
-# else
-.code 32
-# endif
+# define adrl adr
+#else
+.code 32
#endif
.type K512,%object
.type sha512_block_data_order,%function
sha512_block_data_order:
.Lsha512_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha512_block_data_order
#else
- adr r3,sha512_block_data_order
+ adr r3,.Lsha512_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap