Commentary updates.
[oweals/openssl.git] / crypto / sha / asm / sha256-armv4.pl
1 #!/usr/bin/env perl
2
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA256 block procedure for ARMv4. May 2007.

# Performance is ~2x better than gcc 3.4 generated code and in
# "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
# per byte.
15
# Register allocation for the generated ARM code.
#
# r0 and r2 do double duty: they arrive carrying the context pointer and
# the block count, are pushed onto the stack by the prologue, and are
# then reused as the scratch registers $t0 and $t1.
$ctx="r0";      $t0="r0";
$inp="r1";                      # input pointer; reused as scratch in rounds 16+
$len="r2";      $t1="r2";
$T1="r3";                       # current message word / T1 accumulator
# The eight working variables a..h stay in r4-r11.
$A="r4";
$B="r5";
$C="r6";
$D="r7";
$E="r8";
$F="r9";
$G="r10";
$H="r11";
@V=($A,$B,$C,$D,$E,$F,$G,$H);   # rotated by one position after every round
$t2="r12";                      # scratch; receives the K256 word of each round
$Ktbl="r14";                    # K256 cursor (lr itself is saved by the prologue)

# Rotate/shift amounts of the four SHA-256 bit-mixing functions. The
# round bodies apply all three Sigma amounts as rotations; for the two
# lower-case sigmas the third amount is applied as a logical shift right.
@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 7,18, 3);
@sigma1=(17,19,10);
36
# BODY_00_15($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the ARM instructions for SHA-256 round $i to the global $code.
# $a..$h are the register names currently playing the roles of the
# working variables a..h.
#
# For $i < 16 the message word is first assembled in $T1 from four
# single-byte loads (most significant byte first) while $inp advances by
# four; for later rounds the caller must already have left the schedule
# word in $T1. In round 15 the advanced $inp is written to frame slot 17,
# because the rounds from 16 on reuse $inp as a scratch register.
#
# The common part stores $T1 into the 16-word buffer at sp (slot $i%16),
# fetches the next K256 word into $t2 and then computes
#       T1 += Sigma1(e) + Ch(e,f,g) + h + K
#       h   = Sigma0(a) + Maj(a,b,c) + T1
#       d  += T1
# $t2 still holds the K256 word afterwards; the driver loop relies on
# that to recognise the final round.
#
# Text between backticks is left in $code as-is and is evaluated as Perl
# by the substitution that runs just before $code is printed.
sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

# Rounds 0..15 only: fetch the next input word one byte at a time.
$code.=<<___ if ($i<16);
        ldrb    $T1,[$inp,#3]                   @ $i
        ldrb    $t2,[$inp,#2]
        ldrb    $t1,[$inp,#1]
        ldrb    $t0,[$inp],#4
        orr     $T1,$T1,$t2,lsl#8
        orr     $T1,$T1,$t1,lsl#16
        orr     $T1,$T1,$t0,lsl#24
        `"str   $inp,[sp,#17*4]"        if ($i==15)`
___
# Every round: the compression step proper.
$code.=<<___;
        ldr     $t2,[$Ktbl],#4                  @ *K256++
        str     $T1,[sp,#`$i%16`*4]
        mov     $t0,$e,ror#$Sigma1[0]
        eor     $t0,$t0,$e,ror#$Sigma1[1]
        eor     $t0,$t0,$e,ror#$Sigma1[2]       @ Sigma1(e)
        add     $T1,$T1,$t0
        eor     $t1,$f,$g
        and     $t1,$t1,$e
        eor     $t1,$t1,$g                      @ Ch(e,f,g)
        add     $T1,$T1,$t1
        add     $T1,$T1,$h
        add     $T1,$T1,$t2
        mov     $h,$a,ror#$Sigma0[0]
        eor     $h,$h,$a,ror#$Sigma0[1]
        eor     $h,$h,$a,ror#$Sigma0[2]         @ Sigma0(a)
        orr     $t0,$a,$b
        and     $t0,$t0,$c
        and     $t1,$a,$b
        orr     $t0,$t0,$t1                     @ Maj(a,b,c)
        add     $h,$h,$t0
        add     $d,$d,$T1
        add     $h,$h,$T1
___
}
75
# BODY_16_XX($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the instructions for round $i (16 and up) to the global $code.
# Takes the same arguments as BODY_00_15.
#
# The message schedule lives in the 16-word buffer X[] at sp, addressed
# modulo 16. The emitted code computes
#       T1 = X[i] + sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9]
# and then hands over to BODY_00_15, which stores that word back into
# slot i%16 and performs the compression step. $inp is used here as a
# plain scratch register for X[i+9].
sub BODY_16_XX {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___;
        ldr     $t1,[sp,#`($i+1)%16`*4] @ $i
        ldr     $t2,[sp,#`($i+14)%16`*4]
        ldr     $T1,[sp,#`($i+0)%16`*4]
        ldr     $inp,[sp,#`($i+9)%16`*4]
        mov     $t0,$t1,ror#$sigma0[0]
        eor     $t0,$t0,$t1,ror#$sigma0[1]
        eor     $t0,$t0,$t1,lsr#$sigma0[2]      @ sigma0(X[i+1])
        mov     $t1,$t2,ror#$sigma1[0]
        eor     $t1,$t1,$t2,ror#$sigma1[1]
        eor     $t1,$t1,$t2,lsr#$sigma1[2]      @ sigma1(X[i+14])
        add     $T1,$T1,$t0
        add     $T1,$T1,$t1
        add     $T1,$T1,$inp
___
        # $i >= 16 here, so BODY_00_15 skips its input-loading part.
        BODY_00_15(@_);
}
96
# Start of the generated assembly: the K256 round-constant table followed
# directly by the entry point and prologue of sha256_block_data_order.
#
# The table is 64 words (256 bytes) and ends exactly where the function
# begins, so the prologue locates it without a literal pool: r3 is set to
# the address of the first instruction (pc reads 8 bytes ahead in ARM
# state) and $Ktbl to r3-256.
#
# $len arrives as a count of 64-byte blocks and is turned into an
# end-of-input pointer before being pushed. Stack frame once the prologue
# is done, in words from sp:
#       0..15   X[16] message schedule buffer
#       16      ctx
#       17      inp
#       18      inp + 64*len
#       19..    saved r4-r12, lr
$code=<<___;
.text
.code   32

.type   K256,%object
.align  5
K256:
.word   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size   K256,.-K256

.global sha256_block_data_order
.type   sha256_block_data_order,%function
sha256_block_data_order:
        sub     r3,pc,#8                @ sha256_block_data_order
        add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
        stmdb   sp!,{$ctx,$inp,$len,r4-r12,lr}
        ldmia   $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
        sub     $Ktbl,r3,#256           @ K256
        sub     sp,sp,#16*4             @ alloca(X[16])
.Loop:
___
# Emit the round bodies. Rounds 0..15 are unrolled once. For rounds 16
# and up only sixteen bodies are generated behind .Lrounds_16_xx; the
# generated code branches back to that label until the last K256 word
# has been consumed, so the same sixteen bodies serve every later round.
# That works because the X[] slots are addressed modulo 16.
#
# Rotating @V right by one after each round renames the registers
# instead of moving values: the register that just received the new
# "a" becomes $a of the next round, and so on. After sixteen rounds the
# list is back in its starting order, which keeps the looped section
# consistent from pass to pass.
for my $i (0 .. 15) {
        BODY_00_15($i,@V);
        unshift(@V,pop(@V));
}
$code.=".Lrounds_16_xx:\n";
for my $i (16 .. 31) {
        BODY_16_XX($i,@V);
        unshift(@V,pop(@V));
}
# Tail of the generated function.
#
# 1. Loop control for the looped rounds: $t2 still holds the K256 word
#    used by the round just executed. Its low byte is 0xf2 only for the
#    final table entry (0xc67178f2), which is what the cmp tests.
# 2. Reload ctx from the frame, add the stored hash words to the working
#    variables and write the result back.
# 3. Reload inp and the end pointer, rewind $Ktbl by the 256 bytes it
#    advanced, and go round again while input remains.
# 4. Drop the X[16] buffer plus the three saved argument words, restore
#    r4-r12 and lr, and return with "mov pc,lr" when bit 0 of lr is
#    clear or "bx lr" otherwise.
$code.=<<___;
        and     $t2,$t2,#0xff
        cmp     $t2,#0xf2
        bne     .Lrounds_16_xx

        ldr     $T1,[sp,#16*4]          @ pull ctx
        ldr     $t0,[$T1,#0]
        ldr     $t1,[$T1,#4]
        ldr     $t2,[$T1,#8]
        add     $A,$A,$t0
        ldr     $t0,[$T1,#12]
        add     $B,$B,$t1
        ldr     $t1,[$T1,#16]
        add     $C,$C,$t2
        ldr     $t2,[$T1,#20]
        add     $D,$D,$t0
        ldr     $t0,[$T1,#24]
        add     $E,$E,$t1
        ldr     $t1,[$T1,#28]
        add     $F,$F,$t2
        ldr     $inp,[sp,#17*4]         @ pull inp
        ldr     $t2,[sp,#18*4]          @ pull inp+len
        add     $G,$G,$t0
        add     $H,$H,$t1
        stmia   $T1,{$A,$B,$C,$D,$E,$F,$G,$H}
        cmp     $inp,$t2
        sub     $Ktbl,$Ktbl,#256        @ rewind Ktbl
        bne     .Loop

        add     sp,sp,#`16+3`*4 @ destroy frame
        ldmia   sp!,{r4-r12,lr}
        tst     lr,#1
        moveq   pc,lr                   @ be binary compatible with V4, yet
        bx      lr                      @ interoperable with Thumb ISA:-)
.size   sha256_block_data_order,.-sha256_block_data_order
.asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
___
173
# Post-process the collected assembly and write it out.
#
# Every `...` span left in $code is Perl source: evaluate it and splice
# in the result (slot numbers such as `5%16`, the frame size `16+3`, and
# the conditional "str" of round 15, which evaluates to an empty string
# in every other round).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# Spell "bx lr" as its raw ARM-state encoding. The instruction executed
# is unchanged, but the output now also assembles when the assembler is
# targeting plain ARMv4, where the "bx" mnemonic is not accepted.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;

print $code;

# Closing explicitly flushes the output and turns a failed write (full
# disk, closed pipe) into an error instead of a silently truncated file.
close STDOUT or die "error closing STDOUT: $!";