2 # Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # SHA1 block procedure for MIPS.
19 # Performance improvement is 30% on unaligned input. The "secret" is
20 # to deploy the lwl/lwr pair to load unaligned input. One could have
21 # vectorized Xupdate on MIPSIII/IV, but the goal was to code a MIPS32-
22 # compatible subroutine. There is room for minor optimization on
23 # little-endian platforms...
27 # Add MIPS32r2 code (>25% fewer instructions).
29 ######################################################################
30 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
31 # widely used. Then there is a new contender: NUBI. It appears that if
32 # one picks the latter, it's possible to arrange code in an ABI-neutral
33 # manner. Therefore let's stick to the NUBI register layout:
# Symbolic register names used by the rest of the script. Each map() turns a
# register number N into the string "$N", which is what gets interpolated
# into the generated assembly text.
# $zero and $at are $0 and $1; the temporaries $t0,$t1,$t2 are $2,$24,$25.
35 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
# Argument registers $a0-$a7 are $4-$11.
36 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# The twelve $s registers, $s0-$s11, are $12-$23.
37 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
# $gp is $3; $tp,$sp,$fp,$ra are $28,$29,$30,$31.
38 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
40 # The return value is placed in $a0. Following coding rules facilitate
43 # - never ever touch $tp, "thread pointer", former $gp;
44 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
46 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
48 # For reference here is register layout for N32/64 MIPS ABIs:
50 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
51 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
52 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
53 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
54 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
56 $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
58 if ($flavour =~ /64|n32/i) {
59 $PTR_ADD="dadd"; # incidentally works even on n32
60 $PTR_SUB="dsub"; # incidentally works even on n32
63 $PTR_SLL="dsll"; # incidentally works even on n32
76 ######################################################################
# Ask the target compiler about endianness: the literal token MIPSEL is run
# through the preprocessor, and if it comes out unchanged the target is
# treated as big-endian (1), otherwise as little-endian (0). The probe is
# only attempted when CC is set in the environment; otherwise $big_endian
# stays undefined and the host-based fallback below decides.
78 $big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
# The last command-line argument that ends in "name.ext" names the output file.
80 for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); }
# Redirect STDOUT only when an output file was actually named. Re-opening
# STDOUT on an empty path fails and leaves STDOUT closed, which would
# silently discard all generated code. A failed open is now fatal.
81 if (defined($output)) { open(STDOUT,">",$output) || die "can't open $output: $!"; }
# No compiler verdict: fall back to the byte order of the machine running
# this script. pack('N',1) is big-endian, unpack('L',...) is native, so the
# comparison is true only on a big-endian host.
83 if (!defined($big_endian))
84 { $big_endian=(unpack('L',pack('N',1))==1); }
86 # offsets of the Most and Least Significant Bytes
90 @X=map("\$$_",(8..23)); # a4-a7,s0-s11
99 $E="\$24"; @V=($A,$B,$C,$D,$E);
101 $t1=$num; # $num is offloaded to stack
106 my ($i,$a,$b,$c,$d,$e)=@_;
108 $code.=<<___ if (!$big_endian);
109 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
110 wsbh @X[$i],@X[$i] # byte swap($i)
111 rotr @X[$i],@X[$i],16
113 srl $t0,@X[$i],24 # byte swap($i)
115 andi $t2,@X[$i],0xFF00
125 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
129 lwl @X[$j],$j*4+$MSB($inp)
132 lwr @X[$j],$j*4+$LSB($inp)
138 lwl @X[$j],$j*4+$MSB($inp)
141 lwr @X[$j],$j*4+$LSB($inp)
158 my ($i,$a,$b,$c,$d,$e)=@_;
161 $code.=<<___ if (!$big_endian && $i==15);
162 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
163 wsbh @X[$i],@X[$i] # byte swap($i)
164 rotr @X[$i],@X[$i],16
166 srl $t0,@X[$i],24 # byte swap($i)
168 andi $t2,@X[$i],0xFF00
178 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
180 xor @X[$j%16],@X[($j+2)%16]
183 xor @X[$j%16],@X[($j+8)%16]
186 xor @X[$j%16],@X[($j+13)%16]
189 rotr @X[$j%16],@X[$j%16],31
193 xor @X[$j%16],@X[($j+2)%16]
198 xor @X[$j%16],@X[($j+8)%16]
201 xor @X[$j%16],@X[($j+13)%16]
205 addu @X[$j%16],@X[$j%16]
217 my ($i,$a,$b,$c,$d,$e)=@_;
219 $code.=<<___ if ($i<79);
220 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
221 xor @X[$j%16],@X[($j+2)%16]
224 xor @X[$j%16],@X[($j+8)%16]
227 xor @X[$j%16],@X[($j+13)%16]
230 rotr @X[$j%16],@X[$j%16],31
234 xor @X[$j%16],@X[($j+2)%16]
239 xor @X[$j%16],@X[($j+8)%16]
242 xor @X[$j%16],@X[($j+13)%16]
246 addu @X[$j%16],@X[$j%16]
254 $code.=<<___ if ($i==79);
255 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
292 my ($i,$a,$b,$c,$d,$e)=@_;
294 $code.=<<___ if ($i<79);
295 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
298 xor @X[$j%16],@X[($j+2)%16]
301 xor @X[$j%16],@X[($j+8)%16]
304 xor @X[$j%16],@X[($j+13)%16]
307 rotr @X[$j%16],@X[$j%16],31
311 xor @X[$j%16],@X[($j+2)%16]
316 xor @X[$j%16],@X[($j+8)%16]
319 xor @X[$j%16],@X[($j+13)%16]
324 addu @X[$j%16],@X[$j%16]
# Stack frame size, counted in $SZREG-sized slots. The prologue stores at
# most 15 registers ($ra, $fp, $s11..$s0, $gp) in slots FRAMESIZE-1 down to
# FRAMESIZE-15, so 16 slots cover the NUBI case.
335 $FRAMESIZE=16; # large enough to accommodate NUBI saved registers
# Register bit mask passed to the .mask directive, one bit per register
# number. Bits 30-31 are $fp/$ra and bits 16-23 are $s4-$s11, which are
# saved for every flavour; NUBI flavours additionally save $s0-$s3
# (bits 12-15) and $gp (bit 3).
336 $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
339 #ifdef OPENSSL_FIPSCANISTER
340 # include <openssl/fipssyms.h>
343 #if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
344 #define _MIPS_ARCH_MIPS32R2
352 .globl sha1_block_data_order
353 .ent sha1_block_data_order
354 sha1_block_data_order:
355 .frame $sp,$FRAMESIZE*$SZREG,$ra
356 .mask $SAVED_REGS_MASK,-$SZREG
358 $PTR_SUB $sp,$FRAMESIZE*$SZREG
359 $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp)
360 $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp)
361 $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp)
362 $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp)
363 $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp)
364 $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp)
365 $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp)
366 $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp)
367 $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp)
368 $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp)
370 $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
371 $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp)
372 $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp)
373 $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp)
374 $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp)
375 $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp)
393 ori $K,0x7999 # K_00_19
395 for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); } # rounds 0-14; moving the last element of @V to the front rotates the working-variable roles for the next round
396 for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); } # rounds 15-19, continuing with the same $i and the same @V rotation
399 ori $K,0xeba1 # K_20_39
401 for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
404 ori $K,0xbcdc # K_40_59
406 for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
409 ori $K,0xc1d6 # K_60_79
411 for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } # rounds 60-79 are generated by BODY_20_39 as well, after $K has been loaded with K_60_79
431 $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp)
432 $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp)
433 $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp)
434 $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp)
435 $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp)
436 $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp)
437 $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp)
438 $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp)
439 $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp)
440 $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp)
442 $code.=<<___ if ($flavour =~ /nubi/i);
443 $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp)
444 $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp)
445 $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp)
446 $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp)
447 $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp)
451 $PTR_ADD $sp,$FRAMESIZE*$SZREG
452 .end sha1_block_data_order
454 .asciiz "SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"