2 # Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # SHA1 block procedure for MIPS.
19 # Performance improvement is 30% on unaligned input. The "secret" is
20 # to deploy an lwl/lwr pair to load unaligned input. One could have
21 # vectorized Xupdate on MIPSIII/IV, but the goal was to code a MIPS32-
22 # compatible subroutine. There is room for minor optimization on
23 # little-endian platforms...
27 # Add MIPS32r2 code (>25% fewer instructions).
29 ######################################################################
30 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
31 # widely used. Then there is a new contender: NUBI. It appears that if
32 # one picks the latter, it's possible to arrange code in an ABI-neutral
33 # manner. Therefore let's stick to the NUBI register layout:
# Each map() turns a list of register numbers into the strings "$0", "$1", ...
# so the symbolic names below interpolate into the generated assembly as raw
# MIPS register operands.
# $zero=$0, $at=$1, $t0=$2, $t1=$24, $t2=$25
35 ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
# argument registers: $a0..$a7 = $4..$11
36 ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# $s0..$s11 = $12..$23
37 ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
# $gp=$3 (not $28 as in the N32/64 layout quoted further down), $tp=$28,
# $sp=$29, $fp=$30, $ra=$31
38 ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
40 # The return value is placed in $a0. Following coding rules facilitate
43 # - never ever touch $tp, "thread pointer", former $gp;
44 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
46 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
48 # For reference here is register layout for N32/64 MIPS ABIs:
50 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
51 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
52 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
53 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
54 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
56 $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
58 if ($flavour =~ /64|n32/i) {
59 $PTR_ADD="daddu"; # incidentally works even on n32
60 $PTR_SUB="dsubu"; # incidentally works even on n32
63 $PTR_SLL="dsll"; # incidentally works even on n32
76 ######################################################################
# Target endianness, probed from the compiler when CC is set: the text
# "MIPSEL" is piped through "$CC -E" and the flag becomes 1 only if the token
# comes back unchanged.  Presumably little-endian MIPS toolchains predefine
# MIPSEL as a macro, so the token is rewritten there -- TODO confirm against
# the toolchains in use.
78 $big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
# The output file is the last command-line argument that looks like "name.ext".
80 for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); }
# Redirect STDOUT only when an output file was actually named.  The
# three-argument form keeps the file name from being parsed as part of the
# mode, and a failed open now aborts instead of being silently ignored.
81 if (defined($output)) { open(STDOUT,">",$output) or die "can't open $output: $!"; }
# Without CC, fall back to the byte order of the machine running this script:
# a big-endian ('N') encoding of 1 reads back as 1 in native order ('L') only
# on a big-endian host.
83 if (!defined($big_endian))
84 { $big_endian=(unpack('L',pack('N',1))==1); }
86 # offsets of the Most and Least Significant Bytes
# Sixteen registers, $8..$23, used by the round generators as a window that
# is indexed modulo 16 (see the @X[$j%16] operands they emit).
90 @X=map("\$$_",(8..23)); # a4-a7,s0-s11
# $E takes register $24, which the register layout at the top of the file
# assigned to $t1.  @V is the ordered list of the five working variables that
# is handed to every round generator.
# NOTE(review): $A..$D are assigned in lines not shown here.
99 $E="\$24"; @V=($A,$B,$C,$D,$E);
# Because $24 now belongs to $E, $t1 is rebound to the register held in $num.
101 $t1=$num; # $num is offloaded to stack
106 my ($i,$a,$b,$c,$d,$e)=@_;
108 $code.=<<___ if (!$big_endian);
109 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
110 wsbh @X[$i],@X[$i] # byte swap($i)
111 rotr @X[$i],@X[$i],16
113 srl $t0,@X[$i],24 # byte swap($i)
115 andi $t2,@X[$i],0xFF00
125 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
131 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
134 lwl @X[$j],$j*4+$MSB($inp)
135 lwr @X[$j],$j*4+$LSB($inp)
142 lwl @X[$j],$j*4+$MSB($inp)
145 lwr @X[$j],$j*4+$LSB($inp)
162 my ($i,$a,$b,$c,$d,$e)=@_;
165 $code.=<<___ if (!$big_endian && $i==15);
166 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
167 wsbh @X[$i],@X[$i] # byte swap($i)
168 rotr @X[$i],@X[$i],16
170 srl $t0,@X[$i],24 # byte swap($i)
172 andi $t2,@X[$i],0xFF00
182 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
184 xor @X[$j%16],@X[($j+2)%16]
187 xor @X[$j%16],@X[($j+8)%16]
190 xor @X[$j%16],@X[($j+13)%16]
193 rotr @X[$j%16],@X[$j%16],31
197 xor @X[$j%16],@X[($j+2)%16]
202 xor @X[$j%16],@X[($j+8)%16]
205 xor @X[$j%16],@X[($j+13)%16]
209 addu @X[$j%16],@X[$j%16]
221 my ($i,$a,$b,$c,$d,$e)=@_;
223 $code.=<<___ if ($i<79);
224 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
225 xor @X[$j%16],@X[($j+2)%16]
228 xor @X[$j%16],@X[($j+8)%16]
231 xor @X[$j%16],@X[($j+13)%16]
234 rotr @X[$j%16],@X[$j%16],31
238 xor @X[$j%16],@X[($j+2)%16]
243 xor @X[$j%16],@X[($j+8)%16]
246 xor @X[$j%16],@X[($j+13)%16]
250 addu @X[$j%16],@X[$j%16]
258 $code.=<<___ if ($i==79);
259 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
296 my ($i,$a,$b,$c,$d,$e)=@_;
298 $code.=<<___ if ($i<79);
299 #if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
302 xor @X[$j%16],@X[($j+2)%16]
305 xor @X[$j%16],@X[($j+8)%16]
308 xor @X[$j%16],@X[($j+13)%16]
311 rotr @X[$j%16],@X[$j%16],31
315 xor @X[$j%16],@X[($j+2)%16]
320 xor @X[$j%16],@X[($j+8)%16]
323 xor @X[$j%16],@X[($j+13)%16]
328 addu @X[$j%16],@X[$j%16]
# Stack frame size counted in register-sized slots; it is multiplied by
# $SZREG wherever the frame is set up, addressed or torn down.
339 $FRAMESIZE=16; # large enough to accommodate NUBI saved registers
# Value passed to the .mask directive, one bit per saved register number.
# Both variants set bits 30-31 ($fp,$ra) and bits 16-23 ($s4-$s11); the NUBI
# variant additionally sets bits 12-15 ($s0-$s3) and bit 3 ($gp), matching the
# extra saves and restores that are emitted only for NUBI flavours.
340 $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
343 #include "mips_arch.h"
350 .globl sha1_block_data_order
351 .ent sha1_block_data_order
352 sha1_block_data_order:
353 .frame $sp,$FRAMESIZE*$SZREG,$ra
354 .mask $SAVED_REGS_MASK,-$SZREG
356 $PTR_SUB $sp,$FRAMESIZE*$SZREG
357 $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp)
358 $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp)
359 $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp)
360 $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp)
361 $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp)
362 $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp)
363 $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp)
364 $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp)
365 $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp)
366 $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp)
368 $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
369 $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp)
370 $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp)
371 $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp)
372 $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp)
373 $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp)
388 #if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
391 ori $K,0x7999 # K_00_19
396 ori $K,0x7999 # K_00_19
# Generate rounds 0..14 and then 15..19 ($i carries over between the loops).
# After every round the last element of @V is moved to the front, so the next
# round receives the previous (e,a,b,c,d) as its (a,b,c,d,e).
399 for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
400 for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
403 ori $K,0xeba1 # K_20_39
# Generate rounds 20..39, continuing from the current $i and rotating @V
# after each round.
405 for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
408 ori $K,0xbcdc # K_40_59
# Generate rounds 40..59, continuing from the current $i and rotating @V
# after each round.
410 for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
413 ori $K,0xc1d6 # K_60_79
# Generate rounds 60..79 with the same generator as rounds 20..39; the
# generator has a separate code path for the final round ($i==79).
415 for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
435 $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp)
436 $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp)
437 $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp)
438 $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp)
439 $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp)
440 $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp)
441 $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp)
442 $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp)
443 $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp)
444 $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp)
446 $code.=<<___ if ($flavour =~ /nubi/i);
447 $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp)
448 $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp)
449 $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp)
450 $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp)
451 $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp)
455 $PTR_ADD $sp,$FRAMESIZE*$SZREG
456 .end sha1_block_data_order
458 .asciiz "SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"