3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # This module doesn't present direct interest for OpenSSL, because it
11 # doesn't provide better performance for longer keys. While 512-bit
12 # RSA private key operations are 40% faster, 1024-bit ones are hardly
13 # faster at all, while longer key operations are slower by up to 20%.
14 # It might be of interest to embedded system developers though, as
15 # it's smaller than 1KB, yet offers ~3x improvement over compiler
18 ######################################################################
19 # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
20 # widely used. Then there is a new contender: NUBI. It appears that if
21 # one picks the latter, it's possible to arrange code in ABI neutral
22 # manner. Therefore let's stick to NUBI register layout:
# NUBI register layout.  Build the "$<n>" assembler operand for every
# general-purpose register once, then bind the symbolic names used by the
# code templates to the register numbers NUBI assigns them.
my @mips_reg = map { '$' . $_ } 0 .. 31;

($zero, $at, $t0, $t1, $t2) = @mips_reg[0 .. 2, 24, 25];
($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7) = @mips_reg[4 .. 11];
($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11)
    = @mips_reg[12 .. 23];
($gp, $tp, $sp, $fp, $ra) = @mips_reg[3, 28 .. 31];
29 # The return value is placed in $a0. The following coding rules facilitate
32 # - never ever touch $tp, "thread pointer", former $gp;
33 # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
35 # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
37 # For reference here is register layout for N32/64 MIPS ABIs:
39 # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
40 # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
41 # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
42 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
43 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
# Target ABI flavour.  `shift` outside of a subroutine removes and returns
# the first command-line argument.
45 $flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
# Flavours whose name contains "64" or "n32" (case-insensitive) do pointer
# arithmetic with the dadd/dsub mnemonics.
47 if ($flavour =~ /64|n32/i) {
48 $PTR_ADD="dadd"; # incidentally works even on n32
49 $PTR_SUB="dsub"; # incidentally works even on n32
# Register bit mask that is OR-ed into the `.mask` directive of
# bn_mul_mont_internal: bits 12-23 for NUBI flavours, bits 16-23 otherwise.
60 $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
64 ######################################################################
# Consume command-line arguments until one looks like an output file name
# ("name.ext": a word character, then word characters or dashes, a dot and
# an extension) or the arguments run out.  $output is left holding that
# name, or a false value when nothing matched.
while (($output = shift) && ($output !~ /^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT to the requested file.  The three-argument form keeps the
# file name from being interpreted as part of the open mode, the redirect is
# skipped when no output name was given (the generated code then goes to the
# inherited STDOUT), and a failure to open is reported instead of being
# silently ignored.
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
# Second test for the 64-bit/n32 flavours.
# NOTE(review): presumably this branch selects the BN_ULONG-sized mnemonics
# and word size ($LD, $ST, $SUBU, $BNSZ, ... used by the code template) --
# confirm against the full branch body.
69 if ($flavour =~ /64|n32/i) {
# Symbolic names for the incoming arguments.  $a0-$a4 are the first five
# argument registers of the register map; the trailing comments give the C
# parameter each one carries.
86 $rp=$a0; # BN_ULONG *rp,
87 $ap=$a1; # const BN_ULONG *ap,
88 $bp=$a2; # const BN_ULONG *bp,
89 $np=$a3; # const BN_ULONG *np,
90 $n0=$a4; # const BN_ULONG *n0,
# The statement below appends its heredoc text to $code only when the
# flavour matches /o32/i.  The template lines that follow open
# bn_mul_mont_internal: the .mask directive advertises bit 30 ($fp) together
# with $SAVED_REGS_MASK, the stack pointer is lowered by $FRAMESIZE
# registers, and $fp and $s11 down to $s4 are stored in the top nine slots
# of that frame.
123 $code.=<<___ if ($flavour =~ /o32/i);
129 beqzl $at,bn_mul_mont_internal
136 .ent bn_mul_mont_internal
137 bn_mul_mont_internal:
138 .frame $fp,$FRAMESIZE*$SZREG,$ra
139 .mask 0x40000000|$SAVED_REGS_MASK,-$SZREG
140 $PTR_SUB $sp,$FRAMESIZE*$SZREG
141 $REG_S $fp,($FRAMESIZE-1)*$SZREG($sp)
142 $REG_S $s11,($FRAMESIZE-2)*$SZREG($sp)
143 $REG_S $s10,($FRAMESIZE-3)*$SZREG($sp)
144 $REG_S $s9,($FRAMESIZE-4)*$SZREG($sp)
145 $REG_S $s8,($FRAMESIZE-5)*$SZREG($sp)
146 $REG_S $s7,($FRAMESIZE-6)*$SZREG($sp)
147 $REG_S $s6,($FRAMESIZE-7)*$SZREG($sp)
148 $REG_S $s5,($FRAMESIZE-8)*$SZREG($sp)
149 $REG_S $s4,($FRAMESIZE-9)*$SZREG($sp)
# NUBI flavours additionally store $s3 down to $s0 in frame slots 10-13
# from the top, matching the wider NUBI value of $SAVED_REGS_MASK.  Further
# down, the backtick-quoted expression in the `sll` line is not assembly: it
# is evaluated by the final substitution pass over $code and replaced with
# its result.  The tail of this block reloads $fp and $s11 down to $s4 from
# the slots they were stored in by the prologue.
151 $code.=<<___ if ($flavour =~ /nubi/i);
152 $REG_S $s3,($FRAMESIZE-10)*$SZREG($sp)
153 $REG_S $s2,($FRAMESIZE-11)*$SZREG($sp)
154 $REG_S $s1,($FRAMESIZE-12)*$SZREG($sp)
155 $REG_S $s0,($FRAMESIZE-13)*$SZREG($sp)
162 $LD $bi,0($bp) # bp[0]
163 $LD $aj,0($ap) # ap[0]
164 $LD $nj,0($np) # np[0]
166 $PTR_SUB $sp,2*$BNSZ # place for two extra words
167 sll $num,`log($BNSZ)/log(2)`
341 $ST $hi1,2*$BNSZ($tp)
348 $PTR_ADD $tj,$sp,$num # &tp[num]
351 li $hi0,0 # clear borrow bit
354 .Lsub: $LD $lo0,($tp)
358 $SUBU $lo1,$lo0,$lo1 # tp[i]-np[i]
368 $SUBU $hi0,$hi1,$hi0 # handle upmost overflow bit
370 $PTR_SUB $rp,$num # restore rp
375 or $ap,$ap,$bp # ap=borrow?tp:rp
378 .Lcopy: $LD $aj,($ap)
392 $REG_L $fp,($FRAMESIZE-1)*$SZREG($sp)
393 $REG_L $s11,($FRAMESIZE-2)*$SZREG($sp)
394 $REG_L $s10,($FRAMESIZE-3)*$SZREG($sp)
395 $REG_L $s9,($FRAMESIZE-4)*$SZREG($sp)
396 $REG_L $s8,($FRAMESIZE-5)*$SZREG($sp)
397 $REG_L $s7,($FRAMESIZE-6)*$SZREG($sp)
398 $REG_L $s6,($FRAMESIZE-7)*$SZREG($sp)
399 $REG_L $s5,($FRAMESIZE-8)*$SZREG($sp)
400 $REG_L $s4,($FRAMESIZE-9)*$SZREG($sp)
# NUBI flavours reload $s3 down to $s0 from the same frame slots (10-13 from
# the top) that the NUBI-only stores in the prologue wrote.  The remaining
# template lines release the $FRAMESIZE-register frame, close the
# bn_mul_mont_internal symbol with .end, and embed an identification string.
402 $code.=<<___ if ($flavour =~ /nubi/i);
403 $REG_L $s3,($FRAMESIZE-10)*$SZREG($sp)
404 $REG_L $s2,($FRAMESIZE-11)*$SZREG($sp)
405 $REG_L $s1,($FRAMESIZE-12)*$SZREG($sp)
406 $REG_L $s0,($FRAMESIZE-13)*$SZREG($sp)
410 $PTR_ADD $sp,$FRAMESIZE*$SZREG
411 .end bn_mul_mont_internal
413 .asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
# Post-process the accumulated template: every `...` span in $code is
# treated as a Perl expression, evaluated (/e), and replaced by its value.
# /g applies this to all spans; the character class lets a span cross
# line boundaries.
$code =~ s{\`([^\`]*)\`}{eval $1}gem;