X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha512-mips.pl;h=e6fd2687f887c1c7d5d340860002ce4f3aa4398a;hb=0d7903f83f84bba1d29225efd999c633a0c5ba01;hp=ba5b250890e00a88d3e1bd40cde7a341fe394024;hpb=227a822ab628267b5fd1b168a0a0bd58482b35ef;p=oweals%2Fopenssl.git diff --git a/crypto/sha/asm/sha512-mips.pl b/crypto/sha/asm/sha512-mips.pl index ba5b250890..e6fd2687f8 100644 --- a/crypto/sha/asm/sha512-mips.pl +++ b/crypto/sha/asm/sha512-mips.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -17,6 +24,10 @@ # ~17%, but it comes for free, because it's same instruction sequence. # Improvement coefficients are for aligned input. +# September 2012. +# +# Add MIPS[32|64]R2 code (>25% less instructions). + ###################################################################### # There is a number of MIPS ABI in use, O32 and N32/64 are most # widely used. Then there is a new contender: NUBI. It appears that if @@ -45,18 +56,20 @@ # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); # -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 +$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 + $PTR_LA="dla"; + $PTR_ADD="daddu"; # incidentally works even on n32 + $PTR_SUB="dsubu"; # incidentally works even on n32 $REG_S="sd"; $REG_L="ld"; $PTR_SLL="dsll"; # incidentally works even on n32 $SZREG=8; } else { - $PTR_ADD="add"; - $PTR_SUB="sub"; + $PTR_LA="la"; + $PTR_ADD="addu"; + $PTR_SUB="subu"; $REG_S="sw"; $REG_L="lw"; $PTR_SLL="sll"; @@ -68,9 +81,9 @@ $pf = ($flavour =~ /nubi/i) ? $t0 : $t2; # ###################################################################### -$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; +$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC}); -for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } +for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); } open STDOUT,">$output"; if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); } @@ -83,6 +96,7 @@ if ($output =~ /512/) { $SLL="dsll"; # shift left logical $SRL="dsrl"; # shift right logical $ADDU="daddu"; + $ROTR="drotr"; @Sigma0=(28,34,39); @Sigma1=(14,18,41); @sigma0=( 7, 1, 8); # right shift first @@ -97,6 +111,7 @@ if ($output =~ /512/) { $SLL="sll"; # shift left logical $SRL="srl"; # shift right logical $ADDU="addu"; + $ROTR="rotr"; @Sigma0=( 2,13,22); @Sigma1=( 6,11,25); @sigma0=( 3, 7,18); # right shift first @@ -120,10 +135,18 @@ my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]); $code.=<<___ if ($i<15); +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + ${LD} @X[1],`($i+1)*$SZ`($inp) +#else ${LD}l @X[1],`($i+1)*$SZ+$MSB`($inp) ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp) +#endif ___ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4); +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + wsbh @X[0],@X[0] # byte swap($i) + rotr @X[0],@X[0],16 +#else srl $tmp0,@X[0],24 # byte swap($i) srl $tmp1,@X[0],8 andi $tmp2,@X[0],0xFF00 @@ -133,8 +156,13 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4); or @X[0],$tmp0 or $tmp1,$tmp2 or @X[0],$tmp1 +#endif ___ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8); +#if defined(_MIPS_ARCH_MIPS64R2) + dsbh @X[0],@X[0] # byte swap($i) + dshd @X[0],@X[0] +#else ori $tmp0,$zero,0xFF dsll $tmp2,$tmp0,32 or $tmp0,$tmp2 # 0x000000FF000000FF @@ -153,8 +181,31 @@ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8); dsrl $tmp1,@X[0],32 dsll @X[0],32 or @X[0],$tmp1 +#endif ___ $code.=<<___; +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + xor $tmp2,$f,$g # $i + $ROTR $tmp0,$e,@Sigma1[0] + $ADDU $T1,$X[0],$h + $ROTR $tmp1,$e,@Sigma1[1] + and $tmp2,$e + $ROTR $h,$e,@Sigma1[2] + xor $tmp0,$tmp1 + $ROTR $tmp1,$a,@Sigma0[0] + xor $tmp2,$g # Ch(e,f,g) + xor $tmp0,$h # Sigma1(e) + + $ROTR $h,$a,@Sigma0[1] + $ADDU $T1,$tmp2 + $LD $tmp2,`$i*$SZ`($Ktbl) # K[$i] + xor $h,$tmp1 + $ROTR $tmp1,$a,@Sigma0[2] + $ADDU $T1,$tmp0 + and $tmp0,$b,$c + xor $h,$tmp1 # Sigma0(a) + xor $tmp1,$b,$c +#else $ADDU $T1,$X[0],$h # $i $SRL $h,$e,@Sigma1[0] xor $tmp2,$f,$g @@ -184,16 +235,15 @@ $code.=<<___; xor $h,$tmp1 $SLL $tmp1,$a,`$SZ*8-@Sigma0[0]` xor $h,$tmp0 - $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer + and $tmp0,$b,$c xor $h,$tmp1 # Sigma0(a) - - or $tmp0,$a,$b - and $tmp1,$a,$b - and $tmp0,$c - or $tmp1,$tmp0 # Maj(a,b,c) + xor $tmp1,$b,$c +#endif + $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer + $ADDU $h,$tmp0 + and $tmp1,$a $ADDU $T1,$tmp2 # +=K[$i] - $ADDU $h,$tmp1 - + $ADDU $h,$tmp1 # +=Maj(a,b,c) $ADDU $d,$T1 $ADDU $h,$T1 ___ @@ -207,6 +257,20 @@ my $i=@_[0]; my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]); $code.=<<___; +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i) + $ROTR $tmp0,@X[1],@sigma0[1] + $ADDU @X[0],@X[9] # +=X[i+9] + xor $tmp2,$tmp0 + $ROTR $tmp0,@X[1],@sigma0[2] + + $SRL $tmp3,@X[14],@sigma1[0] + $ROTR $tmp1,@X[14],@sigma1[1] + xor $tmp2,$tmp0 # sigma0(X[i+1]) + $ROTR $tmp0,@X[14],@sigma1[2] + xor $tmp3,$tmp1 + $ADDU @X[0],$tmp2 +#else $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i) $ADDU @X[0],@X[9] # +=X[i+9] $SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]` @@ -227,7 +291,7 @@ $code.=<<___; xor $tmp3,$tmp0 $SRL $tmp0,@X[14],@sigma1[2] xor $tmp3,$tmp1 - +#endif xor $tmp3,$tmp0 # sigma1(X[i+14]) $ADDU @X[0],$tmp3 ___ @@ -235,16 +299,14 @@ ___ } $FRAMESIZE=16*$SZ+16*$SZREG; -$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000; +$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000"; $code.=<<___; -#ifdef OPENSSL_FIPSCANISTER -# include -#endif +#include "mips_arch.h" .text .set noat -#if !defined(__vxworks) || defined(__pic__) +#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) .option pic2 #endif @@ -288,7 +350,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification ___ $code.=<<___; .set reorder - la $Ktbl,K${label} # PIC-ified 'load address' + $PTR_LA $Ktbl,K${label} # PIC-ified 'load address' $LD $A,0*$SZ($ctx) # load context $LD $B,1*$SZ($ctx) @@ -305,8 +367,12 @@ $code.=<<___; .align 5 .Loop: +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + ${LD} @X[0],($inp) +#else ${LD}l @X[0],$MSB($inp) ${LD}r @X[0],$LSB($inp) +#endif ___ for ($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); } @@ -351,7 +417,7 @@ $code.=<<___; $ST $G,6*$SZ($ctx) $ST $H,7*$SZ($ctx) - bnel $inp,@X[15],.Loop + bne $inp,@X[15],.Loop $PTR_SUB $Ktbl,`($rounds-16)*$SZ` # rewind $Ktbl $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)