MIPS assembly pack: add support for SmartMIPS ASE.

author Andy Polyakov <appro@openssl.org>

Tue, 18 Sep 2012 12:52:23 +0000 (12:52 +0000)

committer Andy Polyakov <appro@openssl.org>

Tue, 18 Sep 2012 12:52:23 +0000 (12:52 +0000)
author Andy Polyakov <appro@openssl.org>
Tue, 18 Sep 2012 12:52:23 +0000 (12:52 +0000)
committer Andy Polyakov <appro@openssl.org>
Tue, 18 Sep 2012 12:52:23 +0000 (12:52 +0000)
diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl

index a00193fa455d89a71721a0363859da11bcd2955c..1d8afe9682b38d42f589279e99bd23bb515e3262 100644 (file)
--- a/crypto/aes/asm/aes-mips.pl
+++ b/crypto/aes/asm/aes-mips.pl
@@ -22,7 +22,10 @@
  
  # September 2012
  #
-# Add MIPS32R2 code.
+# Add MIPS32R2 (~10% less instructions) and SmartMIPS ASE (further
+# ~25% less instructions) code. Note that there is no run-time switch,
+# instead, code path is chosen upon pre-process time, pass -mips32r2
+# or/and -msmartmips.
  
  ######################################################################
  # There is a number of MIPS ABI in use, O32 and N32/64 are most
@@ -95,7 +98,11 @@ $code.=<<___;
  # include <openssl/fipssyms.h>
  #endif
  
-#if !defined(__vxworks) || defined(__pic__)
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
+#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
  .option        pic2
  #endif
  .set   noat
@@ -131,6 +138,89 @@ _mips_AES_encrypt:
         xor     $s3,$t3
  
         sub     $cnt,1
+#if defined(__mips_smartmips)
+       ext     $i0,$s1,16,8
+.Loop_enc:
+       ext     $i1,$s2,16,8
+       ext     $i2,$s3,16,8
+       ext     $i3,$s0,16,8
+       lwxs    $t0,$i0($Tbl)           # Te1[s1>>16]
+       ext     $i0,$s2,8,8
+       lwxs    $t1,$i1($Tbl)           # Te1[s2>>16]
+       ext     $i1,$s3,8,8
+       lwxs    $t2,$i2($Tbl)           # Te1[s3>>16]
+       ext     $i2,$s0,8,8
+       lwxs    $t3,$i3($Tbl)           # Te1[s0>>16]
+       ext     $i3,$s1,8,8
+
+       lwxs    $t4,$i0($Tbl)           # Te2[s2>>8]
+       ext     $i0,$s3,0,8
+       lwxs    $t5,$i1($Tbl)           # Te2[s3>>8]
+       ext     $i1,$s0,0,8
+       lwxs    $t6,$i2($Tbl)           # Te2[s0>>8]
+       ext     $i2,$s1,0,8
+       lwxs    $t7,$i3($Tbl)           # Te2[s1>>8]
+       ext     $i3,$s2,0,8
+
+       lwxs    $t8,$i0($Tbl)           # Te3[s3]
+       ext     $i0,$s0,24,8
+       lwxs    $t9,$i1($Tbl)           # Te3[s0]
+       ext     $i1,$s1,24,8
+       lwxs    $t10,$i2($Tbl)          # Te3[s1]
+       ext     $i2,$s2,24,8
+       lwxs    $t11,$i3($Tbl)          # Te3[s2]
+       ext     $i3,$s3,24,8
+
+       rotr    $t0,$t0,8
+       rotr    $t1,$t1,8
+       rotr    $t2,$t2,8
+       rotr    $t3,$t3,8
+
+       rotr    $t4,$t4,16
+       rotr    $t5,$t5,16
+       rotr    $t6,$t6,16
+       rotr    $t7,$t7,16
+
+       xor     $t0,$t4
+       lwxs    $t4,$i0($Tbl)           # Te0[s0>>24]
+       xor     $t1,$t5
+       lwxs    $t5,$i1($Tbl)           # Te0[s1>>24]
+       xor     $t2,$t6
+       lwxs    $t6,$i2($Tbl)           # Te0[s2>>24]
+       xor     $t3,$t7
+       lwxs    $t7,$i3($Tbl)           # Te0[s3>>24]
+
+       rotr    $t8,$t8,24
+       lw      $s0,0($key0)
+       rotr    $t9,$t9,24
+       lw      $s1,4($key0)
+       rotr    $t10,$t10,24
+       lw      $s2,8($key0)
+       rotr    $t11,$t11,24
+       lw      $s3,12($key0)
+
+       xor     $t0,$t8
+       xor     $t1,$t9
+       xor     $t2,$t10
+       xor     $t3,$t11
+
+       xor     $t0,$t4
+       xor     $t1,$t5
+       xor     $t2,$t6
+       xor     $t3,$t7
+
+       sub     $cnt,1
+       $PTR_ADD $key0,16
+       xor     $s0,$t0
+       xor     $s1,$t1
+       xor     $s2,$t2
+       xor     $s3,$t3
+       .set    noreorder
+       bnez    $cnt,.Loop_enc
+       ext     $i0,$s1,16,8
+
+       _xtr    $i0,$s1,16-2
+#else
         _xtr    $i0,$s1,16-2
  .Loop_enc:
         _xtr    $i1,$s2,16-2
@@ -181,13 +271,13 @@ _mips_AES_encrypt:
         rotr    $t2,$t2,8
         rotr    $t3,$t3,8
  # if defined(_MIPSEL)
-       lw      $t4,0($i0)              # Te1[s1>>16]
+       lw      $t4,0($i0)              # Te2[s2>>8]
         _xtr    $i0,$s3,0-2
-       lw      $t5,0($i1)              # Te1[s2>>16]
+       lw      $t5,0($i1)              # Te2[s3>>8]
         _xtr    $i1,$s0,0-2
-       lw      $t6,0($i2)              # Te1[s3>>16]
+       lw      $t6,0($i2)              # Te2[s0>>8]
         _xtr    $i2,$s1,0-2
-       lw      $t7,0($i3)              # Te1[s0>>16]
+       lw      $t7,0($i3)              # Te2[s1>>8]
         _xtr    $i3,$s2,0-2
  
         and     $i0,0x3fc
@@ -198,31 +288,31 @@ _mips_AES_encrypt:
         $PTR_ADD $i1,$Tbl
         $PTR_ADD $i2,$Tbl
         $PTR_ADD $i3,$Tbl
-       lw      $t8,0($i0)              # Te1[s1>>16]
+       lw      $t8,0($i0)              # Te3[s3]
         $PTR_INS $i0,$s0,2,8
-       lw      $t9,0($i1)              # Te1[s2>>16]
+       lw      $t9,0($i1)              # Te3[s0]
         $PTR_INS $i1,$s1,2,8
-       lw      $t10,0($i2)             # Te1[s3>>16]
+       lw      $t10,0($i2)             # Te3[s1]
         $PTR_INS $i2,$s2,2,8
-       lw      $t11,0($i3)             # Te1[s0>>16]
+       lw      $t11,0($i3)             # Te3[s2]
         $PTR_INS $i3,$s3,2,8
  # else
-       lw      $t4,0($i0)              # Te1[s1>>16]
+       lw      $t4,0($i0)              # Te2[s2>>8]
         $PTR_INS $i0,$s3,2,8
-       lw      $t5,0($i1)              # Te1[s2>>16]
+       lw      $t5,0($i1)              # Te2[s3>>8]
         $PTR_INS $i1,$s0,2,8
-       lw      $t6,0($i2)              # Te1[s3>>16]
+       lw      $t6,0($i2)              # Te2[s0>>8]
         $PTR_INS $i2,$s1,2,8
-       lw      $t7,0($i3)              # Te1[s0>>16]
+       lw      $t7,0($i3)              # Te2[s1>>8]
         $PTR_INS $i3,$s2,2,8
  
-       lw      $t8,0($i0)              # Te1[s1>>16]
+       lw      $t8,0($i0)              # Te3[s3]
         _xtr    $i0,$s0,24-2
-       lw      $t9,0($i1)              # Te1[s2>>16]
+       lw      $t9,0($i1)              # Te3[s0]
         _xtr    $i1,$s1,24-2
-       lw      $t10,0($i2)             # Te1[s3>>16]
+       lw      $t10,0($i2)             # Te3[s1]
         _xtr    $i2,$s2,24-2
-       lw      $t11,0($i3)             # Te1[s0>>16]
+       lw      $t11,0($i3)             # Te3[s2]
         _xtr    $i3,$s3,24-2
  
         and     $i0,0x3fc
@@ -319,6 +409,7 @@ _mips_AES_encrypt:
         .set    noreorder
         bnez    $cnt,.Loop_enc
         _xtr    $i0,$s1,16-2
+#endif
  
         .set    reorder
         _xtr    $i1,$s2,16-2
@@ -621,6 +712,89 @@ _mips_AES_decrypt:
         xor     $s3,$t3
  
         sub     $cnt,1
+#if defined(__mips_smartmips)
+       ext     $i0,$s3,16,8
+.Loop_dec:
+       ext     $i1,$s0,16,8
+       ext     $i2,$s1,16,8
+       ext     $i3,$s2,16,8
+       lwxs    $t0,$i0($Tbl)           # Td1[s3>>16]
+       ext     $i0,$s2,8,8
+       lwxs    $t1,$i1($Tbl)           # Td1[s0>>16]
+       ext     $i1,$s3,8,8
+       lwxs    $t2,$i2($Tbl)           # Td1[s1>>16]
+       ext     $i2,$s0,8,8
+       lwxs    $t3,$i3($Tbl)           # Td1[s2>>16]
+       ext     $i3,$s1,8,8
+
+       lwxs    $t4,$i0($Tbl)           # Td2[s2>>8]
+       ext     $i0,$s1,0,8
+       lwxs    $t5,$i1($Tbl)           # Td2[s3>>8]
+       ext     $i1,$s2,0,8
+       lwxs    $t6,$i2($Tbl)           # Td2[s0>>8]
+       ext     $i2,$s3,0,8
+       lwxs    $t7,$i3($Tbl)           # Td2[s1>>8]
+       ext     $i3,$s0,0,8
+
+       lwxs    $t8,$i0($Tbl)           # Td3[s1]
+       ext     $i0,$s0,24,8
+       lwxs    $t9,$i1($Tbl)           # Td3[s2]
+       ext     $i1,$s1,24,8
+       lwxs    $t10,$i2($Tbl)          # Td3[s3]
+       ext     $i2,$s2,24,8
+       lwxs    $t11,$i3($Tbl)          # Td3[s0]
+       ext     $i3,$s3,24,8
+
+       rotr    $t0,$t0,8
+       rotr    $t1,$t1,8
+       rotr    $t2,$t2,8
+       rotr    $t3,$t3,8
+
+       rotr    $t4,$t4,16
+       rotr    $t5,$t5,16
+       rotr    $t6,$t6,16
+       rotr    $t7,$t7,16
+
+       xor     $t0,$t4
+       lwxs    $t4,$i0($Tbl)           # Td0[s0>>24]
+       xor     $t1,$t5
+       lwxs    $t5,$i1($Tbl)           # Td0[s1>>24]
+       xor     $t2,$t6
+       lwxs    $t6,$i2($Tbl)           # Td0[s2>>24]
+       xor     $t3,$t7
+       lwxs    $t7,$i3($Tbl)           # Td0[s3>>24]
+
+       rotr    $t8,$t8,24
+       lw      $s0,0($key0)
+       rotr    $t9,$t9,24
+       lw      $s1,4($key0)
+       rotr    $t10,$t10,24
+       lw      $s2,8($key0)
+       rotr    $t11,$t11,24
+       lw      $s3,12($key0)
+
+       xor     $t0,$t8
+       xor     $t1,$t9
+       xor     $t2,$t10
+       xor     $t3,$t11
+
+       xor     $t0,$t4
+       xor     $t1,$t5
+       xor     $t2,$t6
+       xor     $t3,$t7
+
+       sub     $cnt,1
+       $PTR_ADD $key0,16
+       xor     $s0,$t0
+       xor     $s1,$t1
+       xor     $s2,$t2
+       xor     $s3,$t3
+       .set    noreorder
+       bnez    $cnt,.Loop_dec
+       ext     $i0,$s3,16,8
+
+       _xtr    $i0,$s3,16-2
+#else
         _xtr    $i0,$s3,16-2
  .Loop_dec:
         _xtr    $i1,$s0,16-2
@@ -811,6 +985,7 @@ _mips_AES_decrypt:
         .set    noreorder
         bnez    $cnt,.Loop_dec
         _xtr    $i0,$s3,16-2
+#endif
  
         .set    reorder
         lw      $t4,1024($Tbl)          # prefetch Td4
@@ -1936,7 +2111,10 @@ foreach (split("\n",$code)) {
                 sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
         }
  
-       s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e if(!$big_endian);
+       if (!$big_endian) {
+           s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
+           s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
+       }
  
         print $_,"\n";
  }
diff --git a/crypto/sha/asm/sha1-mips.pl b/crypto/sha/asm/sha1-mips.pl

index 63c3efa03f6ee298cb58d229755604a0f2cffc88..73bf0609b48c376b621a6e3fdfeaaea2e99de757 100644 (file)
--- a/crypto/sha/asm/sha1-mips.pl
+++ b/crypto/sha/asm/sha1-mips.pl
@@ -17,7 +17,7 @@
  
  # September 2012.
  #
-# Add MIPS32r2 code.
+# Add MIPS32r2 code (>25% less instructions).
  
  ######################################################################
  # There is a number of MIPS ABI in use, O32 and N32/64 are most
@@ -333,6 +333,10 @@ $code=<<___;
  # include <openssl/fipssyms.h>
  #endif
  
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
  .text
  
  .set   noat
diff --git a/crypto/sha/asm/sha512-mips.pl b/crypto/sha/asm/sha512-mips.pl

index d190414203a242736cc41f4fc8452869b05cac4c..1fff09bb4daccc1710f0670cc2fa48e1fd1022ef 100644 (file)
--- a/crypto/sha/asm/sha512-mips.pl
+++ b/crypto/sha/asm/sha512-mips.pl
@@ -19,7 +19,7 @@
  
  # September 2012.
  #
-# Add MIPS[32|64]r2 code.
+# Add MIPS[32|64]R2 code (>25% less instructions).
  
  ######################################################################
  # There is a number of MIPS ABI in use, O32 and N32/64 are most
@@ -293,9 +293,13 @@ $code.=<<___;
  # include <openssl/fipssyms.h>
  #endif
  
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
  .text
  .set   noat
-#if !defined(__vxworks) || defined(__pic__)
+#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
  .option        pic2
  #endif
author	Andy Polyakov <appro@openssl.org>
	Tue, 18 Sep 2012 12:52:23 +0000 (12:52 +0000)
committer	Andy Polyakov <appro@openssl.org>
	Tue, 18 Sep 2012 12:52:23 +0000 (12:52 +0000)
crypto/aes/asm/aes-mips.pl		patch \| blob \| history
crypto/sha/asm/sha1-mips.pl		patch \| blob \| history
crypto/sha/asm/sha512-mips.pl		patch \| blob \| history