ARM assembler pack: engage newly introduced armv4-gf2m module.

author Andy Polyakov <appro@openssl.org>

Thu, 5 May 2011 21:57:11 +0000 (21:57 +0000)

committer Andy Polyakov <appro@openssl.org>

Thu, 5 May 2011 21:57:11 +0000 (21:57 +0000)
author Andy Polyakov <appro@openssl.org>
Thu, 5 May 2011 21:57:11 +0000 (21:57 +0000)
committer Andy Polyakov <appro@openssl.org>
Thu, 5 May 2011 21:57:11 +0000 (21:57 +0000)
diff --git a/Configure b/Configure

index 02205d14fab246500df9678fee75cc7a42771184..d2e102794377a5789ed4289f41334b5a7e36040f 100755 (executable)
--- a/Configure
+++ b/Configure
@@ -135,7 +135,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-a
  my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::::::";
  my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::";
  my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o";
-my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void";
+my $armv4_asm=":bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o:void";
  my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:32";
  my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:64";
  my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o:::::::";
@@ -1493,6 +1493,7 @@ $cflags.=" -DOPENSSL_BN_ASM_PART_WORDS" if ($bn_obj =~ /bn-586/);
  $cflags.=" -DOPENSSL_IA32_SSE2" if (!$no_sse2 && $bn_obj =~ /86/);
  
  $cflags.=" -DOPENSSL_BN_ASM_MONT" if ($bn_obj =~ /-mont/);
+$cflags.=" -DOPENSSL_BN_ASM_GF2m" if ($bn_obj =~ /-gf2m/);
  
  if ($fips)
         {
diff --git a/TABLE b/TABLE

index c747a977bf0e78f80ad07ebbd6f90a7c6805194b..7e0748ad4ca38ee3c68f8e67c682b2635e014eaa 100644 (file)
--- a/TABLE
+++ b/TABLE
@@ -1033,7 +1033,7 @@ $sys_id       =
  $lflags       = -ldl
  $bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
  $cpuid_obj    = 
-$bn_obj       = bn_asm.o armv4-mont.o
+$bn_obj       = bn_asm.o armv4-mont.o armv4-gf2m.o
  $des_obj      = 
  $aes_obj      = aes_cbc.o aes-armv4.o
  $bf_obj       = 
@@ -3689,7 +3689,7 @@ $sys_id       =
  $lflags       = -ldl
  $bn_ops       = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
  $cpuid_obj    = 
-$bn_obj       = bn_asm.o armv4-mont.o
+$bn_obj       = bn_asm.o armv4-mont.o armv4-gf2m.o
  $des_obj      = 
  $aes_obj      = aes_cbc.o aes-armv4.o
  $bf_obj       = 
diff --git a/crypto/bn/Makefile b/crypto/bn/Makefile

index 74bc4f721f866c9927f1a347a6c550d06bda17e7..18d704bc979b783c04887d9824ac997ec935b173 100644 (file)
--- a/crypto/bn/Makefile
+++ b/crypto/bn/Makefile
@@ -120,6 +120,9 @@ alpha-mont.s:       asm/alpha-mont.pl
  
  # GNU make "catch all"
  %-mont.s:      asm/%-mont.pl;  $(PERL) $< $(PERLASM_SCHEME) $@
+%-gf2m.S:      asm/%-gf2m.pl;  $(PERL) $< $(PERLASM_SCHEME) $@
+
+armv4-gf2m.o:  armv4-gf2m.S
  
  files:
         $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl

index 67ec4b2c14ee91c4ba5513693914734ea350e73f..4fe9db9894aecb28cc82789d047c4b85847fd651 100644 (file)
--- a/crypto/bn/asm/armv4-gf2m.pl
+++ b/crypto/bn/asm/armv4-gf2m.pl
@@ -21,13 +21,8 @@
  # runs in even less cycles, ~30, improvement is measurable only on
  # longer keys. One has to optimize code elsewhere to get NEON glow...
  
-$a="r1";
-$b="r0";
-
-($a0,$a1,$a2,$a12,$a4,$a14)=
-($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12);
-
-$mask="r12";
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
  
  sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
  sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
@@ -67,9 +62,21 @@ mul_1x1_neon:
         bx      lr
  .size  mul_1x1_neon,.-mul_1x1_neon
  #endif
+___
+################
+# private interface to mul_1x1_ialu
+#
+$a="r1";
+$b="r0";
  
-.align 5
+($a0,$a1,$a2,$a12,$a4,$a14)=
+($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12);
+
+$mask="r12";
+
+$code.=<<___;
  .type  mul_1x1_ialu,%function
+.align 5
  mul_1x1_ialu:
         mov     $a0,#0
         bic     $a1,$a,#3<<30           @ a1=a&0x3fffffff
@@ -147,7 +154,15 @@ mul_1x1_ialu:
  
         mov     pc,lr
  .size  mul_1x1_ialu,.-mul_1x1_ialu
+___
+################
+# void bn_GF2m_mul_2x2(BN_ULONG *r,
+#      BN_ULONG a1,BN_ULONG a0,
+#      BN_ULONG b1,BN_ULONG b0);       # r[3..0]=a1a0·b1b0
+
+($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23));
  
+$code.=<<___;
  .global        bn_GF2m_mul_2x2
  .type  bn_GF2m_mul_2x2,%function
  .align 5
@@ -157,9 +172,7 @@ bn_GF2m_mul_2x2:
  .Lpic: ldr     r12,[pc,r12]
         tst     r12,#1
         beq     .Lialu
-___
-($A1,$B1,$A0,$B0,$A0B0,$A1B1)=map("d$_",(18..23));
-$code.=<<___;
+
         veor    $A1,$A1
         vmov.32 $B1,r3,r3               @ two copies of b1
         vmov.32 ${A1}[0],r1             @ a1
diff --git a/crypto/bn/bn_gf2m.c b/crypto/bn/bn_gf2m.c

index 5a13515c3634d13769ecc19455679ae46b6bcdbc..19a101bccdaf429f8f0c581801e67211bb35f368 100644 (file)
--- a/crypto/bn/bn_gf2m.c
+++ b/crypto/bn/bn_gf2m.c
@@ -126,6 +126,7 @@ static const BN_ULONG SQR_tb[16] =
      SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
  #endif
  
+#if !defined(OPENSSL_BN_ASM_GF2m)
  /* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
   * result is a polynomial r with degree < 2 * BN_BITS - 1
   * The caller MUST ensure that the variables have the right amount
@@ -220,7 +221,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c
         r[2] ^= m1 ^ r[1] ^ r[3];  /* h0 ^= m1 ^ l1 ^ h1; */
         r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0;  /* l1 ^= l0 ^ h0 ^ m0; */
         }
-
+#else
+void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
+#endif 
  
  /* Add polynomials a and b and store result in r; r could be a or b, a and b 
   * could be equal; r is the bitwise XOR of a and b.
author	Andy Polyakov <appro@openssl.org>
	Thu, 5 May 2011 21:57:11 +0000 (21:57 +0000)
committer	Andy Polyakov <appro@openssl.org>
	Thu, 5 May 2011 21:57:11 +0000 (21:57 +0000)
Configure		patch | blob | history
TABLE		patch | blob | history
crypto/bn/Makefile		patch | blob | history
crypto/bn/asm/armv4-gf2m.pl		patch | blob | history
crypto/bn/bn_gf2m.c		patch | blob | history