Expand expspeed.c to make BN_kronecker timings.

author Bodo Möller <bodo@openssl.org>

Wed, 29 Nov 2000 12:32:10 +0000 (12:32 +0000)

committer Bodo Möller <bodo@openssl.org>

Wed, 29 Nov 2000 12:32:10 +0000 (12:32 +0000)
author Bodo Möller <bodo@openssl.org>
Wed, 29 Nov 2000 12:32:10 +0000 (12:32 +0000)
committer Bodo Möller <bodo@openssl.org>
Wed, 29 Nov 2000 12:32:10 +0000 (12:32 +0000)
diff --git a/CHANGES b/CHANGES

index a469186a2703c6f6f33e535bfc7e0e13eda2e080..370ec0f9b7b2025f58fec4f1831262eab8156ed8 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -3,6 +3,9 @@
  
   Changes between 0.9.6 and 0.9.7  [xx XXX 2000]
  
+  *) BN_bin2bn bugfix (off-by-one error).
+     [Bodo Moeller]
+
    *) Make BN_mod_inverse faster by explicitly handling small quotients
       in the Euclid loop. (Speed gain about 20% for small moduli [256 or
       512 bits], about 30% for larger ones [1024 or 2048 bits].)
diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h

index fec91d7fc7be6a7645f953d82f5a7310d491de80..c81f3de8be90ecbe5a64bbdb1770c5bf5f394752 100644 (file)
--- a/crypto/bn/bn.h
+++ b/crypto/bn/bn.h
@@ -510,6 +510,7 @@ void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n);
  #define BN_F_BN_CTX_NEW                                         106
  #define BN_F_BN_DIV                                     107
  #define BN_F_BN_EXPAND2                                         108
+#define BN_F_BN_EXPAND_INTERNAL                                 120
  #define BN_F_BN_MOD_EXP2_MONT                           118
  #define BN_F_BN_MOD_EXP_MONT                            109
  #define BN_F_BN_MOD_EXP_MONT_WORD                       117
diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c

index 14bd8bbcbafcf5507069114ecd8ac3aa0d6cb74e..75a3458b11969c6895dbf361d8118960555c2509 100644 (file)
--- a/crypto/bn/bn_err.c
+++ b/crypto/bn/bn_err.c
@@ -76,6 +76,7 @@ static ERR_STRING_DATA BN_str_functs[]=
  {ERR_PACK(0,BN_F_BN_CTX_NEW,0),        "BN_CTX_new"},
  {ERR_PACK(0,BN_F_BN_DIV,0),    "BN_div"},
  {ERR_PACK(0,BN_F_BN_EXPAND2,0),        "bn_expand2"},
+{ERR_PACK(0,BN_F_BN_EXPAND_INTERNAL,0),        "BN_EXPAND_INTERNAL"},
  {ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0),  "BN_mod_exp2_mont"},
  {ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0),   "BN_mod_exp_mont"},
  {ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0),      "BN_mod_exp_mont_word"},
diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c

index 0e161d173ebc855d3038fa0c2e3b6e6a00e399d4..f0dc7d52dc257ef84abebb5619402a76d9bd7adf 100644 (file)
--- a/crypto/bn/bn_lib.c
+++ b/crypto/bn/bn_lib.c
@@ -306,7 +306,7 @@ BIGNUM *BN_new(void)
  
  /* This is used both by bn_expand2() and bn_dup_expand() */
  /* The caller MUST check that words > b->dmax before calling this */
-static BN_ULONG *internal_bn_expand(const BIGNUM *b, int words)
+static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
         {
         BN_ULONG *A,*a = NULL;
         const BN_ULONG *B;
@@ -315,13 +315,13 @@ static BN_ULONG *internal_bn_expand(const BIGNUM *b, int words)
         bn_check_top(b);        
         if (BN_get_flags(b,BN_FLG_STATIC_DATA))
                 {
-               BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
+               BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
                 return(NULL);
                 }
         a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1));
         if (A == NULL)
                 {
-               BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE);
+               BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE);
                 return(NULL);
                 }
  #if 1
@@ -329,70 +329,6 @@ static BN_ULONG *internal_bn_expand(const BIGNUM *b, int words)
         /* Check if the previous number needs to be copied */
         if (B != NULL)
                 {
-#if 0
-               /* This lot is an unrolled loop to copy b->top 
-                * BN_ULONGs from B to A
-                */
-/*
- * I have nothing against unrolling but it's usually done for
- * several reasons, namely:
- * - minimize percentage of decision making code, i.e. branches;
- * - avoid cache trashing;
- * - make it possible to schedule loads earlier;
- * Now let's examine the code below. The cornerstone of C is
- * "programmer is always right" and that's what we love it for:-)
- * For this very reason C compilers have to be paranoid when it
- * comes to data aliasing and assume the worst. Yeah, but what
- * does it mean in real life? This means that loop body below will
- * be compiled to sequence of loads immediately followed by stores
- * as compiler assumes the worst, something in A==B+1 style. As a
- * result CPU pipeline is going to starve for incoming data. Secondly
- * if A and B happen to share same cache line such code is going to
- * cause severe cache trashing. Both factors have severe impact on
- * performance of modern CPUs and this is the reason why this
- * particular piece of code is #ifdefed away and replaced by more
- * "friendly" version found in #else section below. This comment
- * also applies to BN_copy function.
- *
- *                                     <appro@fy.chalmers.se>
- */
-               for (i=b->top&(~7); i>0; i-=8)
-                       {
-                       A[0]=B[0]; A[1]=B[1]; A[2]=B[2]; A[3]=B[3];
-                       A[4]=B[4]; A[5]=B[5]; A[6]=B[6]; A[7]=B[7];
-                       A+=8;
-                       B+=8;
-                       }
-               switch (b->top&7)
-                       {
-               case 7:
-                       A[6]=B[6];
-               case 6:
-                       A[5]=B[5];
-               case 5:
-                       A[4]=B[4];
-               case 4:
-                       A[3]=B[3];
-               case 3:
-                       A[2]=B[2];
-               case 2:
-                       A[1]=B[1];
-               case 1:
-                       A[0]=B[0];
-               case 0:
-                       /* I need the 'case 0' entry for utrix cc.
-                        * If the optimizer is turned on, it does the
-                        * switch table by doing
-                        * a=top&7
-                        * a--;
-                        * goto jump_table[a];
-                        * If top is 0, this makes us jump to 0xffffffc 
-                        * which is rather bad :-(.
-                        * eric 23-Apr-1998
-                        */
-                       ;
-                       }
-#else
                 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
                         {
                         /*
@@ -413,9 +349,11 @@ static BN_ULONG *internal_bn_expand(const BIGNUM *b, int words)
                 case 3: A[2]=B[2];
                 case 2: A[1]=B[1];
                 case 1: A[0]=B[0];
-               case 0: ; /* ultrix cc workaround, see above */
+               case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does
+                        * the switch table by doing a=top&3; a--; goto jump_table[a];
+                        * which fails for top== 0 */
+                       ;
                         }
-#endif
                 }
  
         /* Now need to zero any data between b->top and b->max */
@@ -453,7 +391,7 @@ BIGNUM *bn_dup_expand(const BIGNUM *b, int words)
  
         if (words > b->dmax)
                 {
-               BN_ULONG *a = internal_bn_expand(b, words);
+               BN_ULONG *a = bn_expand_internal(b, words);
  
                 if (a)
                         {
@@ -472,7 +410,7 @@ BIGNUM *bn_dup_expand(const BIGNUM *b, int words)
                                 }
                         }
                 /* If a == NULL, there was an error in allocation in
-                  internal_bn_expand(), and NULL should be returned */
+                  bn_expand_internal(), and NULL should be returned */
                 }
         else
                 {
@@ -491,11 +429,12 @@ BIGNUM *bn_expand2(BIGNUM *b, int words)
         {
         if (words > b->dmax)
                 {
-               BN_ULONG *a = internal_bn_expand(b, words);
+               BN_ULONG *a = bn_expand_internal(b, words);
  
                 if (a)
                         {
-                       OPENSSL_free(b->d);
+                       if (b->d)
+                               OPENSSL_free(b->d);
                         b->d=a;
                         b->dmax=words;
                         }
@@ -547,7 +486,7 @@ BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
                 case 3: A[2]=B[2];
                 case 2: A[1]=B[1];
                 case 1: A[0]=B[0];
-               case 0: ; /* ultrix cc workaround, see comments in bn_expand2 */
+               case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
                 }
  #else
         memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
@@ -666,7 +605,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
                 return(NULL);
         i=((n-1)/BN_BYTES)+1;
         m=((n-1)%(BN_BYTES));
-       ret->top=i;
+       ret->top=i-1;
         while (n-- > 0)
                 {
                 l=(l<<8L)| *(s++);
diff --git a/crypto/bn/expspeed.c b/crypto/bn/expspeed.c

index 5f76aa41260bf351960f3fc9eb52a6f45822ae0d..7706f49da028e9f11e648d1532a8c04c533c1ef8 100644 (file)
--- a/crypto/bn/expspeed.c
+++ b/crypto/bn/expspeed.c
@@ -67,6 +67,7 @@
  /* determine timings for modexp, gcd, or modular inverse */
  #define TEST_EXP
  #undef TEST_GCD
+#undef TEST_KRON
  #undef TEST_INV
  
  
@@ -220,7 +221,7 @@ void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
         double tm;
         long num;
  
-#if defined(TEST_EXP) + defined(TEST_GCD) + defined(TEST_INV) != 1
+#if defined(TEST_EXP) + defined(TEST_GCD) + defined(TEST_KRON) + defined(TEST_INV) != 1
  #  error "choose one test"
  #endif
  
@@ -253,7 +254,11 @@ void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
  #elif defined(TEST_GCD)
                         if (!BN_gcd(r,a,b,ctx)) goto err;
                         if (!BN_gcd(r,b,c,ctx)) goto err;
-                       if (!BN_gcd(r,b,c,ctx)) goto err;
+                       if (!BN_gcd(r,c,a,ctx)) goto err;
+#elif defined(TEST_KRON)
+                       if (-2 == BN_kronecker(a,b,ctx)) goto err;
+                       if (-2 == BN_kronecker(b,c,ctx)) goto err;
+                       if (-2 == BN_kronecker(c,a,ctx)) goto err;
  #else /* TEST_INV */
                         if (!BN_mod_inverse(r,a,c,ctx)) goto err;
                         if (!BN_mod_inverse(r,b,c,ctx)) goto err;
@@ -265,6 +270,8 @@ void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
                         "modexp %4d ^ %4d %% %4d"
  #elif defined(TEST_GCD)
                         "3*gcd %4d %4d %4d"
+#elif defined(TEST_KRON)
+                       "3*kronecker %4d %4d %4d"
  #else /* TEST_INV */
                         "2*inv %4d %4d mod %4d"
  #endif
author	Bodo Möller <bodo@openssl.org>
	Wed, 29 Nov 2000 12:32:10 +0000 (12:32 +0000)
committer	Bodo Möller <bodo@openssl.org>
	Wed, 29 Nov 2000 12:32:10 +0000 (12:32 +0000)
CHANGES		patch \| blob \| history
crypto/bn/bn.h		patch \| blob \| history
crypto/bn/bn_err.c		patch \| blob \| history
crypto/bn/bn_lib.c		patch \| blob \| history
crypto/bn/expspeed.c		patch \| blob \| history