Fix bug in the SMALL_FOOTPRINT path of bn_mul_mont (mask the top-word sum with BN_MASK2) and clarify the comment about performance figures.

author Andy Polyakov <appro@openssl.org>

Fri, 14 Oct 2005 15:22:27 +0000 (15:22 +0000)

committer Andy Polyakov <appro@openssl.org>

Fri, 14 Oct 2005 15:22:27 +0000 (15:22 +0000)
author Andy Polyakov <appro@openssl.org>
Fri, 14 Oct 2005 15:22:27 +0000 (15:22 +0000)
committer Andy Polyakov <appro@openssl.org>
Fri, 14 Oct 2005 15:22:27 +0000 (15:22 +0000)
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c

index 9b8e380c4f7e1ae11110d3c2f9daef864b67c0ed..cd50b182b7bf95a16c0029c57db4177686159e13 100644 (file)
--- a/crypto/bn/bn_asm.c
+++ b/crypto/bn/bn_asm.c
@@ -835,8 +835,12 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
   * observed to give 40% faster rsa1024 private key operations and 10%
   * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
   * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
- * reference implementation, one to be used as start-point for
- * platform-specific assembler.
+ * reference implementation, one to be used as starting point for
+ * platform-specific assembler. Mentioned numbers apply to compiler
+ * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
+ * can vary not only from platform to platform, but even for compiler
+ * versions. Assembler vs. assembler improvement coefficients can
+ * [and are known to] differ and are to be documented elsewhere.
   */
  int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
         {
@@ -963,12 +967,12 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
         for(i=0;i<num;i++)
                 {
                 c0         = bn_mul_add_words(tp,ap,num,bp[i]);
-               c1         = tp[num] + c0;
+               c1         = (tp[num] + c0)&BN_MASK2;
                 tp[num]    = c1;
                 tp[num+1]  = (c1<c0?1:0);
  
                 c0         = bn_mul_add_words(tp,np,num,tp[0]*n0);
-               c1         = tp[num] + c0;
+               c1         = (tp[num] + c0)&BN_MASK2;
                 tp[num]    = c1;
                 tp[num+1] += (c1<c0?1:0);
                 for(j=0;j<=num;j++)     tp[j]=tp[j+1];
author	Andy Polyakov <appro@openssl.org>
	Fri, 14 Oct 2005 15:22:27 +0000 (15:22 +0000)
committer	Andy Polyakov <appro@openssl.org>
	Fri, 14 Oct 2005 15:22:27 +0000 (15:22 +0000)