bn_pollute(a); \
}
-void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num);
-void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num);
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num);
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
#ifdef OPENSSL_BN_ASM_MONT
/*
 * This is essentially a reference implementation, which may or may not
- * result in performance improvement. E.g. on IA-32 this does give 40%
- * faster rsa1024 private key operations and 10% faster rsa4096 ones,
- * while on AMD64 it improves rsa1024 sign only by 10% and *worsens*
- * rsa4096 sign by 15%. Once again, it's a reference implementation,
- * one to be used as start-point for platform-specific assembler.
+ * result in performance improvement. E.g. on IA-32 this routine was
+ * observed to give 40% faster rsa1024 private key operations and 10%
+ * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
+ * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
+ * reference implementation, one to be used as a starting point for
+ * platform-specific assembler.
*/
-void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
{
BN_ULONG c0,c1,ml,*tp;
#ifdef mul64
volatile BN_ULONG *vp;
int i=0,j;
+#if 0 /* template for platform-specific implementation */
+ if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num);
+#endif
vp = tp = alloca((num+2)*sizeof(BN_ULONG));
tp[num] = bn_mul_words(tp,ap,num,bp[0]);
if (tp[num]!=0 || c0==0)
{
for(i=0;i<num+2;i++) vp[i] = 0;
- return;
+ return 1;
}
}
for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;
vp[num] = 0;
vp[num+1] = 0;
+ return 1;
}
-
-void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num)
- {
- bn_mul_mont(rp,ap,ap,np,n0,num);
- }
+#else
+/*
+ * A return value of 0 indicates that the multiplication/convolution
+ * was not performed, signalling the caller to fall back to the
+ * alternative/original code path.
+ */
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+{ return 0; }
#endif /* OPENSSL_BN_ASM_MONT */
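
For reference, here is a minimal C sketch of the word-level Montgomery
multiplication (CIOS style) that the function bodies elided above implement,
i.e. rp = ap*bp*2^(-64*num) mod np, with n0 = -np[0]^(-1) mod 2^64. It is
illustrative only: mont_mul_sketch, MAX_LIMBS, the fixed scratch buffer and
the use of unsigned __int128 are assumptions of this sketch rather than
OpenSSL code, and the volatile-pointer stack scrubbing done by the original
(the vp[i] = 0 stores) is omitted.

#include <stdint.h>
#include <string.h>

#define MAX_LIMBS 64            /* illustrative fixed bound */

/* Sketch only -- not OpenSSL's bn_mul_mont. */
static int mont_mul_sketch(uint64_t *rp, const uint64_t *ap,
                           const uint64_t *bp, const uint64_t *np,
                           uint64_t n0, int num)
{
    uint64_t tp[MAX_LIMBS + 2]; /* num+2 words, as in the alloca above */
    int i, j;

    if (num > MAX_LIMBS)
        return 0;               /* decline: caller takes the generic path */

    memset(tp, 0, (num + 2) * sizeof(uint64_t));

    for (i = 0; i < num; i++) {
        unsigned __int128 acc;
        uint64_t carry = 0, m;

        /* tp += ap * bp[i] */
        for (j = 0; j < num; j++) {
            acc = (unsigned __int128)ap[j] * bp[i] + tp[j] + carry;
            tp[j] = (uint64_t)acc;
            carry = (uint64_t)(acc >> 64);
        }
        acc = (unsigned __int128)tp[num] + carry;
        tp[num] = (uint64_t)acc;
        tp[num + 1] = (uint64_t)(acc >> 64);

        /* pick m so that tp + m*np is divisible by 2^64 */
        m = tp[0] * n0;

        /* tp = (tp + m*np) / 2^64; the low word is zero by construction */
        acc = (unsigned __int128)np[0] * m + tp[0];
        carry = (uint64_t)(acc >> 64);
        for (j = 1; j < num; j++) {
            acc = (unsigned __int128)np[j] * m + tp[j] + carry;
            tp[j - 1] = (uint64_t)acc;
            carry = (uint64_t)(acc >> 64);
        }
        acc = (unsigned __int128)tp[num] + carry;
        tp[num - 1] = (uint64_t)acc;
        tp[num] = tp[num + 1] + (uint64_t)(acc >> 64);
        tp[num + 1] = 0;
    }

    /* conditional final subtraction, mirroring the tp[num]!=0 || c0==0
     * test in the code above: use tp-np unless the subtraction borrowed
     * with no overflow word pending */
    {
        uint64_t borrow = 0, t[MAX_LIMBS];

        for (j = 0; j < num; j++) {
            unsigned __int128 d =
                (unsigned __int128)tp[j] - np[j] - borrow;
            t[j] = (uint64_t)d;
            borrow = (uint64_t)(d >> 64) & 1;
        }
        memcpy(rp, (tp[num] != 0 || borrow == 0) ? t : tp,
               num * sizeof(uint64_t));
    }
    return 1;
}
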
#else /* !BN_MUL_COMBA */
}
#ifdef OPENSSL_BN_ASM_MONT
-void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
{
BN_ULONG c0,c1,*tp;
volatile BN_ULONG *vp;
if (tp[num]!=0 || c0==0)
{
for(i=0;i<num+2;i++) vp[i] = 0;
- return;
+ return 1;
}
}
for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;
vp[num] = 0;
vp[num+1] = 0;
+ return 1;
}
-
-void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num)
- {
- bn_mul_mont(rp,ap,ap,np,n0,num);
- }
+#else
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
+{ return 0; }
#endif /* OPENSSL_BN_ASM_MONT */
#endif /* !BN_MUL_COMBA */
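
The #if 0 template earlier shows the shape intended for platform-specific
replacements: probe for the cases the assembler handles, dispatch squaring
separately, and return 0 for everything else. A hypothetical dispatcher is
sketched below; bn_mul_mont_asm, bn_sqr_mont_asm and the size threshold are
placeholder choices for illustration, not actual OpenSSL symbols.

/* Placeholder declarations for a hypothetical assembler back-end. */
int bn_mul_mont_asm(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                    const BN_ULONG *np, BN_ULONG n0, int num);
int bn_sqr_mont_asm(BN_ULONG *rp, const BN_ULONG *ap,
                    const BN_ULONG *np, BN_ULONG n0, int num);

int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, BN_ULONG n0, int num)
{
    if (num < 4)
        return 0;       /* decline: caller falls back to the generic path */
    if (ap == bp)       /* squaring, as in the #if 0 template above */
        return bn_sqr_mont_asm(rp, ap, np, n0, num);
    return bn_mul_mont_asm(rp, ap, bp, np, n0, num);
}
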
if (num>1 && a->top==num && b->top==num)
{
if (bn_wexpand(r,num) == NULL) return 0;
- r->neg = a->neg^b->neg;
- r->top = num;
- if (a==b)
- bn_sqr_mont(r->d,a->d,mont->N.d,mont->n0,num);
- else
- bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num);
- bn_fix_top(r);
- return 1;
+ if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num))
+ {
+ r->neg = a->neg^b->neg;
+ r->top = num;
+ bn_fix_top(r);
+ return 1;
+ }
}
#endif
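
The hunk above wires the new return value into the caller: the word-based
path is attempted only for fully reduced, equal-width operands, and a 0
return drops through to the pre-existing generic path. A simplified sketch
of the overall pattern follows; it is not the verbatim bn_mont.c body, and
the generic path is condensed here to BN_mul plus BN_from_montgomery.

/* Sketch of the caller pattern, not the verbatim BN_mod_mul_montgomery. */
static int mod_mul_mont_sketch(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                               BN_MONT_CTX *mont, BN_CTX *ctx)
{
    int ret = 0;
    BIGNUM *tmp;
#if defined(OPENSSL_BN_ASM_MONT)
    int num = mont->N.top;

    if (num > 1 && a->top == num && b->top == num) {
        if (bn_wexpand(r, num) == NULL)
            return 0;
        if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
            r->neg = a->neg ^ b->neg;
            r->top = num;
            bn_fix_top(r);
            return 1;
        }
        /* bn_mul_mont returned 0: fall through to the generic path */
    }
#endif

    BN_CTX_start(ctx);
    if ((tmp = BN_CTX_get(ctx)) == NULL)
        goto err;
    if (!BN_mul(tmp, a, b, ctx))                /* full-width product */
        goto err;
    if (!BN_from_montgomery(r, tmp, mont, ctx)) /* Montgomery reduction */
        goto err;
    ret = 1;
 err:
    BN_CTX_end(ctx);
    return ret;
}
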