From ca04d7a20842b2de39264b6c1605c3443fc09d16 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Thu, 6 Oct 2005 09:12:39 +0000 Subject: [PATCH] Leave the decision to call/implement bn_sqr_mont to assembler developer. --- crypto/bn/bn.h | 3 +-- crypto/bn/bn_asm.c | 45 ++++++++++++++++++++++++++------------------- crypto/bn/bn_mont.c | 15 +++++++-------- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h index 61aecae6d5..b0c8f09808 100644 --- a/crypto/bn/bn.h +++ b/crypto/bn/bn.h @@ -729,8 +729,7 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); bn_pollute(a); \ } -void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num); -void bn_sqr_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np,BN_ULONG n0, int num); +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num); BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c index 741cf813db..9b8e380c4f 100644 --- a/crypto/bn/bn_asm.c +++ b/crypto/bn/bn_asm.c @@ -831,13 +831,14 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) #ifdef OPENSSL_BN_ASM_MONT /* * This is essentially reference implementation, which may or may not - * result in performance improvement. E.g. on IA-32 this does give 40% - * faster rsa1024 private key operations and 10% faster rsa4096 ones, - * while on AMD64 it improves rsa1024 sign only by 10% and *worsens* - * rsa4096 sign by 15%. Once again, it's a reference implementation, - * one to be used as start-point for platform-specific assembler. + * result in performance improvement. E.g. on IA-32 this routine was + * observed to give 40% faster rsa1024 private key operations and 10% + * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only + * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a + * reference implementation, one to be used as start-point for + * platform-specific assembler. */ -void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) { BN_ULONG c0,c1,ml,*tp; #ifdef mul64 @@ -846,6 +847,9 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ volatile BN_ULONG *vp; int i=0,j; +#if 0 /* template for platform-specific implementation */ + if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num); +#endif vp = tp = alloca((num+2)*sizeof(BN_ULONG)); tp[num] = bn_mul_words(tp,ap,num,bp[0]); @@ -890,18 +894,22 @@ void bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ if (tp[num]!=0 || c0==0) { for(i=0;i1 && a->top==num && b->top==num) { if (bn_wexpand(r,num) == NULL) return 0; - r->neg = a->neg^b->neg; - r->top = num; - if (a==b) - bn_sqr_mont(r->d,a->d,mont->N.d,mont->n0,num); - else - bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num); - bn_fix_top(r); - return 1; + if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num)) + { + r->neg = a->neg^b->neg; + r->top = num; + bn_fix_top(r); + return 1; + } } #endif -- 2.25.1