From 984aefe0e84429cbb58a4a7ae0515621c0205c57 Mon Sep 17 00:00:00 2001
From: Andy Polyakov <appro@openssl.org>
Date: Thu, 4 Aug 2005 17:42:58 +0000
Subject: [PATCH] 3-4 times better RSA/DSA performance on WIN64A target [from HEAD].

---
 crypto/bn/bn_asm.c | 28 ++++++++++++++++++++++++++++
 crypto/bn/bn_lcl.h | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
index 19978085b2..99bc2de491 100644
--- a/crypto/bn/bn_asm.c
+++ b/crypto/bn/bn_asm.c
@@ -459,6 +459,34 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 #define sqr_add_c2(a,i,j,c0,c1,c2) \
 	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
 
+#elif defined(BN_UMULT_LOHI)
+
+#define mul_add_c(a,b,c0,c1,c2) {	\
+	BN_ULONG ta=(a),tb=(b);		\
+	BN_UMULT_LOHI(t1,t2,ta,tb);	\
+	c0 += t1; t2 += (c0<t1)?1:0;	\
+	c1 += t2; c2 += (c1<t2)?1:0;	\
+	}
+
+#define mul_add_c2(a,b,c0,c1,c2) {	\
+	BN_ULONG ta=(a),tb=(b),t0;	\
+	BN_UMULT_LOHI(t0,t1,ta,tb);	\
+	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
+	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
+	c0 += t1; t2 += (c0<t1)?1:0;	\
+	c1 += t2; c2 += (c1<t2)?1:0;	\
+	}
+
+#define sqr_add_c(a,i,c0,c1,c2) {	\
+	BN_ULONG ta=(a)[i];		\
+	BN_UMULT_LOHI(t1,t2,ta,ta);	\
+	c0 += t1; t2 += (c0<t1)?1:0;	\
+	c1 += t2; c2 += (c1<t2)?1:0;	\
+	}
+
+#define sqr_add_c2(a,i,j,c0,c1,c2)	\
+	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+
 #elif defined(BN_UMULT_HIGH)
diff --git a/crypto/bn/bn_lcl.h b/crypto/bn/bn_lcl.h
--- a/crypto/bn/bn_lcl.h
+++ b/crypto/bn/bn_lcl.h
@@ ... @@
+# elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
+#  if defined(_MSC_VER) && _MSC_VER>=1400
+    unsigned __int64 __umulh (unsigned __int64 a,unsigned __int64 b);
+    unsigned __int64 _umul128 (unsigned __int64 a,unsigned __int64 b,
+				 unsigned __int64 *h);
+#   pragma intrinsic(__umulh,_umul128)
+#   define BN_UMULT_HIGH(a,b)		__umulh((a),(b))
+#   define BN_UMULT_LOHI(low,high,a,b)	((low)=_umul128((a),(b),&(high)))
+#  endif
 # endif		/* cpu */
 #endif		/* OPENSSL_NO_ASM */
@@ -313,6 +322,33 @@ extern "C" {
 	(r1)=Hw(t); \
 	}
 
+#elif defined(BN_UMULT_LOHI)
+#define mul_add(r,a,w,c) {		\
+	BN_ULONG high,low,ret,tmp=(a);	\
+	ret =  (r);			\
+	BN_UMULT_LOHI(low,high,w,tmp);	\
+	ret += (c);			\
+	(c) =  (ret<(c))?1:0;		\
+	(c) += high;			\
+	ret += low;			\
+	(c) += (ret<low)?1:0;		\
+	(r) =  ret;			\
+	}
+
+#define mul(r,a,w,c) {			\
+	BN_ULONG high,low,ret,ta=(a);	\
+	BN_UMULT_LOHI(low,high,w,ta);	\
+	ret =  low + (c);		\
+	(c) =  high;			\
+	(c) += (ret<low)?1:0;		\
+	(r) =  ret;			\
+	}
+
+#define sqr(r0,r1,a) {			\
+	BN_ULONG tmp=(a);		\
+	BN_UMULT_LOHI(r0,r1,tmp,tmp);	\
+	}
+
 #elif defined(BN_UMULT_HIGH)
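
Note (not part of the patch): the core of the change is the BN_UMULT_LOHI(low,high,a,b)
primitive, which yields the full double-word product of two 64-bit words, plus the
carry-propagating macros built on top of it. The sketch below re-expresses the
mul_add(r,a,w,c) idiom from the bn_lcl.h hunk as a standalone C program so the carry
handling can be checked in isolation. It is an illustration under assumptions: it
emulates BN_UMULT_LOHI with GCC/Clang's unsigned __int128 rather than the MSVC
_umul128 intrinsic the patch targets on WIN64A, and the helper name mul_add_word is
invented for the example.

/*
 * Standalone sketch (not part of the patch): the double-word
 * multiply-accumulate that the patched mul_add(r,a,w,c) macro performs,
 * i.e. {carry, *r} = a*w + *r + carry on 64-bit words.
 *
 * BN_UMULT_LOHI is emulated with GCC/Clang's unsigned __int128 here;
 * the patch itself maps it to MSVC's _umul128 on WIN64A.  The function
 * name mul_add_word is made up for this example.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t BN_ULONG;

/* Portable stand-in for BN_UMULT_LOHI(low,high,a,b): high:low = a*b. */
#define BN_UMULT_LOHI(low, high, a, b) do {                     \
        unsigned __int128 prod = (unsigned __int128)(a) * (b);  \
        (low)  = (BN_ULONG)prod;                                \
        (high) = (BN_ULONG)(prod >> 64);                        \
    } while (0)

/* Same carry handling as the mul_add() macro added to bn_lcl.h. */
static BN_ULONG mul_add_word(BN_ULONG *r, BN_ULONG a, BN_ULONG w, BN_ULONG c)
{
    BN_ULONG high, low, ret;

    ret = *r;                        /* start from the current result word */
    BN_UMULT_LOHI(low, high, w, a);  /* high:low = w * a                   */
    ret += c;                        /* add the incoming carry             */
    c    = (ret < c) ? 1 : 0;        /* did that addition wrap?            */
    c   += high;                     /* high half feeds the carry word     */
    ret += low;                      /* add the low half of the product    */
    c   += (ret < low) ? 1 : 0;      /* second possible wrap               */
    *r   = ret;
    return c;                        /* carry into the next word           */
}

int main(void)
{
    /* (2^64-1)*(2^64-1) + (2^64-1) + 1 = 2^128 - 2^64 + 1 */
    BN_ULONG r = 0xFFFFFFFFFFFFFFFFULL;
    BN_ULONG carry = mul_add_word(&r, 0xFFFFFFFFFFFFFFFFULL,
                                  0xFFFFFFFFFFFFFFFFULL, 1);

    printf("r = %016llx, carry = %016llx\n",
           (unsigned long long)r, (unsigned long long)carry);
    /* Expected: r = 0000000000000001, carry = ffffffffffffffff. */
    return 0;
}

On WIN64A the high:low pair comes directly from _umul128/__umulh, so the bignum inner
loops no longer have to synthesize the double-word product from 32-bit pieces, which
is where the 3-4x RSA/DSA improvement cited in the subject line comes from.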