From: Andy Polyakov Date: Thu, 4 Aug 2005 17:35:42 +0000 (+0000) Subject: 3-4 times better RSA/DSA performance on WIN64A target. Well, on AMD64 CPU, X-Git-Tag: OpenSSL_0_9_8k^2~1869 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=11de71b04c750e6be6f5e8ac875655a2a67efaf7;p=oweals%2Fopenssl.git 3-4 times better RSA/DSA performance on WIN64A target. Well, on AMD64 CPU, EM64T will hardly exhibit better performance... --- diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c index 19978085b2..99bc2de491 100644 --- a/crypto/bn/bn_asm.c +++ b/crypto/bn/bn_asm.c @@ -459,6 +459,34 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) #define sqr_add_c2(a,i,j,c0,c1,c2) \ mul_add_c2((a)[i],(a)[j],c0,c1,c2) +#elif defined(BN_UMULT_LOHI) + +#define mul_add_c(a,b,c0,c1,c2) { \ + BN_ULONG ta=(a),tb=(b); \ + BN_UMULT_LOHI(t1,t2,ta,tb); \ + c0 += t1; t2 += (c0<t1)?1:0; \ [... text lost in extraction ...] _MSC_VER>=1400 + unsigned __int64 __umulh (unsigned __int64 a,unsigned __int64 b); + unsigned __int64 _umul128 (unsigned __int64 a,unsigned __int64 b, + unsigned __int64 *h); +# pragma intrinsic(__umulh,_umul128) +# define BN_UMULT_HIGH(a,b) __umulh((a),(b)) +# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high))) +# endif # endif /* cpu */ #endif /* OPENSSL_NO_ASM */ @@ -313,6 +322,33 @@ extern "C" { (r1)=Hw(t); \ } +#elif defined(BN_UMULT_LOHI) +#define mul_add(r,a,w,c) { \ + BN_ULONG high,low,ret,tmp=(a); \ + ret = (r); \ + BN_UMULT_LOHI(low,high,w,tmp); \ + ret += (c); \ + (c) = (ret<(c))?1:0; \ + (c) += high; \ + ret += low; \ + (c) += (ret