From b7f5503fa6e1feebec2ac12b8ddcb5b5672452a6 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 25 Sep 2015 14:00:46 +0200 Subject: [PATCH] Skylake performance results. Reviewed-by: Matt Caswell --- crypto/aes/asm/aesni-sha1-x86_64.pl | 2 ++ crypto/aes/asm/aesni-sha256-x86_64.pl | 1 + crypto/aes/asm/aesni-x86_64.pl | 1 + crypto/modes/asm/aesni-gcm-x86_64.pl | 9 +++++---- crypto/modes/asm/ghash-x86_64.pl | 5 +++-- crypto/rc4/asm/rc4-md5-x86_64.pl | 5 ++++- crypto/sha/asm/sha1-mb-x86_64.pl | 1 + crypto/sha/asm/sha1-x86_64.pl | 1 + crypto/sha/asm/sha256-mb-x86_64.pl | 1 + crypto/sha/asm/sha512-x86_64.pl | 1 + 10 files changed, 20 insertions(+), 7 deletions(-) diff --git a/crypto/aes/asm/aesni-sha1-x86_64.pl b/crypto/aes/asm/aesni-sha1-x86_64.pl index 97992adca7..952b4cfdd6 100644 --- a/crypto/aes/asm/aesni-sha1-x86_64.pl +++ b/crypto/aes/asm/aesni-sha1-x86_64.pl @@ -25,6 +25,7 @@ # Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15) 5.98(7.05) +68%(+58%) # Ivy Bridge 5.05[+4.6] 9.65 5.54 +74% # Haswell 4.43[+3.6(4.2)] 8.00(8.58) 4.55(5.21) +75%(+65%) +# Skylake 2.63[+3.5(4.1)] 6.17(6.69) 4.23(4.44) +46%(+51%) # Bulldozer 5.77[+6.0] 11.72 6.37 +84% # # AES-192-CBC @@ -39,6 +40,7 @@ # Sandy Bridge 7.05 12.06(13.15) 7.12(7.72) +69%(+70%) # Ivy Bridge 7.05 11.65 7.12 +64% # Haswell 6.19 9.76(10.34) 6.21(6.25) +57%(+65%) +# Skylake 3.62 7.16(7.68) 4.56(4.76) +57%(+61$) # Bulldozer 8.00 13.95 8.25 +69% # # (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for diff --git a/crypto/aes/asm/aesni-sha256-x86_64.pl b/crypto/aes/asm/aesni-sha256-x86_64.pl index 19b0433b3b..74dad44408 100644 --- a/crypto/aes/asm/aesni-sha256-x86_64.pl +++ b/crypto/aes/asm/aesni-sha256-x86_64.pl @@ -25,6 +25,7 @@ # Sandy Bridge 5.05/6.05/7.05+11.6 13.0 +28%/36%/43% # Ivy Bridge 5.05/6.05/7.05+10.3 11.6 +32%/41%/50% # Haswell 4.43/5.29/6.19+7.80 8.79 +39%/49%/59% +# Skylake 2.62/3.14/3.62+7.70 8.10 +27%/34%/40% # Bulldozer 5.77/6.89/8.00+13.7 13.7 +42%/50%/58% # # (*) there are XOP, AVX1 and AVX2 code pathes, meaning that diff --git a/crypto/aes/asm/aesni-x86_64.pl b/crypto/aes/asm/aesni-x86_64.pl index 25ca574f6a..6037e9e76e 100644 --- a/crypto/aes/asm/aesni-x86_64.pl +++ b/crypto/aes/asm/aesni-x86_64.pl @@ -165,6 +165,7 @@ # Westmere 3.77/1.25 1.25 1.25 1.26 # * Bridge 5.07/0.74 0.75 0.90 0.85 # Haswell 4.44/0.63 0.63 0.73 0.63 +# Skylake 2.62/0.63 0.63 0.63 0.63 # Silvermont 5.75/3.54 3.56 4.12 3.87(*) # Bulldozer 5.77/0.70 0.72 0.90 0.70 # diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl index 7e4e04ea25..608c3f7805 100644 --- a/crypto/modes/asm/aesni-gcm-x86_64.pl +++ b/crypto/modes/asm/aesni-gcm-x86_64.pl @@ -22,10 +22,11 @@ # [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max # Locktyukhin of Intel Corp. who verified that it reduces shuffles # pressure with notable relative improvement, achieving 1.0 cycle per -# byte processed with 128-bit key on Haswell processor, and 0.74 - -# on Broadwell. [Mentioned results are raw profiled measurements for -# favourable packet size, one divisible by 96. Applications using the -# EVP interface will observe a few percent worse performance.] +# byte processed with 128-bit key on Haswell processor, 0.74 - on +# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled +# measurements for favourable packet size, one divisible by 96. +# Applications using the EVP interface will observe a few percent +# worse performance.] # # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest # [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index 5a7ce39486..a63c923429 100644 --- a/crypto/modes/asm/ghash-x86_64.pl +++ b/crypto/modes/asm/ghash-x86_64.pl @@ -64,6 +64,7 @@ # Ivy Bridge 1.80(+7%) # Haswell 0.55(+93%) (if system doesn't support AVX) # Broadwell 0.45(+110%)(if system doesn't support AVX) +# Skylake 0.44(+110%)(if system doesn't support AVX) # Bulldozer 1.49(+27%) # Silvermont 2.88(+13%) @@ -74,8 +75,8 @@ # CPUs such as Sandy and Ivy Bridge can execute it, the code performs # sub-optimally in comparison to above mentioned version. But thanks # to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that -# it performs in 0.41 cycles per byte on Haswell processor, and in -# 0.29 on Broadwell. +# it performs in 0.41 cycles per byte on Haswell processor, in +# 0.29 on Broadwell, and in 0.36 on Skylake. # # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest diff --git a/crypto/rc4/asm/rc4-md5-x86_64.pl b/crypto/rc4/asm/rc4-md5-x86_64.pl index 49d5438457..a92fe92269 100644 --- a/crypto/rc4/asm/rc4-md5-x86_64.pl +++ b/crypto/rc4/asm/rc4-md5-x86_64.pl @@ -29,13 +29,16 @@ # Core2 6.5 5.8 12.3 7.7 +60% # Westmere 4.3 5.2 9.5 7.0 +36% # Sandy Bridge 4.2 5.5 9.7 6.8 +43% +# Ivy Bridge 4.1 5.2 9.3 6.0 +54% +# Haswell 4.0 5.0 9.0 5.7 +60% +# Skylake 6.3(**) 5.0 11.3 5.3 +110% # Atom 9.3 6.5 15.8 11.1 +42% # VIA Nano 6.3 5.4 11.7 8.6 +37% -# Ivy Bridge 4.1 5.2 9.3 6.0 +54% # Bulldozer 4.5 5.4 9.9 7.7 +29% # # (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement # is +53%... +# (**) unidentified anomaly; my ($rc4,$md5)=(1,1); # what to generate? my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(), diff --git a/crypto/sha/asm/sha1-mb-x86_64.pl b/crypto/sha/asm/sha1-mb-x86_64.pl index a8ee075eaa..099c803ebc 100644 --- a/crypto/sha/asm/sha1-mb-x86_64.pl +++ b/crypto/sha/asm/sha1-mb-x86_64.pl @@ -19,6 +19,7 @@ # Sandy Bridge (8.16 +5.15=13.3)/n 4.99 5.98 +80% # Ivy Bridge (8.08 +5.14=13.2)/n 4.60 5.54 +68% # Haswell(iii) (8.96 +5.00=14.0)/n 3.57 4.55 +160% +# Skylake (8.70 +5.00=13.7)/n 3.64 4.20 +145% # Bulldozer (9.76 +5.76=15.5)/n 5.95 6.37 +64% # # (i) multi-block CBC encrypt with 128-bit key; diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl index 9bb6b49819..d6b0722f31 100755 --- a/crypto/sha/asm/sha1-x86_64.pl +++ b/crypto/sha/asm/sha1-x86_64.pl @@ -73,6 +73,7 @@ # Sandy Bridge 7.70 6.10/+26% 4.99/+54% # Ivy Bridge 6.06 4.67/+30% 4.60/+32% # Haswell 5.45 4.15/+31% 3.57/+53% +# Skylake 5.18 4.06/+28% 3.54/+46% # Bulldozer 9.11 5.95/+53% # VIA Nano 9.32 7.15/+30% # Atom 10.3 9.17/+12% diff --git a/crypto/sha/asm/sha256-mb-x86_64.pl b/crypto/sha/asm/sha256-mb-x86_64.pl index adf2ddccd1..ab0f028bef 100644 --- a/crypto/sha/asm/sha256-mb-x86_64.pl +++ b/crypto/sha/asm/sha256-mb-x86_64.pl @@ -19,6 +19,7 @@ # Sandy Bridge (20.5 +5.15=25.7)/n 11.6 13.0 +103% # Ivy Bridge (20.4 +5.14=25.5)/n 10.3 11.6 +82% # Haswell(iii) (21.0 +5.00=26.0)/n 7.80 8.79 +170% +# Skylake (18.9 +5.00=23.9)/n 7.70 8.17 +170% # Bulldozer (21.6 +5.76=27.4)/n 13.6 13.7 +100% # # (i) multi-block CBC encrypt with 128-bit key; diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl index b7b44b4411..04ab412398 100755 --- a/crypto/sha/asm/sha512-x86_64.pl +++ b/crypto/sha/asm/sha512-x86_64.pl @@ -86,6 +86,7 @@ # Sandy Bridge 17.4 14.2(+23%) 11.6(+50%(**)) 11.2 8.10(+38%(**)) # Ivy Bridge 12.6 10.5(+20%) 10.3(+22%) 8.17 7.22(+13%) # Haswell 12.2 9.28(+31%) 7.80(+56%) 7.66 5.40(+42%) +# Skylake 11.4 9.03(+26%) 7.70(+48%) 7.25 5.20(+40%) # Bulldozer 21.1 13.6(+54%) 13.6(+54%(***)) 13.5 8.58(+57%) # VIA Nano 23.0 16.5(+39%) - 14.7 - # Atom 23.0 18.9(+22%) - 14.7 - -- 2.25.1