From b59f92e75d334c9281082a02faa6c68afb614fd2 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sat, 30 Aug 2014 19:13:49 +0200 Subject: [PATCH] x86[_64] assembly pack: add Silvermont performance data. Reviewed-by: Rich Salz --- crypto/aes/asm/bsaes-x86_64.pl | 2 ++ crypto/aes/asm/vpaes-x86.pl | 1 + crypto/aes/asm/vpaes-x86_64.pl | 1 + crypto/modes/asm/ghash-x86_64.pl | 1 + crypto/sha/asm/sha1-586.pl | 7 +++++-- crypto/sha/asm/sha256-586.pl | 1 + crypto/sha/asm/sha512-586.pl | 1 + crypto/sha/asm/sha512-x86_64.pl | 1 + 8 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crypto/aes/asm/bsaes-x86_64.pl b/crypto/aes/asm/bsaes-x86_64.pl index d2c3978b96..3f7d33c45b 100644 --- a/crypto/aes/asm/bsaes-x86_64.pl +++ b/crypto/aes/asm/bsaes-x86_64.pl @@ -40,6 +40,7 @@ # Core 2 9.30 8.69 +7% # Nehalem(**) 7.63 6.88 +11% # Atom 17.1 16.4 +4% +# Silvermont - 12.9 # # (*) Comparison is not completely fair, because "this" is ECB, # i.e. no extra processing such as counter values calculation @@ -78,6 +79,7 @@ # Core 2 9.98 # Nehalem 7.80 # Atom 17.9 +# Silvermont 14.0 # # November 2011. # diff --git a/crypto/aes/asm/vpaes-x86.pl b/crypto/aes/asm/vpaes-x86.pl index bacf42cf0f..2ba149c3f9 100644 --- a/crypto/aes/asm/vpaes-x86.pl +++ b/crypto/aes/asm/vpaes-x86.pl @@ -30,6 +30,7 @@ # Core 2(**) 28.1/41.4/18.3 21.9/25.2(***) # Nehalem 27.9/40.4/18.1 10.2/11.9 # Atom 70.7/92.1/60.1 61.1/75.4(***) +# Silvermont 45.4/62.9/24.1 49.2/61.1(***) # # (*) "Hyper-threading" in the context refers rather to cache shared # among multiple cores, than to specifically Intel HTT. As vast diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl index 40ef342d97..f2ef318fae 100644 --- a/crypto/aes/asm/vpaes-x86_64.pl +++ b/crypto/aes/asm/vpaes-x86_64.pl @@ -30,6 +30,7 @@ # Core 2(**) 29.6/41.1/14.3 21.9/25.2(***) # Nehalem 29.6/40.3/14.6 10.0/11.8 # Atom 57.3/74.2/32.1 60.9/77.2(***) +# Silvermont 52.7/64.0/19.5 48.8/60.8(***) # # (*) "Hyper-threading" in the context refers rather to cache shared # among multiple cores, than to specifically Intel HTT. As vast diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index 1e79227338..ce7d1cb8ba 100644 --- a/crypto/modes/asm/ghash-x86_64.pl +++ b/crypto/modes/asm/ghash-x86_64.pl @@ -64,6 +64,7 @@ # Ivy Bridge 1.80(+7%) # Haswell 0.55(+93%) (if system doesn't support AVX) # Bulldozer 1.49(+27%) +# Silvermont 2.88(+13%) # March 2013 # diff --git a/crypto/sha/asm/sha1-586.pl b/crypto/sha/asm/sha1-586.pl index 59da867848..8377299b1e 100644 --- a/crypto/sha/asm/sha1-586.pl +++ b/crypto/sha/asm/sha1-586.pl @@ -93,16 +93,19 @@ # P4 10.6 - # AMD K8 7.1 - # Core2 7.3 6.0/+22% - -# Atom 12.5 9.3(*)/+35% - # Westmere 7.3 5.5/+33% - # Sandy Bridge 8.8 6.2/+40% 5.1(**)/+73% # Ivy Bridge 7.2 4.8/+51% 4.7(**)/+53% # Haswell 6.5 4.3/+51% 4.1(**)/+58% # Bulldozer 11.6 6.0/+92% # VIA Nano 10.6 7.5/+41% +# Atom 12.5 9.3(*)/+35% +# Silvermont 14.5 9.9(*)/+46% # # (*) Loop is 1056 instructions long and expected result is ~8.25. -# It remains mystery [to me] why ILP is limited to 1.7. +# The discrepancy is because of front-end limitations, so +# called MS-ROM penalties, and on Silvermont even rotate's +# limited parallelism. # # (**) As per above comment, the result is for AVX *plus* sh[rl]d. diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl index 0c2a778e7c..6462e45ba7 100644 --- a/crypto/sha/asm/sha256-586.pl +++ b/crypto/sha/asm/sha256-586.pl @@ -53,6 +53,7 @@ # Bulldozer 36 - 27/22 17.0 13.6 # VIA Nano 36 - 25/22 16.8 16.5 # Atom 50 - 30/25 21.9 18.9 +# Silvermont 40 - 34/31 22.9 20.6 # # (*) numbers after slash are for unrolled loop, where applicable; # (**) x86_64 assembly performance is presented for reference diff --git a/crypto/sha/asm/sha512-586.pl b/crypto/sha/asm/sha512-586.pl index 9fc792964f..e96ec00314 100644 --- a/crypto/sha/asm/sha512-586.pl +++ b/crypto/sha/asm/sha512-586.pl @@ -28,6 +28,7 @@ # Bulldozer 121 - 50 14.0 13.5 # VIA Nano 91 - 52 33 14.7 # Atom 126 - 68 48(***) 14.7 +# Silvermont 97 - 58 42(***) 17.5 # # (*) whichever best applicable. # (**) x86_64 assembler performance is presented for reference diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl index 476e99fee8..b7b44b4411 100755 --- a/crypto/sha/asm/sha512-x86_64.pl +++ b/crypto/sha/asm/sha512-x86_64.pl @@ -89,6 +89,7 @@ # Bulldozer 21.1 13.6(+54%) 13.6(+54%(***)) 13.5 8.58(+57%) # VIA Nano 23.0 16.5(+39%) - 14.7 - # Atom 23.0 18.9(+22%) - 14.7 - +# Silvermont 27.4 20.6(+33%) - 17.5 - # # (*) whichever best applicable; # (**) switch from ror to shrd stands for fair share of improvement; -- 2.25.1