From: Andy Polyakov Date: Wed, 9 May 2018 10:24:05 +0000 (+0200) Subject: PPC assembly pack: add POWER9 results. X-Git-Tag: OpenSSL_1_1_1-pre7~76 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=13f6857db107b1b6f68daa7fc4a6dd1293428bb1;p=oweals%2Fopenssl.git PPC assembly pack: add POWER9 results. Reviewed-by: Rich Salz --- diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl index 7463df6c17..e1be23ab75 100755 --- a/crypto/aes/asm/aesp8-ppc.pl +++ b/crypto/aes/asm/aesp8-ppc.pl @@ -40,6 +40,7 @@ # CBC en-/decrypt CTR XTS # POWER8[le] 3.96/0.72 0.74 1.1 # POWER8[be] 3.75/0.65 0.66 1.0 +# POWER9[le] 3.05/0.65 0.65 0.80 $flavour = shift; diff --git a/crypto/chacha/asm/chacha-ppc.pl b/crypto/chacha/asm/chacha-ppc.pl index f972ee471a..af2f037c15 100755 --- a/crypto/chacha/asm/chacha-ppc.pl +++ b/crypto/chacha/asm/chacha-ppc.pl @@ -27,6 +27,7 @@ # PPC970/G5 9.29/+160% 4.60 # POWER7 8.62/+61% 4.27 # POWER8 8.70/+51% 3.96 +# POWER9 6.61/+29% 3.67 $flavour = shift; diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl index 45c6438497..a1d5789cc8 100755 --- a/crypto/modes/asm/ghashp8-ppc.pl +++ b/crypto/modes/asm/ghashp8-ppc.pl @@ -30,6 +30,7 @@ # 2x aggregated reduction improves performance by 50% (resulting # performance on POWER8 is 1 cycle per processed byte), and 4x # aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb). +# POWER9 delivers 0.40 cpb. $flavour=shift; $output =shift; diff --git a/crypto/poly1305/asm/poly1305-ppc.pl b/crypto/poly1305/asm/poly1305-ppc.pl index ab65910282..8e105d77fb 100755 --- a/crypto/poly1305/asm/poly1305-ppc.pl +++ b/crypto/poly1305/asm/poly1305-ppc.pl @@ -28,6 +28,7 @@ # PPC970 7.00/+114% 3.51/+205% # POWER7 3.75/+260% 1.93/+100% # POWER8 - 2.03/+200% +# POWER9 - 1.56/+150% # # Do we need floating-point implementation for PPC? Results presented # in poly1305_ieee754.c are tricky to compare to, because they are for diff --git a/crypto/poly1305/asm/poly1305-ppcfp.pl b/crypto/poly1305/asm/poly1305-ppcfp.pl index 49f70a8c03..fc62baa222 100755 --- a/crypto/poly1305/asm/poly1305-ppcfp.pl +++ b/crypto/poly1305/asm/poly1305-ppcfp.pl @@ -26,6 +26,7 @@ # PPC970 6.03/+80% # POWER7 3.50/+30% # POWER8 3.75/+10% +# POWER9 2.80/+12% $flavour = shift; diff --git a/crypto/poly1305/poly1305_ieee754.c b/crypto/poly1305/poly1305_ieee754.c index 995a02e5c1..1a06e03558 100644 --- a/crypto/poly1305/poly1305_ieee754.c +++ b/crypto/poly1305/poly1305_ieee754.c @@ -38,6 +38,7 @@ * POWER6 4.92 * POWER7 4.50 * POWER8 4.10 + * POWER9 3.14 * * z10 11.2 * z196+ 7.30 diff --git a/crypto/sha/asm/keccak1600-ppc64.pl b/crypto/sha/asm/keccak1600-ppc64.pl index f89f71c825..60ed2f2326 100755 --- a/crypto/sha/asm/keccak1600-ppc64.pl +++ b/crypto/sha/asm/keccak1600-ppc64.pl @@ -30,6 +30,7 @@ # PPC970/G5 14.6/+120% # POWER7 10.3/+100% # POWER8 11.5/+85% +# POWER9 7.2/+45% # # (*) Corresponds to SHA3-256. Percentage after slash is improvement # over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do diff --git a/crypto/sha/asm/keccak1600p8-ppc.pl b/crypto/sha/asm/keccak1600p8-ppc.pl index feec68839f..95e6242f99 100755 --- a/crypto/sha/asm/keccak1600p8-ppc.pl +++ b/crypto/sha/asm/keccak1600p8-ppc.pl @@ -23,7 +23,7 @@ # buffer for r=1088, which matches SHA3-256. This is 17% better than # scalar PPC64 code. It probably should be noted that if POWER8's # successor can achieve higher scalar instruction issue rate, then -# this module will loose... +# this module will loose... And it does on POWER9 with 8.8 vs. 7.2. $flavour = shift; diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl index 93dfef20a9..e6e9467905 100755 --- a/crypto/sha/asm/sha512p8-ppc.pl +++ b/crypto/sha/asm/sha512p8-ppc.pl @@ -36,9 +36,9 @@ # little-endian system]. Numbers in square brackets are for 64-bit # build of sha512-ppc.pl, presented for reference. # -# POWER8 -# SHA256 9.9 [15.8] -# SHA512 6.3 [10.3] +# POWER8 POWER9 +# SHA256 9.9 [15.8] 9.2 [9.3] +# SHA512 6.3 [10.3] 5.8 [5.9] $flavour=shift; $output =shift;