PPC assembly pack: add POWER9 results.
author: Andy Polyakov <appro@openssl.org>
Wed, 9 May 2018 10:24:05 +0000 (12:24 +0200)
committer: Andy Polyakov <appro@openssl.org>
Thu, 10 May 2018 09:44:21 +0000 (11:44 +0200)
Reviewed-by: Rich Salz <rsalz@openssl.org>
crypto/aes/asm/aesp8-ppc.pl
crypto/chacha/asm/chacha-ppc.pl
crypto/modes/asm/ghashp8-ppc.pl
crypto/poly1305/asm/poly1305-ppc.pl
crypto/poly1305/asm/poly1305-ppcfp.pl
crypto/poly1305/poly1305_ieee754.c
crypto/sha/asm/keccak1600-ppc64.pl
crypto/sha/asm/keccak1600p8-ppc.pl
crypto/sha/asm/sha512p8-ppc.pl

index 7463df6c17b0bc36b20cba4f7e6bdb89619beded..e1be23ab75c59b59604048e79044f06a7dab9edb 100755 (executable)
@@ -40,6 +40,7 @@
 #              CBC en-/decrypt CTR     XTS
 # POWER8[le]   3.96/0.72       0.74    1.1
 # POWER8[be]   3.75/0.65       0.66    1.0
+# POWER9[le]   3.05/0.65       0.65    0.80
 
 $flavour = shift;
 
index f972ee471a6539024d4cd8b6254964857c21da15..af2f037c153cf9768515470b82e068e28c64f496 100755 (executable)
@@ -27,6 +27,7 @@
 # PPC970/G5            9.29/+160%      4.60
 # POWER7               8.62/+61%       4.27
 # POWER8               8.70/+51%       3.96
+# POWER9               6.61/+29%       3.67
 
 $flavour = shift;
 
index 45c6438497ab9b86926130e58f8c1dbd4922cb05..a1d5789cc865cb6bdbde2be2d7e07f303d6eec92 100755 (executable)
@@ -30,6 +30,7 @@
 # 2x aggregated reduction improves performance by 50% (resulting
 # performance on POWER8 is 1 cycle per processed byte), and 4x
 # aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+# POWER9 delivers 0.40 cpb.
 
 $flavour=shift;
 $output =shift;
index ab65910282e28a51d39bbaf6405abace01075628..8e105d77fb22a76ad0dcba45059f00f63856106d 100755 (executable)
@@ -28,6 +28,7 @@
 # PPC970               7.00/+114%      3.51/+205%
 # POWER7               3.75/+260%      1.93/+100%
 # POWER8               -               2.03/+200%
+# POWER9               -               1.56/+150%
 #
 # Do we need floating-point implementation for PPC? Results presented
 # in poly1305_ieee754.c are tricky to compare to, because they are for
index 49f70a8c03d3b500d75782fc0cd1725846a723e1..fc62baa22279f6894ff098c7f5dceed0429099bc 100755 (executable)
@@ -26,6 +26,7 @@
 # PPC970               6.03/+80%
 # POWER7               3.50/+30%
 # POWER8               3.75/+10%
+# POWER9               2.80/+12%
 
 $flavour = shift;
 
index 995a02e5c139cdc10f730ad3159229d1dd680bf5..1a06e03558aed20fb34b7b0af9a89f8261e8bb53 100644 (file)
@@ -38,6 +38,7 @@
  * POWER6               4.92
  * POWER7               4.50
  * POWER8               4.10
+ * POWER9               3.14
  *
  * z10                  11.2
  * z196+                7.30
index f89f71c825e85de83267ad1a8e8a2e5c7e466ab6..60ed2f2326ec1c7cc77d0886c9deb58ae24fd95d 100755 (executable)
@@ -30,6 +30,7 @@
 # PPC970/G5    14.6/+120%
 # POWER7       10.3/+100%
 # POWER8       11.5/+85%
+# POWER9       7.2/+45%
 #
 # (*)  Corresponds to SHA3-256. Percentage after slash is improvement
 #      over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
index feec68839f87e0e16acb9d94139bf25cb4a3bd7a..95e6242f998221c11c5814fcbdfa4a2010d31af9 100755 (executable)
@@ -23,7 +23,7 @@
 # buffer for r=1088, which matches SHA3-256. This is 17% better than
 # scalar PPC64 code. It probably should be noted that if POWER8's
 # successor can achieve higher scalar instruction issue rate, then
-# this module will loose...
+# this module will lose... And it does on POWER9 with 8.8 vs. 7.2.
 
 $flavour = shift;
 
index 93dfef20a9a41e80ef76fb90b9b04d3428edd244..e6e946790519dd91ea568ab4090ccc6b7cb9ffc6 100755 (executable)
@@ -36,9 +36,9 @@
 # little-endian system]. Numbers in square brackets are for 64-bit
 # build of sha512-ppc.pl, presented for reference.
 #
-#              POWER8
-# SHA256       9.9 [15.8]
-# SHA512       6.3 [10.3]
+#              POWER8          POWER9
+# SHA256       9.9 [15.8]      9.2 [9.3]
+# SHA512       6.3 [10.3]      5.8 [5.9]
 
 $flavour=shift;
 $output =shift;