PPC assembly pack: add POWER9 results.

author Andy Polyakov <appro@openssl.org>

Wed, 9 May 2018 10:24:05 +0000 (12:24 +0200)

committer Andy Polyakov <appro@openssl.org>

Thu, 10 May 2018 09:44:21 +0000 (11:44 +0200)
author Andy Polyakov <appro@openssl.org>
Wed, 9 May 2018 10:24:05 +0000 (12:24 +0200)
committer Andy Polyakov <appro@openssl.org>
Thu, 10 May 2018 09:44:21 +0000 (11:44 +0200)
diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl

index 7463df6c17b0bc36b20cba4f7e6bdb89619beded..e1be23ab75c59b59604048e79044f06a7dab9edb 100755 (executable)
--- a/crypto/aes/asm/aesp8-ppc.pl
+++ b/crypto/aes/asm/aesp8-ppc.pl
@@ -40,6 +40,7 @@
  #              CBC en-/decrypt CTR     XTS
  # POWER8[le]   3.96/0.72       0.74    1.1
  # POWER8[be]   3.75/0.65       0.66    1.0
+# POWER9[le]   3.05/0.65       0.65    0.80
  
  $flavour = shift;
  
diff --git a/crypto/chacha/asm/chacha-ppc.pl b/crypto/chacha/asm/chacha-ppc.pl

index f972ee471a6539024d4cd8b6254964857c21da15..af2f037c153cf9768515470b82e068e28c64f496 100755 (executable)
--- a/crypto/chacha/asm/chacha-ppc.pl
+++ b/crypto/chacha/asm/chacha-ppc.pl
@@ -27,6 +27,7 @@
  # PPC970/G5            9.29/+160%      4.60
  # POWER7               8.62/+61%       4.27
  # POWER8               8.70/+51%       3.96
+# POWER9               6.61/+29%       3.67
  
  $flavour = shift;
  
diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl

index 45c6438497ab9b86926130e58f8c1dbd4922cb05..a1d5789cc865cb6bdbde2be2d7e07f303d6eec92 100755 (executable)
--- a/crypto/modes/asm/ghashp8-ppc.pl
+++ b/crypto/modes/asm/ghashp8-ppc.pl
@@ -30,6 +30,7 @@
  # 2x aggregated reduction improves performance by 50% (resulting
  # performance on POWER8 is 1 cycle per processed byte), and 4x
  # aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+# POWER9 delivers 0.40 cpb.
  
  $flavour=shift;
  $output =shift;
diff --git a/crypto/poly1305/asm/poly1305-ppc.pl b/crypto/poly1305/asm/poly1305-ppc.pl

index ab65910282e28a51d39bbaf6405abace01075628..8e105d77fb22a76ad0dcba45059f00f63856106d 100755 (executable)
--- a/crypto/poly1305/asm/poly1305-ppc.pl
+++ b/crypto/poly1305/asm/poly1305-ppc.pl
@@ -28,6 +28,7 @@
  # PPC970               7.00/+114%      3.51/+205%
  # POWER7               3.75/+260%      1.93/+100%
  # POWER8               -               2.03/+200%
+# POWER9               -               1.56/+150%
  #
  # Do we need floating-point implementation for PPC? Results presented
  # in poly1305_ieee754.c are tricky to compare to, because they are for
diff --git a/crypto/poly1305/asm/poly1305-ppcfp.pl b/crypto/poly1305/asm/poly1305-ppcfp.pl

index 49f70a8c03d3b500d75782fc0cd1725846a723e1..fc62baa22279f6894ff098c7f5dceed0429099bc 100755 (executable)
--- a/crypto/poly1305/asm/poly1305-ppcfp.pl
+++ b/crypto/poly1305/asm/poly1305-ppcfp.pl
@@ -26,6 +26,7 @@
  # PPC970               6.03/+80%
  # POWER7               3.50/+30%
  # POWER8               3.75/+10%
+# POWER9               2.80/+12%
  
  $flavour = shift;
  
diff --git a/crypto/poly1305/poly1305_ieee754.c b/crypto/poly1305/poly1305_ieee754.c

index 995a02e5c139cdc10f730ad3159229d1dd680bf5..1a06e03558aed20fb34b7b0af9a89f8261e8bb53 100644 (file)
--- a/crypto/poly1305/poly1305_ieee754.c
+++ b/crypto/poly1305/poly1305_ieee754.c
@@ -38,6 +38,7 @@
   * POWER6               4.92
   * POWER7               4.50
   * POWER8               4.10
+ * POWER9               3.14
   *
   * z10                  11.2
   * z196+                7.30
diff --git a/crypto/sha/asm/keccak1600-ppc64.pl b/crypto/sha/asm/keccak1600-ppc64.pl

index f89f71c825e85de83267ad1a8e8a2e5c7e466ab6..60ed2f2326ec1c7cc77d0886c9deb58ae24fd95d 100755 (executable)
--- a/crypto/sha/asm/keccak1600-ppc64.pl
+++ b/crypto/sha/asm/keccak1600-ppc64.pl
@@ -30,6 +30,7 @@
  # PPC970/G5    14.6/+120%
  # POWER7       10.3/+100%
  # POWER8       11.5/+85%
+# POWER9       7.2/+45%
  #
  # (*)  Corresponds to SHA3-256. Percentage after slash is improvement
  #      over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
diff --git a/crypto/sha/asm/keccak1600p8-ppc.pl b/crypto/sha/asm/keccak1600p8-ppc.pl

index feec68839f87e0e16acb9d94139bf25cb4a3bd7a..95e6242f998221c11c5814fcbdfa4a2010d31af9 100755 (executable)
--- a/crypto/sha/asm/keccak1600p8-ppc.pl
+++ b/crypto/sha/asm/keccak1600p8-ppc.pl
@@ -23,7 +23,7 @@
  # buffer for r=1088, which matches SHA3-256. This is 17% better than
  # scalar PPC64 code. It probably should be noted that if POWER8's
  # successor can achieve higher scalar instruction issue rate, then
-# this module will loose...
+# this module will lose... And it does on POWER9 with 8.8 vs. 7.2.
  
  $flavour = shift;
  
diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl

index 93dfef20a9a41e80ef76fb90b9b04d3428edd244..e6e946790519dd91ea568ab4090ccc6b7cb9ffc6 100755 (executable)
--- a/crypto/sha/asm/sha512p8-ppc.pl
+++ b/crypto/sha/asm/sha512p8-ppc.pl
@@ -36,9 +36,9 @@
  # little-endian system]. Numbers in square brackets are for 64-bit
  # build of sha512-ppc.pl, presented for reference.
  #
-#              POWER8
-# SHA256       9.9 [15.8]
-# SHA512       6.3 [10.3]
+#              POWER8          POWER9
+# SHA256       9.9 [15.8]      9.2 [9.3]
+# SHA512       6.3 [10.3]      5.8 [5.9]
  
  $flavour=shift;
  $output =shift;
author	Andy Polyakov <appro@openssl.org>
	Wed, 9 May 2018 10:24:05 +0000 (12:24 +0200)
committer	Andy Polyakov <appro@openssl.org>
	Thu, 10 May 2018 09:44:21 +0000 (11:44 +0200)
crypto/aes/asm/aesp8-ppc.pl		patch \| blob \| history
crypto/chacha/asm/chacha-ppc.pl		patch \| blob \| history
crypto/modes/asm/ghashp8-ppc.pl		patch \| blob \| history
crypto/poly1305/asm/poly1305-ppc.pl		patch \| blob \| history
crypto/poly1305/asm/poly1305-ppcfp.pl		patch \| blob \| history
crypto/poly1305/poly1305_ieee754.c		patch \| blob \| history
crypto/sha/asm/keccak1600-ppc64.pl		patch \| blob \| history
crypto/sha/asm/keccak1600p8-ppc.pl		patch \| blob \| history
crypto/sha/asm/sha512p8-ppc.pl		patch \| blob \| history