x86_64 assembly pack: add Goldmont performance results.

author Andy Polyakov <appro@openssl.org>

Fri, 14 Oct 2016 11:25:06 +0000 (13:25 +0200)

committer Andy Polyakov <appro@openssl.org>

Mon, 24 Oct 2016 11:02:41 +0000 (13:02 +0200)
author Andy Polyakov <appro@openssl.org>
Fri, 14 Oct 2016 11:25:06 +0000 (13:25 +0200)
committer Andy Polyakov <appro@openssl.org>
Mon, 24 Oct 2016 11:02:41 +0000 (13:02 +0200)
diff --git a/crypto/aes/asm/aesni-x86_64.pl b/crypto/aes/asm/aesni-x86_64.pl

index 25dd120dd22a4c2548d3d401fbe0505080174a43..98ca17991d8fb4f8926223909d34582330e2591c 100644 (file)
--- a/crypto/aes/asm/aesni-x86_64.pl
+++ b/crypto/aes/asm/aesni-x86_64.pl
@@ -179,6 +179,7 @@
  # Haswell      4.44/0.63       0.63    0.73    0.63    0.70
  # Skylake      2.62/0.63       0.63    0.63    0.63
  # Silvermont   5.75/3.54       3.56    4.12    3.87(*) 4.11
+# Goldmont     3.82/1.26       1.26    1.29    1.29    1.50
  # Bulldozer    5.77/0.70       0.72    0.90    0.70    0.95
  #
  # (*)  Atom Silvermont ECB result is suboptimal because of penalties
diff --git a/crypto/aes/asm/bsaes-x86_64.pl b/crypto/aes/asm/bsaes-x86_64.pl

index 6b14a517dc58a96ee2ceda27f08dc66f44fa1b9c..921d870e98bdcf2b08dee54f53fafedb01bc91cb 100644 (file)
--- a/crypto/aes/asm/bsaes-x86_64.pl
+++ b/crypto/aes/asm/bsaes-x86_64.pl
@@ -48,6 +48,7 @@
  # Nehalem(**)  7.63            6.88            +11%
  # Atom         17.1            16.4            +4%
  # Silvermont   -               12.9
+# Goldmont     -               8.85
  #
  # (*)  Comparison is not completely fair, because "this" is ECB,
  #      i.e. no extra processing such as counter values calculation
@@ -87,6 +88,7 @@
  # Nehalem      7.80
  # Atom         17.9
  # Silvermont   14.0
+# Goldmont     10.2
  #
  # November 2011.
  #
diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl

index 265b6aa3620169b491828b9b8e6ed97e73377bd2..422e8ee4423e6d1063e79c3c783c7e76d98fd843 100644 (file)
--- a/crypto/aes/asm/vpaes-x86_64.pl
+++ b/crypto/aes/asm/vpaes-x86_64.pl
@@ -38,6 +38,7 @@
  # Nehalem      29.6/40.3/14.6          10.0/11.8
  # Atom         57.3/74.2/32.1          60.9/77.2(***)
  # Silvermont   52.7/64.0/19.5          48.8/60.8(***)
+# Goldmont     38.9/49.0/17.8          10.6/12.6
  #
  # (*)  "Hyper-threading" in the context refers rather to cache shared
  #      among multiple cores, than to specifically Intel HTT. As vast
diff --git a/crypto/chacha/asm/chacha-x86.pl b/crypto/chacha/asm/chacha-x86.pl

index 3c6e67d9c8738ec84c51bed0b68928d54a31590c..f00b7d2935b47b6789e0990a57ddce0b88a41036 100755 (executable)
--- a/crypto/chacha/asm/chacha-x86.pl
+++ b/crypto/chacha/asm/chacha-x86.pl
@@ -29,6 +29,7 @@
  # Sandy Bridge 10.5/+47%       3.20
  # Haswell      8.15/+50%       2.83
  # Silvermont   17.4/+36%       8.35
+# Goldmont     13.4/+40%       4.36
  # Sledgehammer 10.2/+54%
  # Bulldozer    13.4/+50%       4.38(*)
  #
diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl

index 4b1750cd5dea6dd41bd5896ad7e07856e9a94e9d..347dfcb3e578a35fb54981dd9880f17cd8d58d28 100755 (executable)
--- a/crypto/chacha/asm/chacha-x86_64.pl
+++ b/crypto/chacha/asm/chacha-x86_64.pl
@@ -29,6 +29,7 @@
  # Ivy Bridge   6.71/+46%       5.40/6.49       2.41
  # Haswell      5.92/+43%       5.20/6.45       2.42        1.23
  # Silvermont   12.0/+33%       7.75/7.40       7.03(iii)
+# Goldmont     10.6/+17%       5.10/-          3.28
  # Sledgehammer 7.28/+52%       -/14.2(ii)      -
  # Bulldozer    9.66/+28%       9.85/11.1       3.06(iv)
  # VIA Nano     10.5/+46%       6.72/8.60       6.05
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl

index b4a8ddbd2e80955915708ae369c892c0ccf21929..387e3f854efa7b8f76968a402ec9384b72a56e79 100644 (file)
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@@ -74,6 +74,7 @@
  # Skylake      0.44(+110%)(if system doesn't support AVX)
  # Bulldozer    1.49(+27%)
  # Silvermont   2.88(+13%)
+# Goldmont     1.08(+24%)
  
  # March 2013
  #
diff --git a/crypto/poly1305/asm/poly1305-x86.pl b/crypto/poly1305/asm/poly1305-x86.pl

index ecc0ee62eaecb9a14bb21a7666d584b58713cd62..ab24dfcfaddaaa082f36382d30dd62adf777581f 100755 (executable)
--- a/crypto/poly1305/asm/poly1305-x86.pl
+++ b/crypto/poly1305/asm/poly1305-x86.pl
@@ -30,6 +30,7 @@
  # Sandy Bridge 3.90/+100%      1.36
  # Haswell      3.88/+70%       1.18            0.72
  # Silvermont   11.0/+40%       4.80
+# Goldmont     4.10/+200%      2.10
  # VIA Nano     6.71/+90%       2.47
  # Sledgehammer 3.51/+180%      4.27
  # Bulldozer    4.53/+140%      1.31
diff --git a/crypto/poly1305/asm/poly1305-x86_64.pl b/crypto/poly1305/asm/poly1305-x86_64.pl

index 784ff4b75837fb2f7b2b60328eb60eedaa90d3ef..4c22ded58024ba5c84fb5196db1e08e011c2151c 100755 (executable)
--- a/crypto/poly1305/asm/poly1305-x86_64.pl
+++ b/crypto/poly1305/asm/poly1305-x86_64.pl
@@ -29,6 +29,7 @@
  # Haswell      1.14/+175%      1.11            0.65
  # Skylake      1.13/+120%      0.96            0.51
  # Silvermont   2.83/+95%       -
+# Goldmont     1.70/+180%      -
  # VIA Nano     1.82/+150%      -
  # Sledgehammer 1.38/+160%      -
  # Bulldozer    2.30/+130%      0.97
diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl

index 97baae37cd954c7ecddb70669ce981f1a0985beb..e11c6e4742ec4bf4aacd92054c33270c49ab57bc 100755 (executable)
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -85,9 +85,11 @@
  # VIA Nano     9.32            7.15/+30%
  # Atom         10.3            9.17/+12%
  # Silvermont   13.1(*)         9.37/+40%
+# Goldmont     8.13            6.42/+27%       1.70/+380%(**)
  #
  # (*)  obviously suboptimal result, nothing was done about it,
  #      because SSSE3 code is compiled unconditionally;
+# (**) SHAEXT result
  
  $flavour = shift;
  $output  = shift;
diff --git a/crypto/sha/asm/sha512-586.pl b/crypto/sha/asm/sha512-586.pl

index 0887e061489a56cfe4ef31779e7161a3ea4bcd4e..3873934b698135e2b56f1699ea4e797392ffca67 100644 (file)
--- a/crypto/sha/asm/sha512-586.pl
+++ b/crypto/sha/asm/sha512-586.pl
@@ -36,6 +36,7 @@
  # VIA Nano     91      -       52      33      14.7
  # Atom         126     -       68      48(***) 14.7
  # Silvermont   97      -       58      42(***) 17.5
+# Goldmont     80      -       48      19.5    12.0
  #
  # (*)  whichever best applicable.
  # (**) x86_64 assembler performance is presented for reference
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl

index 63a62656f6508deaf7ab55799d28cdf412a399ad..c9b7b2812370e1324e600d00976d4bba3848e667 100755 (executable)
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -98,8 +98,9 @@
  # VIA Nano     23.0    16.5(+39%)  -               14.7    -
  # Atom         23.0    18.9(+22%)  -               14.7    -
  # Silvermont   27.4    20.6(+33%)  -               17.5    -
+# Goldmont     18.9    14.3(+32%)  4.16(+350%)     12.0    -
  #
-# (*)  whichever best applicable;
+# (*)  whichever best applicable, including SHAEXT;
  # (**) switch from ror to shrd stands for fair share of improvement;
  # (***)        execution time is fully determined by remaining integer-only
  #      part, body_00_15; reducing the amount of SIMD instructions
author	Andy Polyakov <appro@openssl.org>
	Fri, 14 Oct 2016 11:25:06 +0000 (13:25 +0200)
committer	Andy Polyakov <appro@openssl.org>
	Mon, 24 Oct 2016 11:02:41 +0000 (13:02 +0200)
crypto/aes/asm/aesni-x86_64.pl		patch | blob | history
crypto/aes/asm/bsaes-x86_64.pl		patch | blob | history
crypto/aes/asm/vpaes-x86_64.pl		patch | blob | history
crypto/chacha/asm/chacha-x86.pl		patch | blob | history
crypto/chacha/asm/chacha-x86_64.pl		patch | blob | history
crypto/modes/asm/ghash-x86_64.pl		patch | blob | history
crypto/poly1305/asm/poly1305-x86.pl		patch | blob | history
crypto/poly1305/asm/poly1305-x86_64.pl		patch | blob | history
crypto/sha/asm/sha1-x86_64.pl		patch | blob | history
crypto/sha/asm/sha512-586.pl		patch | blob | history
crypto/sha/asm/sha512-x86_64.pl		patch | blob | history