x86_64 assembly pack: add Goldmont performance results.
author Andy Polyakov <appro@openssl.org>
Fri, 14 Oct 2016 11:25:06 +0000 (13:25 +0200)
committer Andy Polyakov <appro@openssl.org>
Mon, 24 Oct 2016 11:02:41 +0000 (13:02 +0200)
Reviewed-by: Richard Levitte <levitte@openssl.org>
(cherry picked from commit ace05265d2d599e350cf84ed60955b7f2b173bc9)

crypto/aes/asm/aesni-x86_64.pl
crypto/aes/asm/bsaes-x86_64.pl
crypto/aes/asm/vpaes-x86_64.pl
crypto/chacha/asm/chacha-x86.pl
crypto/chacha/asm/chacha-x86_64.pl
crypto/modes/asm/ghash-x86_64.pl
crypto/poly1305/asm/poly1305-x86.pl
crypto/poly1305/asm/poly1305-x86_64.pl
crypto/sha/asm/sha1-x86_64.pl
crypto/sha/asm/sha512-586.pl
crypto/sha/asm/sha512-x86_64.pl

index 25dd120dd22a4c2548d3d401fbe0505080174a43..98ca17991d8fb4f8926223909d34582330e2591c 100644 (file)
 # Haswell      4.44/0.63       0.63    0.73    0.63    0.70
 # Skylake      2.62/0.63       0.63    0.63    0.63
 # Silvermont   5.75/3.54       3.56    4.12    3.87(*) 4.11
+# Goldmont     3.82/1.26       1.26    1.29    1.29    1.50
 # Bulldozer    5.77/0.70       0.72    0.90    0.70    0.95
 #
 # (*)  Atom Silvermont ECB result is suboptimal because of penalties
index 6b14a517dc58a96ee2ceda27f08dc66f44fa1b9c..921d870e98bdcf2b08dee54f53fafedb01bc91cb 100644 (file)
@@ -48,6 +48,7 @@
 # Nehalem(**)  7.63            6.88            +11%
 # Atom         17.1            16.4            +4%
 # Silvermont   -               12.9
+# Goldmont     -               8.85
 #
 # (*)  Comparison is not completely fair, because "this" is ECB,
 #      i.e. no extra processing such as counter values calculation
@@ -87,6 +88,7 @@
 # Nehalem      7.80
 # Atom         17.9
 # Silvermont   14.0
+# Goldmont     10.2
 #
 # November 2011.
 #
index 265b6aa3620169b491828b9b8e6ed97e73377bd2..422e8ee4423e6d1063e79c3c783c7e76d98fd843 100644 (file)
@@ -38,6 +38,7 @@
 # Nehalem      29.6/40.3/14.6          10.0/11.8
 # Atom         57.3/74.2/32.1          60.9/77.2(***)
 # Silvermont   52.7/64.0/19.5          48.8/60.8(***)
+# Goldmont     38.9/49.0/17.8          10.6/12.6
 #
 # (*)  "Hyper-threading" in the context refers rather to cache shared
 #      among multiple cores, than to specifically Intel HTT. As vast
index 3c6e67d9c8738ec84c51bed0b68928d54a31590c..f00b7d2935b47b6789e0990a57ddce0b88a41036 100755 (executable)
@@ -29,6 +29,7 @@
 # Sandy Bridge 10.5/+47%       3.20
 # Haswell      8.15/+50%       2.83
 # Silvermont   17.4/+36%       8.35
+# Goldmont     13.4/+40%       4.36
 # Sledgehammer 10.2/+54%
 # Bulldozer    13.4/+50%       4.38(*)
 #
index 4b1750cd5dea6dd41bd5896ad7e07856e9a94e9d..347dfcb3e578a35fb54981dd9880f17cd8d58d28 100755 (executable)
@@ -29,6 +29,7 @@
 # Ivy Bridge   6.71/+46%       5.40/6.49       2.41
 # Haswell      5.92/+43%       5.20/6.45       2.42        1.23
 # Silvermont   12.0/+33%       7.75/7.40       7.03(iii)
+# Goldmont     10.6/+17%       5.10/-          3.28
 # Sledgehammer 7.28/+52%       -/14.2(ii)      -
 # Bulldozer    9.66/+28%       9.85/11.1       3.06(iv)
 # VIA Nano     10.5/+46%       6.72/8.60       6.05
index b4a8ddbd2e80955915708ae369c892c0ccf21929..387e3f854efa7b8f76968a402ec9384b72a56e79 100644 (file)
@@ -74,6 +74,7 @@
 # Skylake      0.44(+110%)(if system doesn't support AVX)
 # Bulldozer    1.49(+27%)
 # Silvermont   2.88(+13%)
+# Goldmont     1.08(+24%)
 
 # March 2013
 #
index ecc0ee62eaecb9a14bb21a7666d584b58713cd62..ab24dfcfaddaaa082f36382d30dd62adf777581f 100755 (executable)
@@ -30,6 +30,7 @@
 # Sandy Bridge 3.90/+100%      1.36
 # Haswell      3.88/+70%       1.18            0.72
 # Silvermont   11.0/+40%       4.80
+# Goldmont     4.10/+200%      2.10
 # VIA Nano     6.71/+90%       2.47
 # Sledgehammer 3.51/+180%      4.27
 # Bulldozer    4.53/+140%      1.31
index 784ff4b75837fb2f7b2b60328eb60eedaa90d3ef..4c22ded58024ba5c84fb5196db1e08e011c2151c 100755 (executable)
@@ -29,6 +29,7 @@
 # Haswell      1.14/+175%      1.11            0.65
 # Skylake      1.13/+120%      0.96            0.51
 # Silvermont   2.83/+95%       -
+# Goldmont     1.70/+180%      -
 # VIA Nano     1.82/+150%      -
 # Sledgehammer 1.38/+160%      -
 # Bulldozer    2.30/+130%      0.97
index 97baae37cd954c7ecddb70669ce981f1a0985beb..e11c6e4742ec4bf4aacd92054c33270c49ab57bc 100755 (executable)
 # VIA Nano     9.32            7.15/+30%
 # Atom         10.3            9.17/+12%
 # Silvermont   13.1(*)         9.37/+40%
+# Goldmont     8.13            6.42/+27%       1.70/+380%(**)
 #
 # (*)  obviously suboptimal result, nothing was done about it,
 #      because SSSE3 code is compiled unconditionally;
+# (**) SHAEXT result
 
 $flavour = shift;
 $output  = shift;
index 0887e061489a56cfe4ef31779e7161a3ea4bcd4e..3873934b698135e2b56f1699ea4e797392ffca67 100644 (file)
@@ -36,6 +36,7 @@
 # VIA Nano     91      -       52      33      14.7
 # Atom         126     -       68      48(***) 14.7
 # Silvermont   97      -       58      42(***) 17.5
+# Goldmont     80      -       48      19.5    12.0
 #
 # (*)  whichever best applicable.
 # (**) x86_64 assembler performance is presented for reference
index 63a62656f6508deaf7ab55799d28cdf412a399ad..c9b7b2812370e1324e600d00976d4bba3848e667 100755 (executable)
@@ -98,8 +98,9 @@
 # VIA Nano     23.0    16.5(+39%)  -               14.7    -
 # Atom         23.0    18.9(+22%)  -               14.7    -
 # Silvermont   27.4    20.6(+33%)  -               17.5    -
+# Goldmont     18.9    14.3(+32%)  4.16(+350%)     12.0    -
 #
-# (*)  whichever best applicable;
+# (*)  whichever best applicable, including SHAEXT;
 # (**) switch from ror to shrd stands for fair share of improvement;
 # (***)        execution time is fully determined by remaining integer-only
 #      part, body_00_15; reducing the amount of SIMD instructions