ARM64 assembly pack: add ThunderX2 results.
author Andy Polyakov <appro@openssl.org>
Wed, 17 Apr 2019 19:08:13 +0000 (21:08 +0200)
committer Andy Polyakov <appro@openssl.org>
Wed, 17 Apr 2019 19:08:13 +0000 (21:08 +0200)
Reviewed-by: Tim Hudson <tjh@openssl.org>
Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/8776)

crypto/aes/asm/aesv8-armx.pl
crypto/aes/asm/vpaes-armv8.pl
crypto/chacha/asm/chacha-armv8.pl
crypto/modes/asm/ghashv8-armx.pl
crypto/poly1305/asm/poly1305-armv8.pl
crypto/sha/asm/keccak1600-armv8.pl
crypto/sha/asm/sha1-armv8.pl
crypto/sha/asm/sha512-armv8.pl

index 81bc1cbf1c228b72eb69db95caf3c20848691474..b708a61d50de3f19e30209d6f1c2f9519bfaaa29 100755 (executable)
@@ -36,6 +36,7 @@
 # Denver       1.96            0.86            0.80
 # Mongoose     1.33            1.20            1.20
 # Kryo         1.26            0.94            1.00
+# ThunderX2    5.95            1.53            1.55
 #
 # (*)  original 3.64/1.34/1.32 results were for r0p0 revision
 #      and are still same even for updated module;
index f08ae583833fffe0a08a744b771d52e0f5f8f4cc..c7839b320841f195794d00505fb8b96a633cbb59 100755 (executable)
@@ -30,6 +30,7 @@
 # Denver(***)       16.6(**)    15.1/17.8(**)    [8.80/9.93         ]
 # Apple A7(***)     22.7(**)    10.9/14.3        [8.45/10.0         ]
 # Mongoose(***)     26.3(**)    21.0/25.0(**)    [13.3/16.8         ]
+# ThunderX2(***)    39.4(**)    33.8/48.6(**)
 #
 # (*)  ECB denotes approximate result for parallelizable modes
 #      such as CBC decrypt, CTR, etc.;
index 56ba1c36ba6762efb197f51ea9da4db5a6c9452f..dc38cbd42d18c48c35b5b9c23a91c06723c87b14 100755 (executable)
@@ -29,6 +29,7 @@
 # X-Gene               9.50/+46%       8.82            8.89(*)
 # Mongoose             8.00/+44%       3.64            3.25
 # Kryo                 8.17/+50%       4.83            4.65
+# ThunderX2            7.26/+48%       7.91            4.30
 #
 # (*)  it's expected that doubling interleave factor doesn't help
 #      all processors, only those with higher NEON latency and
index e89158331209baf658750e10e0cae08b90b29827..fbc49d1c5ba415737dad3b3b7c76197fb531a29e 100644 (file)
@@ -42,6 +42,7 @@
 # Denver       0.51            0.65            6.02
 # Mongoose     0.65            1.10            8.06
 # Kryo         0.76            1.16            8.00
+# ThunderX2    1.05
 #
 # (*)  presented for reference/comparison purposes;
 
index b7aa7dc90b2758b09d072c64033d81d38828df72..b5dd61e1a91c09094d4b4e2c608625b040f47bc1 100755 (executable)
@@ -29,6 +29,7 @@
 # X-Gene       2.13/+68%       2.27
 # Mongoose     1.77/+75%       1.12
 # Kryo         2.70/+55%       1.13
+# ThunderX2    1.17/+95%       1.36
 #
 # (*)  estimate based on resources availability is less than 1.0,
 #      i.e. measured result is worse than expected, presumably binary
index bd15a52a2eb35597f8eb57772b27f07aa9252e45..dc72f18b557a8e46321f800ba3f510bfccb152dc 100755 (executable)
@@ -51,6 +51,7 @@
 # Kryo         12
 # Denver       7.8
 # Apple A7     7.2
+# ThunderX2    9.7
 #
 # (*)  Corresponds to SHA3-256. No improvement coefficients are listed
 #      because they vary too much from compiler to compiler. Newer
index 7a0cbf539bad1c2631fe548c693d14537134a2c0..12403eb783340ea5a0642ef83cb95bde92aa1ada 100644 (file)
@@ -27,6 +27,7 @@
 # X-Gene                               8.80 (+200%)
 # Mongoose     2.05                    6.50 (+160%)
 # Kryo         1.88                    8.00 (+90%)
+# ThunderX2    2.64                    6.36 (+150%)
 #
 # (*)  Software results are presented mostly for reference purposes.
 # (**) Keep in mind that Denver relies on binary translation, which
index f7c67219ed0904ae83bc68a4412639e5564d920c..b9ba05ba3b1b45275affcb1f02b17262288a9bed 100644 (file)
@@ -28,6 +28,7 @@
 # X-Gene                       20.0 (+100%)    12.8 (+300%(***))
 # Mongoose     2.36            13.0 (+50%)     8.36 (+33%)
 # Kryo         1.92            17.4 (+30%)     11.2 (+8%)
+# ThunderX2    2.54            13.2 (+40%)     8.40 (+18%)
 #
 # (*)  Software SHA256 results are of lesser relevance, presented
 #      mostly for informational purposes.