Commentary updates.
author: Andy Polyakov <appro@openssl.org>
Thu, 13 Sep 2007 07:27:10 +0000 (07:27 +0000)
committer: Andy Polyakov <appro@openssl.org>
Thu, 13 Sep 2007 07:27:10 +0000 (07:27 +0000)
crypto/sha/asm/sha1-ppc.pl
crypto/sha/asm/sha1-x86_64.pl
crypto/sha/asm/sha256-armv4.pl
crypto/sha/asm/sha512-ppc.pl
crypto/sha/asm/sha512-s390x.pl

index fbd051652c17f0577c8d80dc583db61b65088353..2c84d5914923a7142a35f4c4cddd31abf51e87f3 100755 (executable)
@@ -18,6 +18,7 @@
 #                      -m64    -m32
 # ----------------------------------
 # PPC970,gcc-4.0.0     +76%    +59%
+# Power6,xlc-7         +68%    +33%
 
 $output = shift;
 
index 93a4f017965ae200f1d357585419105c29bd69f2..f7ed67a72609ef6cfb0fd396e8e2f9aa7d12a90a 100755 (executable)
@@ -27,7 +27,7 @@
 #              gcc 3.4         32-bit asm      cycles/byte
 # Opteron      +45%            +20%            6.8
 # Xeon P4      +65%            +0%             9.9
-# Core2                +60%            +10%            8.8
+# Core2                +60%            +10%            7.0
 
 $output=shift;
 
index d2465e3aa88f24344d4b1ba7a33bfac2960b92ec..4dd09619a6a72c8b2a2e598f0a4407242071024c 100644 (file)
@@ -7,9 +7,11 @@
 # details see http://www.openssl.org/~appro/cryptogams/.
 # ====================================================================
 
-# SHA256 block procedure for ARMv4.
+# SHA256 block procedure for ARMv4. May 2007.
 
-# Performance is ~2x better than gcc 3.4 generated code.
+# Performance is ~2x better than gcc 3.4 generated code and in "abso-
+# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+# byte.
 
 $ctx="r0";     $t0="r0";
 $inp="r1";
@@ -165,7 +167,7 @@ $code.=<<___;
        tst     lr,#1
        moveq   pc,lr                   @ be binary compatible with V4, yet
        bx      lr                      @ interoperable with Thumb ISA:-)
-.size   sha256_block_data_order,.-sha1_block_data_order
+.size   sha256_block_data_order,.-sha256_block_data_order
 .asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
 ___
 
index 087faab866059e0ca9d5a348a8f1baaf999787e1..0ec4b0bcc36b030194133cda3bc631c1a180b869 100755 (executable)
@@ -16,6 +16,7 @@
 #                      -m64    -m32    |       -m64    -m32
 # --------------------------------------+-----------------------
 # PPC970,gcc-4.0.0     +50%    +38%    |       +40%    +410%(*)
+# Power6,xlc-7         +150%   +90%    |       +100%   +430%(*)
 #
 # (*)  64-bit code in 32-bit application context, which actually is
 #      on TODO list. It should be noted that for safe deployment in
index 5dd17473fabd231ecb38e8fa5bda7dd4b8e9a479..d2aceec68c0007dff23d07bfe44a833ed0703ce1 100644 (file)
@@ -12,7 +12,7 @@
 # April 2007.
 #
 # sha256_block_data_order is reportedly >3 times faster than gcc 3.3
-# generated code (must to be a bug in compiler, as improvement is
+# generated code (must be a bug in compiler, as improvement is
 # "pathologically" high, in particular in comparison to other SHA
 # modules). But the real twist is that it detects if hardware support
 # for SHA256 is available and in such case utilizes it. Then the