Add 0.9.7 specific comments to RC4 assembler modules.

author Andy Polyakov <appro@openssl.org>

Tue, 30 Nov 2004 15:46:46 +0000 (15:46 +0000)

committer Andy Polyakov <appro@openssl.org>

Tue, 30 Nov 2004 15:46:46 +0000 (15:46 +0000)
author Andy Polyakov <appro@openssl.org>
Tue, 30 Nov 2004 15:46:46 +0000 (15:46 +0000)
committer Andy Polyakov <appro@openssl.org>
Tue, 30 Nov 2004 15:46:46 +0000 (15:46 +0000)
diff --git a/crypto/rc4/asm/rc4-586.pl b/crypto/rc4/asm/rc4-586.pl

index 977a9f1237f2d00010302d7e4d16c4142cb3973e..07b2bc6fcdb9777ceef4a631ff846e97e4266426 100644 (file)
--- a/crypto/rc4/asm/rc4-586.pl
+++ b/crypto/rc4/asm/rc4-586.pl
@@ -1,7 +1,7 @@
  #!/usr/local/bin/perl
  
  # At some point it became apparent that the original SSLeay RC4
-# assembler implementation performs suboptimal on latest IA-32
+# assembler implementation performs suboptimally on latest IA-32
  # microarchitectures. After re-tuning performance has changed as
  # following:
  #
@@ -15,10 +15,12 @@
  #      In other words code performing further 13% faster on AMD
  #      would perform almost 2 times slower on Intel PIII...
  #      For reference! This code delivers ~80% of rc4-amd64.pl
-#      performance on same Opteron machine.
+#      performance on the same Opteron machine.
  # (**) This number requires compressed key schedule set up by
-#      RC4_set_key, see commentary section in rc4_skey.c for
-#      further details.
+#      RC4_set_key and therefore doesn't apply to 0.9.7 [option for
+#      compressed key schedule is implemented in 0.9.8 and later,
+#      see commentary section in rc4_skey.c for further details].
+#
  #                                      <appro@fy.chalmers.se>
  
  push(@INC,"perlasm","../../perlasm");
@@ -130,6 +132,8 @@ sub RC4
          &add(  $d,     8);
  
         # detect compressed schedule, see commentary section in rc4_skey.c...
+       # in 0.9.7 context ~50 bytes below RC4_CHAR label remain redundant,
+       # as compressed key schedule is set up in 0.9.8 and later.
         &cmp(&DWP(256,$d),-1);
         &je(&label("RC4_CHAR"));
  
@@ -190,7 +194,8 @@ sub RC4
         &jmp(&label("finished"));
  
         &align(16);
-       # this is essentially Intel P4 specific codepath, see rc4_skey.c...
+       # this is essentially Intel P4 specific codepath, see rc4_skey.c,
+       # and is engaged in 0.9.8 and later context...
         &set_label("RC4_CHAR");
  
         &lea    ($ty,&DWP(0,$in,$ty));
diff --git a/crypto/rc4/asm/rc4-amd64.pl b/crypto/rc4/asm/rc4-amd64.pl

index 35e426d561a35a0c117cc3161132c07bb5df8a00..9e0da8af995604f1f5343fe3b5034b15d0d9657d 100755 (executable)
--- a/crypto/rc4/asm/rc4-amd64.pl
+++ b/crypto/rc4/asm/rc4-amd64.pl
@@ -30,7 +30,9 @@
  # RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to
  # compose blended code, which would perform even within 30% marginal
  # on either AMD and Intel platforms, I implement both cases. See
-# rc4_skey.c for further details...
+# rc4_skey.c for further details... This applies to 0.9.8 and later.
+# In 0.9.7 context RC4_CHAR codepath is never engaged and ~70 bytes
+# of code remain redundant.
  
  $output=shift;
  
diff --git a/crypto/rc4/asm/rc4-ia64.S b/crypto/rc4/asm/rc4-ia64.S

index 4af7fba7b3562a2c7925f816d9ba8b59f43fcdb5..ae84af672958a3d41a0686541c3dfd3cfa80a8d0 100644 (file)
--- a/crypto/rc4/asm/rc4-ia64.S
+++ b/crypto/rc4/asm/rc4-ia64.S
@@ -18,7 +18,7 @@
  // to input and output streams. Secondly, less obvious, it's possible
  // to pull up some references to elements of the key schedule itself.
  // Fact is that such prior loads are not safe only for "degenerated"
-// key schedule, when all elements equal to the same value, which is
+// key schedule, when some elements are equal to the same value, which is
  // never the case [key schedule setup routine makes sure it's not].
  // Furthermore. In order to compress loop body to the minimum, I chose
  // to deploy deposit instruction, which substitutes for the whole
author	Andy Polyakov <appro@openssl.org>
	Tue, 30 Nov 2004 15:46:46 +0000 (15:46 +0000)
committer	Andy Polyakov <appro@openssl.org>
	Tue, 30 Nov 2004 15:46:46 +0000 (15:46 +0000)
crypto/rc4/asm/rc4-586.pl		patch | blob | history
crypto/rc4/asm/rc4-amd64.pl		patch | blob | history
crypto/rc4/asm/rc4-ia64.S		patch | blob | history