Reverted patch for PR#2095. Addressed by Andy now in x86_64-xlate.pl

[oweals/openssl.git] / crypto / sha / asm / sha512-armv4.pl
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl

index 0d2ef8b1f44ff66ae8a51be019a5b241317f518f..f27e9cd31953a8f86bc6212a2addfdb289cedce0 100644 (file)
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -12,12 +12,18 @@
  # This code is ~4.5 (four and a half) times faster than code generated
  # by gcc 3.4 and it spends ~72 clock cycles per byte. 
  
-# This module currently has dependency on byte order, namely *dword*
-# order in ctx->h[0-9]. I have to think of a way to reliably detect
-# "endianness" [and flip below two constants] or arrange given dword
-# order in C.
-$lo=0; # this denotes little-endian platform.
-$hi=4;
+# Byte order [in]dependence. =========================================
+#
+# Caller is expected to maintain specific *dword* order in h[0-7],
+# namely with most significant dword at *lower* address, which is
+# reflected in below two parameters. *Byte* order within these dwords
+# in turn is whatever the *native* byte order is on the current platform.
+$hi=0;
+$lo=4;
+# ====================================================================
+
+$output=shift;
+open STDOUT,">$output";
  
  $ctx="r0";
  $inp="r1";
@@ -104,7 +110,7 @@ $code.=<<___;
         orreq   $Ktbl,$Ktbl,#1
  
         ldr     $t2,[sp,#$Boff+0]       @ b.lo
-       ldr     $t3,[sp,#$Coff+0]               @ c.lo
+       ldr     $t3,[sp,#$Coff+0]       @ c.lo
         @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
         @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
         @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
@@ -387,4 +393,6 @@ $code.=<<___;
  ___
  
  $code =~ s/\`([^\`]*)\`/eval $1/gem;
+$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4
  print $code;
+close STDOUT; # enforce flush