From 76108ba7eb162f2f280109b04285f74c9b5b140b Mon Sep 17 00:00:00 2001
From: Andy Polyakov <appro@openssl.org>
Date: Thu, 23 Aug 2007 12:04:42 +0000
Subject: [PATCH] Updates from HEAD (x86_64 AES, x86_64-xlate.pl, SHA-512, cpuid).

---
 crypto/aes/asm/aes-x86_64.pl    | 27 ++++++++++++++++-----------
 crypto/perlasm/x86_64-xlate.pl  | 15 ++++++++++++---
 crypto/sha/asm/sha512-x86_64.pl |  8 +++++++-
 crypto/sha/sha512.c             |  3 +++
 crypto/x86_64cpuid.pl           |  4 ++--
 5 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl
index a45fddb4e1..995448a110 100755
--- a/crypto/aes/asm/aes-x86_64.pl
+++ b/crypto/aes/asm/aes-x86_64.pl
@@ -626,14 +626,13 @@ AES_encrypt:
 
 	call	_x86_64_AES_encrypt_compact
 
-	lea	16(%rsp),%rsp
-	pop	$out		# restore out
+	mov	16(%rsp),$out	# restore out
+	mov	24(%rsp),%rsp
 	mov	$s0,0($out)	# write output vector
 	mov	$s1,4($out)
 	mov	$s2,8($out)
 	mov	$s3,12($out)
 
-	mov	(%rsp),%rsp
 	pop	%r15
 	pop	%r14
 	pop	%r13
@@ -1217,19 +1216,18 @@ AES_decrypt:
 	sub	$sbox,%rbp
 	and	\$0x300,%rbp
 	lea	($sbox,%rbp),$sbox
-	shr	\$3,%rbp		# recall "magic" constants!
+	shr	\$3,%rbp	# recall "magic" constants!
 	add	%rbp,$sbox
 
 	call	_x86_64_AES_decrypt_compact
 
-	lea	16(%rsp),%rsp
-	pop	$out	# restore out
-	mov	$s0,0($out)
+	mov	16(%rsp),$out	# restore out
+	mov	24(%rsp),%rsp
+	mov	$s0,0($out)	# write output vector
 	mov	$s1,4($out)
 	mov	$s2,8($out)
 	mov	$s3,12($out)
 
-	mov	(%rsp),%rsp
 	pop	%r15
 	pop	%r14
 	pop	%r13
@@ -1275,6 +1273,13 @@ $code.=<<___;
 .type	AES_set_encrypt_key,\@function,3
 .align	16
 AES_set_encrypt_key:
+	call	_x86_64_AES_set_encrypt_key
+	ret
+.size	AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.type	_x86_64_AES_set_encrypt_key,\@abi-omnipotent
+.align	16
+_x86_64_AES_set_encrypt_key:
 	push	%rbx
 	push	%rbp
 
@@ -1461,8 +1466,8 @@ $code.=<<___;
 .Lexit:
 	pop	%rbp
 	pop	%rbx
-	ret
-.size	AES_set_encrypt_key,.-AES_set_encrypt_key
+	.byte	0xf3,0xc3			# rep ret
+.size	_x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
 ___
 
 sub deckey_ref()
@@ -1527,7 +1532,7 @@ $code.=<<___;
 .align	16
 AES_set_decrypt_key:
 	push	%rdx			# save key schedule
-	call	AES_set_encrypt_key
+	call	_x86_64_AES_set_encrypt_key
 	cmp	\$0,%eax
 	pop	%r8			# restore key schedule
 	jne	.Labort
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index 1e1e4b8bf6..19d372556f 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -85,6 +85,8 @@ my $current_function;
 	    if ($self->{op} =~ /(movz)b.*/) {	# movz is pain...
 		$self->{op} = $1;
 		$self->{sz} = "b";
+	    } elsif ($self->{op} =~ /call/) {
+		$self->{sz} = ""
 	    } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])/) {
 		$self->{op} = $1;
 		$self->{sz} = $2;
@@ -358,7 +360,7 @@ my $current_function;
 				    $self->{value} = $v;
 				    last;
 				  };
-		/\.extern/  && do { $self->{value} = "EXTRN\t".$line; last;  };
+		/\.extern/  && do { $self->{value} = "EXTRN\t".$line.":BYTE"; last;  };
 		/\.globl/   && do { $self->{value} = "PUBLIC\t".$line; last; };
 		/\.type/    && do { ($sym,$type,$narg) = split(',',$line);
 				    if ($type eq "\@function") {
@@ -394,8 +396,15 @@ my $current_function;
 				    last;
 				  };
 		/\.asciz/   && do { if ($line =~ /^"(.*)"$/) {
-					$self->{value} = "DB\t"
-						.join(",",unpack("C*",$1),0);
+					my @str=unpack("C*",$1);
+					push @str,0;
+					while ($#str>15) {
+					    $self->{value}.="DB\t"
+						.join(",",@str[0..15])."\n";
+					    foreach (0..15) { shift @str; }
+					}
+					$self->{value}.="DB\t"
+						.join(",",@str) if (@str);
 				    }
 				    last;
 				  };
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl
index 4cf5c623d6..b6252d31ec 100755
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -41,7 +41,13 @@
 # apparently are not atomic instructions, but implemented in microcode.
 
 $output=shift;
-open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output";
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+open STDOUT,"| $^X $xlate $output";
 
 if ($output =~ /512/) {
 	$func="sha512_block_data_order";
diff --git a/crypto/sha/sha512.c b/crypto/sha/sha512.c
index dabad88fda..b4da9ea521 100644
--- a/crypto/sha/sha512.c
+++ b/crypto/sha/sha512.c
@@ -371,6 +371,9 @@ static const SHA_LONG64 K512[80] = {
     }
 #   endif
 #   define PULL64(x) __pull64be(&(x))
+#   if _MSC_VER<=1200
+#    pragma inline_depth(0)
+#   endif
 #  endif
 # endif
 #endif
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index 0c5d3397d5..9f0f06bd4e 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -136,11 +136,11 @@ OPENSSL_ia32_cpuid:
 	cpuid
 	cmp	\$0,%r9d
 	jne	.Lnotintel
-	or	\$1<<20,%edx		# use reserved bit to engage RC4_CHAR
+	or	\$0x00100000,%edx	# use reserved bit 20 to engage RC4_CHAR
 	and	\$15,%ah
 	cmp	\$15,%ah		# examine Family ID
 	je	.Lnotintel
-	or	\$1<<30,%edx		# use reserved bit to skip unrolled loop
+	or	\$0x40000000,%edx	# use reserved bit 30 to skip unrolled loop
 .Lnotintel:
 	bt	\$28,%edx		# test hyper-threading bit
 	jnc	.Ldone
-- 
2.25.1