X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Fbn%2Fasm%2Fx86_64-mont5.pl;h=5e70547704f87f1077cf5f1eaf540f1dda7f5cce;hb=9bb3e5fd87905e3e9f5f7edcc2e22d98360510ab;hp=1666fbd7a2d4a92b30754d30286a9e005e091f5a;hpb=668a709a8d7ea374ee72ad2d43ac72ec60a80eee;p=oweals%2Fopenssl.git diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index 1666fbd7a2..5e70547704 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -1,7 +1,7 @@ #! /usr/bin/env perl -# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved. # -# Licensed under the OpenSSL license (the "License"). You may not use +# Licensed under the Apache License 2.0 (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy # in the file LICENSE in the source distribution or at # https://www.openssl.org/source/license.html @@ -31,9 +31,10 @@ # the np argument is not just modulus value, but one interleaved # with 0. This is to optimize post-condition... -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } +# $output is the last argument if it looks like a file (it has an extension) +# $flavour is the first argument if it doesn't look like a file +$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; +$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); @@ -42,7 +43,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"" + or die "can't call $xlate: $!"; *STDOUT=*OUT; if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` @@ -60,7 +62,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=12); } -if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9])\.([0-9]+)/) { +if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 $addx = ($ver>=3.03); } @@ -419,22 +421,23 @@ $code.=<<___; mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] mov 8($ap,$i,8),%rax # tp[i+1] lea 1($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! 
jnz .Lsub sbb \$0,%rax # handle upmost overflow bit + mov \$-1,%rbx + xor %rax,%rbx xor $i,$i - and %rax,$ap - not %rax - mov $rp,$np - and %rax,$np mov $num,$j # j=num - or $np,$ap # ap=borrow?tp:rp -.align 16 -.Lcopy: # copy or in-place refresh - mov ($ap,$i,8),%rax + +.Lcopy: # conditional copy + mov ($rp,$i,8),%rcx + mov (%rsp,$i,8),%rdx + and %rbx,%rcx + and %rax,%rdx mov $i,(%rsp,$i,8) # zap temporary vector - mov %rax,($rp,$i,8) # rp[i]=tp[i] + or %rcx,%rdx + mov %rdx,($rp,$i,8) # rp[i]=tp[i] lea 1($i),$i sub \$1,$j jnz .Lcopy @@ -579,6 +582,7 @@ $code.=<<___; .type mul4x_internal,\@abi-omnipotent .align 32 mul4x_internal: +.cfi_startproc shl \$5,$num # $num was in bytes movd `($win64?56:8)`(%rax),%xmm5 # load 7th argument, index lea .Linc(%rip),%rax @@ -1073,6 +1077,7 @@ $code.=<<___ ___ } $code.=<<___; +.cfi_endproc .size mul4x_internal,.-mul4x_internal ___ }}} @@ -1238,6 +1243,7 @@ $code.=<<___; .align 32 bn_sqr8x_internal: __bn_sqr8x_internal: +.cfi_startproc ############################################################## # Squaring part: # @@ -2029,6 +2035,7 @@ __bn_sqr8x_reduction: cmp %rdx,$tptr # end of t[]? jb .L8x_reduction_loop ret +.cfi_endproc .size bn_sqr8x_internal,.-bn_sqr8x_internal ___ } @@ -2041,6 +2048,7 @@ $code.=<<___; .type __bn_post4x_internal,\@abi-omnipotent .align 32 __bn_post4x_internal: +.cfi_startproc mov 8*0($nptr),%r12 lea (%rdi,$num),$tptr # %rdi was $tptr above mov $num,%rcx @@ -2091,6 +2099,7 @@ __bn_post4x_internal: mov $num,%r10 # prepare for back-to-back call neg $num # restore $num ret +.cfi_endproc .size __bn_post4x_internal,.-__bn_post4x_internal ___ } @@ -2100,10 +2109,12 @@ $code.=<<___; .type bn_from_montgomery,\@abi-omnipotent .align 32 bn_from_montgomery: +.cfi_startproc testl \$7,`($win64?"48(%rsp)":"%r9d")` jz bn_from_mont8x xor %eax,%eax ret +.cfi_endproc .size bn_from_montgomery,.-bn_from_montgomery .type bn_from_mont8x,\@function,6 @@ -2399,6 +2410,7 @@ bn_mulx4x_mont_gather5: .type mulx4x_internal,\@abi-omnipotent .align 32 mulx4x_internal: +.cfi_startproc mov $num,8(%rsp) # save -$num (it was in bytes) mov $num,%r10 neg $num # restore $num @@ -2421,7 +2433,7 @@ my $N=$STRIDE/4; # should match cache line size $code.=<<___; movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 - lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimizaton) + lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimization) lea 128($bp),$bptr # size optimization pshufd \$0,%xmm5,%xmm5 # broadcast index @@ -2749,6 +2761,7 @@ $code.=<<___; mov 8*2(%rbp),%r14 mov 8*3(%rbp),%r15 jmp .Lsqrx4x_sub_entry # common post-condition +.cfi_endproc .size mulx4x_internal,.-mulx4x_internal ___ } { @@ -2909,6 +2922,7 @@ bn_powerx5: .align 32 bn_sqrx8x_internal: __bn_sqrx8x_internal: +.cfi_startproc ################################################################## # Squaring part: # @@ -3541,6 +3555,7 @@ __bn_sqrx8x_reduction: cmp 8+8(%rsp),%r8 # end of t[]? 
jb .Lsqrx8x_reduction_loop ret +.cfi_endproc .size bn_sqrx8x_internal,.-bn_sqrx8x_internal ___ } @@ -3552,6 +3567,7 @@ my ($rptr,$nptr)=("%rdx","%rbp"); $code.=<<___; .align 32 __bn_postx4x_internal: +.cfi_startproc mov 8*0($nptr),%r12 mov %rcx,%r10 # -$num mov %rcx,%r9 # -$num @@ -3599,6 +3615,7 @@ __bn_postx4x_internal: neg %r9 # restore $num ret +.cfi_endproc .size __bn_postx4x_internal,.-__bn_postx4x_internal ___ } @@ -3615,6 +3632,7 @@ $code.=<<___; .type bn_get_bits5,\@abi-omnipotent .align 16 bn_get_bits5: +.cfi_startproc lea 0($inp),%r10 lea 1($inp),%r11 mov $num,%ecx @@ -3628,12 +3646,14 @@ bn_get_bits5: shrl %cl,%eax and \$31,%eax ret +.cfi_endproc .size bn_get_bits5,.-bn_get_bits5 .globl bn_scatter5 .type bn_scatter5,\@abi-omnipotent .align 16 bn_scatter5: +.cfi_startproc cmp \$0, $num jz .Lscatter_epilogue lea ($tbl,$idx,8),$tbl @@ -3646,6 +3666,7 @@ bn_scatter5: jnz .Lscatter .Lscatter_epilogue: ret +.cfi_endproc .size bn_scatter5,.-bn_scatter5 .globl bn_gather5 @@ -3653,6 +3674,7 @@ bn_scatter5: .align 32 bn_gather5: .LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases +.cfi_startproc # I can't trust assembler to use specific encoding:-( .byte 0x4c,0x8d,0x14,0x24 #lea (%rsp),%r10 .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 #sub $0x108,%rsp @@ -3737,6 +3759,7 @@ $code.=<<___; lea (%r10),%rsp ret .LSEH_end_bn_gather5: +.cfi_endproc .size bn_gather5,.-bn_gather5 ___ }
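
Note on the main functional change above: besides the license header, argument handling, clang-version detection and the added .cfi_startproc/.cfi_endproc markers, the substantive change is the rewrite of the .Lsub/.Lcopy tail. The old code picked a source pointer with data-dependent AND/NOT/OR on the addresses ("ap = borrow ? tp : rp") and then did a plain copy; the new .Lcopy loop instead selects every limb through a borrow-derived mask, so the memory-access pattern of the final conditional subtraction no longer depends on the secret comparison t < n. Below is a minimal C sketch of that masking idiom only; the function name, parameters and the toy values are illustrative assumptions, not OpenSSL's API and not the code the perlasm actually emits.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/*
 * Illustrative sketch (assumed names, not OpenSSL API): after the .Lsub loop,
 * rp[] holds t - n and tp[] still holds the raw t; "borrow" is 1 if the
 * subtraction underflowed (t < n).  Each limb of the result is then chosen
 * with masks instead of a data-dependent pointer, as the new .Lcopy loop does.
 */
void ct_copy_result(uint64_t *rp, uint64_t *tp, size_t num, uint64_t borrow)
{
    uint64_t keep_t  = 0 - borrow;   /* all-ones when the subtraction borrowed: keep t        */
    uint64_t keep_rp = ~keep_t;      /* all-ones when t >= n: keep the reduced value t - n    */

    for (size_t i = 0; i < num; i++) {
        rp[i] = (rp[i] & keep_rp) | (tp[i] & keep_t);
        tp[i] = (uint64_t)i;         /* zap the temporary vector, mirroring the asm's store of $i */
    }
}

int main(void)
{
    /* toy 2-limb example: rp holds the wrapped t - n, tp holds the raw t, borrow = 1 means t < n */
    uint64_t rp[2] = { 0xfffffffffffffffeULL, 0xffffffffffffffffULL };
    uint64_t tp[2] = { 0x0000000000000003ULL, 0x0000000000000000ULL };

    ct_copy_result(rp, tp, 2, 1);    /* borrow set, so rp[] ends up equal to the original tp[] */
    printf("%016llx %016llx\n", (unsigned long long)rp[1], (unsigned long long)rp[0]);
    return 0;
}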