-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# Applications using the EVP interface will observe a few percent
# worse performance.]
#
+# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP).
+#
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
-$flavour = shift;
-$output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
- $avx = ($1>=2.19) + ($1>=2.22);
+ $avx = ($1>=2.20) + ($1>=2.22);
}
if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
$avx = ($1>=10) + ($1>=11);
}
-if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9]\.[0-9]+)/) {
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) {
$avx = ($2>=3.0) + ($2>3.0);
}
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
+ or die "can't call $xlate: $!";
*STDOUT=*OUT;
if ($avx>1) {{{
.type _aesni_ctr32_ghash_6x,\@abi-omnipotent
.align 32
_aesni_ctr32_ghash_6x:
+.cfi_startproc
vmovdqu 0x20($const),$T2 # borrow $T2, .Lone_msb
sub \$6,$len
vpxor $Z0,$Z0,$Z0 # $Z0 = 0
vpxor $Z0,$Xi,$Xi # modulo-scheduled
ret
+.cfi_endproc
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
___
######################################################################
.type aesni_gcm_decrypt,\@function,6
.align 32
aesni_gcm_decrypt:
+.cfi_startproc
xor $ret,$ret
cmp \$0x60,$len # minimal accepted length
jb .Lgcm_dec_abort
lea (%rsp),%rax # save stack pointer
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
___
$code.=<<___ if ($win64);
movaps -0xd8(%rax),%xmm6
- movaps -0xd8(%rax),%xmm7
+ movaps -0xc8(%rax),%xmm7
movaps -0xb8(%rax),%xmm8
movaps -0xa8(%rax),%xmm9
movaps -0x98(%rax),%xmm10
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
+.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
mov $ret,%rax # return value
ret
+.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
___
.type _aesni_ctr32_6x,\@abi-omnipotent
.align 32
_aesni_ctr32_6x:
+.cfi_startproc
vmovdqu 0x00-0x80($key),$Z0 # borrow $Z0 for $rndkey
vmovdqu 0x20($const),$T2 # borrow $T2, .Lone_msb
lea -1($rounds),%r13
vpshufb $Ii,$T1,$T1 # next counter value
vpxor $Z0,$inout5,$inout5
jmp .Loop_ctr32
+.cfi_endproc
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
.globl aesni_gcm_encrypt
.type aesni_gcm_encrypt,\@function,6
.align 32
aesni_gcm_encrypt:
+.cfi_startproc
xor $ret,$ret
cmp \$0x60*3,$len # minimal accepted length
jb .Lgcm_enc_abort
lea (%rsp),%rax # save stack pointer
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
+.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
mov $ret,%rax # return value
ret
+.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
___
.globl aesni_gcm_encrypt
.type aesni_gcm_encrypt,\@abi-omnipotent
aesni_gcm_encrypt:
+.cfi_startproc
xor %eax,%eax
ret
+.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.globl aesni_gcm_decrypt
.type aesni_gcm_decrypt,\@abi-omnipotent
aesni_gcm_decrypt:
+.cfi_startproc
xor %eax,%eax
ret
+.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
___
}}}
print $code;
-close STDOUT;
+close STDOUT or die "error closing STDOUT";