From 1fb039fde223891ce375688abbc45a7dea63523a Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 15 Jul 2012 13:37:35 +0000 Subject: [PATCH] wp-x86_64.pl: ~10% performance improvement. (cherry picked from commit 701d593f7095db84459c76265349a83d30a4cae5) --- crypto/whrlpool/asm/wp-x86_64.pl | 43 +++++++++++++++++--------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/crypto/whrlpool/asm/wp-x86_64.pl b/crypto/whrlpool/asm/wp-x86_64.pl index 24b2ff60c3..5a3bdbcf20 100644 --- a/crypto/whrlpool/asm/wp-x86_64.pl +++ b/crypto/whrlpool/asm/wp-x86_64.pl @@ -91,41 +91,44 @@ for($i=0;$i<8;$i++) { $code.="mov @mm[$i],64+$i*8(%rsp)\n"; } # S=L $code.=<<___; xor %rsi,%rsi mov %rsi,24(%rbx) # zero round counter + jmp .Lround .align 16 .Lround: mov 4096(%rbp,%rsi,8),@mm[0] # rc[r] mov 0(%rsp),%eax mov 4(%rsp),%ebx + movz %al,%ecx + movz %ah,%edx ___ for($i=0;$i<8;$i++) { my $func = ($i==0)? "mov" : "xor"; $code.=<<___; - mov %al,%cl - mov %ah,%dl + shr \$16,%eax lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%eax + movz %ah,%edx xor 0(%rbp,%rsi,8),@mm[0] $func 7(%rbp,%rdi,8),@mm[1] - mov %al,%cl - mov %ah,%dl mov $i*8+8(%rsp),%eax # ($i+1)*8 lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi + movz %bh,%edx $func 6(%rbp,%rsi,8),@mm[2] $func 5(%rbp,%rdi,8),@mm[3] - mov %bl,%cl - mov %bh,%dl + shr \$16,%ebx lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%ebx + movz %bh,%edx $func 4(%rbp,%rsi,8),@mm[4] $func 3(%rbp,%rdi,8),@mm[5] - mov %bl,%cl - mov %bh,%dl mov $i*8+8+4(%rsp),%ebx # ($i+1)*8+4 lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi + movz %ah,%edx $func 2(%rbp,%rsi,8),@mm[6] $func 1(%rbp,%rdi,8),@mm[7] ___ @@ -134,32 +137,32 @@ ___ for($i=0;$i<8;$i++) { $code.="mov @mm[$i],$i*8(%rsp)\n"; } # K=L for($i=0;$i<8;$i++) { $code.=<<___; - mov %al,%cl - mov %ah,%dl + shr \$16,%eax lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%eax + movz %ah,%edx xor 0(%rbp,%rsi,8),@mm[0] xor 7(%rbp,%rdi,8),@mm[1] - mov %al,%cl - mov %ah,%dl `"mov 64+$i*8+8(%rsp),%eax" if($i<7);` # 64+($i+1)*8 lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi + movz %bh,%edx xor 6(%rbp,%rsi,8),@mm[2] xor 5(%rbp,%rdi,8),@mm[3] - mov %bl,%cl - mov %bh,%dl + shr \$16,%ebx lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%ebx + movz %bh,%edx xor 4(%rbp,%rsi,8),@mm[4] xor 3(%rbp,%rdi,8),@mm[5] - mov %bl,%cl - mov %bh,%dl `"mov 64+$i*8+8+4(%rsp),%ebx" if($i<7);` # 64+($i+1)*8+4 lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi + movz %ah,%edx xor 2(%rbp,%rsi,8),@mm[6] xor 1(%rbp,%rdi,8),@mm[7] ___ -- 2.25.1