From 9c4fe782607d8542c5f55ef1b5c687fef1da5d75 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Tue, 20 Sep 2005 07:56:03 +0000 Subject: [PATCH] MD5 x86_64 assembler update. Submitted by: Marc Bevand and Charles Liu --- crypto/md5/asm/md5-x86_64.pl | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/crypto/md5/asm/md5-x86_64.pl b/crypto/md5/asm/md5-x86_64.pl index c36a7febf7..75b4085a7b 100755 --- a/crypto/md5/asm/md5-x86_64.pl +++ b/crypto/md5/asm/md5-x86_64.pl @@ -15,7 +15,7 @@ my $code; # dst = x + ((dst + F(x,y,z) + X[k] + T_i) <<< s) # %r10d = X[k_next] # %r11d = z' (copy of z for the next step) -# Each round1_step() takes about 5.71 clocks (9 instructions, 1.58 IPC) +# Each round1_step() takes about 5.3 clocks (9 instructions, 1.7 IPC) sub round1_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; @@ -37,22 +37,26 @@ EOF # round2_step() does: # dst = x + ((dst + G(x,y,z) + X[k] + T_i) <<< s) # %r10d = X[k_next] -# %r11d = y' (copy of y for the next step) -# Each round2_step() takes about 6.22 clocks (9 instructions, 1.45 IPC) +# %r11d = z' (copy of z for the next step) +# %r12d = z' (copy of z for the next step) +# Each round2_step() takes about 5.4 clocks (11 instructions, 2.0 IPC) sub round2_step { my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; $code .= " mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */\n" if ($pos == -1); - $code .= " mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */\n" if ($pos == -1); + $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); + $code .= " mov %edx, %r12d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); $code .= <