From: Andy Polyakov
Date: Sat, 3 Jun 2017 19:08:57 +0000 (+0200)
Subject: ec/asm/ecp_nistz256-x86_64.pl: minor sqr_montx cleanup.
X-Git-Tag: OpenSSL_1_1_1-pre1~1394
X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=8fc063dcc9668589fd95533d25932396d60987f9;p=oweals%2Fopenssl.git

ec/asm/ecp_nistz256-x86_64.pl: minor sqr_montx cleanup.

Drop some redundant instructions in reduction in ecp_nistz256_sqr_montx.

Reviewed-by: Rich Salz
---

diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index 99bbb0b670..2faa8d7265 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -1281,19 +1281,18 @@ __ecp_nistz256_sqr_montx:
  adox $t1, $acc5
  .byte 0x67,0x67
  mulx %rdx, $t0, $t4
- mov $acc0, %rdx
+ mov .Lpoly+8*3(%rip), %rdx
  adox $t0, $acc6
  shlx $a_ptr, $acc0, $t0
  adox $t4, $acc7
  shrx $a_ptr, $acc0, $t4
- mov .Lpoly+8*3(%rip), $t1
+ mov %rdx,$t1

  # reduction step 1
  add $t0, $acc1
  adc $t4, $acc2

- mulx $t1, $t0, $acc0
- mov $acc1, %rdx
+ mulx $acc0, $t0, $acc0
  adc $t0, $acc3
  shlx $a_ptr, $acc1, $t0
  adc \$0, $acc0
@@ -1303,8 +1302,7 @@ __ecp_nistz256_sqr_montx:
  add $t0, $acc2
  adc $t4, $acc3

- mulx $t1, $t0, $acc1
- mov $acc2, %rdx
+ mulx $acc1, $t0, $acc1
  adc $t0, $acc0
  shlx $a_ptr, $acc2, $t0
  adc \$0, $acc1
@@ -1314,8 +1312,7 @@ __ecp_nistz256_sqr_montx:
  add $t0, $acc3
  adc $t4, $acc0

- mulx $t1, $t0, $acc2
- mov $acc3, %rdx
+ mulx $acc2, $t0, $acc2
  adc $t0, $acc1
  shlx $a_ptr, $acc3, $t0
  adc \$0, $acc2
@@ -1325,12 +1322,12 @@ __ecp_nistz256_sqr_montx:
  add $t0, $acc0
  adc $t4, $acc1

- mulx $t1, $t0, $acc3
+ mulx $acc3, $t0, $acc3
  adc $t0, $acc2
  adc \$0, $acc3

- xor $t3, $t3 # cf=0
- adc $acc0, $acc4 # accumulate upper half
+ xor $t3, $t3
+ add $acc0, $acc4 # accumulate upper half
  mov .Lpoly+8*1(%rip), $a_ptr
  adc $acc1, $acc5
  mov $acc4, $acc0
@@ -1339,8 +1336,7 @@ __ecp_nistz256_sqr_montx:
  mov $acc5, $acc1
  adc \$0, $t3

- xor %eax, %eax # cf=0
- sbb \$-1, $acc4 # .Lpoly[0]
+ sub \$-1, $acc4 # .Lpoly[0]
  mov $acc6, $acc2
  sbb $a_ptr, $acc5 # .Lpoly[1]
  sbb \$0, $acc6 # .Lpoly[2]