ec/asm/ecp_nistz256-x86_64.pl: minor sqr_montx cleanup.

author Andy Polyakov <appro@openssl.org>
Sat, 3 Jun 2017 19:08:57 +0000 (21:08 +0200)
committer Andy Polyakov <appro@openssl.org>
Mon, 5 Jun 2017 12:25:16 +0000 (14:25 +0200)
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl

index 99bbb0b67005d0e049a4be025aa086becb6aab7f..2faa8d7265065c0a16e69c2a3382dc90464c24c7 100755 (executable)
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -1281,19 +1281,18 @@ __ecp_nistz256_sqr_montx:
         adox    $t1, $acc5
         .byte   0x67,0x67
         mulx    %rdx, $t0, $t4
-        mov    $acc0, %rdx
+        mov    .Lpoly+8*3(%rip), %rdx
         adox    $t0, $acc6
          shlx   $a_ptr, $acc0, $t0
         adox    $t4, $acc7
          shrx   $a_ptr, $acc0, $t4
-        mov    .Lpoly+8*3(%rip), $t1
+       mov     %rdx,$t1
  
         # reduction step 1
         add     $t0, $acc1
         adc     $t4, $acc2
  
-       mulx    $t1, $t0, $acc0
-        mov    $acc1, %rdx
+       mulx    $acc0, $t0, $acc0
         adc     $t0, $acc3
          shlx   $a_ptr, $acc1, $t0
         adc     \$0, $acc0
@@ -1303,8 +1302,7 @@ __ecp_nistz256_sqr_montx:
         add     $t0, $acc2
         adc     $t4, $acc3
  
-       mulx    $t1, $t0, $acc1
-        mov    $acc2, %rdx
+       mulx    $acc1, $t0, $acc1
         adc     $t0, $acc0
          shlx   $a_ptr, $acc2, $t0
         adc     \$0, $acc1
@@ -1314,8 +1312,7 @@ __ecp_nistz256_sqr_montx:
         add     $t0, $acc3
         adc     $t4, $acc0
  
-       mulx    $t1, $t0, $acc2
-        mov    $acc3, %rdx
+       mulx    $acc2, $t0, $acc2
         adc     $t0, $acc1
          shlx   $a_ptr, $acc3, $t0
         adc     \$0, $acc2
@@ -1325,12 +1322,12 @@ __ecp_nistz256_sqr_montx:
         add     $t0, $acc0
         adc     $t4, $acc1
  
-       mulx    $t1, $t0, $acc3
+       mulx    $acc3, $t0, $acc3
         adc     $t0, $acc2
         adc     \$0, $acc3
  
-       xor     $t3, $t3                # cf=0
-       adc     $acc0, $acc4            # accumulate upper half
+       xor     $t3, $t3
+       add     $acc0, $acc4            # accumulate upper half
          mov    .Lpoly+8*1(%rip), $a_ptr
         adc     $acc1, $acc5
          mov    $acc4, $acc0
@@ -1339,8 +1336,7 @@ __ecp_nistz256_sqr_montx:
          mov    $acc5, $acc1
         adc     \$0, $t3
  
-       xor     %eax, %eax              # cf=0
-       sbb     \$-1, $acc4             # .Lpoly[0]
+       sub     \$-1, $acc4             # .Lpoly[0]
          mov    $acc6, $acc2
         sbb     $a_ptr, $acc5           # .Lpoly[1]
         sbb     \$0, $acc6              # .Lpoly[2]
author	Andy Polyakov <appro@openssl.org>
	Sat, 3 Jun 2017 19:08:57 +0000 (21:08 +0200)
committer	Andy Polyakov <appro@openssl.org>
	Mon, 5 Jun 2017 12:25:16 +0000 (14:25 +0200)