From 46ac489a1369f6d938adda356accab83acf2987a Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Wed, 4 Dec 2019 12:57:41 +0100 Subject: [PATCH] Improve the overflow handling in rsaz_512_sqr There is always a carry in %rcx or %rbx, in the range 0..2, from the previous stage. It is added to the low 64 bits (%rax) of the 128-bit result of the 64-bit square. The low nibble of any square can only be 0, 1, 4 or 9, so %rax is at most 0xFFFF..F9 and adding a value of at most 2 can never carry out. Therefore one "adcq $0, %rdx" can be removed. Likewise, in the ADX code one "adcx %rbp, $out" can be removed: %rbp is always 0 and the carry is also zero, so that instruction is a no-op. Reviewed-by: Paul Dale (Merged from https://github.com/openssl/openssl/pull/10575) --- crypto/bn/asm/rsaz-x86_64.pl | 42 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl index 7534d5cd03..f4d9c9b129 100755 --- a/crypto/bn/asm/rsaz-x86_64.pl +++ b/crypto/bn/asm/rsaz-x86_64.pl @@ -242,9 +242,9 @@ $code.=<<___; adcq \$0, %rbx mulq %rax + # rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here addq %rcx, %rax movq 16($inp), %rbp - adcq \$0, %rdx addq %rax, %r9 movq 24($inp), %rax adcq %rdx, %r10 @@ -298,9 +298,9 @@ $code.=<<___; adcq \$0, %rcx mulq %rax + # rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here addq %rbx, %rax movq 24($inp), %r10 - adcq \$0, %rdx addq %rax, %r11 movq 32($inp), %rax adcq %rdx, %r12 @@ -349,8 +349,8 @@ $code.=<<___; adcq \$0, %rbx mulq %rax + # rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here addq %rcx, %rax - adcq \$0, %rdx addq %rax, %r13 movq %r12, %rax # 40($inp) adcq %rdx, %r14 @@ -389,8 +389,8 @@ $code.=<<___; adcq \$0, %rcx mulq %rax + # rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here addq %rbx, %rax - adcq \$0, %rdx addq %rax, %r15 movq %rbp, %rax # 48($inp) adcq %rdx, %r8 @@ -420,8 +420,8 @@ $code.=<<___; adcq \$0, %rbx mulq %rax + # rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here addq %rcx, %rax - adcq \$0, %rdx addq %rax, %r9
movq %r14, %rax # 56($inp) adcq %rdx, %r10 @@ -443,8 +443,8 @@ $code.=<<___; adcq \$0, %rcx mulq %rax + # rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here addq %rbx, %rax - adcq \$0, %rdx addq %rax, %r11 movq %r14, %rax # 56($inp) adcq %rdx, %r12 @@ -459,8 +459,8 @@ $code.=<<___; adcq \$0, %rbx mulq %rax + # rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here addq %rcx, %rax - adcq \$0, %rdx addq %r13, %rax adcq %rbx, %rdx @@ -572,9 +572,9 @@ $code.=<<___; .byte 0x48,0x8b,0x96,0x10,0x00,0x00,0x00 # mov 16($inp), %rdx xor %rbx, %rbx + adox %r9, %r9 + # rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here adcx %rcx, %rax - adox %r9, %r9 - adcx %rbp, $out adox %r10, %r10 adcx %rax, %r9 adox %rbp, %rbx @@ -609,9 +609,9 @@ $code.=<<___; mov 24($inp), %rdx xor %rcx, %rcx + adox %r11, %r11 + # rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here adcx %rbx, %rax - adox %r11, %r11 - adcx %rbp, $out adox %r12, %r12 adcx %rax, %r11 adox %rbp, %rcx @@ -642,9 +642,9 @@ $code.=<<___; mov 32($inp), %rdx xor %rbx, %rbx + adox %r13, %r13 + # rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here adcx %rcx, %rax - adox %r13, %r13 - adcx %rbp, $out adox %r14, %r14 adcx %rax, %r13 adox %rbp, %rbx @@ -671,9 +671,9 @@ $code.=<<___; adox %rbp, %r11 xor %rcx, %rcx + adox %r15, %r15 + # rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here adcx %rbx, %rax - adox %r15, %r15 - adcx %rbp, $out adox %r8, %r8 adcx %rax, %r15 adox %rbp, %rcx @@ -696,9 +696,9 @@ $code.=<<___; mov 48($inp), %rdx xor %rbx, %rbx + adox %r9, %r9 + # rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here adcx %rcx, %rax - adox %r9, %r9 - adcx %rbp, $out adox %r10, %r10 adcx %rax, %r9 adcx $out, %r10 @@ -716,9 +716,9 @@ $code.=<<___; mulx %rdx, %rax, $out xor %rcx, %rcx mov 56($inp), %rdx + adox %r11, %r11 + # rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here adcx %rbx, %rax - adox %r11, %r11 - adcx %rbp, $out adox %r12, %r12 adcx %rax, %r11 adox %rbp, %rcx @@ -731,9 
+731,9 @@ $code.=<<___; #eighth iteration mulx %rdx, %rax, %rdx xor %rbx, %rbx + adox %r13, %r13 + # rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here adcx %rcx, %rax - adox %r13, %r13 - adcx %rbp, %rdx adox %rbp, %rbx adcx %r13, %rax adcx %rdx, %rbx -- 2.25.1