From: Andy Polyakov
Date: Sat, 30 Dec 2017 14:08:31 +0000 (+0100)
Subject: ec/ecp_nistz256.c: improve ECDSA sign by 30-40%.
X-Git-Tag: OpenSSL_1_1_1-pre1~229
X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=eb7916960bf50f436593abe3d5f2e0592d291017;p=oweals%2Fopenssl.git

ec/ecp_nistz256.c: improve ECDSA sign by 30-40%.

This is based on RT#3810, which added dedicated modular inversion.
ECDSA verify results improve as well, but not as much.

Reviewed-by: Rich Salz
(Merged from https://github.com/openssl/openssl/pull/5001)
---

diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index 48d64645f0..85c4131983 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -1,15 +1,17 @@
 #! /usr/bin/env perl
 # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
 # Copyright (c) 2014, Intel Corporation. All Rights Reserved.
+# Copyright (c) 2015 CloudFlare, Inc.
 #
 # Licensed under the OpenSSL license (the "License"). You may not use
 # this file except in compliance with the License. You can obtain a copy
 # in the file LICENSE in the source distribution or at
 # https://www.openssl.org/source/license.html
 #
-# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1, 3)
 # (1) Intel Corporation, Israel Development Center, Haifa, Israel
 # (2) University of Haifa, Israel
+# (3) CloudFlare, Inc.
 #
 # Reference:
 # S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with
@@ -18,23 +20,25 @@
 # Further optimization by :
 #
 #		this/original	with/without -DECP_NISTZ256_ASM(*)
-# Opteron	+12-49%		+110-150%
-# Bulldozer	+14-45%		+175-210%
-# P4		+18-46%		n/a :-(
-# Westmere	+12-34%		+80-87%
-# Sandy Bridge	+9-35%		+110-120%
-# Ivy Bridge	+9-35%		+110-125%
-# Haswell	+8-37%		+140-160%
-# Broadwell	+18-58%		+145-210%
-# Atom		+15-50%		+130-180%
-# VIA Nano	+43-160%	+300-480%
+# Opteron	+15-49%		+150-195%
+# Bulldozer	+18-45%		+175-240%
+# P4		+24-46%		+100-150%
+# Westmere	+18-34%		+87-160%
+# Sandy Bridge	+14-35%		+120-185%
+# Ivy Bridge	+11-35%		+125-180%
+# Haswell	+10-37%		+160-200%
+# Broadwell	+24-58%		+210-270%
+# Atom		+20-50%		+180-240%
+# VIA Nano	+50-160%	+480-480%
 #
 # (*) "without -DECP_NISTZ256_ASM" refers to build with
 #     "enable-ec_nistp_64_gcc_128";
 #
 # Ranges denote minimum and maximum improvement coefficients depending
-# on benchmark. Lower coefficients are for ECDSA sign, relatively fastest
-# server-side operation. Keep in mind that +100% means 2x improvement.
+# on benchmark. In "this/original" column lower coefficient is for
+# ECDSA sign, while in "with/without" - for ECDH key agreement, and
+# higher - for ECDSA sign, relatively fastest server-side operation.
+# Keep in mind that +100% means 2x improvement.
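All of the new code in this patch revolves around Montgomery arithmetic modulo ord(P-256). The .LordK word defined a few lines below is the usual Montgomery constant, minus the inverse of the least-significant modulus limb modulo 2^64; it is what lets each reduction step clear one 64-bit limb ("guaranteed to be zero" in the assembly comments). The following standalone C snippet is an illustrative sketch of my own, not part of the patch: it rederives that constant by Hensel lifting from the ord limb and the .LordK value quoted from the patch.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Least-significant limb of .Lord and the .LordK word from the patch. */
    const uint64_t ord0 = 0xf3b9cac2fc632551ULL;
    const uint64_t ordK = 0xccd1c8aaee00bc4fULL;

    /* Hensel lifting: ord0 is its own inverse mod 8 (odd squares are 1 mod 8),
     * and each Newton step x = x*(2 - ord0*x) doubles the number of correct
     * low bits, so five steps reach a full 64-bit inverse. */
    uint64_t inv = ord0;
    int i;
    for (i = 0; i < 5; i++)
        inv *= 2 - ord0 * inv;

    printf("-ord0^-1 mod 2^64 = %016llx\n", (unsigned long long)(0 - inv));
    printf(".LordK            = %016llx\n", (unsigned long long)ordK);
    /* The property the reduction relies on: ord0 * ordK == -1 mod 2^64,
     * so t + (t*ordK)*ord0 == 0 mod 2^64 for any t. Expect 0 below. */
    printf("check             = %016llx\n",
           (unsigned long long)(ord0 * ordK + 1));
    return 0;
}
```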
$flavour = shift; $output = shift; @@ -95,6 +99,12 @@ $code.=<<___; .long 3,3,3,3,3,3,3,3 .LONE_mont: .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + +# Constants for computations modulo ord(p256) +.Lord: +.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 +.LordK: +.quad 0xccd1c8aaee00bc4f ___ { @@ -481,6 +491,1014 @@ my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("%r$_",(8..15)); my ($t0,$t1,$t2,$t3,$t4)=("%rcx","%rbp","%rbx","%rdx","%rax"); my ($poly1,$poly3)=($acc6,$acc7); +$code.=<<___; +################################################################################ +# void ecp_nistz256_ord_mul_mont( +# uint64_t res[4], +# uint64_t a[4], +# uint64_t b[4]); + +.globl ecp_nistz256_ord_mul_mont +.type ecp_nistz256_ord_mul_mont,\@function,3 +.align 32 +ecp_nistz256_ord_mul_mont: +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx + cmp \$0x80100, %ecx + je .Lecp_nistz256_ord_mul_montx +___ +$code.=<<___; + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + mov 8*0($b_org), %rax + mov $b_org, $b_ptr + lea .Lord(%rip), %r14 + mov .LordK(%rip), %r15 + + ################################# * b[0] + mov %rax, $t0 + mulq 8*0($a_ptr) + mov %rax, $acc0 + mov $t0, %rax + mov %rdx, $acc1 + + mulq 8*1($a_ptr) + add %rax, $acc1 + mov $t0, %rax + adc \$0, %rdx + mov %rdx, $acc2 + + mulq 8*2($a_ptr) + add %rax, $acc2 + mov $t0, %rax + adc \$0, %rdx + + mov $acc0, $acc5 + imulq %r15,$acc0 + + mov %rdx, $acc3 + mulq 8*3($a_ptr) + add %rax, $acc3 + mov $acc0, %rax + adc \$0, %rdx + mov %rdx, $acc4 + + ################################# First reduction step + mulq 8*0(%r14) + mov $acc0, $t1 + add %rax, $acc5 # guaranteed to be zero + mov $acc0, %rax + adc \$0, %rdx + mov %rdx, $t0 + + sub $acc0, $acc2 + sbb \$0, $acc0 # can't borrow + + mulq 8*1(%r14) + add $t0, $acc1 + adc \$0, %rdx + add %rax, $acc1 + mov $t1, %rax + adc %rdx, $acc2 + mov $t1, %rdx + adc \$0, $acc0 # can't overflow + + shl \$32, %rax + shr \$32, %rdx + sub %rax, $acc3 + mov 8*1($b_ptr), %rax + sbb %rdx, $t1 # can't borrow + + add $acc0, $acc3 + adc $t1, $acc4 + adc \$0, $acc5 + + ################################# * b[1] + mov %rax, $t0 + mulq 8*0($a_ptr) + add %rax, $acc1 + mov $t0, %rax + adc \$0, %rdx + mov %rdx, $t1 + + mulq 8*1($a_ptr) + add $t1, $acc2 + adc \$0, %rdx + add %rax, $acc2 + mov $t0, %rax + adc \$0, %rdx + mov %rdx, $t1 + + mulq 8*2($a_ptr) + add $t1, $acc3 + adc \$0, %rdx + add %rax, $acc3 + mov $t0, %rax + adc \$0, %rdx + + mov $acc1, $t0 + imulq %r15, $acc1 + + mov %rdx, $t1 + mulq 8*3($a_ptr) + add $t1, $acc4 + adc \$0, %rdx + xor $acc0, $acc0 + add %rax, $acc4 + mov $acc1, %rax + adc %rdx, $acc5 + adc \$0, $acc0 + + ################################# Second reduction step + mulq 8*0(%r14) + mov $acc1, $t1 + add %rax, $t0 # guaranteed to be zero + mov $acc1, %rax + adc %rdx, $t0 + + sub $acc1, $acc3 + sbb \$0, $acc1 # can't borrow + + mulq 8*1(%r14) + add $t0, $acc2 + adc \$0, %rdx + add %rax, $acc2 + mov $t1, %rax + adc %rdx, $acc3 + mov $t1, %rdx + adc \$0, $acc1 # can't overflow + + shl \$32, %rax + shr \$32, %rdx + sub %rax, $acc4 + mov 8*2($b_ptr), %rax + sbb %rdx, $t1 # can't borrow + + add $acc1, $acc4 + adc $t1, $acc5 + adc \$0, $acc0 + + ################################## * b[2] + mov %rax, $t0 + mulq 8*0($a_ptr) + add %rax, $acc2 + mov $t0, %rax + adc \$0, %rdx + mov %rdx, $t1 + + mulq 8*1($a_ptr) + add $t1, $acc3 + adc \$0, %rdx + add %rax, $acc3 + mov $t0, %rax + 
adc \$0, %rdx + mov %rdx, $t1 + + mulq 8*2($a_ptr) + add $t1, $acc4 + adc \$0, %rdx + add %rax, $acc4 + mov $t0, %rax + adc \$0, %rdx + + mov $acc2, $t0 + imulq %r15, $acc2 + + mov %rdx, $t1 + mulq 8*3($a_ptr) + add $t1, $acc5 + adc \$0, %rdx + xor $acc1, $acc1 + add %rax, $acc5 + mov $acc2, %rax + adc %rdx, $acc0 + adc \$0, $acc1 + + ################################# Third reduction step + mulq 8*0(%r14) + mov $acc2, $t1 + add %rax, $t0 # guaranteed to be zero + mov $acc2, %rax + adc %rdx, $t0 + + sub $acc2, $acc4 + sbb \$0, $acc2 # can't borrow + + mulq 8*1(%r14) + add $t0, $acc3 + adc \$0, %rdx + add %rax, $acc3 + mov $t1, %rax + adc %rdx, $acc4 + mov $t1, %rdx + adc \$0, $acc2 # can't overflow + + shl \$32, %rax + shr \$32, %rdx + sub %rax, $acc5 + mov 8*3($b_ptr), %rax + sbb %rdx, $t1 # can't borrow + + add $acc2, $acc5 + adc $t1, $acc0 + adc \$0, $acc1 + + ################################# * b[3] + mov %rax, $t0 + mulq 8*0($a_ptr) + add %rax, $acc3 + mov $t0, %rax + adc \$0, %rdx + mov %rdx, $t1 + + mulq 8*1($a_ptr) + add $t1, $acc4 + adc \$0, %rdx + add %rax, $acc4 + mov $t0, %rax + adc \$0, %rdx + mov %rdx, $t1 + + mulq 8*2($a_ptr) + add $t1, $acc5 + adc \$0, %rdx + add %rax, $acc5 + mov $t0, %rax + adc \$0, %rdx + + mov $acc3, $t0 + imulq %r15, $acc3 + + mov %rdx, $t1 + mulq 8*3($a_ptr) + add $t1, $acc0 + adc \$0, %rdx + xor $acc2, $acc2 + add %rax, $acc0 + mov $acc3, %rax + adc %rdx, $acc1 + adc \$0, $acc2 + + ################################# Last reduction step + mulq 8*0(%r14) + mov $acc3, $t1 + add %rax, $t0 # guaranteed to be zero + mov $acc3, %rax + adc %rdx, $t0 + + sub $acc3, $acc5 + sbb \$0, $acc3 # can't borrow + + mulq 8*1(%r14) + add $t0, $acc4 + adc \$0, %rdx + add %rax, $acc4 + mov $t1, %rax + adc %rdx, $acc5 + mov $t1, %rdx + adc \$0, $acc3 # can't overflow + + shl \$32, %rax + shr \$32, %rdx + sub %rax, $acc0 + sbb %rdx, $t1 # can't borrow + + add $acc3, $acc0 + adc $t1, $acc1 + adc \$0, $acc2 + + ################################# Subtract ord + mov $acc4, $a_ptr + sub 8*0(%r14), $acc4 + mov $acc5, $acc3 + sbb 8*1(%r14), $acc5 + mov $acc0, $t0 + sbb 8*2(%r14), $acc0 + mov $acc1, $t1 + sbb 8*3(%r14), $acc1 + sbb \$0, $acc2 + + cmovc $a_ptr, $acc4 + cmovc $acc3, $acc5 + cmovc $t0, $acc0 + cmovc $t1, $acc1 + + mov $acc4, 8*0($r_ptr) + mov $acc5, 8*1($r_ptr) + mov $acc0, 8*2($r_ptr) + mov $acc1, 8*3($r_ptr) + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont + +################################################################################ +# void ecp_nistz256_ord_sqr_mont( +# uint64_t res[4], +# uint64_t a[4], +# int rep); + +.globl ecp_nistz256_ord_sqr_mont +.type ecp_nistz256_ord_sqr_mont,\@function,3 +.align 32 +ecp_nistz256_ord_sqr_mont: +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx + cmp \$0x80100, %ecx + je .Lecp_nistz256_ord_sqr_montx +___ +$code.=<<___; + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + mov 8*0($a_ptr), $acc0 + mov 8*1($a_ptr), %rax + mov 8*2($a_ptr), $acc6 + mov 8*3($a_ptr), $acc7 + lea .Lord(%rip), $a_ptr # pointer to modulus + mov $b_org, $b_ptr + jmp .Loop_ord_sqr + +.align 32 +.Loop_ord_sqr: + ################################# a[1:] * a[0] + mov %rax, $t1 # put aside a[1] + mul $acc0 # a[1] * a[0] + mov %rax, $acc1 + movq $t1, %xmm1 # offload a[1] + mov $acc6, %rax + mov %rdx, $acc2 + + mul $acc0 # a[2] * a[0] + add %rax, $acc2 + mov $acc7, %rax + movq $acc6, %xmm2 # offload a[2] + adc 
\$0, %rdx + mov %rdx, $acc3 + + mul $acc0 # a[3] * a[0] + add %rax, $acc3 + mov $acc7, %rax + movq $acc7, %xmm3 # offload a[3] + adc \$0, %rdx + mov %rdx, $acc4 + + ################################# a[3] * a[2] + mul $acc6 # a[3] * a[2] + mov %rax, $acc5 + mov $acc6, %rax + mov %rdx, $acc6 + + ################################# a[2:] * a[1] + mul $t1 # a[2] * a[1] + add %rax, $acc3 + mov $acc7, %rax + adc \$0, %rdx + mov %rdx, $acc7 + + mul $t1 # a[3] * a[1] + add %rax, $acc4 + adc \$0, %rdx + + add $acc7, $acc4 + adc %rdx, $acc5 + adc \$0, $acc6 # can't overflow + + ################################# *2 + xor $acc7, $acc7 + mov $acc0, %rax + add $acc1, $acc1 + adc $acc2, $acc2 + adc $acc3, $acc3 + adc $acc4, $acc4 + adc $acc5, $acc5 + adc $acc6, $acc6 + adc \$0, $acc7 + + ################################# Missing products + mul %rax # a[0] * a[0] + mov %rax, $acc0 + movq %xmm1, %rax + mov %rdx, $t1 + + mul %rax # a[1] * a[1] + add $t1, $acc1 + adc %rax, $acc2 + movq %xmm2, %rax + adc \$0, %rdx + mov %rdx, $t1 + + mul %rax # a[2] * a[2] + add $t1, $acc3 + adc %rax, $acc4 + movq %xmm3, %rax + adc \$0, %rdx + mov %rdx, $t1 + + mov $acc0, $t0 + imulq 8*4($a_ptr), $acc0 # *= .LordK + + mul %rax # a[3] * a[3] + add $t1, $acc5 + adc %rax, $acc6 + mov 8*0($a_ptr), %rax # modulus[0] + adc %rdx, $acc7 # can't overflow + + ################################# First reduction step + mul $acc0 + mov $acc0, $t1 + add %rax, $t0 # guaranteed to be zero + mov 8*1($a_ptr), %rax # modulus[1] + adc %rdx, $t0 + + sub $acc0, $acc2 + sbb \$0, $t1 # can't borrow + + mul $acc0 + add $t0, $acc1 + adc \$0, %rdx + add %rax, $acc1 + mov $acc0, %rax + adc %rdx, $acc2 + mov $acc0, %rdx + adc \$0, $t1 # can't overflow + + mov $acc1, $t0 + imulq 8*4($a_ptr), $acc1 # *= .LordK + + shl \$32, %rax + shr \$32, %rdx + sub %rax, $acc3 + mov 8*0($a_ptr), %rax + sbb %rdx, $acc0 # can't borrow + + add $t1, $acc3 + adc \$0, $acc0 # can't overflow + + ################################# Second reduction step + mul $acc1 + mov $acc1, $t1 + add %rax, $t0 # guaranteed to be zero + mov 8*1($a_ptr), %rax + adc %rdx, $t0 + + sub $acc1, $acc3 + sbb \$0, $t1 # can't borrow + + mul $acc1 + add $t0, $acc2 + adc \$0, %rdx + add %rax, $acc2 + mov $acc1, %rax + adc %rdx, $acc3 + mov $acc1, %rdx + adc \$0, $t1 # can't overflow + + mov $acc2, $t0 + imulq 8*4($a_ptr), $acc2 # *= .LordK + + shl \$32, %rax + shr \$32, %rdx + sub %rax, $acc0 + mov 8*0($a_ptr), %rax + sbb %rdx, $acc1 # can't borrow + + add $t1, $acc0 + adc \$0, $acc1 # can't overflow + + ################################# Third reduction step + mul $acc2 + mov $acc2, $t1 + add %rax, $t0 # guaranteed to be zero + mov 8*1($a_ptr), %rax + adc %rdx, $t0 + + sub $acc2, $acc0 + sbb \$0, $t1 # can't borrow + + mul $acc2 + add $t0, $acc3 + adc \$0, %rdx + add %rax, $acc3 + mov $acc2, %rax + adc %rdx, $acc0 + mov $acc2, %rdx + adc \$0, $t1 # can't overflow + + mov $acc3, $t0 + imulq 8*4($a_ptr), $acc3 # *= .LordK + + shl \$32, %rax + shr \$32, %rdx + sub %rax, $acc1 + mov 8*0($a_ptr), %rax + sbb %rdx, $acc2 # can't borrow + + add $t1, $acc1 + adc \$0, $acc2 # can't overflow + + ################################# Last reduction step + mul $acc3 + mov $acc3, $t1 + add %rax, $t0 # guaranteed to be zero + mov 8*1($a_ptr), %rax + adc %rdx, $t0 + + sub $acc3, $acc1 + sbb \$0, $t1 # can't borrow + + mul $acc3 + add $t0, $acc0 + adc \$0, %rdx + add %rax, $acc0 + mov $acc3, %rax + adc %rdx, $acc1 + mov $acc3, %rdx + adc \$0, $t1 # can't overflow + + shl \$32, %rax + shr \$32, %rdx + sub %rax, $acc2 + sbb %rdx, 
$acc3 # can't borrow + + add $t1, $acc2 + adc \$0, $acc3 # can't overflow + + ################################# Add bits [511:256] of the sqr result + xor %rdx, %rdx + add $acc4, $acc0 + adc $acc5, $acc1 + mov $acc0, $acc4 + adc $acc6, $acc2 + adc $acc7, $acc3 + mov $acc1, %rax + adc \$0, %rdx + + ################################# Compare to modulus + sub 8*0($a_ptr), $acc0 + mov $acc2, $acc6 + sbb 8*1($a_ptr), $acc1 + sbb 8*2($a_ptr), $acc2 + mov $acc3, $acc7 + sbb 8*3($a_ptr), $acc3 + sbb \$0, %rdx + + cmovc $acc4, $acc0 + cmovnc $acc1, %rax + cmovnc $acc2, $acc6 + cmovnc $acc3, $acc7 + + dec $b_ptr + jnz .Loop_ord_sqr + + mov $acc0, 8*0($r_ptr) + mov %rax, 8*1($r_ptr) + pxor %xmm1, %xmm1 + mov $acc6, 8*2($r_ptr) + pxor %xmm2, %xmm2 + mov $acc7, 8*3($r_ptr) + pxor %xmm3, %xmm3 + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont +___ + +$code.=<<___ if ($addx); +################################################################################ +.type ecp_nistz256_ord_mul_montx,\@function,3 +.align 32 +ecp_nistz256_ord_mul_montx: +.Lecp_nistz256_ord_mul_montx: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + mov $b_org, $b_ptr + mov 8*0($b_org), %rdx + mov 8*0($a_ptr), $acc1 + mov 8*1($a_ptr), $acc2 + mov 8*2($a_ptr), $acc3 + mov 8*3($a_ptr), $acc4 + lea -128($a_ptr), $a_ptr # control u-op density + lea .Lord-128(%rip), %r14 + mov .LordK(%rip), %r15 + + ################################# Multiply by b[0] + mulx $acc1, $acc0, $acc1 + mulx $acc2, $t0, $acc2 + mulx $acc3, $t1, $acc3 + add $t0, $acc1 + mulx $acc4, $t0, $acc4 + mov $acc0, %rdx + mulx %r15, %rdx, %rax + adc $t1, $acc2 + adc $t0, $acc3 + adc \$0, $acc4 + + ################################# reduction + xor $acc5, $acc5 # $acc5=0, cf=0, of=0 + mulx 8*0+128(%r14), $t0, $t1 + adcx $t0, $acc0 # guaranteed to be zero + adox $t1, $acc1 + + mulx 8*1+128(%r14), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + + mulx 8*2+128(%r14), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + + mulx 8*3+128(%r14), $t0, $t1 + mov 8*1($b_ptr), %rdx + adcx $t0, $acc3 + adox $t1, $acc4 + adcx $acc0, $acc4 + adox $acc0, $acc5 + adc \$0, $acc5 # cf=0, of=0 + + ################################# Multiply by b[1] + mulx 8*0+128($a_ptr), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + + mulx 8*1+128($a_ptr), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + + mulx 8*2+128($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*3+128($a_ptr), $t0, $t1 + mov $acc1, %rdx + mulx %r15, %rdx, %rax + adcx $t0, $acc4 + adox $t1, $acc5 + + adcx $acc0, $acc5 + adox $acc0, $acc0 + adc \$0, $acc0 # cf=0, of=0 + + ################################# reduction + mulx 8*0+128(%r14), $t0, $t1 + adcx $t0, $acc1 # guaranteed to be zero + adox $t1, $acc2 + + mulx 8*1+128(%r14), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + + mulx 8*2+128(%r14), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*3+128(%r14), $t0, $t1 + mov 8*2($b_ptr), %rdx + adcx $t0, $acc4 + adox $t1, $acc5 + adcx $acc1, $acc5 + adox $acc1, $acc0 + adc \$0, $acc0 # cf=0, of=0 + + ################################# Multiply by b[2] + mulx 8*0+128($a_ptr), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + + mulx 8*1+128($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*2+128($a_ptr), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + + mulx 8*3+128($a_ptr), $t0, $t1 + mov $acc2, %rdx + mulx %r15, %rdx, %rax + adcx $t0, $acc5 + adox $t1, $acc0 + + adcx $acc1, $acc0 + adox $acc1, $acc1 + adc \$0, 
$acc1 # cf=0, of=0 + + ################################# reduction + mulx 8*0+128(%r14), $t0, $t1 + adcx $t0, $acc2 # guaranteed to be zero + adox $t1, $acc3 + + mulx 8*1+128(%r14), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*2+128(%r14), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + + mulx 8*3+128(%r14), $t0, $t1 + mov 8*3($b_ptr), %rdx + adcx $t0, $acc5 + adox $t1, $acc0 + adcx $acc2, $acc0 + adox $acc2, $acc1 + adc \$0, $acc1 # cf=0, of=0 + + ################################# Multiply by b[3] + mulx 8*0+128($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*1+128($a_ptr), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + + mulx 8*2+128($a_ptr), $t0, $t1 + adcx $t0, $acc5 + adox $t1, $acc0 + + mulx 8*3+128($a_ptr), $t0, $t1 + mov $acc3, %rdx + mulx %r15, %rdx, %rax + adcx $t0, $acc0 + adox $t1, $acc1 + + adcx $acc2, $acc1 + adox $acc2, $acc2 + adc \$0, $acc2 # cf=0, of=0 + + ################################# reduction + mulx 8*0+128(%r14), $t0, $t1 + adcx $t0, $acc3 # guranteed to be zero + adox $t1, $acc4 + + mulx 8*1+128(%r14), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + + mulx 8*2+128(%r14), $t0, $t1 + adcx $t0, $acc5 + adox $t1, $acc0 + + mulx 8*3+128(%r14), $t0, $t1 + lea 128(%r14),%r14 + mov $acc4, $t2 + adcx $t0, $acc0 + adox $t1, $acc1 + mov $acc5, $t3 + adcx $acc3, $acc1 + adox $acc3, $acc2 + adc \$0, $acc2 + + ################################# + # Branch-less conditional subtraction of P + mov $acc0, $t0 + sub 8*0(%r14), $acc4 + sbb 8*1(%r14), $acc5 + sbb 8*2(%r14), $acc0 + mov $acc1, $t1 + sbb 8*3(%r14), $acc1 + sbb \$0, $acc2 + + cmovc $t2, $acc4 + cmovc $t3, $acc5 + cmovc $t0, $acc0 + cmovc $t1, $acc1 + + mov $acc4, 8*0($r_ptr) + mov $acc5, 8*1($r_ptr) + mov $acc0, 8*2($r_ptr) + mov $acc1, 8*3($r_ptr) + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx + +.type ecp_nistz256_ord_sqr_montx,\@function,3 +.align 32 +ecp_nistz256_ord_sqr_montx: +.Lecp_nistz256_ord_sqr_montx: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + + mov $b_org, $b_ptr + mov 8*0($a_ptr), %rdx + mov 8*1($a_ptr), $acc6 + mov 8*2($a_ptr), $acc7 + mov 8*3($a_ptr), $acc0 + lea .Lord(%rip), $a_ptr + jmp .Loop_ord_sqrx + +.align 32 +.Loop_ord_sqrx: + mulx $acc6, $acc1, $acc2 # a[0]*a[1] + mulx $acc7, $t0, $acc3 # a[0]*a[2] + mov %rdx, %rax # offload a[0] + movq $acc6, %xmm1 # offload a[1] + mulx $acc0, $t1, $acc4 # a[0]*a[3] + mov $acc6, %rdx + add $t0, $acc2 + movq $acc7, %xmm2 # offload a[2] + adc $t1, $acc3 + adc \$0, $acc4 + xor $acc5, $acc5 # $acc5=0,cf=0,of=0 + ################################# + mulx $acc7, $t0, $t1 # a[1]*a[2] + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx $acc0, $t0, $t1 # a[1]*a[3] + mov $acc7, %rdx + adcx $t0, $acc4 + adox $t1, $acc5 + adc \$0, $acc5 + ################################# + mulx $acc0, $t0, $acc6 # a[2]*a[3] + mov %rax, %rdx + movq $acc0, %xmm3 # offload a[3] + xor $acc7, $acc7 # $acc7=0,cf=0,of=0 + adcx $acc1, $acc1 # acc1:6<<1 + adox $t0, $acc5 + adcx $acc2, $acc2 + adox $acc7, $acc6 # of=0 + + ################################# a[i]*a[i] + mulx %rdx, $acc0, $t1 + movq %xmm1, %rdx + adcx $acc3, $acc3 + adox $t1, $acc1 + adcx $acc4, $acc4 + mulx %rdx, $t0, $t4 + movq %xmm2, %rdx + adcx $acc5, $acc5 + adox $t0, $acc2 + adcx $acc6, $acc6 + mulx %rdx, $t0, $t1 + .byte 0x67 + movq %xmm3, %rdx + adox $t4, $acc3 + adcx $acc7, $acc7 + adox $t0, $acc4 + adox $t1, $acc5 + mulx %rdx, $t0, $t4 + adox $t0, $acc6 + adox $t4, $acc7 + + 
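+	# The four reduction passes that follow perform word-by-word
+	# Montgomery reduction of the 512-bit square: each pass computes
+	# m = (lowest limb) * .LordK, adds m * .Lord so that the lowest
+	# limb becomes zero, and retires that limb, while mulx/adcx/adox
+	# keep two independent carry chains in flight.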
################################# reduction + mov $acc0, %rdx + mulx 8*4($a_ptr), %rdx, $t0 + + xor %rax, %rax # cf=0, of=0 + mulx 8*0($a_ptr), $t0, $t1 + adcx $t0, $acc0 # guaranteed to be zero + adox $t1, $acc1 + mulx 8*1($a_ptr), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + mulx 8*2($a_ptr), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + mulx 8*3($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc0 # of=0 + adcx %rax, $acc0 # cf=0 + + ################################# + mov $acc1, %rdx + mulx 8*4($a_ptr), %rdx, $t0 + + mulx 8*0($a_ptr), $t0, $t1 + adox $t0, $acc1 # guaranteed to be zero + adcx $t1, $acc2 + mulx 8*1($a_ptr), $t0, $t1 + adox $t0, $acc2 + adcx $t1, $acc3 + mulx 8*2($a_ptr), $t0, $t1 + adox $t0, $acc3 + adcx $t1, $acc0 + mulx 8*3($a_ptr), $t0, $t1 + adox $t0, $acc0 + adcx $t1, $acc1 # cf=0 + adox %rax, $acc1 # of=0 + + ################################# + mov $acc2, %rdx + mulx 8*4($a_ptr), %rdx, $t0 + + mulx 8*0($a_ptr), $t0, $t1 + adcx $t0, $acc2 # guaranteed to be zero + adox $t1, $acc3 + mulx 8*1($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc0 + mulx 8*2($a_ptr), $t0, $t1 + adcx $t0, $acc0 + adox $t1, $acc1 + mulx 8*3($a_ptr), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 # of=0 + adcx %rax, $acc2 # cf=0 + + ################################# + mov $acc3, %rdx + mulx 8*4($a_ptr), %rdx, $t0 + + mulx 8*0($a_ptr), $t0, $t1 + adox $t0, $acc3 # guaranteed to be zero + adcx $t1, $acc0 + mulx 8*1($a_ptr), $t0, $t1 + adox $t0, $acc0 + adcx $t1, $acc1 + mulx 8*2($a_ptr), $t0, $t1 + adox $t0, $acc1 + adcx $t1, $acc2 + mulx 8*3($a_ptr), $t0, $t1 + adox $t0, $acc2 + adcx $t1, $acc3 + adox %rax, $acc3 + + ################################# accumulate upper half + add $acc0, $acc4 # add $acc4, $acc0 + adc $acc5, $acc1 + mov $acc4, %rdx + adc $acc6, $acc2 + adc $acc7, $acc3 + mov $acc1, $acc6 + adc \$0, %rax + + ################################# compare to modulus + sub 8*0($a_ptr), $acc4 + mov $acc2, $acc7 + sbb 8*1($a_ptr), $acc1 + sbb 8*2($a_ptr), $acc2 + mov $acc3, $acc0 + sbb 8*3($a_ptr), $acc3 + sbb \$0, %rax + + cmovnc $acc4, %rdx + cmovnc $acc1, $acc6 + cmovnc $acc2, $acc7 + cmovnc $acc3, $acc0 + + dec $b_ptr + jnz .Loop_ord_sqrx + + mov %rdx, 8*0($r_ptr) + mov $acc6, 8*1($r_ptr) + pxor %xmm1, %xmm1 + mov $acc7, 8*2($r_ptr) + pxor %xmm2, %xmm2 + mov $acc0, 8*3($r_ptr) + pxor %xmm3, %xmm3 + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret + +.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx +___ + $code.=<<___; ################################################################################ # void ecp_nistz256_to_mont( diff --git a/crypto/ec/ec_err.c b/crypto/ec/ec_err.c index 9f82b4ef7e..efec5a7112 100644 --- a/crypto/ec/ec_err.c +++ b/crypto/ec/ec_err.c @@ -48,6 +48,8 @@ static const ERR_STRING_DATA EC_str_functs[] = { "ECPKParameters_print_fp"}, {ERR_PACK(ERR_LIB_EC, EC_F_ECP_NISTZ256_GET_AFFINE, 0), "ecp_nistz256_get_affine"}, + {ERR_PACK(ERR_LIB_EC, EC_F_ECP_NISTZ256_INV_MOD_ORD, 0), + "ecp_nistz256_inv_mod_ord"}, {ERR_PACK(ERR_LIB_EC, EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, 0), "ecp_nistz256_mult_precompute"}, {ERR_PACK(ERR_LIB_EC, EC_F_ECP_NISTZ256_POINTS_MUL, 0), diff --git a/crypto/ec/ec_lcl.h b/crypto/ec/ec_lcl.h index 6cc0190aa7..540aa534e7 100644 --- a/crypto/ec/ec_lcl.h +++ b/crypto/ec/ec_lcl.h @@ -155,6 +155,9 @@ struct ec_method_st { /* custom ECDH operation */ int (*ecdh_compute_key)(unsigned char **pout, size_t *poutlen, const EC_POINT *pub_key, const EC_KEY *ecdh); + /* Inverse modulo order */ + int 
(*field_inverse_mod_ord)(const EC_GROUP *, BIGNUM *r, BIGNUM *x, + BN_CTX *ctx); }; /* @@ -520,7 +523,6 @@ void ec_GFp_nistp_points_make_affine_internal(size_t num, void *point_array, void ec_GFp_nistp_recode_scalar_bits(unsigned char *sign, unsigned char *digit, unsigned char in); #endif -int ec_precompute_mont_data(EC_GROUP *); int ec_group_simple_order_bits(const EC_GROUP *group); #ifdef ECP_NISTZ256_ASM @@ -604,3 +606,6 @@ int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32], const uint8_t peer_public_value[32]); void X25519_public_from_private(uint8_t out_public_value[32], const uint8_t private_key[32]); + +int EC_GROUP_do_inverse_ord(const EC_GROUP *group, BIGNUM *res, + BIGNUM *x, BN_CTX *ctx); diff --git a/crypto/ec/ec_lib.c b/crypto/ec/ec_lib.c index 7ae48cf291..8d508ddb6a 100644 --- a/crypto/ec/ec_lib.c +++ b/crypto/ec/ec_lib.c @@ -261,6 +261,8 @@ int EC_METHOD_get_field_type(const EC_METHOD *meth) return meth->field_type; } +static int ec_precompute_mont_data(EC_GROUP *); + int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, const BIGNUM *order, const BIGNUM *cofactor) { @@ -961,7 +963,7 @@ int EC_GROUP_have_precompute_mult(const EC_GROUP *group) * ec_precompute_mont_data sets |group->mont_data| from |group->order| and * returns one on success. On error it returns zero. */ -int ec_precompute_mont_data(EC_GROUP *group) +static int ec_precompute_mont_data(EC_GROUP *group) { BN_CTX *ctx = BN_CTX_new(); int ret = 0; @@ -1006,3 +1008,12 @@ int ec_group_simple_order_bits(const EC_GROUP *group) return 0; return BN_num_bits(group->order); } + +int EC_GROUP_do_inverse_ord(const EC_GROUP *group, BIGNUM *res, + BIGNUM *x, BN_CTX *ctx) +{ + if (group->meth->field_inverse_mod_ord != NULL) + return group->meth->field_inverse_mod_ord(group, res, x, ctx); + else + return 0; +} diff --git a/crypto/ec/ecdsa_ossl.c b/crypto/ec/ecdsa_ossl.c index 30458f1402..a405d38b69 100644 --- a/crypto/ec/ecdsa_ossl.c +++ b/crypto/ec/ecdsa_ossl.c @@ -153,30 +153,33 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, } while (BN_is_zero(r)); - /* compute the inverse of k */ - if (EC_GROUP_get_mont_data(group) != NULL) { - /* - * We want inverse in constant time, therefore we utilize the fact - * order must be prime and use Fermat's Little Theorem instead. - */ - if (!BN_set_word(X, 2)) { - ECerr(EC_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); - goto err; - } - if (!BN_mod_sub(X, order, X, order, ctx)) { - ECerr(EC_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); - goto err; - } - BN_set_flags(X, BN_FLG_CONSTTIME); - if (!BN_mod_exp_mont_consttime - (k, k, X, order, ctx, EC_GROUP_get_mont_data(group))) { - ECerr(EC_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); - goto err; - } - } else { - if (!BN_mod_inverse(k, k, order, ctx)) { - ECerr(EC_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); - goto err; + /* Check if optimized inverse is implemented */ + if (EC_GROUP_do_inverse_ord(group, k, k, ctx) == 0) { + /* compute the inverse of k */ + if (group->mont_data != NULL) { + /* + * We want inverse in constant time, therefore we utilize the fact + * order must be prime and use Fermats Little Theorem instead. 
+ */ + if (!BN_set_word(X, 2)) { + ECerr(EC_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } + if (!BN_mod_sub(X, order, X, order, ctx)) { + ECerr(EC_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } + BN_set_flags(X, BN_FLG_CONSTTIME); + if (!BN_mod_exp_mont_consttime(k, k, X, order, ctx, + group->mont_data)) { + ECerr(EC_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } + } else { + if (!BN_mod_inverse(k, k, order, ctx)) { + ECerr(EC_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } } } @@ -407,9 +410,12 @@ int ossl_ecdsa_verify_sig(const unsigned char *dgst, int dgst_len, goto err; } /* calculate tmp1 = inv(S) mod order */ - if (!BN_mod_inverse(u2, sig->s, order, ctx)) { - ECerr(EC_F_OSSL_ECDSA_VERIFY_SIG, ERR_R_BN_LIB); - goto err; + /* Check if optimized inverse is implemented */ + if (EC_GROUP_do_inverse_ord(group, u2, sig->s, ctx) == 0) { + if (!BN_mod_inverse(u2, sig->s, order, ctx)) { + ECerr(EC_F_OSSL_ECDSA_VERIFY_SIG, ERR_R_BN_LIB); + goto err; + } } /* digest -> m */ i = BN_num_bits(order); diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c index 3863a61b02..6c85884fee 100644 --- a/crypto/ec/ecp_nistz256.c +++ b/crypto/ec/ecp_nistz256.c @@ -1,15 +1,17 @@ /* * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2014, Intel Corporation. All Rights Reserved. + * Copyright (c) 2015, CloudFlare, Inc. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy * in the file LICENSE in the source distribution or at * https://www.openssl.org/source/license.html * - * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) + * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1, 3) * (1) Intel Corporation, Israel Development Center, Haifa, Israel * (2) University of Haifa, Israel + * (3) CloudFlare, Inc. 
* * Reference: * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with @@ -908,7 +910,7 @@ __owur static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) */ #if defined(ECP_NISTZ256_AVX2) # if !(defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_AMD64) || defined(_MX64)) || \ + defined(_M_AMD64) || defined(_M_X64)) || \ !(defined(__GNUC__) || defined(_MSC_VER)) /* this is for ALIGN32 */ # undef ECP_NISTZ256_AVX2 # else @@ -1495,6 +1497,117 @@ static int ecp_nistz256_window_have_precompute_mult(const EC_GROUP *group) return HAVEPRECOMP(group, nistz256); } +#if defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(_M_X64) || \ + defined(__powerpc64__) || defined(_ARCH_PP64) +/* + * Montgomery mul modulo Order(P): res = a*b*2^-256 mod Order(P) + */ +void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]); +void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + int rep); + +static int ecp_nistz256_inv_mod_ord(const EC_GROUP *group, BIGNUM *r, + BIGNUM *x, BN_CTX *ctx) +{ + /* RR = 2^512 mod ord(p256) */ + static const BN_ULONG RR[P256_LIMBS] = { TOBN(0x83244c95,0xbe79eea2), + TOBN(0x4699799c,0x49bd6fa6), + TOBN(0x2845b239,0x2b6bec59), + TOBN(0x66e12d94,0xf3d95620) }; + /* The constant 1 (unlike ONE that is one in Montgomery representation) */ + static const BN_ULONG one[P256_LIMBS] = { TOBN(0,1),TOBN(0,0), + TOBN(0,0),TOBN(0,0) }; + /* expLo - the low 128bit of the exponent we use (ord(p256) - 2), + * split into 4bit windows */ + static const unsigned char expLo[32] = { 0xb,0xc,0xe,0x6,0xf,0xa,0xa,0xd, + 0xa,0x7,0x1,0x7,0x9,0xe,0x8,0x4, + 0xf,0x3,0xb,0x9,0xc,0xa,0xc,0x2, + 0xf,0xc,0x6,0x3,0x2,0x5,0x4,0xf }; + /* + * We don't use entry 0 in the table, so we omit it and address + * with -1 offset. + */ + BN_ULONG table[15][P256_LIMBS]; + BN_ULONG out[P256_LIMBS], t[P256_LIMBS]; + int i, ret = 0; + + /* + * Catch allocation failure early. 
+ */ + if (bn_wexpand(r, P256_LIMBS) == NULL) { + ECerr(EC_F_ECP_NISTZ256_INV_MOD_ORD, ERR_R_BN_LIB); + goto err; + } + + if ((BN_num_bits(x) > 256) || BN_is_negative(x)) { + BIGNUM *tmp; + + if ((tmp = BN_CTX_get(ctx)) == NULL + || !BN_nnmod(tmp, x, group->order, ctx)) { + ECerr(EC_F_ECP_NISTZ256_INV_MOD_ORD, ERR_R_BN_LIB); + goto err; + } + x = tmp; + } + + if (!ecp_nistz256_bignum_to_field_elem(t, x)) { + ECerr(EC_F_ECP_NISTZ256_INV_MOD_ORD, EC_R_COORDINATES_OUT_OF_RANGE); + goto err; + } + + ecp_nistz256_ord_mul_mont(table[0], t, RR); + for (i = 2; i < 16; i += 2) { + ecp_nistz256_ord_sqr_mont(table[i-1], table[i/2-1], 1); + ecp_nistz256_ord_mul_mont(table[i], table[i-1], table[0]); + } + + /* + * The top 128bit of the exponent are highly redudndant, so we + * perform an optimized flow + */ + ecp_nistz256_ord_sqr_mont(t, table[15-1], 4); /* f0 */ + ecp_nistz256_ord_mul_mont(t, t, table[15-1]); /* ff */ + + ecp_nistz256_ord_sqr_mont(out, t, 8); /* ff00 */ + ecp_nistz256_ord_mul_mont(out, out, t); /* ffff */ + + ecp_nistz256_ord_sqr_mont(t, out, 16); /* ffff0000 */ + ecp_nistz256_ord_mul_mont(t, t, out); /* ffffffff */ + + ecp_nistz256_ord_sqr_mont(out, t, 64); /* ffffffff0000000000000000 */ + ecp_nistz256_ord_mul_mont(out, out, t); /* ffffffff00000000ffffffff */ + + ecp_nistz256_ord_sqr_mont(out, out, 32); /* ffffffff00000000ffffffff00000000 */ + ecp_nistz256_ord_mul_mont(out, out, t); /* ffffffff00000000ffffffffffffffff */ + + /* + * The bottom 128 bit of the exponent are easier done with a table + */ + for(i = 0; i < 32; i++) { + ecp_nistz256_ord_sqr_mont(out, out, 4); + /* The exponent is public, no need in constant-time access */ + ecp_nistz256_ord_mul_mont(out, out, table[expLo[i]-1]); + } + ecp_nistz256_ord_mul_mont(out, out, one); + + /* + * Can't fail, but check return code to be consistent anyway. + */ + if (!bn_set_words(r, out, P256_LIMBS)) + goto err; + + ret = 1; +err: + return ret; +} +#else +# define ecp_nistz256_inv_mod_ord NULL +#endif + const EC_METHOD *EC_GFp_nistz256_method(void) { static const EC_METHOD ret = { @@ -1544,7 +1657,8 @@ const EC_METHOD *EC_GFp_nistz256_method(void) ec_key_simple_generate_public_key, 0, /* keycopy */ 0, /* keyfinish */ - ecdh_simple_compute_key + ecdh_simple_compute_key, + ecp_nistz256_inv_mod_ord /* can be #defined-ed NULL */ }; return &ret; diff --git a/crypto/err/openssl.txt b/crypto/err/openssl.txt index 9ec000964a..64496627bb 100644 --- a/crypto/err/openssl.txt +++ b/crypto/err/openssl.txt @@ -458,6 +458,7 @@ EC_F_ECPARAMETERS_PRINT_FP:148:ECParameters_print_fp EC_F_ECPKPARAMETERS_PRINT:149:ECPKParameters_print EC_F_ECPKPARAMETERS_PRINT_FP:150:ECPKParameters_print_fp EC_F_ECP_NISTZ256_GET_AFFINE:240:ecp_nistz256_get_affine +EC_F_ECP_NISTZ256_INV_MOD_ORD:275:ecp_nistz256_inv_mod_ord EC_F_ECP_NISTZ256_MULT_PRECOMPUTE:243:ecp_nistz256_mult_precompute EC_F_ECP_NISTZ256_POINTS_MUL:241:ecp_nistz256_points_mul EC_F_ECP_NISTZ256_PRE_COMP_NEW:244:ecp_nistz256_pre_comp_new diff --git a/include/openssl/ecerr.h b/include/openssl/ecerr.h index bd09cb7f1b..a1b9ea180e 100644 --- a/include/openssl/ecerr.h +++ b/include/openssl/ecerr.h @@ -50,6 +50,7 @@ int ERR_load_EC_strings(void); # define EC_F_ECPKPARAMETERS_PRINT 149 # define EC_F_ECPKPARAMETERS_PRINT_FP 150 # define EC_F_ECP_NISTZ256_GET_AFFINE 240 +# define EC_F_ECP_NISTZ256_INV_MOD_ORD 275 # define EC_F_ECP_NISTZ256_MULT_PRECOMPUTE 243 # define EC_F_ECP_NISTZ256_POINTS_MUL 241 # define EC_F_ECP_NISTZ256_PRE_COMP_NEW 244
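For readers who want to check the arithmetic without stepping through the assembly, here is a small self-contained C sketch of my own (not part of the patch, and with hypothetical names ord_mul_mont_ref / ord_inverse_ref) of what the new primitives compute: a word-by-word (CIOS) Montgomery multiplication modulo ord(P-256) using the .Lord/.LordK constants, and the Fermat inverse x^(ord-2) mod ord built on top of it. The RR constant (2^512 mod ord) is copied from ecp_nistz256_inv_mod_ord above. Unlike the patch, which uses a fixed 4-bit window plus an addition chain for the top 128 bits of the exponent, the sketch uses plain left-to-right square-and-multiply; the result is the same. It assumes a compiler with unsigned __int128 (e.g. gcc or clang on a 64-bit target).

```c
#include <stdint.h>
#include <string.h>

typedef uint64_t u64;
typedef unsigned __int128 u128;

/* ord(P-256) and -ord[0]^-1 mod 2^64, little-endian limbs as in .Lord/.LordK */
static const u64 ord[4] = {
    0xf3b9cac2fc632551ULL, 0xbce6faada7179e84ULL,
    0xffffffffffffffffULL, 0xffffffff00000000ULL
};
static const u64 ordK = 0xccd1c8aaee00bc4fULL;

/* r = a * b * 2^-256 mod ord (CIOS Montgomery multiplication, 4 limbs).
 * Portable reference for what ecp_nistz256_ord_mul_mont computes. */
static void ord_mul_mont_ref(u64 r[4], const u64 a[4], const u64 b[4])
{
    u64 t[6] = { 0, 0, 0, 0, 0, 0 };
    int i, j;

    for (i = 0; i < 4; i++) {
        u128 c = 0;
        u64 m;

        /* t += a * b[i] */
        for (j = 0; j < 4; j++) {
            c += (u128)a[j] * b[i] + t[j];
            t[j] = (u64)c;
            c >>= 64;
        }
        c += t[4];
        t[4] = (u64)c;
        t[5] = (u64)(c >> 64);

        /* m is chosen so that t + m*ord is divisible by 2^64 */
        m = t[0] * ordK;
        c = (u128)m * ord[0] + t[0];
        c >>= 64;                       /* low limb is zero by construction */
        for (j = 1; j < 4; j++) {
            c += (u128)m * ord[j] + t[j];
            t[j - 1] = (u64)c;
            c >>= 64;
        }
        c += t[4];
        t[3] = (u64)c;
        t[4] = t[5] + (u64)(c >> 64);   /* drop one limb: divide by 2^64 */
        t[5] = 0;
    }

    /* conditional final subtraction brings the result into [0, ord) */
    {
        u64 d[4], borrow = 0;
        for (i = 0; i < 4; i++) {
            u128 s = (u128)t[i] - ord[i] - borrow;
            d[i] = (u64)s;
            borrow = (u64)(s >> 127);   /* 1 if the subtraction wrapped */
        }
        if (t[4] != 0 || borrow == 0)
            memcpy(r, d, sizeof(d));
        else
            memcpy(r, t, 4 * sizeof(u64));
    }
}

/* r = x^-1 mod ord via Fermat: x^(ord-2) for x in [1, ord-1], computed with
 * the Montgomery primitive above. RR = 2^512 mod ord as in the patch. */
static void ord_inverse_ref(u64 r[4], const u64 x[4])
{
    static const u64 RR[4] = {
        0x83244c95be79eea2ULL, 0x4699799c49bd6fa6ULL,
        0x2845b2392b6bec59ULL, 0x66e12d94f3d95620ULL
    };
    static const u64 one[4] = { 1, 0, 0, 0 };
    const u64 e[4] = { ord[0] - 2, ord[1], ord[2], ord[3] };  /* ord - 2 */
    u64 xm[4], acc[4];
    int i;

    ord_mul_mont_ref(xm, x, RR);        /* xm = x * 2^256 mod ord */
    memcpy(acc, xm, sizeof(acc));       /* bit 255 of the exponent is set */

    for (i = 254; i >= 0; i--) {        /* left-to-right square-and-multiply */
        ord_mul_mont_ref(acc, acc, acc);
        if ((e[i / 64] >> (i % 64)) & 1)
            ord_mul_mont_ref(acc, acc, xm);
    }
    ord_mul_mont_ref(r, acc, one);      /* leave Montgomery form */
}
```

The exponent ord-2 is public, which is why the patch can use table lookups indexed directly by exponent windows without constant-time access, as its own comment notes; only the base (the nonce k in ECDSA sign) is secret, and it is touched exclusively through the constant-time multiply and square primitives.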