From: Andy Polyakov Date: Wed, 18 Jul 2018 13:14:44 +0000 (+0200) Subject: ec/asm/ecp_nistz256-{!x86_64}.pl: fix scatter_w7 function. X-Git-Tag: OpenSSL_1_1_0i~40 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=5c2bac9289e97b47dc1fd1603efe09d3e10ce9cb;p=oweals%2Fopenssl.git ec/asm/ecp_nistz256-{!x86_64}.pl: fix scatter_w7 function. The ecp_nistz256_scatter_w7 function is called when an application attempts to use a custom generator, i.e. rarely. Even though the non-x86_64 versions were wrong, it didn't affect point operations; they were just not as fast as expected. Reviewed-by: Rich Salz (Merged from https://github.com/openssl/openssl/pull/6738) (cherry picked from commit 87a75b3e5c04a1696208c279f32d1114b862cfed) --- diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl b/crypto/ec/asm/ecp_nistz256-armv4.pl index 39d4cb9a56..4b58135979 100755 --- a/crypto/ec/asm/ecp_nistz256-armv4.pl +++ b/crypto/ec/asm/ecp_nistz256-armv4.pl @@ -894,13 +894,13 @@ ecp_nistz256_scatter_w7: .Loop_scatter_w7: ldr $mask,[$inp],#4 subs $index,$index,#1 - strb $mask,[$out,#64*0-1] + strb $mask,[$out,#64*0] mov $mask,$mask,lsr#8 - strb $mask,[$out,#64*1-1] + strb $mask,[$out,#64*1] mov $mask,$mask,lsr#8 - strb $mask,[$out,#64*2-1] + strb $mask,[$out,#64*2] mov $mask,$mask,lsr#8 - strb $mask,[$out,#64*3-1] + strb $mask,[$out,#64*3] add $out,$out,#64*4 bne .Loop_scatter_w7 diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl index cdc91617ff..a726cc3bbb 100644 --- a/crypto/ec/asm/ecp_nistz256-armv8.pl +++ b/crypto/ec/asm/ecp_nistz256-armv8.pl @@ -1477,21 +1477,21 @@ ecp_nistz256_scatter_w7: prfm pstl1strm,[$out,#4096+64*5] prfm pstl1strm,[$out,#4096+64*6] prfm pstl1strm,[$out,#4096+64*7] - strb w3,[$out,#64*0-1] + strb w3,[$out,#64*0] lsr x3,x3,#8 - strb w3,[$out,#64*1-1] + strb w3,[$out,#64*1] lsr x3,x3,#8 - strb w3,[$out,#64*2-1] + strb w3,[$out,#64*2] lsr x3,x3,#8 - strb w3,[$out,#64*3-1] + strb w3,[$out,#64*3] lsr x3,x3,#8 - strb w3,[$out,#64*4-1] + strb 
w3,[$out,#64*4] lsr x3,x3,#8 - strb w3,[$out,#64*5-1] + strb w3,[$out,#64*5] lsr x3,x3,#8 - strb w3,[$out,#64*6-1] + strb w3,[$out,#64*6] lsr x3,x3,#8 - strb w3,[$out,#64*7-1] + strb w3,[$out,#64*7] add $out,$out,#64*8 b.ne .Loop_scatter_w7 diff --git a/crypto/ec/asm/ecp_nistz256-sparcv9.pl b/crypto/ec/asm/ecp_nistz256-sparcv9.pl index 97201cb271..8bad859c8b 100755 --- a/crypto/ec/asm/ecp_nistz256-sparcv9.pl +++ b/crypto/ec/asm/ecp_nistz256-sparcv9.pl @@ -1531,13 +1531,13 @@ ecp_nistz256_scatter_w7: ld [$inp],%l0 add $inp,4,$inp subcc $index,1,$index - stb %l0,[$out+64*0-1] + stb %l0,[$out+64*0] srl %l0,8,%l1 - stb %l1,[$out+64*1-1] + stb %l1,[$out+64*1] srl %l0,16,%l2 - stb %l2,[$out+64*2-1] + stb %l2,[$out+64*2] srl %l0,24,%l3 - stb %l3,[$out+64*3-1] + stb %l3,[$out+64*3] bne .Loop_scatter_w7 add $out,64*4,$out diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl b/crypto/ec/asm/ecp_nistz256-x86.pl index 1d9e00616b..4fa27fce17 100755 --- a/crypto/ec/asm/ecp_nistz256-x86.pl +++ b/crypto/ec/asm/ecp_nistz256-x86.pl @@ -1179,7 +1179,7 @@ for ($i=0;$i<7;$i++) { &mov ("esi",&wparam(1)); &mov ("ebp",&wparam(2)); - &lea ("edi",&DWP(-1,"edi","ebp")); + &lea ("edi",&DWP(0,"edi","ebp")); &mov ("ebp",64/4); &set_label("scatter_w7_loop"); &mov ("eax",&DWP(0,"esi"));