ec/asm/ecp_nistz256-{!x86_64}.pl: fix scatter_w7 function.
author Andy Polyakov <appro@openssl.org>
Wed, 18 Jul 2018 13:14:44 +0000 (15:14 +0200)
committer Andy Polyakov <appro@openssl.org>
Sun, 22 Jul 2018 13:24:23 +0000 (15:24 +0200)
The ecp_nistz256_scatter_w7 function is called when an application
attempts to use a custom generator, i.e. rarely. Even though the
non-x86_64 versions were wrong, this didn't affect point operations;
they were just not as fast as expected.

Reviewed-by: Rich Salz <rsalz@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6738)

(cherry picked from commit 87a75b3e5c04a1696208c279f32d1114b862cfed)

crypto/ec/asm/ecp_nistz256-armv4.pl
crypto/ec/asm/ecp_nistz256-armv8.pl
crypto/ec/asm/ecp_nistz256-sparcv9.pl
crypto/ec/asm/ecp_nistz256-x86.pl

index 39d4cb9a567935ed28a14846a5d531346cb27af2..4b58135979ce617bba09367da1b778bea965978f 100755 (executable)
@@ -894,13 +894,13 @@ ecp_nistz256_scatter_w7:
 .Loop_scatter_w7:
        ldr     $mask,[$inp],#4
        subs    $index,$index,#1
-       strb    $mask,[$out,#64*0-1]
+       strb    $mask,[$out,#64*0]
        mov     $mask,$mask,lsr#8
-       strb    $mask,[$out,#64*1-1]
+       strb    $mask,[$out,#64*1]
        mov     $mask,$mask,lsr#8
-       strb    $mask,[$out,#64*2-1]
+       strb    $mask,[$out,#64*2]
        mov     $mask,$mask,lsr#8
-       strb    $mask,[$out,#64*3-1]
+       strb    $mask,[$out,#64*3]
        add     $out,$out,#64*4
        bne     .Loop_scatter_w7
 
index cdc91617ff13eea730a572238ff484d4597f23ec..a726cc3bbbf5778fd407bf4d50493051a75b8180 100644 (file)
@@ -1477,21 +1477,21 @@ ecp_nistz256_scatter_w7:
        prfm    pstl1strm,[$out,#4096+64*5]
        prfm    pstl1strm,[$out,#4096+64*6]
        prfm    pstl1strm,[$out,#4096+64*7]
-       strb    w3,[$out,#64*0-1]
+       strb    w3,[$out,#64*0]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*1-1]
+       strb    w3,[$out,#64*1]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*2-1]
+       strb    w3,[$out,#64*2]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*3-1]
+       strb    w3,[$out,#64*3]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*4-1]
+       strb    w3,[$out,#64*4]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*5-1]
+       strb    w3,[$out,#64*5]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*6-1]
+       strb    w3,[$out,#64*6]
        lsr     x3,x3,#8
-       strb    w3,[$out,#64*7-1]
+       strb    w3,[$out,#64*7]
        add     $out,$out,#64*8
        b.ne    .Loop_scatter_w7
 
index 97201cb271b55e5e1a2be603db22b8a80b78dad5..8bad859c8bd75ede6c5c2a7d5d25e25b3ffbe0ef 100755 (executable)
@@ -1531,13 +1531,13 @@ ecp_nistz256_scatter_w7:
        ld      [$inp],%l0
        add     $inp,4,$inp
        subcc   $index,1,$index
-       stb     %l0,[$out+64*0-1]
+       stb     %l0,[$out+64*0]
        srl     %l0,8,%l1
-       stb     %l1,[$out+64*1-1]
+       stb     %l1,[$out+64*1]
        srl     %l0,16,%l2
-       stb     %l2,[$out+64*2-1]
+       stb     %l2,[$out+64*2]
        srl     %l0,24,%l3
-       stb     %l3,[$out+64*3-1]
+       stb     %l3,[$out+64*3]
        bne     .Loop_scatter_w7
        add     $out,64*4,$out
 
index 1d9e00616b58d0a17f0f6c26c79605aae5cf5278..4fa27fce1720051849b6e644180f9670ce1edc9a 100755 (executable)
@@ -1179,7 +1179,7 @@ for ($i=0;$i<7;$i++) {
        &mov    ("esi",&wparam(1));
        &mov    ("ebp",&wparam(2));
 
-       &lea    ("edi",&DWP(-1,"edi","ebp"));
+       &lea    ("edi",&DWP(0,"edi","ebp"));
        &mov    ("ebp",64/4);
 &set_label("scatter_w7_loop");
        &mov    ("eax",&DWP(0,"esi"));