armv4-mont.pl: profiler-assisted optimization gives 8%-14% improvement
author: Andy Polyakov <appro@openssl.org>
Sat, 13 Aug 2011 12:38:41 +0000 (12:38 +0000)
committer: Andy Polyakov <appro@openssl.org>
Sat, 13 Aug 2011 12:38:41 +0000 (12:38 +0000)
(more for longer keys) on RSA/DSA.

crypto/bn/asm/armv4-mont.pl

index a6fa7cb1bffcc72bb539aef63cc42c6349aeeb05..f78a8b5f0f5573141fa7f6496b7a34d0acadd0e2 100644 (file)
@@ -92,9 +92,9 @@ bn_mul_mont:
 .L1st:
        ldr     $aj,[$ap],#4            @ ap[j],ap++
        mov     $alo,$ahi
+       ldr     $nj,[$np],#4            @ np[j],np++
        mov     $ahi,#0
        umlal   $alo,$ahi,$aj,$bi       @ ap[j]*bp[0]
-       ldr     $nj,[$np],#4            @ np[j],np++
        mov     $nhi,#0
        umlal   $nlo,$nhi,$nj,$n0       @ np[j]*n0
        adds    $nlo,$nlo,$alo
@@ -104,21 +104,21 @@ bn_mul_mont:
        bne     .L1st
 
        adds    $nlo,$nlo,$ahi
+       ldr     $tp,[$_bp]              @ restore bp
        mov     $nhi,#0
+       ldr     $n0,[$_n0]              @ restore n0
        adc     $nhi,$nhi,#0
-       ldr     $tp,[$_bp]              @ restore bp
        str     $nlo,[$num]             @ tp[num-1]=
-       ldr     $n0,[$_n0]              @ restore n0
        str     $nhi,[$num,#4]          @ tp[num]=
 \f
 .Louter:
        sub     $tj,$num,sp             @ "original" $num-1 value
        sub     $ap,$ap,$tj             @ "rewind" ap to &ap[1]
-       sub     $np,$np,$tj             @ "rewind" np to &np[1]
        ldr     $bi,[$tp,#4]!           @ *(++bp)
+       sub     $np,$np,$tj             @ "rewind" np to &np[1]
        ldr     $aj,[$ap,#-4]           @ ap[0]
-       ldr     $nj,[$np,#-4]           @ np[0]
        ldr     $alo,[sp]               @ tp[0]
+       ldr     $nj,[$np,#-4]           @ np[0]
        ldr     $tj,[sp,#4]             @ tp[1]
 
        mov     $ahi,#0
@@ -132,13 +132,13 @@ bn_mul_mont:
 .Linner:
        ldr     $aj,[$ap],#4            @ ap[j],ap++
        adds    $alo,$ahi,$tj           @ +=tp[j]
+       ldr     $nj,[$np],#4            @ np[j],np++
        mov     $ahi,#0
        umlal   $alo,$ahi,$aj,$bi       @ ap[j]*bp[i]
-       ldr     $nj,[$np],#4            @ np[j],np++
        mov     $nhi,#0
        umlal   $nlo,$nhi,$nj,$n0       @ np[j]*n0
-       ldr     $tj,[$tp,#8]            @ tp[j+1]
        adc     $ahi,$ahi,#0
+       ldr     $tj,[$tp,#8]            @ tp[j+1]
        adds    $nlo,$nlo,$alo
        str     $nlo,[$tp],#4           @ tp[j-1]=,tp++
        adc     $nlo,$nhi,#0
@@ -147,13 +147,13 @@ bn_mul_mont:
 
        adds    $nlo,$nlo,$ahi
        mov     $nhi,#0
+       ldr     $tp,[$_bp]              @ restore bp
        adc     $nhi,$nhi,#0
+       ldr     $n0,[$_n0]              @ restore n0
        adds    $nlo,$nlo,$tj
-       adc     $nhi,$nhi,#0
-       ldr     $tp,[$_bp]              @ restore bp
        ldr     $tj,[$_bpend]           @ restore &bp[num]
+       adc     $nhi,$nhi,#0
        str     $nlo,[$num]             @ tp[num-1]=
-       ldr     $n0,[$_n0]              @ restore n0
        str     $nhi,[$num,#4]          @ tp[num]=
 
        cmp     $tp,$tj