From 8ea975d07060eac1e20cd9ae5bb74a0a4fb2b1bb Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Wed, 9 Aug 2006 15:40:30 +0000 Subject: [PATCH] +20% tune-up for Power5. --- crypto/bn/asm/ppc-mont.pl | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl index c345c1b30e..280d31244b 100644 --- a/crypto/bn/asm/ppc-mont.pl +++ b/crypto/bn/asm/ppc-mont.pl @@ -162,17 +162,16 @@ $code=<<___; .align 4 L1st: $LDX $aj,$ap,$j ; ap[j] - $LDX $nj,$np,$j ; np[j] addc $lo0,$alo,$hi0 + $LDX $nj,$np,$j ; np[j] addze $hi0,$ahi $UMULL $alo,$aj,$m0 ; ap[j]*bp[0] - $UMULH $ahi,$aj,$m0 - addc $lo1,$nlo,$hi1 + $UMULH $ahi,$aj,$m0 addze $hi1,$nhi $UMULL $nlo,$nj,$m1 ; np[j]*m1 - $UMULH $nhi,$nj,$m1 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0] + $UMULH $nhi,$nj,$m1 addze $hi1,$hi1 $ST $lo1,0($tp) ; tp[j-1] @@ -206,20 +205,16 @@ Louter: $LD $aj,$BNSZ($ap) ; ap[1] $LD $nj,0($np) ; np[0] addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0] + $UMULL $alo,$aj,$m0 ; ap[j]*bp[i] addze $hi0,$hi0 - $UMULL $m1,$lo0,$n0 ; tp[0]*n0 - - $UMULL $alo,$aj,$m0 ; ap[j]*bp[i] $UMULH $ahi,$aj,$m0 - $UMULL $lo1,$nj,$m1 ; np[0]*m1 $UMULH $hi1,$nj,$m1 $LD $nj,$BNSZ($np) ; np[1] addc $lo1,$lo1,$lo0 - addze $hi1,$hi1 - $UMULL $nlo,$nj,$m1 ; np[1]*m1 + addze $hi1,$hi1 $UMULH $nhi,$nj,$m1 mtctr $num @@ -227,24 +222,22 @@ Louter: .align 4 Linner: $LDX $aj,$ap,$j ; ap[j] - $LD $tj,$BNSZ($tp) ; tp[j] addc $lo0,$alo,$hi0 + $LD $tj,$BNSZ($tp) ; tp[j] addze $hi0,$ahi $LDX $nj,$np,$j ; np[j] - addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j] - addze $hi0,$hi0 - $UMULL $alo,$aj,$m0 ; ap[j]*bp[i] - $UMULH $ahi,$aj,$m0 - addc $lo1,$nlo,$hi1 + $UMULL $alo,$aj,$m0 ; ap[j]*bp[i] addze $hi1,$nhi + $UMULH $ahi,$aj,$m0 + addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j] $UMULL $nlo,$nj,$m1 ; np[j]*m1 + addze $hi0,$hi0 $UMULH $nhi,$nj,$m1 addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j] + addi $j,$j,$BNSZ ; j++ addze $hi1,$hi1 $ST $lo1,0($tp) ; tp[j-1] - - addi $j,$j,$BNSZ ; j++ addi $tp,$tp,$BNSZ ; tp++ bdnz- Linner ;Linner -- 2.25.1