aesni-x86.pl: eliminate development comments.
[oweals/openssl.git] / crypto / aes / asm / aes-ppc.pl
index 903075ec3775a9801dbb30347fe4e0147754f7d8..ce427655ef7bf4fb960b54d30bbbdfcef20704d8 100644 (file)
 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
 # at 1/3 of ppc_AES_decrypt.
 
-$output = shift;
+$flavour = shift;
 
-if ($output =~ /64\.s/) {
+if ($flavour =~ /64/) {
        $SIZE_T =8;
        $STU    ="stdu";
        $POP    ="ld";
        $PUSH   ="std";
-} elsif ($output =~ /32\.s/) {
+} elsif ($flavour =~ /32/) {
        $SIZE_T =4;
        $STU    ="stwu";
        $POP    ="lwz";
        $PUSH   ="stw";
-} else { die "nonsense $output"; }
+} else { die "nonsense $flavour"; }
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
 die "can't locate ppc-xlate.pl";
 
-( defined shift || open STDOUT,"| $^X $xlate $output" ) ||
-       die "can't call $xlate: $!";
+open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";     # 'or', not '||': '||' binds to the (always true) command string, so a failed open() was never reported
 
 $FRAME=32*$SIZE_T;
 
@@ -93,6 +92,7 @@ $mask80=$Tbl2;
 $mask1b=$Tbl3;
 
 $code.=<<___;
+.machine       "any"
 .text
 
 .align 7
@@ -397,42 +397,42 @@ Lppc_AES_encrypt:
 Lenc_loop:
        rlwinm  $acc00,$s0,`32-24+3`,21,28
        rlwinm  $acc01,$s1,`32-24+3`,21,28
-       rlwinm  $acc02,$s2,`32-24+3`,21,28
-       rlwinm  $acc03,$s3,`32-24+3`,21,28
        lwz     $t0,0($key)
        lwz     $t1,4($key)
+       rlwinm  $acc02,$s2,`32-24+3`,21,28
+       rlwinm  $acc03,$s3,`32-24+3`,21,28
        lwz     $t2,8($key)
        lwz     $t3,12($key)
        rlwinm  $acc04,$s1,`32-16+3`,21,28
        rlwinm  $acc05,$s2,`32-16+3`,21,28
-       rlwinm  $acc06,$s3,`32-16+3`,21,28
-       rlwinm  $acc07,$s0,`32-16+3`,21,28
        lwzx    $acc00,$Tbl0,$acc00
        lwzx    $acc01,$Tbl0,$acc01
+       rlwinm  $acc06,$s3,`32-16+3`,21,28
+       rlwinm  $acc07,$s0,`32-16+3`,21,28
        lwzx    $acc02,$Tbl0,$acc02
        lwzx    $acc03,$Tbl0,$acc03
        rlwinm  $acc08,$s2,`32-8+3`,21,28
        rlwinm  $acc09,$s3,`32-8+3`,21,28
-       rlwinm  $acc10,$s0,`32-8+3`,21,28
-       rlwinm  $acc11,$s1,`32-8+3`,21,28
        lwzx    $acc04,$Tbl1,$acc04
        lwzx    $acc05,$Tbl1,$acc05
+       rlwinm  $acc10,$s0,`32-8+3`,21,28
+       rlwinm  $acc11,$s1,`32-8+3`,21,28
        lwzx    $acc06,$Tbl1,$acc06
        lwzx    $acc07,$Tbl1,$acc07
        rlwinm  $acc12,$s3,`0+3`,21,28
        rlwinm  $acc13,$s0,`0+3`,21,28
-       rlwinm  $acc14,$s1,`0+3`,21,28
-       rlwinm  $acc15,$s2,`0+3`,21,28
        lwzx    $acc08,$Tbl2,$acc08
        lwzx    $acc09,$Tbl2,$acc09
+       rlwinm  $acc14,$s1,`0+3`,21,28
+       rlwinm  $acc15,$s2,`0+3`,21,28
        lwzx    $acc10,$Tbl2,$acc10
        lwzx    $acc11,$Tbl2,$acc11
        xor     $t0,$t0,$acc00
        xor     $t1,$t1,$acc01
-       xor     $t2,$t2,$acc02
-       xor     $t3,$t3,$acc03
        lwzx    $acc12,$Tbl3,$acc12
        lwzx    $acc13,$Tbl3,$acc13
+       xor     $t2,$t2,$acc02
+       xor     $t3,$t3,$acc03
        lwzx    $acc14,$Tbl3,$acc14
        lwzx    $acc15,$Tbl3,$acc15
        xor     $t0,$t0,$acc04
@@ -462,42 +462,42 @@ Lenc_loop:
        lwz     $acc11,`2048+224`($Tbl0)
        rlwinm  $acc00,$s0,`32-24`,24,31
        rlwinm  $acc01,$s1,`32-24`,24,31
-       rlwinm  $acc02,$s2,`32-24`,24,31
-       rlwinm  $acc03,$s3,`32-24`,24,31
        lwz     $t0,0($key)
        lwz     $t1,4($key)
+       rlwinm  $acc02,$s2,`32-24`,24,31
+       rlwinm  $acc03,$s3,`32-24`,24,31
        lwz     $t2,8($key)
        lwz     $t3,12($key)
        rlwinm  $acc04,$s1,`32-16`,24,31
        rlwinm  $acc05,$s2,`32-16`,24,31
-       rlwinm  $acc06,$s3,`32-16`,24,31
-       rlwinm  $acc07,$s0,`32-16`,24,31
        lbzx    $acc00,$Tbl2,$acc00
        lbzx    $acc01,$Tbl2,$acc01
+       rlwinm  $acc06,$s3,`32-16`,24,31
+       rlwinm  $acc07,$s0,`32-16`,24,31
        lbzx    $acc02,$Tbl2,$acc02
        lbzx    $acc03,$Tbl2,$acc03
        rlwinm  $acc08,$s2,`32-8`,24,31
        rlwinm  $acc09,$s3,`32-8`,24,31
-       rlwinm  $acc10,$s0,`32-8`,24,31
-       rlwinm  $acc11,$s1,`32-8`,24,31
        lbzx    $acc04,$Tbl2,$acc04
        lbzx    $acc05,$Tbl2,$acc05
+       rlwinm  $acc10,$s0,`32-8`,24,31
+       rlwinm  $acc11,$s1,`32-8`,24,31
        lbzx    $acc06,$Tbl2,$acc06
        lbzx    $acc07,$Tbl2,$acc07
        rlwinm  $acc12,$s3,`0`,24,31
        rlwinm  $acc13,$s0,`0`,24,31
-       rlwinm  $acc14,$s1,`0`,24,31
-       rlwinm  $acc15,$s2,`0`,24,31
        lbzx    $acc08,$Tbl2,$acc08
        lbzx    $acc09,$Tbl2,$acc09
+       rlwinm  $acc14,$s1,`0`,24,31
+       rlwinm  $acc15,$s2,`0`,24,31
        lbzx    $acc10,$Tbl2,$acc10
        lbzx    $acc11,$Tbl2,$acc11
        rlwinm  $s0,$acc00,24,0,7
        rlwinm  $s1,$acc01,24,0,7
-       rlwinm  $s2,$acc02,24,0,7
-       rlwinm  $s3,$acc03,24,0,7
        lbzx    $acc12,$Tbl2,$acc12
        lbzx    $acc13,$Tbl2,$acc13
+       rlwinm  $s2,$acc02,24,0,7
+       rlwinm  $s3,$acc03,24,0,7
        lbzx    $acc14,$Tbl2,$acc14
        lbzx    $acc15,$Tbl2,$acc15
        rlwimi  $s0,$acc04,16,8,15
@@ -542,42 +542,38 @@ Lenc_compact_loop:
        rlwinm  $acc01,$s1,`32-24`,24,31
        rlwinm  $acc02,$s2,`32-24`,24,31
        rlwinm  $acc03,$s3,`32-24`,24,31
-       lwz     $t0,0($key)
-       lwz     $t1,4($key)
-       lwz     $t2,8($key)
-       lwz     $t3,12($key)
-       rlwinm  $acc04,$s1,`32-16`,24,31
-       rlwinm  $acc05,$s2,`32-16`,24,31
-       rlwinm  $acc06,$s3,`32-16`,24,31
-       rlwinm  $acc07,$s0,`32-16`,24,31
        lbzx    $acc00,$Tbl1,$acc00
        lbzx    $acc01,$Tbl1,$acc01
+       rlwinm  $acc04,$s1,`32-16`,24,31
+       rlwinm  $acc05,$s2,`32-16`,24,31
        lbzx    $acc02,$Tbl1,$acc02
        lbzx    $acc03,$Tbl1,$acc03
-       rlwinm  $acc08,$s2,`32-8`,24,31
-       rlwinm  $acc09,$s3,`32-8`,24,31
-       rlwinm  $acc10,$s0,`32-8`,24,31
-       rlwinm  $acc11,$s1,`32-8`,24,31
+       rlwinm  $acc06,$s3,`32-16`,24,31
+       rlwinm  $acc07,$s0,`32-16`,24,31
        lbzx    $acc04,$Tbl1,$acc04
        lbzx    $acc05,$Tbl1,$acc05
+       rlwinm  $acc08,$s2,`32-8`,24,31
+       rlwinm  $acc09,$s3,`32-8`,24,31
        lbzx    $acc06,$Tbl1,$acc06
        lbzx    $acc07,$Tbl1,$acc07
-       rlwinm  $acc12,$s3,`0`,24,31
-       rlwinm  $acc13,$s0,`0`,24,31
-       rlwinm  $acc14,$s1,`0`,24,31
-       rlwinm  $acc15,$s2,`0`,24,31
+       rlwinm  $acc10,$s0,`32-8`,24,31
+       rlwinm  $acc11,$s1,`32-8`,24,31
        lbzx    $acc08,$Tbl1,$acc08
        lbzx    $acc09,$Tbl1,$acc09
+       rlwinm  $acc12,$s3,`0`,24,31
+       rlwinm  $acc13,$s0,`0`,24,31
        lbzx    $acc10,$Tbl1,$acc10
        lbzx    $acc11,$Tbl1,$acc11
-       rlwinm  $s0,$acc00,24,0,7
-       rlwinm  $s1,$acc01,24,0,7
-       rlwinm  $s2,$acc02,24,0,7
-       rlwinm  $s3,$acc03,24,0,7
+       rlwinm  $acc14,$s1,`0`,24,31
+       rlwinm  $acc15,$s2,`0`,24,31
        lbzx    $acc12,$Tbl1,$acc12
        lbzx    $acc13,$Tbl1,$acc13
+       rlwinm  $s0,$acc00,24,0,7
+       rlwinm  $s1,$acc01,24,0,7
        lbzx    $acc14,$Tbl1,$acc14
        lbzx    $acc15,$Tbl1,$acc15
+       rlwinm  $s2,$acc02,24,0,7
+       rlwinm  $s3,$acc03,24,0,7
        rlwimi  $s0,$acc04,16,8,15
        rlwimi  $s1,$acc05,16,8,15
        rlwimi  $s2,$acc06,16,8,15
@@ -586,8 +582,12 @@ Lenc_compact_loop:
        rlwimi  $s1,$acc09,8,16,23
        rlwimi  $s2,$acc10,8,16,23
        rlwimi  $s3,$acc11,8,16,23
+       lwz     $t0,0($key)
+       lwz     $t1,4($key)
        or      $s0,$s0,$acc12
        or      $s1,$s1,$acc13
+       lwz     $t2,8($key)
+       lwz     $t3,12($key)
        or      $s2,$s2,$acc14
        or      $s3,$s3,$acc15
 
@@ -746,42 +746,42 @@ Lppc_AES_decrypt:
 Ldec_loop:
        rlwinm  $acc00,$s0,`32-24+3`,21,28
        rlwinm  $acc01,$s1,`32-24+3`,21,28
-       rlwinm  $acc02,$s2,`32-24+3`,21,28
-       rlwinm  $acc03,$s3,`32-24+3`,21,28
        lwz     $t0,0($key)
        lwz     $t1,4($key)
+       rlwinm  $acc02,$s2,`32-24+3`,21,28
+       rlwinm  $acc03,$s3,`32-24+3`,21,28
        lwz     $t2,8($key)
        lwz     $t3,12($key)
        rlwinm  $acc04,$s3,`32-16+3`,21,28
        rlwinm  $acc05,$s0,`32-16+3`,21,28
-       rlwinm  $acc06,$s1,`32-16+3`,21,28
-       rlwinm  $acc07,$s2,`32-16+3`,21,28
        lwzx    $acc00,$Tbl0,$acc00
        lwzx    $acc01,$Tbl0,$acc01
+       rlwinm  $acc06,$s1,`32-16+3`,21,28
+       rlwinm  $acc07,$s2,`32-16+3`,21,28
        lwzx    $acc02,$Tbl0,$acc02
        lwzx    $acc03,$Tbl0,$acc03
        rlwinm  $acc08,$s2,`32-8+3`,21,28
        rlwinm  $acc09,$s3,`32-8+3`,21,28
-       rlwinm  $acc10,$s0,`32-8+3`,21,28
-       rlwinm  $acc11,$s1,`32-8+3`,21,28
        lwzx    $acc04,$Tbl1,$acc04
        lwzx    $acc05,$Tbl1,$acc05
+       rlwinm  $acc10,$s0,`32-8+3`,21,28
+       rlwinm  $acc11,$s1,`32-8+3`,21,28
        lwzx    $acc06,$Tbl1,$acc06
        lwzx    $acc07,$Tbl1,$acc07
        rlwinm  $acc12,$s1,`0+3`,21,28
        rlwinm  $acc13,$s2,`0+3`,21,28
-       rlwinm  $acc14,$s3,`0+3`,21,28
-       rlwinm  $acc15,$s0,`0+3`,21,28
        lwzx    $acc08,$Tbl2,$acc08
        lwzx    $acc09,$Tbl2,$acc09
+       rlwinm  $acc14,$s3,`0+3`,21,28
+       rlwinm  $acc15,$s0,`0+3`,21,28
        lwzx    $acc10,$Tbl2,$acc10
        lwzx    $acc11,$Tbl2,$acc11
        xor     $t0,$t0,$acc00
        xor     $t1,$t1,$acc01
-       xor     $t2,$t2,$acc02
-       xor     $t3,$t3,$acc03
        lwzx    $acc12,$Tbl3,$acc12
        lwzx    $acc13,$Tbl3,$acc13
+       xor     $t2,$t2,$acc02
+       xor     $t3,$t3,$acc03
        lwzx    $acc14,$Tbl3,$acc14
        lwzx    $acc15,$Tbl3,$acc15
        xor     $t0,$t0,$acc04
@@ -811,42 +811,42 @@ Ldec_loop:
        lwz     $acc11,`2048+224`($Tbl0)
        rlwinm  $acc00,$s0,`32-24`,24,31
        rlwinm  $acc01,$s1,`32-24`,24,31
-       rlwinm  $acc02,$s2,`32-24`,24,31
-       rlwinm  $acc03,$s3,`32-24`,24,31
        lwz     $t0,0($key)
        lwz     $t1,4($key)
+       rlwinm  $acc02,$s2,`32-24`,24,31
+       rlwinm  $acc03,$s3,`32-24`,24,31
        lwz     $t2,8($key)
        lwz     $t3,12($key)
        rlwinm  $acc04,$s3,`32-16`,24,31
        rlwinm  $acc05,$s0,`32-16`,24,31
-       rlwinm  $acc06,$s1,`32-16`,24,31
-       rlwinm  $acc07,$s2,`32-16`,24,31
        lbzx    $acc00,$Tbl2,$acc00
        lbzx    $acc01,$Tbl2,$acc01
+       rlwinm  $acc06,$s1,`32-16`,24,31
+       rlwinm  $acc07,$s2,`32-16`,24,31
        lbzx    $acc02,$Tbl2,$acc02
        lbzx    $acc03,$Tbl2,$acc03
        rlwinm  $acc08,$s2,`32-8`,24,31
        rlwinm  $acc09,$s3,`32-8`,24,31
-       rlwinm  $acc10,$s0,`32-8`,24,31
-       rlwinm  $acc11,$s1,`32-8`,24,31
        lbzx    $acc04,$Tbl2,$acc04
        lbzx    $acc05,$Tbl2,$acc05
+       rlwinm  $acc10,$s0,`32-8`,24,31
+       rlwinm  $acc11,$s1,`32-8`,24,31
        lbzx    $acc06,$Tbl2,$acc06
        lbzx    $acc07,$Tbl2,$acc07
        rlwinm  $acc12,$s1,`0`,24,31
        rlwinm  $acc13,$s2,`0`,24,31
-       rlwinm  $acc14,$s3,`0`,24,31
-       rlwinm  $acc15,$s0,`0`,24,31
        lbzx    $acc08,$Tbl2,$acc08
        lbzx    $acc09,$Tbl2,$acc09
+       rlwinm  $acc14,$s3,`0`,24,31
+       rlwinm  $acc15,$s0,`0`,24,31
        lbzx    $acc10,$Tbl2,$acc10
        lbzx    $acc11,$Tbl2,$acc11
        rlwinm  $s0,$acc00,24,0,7
        rlwinm  $s1,$acc01,24,0,7
-       rlwinm  $s2,$acc02,24,0,7
-       rlwinm  $s3,$acc03,24,0,7
        lbzx    $acc12,$Tbl2,$acc12
        lbzx    $acc13,$Tbl2,$acc13
+       rlwinm  $s2,$acc02,24,0,7
+       rlwinm  $s3,$acc03,24,0,7
        lbzx    $acc14,$Tbl2,$acc14
        lbzx    $acc15,$Tbl2,$acc15
        rlwimi  $s0,$acc04,16,8,15
@@ -897,42 +897,38 @@ Ldec_compact_loop:
        rlwinm  $acc01,$s1,`32-24`,24,31
        rlwinm  $acc02,$s2,`32-24`,24,31
        rlwinm  $acc03,$s3,`32-24`,24,31
-       lwz     $t0,0($key)
-       lwz     $t1,4($key)
-       lwz     $t2,8($key)
-       lwz     $t3,12($key)
-       rlwinm  $acc04,$s3,`32-16`,24,31
-       rlwinm  $acc05,$s0,`32-16`,24,31
-       rlwinm  $acc06,$s1,`32-16`,24,31
-       rlwinm  $acc07,$s2,`32-16`,24,31
        lbzx    $acc00,$Tbl1,$acc00
        lbzx    $acc01,$Tbl1,$acc01
+       rlwinm  $acc04,$s3,`32-16`,24,31
+       rlwinm  $acc05,$s0,`32-16`,24,31
        lbzx    $acc02,$Tbl1,$acc02
        lbzx    $acc03,$Tbl1,$acc03
-       rlwinm  $acc08,$s2,`32-8`,24,31
-       rlwinm  $acc09,$s3,`32-8`,24,31
-       rlwinm  $acc10,$s0,`32-8`,24,31
-       rlwinm  $acc11,$s1,`32-8`,24,31
+       rlwinm  $acc06,$s1,`32-16`,24,31
+       rlwinm  $acc07,$s2,`32-16`,24,31
        lbzx    $acc04,$Tbl1,$acc04
        lbzx    $acc05,$Tbl1,$acc05
+       rlwinm  $acc08,$s2,`32-8`,24,31
+       rlwinm  $acc09,$s3,`32-8`,24,31
        lbzx    $acc06,$Tbl1,$acc06
        lbzx    $acc07,$Tbl1,$acc07
-       rlwinm  $acc12,$s1,`0`,24,31
-       rlwinm  $acc13,$s2,`0`,24,31
-       rlwinm  $acc14,$s3,`0`,24,31
-       rlwinm  $acc15,$s0,`0`,24,31
+       rlwinm  $acc10,$s0,`32-8`,24,31
+       rlwinm  $acc11,$s1,`32-8`,24,31
        lbzx    $acc08,$Tbl1,$acc08
        lbzx    $acc09,$Tbl1,$acc09
+       rlwinm  $acc12,$s1,`0`,24,31
+       rlwinm  $acc13,$s2,`0`,24,31
        lbzx    $acc10,$Tbl1,$acc10
        lbzx    $acc11,$Tbl1,$acc11
-       rlwinm  $s0,$acc00,24,0,7
-       rlwinm  $s1,$acc01,24,0,7
-       rlwinm  $s2,$acc02,24,0,7
-       rlwinm  $s3,$acc03,24,0,7
+       rlwinm  $acc14,$s3,`0`,24,31
+       rlwinm  $acc15,$s0,`0`,24,31
        lbzx    $acc12,$Tbl1,$acc12
        lbzx    $acc13,$Tbl1,$acc13
+       rlwinm  $s0,$acc00,24,0,7
+       rlwinm  $s1,$acc01,24,0,7
        lbzx    $acc14,$Tbl1,$acc14
        lbzx    $acc15,$Tbl1,$acc15
+       rlwinm  $s2,$acc02,24,0,7
+       rlwinm  $s3,$acc03,24,0,7
        rlwimi  $s0,$acc04,16,8,15
        rlwimi  $s1,$acc05,16,8,15
        rlwimi  $s2,$acc06,16,8,15
@@ -941,8 +937,12 @@ Ldec_compact_loop:
        rlwimi  $s1,$acc09,8,16,23
        rlwimi  $s2,$acc10,8,16,23
        rlwimi  $s3,$acc11,8,16,23
+       lwz     $t0,0($key)
+       lwz     $t1,4($key)
        or      $s0,$s0,$acc12
        or      $s1,$s1,$acc13
+       lwz     $t2,8($key)
+       lwz     $t3,12($key)
        or      $s2,$s2,$acc14
        or      $s3,$s3,$acc15
 
@@ -1004,12 +1004,12 @@ $code.=<<___ if ($SIZE_T==8);
        xor     $acc04,$acc04,$s0       # r4^r0
        xor     $acc06,$acc06,$s2
 
-       extrdi  $acc01,$acc00,0,32
-       extrdi  $acc03,$acc02,0,32
-       extrdi  $acc05,$acc04,0,32
-       extrdi  $acc07,$acc06,0,32
-       extrdi  $acc09,$acc08,0,32
-       extrdi  $acc11,$acc10,0,32
+       extrdi  $acc01,$acc00,32,0
+       extrdi  $acc03,$acc02,32,0
+       extrdi  $acc05,$acc04,32,0
+       extrdi  $acc07,$acc06,32,0
+       extrdi  $acc09,$acc08,32,0
+       extrdi  $acc11,$acc10,32,0
 ___
 $code.=<<___ if ($SIZE_T==4);
        and     $acc00,$s0,$mask80      # r1=r0&0x80808080