} else { die "nonsense $flavour"; }
$sp="r1";
-$FRAME=8*$SIZE_T;
+$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
.globl .vpaes_encrypt
.align 5
.vpaes_encrypt:
+ $STU $sp,-$FRAME($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
mflr r6
mfspr r7, 256 # save vrsave
+ stvx v20,r10,$sp # offload callee-saved v20-v31 to stack
+ addi r10,r10,32 # r10/r11 are 16 bytes apart, hence 32-byte stride
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ stw r7,`$FRAME-4`($sp) # save vrsave
li r0, -1
- $PUSH r6,$LRSAVE($sp)
+ $PUSH r6,`$FRAME+$LRSAVE`($sp)
mtspr 256, r0 # preserve all AltiVec registers
bl _vpaes_encrypt_preheat
vsel v1, $outhead, v1, $outmask
stvx v1, 0, $out
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
mtlr r6
mtspr 256, r7 # restore vrsave
+ lvx v20,r10,$sp
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ addi $sp,$sp,$FRAME
blr
.long 0
- .byte 0,12,0x14,1,0,0,3,0
+ .byte 0,12,0x04,1,0x80,0,3,0
.long 0
.size .vpaes_encrypt,.-.vpaes_encrypt
.globl .vpaes_decrypt
.align 5
.vpaes_decrypt:
+ $STU $sp,-$FRAME($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
mflr r6
mfspr r7, 256 # save vrsave
+ stvx v20,r10,$sp
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ stw r7,`$FRAME-4`($sp) # save vrsave
li r0, -1
- $PUSH r6,$LRSAVE($sp)
+ $PUSH r6,`$FRAME+$LRSAVE`($sp)
mtspr 256, r0 # preserve all AltiVec registers
bl _vpaes_decrypt_preheat
vsel v1, $outhead, v1, $outmask
stvx v1, 0, $out
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
mtlr r6
mtspr 256, r7 # restore vrsave
+ lvx v20,r10,$sp
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ addi $sp,$sp,$FRAME
blr
.long 0
- .byte 0,12,0x14,1,0,0,3,0
+ .byte 0,12,0x04,1,0x80,0,3,0
.long 0
.size .vpaes_decrypt,.-.vpaes_decrypt
.globl .vpaes_cbc_encrypt
.align 5
.vpaes_cbc_encrypt:
- $STU $sp,-$FRAME($sp)
+ $STU $sp,-`($FRAME+2*$SIZE_T)`($sp)
mflr r0
- $PUSH r30,$FRAME-$SIZE_T*2($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ mfspr r12, 256
+ stvx v20,r10,$sp
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ stw r12,`$FRAME-4`($sp) # save vrsave
+ $PUSH r30,`$FRAME+$SIZE_T*0`($sp)
+ $PUSH r31,`$FRAME+$SIZE_T*1`($sp)
li r9, 16
- $PUSH r31,$FRAME-$SIZE_T*1($sp)
- $PUSH r0, $FRAME+$LRSAVE($sp)
+ $PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
sub. r30, r5, r9 # copy length-16
mr r5, r6 # copy pointer to key
blt Lcbc_abort
cmpwi r8, 0 # test direction
li r6, -1
- mfspr r7, 256
+ mr r7, r12 # copy vrsave
mtspr 256, r6 # preserve all AltiVec registers
lvx v24, 0, r31 # load [potentially unaligned] iv
stvx v1, r6, r31
mtspr 256, r7 # restore vrsave
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ lvx v20,r10,$sp
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
Lcbc_abort:
- $POP r0, $FRAME+$LRSAVE($sp)
- $POP r30,$FRAME-$SIZE_T*2($sp)
- $POP r31,$FRAME-$SIZE_T*1($sp)
+ $POP r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
+ $POP r30,`$FRAME+$SIZE_T*0`($sp)
+ $POP r31,`$FRAME+$SIZE_T*1`($sp)
mtlr r0
- addi $sp,$sp,$FRAME
+ addi $sp,$sp,`$FRAME+$SIZE_T*2`
blr
.long 0
.byte 0,12,0x04,1,0x80,2,6,0
.globl .vpaes_set_encrypt_key
.align 5
.vpaes_set_encrypt_key:
+ $STU $sp,-$FRAME($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
mflr r0
mfspr r6, 256 # save vrsave
+ stvx v20,r10,$sp
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ stw r6,`$FRAME-4`($sp) # save vrsave
li r7, -1
- $PUSH r0, $LRSAVE($sp)
+ $PUSH r0, `$FRAME+$LRSAVE`($sp)
mtspr 256, r7 # preserve all AltiVec registers
srwi r9, $bits, 5 # shr \$5,%eax
li r8, 0x30 # mov \$0x30,%r8d
bl _vpaes_schedule_core
- $POP r0, $LRSAVE($sp)
+ $POP r0, `$FRAME+$LRSAVE`($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
mtspr 256, r6 # restore vrsave
mtlr r0
xor r3, r3, r3
+ lvx v20,r10,$sp
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ addi $sp,$sp,$FRAME
blr
.long 0
- .byte 0,12,0x14,1,0,0,3,0
+ .byte 0,12,0x04,1,0x80,0,3,0
.long 0
.size .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key
.globl .vpaes_set_decrypt_key
.align 4
.vpaes_set_decrypt_key:
+ $STU $sp,-$FRAME($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
mflr r0
mfspr r6, 256 # save vrsave
+ stvx v20,r10,$sp
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ stw r6,`$FRAME-4`($sp) # save vrsave
li r7, -1
- $PUSH r0, $LRSAVE($sp)
+ $PUSH r0, `$FRAME+$LRSAVE`($sp)
mtspr 256, r7 # preserve all AltiVec registers
srwi r9, $bits, 5 # shr \$5,%eax
xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
bl _vpaes_schedule_core
- $POP r0, $LRSAVE($sp)
+ $POP r0, `$FRAME+$LRSAVE`($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
mtspr 256, r6 # restore vrsave
mtlr r0
xor r3, r3, r3
+ lvx v20,r10,$sp
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ addi $sp,$sp,$FRAME
blr
.long 0
- .byte 0,12,0x14,1,0,0,3,0
+ .byte 0,12,0x04,1,0x80,0,3,0
.long 0
.size .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
___
}
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+
print $code;
close STDOUT;