3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # This module implements support for AES instructions as per PowerISA
11 # specification version 2.07, first implemented by POWER8 processor.
12 # The module is endian-agnostic in sense that it supports both big-
13 # and little-endian cases. Data alignment in parallelizable modes is
14 # handled with VSX loads and stores, which implies MSR.VSX flag being
15 # set. It should also be noted that ISA specification doesn't prohibit
16 # alignment exceptions for these instructions on page boundaries.
17 # Initially alignment was handled in pure AltiVec/VMX way [when data
18 # is aligned programmatically, which in turn guarantees exception-
19 # free execution], but it turned out to hamper performance when vcipher
20 # instructions are interleaved. It's reckoned that eventual
21 # misalignment penalties at page boundaries are on average lower
22 # than additional overhead in pure AltiVec approach.
26 if ($flavour =~ /64/) {
34 } elsif ($flavour =~ /32/) {
42 } else { die "nonsense $flavour"; }
44 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
# Locate the ppc-xlate.pl "assembler": first next to this script, then in
# the sibling perlasm directory.  The list-assignment form only sets $dir
# when the match succeeds, instead of copying a possibly-stale $1 when $0
# carries no path component; $dir then defaults to "" (current directory).
46 ($dir) = $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir = "" unless defined $dir;
47 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
48 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
49 die "can't locate ppc-xlate.pl";
# Pipe all generated assembly through the xlate "assembler" for the chosen
# flavour.  Low-precedence "or" is required here: with "||" the check binds
# to the concatenated command string (always true, since it is non-empty),
# so a failed open() would go entirely unnoticed.
51 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
59 #########################################################################
60 {{{ # Key setup procedures #
61 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
62 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
63 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
72 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
73 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
74 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
79 mflr $ptr #vvvvv "distance between . and rcon
84 .byte 0,12,0x14,0,0,0,0,0
85 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
87 .globl .${prefix}_set_encrypt_key
89 .${prefix}_set_encrypt_key:
92 $PUSH r11,$LRSAVE($sp)
96 beq- Lenc_key_abort # if ($inp==0) return -1;
98 beq- Lenc_key_abort # if ($out==0) return -1;
116 addi $inp,$inp,15 # 15 is not typo
117 lvsr $key,0,r9 # borrow $key
121 le?vspltisb $mask,0x0f # borrow $mask
123 le?vxor $key,$key,$mask # adjust for byte swap
126 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
128 vxor $zero,$zero,$zero
131 ?lvsr $outperm,0,$out
134 ?vperm $outmask,$zero,$outmask,$outperm
144 vperm $key,$in0,$in0,$mask # rotate-n-splat
145 vsldoi $tmp,$zero,$in0,12 # >>32
146 vperm $outtail,$in0,$in0,$outperm # rotate
147 vsel $stage,$outhead,$outtail,$outmask
148 vmr $outhead,$outtail
149 vcipherlast $key,$key,$rcon
154 vsldoi $tmp,$zero,$tmp,12 # >>32
156 vsldoi $tmp,$zero,$tmp,12 # >>32
158 vadduwm $rcon,$rcon,$rcon
162 lvx $rcon,0,$ptr # last two round keys
164 vperm $key,$in0,$in0,$mask # rotate-n-splat
165 vsldoi $tmp,$zero,$in0,12 # >>32
166 vperm $outtail,$in0,$in0,$outperm # rotate
167 vsel $stage,$outhead,$outtail,$outmask
168 vmr $outhead,$outtail
169 vcipherlast $key,$key,$rcon
174 vsldoi $tmp,$zero,$tmp,12 # >>32
176 vsldoi $tmp,$zero,$tmp,12 # >>32
178 vadduwm $rcon,$rcon,$rcon
181 vperm $key,$in0,$in0,$mask # rotate-n-splat
182 vsldoi $tmp,$zero,$in0,12 # >>32
183 vperm $outtail,$in0,$in0,$outperm # rotate
184 vsel $stage,$outhead,$outtail,$outmask
185 vmr $outhead,$outtail
186 vcipherlast $key,$key,$rcon
191 vsldoi $tmp,$zero,$tmp,12 # >>32
193 vsldoi $tmp,$zero,$tmp,12 # >>32
196 vperm $outtail,$in0,$in0,$outperm # rotate
197 vsel $stage,$outhead,$outtail,$outmask
198 vmr $outhead,$outtail
201 addi $inp,$out,15 # 15 is not typo
211 vperm $outtail,$in0,$in0,$outperm # rotate
212 vsel $stage,$outhead,$outtail,$outmask
213 vmr $outhead,$outtail
216 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
217 vspltisb $key,8 # borrow $key
219 vsububm $mask,$mask,$key # adjust the mask
222 	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
223 vsldoi $tmp,$zero,$in0,12 # >>32
224 vcipherlast $key,$key,$rcon
227 vsldoi $tmp,$zero,$tmp,12 # >>32
229 vsldoi $tmp,$zero,$tmp,12 # >>32
232 vsldoi $stage,$zero,$in1,8
235 vsldoi $in1,$zero,$in1,12 # >>32
236 vadduwm $rcon,$rcon,$rcon
240 vsldoi $stage,$stage,$in0,8
242 vperm $key,$in1,$in1,$mask # rotate-n-splat
243 vsldoi $tmp,$zero,$in0,12 # >>32
244 vperm $outtail,$stage,$stage,$outperm # rotate
245 vsel $stage,$outhead,$outtail,$outmask
246 vmr $outhead,$outtail
247 vcipherlast $key,$key,$rcon
251 vsldoi $stage,$in0,$in1,8
253 vsldoi $tmp,$zero,$tmp,12 # >>32
254 vperm $outtail,$stage,$stage,$outperm # rotate
255 vsel $stage,$outhead,$outtail,$outmask
256 vmr $outhead,$outtail
258 vsldoi $tmp,$zero,$tmp,12 # >>32
265 vsldoi $in1,$zero,$in1,12 # >>32
266 vadduwm $rcon,$rcon,$rcon
270 vperm $outtail,$in0,$in0,$outperm # rotate
271 vsel $stage,$outhead,$outtail,$outmask
272 vmr $outhead,$outtail
274 addi $inp,$out,15 # 15 is not typo
287 vperm $outtail,$in0,$in0,$outperm # rotate
288 vsel $stage,$outhead,$outtail,$outmask
289 vmr $outhead,$outtail
292 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
296 vperm $key,$in1,$in1,$mask # rotate-n-splat
297 vsldoi $tmp,$zero,$in0,12 # >>32
298 vperm $outtail,$in1,$in1,$outperm # rotate
299 vsel $stage,$outhead,$outtail,$outmask
300 vmr $outhead,$outtail
301 vcipherlast $key,$key,$rcon
306 vsldoi $tmp,$zero,$tmp,12 # >>32
308 vsldoi $tmp,$zero,$tmp,12 # >>32
310 vadduwm $rcon,$rcon,$rcon
312 vperm $outtail,$in0,$in0,$outperm # rotate
313 vsel $stage,$outhead,$outtail,$outmask
314 vmr $outhead,$outtail
316 addi $inp,$out,15 # 15 is not typo
320 vspltw $key,$in0,3 # just splat
321 vsldoi $tmp,$zero,$in1,12 # >>32
325 vsldoi $tmp,$zero,$tmp,12 # >>32
327 vsldoi $tmp,$zero,$tmp,12 # >>32
335 lvx $in1,0,$inp # redundant in aligned case
336 vsel $in1,$outhead,$in1,$outmask
346 .byte 0,12,0x14,1,0,0,3,0
348 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
350 .globl .${prefix}_set_decrypt_key
352 .${prefix}_set_decrypt_key:
353 $STU $sp,-$FRAME($sp)
355 $PUSH r10,$FRAME+$LRSAVE($sp)
363 subi $inp,$out,240 # first round key
364 srwi $rounds,$rounds,1
365 add $out,$inp,$cnt # last round key
389 xor r3,r3,r3 # return value
394 .byte 0,12,4,1,0x80,0,3,0
396 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
399 #########################################################################
400 {{{ # Single block en- and decrypt procedures #
403 my $n = $dir eq "de" ? "n" : "";
404 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
407 .globl .${prefix}_${dir}crypt
409 .${prefix}_${dir}crypt:
410 lwz $rounds,240($key)
413 li $idx,15 # 15 is not typo
419 lvsl v2,0,$inp # inpperm
421 ?lvsl v3,0,r11 # outperm
424 vperm v0,v0,v1,v2 # align [and byte swap in LE]
426 ?lvsl v5,0,$key # keyperm
427 srwi $rounds,$rounds,1
430 subi $rounds,$rounds,1
431 ?vperm v1,v1,v2,v5 # align round key
453 v${n}cipherlast v0,v0,v1
457 li $idx,15 # 15 is not typo
458 ?vperm v2,v1,v2,v3 # outmask
460 lvx v1,0,$out # outhead
461 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
471 .byte 0,12,0x14,0,0,0,3,0
473 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
479 #########################################################################
480 {{{ # CBC en- and decrypt procedures #
481 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
482 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
483 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
486 .globl .${prefix}_cbc_encrypt
488 .${prefix}_cbc_encrypt:
492 cmpwi $enc,0 # test direction
498 vxor $rndkey0,$rndkey0,$rndkey0
499 le?vspltisb $tmp,0x0f
501 lvx $ivec,0,$ivp # load [unaligned] iv
503 lvx $inptail,$idx,$ivp
504 le?vxor $inpperm,$inpperm,$tmp
505 vperm $ivec,$ivec,$inptail,$inpperm
508 ?lvsl $keyperm,0,$key # prepare for unaligned key
509 lwz $rounds,240($key)
511 lvsr $inpperm,0,r11 # prepare for unaligned load
513 addi $inp,$inp,15 # 15 is not typo
514 le?vxor $inpperm,$inpperm,$tmp
516 ?lvsr $outperm,0,$out # prepare for unaligned store
519 ?vperm $outmask,$rndkey0,$outmask,$outperm
520 le?vxor $outperm,$outperm,$tmp
522 srwi $rounds,$rounds,1
524 subi $rounds,$rounds,1
532 subi $len,$len,16 # len-=16
535 vperm $inout,$inout,$inptail,$inpperm
536 lvx $rndkey1,$idx,$key
538 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
539 vxor $inout,$inout,$rndkey0
540 lvx $rndkey0,$idx,$key
542 vxor $inout,$inout,$ivec
545 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
546 vcipher $inout,$inout,$rndkey1
547 lvx $rndkey1,$idx,$key
549 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
550 vcipher $inout,$inout,$rndkey0
551 lvx $rndkey0,$idx,$key
555 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
556 vcipher $inout,$inout,$rndkey1
557 lvx $rndkey1,$idx,$key
559 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
560 vcipherlast $ivec,$inout,$rndkey0
563 vperm $tmp,$ivec,$ivec,$outperm
564 vsel $inout,$outhead,$tmp,$outmask
575 bge _aesp8_cbc_decrypt8x
580 subi $len,$len,16 # len-=16
583 vperm $tmp,$tmp,$inptail,$inpperm
584 lvx $rndkey1,$idx,$key
586 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
587 vxor $inout,$tmp,$rndkey0
588 lvx $rndkey0,$idx,$key
592 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
593 vncipher $inout,$inout,$rndkey1
594 lvx $rndkey1,$idx,$key
596 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
597 vncipher $inout,$inout,$rndkey0
598 lvx $rndkey0,$idx,$key
602 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
603 vncipher $inout,$inout,$rndkey1
604 lvx $rndkey1,$idx,$key
606 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
607 vncipherlast $inout,$inout,$rndkey0
610 vxor $inout,$inout,$ivec
612 vperm $tmp,$inout,$inout,$outperm
613 vsel $inout,$outhead,$tmp,$outmask
621 lvx $inout,0,$out # redundant in aligned case
622 vsel $inout,$outhead,$inout,$outmask
625 neg $enc,$ivp # write [unaligned] iv
626 li $idx,15 # 15 is not typo
627 vxor $rndkey0,$rndkey0,$rndkey0
629 le?vspltisb $tmp,0x0f
630 ?lvsl $outperm,0,$enc
631 ?vperm $outmask,$rndkey0,$outmask,$outperm
632 le?vxor $outperm,$outperm,$tmp
634 vperm $ivec,$ivec,$ivec,$outperm
635 vsel $inout,$outhead,$ivec,$outmask
636 lvx $inptail,$idx,$ivp
638 vsel $inout,$ivec,$inptail,$outmask
639 stvx $inout,$idx,$ivp
644 .byte 0,12,0x14,0,0,0,6,0
647 #########################################################################
648 {{ # Optimized CBC decrypt procedure #
650 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
651 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
652 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
653 my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
654 # v26-v31 last 6 round keys
655 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
659 _aesp8_cbc_decrypt8x:
660 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
661 li r10,`$FRAME+8*16+15`
662 li r11,`$FRAME+8*16+31`
663 stvx v20,r10,$sp # ABI says so
686 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
688 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
690 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
692 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
694 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
696 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
698 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
702 subi $rounds,$rounds,3 # -4 in total
703 subi $len,$len,128 # bias
705 lvx $rndkey0,$x00,$key # load key schedule
709 ?vperm $rndkey0,$rndkey0,v30,$keyperm
710 addi $key_,$sp,$FRAME+15
714 ?vperm v24,v30,v31,$keyperm
717 stvx v24,$x00,$key_ # off-load round[1]
718 ?vperm v25,v31,v30,$keyperm
720 stvx v25,$x10,$key_ # off-load round[2]
721 addi $key_,$key_,0x20
722 bdnz Load_cbc_dec_key
725 ?vperm v24,v30,v31,$keyperm
727 stvx v24,$x00,$key_ # off-load round[3]
728 ?vperm v25,v31,v26,$keyperm
730 stvx v25,$x10,$key_ # off-load round[4]
731 addi $key_,$sp,$FRAME+15 # rewind $key_
732 ?vperm v26,v26,v27,$keyperm
734 ?vperm v27,v27,v28,$keyperm
736 ?vperm v28,v28,v29,$keyperm
738 ?vperm v29,v29,v30,$keyperm
739 lvx $out0,$x70,$key # borrow $out0
740 ?vperm v30,v30,v31,$keyperm
741 lvx v24,$x00,$key_ # pre-load round[1]
742 ?vperm v31,v31,$out0,$keyperm
743 lvx v25,$x10,$key_ # pre-load round[2]
745 #lvx $inptail,0,$inp # "caller" already did this
746 #addi $inp,$inp,15 # 15 is not typo
747 subi $inp,$inp,15 # undo "caller"
750 lvx_u $in0,$x00,$inp # load first 8 "words"
751 le?lvsl $inpperm,0,$idx
752 le?vspltisb $tmp,0x0f
754 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
756 le?vperm $in0,$in0,$in0,$inpperm
758 le?vperm $in1,$in1,$in1,$inpperm
760 le?vperm $in2,$in2,$in2,$inpperm
761 vxor $out0,$in0,$rndkey0
763 le?vperm $in3,$in3,$in3,$inpperm
764 vxor $out1,$in1,$rndkey0
766 le?vperm $in4,$in4,$in4,$inpperm
767 vxor $out2,$in2,$rndkey0
770 le?vperm $in5,$in5,$in5,$inpperm
771 vxor $out3,$in3,$rndkey0
772 le?vperm $in6,$in6,$in6,$inpperm
773 vxor $out4,$in4,$rndkey0
774 le?vperm $in7,$in7,$in7,$inpperm
775 vxor $out5,$in5,$rndkey0
776 vxor $out6,$in6,$rndkey0
777 vxor $out7,$in7,$rndkey0
783 vncipher $out0,$out0,v24
784 vncipher $out1,$out1,v24
785 vncipher $out2,$out2,v24
786 vncipher $out3,$out3,v24
787 vncipher $out4,$out4,v24
788 vncipher $out5,$out5,v24
789 vncipher $out6,$out6,v24
790 vncipher $out7,$out7,v24
791 lvx v24,$x20,$key_ # round[3]
792 addi $key_,$key_,0x20
794 vncipher $out0,$out0,v25
795 vncipher $out1,$out1,v25
796 vncipher $out2,$out2,v25
797 vncipher $out3,$out3,v25
798 vncipher $out4,$out4,v25
799 vncipher $out5,$out5,v25
800 vncipher $out6,$out6,v25
801 vncipher $out7,$out7,v25
802 lvx v25,$x10,$key_ # round[4]
805 subic $len,$len,128 # $len-=128
806 vncipher $out0,$out0,v24
807 vncipher $out1,$out1,v24
808 vncipher $out2,$out2,v24
809 vncipher $out3,$out3,v24
810 vncipher $out4,$out4,v24
811 vncipher $out5,$out5,v24
812 vncipher $out6,$out6,v24
813 vncipher $out7,$out7,v24
815 subfe. r0,r0,r0 # borrow?-1:0
816 vncipher $out0,$out0,v25
817 vncipher $out1,$out1,v25
818 vncipher $out2,$out2,v25
819 vncipher $out3,$out3,v25
820 vncipher $out4,$out4,v25
821 vncipher $out5,$out5,v25
822 vncipher $out6,$out6,v25
823 vncipher $out7,$out7,v25
826 vncipher $out0,$out0,v26
827 vncipher $out1,$out1,v26
828 vncipher $out2,$out2,v26
829 vncipher $out3,$out3,v26
830 vncipher $out4,$out4,v26
831 vncipher $out5,$out5,v26
832 vncipher $out6,$out6,v26
833 vncipher $out7,$out7,v26
835 add $inp,$inp,r0 # $inp is adjusted in such
836 # way that at exit from the
837 # loop inX-in7 are loaded
839 vncipher $out0,$out0,v27
840 vncipher $out1,$out1,v27
841 vncipher $out2,$out2,v27
842 vncipher $out3,$out3,v27
843 vncipher $out4,$out4,v27
844 vncipher $out5,$out5,v27
845 vncipher $out6,$out6,v27
846 vncipher $out7,$out7,v27
848 addi $key_,$sp,$FRAME+15 # rewind $key_
849 vncipher $out0,$out0,v28
850 vncipher $out1,$out1,v28
851 vncipher $out2,$out2,v28
852 vncipher $out3,$out3,v28
853 vncipher $out4,$out4,v28
854 vncipher $out5,$out5,v28
855 vncipher $out6,$out6,v28
856 vncipher $out7,$out7,v28
857 lvx v24,$x00,$key_ # re-pre-load round[1]
859 vncipher $out0,$out0,v29
860 vncipher $out1,$out1,v29
861 vncipher $out2,$out2,v29
862 vncipher $out3,$out3,v29
863 vncipher $out4,$out4,v29
864 vncipher $out5,$out5,v29
865 vncipher $out6,$out6,v29
866 vncipher $out7,$out7,v29
867 lvx v25,$x10,$key_ # re-pre-load round[2]
869 vncipher $out0,$out0,v30
870 vxor $ivec,$ivec,v31 # xor with last round key
871 vncipher $out1,$out1,v30
873 vncipher $out2,$out2,v30
875 vncipher $out3,$out3,v30
877 vncipher $out4,$out4,v30
879 vncipher $out5,$out5,v30
881 vncipher $out6,$out6,v30
883 vncipher $out7,$out7,v30
886 vncipherlast $out0,$out0,$ivec
887 vncipherlast $out1,$out1,$in0
888 lvx_u $in0,$x00,$inp # load next input block
889 vncipherlast $out2,$out2,$in1
891 vncipherlast $out3,$out3,$in2
892 le?vperm $in0,$in0,$in0,$inpperm
894 vncipherlast $out4,$out4,$in3
895 le?vperm $in1,$in1,$in1,$inpperm
897 vncipherlast $out5,$out5,$in4
898 le?vperm $in2,$in2,$in2,$inpperm
900 vncipherlast $out6,$out6,$in5
901 le?vperm $in3,$in3,$in3,$inpperm
903 vncipherlast $out7,$out7,$in6
904 le?vperm $in4,$in4,$in4,$inpperm
907 le?vperm $in5,$in5,$in5,$inpperm
911 le?vperm $out0,$out0,$out0,$inpperm
912 le?vperm $out1,$out1,$out1,$inpperm
913 stvx_u $out0,$x00,$out
914 le?vperm $in6,$in6,$in6,$inpperm
915 vxor $out0,$in0,$rndkey0
916 le?vperm $out2,$out2,$out2,$inpperm
917 stvx_u $out1,$x10,$out
918 le?vperm $in7,$in7,$in7,$inpperm
919 vxor $out1,$in1,$rndkey0
920 le?vperm $out3,$out3,$out3,$inpperm
921 stvx_u $out2,$x20,$out
922 vxor $out2,$in2,$rndkey0
923 le?vperm $out4,$out4,$out4,$inpperm
924 stvx_u $out3,$x30,$out
925 vxor $out3,$in3,$rndkey0
926 le?vperm $out5,$out5,$out5,$inpperm
927 stvx_u $out4,$x40,$out
928 vxor $out4,$in4,$rndkey0
929 le?vperm $out6,$out6,$out6,$inpperm
930 stvx_u $out5,$x50,$out
931 vxor $out5,$in5,$rndkey0
932 le?vperm $out7,$out7,$out7,$inpperm
933 stvx_u $out6,$x60,$out
934 vxor $out6,$in6,$rndkey0
935 stvx_u $out7,$x70,$out
937 vxor $out7,$in7,$rndkey0
940 beq Loop_cbc_dec8x # did $len-=128 borrow?
947 Loop_cbc_dec8x_tail: # up to 7 "words" tail...
948 vncipher $out1,$out1,v24
949 vncipher $out2,$out2,v24
950 vncipher $out3,$out3,v24
951 vncipher $out4,$out4,v24
952 vncipher $out5,$out5,v24
953 vncipher $out6,$out6,v24
954 vncipher $out7,$out7,v24
955 lvx v24,$x20,$key_ # round[3]
956 addi $key_,$key_,0x20
958 vncipher $out1,$out1,v25
959 vncipher $out2,$out2,v25
960 vncipher $out3,$out3,v25
961 vncipher $out4,$out4,v25
962 vncipher $out5,$out5,v25
963 vncipher $out6,$out6,v25
964 vncipher $out7,$out7,v25
965 lvx v25,$x10,$key_ # round[4]
966 bdnz Loop_cbc_dec8x_tail
968 vncipher $out1,$out1,v24
969 vncipher $out2,$out2,v24
970 vncipher $out3,$out3,v24
971 vncipher $out4,$out4,v24
972 vncipher $out5,$out5,v24
973 vncipher $out6,$out6,v24
974 vncipher $out7,$out7,v24
976 vncipher $out1,$out1,v25
977 vncipher $out2,$out2,v25
978 vncipher $out3,$out3,v25
979 vncipher $out4,$out4,v25
980 vncipher $out5,$out5,v25
981 vncipher $out6,$out6,v25
982 vncipher $out7,$out7,v25
984 vncipher $out1,$out1,v26
985 vncipher $out2,$out2,v26
986 vncipher $out3,$out3,v26
987 vncipher $out4,$out4,v26
988 vncipher $out5,$out5,v26
989 vncipher $out6,$out6,v26
990 vncipher $out7,$out7,v26
992 vncipher $out1,$out1,v27
993 vncipher $out2,$out2,v27
994 vncipher $out3,$out3,v27
995 vncipher $out4,$out4,v27
996 vncipher $out5,$out5,v27
997 vncipher $out6,$out6,v27
998 vncipher $out7,$out7,v27
1000 vncipher $out1,$out1,v28
1001 vncipher $out2,$out2,v28
1002 vncipher $out3,$out3,v28
1003 vncipher $out4,$out4,v28
1004 vncipher $out5,$out5,v28
1005 vncipher $out6,$out6,v28
1006 vncipher $out7,$out7,v28
1008 vncipher $out1,$out1,v29
1009 vncipher $out2,$out2,v29
1010 vncipher $out3,$out3,v29
1011 vncipher $out4,$out4,v29
1012 vncipher $out5,$out5,v29
1013 vncipher $out6,$out6,v29
1014 vncipher $out7,$out7,v29
1016 vncipher $out1,$out1,v30
1017 vxor $ivec,$ivec,v31 # last round key
1018 vncipher $out2,$out2,v30
1020 vncipher $out3,$out3,v30
1022 vncipher $out4,$out4,v30
1024 vncipher $out5,$out5,v30
1026 vncipher $out6,$out6,v30
1028 vncipher $out7,$out7,v30
1031 cmplwi $len,32 # switch($len)
1036 blt Lcbc_dec8x_three
1045 vncipherlast $out1,$out1,$ivec
1046 vncipherlast $out2,$out2,$in1
1047 vncipherlast $out3,$out3,$in2
1048 vncipherlast $out4,$out4,$in3
1049 vncipherlast $out5,$out5,$in4
1050 vncipherlast $out6,$out6,$in5
1051 vncipherlast $out7,$out7,$in6
1054 le?vperm $out1,$out1,$out1,$inpperm
1055 le?vperm $out2,$out2,$out2,$inpperm
1056 stvx_u $out1,$x00,$out
1057 le?vperm $out3,$out3,$out3,$inpperm
1058 stvx_u $out2,$x10,$out
1059 le?vperm $out4,$out4,$out4,$inpperm
1060 stvx_u $out3,$x20,$out
1061 le?vperm $out5,$out5,$out5,$inpperm
1062 stvx_u $out4,$x30,$out
1063 le?vperm $out6,$out6,$out6,$inpperm
1064 stvx_u $out5,$x40,$out
1065 le?vperm $out7,$out7,$out7,$inpperm
1066 stvx_u $out6,$x50,$out
1067 stvx_u $out7,$x60,$out
1073 vncipherlast $out2,$out2,$ivec
1074 vncipherlast $out3,$out3,$in2
1075 vncipherlast $out4,$out4,$in3
1076 vncipherlast $out5,$out5,$in4
1077 vncipherlast $out6,$out6,$in5
1078 vncipherlast $out7,$out7,$in6
1081 le?vperm $out2,$out2,$out2,$inpperm
1082 le?vperm $out3,$out3,$out3,$inpperm
1083 stvx_u $out2,$x00,$out
1084 le?vperm $out4,$out4,$out4,$inpperm
1085 stvx_u $out3,$x10,$out
1086 le?vperm $out5,$out5,$out5,$inpperm
1087 stvx_u $out4,$x20,$out
1088 le?vperm $out6,$out6,$out6,$inpperm
1089 stvx_u $out5,$x30,$out
1090 le?vperm $out7,$out7,$out7,$inpperm
1091 stvx_u $out6,$x40,$out
1092 stvx_u $out7,$x50,$out
1098 vncipherlast $out3,$out3,$ivec
1099 vncipherlast $out4,$out4,$in3
1100 vncipherlast $out5,$out5,$in4
1101 vncipherlast $out6,$out6,$in5
1102 vncipherlast $out7,$out7,$in6
1105 le?vperm $out3,$out3,$out3,$inpperm
1106 le?vperm $out4,$out4,$out4,$inpperm
1107 stvx_u $out3,$x00,$out
1108 le?vperm $out5,$out5,$out5,$inpperm
1109 stvx_u $out4,$x10,$out
1110 le?vperm $out6,$out6,$out6,$inpperm
1111 stvx_u $out5,$x20,$out
1112 le?vperm $out7,$out7,$out7,$inpperm
1113 stvx_u $out6,$x30,$out
1114 stvx_u $out7,$x40,$out
1120 vncipherlast $out4,$out4,$ivec
1121 vncipherlast $out5,$out5,$in4
1122 vncipherlast $out6,$out6,$in5
1123 vncipherlast $out7,$out7,$in6
1126 le?vperm $out4,$out4,$out4,$inpperm
1127 le?vperm $out5,$out5,$out5,$inpperm
1128 stvx_u $out4,$x00,$out
1129 le?vperm $out6,$out6,$out6,$inpperm
1130 stvx_u $out5,$x10,$out
1131 le?vperm $out7,$out7,$out7,$inpperm
1132 stvx_u $out6,$x20,$out
1133 stvx_u $out7,$x30,$out
1139 vncipherlast $out5,$out5,$ivec
1140 vncipherlast $out6,$out6,$in5
1141 vncipherlast $out7,$out7,$in6
1144 le?vperm $out5,$out5,$out5,$inpperm
1145 le?vperm $out6,$out6,$out6,$inpperm
1146 stvx_u $out5,$x00,$out
1147 le?vperm $out7,$out7,$out7,$inpperm
1148 stvx_u $out6,$x10,$out
1149 stvx_u $out7,$x20,$out
1155 vncipherlast $out6,$out6,$ivec
1156 vncipherlast $out7,$out7,$in6
1159 le?vperm $out6,$out6,$out6,$inpperm
1160 le?vperm $out7,$out7,$out7,$inpperm
1161 stvx_u $out6,$x00,$out
1162 stvx_u $out7,$x10,$out
1168 vncipherlast $out7,$out7,$ivec
1171 le?vperm $out7,$out7,$out7,$inpperm
1176 le?vperm $ivec,$ivec,$ivec,$inpperm
1177 stvx_u $ivec,0,$ivp # write [unaligned] iv
1181 stvx $inpperm,r10,$sp # wipe copies of round keys
1183 stvx $inpperm,r11,$sp
1185 stvx $inpperm,r10,$sp
1187 stvx $inpperm,r11,$sp
1189 stvx $inpperm,r10,$sp
1191 stvx $inpperm,r11,$sp
1193 stvx $inpperm,r10,$sp
1195 stvx $inpperm,r11,$sp
1199 lvx v20,r10,$sp # ABI says so
1221 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1222 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1223 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1224 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1225 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1226 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1227 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1230 .byte 0,12,0x14,0,0x80,6,6,0
1232 .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1236 #########################################################################
1237 {{{ # CTR procedure[s] #
1238 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1239 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1240 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1245 .globl .${prefix}_ctr32_encrypt_blocks
1247 .${prefix}_ctr32_encrypt_blocks:
1256 vxor $rndkey0,$rndkey0,$rndkey0
1257 le?vspltisb $tmp,0x0f
1259 lvx $ivec,0,$ivp # load [unaligned] iv
1260 lvsl $inpperm,0,$ivp
1261 lvx $inptail,$idx,$ivp
1263 le?vxor $inpperm,$inpperm,$tmp
1264 vperm $ivec,$ivec,$inptail,$inpperm
1265 vsldoi $one,$rndkey0,$one,1
1268 ?lvsl $keyperm,0,$key # prepare for unaligned key
1269 lwz $rounds,240($key)
1271 lvsr $inpperm,0,r11 # prepare for unaligned load
1273 addi $inp,$inp,15 # 15 is not typo
1274 le?vxor $inpperm,$inpperm,$tmp
1276 srwi $rounds,$rounds,1
1278 subi $rounds,$rounds,1
1281 bge _aesp8_ctr32_encrypt8x
1283 ?lvsr $outperm,0,$out # prepare for unaligned store
1284 vspltisb $outmask,-1
1286 ?vperm $outmask,$rndkey0,$outmask,$outperm
1287 le?vxor $outperm,$outperm,$tmp
1291 lvx $rndkey1,$idx,$key
1293 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1294 vxor $inout,$ivec,$rndkey0
1295 lvx $rndkey0,$idx,$key
1301 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1302 vcipher $inout,$inout,$rndkey1
1303 lvx $rndkey1,$idx,$key
1305 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1306 vcipher $inout,$inout,$rndkey0
1307 lvx $rndkey0,$idx,$key
1311 vadduwm $ivec,$ivec,$one
1315 subic. $len,$len,1 # blocks--
1317 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1318 vcipher $inout,$inout,$rndkey1
1319 lvx $rndkey1,$idx,$key
1320 vperm $dat,$dat,$inptail,$inpperm
1322 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1324 vxor $dat,$dat,$rndkey1 # last round key
1325 vcipherlast $inout,$inout,$dat
1327 lvx $rndkey1,$idx,$key
1329 vperm $inout,$inout,$inout,$outperm
1330 vsel $dat,$outhead,$inout,$outmask
1332 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1334 vxor $inout,$ivec,$rndkey0
1335 lvx $rndkey0,$idx,$key
1342 lvx $inout,0,$out # redundant in aligned case
1343 vsel $inout,$outhead,$inout,$outmask
1349 .byte 0,12,0x14,0,0,0,6,0
1352 #########################################################################
1353 {{ # Optimized CTR procedure #
1355 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1356 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1357 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1358 my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
1359 # v26-v31 last 6 round keys
1360 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1361 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1365 _aesp8_ctr32_encrypt8x:
1366 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1367 li r10,`$FRAME+8*16+15`
1368 li r11,`$FRAME+8*16+31`
1369 stvx v20,r10,$sp # ABI says so
1392 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1394 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1396 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1398 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1400 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1402 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1404 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1408 subi $rounds,$rounds,3 # -4 in total
1410 lvx $rndkey0,$x00,$key # load key schedule
1414 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1415 addi $key_,$sp,$FRAME+15
1419 ?vperm v24,v30,v31,$keyperm
1422 stvx v24,$x00,$key_ # off-load round[1]
1423 ?vperm v25,v31,v30,$keyperm
1425 stvx v25,$x10,$key_ # off-load round[2]
1426 addi $key_,$key_,0x20
1427 bdnz Load_ctr32_enc_key
1430 ?vperm v24,v30,v31,$keyperm
1432 stvx v24,$x00,$key_ # off-load round[3]
1433 ?vperm v25,v31,v26,$keyperm
1435 stvx v25,$x10,$key_ # off-load round[4]
1436 addi $key_,$sp,$FRAME+15 # rewind $key_
1437 ?vperm v26,v26,v27,$keyperm
1439 ?vperm v27,v27,v28,$keyperm
1441 ?vperm v28,v28,v29,$keyperm
1443 ?vperm v29,v29,v30,$keyperm
1444 lvx $out0,$x70,$key # borrow $out0
1445 ?vperm v30,v30,v31,$keyperm
1446 lvx v24,$x00,$key_ # pre-load round[1]
1447 ?vperm v31,v31,$out0,$keyperm
1448 lvx v25,$x10,$key_ # pre-load round[2]
1450 vadduwm $two,$one,$one
1451 subi $inp,$inp,15 # undo "caller"
1454 vadduwm $out1,$ivec,$one # counter values ...
1455 vadduwm $out2,$ivec,$two
1456 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1458 vadduwm $out3,$out1,$two
1459 vxor $out1,$out1,$rndkey0
1460 le?lvsl $inpperm,0,$idx
1461 vadduwm $out4,$out2,$two
1462 vxor $out2,$out2,$rndkey0
1463 le?vspltisb $tmp,0x0f
1464 vadduwm $out5,$out3,$two
1465 vxor $out3,$out3,$rndkey0
1466 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1467 vadduwm $out6,$out4,$two
1468 vxor $out4,$out4,$rndkey0
1469 vadduwm $out7,$out5,$two
1470 vxor $out5,$out5,$rndkey0
1471 vadduwm $ivec,$out6,$two # next counter value
1472 vxor $out6,$out6,$rndkey0
1473 vxor $out7,$out7,$rndkey0
1479 vcipher $out0,$out0,v24
1480 vcipher $out1,$out1,v24
1481 vcipher $out2,$out2,v24
1482 vcipher $out3,$out3,v24
1483 vcipher $out4,$out4,v24
1484 vcipher $out5,$out5,v24
1485 vcipher $out6,$out6,v24
1486 vcipher $out7,$out7,v24
1487 Loop_ctr32_enc8x_middle:
1488 lvx v24,$x20,$key_ # round[3]
1489 addi $key_,$key_,0x20
1491 vcipher $out0,$out0,v25
1492 vcipher $out1,$out1,v25
1493 vcipher $out2,$out2,v25
1494 vcipher $out3,$out3,v25
1495 vcipher $out4,$out4,v25
1496 vcipher $out5,$out5,v25
1497 vcipher $out6,$out6,v25
1498 vcipher $out7,$out7,v25
1499 lvx v25,$x10,$key_ # round[4]
1500 bdnz Loop_ctr32_enc8x
1502 subic r11,$len,256 # $len-256, borrow $key_
1503 vcipher $out0,$out0,v24
1504 vcipher $out1,$out1,v24
1505 vcipher $out2,$out2,v24
1506 vcipher $out3,$out3,v24
1507 vcipher $out4,$out4,v24
1508 vcipher $out5,$out5,v24
1509 vcipher $out6,$out6,v24
1510 vcipher $out7,$out7,v24
1512 subfe r0,r0,r0 # borrow?-1:0
1513 vcipher $out0,$out0,v25
1514 vcipher $out1,$out1,v25
1515 vcipher $out2,$out2,v25
1516 vcipher $out3,$out3,v25
1517 vcipher $out4,$out4,v25
1518 vcipher $out5,$out5,v25
1519 vcipher $out6,$out6,v25
1520 vcipher $out7,$out7,v25
1523 addi $key_,$sp,$FRAME+15 # rewind $key_
1524 vcipher $out0,$out0,v26
1525 vcipher $out1,$out1,v26
1526 vcipher $out2,$out2,v26
1527 vcipher $out3,$out3,v26
1528 vcipher $out4,$out4,v26
1529 vcipher $out5,$out5,v26
1530 vcipher $out6,$out6,v26
1531 vcipher $out7,$out7,v26
1532 lvx v24,$x00,$key_ # re-pre-load round[1]
1534 subic $len,$len,129 # $len-=129
1535 vcipher $out0,$out0,v27
1536 addi $len,$len,1 # $len-=128 really
1537 vcipher $out1,$out1,v27
1538 vcipher $out2,$out2,v27
1539 vcipher $out3,$out3,v27
1540 vcipher $out4,$out4,v27
1541 vcipher $out5,$out5,v27
1542 vcipher $out6,$out6,v27
1543 vcipher $out7,$out7,v27
1544 lvx v25,$x10,$key_ # re-pre-load round[2]
1546 vcipher $out0,$out0,v28
1547 lvx_u $in0,$x00,$inp # load input
1548 vcipher $out1,$out1,v28
1549 lvx_u $in1,$x10,$inp
1550 vcipher $out2,$out2,v28
1551 lvx_u $in2,$x20,$inp
1552 vcipher $out3,$out3,v28
1553 lvx_u $in3,$x30,$inp
1554 vcipher $out4,$out4,v28
1555 lvx_u $in4,$x40,$inp
1556 vcipher $out5,$out5,v28
1557 lvx_u $in5,$x50,$inp
1558 vcipher $out6,$out6,v28
1559 lvx_u $in6,$x60,$inp
1560 vcipher $out7,$out7,v28
1561 lvx_u $in7,$x70,$inp
1564 vcipher $out0,$out0,v29
1565 le?vperm $in0,$in0,$in0,$inpperm
1566 vcipher $out1,$out1,v29
1567 le?vperm $in1,$in1,$in1,$inpperm
1568 vcipher $out2,$out2,v29
1569 le?vperm $in2,$in2,$in2,$inpperm
1570 vcipher $out3,$out3,v29
1571 le?vperm $in3,$in3,$in3,$inpperm
1572 vcipher $out4,$out4,v29
1573 le?vperm $in4,$in4,$in4,$inpperm
1574 vcipher $out5,$out5,v29
1575 le?vperm $in5,$in5,$in5,$inpperm
1576 vcipher $out6,$out6,v29
1577 le?vperm $in6,$in6,$in6,$inpperm
1578 vcipher $out7,$out7,v29
1579 le?vperm $in7,$in7,$in7,$inpperm
1581 add $inp,$inp,r0 # $inp is adjusted in such
1582 # way that at exit from the
1583 # loop inX-in7 are loaded
1585 subfe. r0,r0,r0 # borrow?-1:0
1586 vcipher $out0,$out0,v30
1587 vxor $in0,$in0,v31 # xor with last round key
1588 vcipher $out1,$out1,v30
1590 vcipher $out2,$out2,v30
1592 vcipher $out3,$out3,v30
1594 vcipher $out4,$out4,v30
1596 vcipher $out5,$out5,v30
1598 vcipher $out6,$out6,v30
1600 vcipher $out7,$out7,v30
1603 bne Lctr32_enc8x_break # did $len-129 borrow?
1605 vcipherlast $in0,$out0,$in0
1606 vcipherlast $in1,$out1,$in1
1607 vadduwm $out1,$ivec,$one # counter values ...
1608 vcipherlast $in2,$out2,$in2
1609 vadduwm $out2,$ivec,$two
1610 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1611 vcipherlast $in3,$out3,$in3
1612 vadduwm $out3,$out1,$two
1613 vxor $out1,$out1,$rndkey0
1614 vcipherlast $in4,$out4,$in4
1615 vadduwm $out4,$out2,$two
1616 vxor $out2,$out2,$rndkey0
1617 vcipherlast $in5,$out5,$in5
1618 vadduwm $out5,$out3,$two
1619 vxor $out3,$out3,$rndkey0
1620 vcipherlast $in6,$out6,$in6
1621 vadduwm $out6,$out4,$two
1622 vxor $out4,$out4,$rndkey0
1623 vcipherlast $in7,$out7,$in7
1624 vadduwm $out7,$out5,$two
1625 vxor $out5,$out5,$rndkey0
1626 le?vperm $in0,$in0,$in0,$inpperm
1627 vadduwm $ivec,$out6,$two # next counter value
1628 vxor $out6,$out6,$rndkey0
1629 le?vperm $in1,$in1,$in1,$inpperm
1630 vxor $out7,$out7,$rndkey0
1633 vcipher $out0,$out0,v24
1634 stvx_u $in0,$x00,$out
1635 le?vperm $in2,$in2,$in2,$inpperm
1636 vcipher $out1,$out1,v24
1637 stvx_u $in1,$x10,$out
1638 le?vperm $in3,$in3,$in3,$inpperm
1639 vcipher $out2,$out2,v24
1640 stvx_u $in2,$x20,$out
1641 le?vperm $in4,$in4,$in4,$inpperm
1642 vcipher $out3,$out3,v24
1643 stvx_u $in3,$x30,$out
1644 le?vperm $in5,$in5,$in5,$inpperm
1645 vcipher $out4,$out4,v24
1646 stvx_u $in4,$x40,$out
1647 le?vperm $in6,$in6,$in6,$inpperm
1648 vcipher $out5,$out5,v24
1649 stvx_u $in5,$x50,$out
1650 le?vperm $in7,$in7,$in7,$inpperm
1651 vcipher $out6,$out6,v24
1652 stvx_u $in6,$x60,$out
1653 vcipher $out7,$out7,v24
1654 stvx_u $in7,$x70,$out
1657 b Loop_ctr32_enc8x_middle
1662 blt Lctr32_enc8x_one
1664 beq Lctr32_enc8x_two
1666 blt Lctr32_enc8x_three
1668 beq Lctr32_enc8x_four
1670 blt Lctr32_enc8x_five
1672 beq Lctr32_enc8x_six
1674 blt Lctr32_enc8x_seven
1677 vcipherlast $out0,$out0,$in0
1678 vcipherlast $out1,$out1,$in1
1679 vcipherlast $out2,$out2,$in2
1680 vcipherlast $out3,$out3,$in3
1681 vcipherlast $out4,$out4,$in4
1682 vcipherlast $out5,$out5,$in5
1683 vcipherlast $out6,$out6,$in6
1684 vcipherlast $out7,$out7,$in7
1686 le?vperm $out0,$out0,$out0,$inpperm
1687 le?vperm $out1,$out1,$out1,$inpperm
1688 stvx_u $out0,$x00,$out
1689 le?vperm $out2,$out2,$out2,$inpperm
1690 stvx_u $out1,$x10,$out
1691 le?vperm $out3,$out3,$out3,$inpperm
1692 stvx_u $out2,$x20,$out
1693 le?vperm $out4,$out4,$out4,$inpperm
1694 stvx_u $out3,$x30,$out
1695 le?vperm $out5,$out5,$out5,$inpperm
1696 stvx_u $out4,$x40,$out
1697 le?vperm $out6,$out6,$out6,$inpperm
1698 stvx_u $out5,$x50,$out
1699 le?vperm $out7,$out7,$out7,$inpperm
1700 stvx_u $out6,$x60,$out
1701 stvx_u $out7,$x70,$out
1707 vcipherlast $out0,$out0,$in1
1708 vcipherlast $out1,$out1,$in2
1709 vcipherlast $out2,$out2,$in3
1710 vcipherlast $out3,$out3,$in4
1711 vcipherlast $out4,$out4,$in5
1712 vcipherlast $out5,$out5,$in6
1713 vcipherlast $out6,$out6,$in7
1715 le?vperm $out0,$out0,$out0,$inpperm
1716 le?vperm $out1,$out1,$out1,$inpperm
1717 stvx_u $out0,$x00,$out
1718 le?vperm $out2,$out2,$out2,$inpperm
1719 stvx_u $out1,$x10,$out
1720 le?vperm $out3,$out3,$out3,$inpperm
1721 stvx_u $out2,$x20,$out
1722 le?vperm $out4,$out4,$out4,$inpperm
1723 stvx_u $out3,$x30,$out
1724 le?vperm $out5,$out5,$out5,$inpperm
1725 stvx_u $out4,$x40,$out
1726 le?vperm $out6,$out6,$out6,$inpperm
1727 stvx_u $out5,$x50,$out
1728 stvx_u $out6,$x60,$out
1734 vcipherlast $out0,$out0,$in2
1735 vcipherlast $out1,$out1,$in3
1736 vcipherlast $out2,$out2,$in4
1737 vcipherlast $out3,$out3,$in5
1738 vcipherlast $out4,$out4,$in6
1739 vcipherlast $out5,$out5,$in7
1741 le?vperm $out0,$out0,$out0,$inpperm
1742 le?vperm $out1,$out1,$out1,$inpperm
1743 stvx_u $out0,$x00,$out
1744 le?vperm $out2,$out2,$out2,$inpperm
1745 stvx_u $out1,$x10,$out
1746 le?vperm $out3,$out3,$out3,$inpperm
1747 stvx_u $out2,$x20,$out
1748 le?vperm $out4,$out4,$out4,$inpperm
1749 stvx_u $out3,$x30,$out
1750 le?vperm $out5,$out5,$out5,$inpperm
1751 stvx_u $out4,$x40,$out
1752 stvx_u $out5,$x50,$out
1758 vcipherlast $out0,$out0,$in3
1759 vcipherlast $out1,$out1,$in4
1760 vcipherlast $out2,$out2,$in5
1761 vcipherlast $out3,$out3,$in6
1762 vcipherlast $out4,$out4,$in7
1764 le?vperm $out0,$out0,$out0,$inpperm
1765 le?vperm $out1,$out1,$out1,$inpperm
1766 stvx_u $out0,$x00,$out
1767 le?vperm $out2,$out2,$out2,$inpperm
1768 stvx_u $out1,$x10,$out
1769 le?vperm $out3,$out3,$out3,$inpperm
1770 stvx_u $out2,$x20,$out
1771 le?vperm $out4,$out4,$out4,$inpperm
1772 stvx_u $out3,$x30,$out
1773 stvx_u $out4,$x40,$out
1779 vcipherlast $out0,$out0,$in4
1780 vcipherlast $out1,$out1,$in5
1781 vcipherlast $out2,$out2,$in6
1782 vcipherlast $out3,$out3,$in7
1784 le?vperm $out0,$out0,$out0,$inpperm
1785 le?vperm $out1,$out1,$out1,$inpperm
1786 stvx_u $out0,$x00,$out
1787 le?vperm $out2,$out2,$out2,$inpperm
1788 stvx_u $out1,$x10,$out
1789 le?vperm $out3,$out3,$out3,$inpperm
1790 stvx_u $out2,$x20,$out
1791 stvx_u $out3,$x30,$out
1797 vcipherlast $out0,$out0,$in5
1798 vcipherlast $out1,$out1,$in6
1799 vcipherlast $out2,$out2,$in7
1801 le?vperm $out0,$out0,$out0,$inpperm
1802 le?vperm $out1,$out1,$out1,$inpperm
1803 stvx_u $out0,$x00,$out
1804 le?vperm $out2,$out2,$out2,$inpperm
1805 stvx_u $out1,$x10,$out
1806 stvx_u $out2,$x20,$out
1812 vcipherlast $out0,$out0,$in6
1813 vcipherlast $out1,$out1,$in7
1815 le?vperm $out0,$out0,$out0,$inpperm
1816 le?vperm $out1,$out1,$out1,$inpperm
1817 stvx_u $out0,$x00,$out
1818 stvx_u $out1,$x10,$out
1824 vcipherlast $out0,$out0,$in7
1826 le?vperm $out0,$out0,$out0,$inpperm
1833 stvx $inpperm,r10,$sp # wipe copies of round keys
1835 stvx $inpperm,r11,$sp
1837 stvx $inpperm,r10,$sp
1839 stvx $inpperm,r11,$sp
1841 stvx $inpperm,r10,$sp
1843 stvx $inpperm,r11,$sp
1845 stvx $inpperm,r10,$sp
1847 stvx $inpperm,r11,$sp
1851 lvx v20,r10,$sp # ABI says so
1873 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1874 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1875 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1876 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1877 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1878 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1879 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1882 .byte 0,12,0x14,0,0x80,6,6,0
1884 .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1889 foreach(split("\n",$code)) {
1890 s/\`([^\`]*)\`/eval($1)/geo;
1892 # constants table endian-specific conversion
1893 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1897 # convert to endian-agnostic format
1899 foreach (split(/,\s*/,$2)) {
1900 my $l = /^0/?oct:int;
1901 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1904 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
1907 # little-endian conversion
1908 if ($flavour =~ /le$/o) {
1909 SWITCH: for($conv) {
1910 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1911 /\?rev/ && do { @bytes=reverse(@bytes); last; };
1916 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1919 $consts=0 if (m/Lconsts:/o); # end of table
1921 # instructions prefixed with '?' are endian-specific and need
1922 # to be adjusted accordingly...
1923 if ($flavour =~ /le$/o) { # little-endian
1928 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1929 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1930 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1931 } else { # big-endian