2 # Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # This module implements support for AES instructions as per PowerISA
18 # specification version 2.07, first implemented by POWER8 processor.
19 # The module is endian-agnostic in sense that it supports both big-
20 # and little-endian cases. Data alignment in parallelizable modes is
21 # handled with VSX loads and stores, which implies MSR.VSX flag being
22 # set. It should also be noted that ISA specification doesn't prohibit
23 # alignment exceptions for these instructions on page boundaries.
24 # Initially alignment was handled in pure AltiVec/VMX way [when data
25 # is aligned programmatically, which in turn guarantees exception-
26 # free execution], but it turned to hamper performance when vcipher
27 # instructions are interleaved. It's reckoned that eventual
28 # misalignment penalties at page boundaries are on average lower
29 # than the additional overhead of the pure AltiVec approach.
33 # Add XTS subroutine; 9x improvement on little-endian and 12x on
34 # big-endian systems was measured.
36 ######################################################################
37 # Current large-block performance in cycles per byte processed with
38 # 128-bit key (less is better).
40 # CBC en-/decrypt CTR XTS
41 # POWER8[le] 3.96/0.72 0.74 1.1
42 # POWER8[be] 3.75/0.65 0.66 1.0
43 # POWER9[le] 4.02/0.86 0.84 1.05
44 # POWER9[be] 3.99/0.78 0.79 0.97
# Select the register-width-dependent settings (e.g. $SIZE_T and the
# $STU/$PUSH/$LRSAVE macros used below) from the requested $flavour;
# anything that is neither 64- nor 32-bit is a fatal usage error.
48 if ($flavour =~ /64/) {
56 } elsif ($flavour =~ /32/) {
64 } else { die "nonsense $flavour"; }
# Non-zero (==$SIZE_T) only for little-endian flavours; gates the
# le?/be? conditional instruction prefixes used throughout the
# generated assembly.
66 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
# Locate the ppc-xlate.pl perlasm translator: first next to this
# script, then in the shared perlasm directory.
68 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
69 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
70 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
71 die "can't locate ppc-xlate.pl";
# Pipe all generated source through the translator; the first remaining
# argv element is the output target.  NOTE(review): "|| die" binds to
# open()'s second argument, not to open() itself, so a failed open is
# not actually caught -- longstanding upstream idiom, left untouched.
73 open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
81 #########################################################################
82 {{{	# Key setup procedures						#
# GPR/VR allocation for the key-setup code below: r3..r8 hold the C
# arguments and scratch, v0..v11 hold the expansion state.
83 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
84 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
85 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
# AES round-constant (rcon) table plus the rotate-n-splat permute mask;
# the trailing "?rev" tag presumably asks ppc-xlate to byte-reverse the
# words for little-endian targets -- TODO confirm against ppc-xlate.pl.
94 .long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
95 .long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
96 .long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
101 	mflr	$ptr #vvvvv "distance between . and rcon
106 	.byte	0,12,0x14,0,0,0,0,0
107 .asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
# ${prefix}_set_encrypt_key(inp, bits, out): expand the user key into
# the round-key schedule at $out.  Null inp/out are rejected early
# (the beq- aborts below return -1 per their annotations).
109 .globl	.${prefix}_set_encrypt_key
111 .${prefix}_set_encrypt_key:
114 	$PUSH	r11,$LRSAVE($sp)
118 	beq-	Lenc_key_abort		# if ($inp==0) return -1;
120 	beq-	Lenc_key_abort		# if ($out==0) return -1;
# Unaligned-load setup: lvsr on inp+15 builds the permute that both
# aligns the key material and byte-swaps it on little-endian.
138 	addi	$inp,$inp,15		# 15 is not typo
139 	lvsr	$key,0,r9		# borrow $key
143 	le?vspltisb	$mask,0x0f	# borrow $mask
145 	le?vxor	$key,$key,$mask		# adjust for byte swap
148 	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
150 	vxor	$zero,$zero,$zero
153 	?lvsr	$outperm,0,$out
156 	?vperm	$outmask,$zero,$outmask,$outperm
# Expansion round: rotate-n-splat the last word, run it through
# vcipherlast with the current rcon (SubWord), and fold the <<32
# shifted copies of the previous round key back in via vsldoi/vxor.
# The outtail/vsel/outhead trio streams the finished round keys to the
# possibly-unaligned output.
166 	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
167 	vsldoi	$tmp,$zero,$in0,12	# >>32
168 	vperm	$outtail,$in0,$in0,$outperm	# rotate
169 	vsel	$stage,$outhead,$outtail,$outmask
170 	vmr	$outhead,$outtail
171 	vcipherlast	$key,$key,$rcon
176 	vsldoi	$tmp,$zero,$tmp,12	# >>32
178 	vsldoi	$tmp,$zero,$tmp,12	# >>32
180 	vadduwm	$rcon,$rcon,$rcon
184 	lvx	$rcon,0,$ptr		# last two round keys
186 	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
187 	vsldoi	$tmp,$zero,$in0,12	# >>32
188 	vperm	$outtail,$in0,$in0,$outperm	# rotate
189 	vsel	$stage,$outhead,$outtail,$outmask
190 	vmr	$outhead,$outtail
191 	vcipherlast	$key,$key,$rcon
196 	vsldoi	$tmp,$zero,$tmp,12	# >>32
198 	vsldoi	$tmp,$zero,$tmp,12	# >>32
200 	vadduwm	$rcon,$rcon,$rcon
203 	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
204 	vsldoi	$tmp,$zero,$in0,12	# >>32
205 	vperm	$outtail,$in0,$in0,$outperm	# rotate
206 	vsel	$stage,$outhead,$outtail,$outmask
207 	vmr	$outhead,$outtail
208 	vcipherlast	$key,$key,$rcon
213 	vsldoi	$tmp,$zero,$tmp,12	# >>32
215 	vsldoi	$tmp,$zero,$tmp,12	# >>32
218 	vperm	$outtail,$in0,$in0,$outperm	# rotate
219 	vsel	$stage,$outhead,$outtail,$outmask
220 	vmr	$outhead,$outtail
223 	addi	$inp,$out,15		# 15 is not typo
# 192-bit path: odd 24-byte key stride needs the 8-byte vsldoi
# stitching of $in0/$in1 halves seen below.
233 	vperm	$outtail,$in0,$in0,$outperm	# rotate
234 	vsel	$stage,$outhead,$outtail,$outmask
235 	vmr	$outhead,$outtail
238 	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
239 	vspltisb	$key,8		# borrow $key
241 	vsububm	$mask,$mask,$key	# adjust the mask
244 	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
245 	vsldoi	$tmp,$zero,$in0,12	# >>32
246 	vcipherlast	$key,$key,$rcon
249 	vsldoi	$tmp,$zero,$tmp,12	# >>32
251 	vsldoi	$tmp,$zero,$tmp,12	# >>32
254 	vsldoi	$stage,$zero,$in1,8
257 	vsldoi	$in1,$zero,$in1,12	# >>32
258 	vadduwm	$rcon,$rcon,$rcon
262 	vsldoi	$stage,$stage,$in0,8
264 	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
265 	vsldoi	$tmp,$zero,$in0,12	# >>32
266 	vperm	$outtail,$stage,$stage,$outperm	# rotate
267 	vsel	$stage,$outhead,$outtail,$outmask
268 	vmr	$outhead,$outtail
269 	vcipherlast	$key,$key,$rcon
273 	vsldoi	$stage,$in0,$in1,8
275 	vsldoi	$tmp,$zero,$tmp,12	# >>32
276 	vperm	$outtail,$stage,$stage,$outperm	# rotate
277 	vsel	$stage,$outhead,$outtail,$outmask
278 	vmr	$outhead,$outtail
280 	vsldoi	$tmp,$zero,$tmp,12	# >>32
287 	vsldoi	$in1,$zero,$in1,12	# >>32
288 	vadduwm	$rcon,$rcon,$rcon
292 	vperm	$outtail,$in0,$in0,$outperm	# rotate
293 	vsel	$stage,$outhead,$outtail,$outmask
294 	vmr	$outhead,$outtail
296 	addi	$inp,$out,15		# 15 is not typo
# 256-bit path: alternating rounds use vspltw (plain splat, no rotate)
# for the even key words, per FIPS-197 key expansion.
309 	vperm	$outtail,$in0,$in0,$outperm	# rotate
310 	vsel	$stage,$outhead,$outtail,$outmask
311 	vmr	$outhead,$outtail
314 	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
318 	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
319 	vsldoi	$tmp,$zero,$in0,12	# >>32
320 	vperm	$outtail,$in1,$in1,$outperm	# rotate
321 	vsel	$stage,$outhead,$outtail,$outmask
322 	vmr	$outhead,$outtail
323 	vcipherlast	$key,$key,$rcon
328 	vsldoi	$tmp,$zero,$tmp,12	# >>32
330 	vsldoi	$tmp,$zero,$tmp,12	# >>32
332 	vadduwm	$rcon,$rcon,$rcon
334 	vperm	$outtail,$in0,$in0,$outperm	# rotate
335 	vsel	$stage,$outhead,$outtail,$outmask
336 	vmr	$outhead,$outtail
338 	addi	$inp,$out,15		# 15 is not typo
342 	vspltw	$key,$in0,3		# just splat
343 	vsldoi	$tmp,$zero,$in1,12	# >>32
347 	vsldoi	$tmp,$zero,$tmp,12	# >>32
349 	vsldoi	$tmp,$zero,$tmp,12	# >>32
# Flush the final partially-selected vector to memory.
357 	lvx	$in1,0,$inp		# redundant in aligned case
358 	vsel	$in1,$outhead,$in1,$outmask
368 	.byte	0,12,0x14,1,0,0,3,0
370 .size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
# ${prefix}_set_decrypt_key: build the encryption schedule, then swap
# round keys pairwise between the first ($inp) and last ($out) entries
# so the schedule reads in decryption order.  Returns 0 in r3.
372 .globl	.${prefix}_set_decrypt_key
374 .${prefix}_set_decrypt_key:
375 	$STU	$sp,-$FRAME($sp)
377 	$PUSH	r10,$FRAME+$LRSAVE($sp)
385 	subi	$inp,$out,240		# first round key
386 	srwi	$rounds,$rounds,1
387 	add	$out,$inp,$cnt		# last round key
411 	xor	r3,r3,r3		# return value
416 	.byte	0,12,4,1,0x80,0,3,0
418 .size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
421 #########################################################################
422 {{{	# Single block en- and decrypt procedures			#
# Presumably instantiated once per direction ($dir = "en"/"de" -- the
# surrounding loop is outside this view); $n prefixes the mnemonics so
# the decrypt variant emits vncipher/vncipherlast.
425 my $n = $dir eq "de" ? "n" : "";
426 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
# ${prefix}_${dir}crypt(inp, out, key): one 16-byte block, ECB-style,
# with lvsl/vperm handling of unaligned input, output and key.
429 .globl	.${prefix}_${dir}crypt
431 .${prefix}_${dir}crypt:
432 	lwz	$rounds,240($key)
435 	li	$idx,15			# 15 is not typo
441 	lvsl	v2,0,$inp		# inpperm
443 	?lvsl	v3,0,r11		# outperm
446 	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
448 	?lvsl	v5,0,$key		# keyperm
# Rounds are processed two at a time (hence the halving), with the
# final pair handled by v${n}cipherlast below.
449 	srwi	$rounds,$rounds,1
452 	subi	$rounds,$rounds,1
453 	?vperm	v1,v1,v2,v5		# align round key
475 	v${n}cipherlast	v0,v0,v1
# Unaligned store: merge the result with the existing bytes at $out
# via the outmask/outhead selection.
479 	li	$idx,15			# 15 is not typo
480 	?vperm	v2,v1,v2,v3		# outmask
482 	lvx	v1,0,$out		# outhead
483 	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
493 	.byte	0,12,0x14,0,0,0,3,0
495 .size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
501 #########################################################################
502 {{{	# CBC en- and decrypt procedures				#
503 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
504 my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
505 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
# ${prefix}_cbc_encrypt(inp, out, len, key, ivp, enc): $enc selects
# direction (tested against 0 below).  IV, key and data may all be
# unaligned; permutes are prepared up front.
508 .globl	.${prefix}_cbc_encrypt
510 .${prefix}_cbc_encrypt:
514 	cmpwi	$enc,0			# test direction
520 	vxor	$rndkey0,$rndkey0,$rndkey0
521 	le?vspltisb	$tmp,0x0f
523 	lvx	$ivec,0,$ivp		# load [unaligned] iv
525 	lvx	$inptail,$idx,$ivp
526 	le?vxor	$inpperm,$inpperm,$tmp
527 	vperm	$ivec,$ivec,$inptail,$inpperm
530 	?lvsl	$keyperm,0,$key		# prepare for unaligned key
531 	lwz	$rounds,240($key)
533 	lvsr	$inpperm,0,r11		# prepare for unaligned load
535 	addi	$inp,$inp,15		# 15 is not typo
536 	le?vxor	$inpperm,$inpperm,$tmp
538 	?lvsr	$outperm,0,$out		# prepare for unaligned store
541 	?vperm	$outmask,$rndkey0,$outmask,$outperm
542 	le?vxor	$outperm,$outperm,$tmp
544 	srwi	$rounds,$rounds,1	# two rounds per loop iteration
546 	subi	$rounds,$rounds,1
# Encrypt loop: xor plaintext with round[0] and the chaining value,
# run the rounds pairwise, and keep the ciphertext in $ivec as the
# next chaining value.
554 	subi	$len,$len,16		# len-=16
557 	vperm	$inout,$inout,$inptail,$inpperm
558 	lvx	$rndkey1,$idx,$key
560 	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
561 	vxor	$inout,$inout,$rndkey0
562 	lvx	$rndkey0,$idx,$key
564 	vxor	$inout,$inout,$ivec
567 	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
568 	vcipher	$inout,$inout,$rndkey1
569 	lvx	$rndkey1,$idx,$key
571 	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
572 	vcipher	$inout,$inout,$rndkey0
573 	lvx	$rndkey0,$idx,$key
577 	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
578 	vcipher	$inout,$inout,$rndkey1
579 	lvx	$rndkey1,$idx,$key
581 	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
582 	vcipherlast	$ivec,$inout,$rndkey0
585 	vperm	$tmp,$ivec,$ivec,$outperm
586 	vsel	$inout,$outhead,$tmp,$outmask
# Long decrypt inputs are diverted to the 8x interleaved fast path.
597 	bge	_aesp8_cbc_decrypt8x
# Single-block decrypt loop: vncipher rounds, then xor with the
# previous ciphertext ($ivec) to recover plaintext.
602 	subi	$len,$len,16		# len-=16
605 	vperm	$tmp,$tmp,$inptail,$inpperm
606 	lvx	$rndkey1,$idx,$key
608 	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
609 	vxor	$inout,$tmp,$rndkey0
610 	lvx	$rndkey0,$idx,$key
614 	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
615 	vncipher	$inout,$inout,$rndkey1
616 	lvx	$rndkey1,$idx,$key
618 	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
619 	vncipher	$inout,$inout,$rndkey0
620 	lvx	$rndkey0,$idx,$key
624 	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
625 	vncipher	$inout,$inout,$rndkey1
626 	lvx	$rndkey1,$idx,$key
628 	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
629 	vncipherlast	$inout,$inout,$rndkey0
632 	vxor	$inout,$inout,$ivec
634 	vperm	$tmp,$inout,$inout,$outperm
635 	vsel	$inout,$outhead,$tmp,$outmask
# Flush trailing partial store, then write the final chaining value
# back through *$ivp (again tolerating misalignment).
643 	lvx	$inout,0,$out		# redundant in aligned case
644 	vsel	$inout,$outhead,$inout,$outmask
647 	neg	$enc,$ivp		# write [unaligned] iv
648 	li	$idx,15			# 15 is not typo
649 	vxor	$rndkey0,$rndkey0,$rndkey0
651 	le?vspltisb	$tmp,0x0f
652 	?lvsl	$outperm,0,$enc
653 	?vperm	$outmask,$rndkey0,$outmask,$outperm
654 	le?vxor	$outperm,$outperm,$tmp
656 	vperm	$ivec,$ivec,$ivec,$outperm
657 	vsel	$inout,$outhead,$ivec,$outmask
658 	lvx	$inptail,$idx,$ivp
660 	vsel	$inout,$ivec,$inptail,$outmask
661 	stvx	$inout,$idx,$ivp
666 	.byte	0,12,0x14,0,0,0,6,0
669 #########################################################################
670 {{	# Optimized CBC decrypt procedure				#
671 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
673 $x00=0 if ($flavour =~ /osx/);
674 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
675 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
676 my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
677 			# v26-v31 last 6 round keys
678 my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
# Entered from ${prefix}_cbc_encrypt when decrypting large inputs
# (see the bge above); processes 8 blocks per iteration.  Saves the
# non-volatile vectors v20-v31, r26-r31 and vrsave per the ABI.
682 _aesp8_cbc_decrypt8x:
683 	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
684 	li	r10,`$FRAME+8*16+15`
685 	li	r11,`$FRAME+8*16+31`
686 	stvx	v20,r10,$sp		# ABI says so
709 	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
711 	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
713 	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
715 	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
717 	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
719 	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
721 	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
725 	subi	$rounds,$rounds,3	# -4 in total
726 	subi	$len,$len,128		# bias
# Copy the (possibly unaligned) key schedule: middle rounds are
# off-loaded to an aligned stack buffer at $key_, the last six stay
# resident in v26-v31, and round[1]/round[2] are pre-loaded.
728 	lvx	$rndkey0,$x00,$key	# load key schedule
732 	?vperm	$rndkey0,$rndkey0,v30,$keyperm
733 	addi	$key_,$sp,$FRAME+15
737 	?vperm	v24,v30,v31,$keyperm
740 	stvx	v24,$x00,$key_		# off-load round[1]
741 	?vperm	v25,v31,v30,$keyperm
743 	stvx	v25,$x10,$key_		# off-load round[2]
744 	addi	$key_,$key_,0x20
745 	bdnz	Load_cbc_dec_key
748 	?vperm	v24,v30,v31,$keyperm
750 	stvx	v24,$x00,$key_		# off-load round[3]
751 	?vperm	v25,v31,v26,$keyperm
753 	stvx	v25,$x10,$key_		# off-load round[4]
754 	addi	$key_,$sp,$FRAME+15	# rewind $key_
755 	?vperm	v26,v26,v27,$keyperm
757 	?vperm	v27,v27,v28,$keyperm
759 	?vperm	v28,v28,v29,$keyperm
761 	?vperm	v29,v29,v30,$keyperm
762 	lvx	$out0,$x70,$key		# borrow $out0
763 	?vperm	v30,v30,v31,$keyperm
764 	lvx	v24,$x00,$key_		# pre-load round[1]
765 	?vperm	v31,v31,$out0,$keyperm
766 	lvx	v25,$x10,$key_		# pre-load round[2]
768 	#lvx	$inptail,0,$inp		# "caller" already did this
769 	#addi	$inp,$inp,15		# 15 is not typo
770 	subi	$inp,$inp,15		# undo "caller"
# Load the first 8 ciphertext blocks with VSX (lvx_u) and xor each
# with round[0]; on LE the data is byte-reversed via $inpperm first.
773 	lvx_u	$in0,$x00,$inp		# load first 8 "words"
774 	le?lvsl	$inpperm,0,$idx
775 	le?vspltisb	$tmp,0x0f
777 	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
779 	le?vperm	$in0,$in0,$in0,$inpperm
781 	le?vperm	$in1,$in1,$in1,$inpperm
783 	le?vperm	$in2,$in2,$in2,$inpperm
784 	vxor	$out0,$in0,$rndkey0
786 	le?vperm	$in3,$in3,$in3,$inpperm
787 	vxor	$out1,$in1,$rndkey0
789 	le?vperm	$in4,$in4,$in4,$inpperm
790 	vxor	$out2,$in2,$rndkey0
793 	le?vperm	$in5,$in5,$in5,$inpperm
794 	vxor	$out3,$in3,$rndkey0
795 	le?vperm	$in6,$in6,$in6,$inpperm
796 	vxor	$out4,$in4,$rndkey0
797 	le?vperm	$in7,$in7,$in7,$inpperm
798 	vxor	$out5,$in5,$rndkey0
799 	vxor	$out6,$in6,$rndkey0
800 	vxor	$out7,$in7,$rndkey0
# Main loop: 8 interleaved vncipher streams, two rounds per iteration
# with v24/v25 reloaded from the stack buffer each time.
806 	vncipher	$out0,$out0,v24
807 	vncipher	$out1,$out1,v24
808 	vncipher	$out2,$out2,v24
809 	vncipher	$out3,$out3,v24
810 	vncipher	$out4,$out4,v24
811 	vncipher	$out5,$out5,v24
812 	vncipher	$out6,$out6,v24
813 	vncipher	$out7,$out7,v24
814 	lvx	v24,$x20,$key_		# round[3]
815 	addi	$key_,$key_,0x20
817 	vncipher	$out0,$out0,v25
818 	vncipher	$out1,$out1,v25
819 	vncipher	$out2,$out2,v25
820 	vncipher	$out3,$out3,v25
821 	vncipher	$out4,$out4,v25
822 	vncipher	$out5,$out5,v25
823 	vncipher	$out6,$out6,v25
824 	vncipher	$out7,$out7,v25
825 	lvx	v25,$x10,$key_		# round[4]
828 	subic	$len,$len,128		# $len-=128
829 	vncipher	$out0,$out0,v24
830 	vncipher	$out1,$out1,v24
831 	vncipher	$out2,$out2,v24
832 	vncipher	$out3,$out3,v24
833 	vncipher	$out4,$out4,v24
834 	vncipher	$out5,$out5,v24
835 	vncipher	$out6,$out6,v24
836 	vncipher	$out7,$out7,v24
838 	subfe.	r0,r0,r0		# borrow?-1:0
839 	vncipher	$out0,$out0,v25
840 	vncipher	$out1,$out1,v25
841 	vncipher	$out2,$out2,v25
842 	vncipher	$out3,$out3,v25
843 	vncipher	$out4,$out4,v25
844 	vncipher	$out5,$out5,v25
845 	vncipher	$out6,$out6,v25
846 	vncipher	$out7,$out7,v25
849 	vncipher	$out0,$out0,v26
850 	vncipher	$out1,$out1,v26
851 	vncipher	$out2,$out2,v26
852 	vncipher	$out3,$out3,v26
853 	vncipher	$out4,$out4,v26
854 	vncipher	$out5,$out5,v26
855 	vncipher	$out6,$out6,v26
856 	vncipher	$out7,$out7,v26
858 	add	$inp,$inp,r0		# $inp is adjusted in such
859 					# way that at exit from the
860 					# loop inX-in7 are loaded
862 	vncipher	$out0,$out0,v27
863 	vncipher	$out1,$out1,v27
864 	vncipher	$out2,$out2,v27
865 	vncipher	$out3,$out3,v27
866 	vncipher	$out4,$out4,v27
867 	vncipher	$out5,$out5,v27
868 	vncipher	$out6,$out6,v27
869 	vncipher	$out7,$out7,v27
871 	addi	$key_,$sp,$FRAME+15	# rewind $key_
872 	vncipher	$out0,$out0,v28
873 	vncipher	$out1,$out1,v28
874 	vncipher	$out2,$out2,v28
875 	vncipher	$out3,$out3,v28
876 	vncipher	$out4,$out4,v28
877 	vncipher	$out5,$out5,v28
878 	vncipher	$out6,$out6,v28
879 	vncipher	$out7,$out7,v28
880 	lvx	v24,$x00,$key_		# re-pre-load round[1]
882 	vncipher	$out0,$out0,v29
883 	vncipher	$out1,$out1,v29
884 	vncipher	$out2,$out2,v29
885 	vncipher	$out3,$out3,v29
886 	vncipher	$out4,$out4,v29
887 	vncipher	$out5,$out5,v29
888 	vncipher	$out6,$out6,v29
889 	vncipher	$out7,$out7,v29
890 	lvx	v25,$x10,$key_		# re-pre-load round[2]
892 	vncipher	$out0,$out0,v30
893 	 vxor	$ivec,$ivec,v31		# xor with last round key
894 	vncipher	$out1,$out1,v30
896 	vncipher	$out2,$out2,v30
898 	vncipher	$out3,$out3,v30
900 	vncipher	$out4,$out4,v30
902 	vncipher	$out5,$out5,v30
904 	vncipher	$out6,$out6,v30
906 	vncipher	$out7,$out7,v30
# Final round folds the CBC chaining xor into vncipherlast: block i is
# xored with ciphertext block i-1 ($ivec for the first, $in0..$in6 for
# the rest), then the next 8 blocks are loaded and pre-xored.
909 	vncipherlast	$out0,$out0,$ivec
910 	vncipherlast	$out1,$out1,$in0
911 	 lvx_u	$in0,$x00,$inp		# load next input block
912 	vncipherlast	$out2,$out2,$in1
914 	vncipherlast	$out3,$out3,$in2
915 	 le?vperm	$in0,$in0,$in0,$inpperm
917 	vncipherlast	$out4,$out4,$in3
918 	 le?vperm	$in1,$in1,$in1,$inpperm
920 	vncipherlast	$out5,$out5,$in4
921 	 le?vperm	$in2,$in2,$in2,$inpperm
923 	vncipherlast	$out6,$out6,$in5
924 	 le?vperm	$in3,$in3,$in3,$inpperm
926 	vncipherlast	$out7,$out7,$in6
927 	 le?vperm	$in4,$in4,$in4,$inpperm
930 	 le?vperm	$in5,$in5,$in5,$inpperm
934 	le?vperm	$out0,$out0,$out0,$inpperm
935 	le?vperm	$out1,$out1,$out1,$inpperm
936 	stvx_u	$out0,$x00,$out
937 	 le?vperm	$in6,$in6,$in6,$inpperm
938 	 vxor	$out0,$in0,$rndkey0
939 	le?vperm	$out2,$out2,$out2,$inpperm
940 	stvx_u	$out1,$x10,$out
941 	 le?vperm	$in7,$in7,$in7,$inpperm
942 	 vxor	$out1,$in1,$rndkey0
943 	le?vperm	$out3,$out3,$out3,$inpperm
944 	stvx_u	$out2,$x20,$out
945 	 vxor	$out2,$in2,$rndkey0
946 	le?vperm	$out4,$out4,$out4,$inpperm
947 	stvx_u	$out3,$x30,$out
948 	 vxor	$out3,$in3,$rndkey0
949 	le?vperm	$out5,$out5,$out5,$inpperm
950 	stvx_u	$out4,$x40,$out
951 	 vxor	$out4,$in4,$rndkey0
952 	le?vperm	$out6,$out6,$out6,$inpperm
953 	stvx_u	$out5,$x50,$out
954 	 vxor	$out5,$in5,$rndkey0
955 	le?vperm	$out7,$out7,$out7,$inpperm
956 	stvx_u	$out6,$x60,$out
957 	 vxor	$out6,$in6,$rndkey0
958 	stvx_u	$out7,$x70,$out
960 	 vxor	$out7,$in7,$rndkey0
963 	beq	Loop_cbc_dec8x		# did $len-=128 borrow?
# Tail path: 1..7 remaining blocks run through the same round
# structure using streams out1..out7 only.
970 Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
971 	vncipher	$out1,$out1,v24
972 	vncipher	$out2,$out2,v24
973 	vncipher	$out3,$out3,v24
974 	vncipher	$out4,$out4,v24
975 	vncipher	$out5,$out5,v24
976 	vncipher	$out6,$out6,v24
977 	vncipher	$out7,$out7,v24
978 	lvx	v24,$x20,$key_		# round[3]
979 	addi	$key_,$key_,0x20
981 	vncipher	$out1,$out1,v25
982 	vncipher	$out2,$out2,v25
983 	vncipher	$out3,$out3,v25
984 	vncipher	$out4,$out4,v25
985 	vncipher	$out5,$out5,v25
986 	vncipher	$out6,$out6,v25
987 	vncipher	$out7,$out7,v25
988 	lvx	v25,$x10,$key_		# round[4]
989 	bdnz	Loop_cbc_dec8x_tail
991 	vncipher	$out1,$out1,v24
992 	vncipher	$out2,$out2,v24
993 	vncipher	$out3,$out3,v24
994 	vncipher	$out4,$out4,v24
995 	vncipher	$out5,$out5,v24
996 	vncipher	$out6,$out6,v24
997 	vncipher	$out7,$out7,v24
999 	vncipher	$out1,$out1,v25
1000 	vncipher	$out2,$out2,v25
1001 	vncipher	$out3,$out3,v25
1002 	vncipher	$out4,$out4,v25
1003 	vncipher	$out5,$out5,v25
1004 	vncipher	$out6,$out6,v25
1005 	vncipher	$out7,$out7,v25
1007 	vncipher	$out1,$out1,v26
1008 	vncipher	$out2,$out2,v26
1009 	vncipher	$out3,$out3,v26
1010 	vncipher	$out4,$out4,v26
1011 	vncipher	$out5,$out5,v26
1012 	vncipher	$out6,$out6,v26
1013 	vncipher	$out7,$out7,v26
1015 	vncipher	$out1,$out1,v27
1016 	vncipher	$out2,$out2,v27
1017 	vncipher	$out3,$out3,v27
1018 	vncipher	$out4,$out4,v27
1019 	vncipher	$out5,$out5,v27
1020 	vncipher	$out6,$out6,v27
1021 	vncipher	$out7,$out7,v27
1023 	vncipher	$out1,$out1,v28
1024 	vncipher	$out2,$out2,v28
1025 	vncipher	$out3,$out3,v28
1026 	vncipher	$out4,$out4,v28
1027 	vncipher	$out5,$out5,v28
1028 	vncipher	$out6,$out6,v28
1029 	vncipher	$out7,$out7,v28
1031 	vncipher	$out1,$out1,v29
1032 	vncipher	$out2,$out2,v29
1033 	vncipher	$out3,$out3,v29
1034 	vncipher	$out4,$out4,v29
1035 	vncipher	$out5,$out5,v29
1036 	vncipher	$out6,$out6,v29
1037 	vncipher	$out7,$out7,v29
1039 	vncipher	$out1,$out1,v30
1040 	 vxor	$ivec,$ivec,v31		# last round key
1041 	vncipher	$out2,$out2,v30
1043 	vncipher	$out3,$out3,v30
1045 	vncipher	$out4,$out4,v30
1047 	vncipher	$out5,$out5,v30
1049 	vncipher	$out6,$out6,v30
1051 	vncipher	$out7,$out7,v30
# Dispatch on the remaining length: one case per tail size (7..1),
# each finishing with vncipherlast, LE byte-swap and unaligned stores.
1054 	cmplwi	$len,32			# switch($len)
1059 	blt	Lcbc_dec8x_three
1068 	vncipherlast	$out1,$out1,$ivec
1069 	vncipherlast	$out2,$out2,$in1
1070 	vncipherlast	$out3,$out3,$in2
1071 	vncipherlast	$out4,$out4,$in3
1072 	vncipherlast	$out5,$out5,$in4
1073 	vncipherlast	$out6,$out6,$in5
1074 	vncipherlast	$out7,$out7,$in6
1077 	le?vperm	$out1,$out1,$out1,$inpperm
1078 	le?vperm	$out2,$out2,$out2,$inpperm
1079 	stvx_u	$out1,$x00,$out
1080 	le?vperm	$out3,$out3,$out3,$inpperm
1081 	stvx_u	$out2,$x10,$out
1082 	le?vperm	$out4,$out4,$out4,$inpperm
1083 	stvx_u	$out3,$x20,$out
1084 	le?vperm	$out5,$out5,$out5,$inpperm
1085 	stvx_u	$out4,$x30,$out
1086 	le?vperm	$out6,$out6,$out6,$inpperm
1087 	stvx_u	$out5,$x40,$out
1088 	le?vperm	$out7,$out7,$out7,$inpperm
1089 	stvx_u	$out6,$x50,$out
1090 	stvx_u	$out7,$x60,$out
1096 	vncipherlast	$out2,$out2,$ivec
1097 	vncipherlast	$out3,$out3,$in2
1098 	vncipherlast	$out4,$out4,$in3
1099 	vncipherlast	$out5,$out5,$in4
1100 	vncipherlast	$out6,$out6,$in5
1101 	vncipherlast	$out7,$out7,$in6
1104 	le?vperm	$out2,$out2,$out2,$inpperm
1105 	le?vperm	$out3,$out3,$out3,$inpperm
1106 	stvx_u	$out2,$x00,$out
1107 	le?vperm	$out4,$out4,$out4,$inpperm
1108 	stvx_u	$out3,$x10,$out
1109 	le?vperm	$out5,$out5,$out5,$inpperm
1110 	stvx_u	$out4,$x20,$out
1111 	le?vperm	$out6,$out6,$out6,$inpperm
1112 	stvx_u	$out5,$x30,$out
1113 	le?vperm	$out7,$out7,$out7,$inpperm
1114 	stvx_u	$out6,$x40,$out
1115 	stvx_u	$out7,$x50,$out
1121 	vncipherlast	$out3,$out3,$ivec
1122 	vncipherlast	$out4,$out4,$in3
1123 	vncipherlast	$out5,$out5,$in4
1124 	vncipherlast	$out6,$out6,$in5
1125 	vncipherlast	$out7,$out7,$in6
1128 	le?vperm	$out3,$out3,$out3,$inpperm
1129 	le?vperm	$out4,$out4,$out4,$inpperm
1130 	stvx_u	$out3,$x00,$out
1131 	le?vperm	$out5,$out5,$out5,$inpperm
1132 	stvx_u	$out4,$x10,$out
1133 	le?vperm	$out6,$out6,$out6,$inpperm
1134 	stvx_u	$out5,$x20,$out
1135 	le?vperm	$out7,$out7,$out7,$inpperm
1136 	stvx_u	$out6,$x30,$out
1137 	stvx_u	$out7,$x40,$out
1143 	vncipherlast	$out4,$out4,$ivec
1144 	vncipherlast	$out5,$out5,$in4
1145 	vncipherlast	$out6,$out6,$in5
1146 	vncipherlast	$out7,$out7,$in6
1149 	le?vperm	$out4,$out4,$out4,$inpperm
1150 	le?vperm	$out5,$out5,$out5,$inpperm
1151 	stvx_u	$out4,$x00,$out
1152 	le?vperm	$out6,$out6,$out6,$inpperm
1153 	stvx_u	$out5,$x10,$out
1154 	le?vperm	$out7,$out7,$out7,$inpperm
1155 	stvx_u	$out6,$x20,$out
1156 	stvx_u	$out7,$x30,$out
1162 	vncipherlast	$out5,$out5,$ivec
1163 	vncipherlast	$out6,$out6,$in5
1164 	vncipherlast	$out7,$out7,$in6
1167 	le?vperm	$out5,$out5,$out5,$inpperm
1168 	le?vperm	$out6,$out6,$out6,$inpperm
1169 	stvx_u	$out5,$x00,$out
1170 	le?vperm	$out7,$out7,$out7,$inpperm
1171 	stvx_u	$out6,$x10,$out
1172 	stvx_u	$out7,$x20,$out
1178 	vncipherlast	$out6,$out6,$ivec
1179 	vncipherlast	$out7,$out7,$in6
1182 	le?vperm	$out6,$out6,$out6,$inpperm
1183 	le?vperm	$out7,$out7,$out7,$inpperm
1184 	stvx_u	$out6,$x00,$out
1185 	stvx_u	$out7,$x10,$out
1191 	vncipherlast	$out7,$out7,$ivec
1194 	le?vperm	$out7,$out7,$out7,$inpperm
# Write the final chaining value back to *$ivp.
1199 	le?vperm	$ivec,$ivec,$ivec,$inpperm
1200 	stvx_u	$ivec,0,$ivp		# write [unaligned] iv
# Scrub the stack copies of the round keys before returning.
1204 	stvx	$inpperm,r10,$sp	# wipe copies of round keys
1206 	stvx	$inpperm,r11,$sp
1208 	stvx	$inpperm,r10,$sp
1210 	stvx	$inpperm,r11,$sp
1212 	stvx	$inpperm,r10,$sp
1214 	stvx	$inpperm,r11,$sp
1216 	stvx	$inpperm,r10,$sp
1218 	stvx	$inpperm,r11,$sp
# Restore the non-volatile registers saved in the prologue.
1222 	lvx	v20,r10,$sp		# ABI says so
1244 	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1245 	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1246 	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1247 	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1248 	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1249 	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1250 	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1253 	.byte	0,12,0x04,0,0x80,6,6,0
1255 .size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1259 #########################################################################
1260 {{{ # CTR procedure[s] #
1261 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1262 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1263 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1268 .globl .${prefix}_ctr32_encrypt_blocks
1270 .${prefix}_ctr32_encrypt_blocks:
1279 vxor $rndkey0,$rndkey0,$rndkey0
1280 le?vspltisb $tmp,0x0f
1282 lvx $ivec,0,$ivp # load [unaligned] iv
1283 lvsl $inpperm,0,$ivp
1284 lvx $inptail,$idx,$ivp
1286 le?vxor $inpperm,$inpperm,$tmp
1287 vperm $ivec,$ivec,$inptail,$inpperm
1288 vsldoi $one,$rndkey0,$one,1
1291 ?lvsl $keyperm,0,$key # prepare for unaligned key
1292 lwz $rounds,240($key)
1294 lvsr $inpperm,0,r11 # prepare for unaligned load
1296 addi $inp,$inp,15 # 15 is not typo
1297 le?vxor $inpperm,$inpperm,$tmp
1299 srwi $rounds,$rounds,1
1301 subi $rounds,$rounds,1
1304 bge _aesp8_ctr32_encrypt8x
1306 ?lvsr $outperm,0,$out # prepare for unaligned store
1307 vspltisb $outmask,-1
1309 ?vperm $outmask,$rndkey0,$outmask,$outperm
1310 le?vxor $outperm,$outperm,$tmp
1314 lvx $rndkey1,$idx,$key
1316 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1317 vxor $inout,$ivec,$rndkey0
1318 lvx $rndkey0,$idx,$key
1324 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1325 vcipher $inout,$inout,$rndkey1
1326 lvx $rndkey1,$idx,$key
1328 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1329 vcipher $inout,$inout,$rndkey0
1330 lvx $rndkey0,$idx,$key
1334 vadduqm $ivec,$ivec,$one
1338 subic. $len,$len,1 # blocks--
1340 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1341 vcipher $inout,$inout,$rndkey1
1342 lvx $rndkey1,$idx,$key
1343 vperm $dat,$dat,$inptail,$inpperm
1345 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1347 vxor $dat,$dat,$rndkey1 # last round key
1348 vcipherlast $inout,$inout,$dat
1350 lvx $rndkey1,$idx,$key
1352 vperm $inout,$inout,$inout,$outperm
1353 vsel $dat,$outhead,$inout,$outmask
1355 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1357 vxor $inout,$ivec,$rndkey0
1358 lvx $rndkey0,$idx,$key
1365 lvx $inout,0,$out # redundant in aligned case
1366 vsel $inout,$outhead,$inout,$outmask
1372 .byte 0,12,0x14,0,0,0,6,0
1375 #########################################################################
1376 {{ # Optimized CTR procedure #
1378 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1379 $x00=0 if ($flavour =~ /osx/);
1380 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1381 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1382 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1383 # v26-v31 last 6 round keys
1384 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1385 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1389 _aesp8_ctr32_encrypt8x:
1390 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1391 li r10,`$FRAME+8*16+15`
1392 li r11,`$FRAME+8*16+31`
1393 stvx v20,r10,$sp # ABI says so
1416 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1418 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1420 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1422 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1424 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1426 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1428 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1432 subi $rounds,$rounds,3 # -4 in total
1434 lvx $rndkey0,$x00,$key # load key schedule
1438 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1439 addi $key_,$sp,$FRAME+15
1443 ?vperm v24,v30,v31,$keyperm
1446 stvx v24,$x00,$key_ # off-load round[1]
1447 ?vperm v25,v31,v30,$keyperm
1449 stvx v25,$x10,$key_ # off-load round[2]
1450 addi $key_,$key_,0x20
1451 bdnz Load_ctr32_enc_key
1454 ?vperm v24,v30,v31,$keyperm
1456 stvx v24,$x00,$key_ # off-load round[3]
1457 ?vperm v25,v31,v26,$keyperm
1459 stvx v25,$x10,$key_ # off-load round[4]
1460 addi $key_,$sp,$FRAME+15 # rewind $key_
1461 ?vperm v26,v26,v27,$keyperm
1463 ?vperm v27,v27,v28,$keyperm
1465 ?vperm v28,v28,v29,$keyperm
1467 ?vperm v29,v29,v30,$keyperm
1468 lvx $out0,$x70,$key # borrow $out0
1469 ?vperm v30,v30,v31,$keyperm
1470 lvx v24,$x00,$key_ # pre-load round[1]
1471 ?vperm v31,v31,$out0,$keyperm
1472 lvx v25,$x10,$key_ # pre-load round[2]
1474 vadduwm $two,$one,$one
1475 subi $inp,$inp,15 # undo "caller"
1478 vadduwm $out1,$ivec,$one # counter values ...
1479 vadduwm $out2,$ivec,$two
1480 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1482 vadduwm $out3,$out1,$two
1483 vxor $out1,$out1,$rndkey0
1484 le?lvsl $inpperm,0,$idx
1485 vadduwm $out4,$out2,$two
1486 vxor $out2,$out2,$rndkey0
1487 le?vspltisb $tmp,0x0f
1488 vadduwm $out5,$out3,$two
1489 vxor $out3,$out3,$rndkey0
1490 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1491 vadduwm $out6,$out4,$two
1492 vxor $out4,$out4,$rndkey0
1493 vadduwm $out7,$out5,$two
1494 vxor $out5,$out5,$rndkey0
1495 vadduwm $ivec,$out6,$two # next counter value
1496 vxor $out6,$out6,$rndkey0
1497 vxor $out7,$out7,$rndkey0
1503 vcipher $out0,$out0,v24
1504 vcipher $out1,$out1,v24
1505 vcipher $out2,$out2,v24
1506 vcipher $out3,$out3,v24
1507 vcipher $out4,$out4,v24
1508 vcipher $out5,$out5,v24
1509 vcipher $out6,$out6,v24
1510 vcipher $out7,$out7,v24
1511 Loop_ctr32_enc8x_middle:
1512 lvx v24,$x20,$key_ # round[3]
1513 addi $key_,$key_,0x20
1515 vcipher $out0,$out0,v25
1516 vcipher $out1,$out1,v25
1517 vcipher $out2,$out2,v25
1518 vcipher $out3,$out3,v25
1519 vcipher $out4,$out4,v25
1520 vcipher $out5,$out5,v25
1521 vcipher $out6,$out6,v25
1522 vcipher $out7,$out7,v25
1523 lvx v25,$x10,$key_ # round[4]
1524 bdnz Loop_ctr32_enc8x
1526 subic r11,$len,256 # $len-256, borrow $key_
1527 vcipher $out0,$out0,v24
1528 vcipher $out1,$out1,v24
1529 vcipher $out2,$out2,v24
1530 vcipher $out3,$out3,v24
1531 vcipher $out4,$out4,v24
1532 vcipher $out5,$out5,v24
1533 vcipher $out6,$out6,v24
1534 vcipher $out7,$out7,v24
1536 subfe r0,r0,r0 # borrow?-1:0
1537 vcipher $out0,$out0,v25
1538 vcipher $out1,$out1,v25
1539 vcipher $out2,$out2,v25
1540 vcipher $out3,$out3,v25
1541 vcipher $out4,$out4,v25
1542 vcipher $out5,$out5,v25
1543 vcipher $out6,$out6,v25
1544 vcipher $out7,$out7,v25
1547 addi $key_,$sp,$FRAME+15 # rewind $key_
1548 vcipher $out0,$out0,v26
1549 vcipher $out1,$out1,v26
1550 vcipher $out2,$out2,v26
1551 vcipher $out3,$out3,v26
1552 vcipher $out4,$out4,v26
1553 vcipher $out5,$out5,v26
1554 vcipher $out6,$out6,v26
1555 vcipher $out7,$out7,v26
1556 lvx v24,$x00,$key_ # re-pre-load round[1]
1558 subic $len,$len,129 # $len-=129
1559 vcipher $out0,$out0,v27
1560 addi $len,$len,1 # $len-=128 really
1561 vcipher $out1,$out1,v27
1562 vcipher $out2,$out2,v27
1563 vcipher $out3,$out3,v27
1564 vcipher $out4,$out4,v27
1565 vcipher $out5,$out5,v27
1566 vcipher $out6,$out6,v27
1567 vcipher $out7,$out7,v27
1568 lvx v25,$x10,$key_ # re-pre-load round[2]
1570 vcipher $out0,$out0,v28
1571 lvx_u $in0,$x00,$inp # load input
1572 vcipher $out1,$out1,v28
1573 lvx_u $in1,$x10,$inp
1574 vcipher $out2,$out2,v28
1575 lvx_u $in2,$x20,$inp
1576 vcipher $out3,$out3,v28
1577 lvx_u $in3,$x30,$inp
1578 vcipher $out4,$out4,v28
1579 lvx_u $in4,$x40,$inp
1580 vcipher $out5,$out5,v28
1581 lvx_u $in5,$x50,$inp
1582 vcipher $out6,$out6,v28
1583 lvx_u $in6,$x60,$inp
1584 vcipher $out7,$out7,v28
1585 lvx_u $in7,$x70,$inp
1588 vcipher $out0,$out0,v29
1589 le?vperm $in0,$in0,$in0,$inpperm
1590 vcipher $out1,$out1,v29
1591 le?vperm $in1,$in1,$in1,$inpperm
1592 vcipher $out2,$out2,v29
1593 le?vperm $in2,$in2,$in2,$inpperm
1594 vcipher $out3,$out3,v29
1595 le?vperm $in3,$in3,$in3,$inpperm
1596 vcipher $out4,$out4,v29
1597 le?vperm $in4,$in4,$in4,$inpperm
1598 vcipher $out5,$out5,v29
1599 le?vperm $in5,$in5,$in5,$inpperm
1600 vcipher $out6,$out6,v29
1601 le?vperm $in6,$in6,$in6,$inpperm
1602 vcipher $out7,$out7,v29
1603 le?vperm $in7,$in7,$in7,$inpperm
1605 add $inp,$inp,r0 # $inp is adjusted in such
1606 # way that at exit from the
1607 # loop inX-in7 are loaded
1609 subfe. r0,r0,r0 # borrow?-1:0
1610 vcipher $out0,$out0,v30
1611 vxor $in0,$in0,v31 # xor with last round key
1612 vcipher $out1,$out1,v30
1614 vcipher $out2,$out2,v30
1616 vcipher $out3,$out3,v30
1618 vcipher $out4,$out4,v30
1620 vcipher $out5,$out5,v30
1622 vcipher $out6,$out6,v30
1624 vcipher $out7,$out7,v30
1627 bne Lctr32_enc8x_break # did $len-129 borrow?
1629 vcipherlast $in0,$out0,$in0
1630 vcipherlast $in1,$out1,$in1
1631 vadduwm $out1,$ivec,$one # counter values ...
1632 vcipherlast $in2,$out2,$in2
1633 vadduwm $out2,$ivec,$two
1634 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1635 vcipherlast $in3,$out3,$in3
1636 vadduwm $out3,$out1,$two
1637 vxor $out1,$out1,$rndkey0
1638 vcipherlast $in4,$out4,$in4
1639 vadduwm $out4,$out2,$two
1640 vxor $out2,$out2,$rndkey0
1641 vcipherlast $in5,$out5,$in5
1642 vadduwm $out5,$out3,$two
1643 vxor $out3,$out3,$rndkey0
1644 vcipherlast $in6,$out6,$in6
1645 vadduwm $out6,$out4,$two
1646 vxor $out4,$out4,$rndkey0
1647 vcipherlast $in7,$out7,$in7
1648 vadduwm $out7,$out5,$two
1649 vxor $out5,$out5,$rndkey0
1650 le?vperm $in0,$in0,$in0,$inpperm
1651 vadduwm $ivec,$out6,$two # next counter value
1652 vxor $out6,$out6,$rndkey0
1653 le?vperm $in1,$in1,$in1,$inpperm
1654 vxor $out7,$out7,$rndkey0
1657 vcipher $out0,$out0,v24
1658 stvx_u $in0,$x00,$out
1659 le?vperm $in2,$in2,$in2,$inpperm
1660 vcipher $out1,$out1,v24
1661 stvx_u $in1,$x10,$out
1662 le?vperm $in3,$in3,$in3,$inpperm
1663 vcipher $out2,$out2,v24
1664 stvx_u $in2,$x20,$out
1665 le?vperm $in4,$in4,$in4,$inpperm
1666 vcipher $out3,$out3,v24
1667 stvx_u $in3,$x30,$out
1668 le?vperm $in5,$in5,$in5,$inpperm
1669 vcipher $out4,$out4,v24
1670 stvx_u $in4,$x40,$out
1671 le?vperm $in6,$in6,$in6,$inpperm
1672 vcipher $out5,$out5,v24
1673 stvx_u $in5,$x50,$out
1674 le?vperm $in7,$in7,$in7,$inpperm
1675 vcipher $out6,$out6,v24
1676 stvx_u $in6,$x60,$out
1677 vcipher $out7,$out7,v24
1678 stvx_u $in7,$x70,$out
1681 b Loop_ctr32_enc8x_middle
1686 blt Lctr32_enc8x_one
1688 beq Lctr32_enc8x_two
1690 blt Lctr32_enc8x_three
1692 beq Lctr32_enc8x_four
1694 blt Lctr32_enc8x_five
1696 beq Lctr32_enc8x_six
1698 blt Lctr32_enc8x_seven
1701 vcipherlast $out0,$out0,$in0
1702 vcipherlast $out1,$out1,$in1
1703 vcipherlast $out2,$out2,$in2
1704 vcipherlast $out3,$out3,$in3
1705 vcipherlast $out4,$out4,$in4
1706 vcipherlast $out5,$out5,$in5
1707 vcipherlast $out6,$out6,$in6
1708 vcipherlast $out7,$out7,$in7
1710 le?vperm $out0,$out0,$out0,$inpperm
1711 le?vperm $out1,$out1,$out1,$inpperm
1712 stvx_u $out0,$x00,$out
1713 le?vperm $out2,$out2,$out2,$inpperm
1714 stvx_u $out1,$x10,$out
1715 le?vperm $out3,$out3,$out3,$inpperm
1716 stvx_u $out2,$x20,$out
1717 le?vperm $out4,$out4,$out4,$inpperm
1718 stvx_u $out3,$x30,$out
1719 le?vperm $out5,$out5,$out5,$inpperm
1720 stvx_u $out4,$x40,$out
1721 le?vperm $out6,$out6,$out6,$inpperm
1722 stvx_u $out5,$x50,$out
1723 le?vperm $out7,$out7,$out7,$inpperm
1724 stvx_u $out6,$x60,$out
1725 stvx_u $out7,$x70,$out
1731 vcipherlast $out0,$out0,$in1
1732 vcipherlast $out1,$out1,$in2
1733 vcipherlast $out2,$out2,$in3
1734 vcipherlast $out3,$out3,$in4
1735 vcipherlast $out4,$out4,$in5
1736 vcipherlast $out5,$out5,$in6
1737 vcipherlast $out6,$out6,$in7
1739 le?vperm $out0,$out0,$out0,$inpperm
1740 le?vperm $out1,$out1,$out1,$inpperm
1741 stvx_u $out0,$x00,$out
1742 le?vperm $out2,$out2,$out2,$inpperm
1743 stvx_u $out1,$x10,$out
1744 le?vperm $out3,$out3,$out3,$inpperm
1745 stvx_u $out2,$x20,$out
1746 le?vperm $out4,$out4,$out4,$inpperm
1747 stvx_u $out3,$x30,$out
1748 le?vperm $out5,$out5,$out5,$inpperm
1749 stvx_u $out4,$x40,$out
1750 le?vperm $out6,$out6,$out6,$inpperm
1751 stvx_u $out5,$x50,$out
1752 stvx_u $out6,$x60,$out
1758 vcipherlast $out0,$out0,$in2
1759 vcipherlast $out1,$out1,$in3
1760 vcipherlast $out2,$out2,$in4
1761 vcipherlast $out3,$out3,$in5
1762 vcipherlast $out4,$out4,$in6
1763 vcipherlast $out5,$out5,$in7
1765 le?vperm $out0,$out0,$out0,$inpperm
1766 le?vperm $out1,$out1,$out1,$inpperm
1767 stvx_u $out0,$x00,$out
1768 le?vperm $out2,$out2,$out2,$inpperm
1769 stvx_u $out1,$x10,$out
1770 le?vperm $out3,$out3,$out3,$inpperm
1771 stvx_u $out2,$x20,$out
1772 le?vperm $out4,$out4,$out4,$inpperm
1773 stvx_u $out3,$x30,$out
1774 le?vperm $out5,$out5,$out5,$inpperm
1775 stvx_u $out4,$x40,$out
1776 stvx_u $out5,$x50,$out
1782 vcipherlast $out0,$out0,$in3
1783 vcipherlast $out1,$out1,$in4
1784 vcipherlast $out2,$out2,$in5
1785 vcipherlast $out3,$out3,$in6
1786 vcipherlast $out4,$out4,$in7
1788 le?vperm $out0,$out0,$out0,$inpperm
1789 le?vperm $out1,$out1,$out1,$inpperm
1790 stvx_u $out0,$x00,$out
1791 le?vperm $out2,$out2,$out2,$inpperm
1792 stvx_u $out1,$x10,$out
1793 le?vperm $out3,$out3,$out3,$inpperm
1794 stvx_u $out2,$x20,$out
1795 le?vperm $out4,$out4,$out4,$inpperm
1796 stvx_u $out3,$x30,$out
1797 stvx_u $out4,$x40,$out
1803 vcipherlast $out0,$out0,$in4
1804 vcipherlast $out1,$out1,$in5
1805 vcipherlast $out2,$out2,$in6
1806 vcipherlast $out3,$out3,$in7
1808 le?vperm $out0,$out0,$out0,$inpperm
1809 le?vperm $out1,$out1,$out1,$inpperm
1810 stvx_u $out0,$x00,$out
1811 le?vperm $out2,$out2,$out2,$inpperm
1812 stvx_u $out1,$x10,$out
1813 le?vperm $out3,$out3,$out3,$inpperm
1814 stvx_u $out2,$x20,$out
1815 stvx_u $out3,$x30,$out
1821 vcipherlast $out0,$out0,$in5
1822 vcipherlast $out1,$out1,$in6
1823 vcipherlast $out2,$out2,$in7
1825 le?vperm $out0,$out0,$out0,$inpperm
1826 le?vperm $out1,$out1,$out1,$inpperm
1827 stvx_u $out0,$x00,$out
1828 le?vperm $out2,$out2,$out2,$inpperm
1829 stvx_u $out1,$x10,$out
1830 stvx_u $out2,$x20,$out
1836 vcipherlast $out0,$out0,$in6
1837 vcipherlast $out1,$out1,$in7
1839 le?vperm $out0,$out0,$out0,$inpperm
1840 le?vperm $out1,$out1,$out1,$inpperm
1841 stvx_u $out0,$x00,$out
1842 stvx_u $out1,$x10,$out
1848 vcipherlast $out0,$out0,$in7
1850 le?vperm $out0,$out0,$out0,$inpperm
1857 stvx $inpperm,r10,$sp # wipe copies of round keys
1859 stvx $inpperm,r11,$sp
1861 stvx $inpperm,r10,$sp
1863 stvx $inpperm,r11,$sp
1865 stvx $inpperm,r10,$sp
1867 stvx $inpperm,r11,$sp
1869 stvx $inpperm,r10,$sp
1871 stvx $inpperm,r11,$sp
1875 lvx v20,r10,$sp # ABI says so
1897 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1898 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1899 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1900 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1901 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1902 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1903 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1906 .byte 0,12,0x04,0,0x80,6,6,0
1908 .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1912 #########################################################################
1913 {{{	# XTS procedures						#
1914 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
1915 #                             const AES_KEY *key1, const AES_KEY *key2,	#
1916 #                             [const] unsigned char iv[16]);		#
1917 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
1918 # input tweak value is assumed to be encrypted already, and last tweak	#
1919 # value, one suitable for consecutive call on same chunk of data, is	#
1920 # written back to original buffer. In addition, in "tweak chaining"	#
1921 # mode only complete input blocks are processed.			#
# Register allocation shared by both XTS entry points below:
# r3-r10 carry the six C arguments plus two scratch GPRs.
1923 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
# v0-v12: round keys, data block, permutation masks and tweak state.
1924 my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
1925 my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
1926 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
# $key2 is consumed early (tweak encryption), so its GPR is reused for
# the partial-block tail length.
1927 my $taillen = $key2;
# Swap $inp and $idx so that r3 is free as an all-purpose index register.
1929    ($inp,$idx) = ($idx,$inp);				# reassign
# AES-XTS encryption entry point (IEEE P1619 tweakable block cipher).
# Short inputs are handled here one block at a time; inputs of 6+ blocks
# branch to the interleaved _aesp8_xts_encrypt6x core (see `bge` below).
# If $key2 is non-NULL the input IV is first encrypted with key2 to form
# the initial tweak; otherwise ("tweak chaining") the IV is used as-is
# and the final tweak is written back to the IV buffer on exit.
1932 .globl	.${prefix}_xts_encrypt
1934 .${prefix}_xts_encrypt:
1935	mr		$inp,r3				# reassign
1941	mfspr		r12,256				# save vrsave
# Build the little-endian byte-reversal permute mask ($leperm); only
# assembled on LE builds (le? prefix).
1945	vspltisb	$seven,0x07			# 0x070707..07
1946	le?lvsl		$leperm,r11,r11
1947	le?vspltisb	$tmp,0x0f
1948	le?vxor		$leperm,$leperm,$seven
# Load the (possibly unaligned) 16-byte IV via two lvx + vperm.
1951	lvx		$tweak,0,$ivp			# load [unaligned] iv
1952	lvsl		$inpperm,0,$ivp
1953	lvx		$inptail,$idx,$ivp
1954	le?vxor		$inpperm,$inpperm,$tmp
1955	vperm		$tweak,$tweak,$inptail,$inpperm
1958	lvsr		$inpperm,0,r11			# prepare for unaligned load
1960	addi		$inp,$inp,15			# 15 is not typo
1961	le?vxor		$inpperm,$inpperm,$tmp
1963	${UCMP}i	$key2,0				# key2==NULL?
1964	beq		Lxts_enc_no_key2
# --- Encrypt the IV with key2 to produce the initial tweak. ---
1966	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
1967	lwz		$rounds,240($key2)
# Rounds are consumed two at a time, hence the halving; minus one for
# the explicit last round (vcipherlast).
1968	srwi		$rounds,$rounds,1
1969	subi		$rounds,$rounds,1
1972	lvx		$rndkey0,0,$key2
1973	lvx		$rndkey1,$idx,$key2
1975	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
1976	vxor		$tweak,$tweak,$rndkey0
1977	lvx		$rndkey0,$idx,$key2
1982	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
1983	vcipher		$tweak,$tweak,$rndkey1
1984	lvx		$rndkey1,$idx,$key2
1986	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
1987	vcipher		$tweak,$tweak,$rndkey0
1988	lvx		$rndkey0,$idx,$key2
1992	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
1993	vcipher		$tweak,$tweak,$rndkey1
1994	lvx		$rndkey1,$idx,$key2
1995	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
1996	vcipherlast	$tweak,$tweak,$rndkey0
1998	li		$ivp,0				# don't chain the tweak
# Tweak-chaining mode: truncate $len to whole blocks (see API comment).
2003	and		$len,$len,$idx			# in "tweak chaining"
2004							# mode only complete
2005							# blocks are processed
# --- Set up key1 schedule access and the GF(2^128) reduction constant. ---
2010	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
2011	lwz		$rounds,240($key1)
2012	srwi		$rounds,$rounds,1
2013	subi		$rounds,$rounds,1
# $eighty7 = 0x870101..01 is used to fold the carry when doubling the
# tweak in GF(2^128) (x^128 + x^7 + x^2 + x + 1).
2016	vslb		$eighty7,$seven,$seven		# 0x808080..80
2017	vor		$eighty7,$eighty7,$seven	# 0x878787..87
2018	vspltisb	$tmp,1				# 0x010101..01
2019	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
# Large inputs take the 6x-interleaved fast path.
2022	bge		_aesp8_xts_encrypt6x
2024	andi.		$taillen,$len,15
2026	subi		$taillen,$taillen,16
# --- One-block-at-a-time path: load block, xor tweak and round[0]. ---
2031	lvx		$rndkey0,0,$key1
2032	lvx		$rndkey1,$idx,$key1
2034	vperm		$inout,$inout,$inptail,$inpperm
2035	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2036	vxor		$inout,$inout,$tweak
2037	vxor		$inout,$inout,$rndkey0
2038	lvx		$rndkey0,$idx,$key1
# Two rounds per iteration of the (unseen) round loop.
2045	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2046	vcipher		$inout,$inout,$rndkey1
2047	lvx		$rndkey1,$idx,$key1
2049	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2050	vcipher		$inout,$inout,$rndkey0
2051	lvx		$rndkey0,$idx,$key1
2055	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2056	vcipher		$inout,$inout,$rndkey1
2057	lvx		$rndkey1,$idx,$key1
# Fold the tweak into the last round key so vcipherlast applies both.
2059	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2060	vxor		$rndkey0,$rndkey0,$tweak
2061	vcipherlast	$output,$inout,$rndkey0
2063	le?vperm	$tmp,$output,$output,$leperm
2065	le?stvx_u	$tmp,0,$out
2066	be?stvx_u	$output,0,$out
2075	lvx		$rndkey0,0,$key1
2076	lvx		$rndkey1,$idx,$key1
# Advance tweak: multiply by x in GF(2^128) (arithmetic shift of the
# top bit across all lanes, double, conditionally xor 0x87 reduction).
2084	vsrab		$tmp,$tweak,$seven		# next tweak value
2085	vaddubm		$tweak,$tweak,$tweak
2086	vsldoi		$tmp,$tmp,$tmp,15
2087	vand		$tmp,$tmp,$eighty7
2088	vxor		$tweak,$tweak,$tmp
2090	vperm		$inout,$inout,$inptail,$inpperm
2091	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2092	vxor		$inout,$inout,$tweak
2093	vxor		$output,$output,$rndkey0	# just in case $len<16
2094	vxor		$inout,$inout,$rndkey0
2095	lvx		$rndkey0,$idx,$key1
# --- Ciphertext stealing: merge the short tail with the previous
# ciphertext block via vsel, then loop once more. ---
2102	vxor		$output,$output,$tweak
2103	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
2104	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
2106	vperm		$inptail,$inptail,$tmp,$inpperm
2107	vsel		$inout,$inout,$output,$inptail
2116	bdnz		Loop_xts_enc_steal
2119	b		Loop_xts_enc			# one more time...
# --- Exit: in tweak-chaining mode store the next tweak back to iv[]. ---
2125	vsrab		$tmp,$tweak,$seven		# next tweak value
2126	vaddubm		$tweak,$tweak,$tweak
2127	vsldoi		$tmp,$tmp,$tmp,15
2128	vand		$tmp,$tmp,$eighty7
2129	vxor		$tweak,$tweak,$tmp
2131	le?vperm	$tweak,$tweak,$tweak,$leperm
2132	stvx_u		$tweak,0,$ivp
2135	mtspr		256,r12				# restore vrsave
# Traceback table (AIX/ELF ABI metadata).
2139	.byte		0,12,0x04,0,0x80,6,6,0
2141 .size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
# AES-XTS decryption entry point. Mirrors .${prefix}_xts_encrypt but
# uses vncipher/vncipherlast and, for ciphertext stealing, decrypts the
# next-to-last block with tweak N ($tweak1) while the tweak for the last
# full block ($tweak) is applied to the stolen block — the order swap is
# the XTS-decrypt peculiarity flagged ":-(/:-)" below.
2143 .globl	.${prefix}_xts_decrypt
2145 .${prefix}_xts_decrypt:
2146	mr		$inp,r3				# reassign
2152	mfspr		r12,256				# save vrsave
# LE byte-reversal mask, as in the encrypt path.
2161	vspltisb	$seven,0x07			# 0x070707..07
2162	le?lvsl		$leperm,r11,r11
2163	le?vspltisb	$tmp,0x0f
2164	le?vxor		$leperm,$leperm,$seven
# Load the (possibly unaligned) IV.
2167	lvx		$tweak,0,$ivp			# load [unaligned] iv
2168	lvsl		$inpperm,0,$ivp
2169	lvx		$inptail,$idx,$ivp
2170	le?vxor		$inpperm,$inpperm,$tmp
2171	vperm		$tweak,$tweak,$inptail,$inpperm
2174	lvsr		$inpperm,0,r11			# prepare for unaligned load
2176	addi		$inp,$inp,15			# 15 is not typo
2177	le?vxor		$inpperm,$inpperm,$tmp
2179	${UCMP}i	$key2,0				# key2==NULL?
2180	beq		Lxts_dec_no_key2
# --- Encrypt the IV with key2 (always vcipher, even when decrypting
# data) to produce the initial tweak. ---
2182	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
2183	lwz		$rounds,240($key2)
2184	srwi		$rounds,$rounds,1
2185	subi		$rounds,$rounds,1
2188	lvx		$rndkey0,0,$key2
2189	lvx		$rndkey1,$idx,$key2
2191	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2192	vxor		$tweak,$tweak,$rndkey0
2193	lvx		$rndkey0,$idx,$key2
2198	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2199	vcipher		$tweak,$tweak,$rndkey1
2200	lvx		$rndkey1,$idx,$key2
2202	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2203	vcipher		$tweak,$tweak,$rndkey0
2204	lvx		$rndkey0,$idx,$key2
2208	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2209	vcipher		$tweak,$tweak,$rndkey1
2210	lvx		$rndkey1,$idx,$key2
2211	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2212	vcipherlast	$tweak,$tweak,$rndkey0
2214	li		$ivp,0				# don't chain the tweak
2220	add		$len,$len,$idx			# in "tweak chaining"
2221							# mode only complete
2222							# blocks are processed
# --- key1 schedule access and the 0x87 GF(2^128) reduction constant. ---
2227	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
2228	lwz		$rounds,240($key1)
2229	srwi		$rounds,$rounds,1
2230	subi		$rounds,$rounds,1
2233	vslb		$eighty7,$seven,$seven		# 0x808080..80
2234	vor		$eighty7,$eighty7,$seven	# 0x878787..87
2235	vspltisb	$tmp,1				# 0x010101..01
2236	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
# Large inputs take the 6x-interleaved fast path.
2239	bge		_aesp8_xts_decrypt6x
# --- One-block-at-a-time decrypt path. ---
2241	lvx		$rndkey0,0,$key1
2242	lvx		$rndkey1,$idx,$key1
2244	vperm		$inout,$inout,$inptail,$inpperm
2245	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2246	vxor		$inout,$inout,$tweak
2247	vxor		$inout,$inout,$rndkey0
2248	lvx		$rndkey0,$idx,$key1
# Two inverse rounds per iteration of the (unseen) round loop.
2258	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2259	vncipher	$inout,$inout,$rndkey1
2260	lvx		$rndkey1,$idx,$key1
2262	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2263	vncipher	$inout,$inout,$rndkey0
2264	lvx		$rndkey0,$idx,$key1
2268	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2269	vncipher	$inout,$inout,$rndkey1
2270	lvx		$rndkey1,$idx,$key1
# Fold the tweak into the last round key for vncipherlast.
2272	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2273	vxor		$rndkey0,$rndkey0,$tweak
2274	vncipherlast	$output,$inout,$rndkey0
2276	le?vperm	$tmp,$output,$output,$leperm
2278	le?stvx_u	$tmp,0,$out
2279	be?stvx_u	$output,0,$out
2288	lvx		$rndkey0,0,$key1
2289	lvx		$rndkey1,$idx,$key1
# Advance tweak: multiply by x in GF(2^128).
2292	vsrab		$tmp,$tweak,$seven		# next tweak value
2293	vaddubm		$tweak,$tweak,$tweak
2294	vsldoi		$tmp,$tmp,$tmp,15
2295	vand		$tmp,$tmp,$eighty7
2296	vxor		$tweak,$tweak,$tmp
2298	vperm		$inout,$inout,$inptail,$inpperm
2299	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2300	vxor		$inout,$inout,$tweak
2301	vxor		$inout,$inout,$rndkey0
2302	lvx		$rndkey0,$idx,$key1
# --- Stealing prep: compute tweak1 = tweak * x, the tweak belonging to
# the final (short) block position. ---
2310	vsrab		$tmp,$tweak,$seven		# next tweak value
2311	vaddubm		$tweak1,$tweak,$tweak
2312	vsldoi		$tmp,$tmp,$tmp,15
2313	vand		$tmp,$tmp,$eighty7
2314	vxor		$tweak1,$tweak1,$tmp
# Undo the $tweak already xored in above and substitute $tweak1:
# the penultimate ciphertext block must be decrypted under tweak1.
2319	vxor		$inout,$inout,$tweak		# :-(
2320	vxor		$inout,$inout,$tweak1		# :-)
# Inverse rounds under tweak1 (two per unseen loop iteration).
2323	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2324	vncipher	$inout,$inout,$rndkey1
2325	lvx		$rndkey1,$idx,$key1
2327	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2328	vncipher	$inout,$inout,$rndkey0
2329	lvx		$rndkey0,$idx,$key1
2331	bdnz		Loop_xts_dec_short
2333	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2334	vncipher	$inout,$inout,$rndkey1
2335	lvx		$rndkey1,$idx,$key1
2337	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2338	vxor		$rndkey0,$rndkey0,$tweak1
2339	vncipherlast	$output,$inout,$rndkey0
2341	le?vperm	$tmp,$output,$output,$leperm
2343	le?stvx_u	$tmp,0,$out
2344	be?stvx_u	$output,0,$out
# --- Ciphertext stealing: build the final short block by selecting
# between the new plaintext and the previous output. ---
2349	lvx		$rndkey0,0,$key1
2350	lvx		$rndkey1,$idx,$key1
2352	vperm		$inout,$inout,$inptail,$inpperm
2353	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2355	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
2356	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
2358	vperm		$inptail,$inptail,$tmp,$inpperm
2359	vsel		$inout,$inout,$output,$inptail
2361	vxor		$rndkey0,$rndkey0,$tweak
2362	vxor		$inout,$inout,$rndkey0
2363	lvx		$rndkey0,$idx,$key1
2372	bdnz		Loop_xts_dec_steal
2375	b		Loop_xts_dec			# one more time...
# --- Exit: in tweak-chaining mode store the next tweak back to iv[]. ---
2381	vsrab		$tmp,$tweak,$seven		# next tweak value
2382	vaddubm		$tweak,$tweak,$tweak
2383	vsldoi		$tmp,$tmp,$tmp,15
2384	vand		$tmp,$tmp,$eighty7
2385	vxor		$tweak,$tweak,$tmp
2387	le?vperm	$tweak,$tweak,$tweak,$leperm
2388	stvx_u		$tweak,0,$ivp
2391	mtspr		256,r12				# restore vrsave
# Traceback table (AIX/ELF ABI metadata).
2395	.byte		0,12,0x04,0,0x80,6,6,0
2397 .size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2399 #########################################################################
2400 {{	# Optimized XTS procedures					#
# GPR indices 0x00..0x70 used as vector load/store offsets for the eight
# 16-byte lanes; r0 cannot encode offset 0 on OS X flavour, hence the
# literal-0 substitution below.
2402 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2403     $x00=0 if ($flavour =~ /osx/);
# Six input blocks, six output blocks, six per-block tweaks.
2404 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
2405 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2406 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2407 my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
2408 			# v26-v31 last 6 round keys
2409 my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
# 6x-interleaved AES-XTS encryption core, entered from .${prefix}_xts_encrypt
# for inputs of at least 6 blocks. Processes six blocks per main-loop
# iteration with tweak generation interleaved between vcipher rounds to
# hide latency; tail cases of 1..5 remaining blocks and ciphertext
# stealing are handled after the loop. Round keys are unrolled into
# v24-v31 with overflow spilled to a stack buffer at $sp+$FRAME.
2414 _aesp8_xts_encrypt6x:
# Prologue: allocate frame, save LR, nonvolatile VRs v20-v31 (offsets
# built from r7/r3), vrsave, and GPRs r26-r31.
2415	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2417	li		r7,`$FRAME+8*16+15`
2418	li		r3,`$FRAME+8*16+31`
2419	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2420	stvx		v20,r7,$sp		# ABI says so
2443	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
2445	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2447	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2449	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2451	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2453	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2455	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
# --- Key-schedule load: align key1 (paired ?vperm), keep the first
# rounds in a v24/v25 rotating buffer spilled to the stack, and the
# last 6 round keys resident in v26-v31. ---
2459	subi		$rounds,$rounds,3	# -4 in total
2461	lvx		$rndkey0,$x00,$key1	# load key schedule
2463	addi		$key1,$key1,0x20
2465	?vperm		$rndkey0,$rndkey0,v30,$keyperm
2466	addi		$key_,$sp,$FRAME+15
2470	?vperm		v24,v30,v31,$keyperm
2472	addi		$key1,$key1,0x20
2473	stvx		v24,$x00,$key_		# off-load round[1]
2474	?vperm		v25,v31,v30,$keyperm
2476	stvx		v25,$x10,$key_		# off-load round[2]
2477	addi		$key_,$key_,0x20
2478	bdnz		Load_xts_enc_key
2481	?vperm		v24,v30,v31,$keyperm
2483	stvx		v24,$x00,$key_		# off-load round[3]
2484	?vperm		v25,v31,v26,$keyperm
2486	stvx		v25,$x10,$key_		# off-load round[4]
2487	addi		$key_,$sp,$FRAME+15	# rewind $key_
2488	?vperm		v26,v26,v27,$keyperm
2490	?vperm		v27,v27,v28,$keyperm
2492	?vperm		v28,v28,v29,$keyperm
2494	?vperm		v29,v29,v30,$keyperm
2495	lvx		$twk5,$x70,$key1	# borrow $twk5
2496	?vperm		v30,v30,v31,$keyperm
2497	lvx		v24,$x00,$key_		# pre-load round[1]
2498	?vperm		v31,v31,$twk5,$keyperm
2499	lvx		v25,$x10,$key_		# pre-load round[2]
# --- Prepare six blocks: for each, derive twkN = tweak ^ rndkey0,
# advance the tweak (multiply by x in GF(2^128) via the vsrab/vaddubm/
# vsldoi/vand/vxor sequence), and form outN = inN ^ twkN. ---
2501	vperm		$in0,$inout,$inptail,$inpperm
2502	subi		$inp,$inp,31		# undo "caller"
2503	vxor		$twk0,$tweak,$rndkey0
2504	vsrab		$tmp,$tweak,$seven	# next tweak value
2505	vaddubm		$tweak,$tweak,$tweak
2506	vsldoi		$tmp,$tmp,$tmp,15
2507	vand		$tmp,$tmp,$eighty7
2508	vxor		$out0,$in0,$twk0
2509	vxor		$tweak,$tweak,$tmp
2511	lvx_u		$in1,$x10,$inp
2512	vxor		$twk1,$tweak,$rndkey0
2513	vsrab		$tmp,$tweak,$seven	# next tweak value
2514	vaddubm		$tweak,$tweak,$tweak
2515	vsldoi		$tmp,$tmp,$tmp,15
2516	le?vperm	$in1,$in1,$in1,$leperm
2517	vand		$tmp,$tmp,$eighty7
2518	vxor		$out1,$in1,$twk1
2519	vxor		$tweak,$tweak,$tmp
2521	lvx_u		$in2,$x20,$inp
# $taillen = residual bytes of a partial final block (for stealing).
2522	andi.		$taillen,$len,15
2523	vxor		$twk2,$tweak,$rndkey0
2524	vsrab		$tmp,$tweak,$seven	# next tweak value
2525	vaddubm		$tweak,$tweak,$tweak
2526	vsldoi		$tmp,$tmp,$tmp,15
2527	le?vperm	$in2,$in2,$in2,$leperm
2528	vand		$tmp,$tmp,$eighty7
2529	vxor		$out2,$in2,$twk2
2530	vxor		$tweak,$tweak,$tmp
2532	lvx_u		$in3,$x30,$inp
2533	sub		$len,$len,$taillen
2534	vxor		$twk3,$tweak,$rndkey0
2535	vsrab		$tmp,$tweak,$seven	# next tweak value
2536	vaddubm	$tweak,$tweak,$tweak
2537	vsldoi		$tmp,$tmp,$tmp,15
2538	le?vperm	$in3,$in3,$in3,$leperm
2539	vand		$tmp,$tmp,$eighty7
2540	vxor		$out3,$in3,$twk3
2541	vxor		$tweak,$tweak,$tmp
2543	lvx_u		$in4,$x40,$inp
2545	vxor		$twk4,$tweak,$rndkey0
2546	vsrab		$tmp,$tweak,$seven	# next tweak value
2547	vaddubm		$tweak,$tweak,$tweak
2548	vsldoi		$tmp,$tmp,$tmp,15
2549	le?vperm	$in4,$in4,$in4,$leperm
2550	vand		$tmp,$tmp,$eighty7
2551	vxor		$out4,$in4,$twk4
2552	vxor		$tweak,$tweak,$tmp
2554	lvx_u		$in5,$x50,$inp
2556	vxor		$twk5,$tweak,$rndkey0
2557	vsrab		$tmp,$tweak,$seven	# next tweak value
2558	vaddubm		$tweak,$tweak,$tweak
2559	vsldoi		$tmp,$tmp,$tmp,15
2560	le?vperm	$in5,$in5,$in5,$leperm
2561	vand		$tmp,$tmp,$eighty7
2562	vxor		$out5,$in5,$twk5
2563	vxor		$tweak,$tweak,$tmp
# Pre-fold rndkey0 into the last round key; undone per-block by xoring
# twkN (which also carries rndkey0) before vcipherlast.
2565	vxor		v31,v31,$rndkey0
# --- Main 6x round loop: rounds from the stack-resident rotating
# v24/v25 buffer, then the resident v26-v31 tail, with next-iteration
# tweak generation and input loads interleaved throughout. ---
2571	vcipher	$out0,$out0,v24
2572	vcipher	$out1,$out1,v24
2573	vcipher	$out2,$out2,v24
2574	vcipher	$out3,$out3,v24
2575	vcipher	$out4,$out4,v24
2576	vcipher	$out5,$out5,v24
2577	lvx		v24,$x20,$key_		# round[3]
2578	addi		$key_,$key_,0x20
2580	vcipher	$out0,$out0,v25
2581	vcipher	$out1,$out1,v25
2582	vcipher	$out2,$out2,v25
2583	vcipher	$out3,$out3,v25
2584	vcipher	$out4,$out4,v25
2585	vcipher	$out5,$out5,v25
2586	lvx		v25,$x10,$key_		# round[4]
# subic/subfe below compute "did $len-=96 borrow?" for the loop exit
# and for backing $inp up so in0-in5 stay valid on the last pass.
2589	subic		$len,$len,96		# $len-=96
2590	vxor		$in0,$twk0,v31		# xor with last round key
2591	vcipher	$out0,$out0,v24
2592	vcipher	$out1,$out1,v24
2593	vsrab		$tmp,$tweak,$seven	# next tweak value
2594	vxor		$twk0,$tweak,$rndkey0
2595	vaddubm	$tweak,$tweak,$tweak
2596	vcipher	$out2,$out2,v24
2597	vcipher	$out3,$out3,v24
2598	vsldoi		$tmp,$tmp,$tmp,15
2599	vcipher	$out4,$out4,v24
2600	vcipher	$out5,$out5,v24
2602	subfe.		r0,r0,r0		# borrow?-1:0
2603	vand		$tmp,$tmp,$eighty7
2604	vcipher	$out0,$out0,v25
2605	vcipher	$out1,$out1,v25
2606	vxor		$tweak,$tweak,$tmp
2607	vcipher	$out2,$out2,v25
2608	vcipher	$out3,$out3,v25
2610	vsrab		$tmp,$tweak,$seven	# next tweak value
2611	vxor		$twk1,$tweak,$rndkey0
2612	vcipher	$out4,$out4,v25
2613	vcipher	$out5,$out5,v25
2616	vaddubm	$tweak,$tweak,$tweak
2617	vsldoi		$tmp,$tmp,$tmp,15
2618	vcipher	$out0,$out0,v26
2619	vcipher	$out1,$out1,v26
2620	vand		$tmp,$tmp,$eighty7
2621	vcipher	$out2,$out2,v26
2622	vcipher	$out3,$out3,v26
2623	vxor		$tweak,$tweak,$tmp
2624	vcipher	$out4,$out4,v26
2625	vcipher	$out5,$out5,v26
# r0 is 0 or -96 here (from subfe), so $inp only advances when another
# full 6-block pass remains.
2627	add		$inp,$inp,r0		# $inp is adjusted in such
2628						# way that at exit from the
2629						# loop inX-in5 are loaded
2632	vsrab		$tmp,$tweak,$seven	# next tweak value
2633	vxor		$twk2,$tweak,$rndkey0
2634	vaddubm	$tweak,$tweak,$tweak
2635	vcipher	$out0,$out0,v27
2636	vcipher	$out1,$out1,v27
2637	vsldoi		$tmp,$tmp,$tmp,15
2638	vcipher	$out2,$out2,v27
2639	vcipher	$out3,$out3,v27
2640	vand		$tmp,$tmp,$eighty7
2641	vcipher	$out4,$out4,v27
2642	vcipher	$out5,$out5,v27
2644	addi		$key_,$sp,$FRAME+15	# rewind $key_
2645	vxor		$tweak,$tweak,$tmp
2646	vcipher	$out0,$out0,v28
2647	vcipher	$out1,$out1,v28
2649	vsrab		$tmp,$tweak,$seven	# next tweak value
2650	vxor		$twk3,$tweak,$rndkey0
2651	vcipher	$out2,$out2,v28
2652	vcipher	$out3,$out3,v28
2653	vaddubm	$tweak,$tweak,$tweak
2654	vsldoi		$tmp,$tmp,$tmp,15
2655	vcipher	$out4,$out4,v28
2656	vcipher	$out5,$out5,v28
2657	lvx		v24,$x00,$key_		# re-pre-load round[1]
2658	vand		$tmp,$tmp,$eighty7
2660	vcipher	$out0,$out0,v29
2661	vcipher	$out1,$out1,v29
2662	vxor		$tweak,$tweak,$tmp
2663	vcipher	$out2,$out2,v29
2664	vcipher	$out3,$out3,v29
2666	vsrab		$tmp,$tweak,$seven	# next tweak value
2667	vxor		$twk4,$tweak,$rndkey0
2668	vcipher	$out4,$out4,v29
2669	vcipher	$out5,$out5,v29
2670	lvx		v25,$x10,$key_		# re-pre-load round[2]
2671	vaddubm	$tweak,$tweak,$tweak
2672	vsldoi		$tmp,$tmp,$tmp,15
2674	vcipher	$out0,$out0,v30
2675	vcipher	$out1,$out1,v30
2676	vand		$tmp,$tmp,$eighty7
2677	vcipher	$out2,$out2,v30
2678	vcipher	$out3,$out3,v30
2679	vxor		$tweak,$tweak,$tmp
2680	vcipher	$out4,$out4,v30
2681	vcipher	$out5,$out5,v30
2683	vsrab		$tmp,$tweak,$seven	# next tweak value
2684	vxor		$twk5,$tweak,$rndkey0
# Last round: vcipherlast's second operand is inN^twkN (twkN already
# carries rndkey0 and v31 was pre-folded), yielding tweak-masked output;
# next iteration's inputs are loaded in the same slots.
2686	vcipherlast	$out0,$out0,$in0
2687	lvx_u		$in0,$x00,$inp		# load next input block
2688	vaddubm	$tweak,$tweak,$tweak
2689	vsldoi		$tmp,$tmp,$tmp,15
2690	vcipherlast	$out1,$out1,$in1
2691	lvx_u		$in1,$x10,$inp
2692	vcipherlast	$out2,$out2,$in2
2693	le?vperm	$in0,$in0,$in0,$leperm
2694	lvx_u		$in2,$x20,$inp
2695	vand		$tmp,$tmp,$eighty7
2696	vcipherlast	$out3,$out3,$in3
2697	le?vperm	$in1,$in1,$in1,$leperm
2698	lvx_u		$in3,$x30,$inp
2699	vcipherlast	$out4,$out4,$in4
2700	le?vperm	$in2,$in2,$in2,$leperm
2701	lvx_u		$in4,$x40,$inp
2702	vxor		$tweak,$tweak,$tmp
# $out5 result kept in $tmp: it may be consumed by ciphertext stealing.
2703	vcipherlast	$tmp,$out5,$in5		# last block might be needed
2705	le?vperm	$in3,$in3,$in3,$leperm
2706	lvx_u		$in5,$x50,$inp
2708	le?vperm	$in4,$in4,$in4,$leperm
2709	le?vperm	$in5,$in5,$in5,$leperm
# Store six output blocks while xoring the next six inputs with their
# tweaks for the following iteration.
2711	le?vperm	$out0,$out0,$out0,$leperm
2712	le?vperm	$out1,$out1,$out1,$leperm
2713	stvx_u		$out0,$x00,$out		# store output
2714	vxor		$out0,$in0,$twk0
2715	le?vperm	$out2,$out2,$out2,$leperm
2716	stvx_u		$out1,$x10,$out
2717	vxor		$out1,$in1,$twk1
2718	le?vperm	$out3,$out3,$out3,$leperm
2719	stvx_u		$out2,$x20,$out
2720	vxor		$out2,$in2,$twk2
2721	le?vperm	$out4,$out4,$out4,$leperm
2722	stvx_u		$out3,$x30,$out
2723	vxor		$out3,$in3,$twk3
2724	le?vperm	$out5,$tmp,$tmp,$leperm
2725	stvx_u		$out4,$x40,$out
2726	vxor		$out4,$in4,$twk4
2727	le?stvx_u	$out5,$x50,$out
2728	be?stvx_u	$tmp, $x50,$out
2729	vxor		$out5,$in5,$twk5
2733	beq		Loop_xts_enc6x		# did $len-=96 borrow?
# --- Tail dispatch: 0x60 restores the over-subtraction; remaining
# whole-block count selects one of the five/four/.../one cases. ---
2735	addic.		$len,$len,0x60
2742	blt		Lxts_enc6x_three
# --- five blocks left: shift in1-in5 down, finish via enc5x helper. ---
2747	vxor		$out0,$in1,$twk0
2748	vxor		$out1,$in2,$twk1
2749	vxor		$out2,$in3,$twk2
2750	vxor		$out3,$in4,$twk3
2751	vxor		$out4,$in5,$twk4
2755	le?vperm	$out0,$out0,$out0,$leperm
2756	vmr		$twk0,$twk5		# unused tweak
2757	le?vperm	$out1,$out1,$out1,$leperm
2758	stvx_u		$out0,$x00,$out		# store output
2759	le?vperm	$out2,$out2,$out2,$leperm
2760	stvx_u		$out1,$x10,$out
2761	le?vperm	$out3,$out3,$out3,$leperm
2762	stvx_u		$out2,$x20,$out
2763	vxor		$tmp,$out4,$twk5	# last block prep for stealing
2764	le?vperm	$out4,$out4,$out4,$leperm
2765	stvx_u		$out3,$x30,$out
2766	stvx_u		$out4,$x40,$out
2768	bne		Lxts_enc6x_steal
# --- four blocks left ---
2773	vxor		$out0,$in2,$twk0
2774	vxor		$out1,$in3,$twk1
2775	vxor		$out2,$in4,$twk2
2776	vxor		$out3,$in5,$twk3
2777	vxor		$out4,$out4,$out4
2781	le?vperm	$out0,$out0,$out0,$leperm
2782	vmr		$twk0,$twk4		# unused tweak
2783	le?vperm	$out1,$out1,$out1,$leperm
2784	stvx_u		$out0,$x00,$out		# store output
2785	le?vperm	$out2,$out2,$out2,$leperm
2786	stvx_u		$out1,$x10,$out
2787	vxor		$tmp,$out3,$twk4	# last block prep for stealing
2788	le?vperm	$out3,$out3,$out3,$leperm
2789	stvx_u		$out2,$x20,$out
2790	stvx_u		$out3,$x30,$out
2792	bne		Lxts_enc6x_steal
# --- three blocks left ---
2797	vxor		$out0,$in3,$twk0
2798	vxor		$out1,$in4,$twk1
2799	vxor		$out2,$in5,$twk2
2800	vxor		$out3,$out3,$out3
2801	vxor		$out4,$out4,$out4
2805	le?vperm	$out0,$out0,$out0,$leperm
2806	vmr		$twk0,$twk3		# unused tweak
2807	le?vperm	$out1,$out1,$out1,$leperm
2808	stvx_u		$out0,$x00,$out		# store output
2809	vxor		$tmp,$out2,$twk3	# last block prep for stealing
2810	le?vperm	$out2,$out2,$out2,$leperm
2811	stvx_u		$out1,$x10,$out
2812	stvx_u		$out2,$x20,$out
2814	bne		Lxts_enc6x_steal
# --- two blocks left ---
2819	vxor		$out0,$in4,$twk0
2820	vxor		$out1,$in5,$twk1
2821	vxor		$out2,$out2,$out2
2822	vxor		$out3,$out3,$out3
2823	vxor		$out4,$out4,$out4
2827	le?vperm	$out0,$out0,$out0,$leperm
2828	vmr		$twk0,$twk2		# unused tweak
2829	vxor		$tmp,$out1,$twk2	# last block prep for stealing
2830	le?vperm	$out1,$out1,$out1,$leperm
2831	stvx_u		$out0,$x00,$out		# store output
2832	stvx_u		$out1,$x10,$out
2834	bne		Lxts_enc6x_steal
# --- one block left: single-lane round sequence, rounds from the
# stack-resident rotating buffer, then ciphertext-stealing prep. ---
2839	vxor		$out0,$in5,$twk0
2842	vcipher	$out0,$out0,v24
2843	lvx		v24,$x20,$key_		# round[3]
2844	addi		$key_,$key_,0x20
2846	vcipher	$out0,$out0,v25
2847	lvx		v25,$x10,$key_		# round[4]
2850	add		$inp,$inp,$taillen
2852	vcipher	$out0,$out0,v24
2855	vcipher	$out0,$out0,v25
2857	lvsr		$inpperm,0,$taillen
2858	vcipher	$out0,$out0,v26
2861	vcipher	$out0,$out0,v27
2863	addi		$key_,$sp,$FRAME+15	# rewind $key_
2864	vcipher	$out0,$out0,v28
2865	lvx		v24,$x00,$key_		# re-pre-load round[1]
2867	vcipher	$out0,$out0,v29
2868	lvx		v25,$x10,$key_		# re-pre-load round[2]
2869	vxor		$twk0,$twk0,v31
2871	le?vperm	$in0,$in0,$in0,$leperm
2872	vcipher	$out0,$out0,v30
2874	vperm		$in0,$in0,$in0,$inpperm
2875	vcipherlast	$out0,$out0,$twk0
2877	vmr		$twk0,$twk1		# unused tweak
2878	vxor		$tmp,$out0,$twk1	# last block prep for stealing
2879	le?vperm	$out0,$out0,$out0,$leperm
2880	stvx_u		$out0,$x00,$out		# store output
2882	bne		Lxts_enc6x_steal
# --- Ciphertext stealing: re-encrypt the last full ciphertext block
# combined (via vperm/vsel) with the $taillen-byte partial tail. ---
2890	add		$inp,$inp,$taillen
2893	lvsr		$inpperm,0,$taillen	# $in5 is no more
2894	le?vperm	$in0,$in0,$in0,$leperm
2895	vperm		$in0,$in0,$in0,$inpperm
2896	vxor		$tmp,$tmp,$twk0
2898	vxor		$in0,$in0,$twk0
2899	vxor		$out0,$out0,$out0
2901	vperm		$out0,$out0,$out1,$inpperm
2902	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?
# Byte-copy loop moving the stolen partial block into place.
2907 Loop_xts_enc6x_steal:
2910	bdnz		Loop_xts_enc6x_steal
2914	b		Loop_xts_enc1x		# one more time...
# --- Exit: tweak-chaining mode writes the next tweak back to iv[]. ---
2921	vxor		$tweak,$twk0,$rndkey0
2922	le?vperm	$tweak,$tweak,$tweak,$leperm
2923	stvx_u		$tweak,0,$ivp
# Epilogue: wipe the stack copies of the round keys, then restore
# nonvolatile VRs, vrsave and GPRs, and pop the frame.
2929	stvx		$seven,r10,$sp		# wipe copies of round keys
2947	lvx		v20,r10,$sp		# ABI says so
2969	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2970	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2971	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2972	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2973	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2974	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2975	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
# Traceback table (AIX/ELF ABI metadata).
2978	.byte		0,12,0x04,1,0x80,6,6,0
# 5x-interleaved finishing helper used by the encrypt6x tail cases:
# runs the remaining AES rounds on out0-out4 (round keys from the
# stack-resident v24/v25 rotating buffer, then resident v26-v30) and
# applies the final round with the tweak-masked operands. $out0's last
# operand is $twk0 (tweak only), while out1-out4 use in1-in4 which the
# caller pre-xored with their tweaks.
2983	vcipher	$out0,$out0,v24
2984	vcipher	$out1,$out1,v24
2985	vcipher	$out2,$out2,v24
2986	vcipher	$out3,$out3,v24
2987	vcipher	$out4,$out4,v24
2988	lvx		v24,$x20,$key_		# round[3]
2989	addi		$key_,$key_,0x20
2991	vcipher	$out0,$out0,v25
2992	vcipher	$out1,$out1,v25
2993	vcipher	$out2,$out2,v25
2994	vcipher	$out3,$out3,v25
2995	vcipher	$out4,$out4,v25
2996	lvx		v25,$x10,$key_		# round[4]
2997	bdnz		_aesp8_xts_enc5x
# Advance $inp past the partial tail so the stealing code can load it.
2999	add		$inp,$inp,$taillen
3001	vcipher	$out0,$out0,v24
3002	vcipher	$out1,$out1,v24
3003	vcipher	$out2,$out2,v24
3004	vcipher	$out3,$out3,v24
3005	vcipher	$out4,$out4,v24
3008	vcipher	$out0,$out0,v25
3009	vcipher	$out1,$out1,v25
3010	vcipher	$out2,$out2,v25
3011	vcipher	$out3,$out3,v25
3012	vcipher	$out4,$out4,v25
# Fold the pre-whitened last round key into twk0 (v31 = lastkey^rndkey0).
3013	vxor		$twk0,$twk0,v31
3015	vcipher	$out0,$out0,v26
3016	lvsr		$inpperm,0,$taillen	# $in5 is no more
3017	vcipher	$out1,$out1,v26
3018	vcipher	$out2,$out2,v26
3019	vcipher	$out3,$out3,v26
3020	vcipher	$out4,$out4,v26
3023	vcipher	$out0,$out0,v27
3025	vcipher	$out1,$out1,v27
3026	vcipher	$out2,$out2,v27
3027	vcipher	$out3,$out3,v27
3028	vcipher	$out4,$out4,v27
3031	addi		$key_,$sp,$FRAME+15	# rewind $key_
3032	vcipher	$out0,$out0,v28
3033	vcipher	$out1,$out1,v28
3034	vcipher	$out2,$out2,v28
3035	vcipher	$out3,$out3,v28
3036	vcipher	$out4,$out4,v28
3037	lvx		v24,$x00,$key_		# re-pre-load round[1]
3040	vcipher	$out0,$out0,v29
# $in0 holds the tail bytes; permute them into position for stealing.
3041	le?vperm	$in0,$in0,$in0,$leperm
3042	vcipher	$out1,$out1,v29
3043	vcipher	$out2,$out2,v29
3044	vcipher	$out3,$out3,v29
3045	vcipher	$out4,$out4,v29
3046	lvx		v25,$x10,$key_		# re-pre-load round[2]
3049	vcipher	$out0,$out0,v30
3050	vperm		$in0,$in0,$in0,$inpperm
3051	vcipher	$out1,$out1,v30
3052	vcipher	$out2,$out2,v30
3053	vcipher	$out3,$out3,v30
3054	vcipher	$out4,$out4,v30
3056	vcipherlast	$out0,$out0,$twk0
3057	vcipherlast	$out1,$out1,$in1
3058	vcipherlast	$out2,$out2,$in2
3059	vcipherlast	$out3,$out3,$in3
3060	vcipherlast	$out4,$out4,$in4
# Traceback table (AIX/ELF ABI metadata) for this internal helper.
3063	.byte		0,12,0x14,0,0,0,0,0
3066 _aesp8_xts_decrypt6x:
3067 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3069 li r7,`$FRAME+8*16+15`
3070 li r3,`$FRAME+8*16+31`
3071 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3072 stvx v20,r7,$sp # ABI says so
3095 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3097 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3099 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3101 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3103 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3105 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3107 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3111 subi $rounds,$rounds,3 # -4 in total
3113 lvx $rndkey0,$x00,$key1 # load key schedule
3115 addi $key1,$key1,0x20
3117 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3118 addi $key_,$sp,$FRAME+15
3122 ?vperm v24,v30,v31,$keyperm
3124 addi $key1,$key1,0x20
3125 stvx v24,$x00,$key_ # off-load round[1]
3126 ?vperm v25,v31,v30,$keyperm
3128 stvx v25,$x10,$key_ # off-load round[2]
3129 addi $key_,$key_,0x20
3130 bdnz Load_xts_dec_key
3133 ?vperm v24,v30,v31,$keyperm
3135 stvx v24,$x00,$key_ # off-load round[3]
3136 ?vperm v25,v31,v26,$keyperm
3138 stvx v25,$x10,$key_ # off-load round[4]
3139 addi $key_,$sp,$FRAME+15 # rewind $key_
3140 ?vperm v26,v26,v27,$keyperm
3142 ?vperm v27,v27,v28,$keyperm
3144 ?vperm v28,v28,v29,$keyperm
3146 ?vperm v29,v29,v30,$keyperm
3147 lvx $twk5,$x70,$key1 # borrow $twk5
3148 ?vperm v30,v30,v31,$keyperm
3149 lvx v24,$x00,$key_ # pre-load round[1]
3150 ?vperm v31,v31,$twk5,$keyperm
3151 lvx v25,$x10,$key_ # pre-load round[2]
# Compute the first six tweaks ($twk0-$twk5) and whiten six input blocks.
# The running tweak advances by the standard XTS multiply-by-alpha in
# GF(2^128): vaddubm doubles every byte lane, vsrab/vsldoi extract the
# per-lane carries, vand with $eighty7 keeps the inter-lane carry bits
# plus the 0x87 reduction term, and vxor folds them back in.
# Each stored tweak is pre-xored with round key 0 ($rndkey0) so the XTS
# whitening and AES round-0 AddRoundKey collapse into a single vxor.
3153 vperm $in0,$inout,$inptail,$inpperm
3154 subi $inp,$inp,31 # undo "caller"
3155 vxor $twk0,$tweak,$rndkey0
3156 vsrab $tmp,$tweak,$seven # next tweak value
3157 vaddubm $tweak,$tweak,$tweak
3158 vsldoi $tmp,$tmp,$tmp,15
3159 vand $tmp,$tmp,$eighty7
3160 vxor $out0,$in0,$twk0
3161 vxor $tweak,$tweak,$tmp
3163 lvx_u $in1,$x10,$inp
3164 vxor $twk1,$tweak,$rndkey0
3165 vsrab $tmp,$tweak,$seven # next tweak value
3166 vaddubm $tweak,$tweak,$tweak
3167 vsldoi $tmp,$tmp,$tmp,15
3168 le?vperm $in1,$in1,$in1,$leperm
3169 vand $tmp,$tmp,$eighty7
3170 vxor $out1,$in1,$twk1
3171 vxor $tweak,$tweak,$tmp
3173 lvx_u $in2,$x20,$inp
# $taillen = $len mod 16; non-zero means the last block is partial and
# ciphertext stealing will be needed at the end.
3174 andi. $taillen,$len,15
3175 vxor $twk2,$tweak,$rndkey0
3176 vsrab $tmp,$tweak,$seven # next tweak value
3177 vaddubm $tweak,$tweak,$tweak
3178 vsldoi $tmp,$tmp,$tmp,15
3179 le?vperm $in2,$in2,$in2,$leperm
3180 vand $tmp,$tmp,$eighty7
3181 vxor $out2,$in2,$twk2
3182 vxor $tweak,$tweak,$tmp
3184 lvx_u $in3,$x30,$inp
3185 sub $len,$len,$taillen
3186 vxor $twk3,$tweak,$rndkey0
3187 vsrab $tmp,$tweak,$seven # next tweak value
3188 vaddubm $tweak,$tweak,$tweak
3189 vsldoi $tmp,$tmp,$tmp,15
3190 le?vperm $in3,$in3,$in3,$leperm
3191 vand $tmp,$tmp,$eighty7
3192 vxor $out3,$in3,$twk3
3193 vxor $tweak,$tweak,$tmp
3195 lvx_u $in4,$x40,$inp
3197 vxor $twk4,$tweak,$rndkey0
3198 vsrab $tmp,$tweak,$seven # next tweak value
3199 vaddubm $tweak,$tweak,$tweak
3200 vsldoi $tmp,$tmp,$tmp,15
3201 le?vperm $in4,$in4,$in4,$leperm
3202 vand $tmp,$tmp,$eighty7
3203 vxor $out4,$in4,$twk4
3204 vxor $tweak,$tweak,$tmp
3206 lvx_u $in5,$x50,$inp
3208 vxor $twk5,$tweak,$rndkey0
3209 vsrab $tmp,$tweak,$seven # next tweak value
3210 vaddubm $tweak,$tweak,$tweak
3211 vsldoi $tmp,$tmp,$tmp,15
3212 le?vperm $in5,$in5,$in5,$leperm
3213 vand $tmp,$tmp,$eighty7
3214 vxor $out5,$in5,$twk5
3215 vxor $tweak,$tweak,$tmp
# Fold round key 0 into the last round key so that $twkN xor v31 later
# equals tweak xor last-round-key (the round-0 key cancels out).
3217 vxor v31,v31,$rndkey0
# Loop_xts_dec6x main body: six vncipher streams run in lockstep,
# interleaved with round-key reloads from the stack and with computation
# of the next six tweak values, to hide instruction latency.
3223 vncipher $out0,$out0,v24
3224 vncipher $out1,$out1,v24
3225 vncipher $out2,$out2,v24
3226 vncipher $out3,$out3,v24
3227 vncipher $out4,$out4,v24
3228 vncipher $out5,$out5,v24
3229 lvx v24,$x20,$key_ # round[3]
3230 addi $key_,$key_,0x20
3232 vncipher $out0,$out0,v25
3233 vncipher $out1,$out1,v25
3234 vncipher $out2,$out2,v25
3235 vncipher $out3,$out3,v25
3236 vncipher $out4,$out4,v25
3237 vncipher $out5,$out5,v25
3238 lvx v25,$x10,$key_ # round[4]
# subic records a borrow when $len < 96; subfe. below converts it to
# r0 = -1 (borrow) or 0, which adjusts $inp for the final iteration.
3241 subic $len,$len,96 # $len-=96
# $in0 becomes tweak0 xor last-round-key, consumed by vncipherlast below
# (v31 already has round key 0 folded in, so the whitening cancels).
# NOTE(review): the analogous preparation of $in1-$in5 happens on lines
# not visible in this sub-sampled view.
3242 vxor $in0,$twk0,v31 # xor with last round key
3243 vncipher $out0,$out0,v24
3244 vncipher $out1,$out1,v24
3245 vsrab $tmp,$tweak,$seven # next tweak value
3246 vxor $twk0,$tweak,$rndkey0
3247 vaddubm $tweak,$tweak,$tweak
3248 vncipher $out2,$out2,v24
3249 vncipher $out3,$out3,v24
3250 vsldoi $tmp,$tmp,$tmp,15
3251 vncipher $out4,$out4,v24
3252 vncipher $out5,$out5,v24
3254 subfe. r0,r0,r0 # borrow?-1:0
3255 vand $tmp,$tmp,$eighty7
3256 vncipher $out0,$out0,v25
3257 vncipher $out1,$out1,v25
3258 vxor $tweak,$tweak,$tmp
3259 vncipher $out2,$out2,v25
3260 vncipher $out3,$out3,v25
3262 vsrab $tmp,$tweak,$seven # next tweak value
3263 vxor $twk1,$tweak,$rndkey0
3264 vncipher $out4,$out4,v25
3265 vncipher $out5,$out5,v25
3268 vaddubm $tweak,$tweak,$tweak
3269 vsldoi $tmp,$tmp,$tmp,15
3270 vncipher $out0,$out0,v26
3271 vncipher $out1,$out1,v26
3272 vand $tmp,$tmp,$eighty7
3273 vncipher $out2,$out2,v26
3274 vncipher $out3,$out3,v26
3275 vxor $tweak,$tweak,$tmp
3276 vncipher $out4,$out4,v26
3277 vncipher $out5,$out5,v26
3279 add $inp,$inp,r0 # $inp is adjusted in such
3280 # way that at exit from the
3281 # loop inX-in5 are loaded
3284 vsrab $tmp,$tweak,$seven # next tweak value
3285 vxor $twk2,$tweak,$rndkey0
3286 vaddubm $tweak,$tweak,$tweak
3287 vncipher $out0,$out0,v27
3288 vncipher $out1,$out1,v27
3289 vsldoi $tmp,$tmp,$tmp,15
3290 vncipher $out2,$out2,v27
3291 vncipher $out3,$out3,v27
3292 vand $tmp,$tmp,$eighty7
3293 vncipher $out4,$out4,v27
3294 vncipher $out5,$out5,v27
3296 addi $key_,$sp,$FRAME+15 # rewind $key_
3297 vxor $tweak,$tweak,$tmp
3298 vncipher $out0,$out0,v28
3299 vncipher $out1,$out1,v28
3301 vsrab $tmp,$tweak,$seven # next tweak value
3302 vxor $twk3,$tweak,$rndkey0
3303 vncipher $out2,$out2,v28
3304 vncipher $out3,$out3,v28
3305 vaddubm $tweak,$tweak,$tweak
3306 vsldoi $tmp,$tmp,$tmp,15
3307 vncipher $out4,$out4,v28
3308 vncipher $out5,$out5,v28
3309 lvx v24,$x00,$key_ # re-pre-load round[1]
3310 vand $tmp,$tmp,$eighty7
3312 vncipher $out0,$out0,v29
3313 vncipher $out1,$out1,v29
3314 vxor $tweak,$tweak,$tmp
3315 vncipher $out2,$out2,v29
3316 vncipher $out3,$out3,v29
3318 vsrab $tmp,$tweak,$seven # next tweak value
3319 vxor $twk4,$tweak,$rndkey0
3320 vncipher $out4,$out4,v29
3321 vncipher $out5,$out5,v29
3322 lvx v25,$x10,$key_ # re-pre-load round[2]
3323 vaddubm $tweak,$tweak,$tweak
3324 vsldoi $tmp,$tmp,$tmp,15
3326 vncipher $out0,$out0,v30
3327 vncipher $out1,$out1,v30
3328 vand $tmp,$tmp,$eighty7
3329 vncipher $out2,$out2,v30
3330 vncipher $out3,$out3,v30
3331 vxor $tweak,$tweak,$tmp
3332 vncipher $out4,$out4,v30
3333 vncipher $out5,$out5,v30
3335 vsrab $tmp,$tweak,$seven # next tweak value
3336 vxor $twk5,$tweak,$rndkey0
# Final AES round for all six blocks; the next six inputs are loaded and
# byte-swapped (le?vperm, little-endian builds only) in the same window.
3338 vncipherlast $out0,$out0,$in0
3339 lvx_u $in0,$x00,$inp # load next input block
3340 vaddubm $tweak,$tweak,$tweak
3341 vsldoi $tmp,$tmp,$tmp,15
3342 vncipherlast $out1,$out1,$in1
3343 lvx_u $in1,$x10,$inp
3344 vncipherlast $out2,$out2,$in2
3345 le?vperm $in0,$in0,$in0,$leperm
3346 lvx_u $in2,$x20,$inp
3347 vand $tmp,$tmp,$eighty7
3348 vncipherlast $out3,$out3,$in3
3349 le?vperm $in1,$in1,$in1,$leperm
3350 lvx_u $in3,$x30,$inp
3351 vncipherlast $out4,$out4,$in4
3352 le?vperm $in2,$in2,$in2,$leperm
3353 lvx_u $in4,$x40,$inp
3354 vxor $tweak,$tweak,$tmp
3355 vncipherlast $out5,$out5,$in5
3356 le?vperm $in3,$in3,$in3,$leperm
3357 lvx_u $in5,$x50,$inp
3359 le?vperm $in4,$in4,$in4,$leperm
3360 le?vperm $in5,$in5,$in5,$leperm
# Store six decrypted blocks; re-whiten the next six inputs with the
# freshly computed tweaks while the stores drain.
3362 le?vperm $out0,$out0,$out0,$leperm
3363 le?vperm $out1,$out1,$out1,$leperm
3364 stvx_u $out0,$x00,$out # store output
3365 vxor $out0,$in0,$twk0
3366 le?vperm $out2,$out2,$out2,$leperm
3367 stvx_u $out1,$x10,$out
3368 vxor $out1,$in1,$twk1
3369 le?vperm $out3,$out3,$out3,$leperm
3370 stvx_u $out2,$x20,$out
3371 vxor $out2,$in2,$twk2
3372 le?vperm $out4,$out4,$out4,$leperm
3373 stvx_u $out3,$x30,$out
3374 vxor $out3,$in3,$twk3
3375 le?vperm $out5,$out5,$out5,$leperm
3376 stvx_u $out4,$x40,$out
3377 vxor $out4,$in4,$twk4
3378 stvx_u $out5,$x50,$out
3379 vxor $out5,$in5,$twk5
3383 beq Loop_xts_dec6x # did $len-=96 borrow?
# Tail dispatch: $len is restored to the 0..0x5f remainder and the code
# falls into per-count handlers (five, four, three, two remaining full
# blocks).  Each handler whitens the leftover inputs with the already
# computed tweaks, zeroes unused $outN slots, and (after the shared
# _aesp8_xts_dec5x helper, whose call sites are on lines not visible in
# this sub-sampled view) permutes and stores the results.  Each handler
# also parks the next unused tweak in $twk0 and pre-whitens $out0 for the
# ciphertext-stealing path, branching to Lxts_dec6x_steal when $taillen
# is non-zero (condition set by the earlier andi.).
3385 addic. $len,$len,0x60
3392 blt Lxts_dec6x_three
# Five remaining blocks.
3397 vxor $out0,$in1,$twk0
3398 vxor $out1,$in2,$twk1
3399 vxor $out2,$in3,$twk2
3400 vxor $out3,$in4,$twk3
3401 vxor $out4,$in5,$twk4
3405 le?vperm $out0,$out0,$out0,$leperm
3406 vmr $twk0,$twk5 # unused tweak
3407 vxor $twk1,$tweak,$rndkey0
3408 le?vperm $out1,$out1,$out1,$leperm
3409 stvx_u $out0,$x00,$out # store output
3410 vxor $out0,$in0,$twk1
3411 le?vperm $out2,$out2,$out2,$leperm
3412 stvx_u $out1,$x10,$out
3413 le?vperm $out3,$out3,$out3,$leperm
3414 stvx_u $out2,$x20,$out
3415 le?vperm $out4,$out4,$out4,$leperm
3416 stvx_u $out3,$x30,$out
3417 stvx_u $out4,$x40,$out
3419 bne Lxts_dec6x_steal
# Four remaining blocks.
3424 vxor $out0,$in2,$twk0
3425 vxor $out1,$in3,$twk1
3426 vxor $out2,$in4,$twk2
3427 vxor $out3,$in5,$twk3
3428 vxor $out4,$out4,$out4
3432 le?vperm $out0,$out0,$out0,$leperm
3433 vmr $twk0,$twk4 # unused tweak
3435 le?vperm $out1,$out1,$out1,$leperm
3436 stvx_u $out0,$x00,$out # store output
3437 vxor $out0,$in0,$twk5
3438 le?vperm $out2,$out2,$out2,$leperm
3439 stvx_u $out1,$x10,$out
3440 le?vperm $out3,$out3,$out3,$leperm
3441 stvx_u $out2,$x20,$out
3442 stvx_u $out3,$x30,$out
3444 bne Lxts_dec6x_steal
# Three remaining blocks.
3449 vxor $out0,$in3,$twk0
3450 vxor $out1,$in4,$twk1
3451 vxor $out2,$in5,$twk2
3452 vxor $out3,$out3,$out3
3453 vxor $out4,$out4,$out4
3457 le?vperm $out0,$out0,$out0,$leperm
3458 vmr $twk0,$twk3 # unused tweak
3460 le?vperm $out1,$out1,$out1,$leperm
3461 stvx_u $out0,$x00,$out # store output
3462 vxor $out0,$in0,$twk4
3463 le?vperm $out2,$out2,$out2,$leperm
3464 stvx_u $out1,$x10,$out
3465 stvx_u $out2,$x20,$out
3467 bne Lxts_dec6x_steal
# Two remaining blocks.
3472 vxor $out0,$in4,$twk0
3473 vxor $out1,$in5,$twk1
3474 vxor $out2,$out2,$out2
3475 vxor $out3,$out3,$out3
3476 vxor $out4,$out4,$out4
3480 le?vperm $out0,$out0,$out0,$leperm
3481 vmr $twk0,$twk2 # unused tweak
3483 le?vperm $out1,$out1,$out1,$leperm
3484 stvx_u $out0,$x00,$out # store output
3485 vxor $out0,$in0,$twk3
3486 stvx_u $out1,$x10,$out
3488 bne Lxts_dec6x_steal
# One remaining full block: a single vncipher track through the whole
# schedule, reloading round keys from the stack scratch area; the final
# vncipherlast xors in $twk0 (tweak folded with the last round key).
3493 vxor $out0,$in5,$twk0
3496 vncipher $out0,$out0,v24
3497 lvx v24,$x20,$key_ # round[3]
3498 addi $key_,$key_,0x20
3500 vncipher $out0,$out0,v25
3501 lvx v25,$x10,$key_ # round[4]
3505 vncipher $out0,$out0,v24
3509 vncipher $out0,$out0,v25
3512 vncipher $out0,$out0,v26
3515 vncipher $out0,$out0,v27
3517 addi $key_,$sp,$FRAME+15 # rewind $key_
3518 vncipher $out0,$out0,v28
3519 lvx v24,$x00,$key_ # re-pre-load round[1]
3521 vncipher $out0,$out0,v29
3522 lvx v25,$x10,$key_ # re-pre-load round[2]
3523 vxor $twk0,$twk0,v31
3525 le?vperm $in0,$in0,$in0,$leperm
3526 vncipher $out0,$out0,v30
3529 vncipherlast $out0,$out0,$twk0
3531 vmr $twk0,$twk1 # unused tweak
3533 le?vperm $out0,$out0,$out0,$leperm
3534 stvx_u $out0,$x00,$out # store output
3536 vxor $out0,$in0,$twk2
3537 bne Lxts_dec6x_steal
# Lxts_dec6x_steal: ciphertext stealing for a partial ($taillen-byte)
# final block.  The second-to-last block is decrypted with the next
# tweak ($twk1)...
3546 le?vperm $in0,$in0,$in0,$leperm
3547 vxor $out0,$in0,$twk1
3549 vncipher $out0,$out0,v24
3550 lvx v24,$x20,$key_ # round[3]
3551 addi $key_,$key_,0x20
3553 vncipher $out0,$out0,v25
3554 lvx v25,$x10,$key_ # round[4]
3555 bdnz Lxts_dec6x_steal
3557 add $inp,$inp,$taillen
3558 vncipher $out0,$out0,v24
3561 vncipher $out0,$out0,v25
3564 vncipher $out0,$out0,v26
# lvsr builds a rotate-by-$taillen permute used to splice the stolen
# ciphertext bytes; $inpperm is repurposed for it from here on.
3566 lvsr $inpperm,0,$taillen # $in5 is no more
3567 vncipher $out0,$out0,v27
3569 addi $key_,$sp,$FRAME+15 # rewind $key_
3570 vncipher $out0,$out0,v28
3571 lvx v24,$x00,$key_ # re-pre-load round[1]
3573 vncipher $out0,$out0,v29
3574 lvx v25,$x10,$key_ # re-pre-load round[2]
3575 vxor $twk1,$twk1,v31
3577 le?vperm $in0,$in0,$in0,$leperm
3578 vncipher $out0,$out0,v30
3580 vperm $in0,$in0,$in0,$inpperm
3581 vncipherlast $tmp,$out0,$twk1
# Store the full decrypted block (byte-swapped on LE, raw on BE), then
# build the final short block: vsel merges the tail ciphertext bytes
# with the decrypted block under a mask derived from $inpperm, and the
# result is whitened with $twk0 for the last single-block pass
# (Loop_xts_dec1x, reached via the branch below).
3583 le?vperm $out0,$tmp,$tmp,$leperm
3584 le?stvx_u $out0,0,$out
3585 be?stvx_u $tmp,0,$out
3587 vxor $out0,$out0,$out0
3589 vperm $out0,$out0,$out1,$inpperm
3590 vsel $out0,$in0,$tmp,$out0
3591 vxor $out0,$out0,$twk0
# Byte-copy loop for the stolen tail (body on lines not visible here).
3595 Loop_xts_dec6x_steal:
3598 bdnz Loop_xts_dec6x_steal
3602 b Loop_xts_dec1x # one more time...
# Epilogue: write the final tweak back to $ivp (undoing the $rndkey0
# pre-xor first), scrub the stack copies of the round keys, and restore
# the non-volatile registers saved in the prologue.
3609 vxor $tweak,$twk0,$rndkey0
3610 le?vperm $tweak,$tweak,$tweak,$leperm
3611 stvx_u $tweak,0,$ivp
# $seven is reused as a harmless fill value to overwrite key material.
3617 stvx $seven,r10,$sp # wipe copies of round keys
3635 lvx v20,r10,$sp # ABI says so
3657 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3658 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3659 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3660 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3661 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3662 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3663 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
# AIX/ELF traceback table bytes.
3666 .byte 0,12,0x04,1,0x80,6,6,0
# _aesp8_xts_dec5x: shared 5-track AES decryption helper used by the
# tail handlers above.  Middle rounds loop (counted by CTR via bdnz),
# reloading two round keys per pass from the stack scratch area; then
# the remaining rounds run with the keys resident in v24-v30.  Only
# $out0's final round uses $twk0 (tweak xor last round key); $out1-$out4
# use $in1-$in4, which the callers prepared (preparation lines are not
# visible in this sub-sampled view).
3671 vncipher $out0,$out0,v24
3672 vncipher $out1,$out1,v24
3673 vncipher $out2,$out2,v24
3674 vncipher $out3,$out3,v24
3675 vncipher $out4,$out4,v24
3676 lvx v24,$x20,$key_ # round[3]
3677 addi $key_,$key_,0x20
3679 vncipher $out0,$out0,v25
3680 vncipher $out1,$out1,v25
3681 vncipher $out2,$out2,v25
3682 vncipher $out3,$out3,v25
3683 vncipher $out4,$out4,v25
3684 lvx v25,$x10,$key_ # round[4]
3685 bdnz _aesp8_xts_dec5x
# Post-loop rounds with resident keys v24-v30.
3688 vncipher $out0,$out0,v24
3689 vncipher $out1,$out1,v24
3690 vncipher $out2,$out2,v24
3691 vncipher $out3,$out3,v24
3692 vncipher $out4,$out4,v24
3696 vncipher $out0,$out0,v25
3697 vncipher $out1,$out1,v25
3698 vncipher $out2,$out2,v25
3699 vncipher $out3,$out3,v25
3700 vncipher $out4,$out4,v25
# Fold the last round key into $twk0 for $out0's vncipherlast below.
3701 vxor $twk0,$twk0,v31
3704 vncipher $out0,$out0,v26
3705 vncipher $out1,$out1,v26
3706 vncipher $out2,$out2,v26
3707 vncipher $out3,$out3,v26
3708 vncipher $out4,$out4,v26
3711 vncipher $out0,$out0,v27
3713 vncipher $out1,$out1,v27
3714 vncipher $out2,$out2,v27
3715 vncipher $out3,$out3,v27
3716 vncipher $out4,$out4,v27
3719 addi $key_,$sp,$FRAME+15 # rewind $key_
3720 vncipher $out0,$out0,v28
3721 vncipher $out1,$out1,v28
3722 vncipher $out2,$out2,v28
3723 vncipher $out3,$out3,v28
3724 vncipher $out4,$out4,v28
3725 lvx v24,$x00,$key_ # re-pre-load round[1]
3728 vncipher $out0,$out0,v29
3729 le?vperm $in0,$in0,$in0,$leperm
3730 vncipher $out1,$out1,v29
3731 vncipher $out2,$out2,v29
3732 vncipher $out3,$out3,v29
3733 vncipher $out4,$out4,v29
3734 lvx v25,$x10,$key_ # re-pre-load round[2]
3737 vncipher $out0,$out0,v30
3738 vncipher $out1,$out1,v30
3739 vncipher $out2,$out2,v30
3740 vncipher $out3,$out3,v30
3741 vncipher $out4,$out4,v30
3743 vncipherlast $out0,$out0,$twk0
3744 vncipherlast $out1,$out1,$in1
3745 vncipherlast $out2,$out2,$in2
3746 vncipherlast $out3,$out3,$in3
3747 vncipherlast $out4,$out4,$in4
# Traceback table bytes.
3751 .byte 0,12,0x14,0,0,0,0,0
3756 foreach(split("\n",$code)) {
3757 s/\`([^\`]*)\`/eval($1)/geo;
3759 # constants table endian-specific conversion
3760 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3764 # convert to endian-agnostic format
3766 foreach (split(/,\s*/,$2)) {
3767 my $l = /^0/?oct:int;
3768 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3771 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3774 # little-endian conversion
3775 if ($flavour =~ /le$/o) {
3776 SWITCH: for($conv) {
3777 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3778 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3783 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3786 $consts=0 if (m/Lconsts:/o); # end of table
3788 # instructions prefixed with '?' are endian-specific and need
3789 # to be adjusted accordingly...
3790 if ($flavour =~ /le$/o) { # little-endian
3795 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3796 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3797 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3798 } else { # big-endian