1 #! /usr/bin/env perl
2 # SPDX-License-Identifier: GPL-2.0
3
4 # This code is taken from CRYPTOGAMs[1] and is included here using the option
5 # in the license to distribute the code under the GPL. Therefore this program
6 # is free software; you can redistribute it and/or modify it under the terms of
7 # the GNU General Public License version 2 as published by the Free Software
8 # Foundation.
9 #
10 # [1] https://www.openssl.org/~appro/cryptogams/
11
12 # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13 # All rights reserved.
14 #
15 # Redistribution and use in source and binary forms, with or without
16 # modification, are permitted provided that the following conditions
17 # are met:
18 #
19 #       * Redistributions of source code must retain copyright notices,
20 #         this list of conditions and the following disclaimer.
21 #
22 #       * Redistributions in binary form must reproduce the above
23 #         copyright notice, this list of conditions and the following
24 #         disclaimer in the documentation and/or other materials
25 #         provided with the distribution.
26 #
27 #       * Neither the name of the CRYPTOGAMS nor the names of its
28 #         copyright holder and contributors may be used to endorse or
29 #         promote products derived from this software without specific
30 #         prior written permission.
31 #
32 # ALTERNATIVELY, provided that this notice is retained in full, this
33 # product may be distributed under the terms of the GNU General Public
34 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
35 # those given above.
36 #
37 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48
49 # ====================================================================
50 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51 # project. The module is, however, dual licensed under OpenSSL and
52 # CRYPTOGAMS licenses depending on where you obtain it. For further
53 # details see http://www.openssl.org/~appro/cryptogams/.
54 # ====================================================================
55 #
56 # This module implements support for AES instructions as per the
57 # PowerISA specification version 2.07, first implemented by the POWER8
58 # processor. The module is endian-agnostic in the sense that it
59 # supports both big- and little-endian cases. Data alignment in
60 # parallelizable modes is handled with VSX loads and stores, which
61 # implies the MSR.VSX flag being set. It should also be noted that the
62 # ISA specification doesn't prohibit alignment exceptions for these
63 # instructions on page boundaries. Initially, alignment was handled in
64 # a pure AltiVec/VMX way [with data aligned programmatically, which in
65 # turn guarantees exception-free execution], but that turned out to
66 # hamper performance when vcipher instructions are interleaved. It is
67 # reckoned that the eventual misalignment penalties at page boundaries
68 # are on average lower than the overhead of the pure AltiVec approach.
69 #
70 # May 2016
71 #
72 # Added an XTS subroutine; improvements of 9x on little-endian and 12x
73 # on big-endian systems were measured.
74 #
75 ######################################################################
76 # Current large-block performance in cycles per byte processed with a
77 # 128-bit key (lower is better).
78 #
79 #               CBC en-/decrypt CTR     XTS
80 # POWER8[le]    3.96/0.72       0.74    1.1
81 # POWER8[be]    3.75/0.65       0.66    1.0
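#
# For reference, the kernel-side glue code in drivers/crypto/vmx calls
# the routines generated below through C prototypes along the lines of
# this sketch (an illustration only; the authoritative declarations
# live in the companion aesp8-ppc.h header). The 240-byte key area
# matches the "lwz $rounds,240($key)" loads used throughout:
#
#	struct aes_key { u8 key[240]; int rounds; };
#
#	int  aes_p8_set_encrypt_key(const u8 *in_key, const int bits,
#				    struct aes_key *key);
#	int  aes_p8_set_decrypt_key(const u8 *in_key, const int bits,
#				    struct aes_key *key);
#	void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);
#	void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key);
#	void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
#				const struct aes_key *key, u8 *iv, int enc);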
82
83 $flavour = shift;
84
85 if ($flavour =~ /64/) {
86         $SIZE_T =8;
87         $LRSAVE =2*$SIZE_T;
88         $STU    ="stdu";
89         $POP    ="ld";
90         $PUSH   ="std";
91         $UCMP   ="cmpld";
92         $SHL    ="sldi";
93 } elsif ($flavour =~ /32/) {
94         $SIZE_T =4;
95         $LRSAVE =$SIZE_T;
96         $STU    ="stwu";
97         $POP    ="lwz";
98         $PUSH   ="stw";
99         $UCMP   ="cmplw";
100         $SHL    ="slwi";
101 } else { die "nonsense $flavour"; }
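
# The flavour argument selects word size, ABI and endianness for the
# generated assembly. An illustrative invocation (the kernel Makefile
# drives this through its own rule, so the exact command line there may
# differ):
#
#	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S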
102
103 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
104
105 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
106 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
107 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
108 die "can't locate ppc-xlate.pl";
109
110 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
111
112 $FRAME=8*$SIZE_T;
113 $prefix="aes_p8";
114
115 $sp="r1";
116 $vrsave="r12";
117
118 #########################################################################
119 {{{     # Key setup procedures                                          #
120 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
121 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
122 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
123
124 $code.=<<___;
125 .machine        "any"
126
127 .text
128
129 .align  7
130 rcon:
131 .long   0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
132 .long   0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
133 .long   0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
134 .long   0,0,0,0                                         ?asis
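# Lconsts materializes the run-time address of the rcon table above:
# "bcl 20,31,\$+4" is a branch-and-link to the very next instruction,
# used only to capture the program counter in LR. The captured address
# is 0x48 bytes past rcon (64 bytes of table plus two 4-byte
# instructions), hence the addi of -0x48 below.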
135 Lconsts:
136         mflr    r0
137         bcl     20,31,\$+4
138         mflr    $ptr     # distance between . and rcon is 0x48
139         addi    $ptr,$ptr,-0x48
140         mtlr    r0
141         blr
142         .long   0
143         .byte   0,12,0x14,0,0,0,0,0
144 .asciz  "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
145
146 .globl  .${prefix}_set_encrypt_key
147 Lset_encrypt_key:
148         mflr            r11
149         $PUSH           r11,$LRSAVE($sp)
150
151         li              $ptr,-1
152         ${UCMP}i        $inp,0
153         beq-            Lenc_key_abort          # if ($inp==0) return -1;
154         ${UCMP}i        $out,0
155         beq-            Lenc_key_abort          # if ($out==0) return -1;
156         li              $ptr,-2
157         cmpwi           $bits,128
158         blt-            Lenc_key_abort
159         cmpwi           $bits,256
160         bgt-            Lenc_key_abort
161         andi.           r0,$bits,0x3f
162         bne-            Lenc_key_abort
163
164         lis             r0,0xfff0
165         mfspr           $vrsave,256
166         mtspr           256,r0
167
168         bl              Lconsts
169         mtlr            r11
170
171         neg             r9,$inp
172         lvx             $in0,0,$inp
173         addi            $inp,$inp,15            # 15 is not a typo
174         lvsr            $key,0,r9               # borrow $key
175         li              r8,0x20
176         cmpwi           $bits,192
177         lvx             $in1,0,$inp
178         le?vspltisb     $mask,0x0f              # borrow $mask
179         lvx             $rcon,0,$ptr
180         le?vxor         $key,$key,$mask         # adjust for byte swap
181         lvx             $mask,r8,$ptr
182         addi            $ptr,$ptr,0x10
183         vperm           $in0,$in0,$in1,$key     # align [and byte swap in LE]
184         li              $cnt,8
185         vxor            $zero,$zero,$zero
186         mtctr           $cnt
187
188         ?lvsr           $outperm,0,$out
189         vspltisb        $outmask,-1
190         lvx             $outhead,0,$out
191         ?vperm          $outmask,$zero,$outmask,$outperm
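
# $outperm/$outmask/$outhead implement unaligned stores in pure VMX:
# each quadword of key schedule is rotated into store alignment with
# vperm, then merged with the previously stored quadword via vsel, so
# no individual stvx ever crosses a 16-byte boundary.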
192
193         blt             Loop128
194         addi            $inp,$inp,8
195         beq             L192
196         addi            $inp,$inp,8
197         b               L256
198
199 .align  4
200 Loop128:
201         vperm           $key,$in0,$in0,$mask    # rotate-n-splat
202         vsldoi          $tmp,$zero,$in0,12      # >>32
203          vperm          $outtail,$in0,$in0,$outperm     # rotate
204          vsel           $stage,$outhead,$outtail,$outmask
205          vmr            $outhead,$outtail
206         vcipherlast     $key,$key,$rcon
207          stvx           $stage,0,$out
208          addi           $out,$out,16
209
210         vxor            $in0,$in0,$tmp
211         vsldoi          $tmp,$zero,$tmp,12      # >>32
212         vxor            $in0,$in0,$tmp
213         vsldoi          $tmp,$zero,$tmp,12      # >>32
214         vxor            $in0,$in0,$tmp
215          vadduwm        $rcon,$rcon,$rcon
216         vxor            $in0,$in0,$key
217         bdnz            Loop128
218
219         lvx             $rcon,0,$ptr            # last two round keys
220
221         vperm           $key,$in0,$in0,$mask    # rotate-n-splat
222         vsldoi          $tmp,$zero,$in0,12      # >>32
223          vperm          $outtail,$in0,$in0,$outperm     # rotate
224          vsel           $stage,$outhead,$outtail,$outmask
225          vmr            $outhead,$outtail
226         vcipherlast     $key,$key,$rcon
227          stvx           $stage,0,$out
228          addi           $out,$out,16
229
230         vxor            $in0,$in0,$tmp
231         vsldoi          $tmp,$zero,$tmp,12      # >>32
232         vxor            $in0,$in0,$tmp
233         vsldoi          $tmp,$zero,$tmp,12      # >>32
234         vxor            $in0,$in0,$tmp
235          vadduwm        $rcon,$rcon,$rcon
236         vxor            $in0,$in0,$key
237
238         vperm           $key,$in0,$in0,$mask    # rotate-n-splat
239         vsldoi          $tmp,$zero,$in0,12      # >>32
240          vperm          $outtail,$in0,$in0,$outperm     # rotate
241          vsel           $stage,$outhead,$outtail,$outmask
242          vmr            $outhead,$outtail
243         vcipherlast     $key,$key,$rcon
244          stvx           $stage,0,$out
245          addi           $out,$out,16
246
247         vxor            $in0,$in0,$tmp
248         vsldoi          $tmp,$zero,$tmp,12      # >>32
249         vxor            $in0,$in0,$tmp
250         vsldoi          $tmp,$zero,$tmp,12      # >>32
251         vxor            $in0,$in0,$tmp
252         vxor            $in0,$in0,$key
253          vperm          $outtail,$in0,$in0,$outperm     # rotate
254          vsel           $stage,$outhead,$outtail,$outmask
255          vmr            $outhead,$outtail
256          stvx           $stage,0,$out
257
258         addi            $inp,$out,15            # 15 is not a typo
259         addi            $out,$out,0x50
260
261         li              $rounds,10
262         b               Ldone
263
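# Each Loop128 iteration above is one round of the textbook AES-128 key
# expansion. A scalar C sketch of what it computes (illustrative only;
# rot_word and sub_word are hypothetical helpers for the byte rotation
# and the S-box, w[0..3] are the 32-bit words of the round key):
#
#	t = sub_word(rot_word(w[3])) ^ rcon;	/* vperm + vcipherlast */
#	w[0] ^= t;				/* the vsldoi/vxor chain */
#	w[1] ^= w[0];
#	w[2] ^= w[1];
#	w[3] ^= w[2];
#	rcon <<= 1;				/* vadduwm $rcon,$rcon,$rcon */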
264 .align  4
265 L192:
266         lvx             $tmp,0,$inp
267         li              $cnt,4
268          vperm          $outtail,$in0,$in0,$outperm     # rotate
269          vsel           $stage,$outhead,$outtail,$outmask
270          vmr            $outhead,$outtail
271          stvx           $stage,0,$out
272          addi           $out,$out,16
273         vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
274         vspltisb        $key,8                  # borrow $key
275         mtctr           $cnt
276         vsububm         $mask,$mask,$key        # adjust the mask
277
278 Loop192:
279         vperm           $key,$in1,$in1,$mask    # rotate-n-splat
280         vsldoi          $tmp,$zero,$in0,12      # >>32
281         vcipherlast     $key,$key,$rcon
282
283         vxor            $in0,$in0,$tmp
284         vsldoi          $tmp,$zero,$tmp,12      # >>32
285         vxor            $in0,$in0,$tmp
286         vsldoi          $tmp,$zero,$tmp,12      # >>32
287         vxor            $in0,$in0,$tmp
288
289          vsldoi         $stage,$zero,$in1,8
290         vspltw          $tmp,$in0,3
291         vxor            $tmp,$tmp,$in1
292         vsldoi          $in1,$zero,$in1,12      # >>32
293          vadduwm        $rcon,$rcon,$rcon
294         vxor            $in1,$in1,$tmp
295         vxor            $in0,$in0,$key
296         vxor            $in1,$in1,$key
297          vsldoi         $stage,$stage,$in0,8
298
299         vperm           $key,$in1,$in1,$mask    # rotate-n-splat
300         vsldoi          $tmp,$zero,$in0,12      # >>32
301          vperm          $outtail,$stage,$stage,$outperm # rotate
302          vsel           $stage,$outhead,$outtail,$outmask
303          vmr            $outhead,$outtail
304         vcipherlast     $key,$key,$rcon
305          stvx           $stage,0,$out
306          addi           $out,$out,16
307
308          vsldoi         $stage,$in0,$in1,8
309         vxor            $in0,$in0,$tmp
310         vsldoi          $tmp,$zero,$tmp,12      # >>32
311          vperm          $outtail,$stage,$stage,$outperm # rotate
312          vsel           $stage,$outhead,$outtail,$outmask
313          vmr            $outhead,$outtail
314         vxor            $in0,$in0,$tmp
315         vsldoi          $tmp,$zero,$tmp,12      # >>32
316         vxor            $in0,$in0,$tmp
317          stvx           $stage,0,$out
318          addi           $out,$out,16
319
320         vspltw          $tmp,$in0,3
321         vxor            $tmp,$tmp,$in1
322         vsldoi          $in1,$zero,$in1,12      # >>32
323          vadduwm        $rcon,$rcon,$rcon
324         vxor            $in1,$in1,$tmp
325         vxor            $in0,$in0,$key
326         vxor            $in1,$in1,$key
327          vperm          $outtail,$in0,$in0,$outperm     # rotate
328          vsel           $stage,$outhead,$outtail,$outmask
329          vmr            $outhead,$outtail
330          stvx           $stage,0,$out
331          addi           $inp,$out,15            # 15 is not a typo
332          addi           $out,$out,16
333         bdnz            Loop192
334
335         li              $rounds,12
336         addi            $out,$out,0x20
337         b               Ldone
338
339 .align  4
340 L256:
341         lvx             $tmp,0,$inp
342         li              $cnt,7
343         li              $rounds,14
344          vperm          $outtail,$in0,$in0,$outperm     # rotate
345          vsel           $stage,$outhead,$outtail,$outmask
346          vmr            $outhead,$outtail
347          stvx           $stage,0,$out
348          addi           $out,$out,16
349         vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
350         mtctr           $cnt
351
352 Loop256:
353         vperm           $key,$in1,$in1,$mask    # rotate-n-splat
354         vsldoi          $tmp,$zero,$in0,12      # >>32
355          vperm          $outtail,$in1,$in1,$outperm     # rotate
356          vsel           $stage,$outhead,$outtail,$outmask
357          vmr            $outhead,$outtail
358         vcipherlast     $key,$key,$rcon
359          stvx           $stage,0,$out
360          addi           $out,$out,16
361
362         vxor            $in0,$in0,$tmp
363         vsldoi          $tmp,$zero,$tmp,12      # >>32
364         vxor            $in0,$in0,$tmp
365         vsldoi          $tmp,$zero,$tmp,12      # >>32
366         vxor            $in0,$in0,$tmp
367          vadduwm        $rcon,$rcon,$rcon
368         vxor            $in0,$in0,$key
369          vperm          $outtail,$in0,$in0,$outperm     # rotate
370          vsel           $stage,$outhead,$outtail,$outmask
371          vmr            $outhead,$outtail
372          stvx           $stage,0,$out
373          addi           $inp,$out,15            # 15 is not a typo
374          addi           $out,$out,16
375         bdz             Ldone
376
377         vspltw          $key,$in0,3             # just splat
378         vsldoi          $tmp,$zero,$in1,12      # >>32
379         vsbox           $key,$key
380
381         vxor            $in1,$in1,$tmp
382         vsldoi          $tmp,$zero,$tmp,12      # >>32
383         vxor            $in1,$in1,$tmp
384         vsldoi          $tmp,$zero,$tmp,12      # >>32
385         vxor            $in1,$in1,$tmp
386
387         vxor            $in1,$in1,$key
388         b               Loop256
389
390 .align  4
391 Ldone:
392         lvx             $in1,0,$inp             # redundant in aligned case
393         vsel            $in1,$outhead,$in1,$outmask
394         stvx            $in1,0,$inp
395         li              $ptr,0
396         mtspr           256,$vrsave
397         stw             $rounds,0($out)
398
399 Lenc_key_abort:
400         mr              r3,$ptr
401         blr
402         .long           0
403         .byte           0,12,0x14,1,0,0,3,0
404         .long           0
405 .size   .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
406
407 .globl  .${prefix}_set_decrypt_key
408         $STU            $sp,-$FRAME($sp)
409         mflr            r10
410         $PUSH           r10,$FRAME+$LRSAVE($sp)
411         bl              Lset_encrypt_key
412         mtlr            r10
413
414         cmpwi           r3,0
415         bne-            Ldec_key_abort
416
417         slwi            $cnt,$rounds,4
418         subi            $inp,$out,240           # first round key
419         srwi            $rounds,$rounds,1
420         add             $out,$inp,$cnt          # last round key
421         mtctr           $rounds
422
423 Ldeckey:
424         lwz             r0, 0($inp)
425         lwz             r6, 4($inp)
426         lwz             r7, 8($inp)
427         lwz             r8, 12($inp)
428         addi            $inp,$inp,16
429         lwz             r9, 0($out)
430         lwz             r10,4($out)
431         lwz             r11,8($out)
432         lwz             r12,12($out)
433         stw             r0, 0($out)
434         stw             r6, 4($out)
435         stw             r7, 8($out)
436         stw             r8, 12($out)
437         subi            $out,$out,16
438         stw             r9, -16($inp)
439         stw             r10,-12($inp)
440         stw             r11,-8($inp)
441         stw             r12,-4($inp)
442         bdnz            Ldeckey
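
# The loop above reverses the order of the 16-byte round keys in place,
# so decryption can walk the same schedule front to back. A C sketch of
# the operation (illustrative; swap16 is a hypothetical 16-byte swap):
#
#	u8 *lo = rk, *hi = rk + 16*rounds;
#	for (; lo < hi; lo += 16, hi -= 16)
#		swap16(lo, hi);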
443
444         xor             r3,r3,r3                # return value
445 Ldec_key_abort:
446         addi            $sp,$sp,$FRAME
447         blr
448         .long           0
449         .byte           0,12,4,1,0x80,0,3,0
450         .long           0
451 .size   .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
452 ___
453 }}}
454 #########################################################################
455 {{{     # Single block en- and decrypt procedures                       #
456 sub gen_block () {
457 my $dir = shift;
458 my $n   = $dir eq "de" ? "n" : "";
459 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
460
461 $code.=<<___;
462 .globl  .${prefix}_${dir}crypt
463         lwz             $rounds,240($key)
464         lis             r0,0xfc00
465         mfspr           $vrsave,256
466         li              $idx,15                 # 15 is not a typo
467         mtspr           256,r0
468
469         lvx             v0,0,$inp
470         neg             r11,$out
471         lvx             v1,$idx,$inp
472         lvsl            v2,0,$inp               # inpperm
473         le?vspltisb     v4,0x0f
474         ?lvsl           v3,0,r11                # outperm
475         le?vxor         v2,v2,v4
476         li              $idx,16
477         vperm           v0,v0,v1,v2             # align [and byte swap in LE]
478         lvx             v1,0,$key
479         ?lvsl           v5,0,$key               # keyperm
480         srwi            $rounds,$rounds,1
481         lvx             v2,$idx,$key
482         addi            $idx,$idx,16
483         subi            $rounds,$rounds,1
484         ?vperm          v1,v1,v2,v5             # align round key
485
486         vxor            v0,v0,v1
487         lvx             v1,$idx,$key
488         addi            $idx,$idx,16
489         mtctr           $rounds
490
491 Loop_${dir}c:
492         ?vperm          v2,v2,v1,v5
493         v${n}cipher     v0,v0,v2
494         lvx             v2,$idx,$key
495         addi            $idx,$idx,16
496         ?vperm          v1,v1,v2,v5
497         v${n}cipher     v0,v0,v1
498         lvx             v1,$idx,$key
499         addi            $idx,$idx,16
500         bdnz            Loop_${dir}c
501
502         ?vperm          v2,v2,v1,v5
503         v${n}cipher     v0,v0,v2
504         lvx             v2,$idx,$key
505         ?vperm          v1,v1,v2,v5
506         v${n}cipherlast v0,v0,v1
507
508         vspltisb        v2,-1
509         vxor            v1,v1,v1
510         li              $idx,15                 # 15 is not a typo
511         ?vperm          v2,v1,v2,v3             # outmask
512         le?vxor         v3,v3,v4
513         lvx             v1,0,$out               # outhead
514         vperm           v0,v0,v0,v3             # rotate [and byte swap in LE]
515         vsel            v1,v1,v0,v2
516         lvx             v4,$idx,$out
517         stvx            v1,0,$out
518         vsel            v0,v0,v4,v2
519         stvx            v0,$idx,$out
520
521         mtspr           256,$vrsave
522         blr
523         .long           0
524         .byte           0,12,0x14,0,0,0,3,0
525         .long           0
526 .size   .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
527 ___
528 }
529 &gen_block("en");
530 &gen_block("de");
531 }}}
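# A minimal C usage sketch for the two single-block entry points above
# (illustrative only; the schedule must be expanded first and errors
# should be checked as in the real glue code):
#
#	struct aes_key ek;
#	u8 in[16], out[16];
#	if (aes_p8_set_encrypt_key(user_key, 128, &ek) == 0)
#		aes_p8_encrypt(in, out, &ek);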
532 #########################################################################
533 {{{     # CBC en- and decrypt procedures                                #
534 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
535 my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
536 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
537                                                 map("v$_",(4..10));
538 $code.=<<___;
539 .globl  .${prefix}_cbc_encrypt
540         ${UCMP}i        $len,16
541         bltlr-
542
543         cmpwi           $enc,0                  # test direction
544         lis             r0,0xffe0
545         mfspr           $vrsave,256
546         mtspr           256,r0
547
548         li              $idx,15
549         vxor            $rndkey0,$rndkey0,$rndkey0
550         le?vspltisb     $tmp,0x0f
551
552         lvx             $ivec,0,$ivp            # load [unaligned] iv
553         lvsl            $inpperm,0,$ivp
554         lvx             $inptail,$idx,$ivp
555         le?vxor         $inpperm,$inpperm,$tmp
556         vperm           $ivec,$ivec,$inptail,$inpperm
557
558         neg             r11,$inp
559         ?lvsl           $keyperm,0,$key         # prepare for unaligned key
560         lwz             $rounds,240($key)
561
562         lvsr            $inpperm,0,r11          # prepare for unaligned load
563         lvx             $inptail,0,$inp
564         addi            $inp,$inp,15            # 15 is not a typo
565         le?vxor         $inpperm,$inpperm,$tmp
566
567         ?lvsr           $outperm,0,$out         # prepare for unaligned store
568         vspltisb        $outmask,-1
569         lvx             $outhead,0,$out
570         ?vperm          $outmask,$rndkey0,$outmask,$outperm
571         le?vxor         $outperm,$outperm,$tmp
572
573         srwi            $rounds,$rounds,1
574         li              $idx,16
575         subi            $rounds,$rounds,1
576         beq             Lcbc_dec
577
578 Lcbc_enc:
579         vmr             $inout,$inptail
580         lvx             $inptail,0,$inp
581         addi            $inp,$inp,16
582         mtctr           $rounds
583         subi            $len,$len,16            # len-=16
584
585         lvx             $rndkey0,0,$key
586          vperm          $inout,$inout,$inptail,$inpperm
587         lvx             $rndkey1,$idx,$key
588         addi            $idx,$idx,16
589         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
590         vxor            $inout,$inout,$rndkey0
591         lvx             $rndkey0,$idx,$key
592         addi            $idx,$idx,16
593         vxor            $inout,$inout,$ivec
594
595 Loop_cbc_enc:
596         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
597         vcipher         $inout,$inout,$rndkey1
598         lvx             $rndkey1,$idx,$key
599         addi            $idx,$idx,16
600         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
601         vcipher         $inout,$inout,$rndkey0
602         lvx             $rndkey0,$idx,$key
603         addi            $idx,$idx,16
604         bdnz            Loop_cbc_enc
605
606         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
607         vcipher         $inout,$inout,$rndkey1
608         lvx             $rndkey1,$idx,$key
609         li              $idx,16
610         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
611         vcipherlast     $ivec,$inout,$rndkey0
612         ${UCMP}i        $len,16
613
614         vperm           $tmp,$ivec,$ivec,$outperm
615         vsel            $inout,$outhead,$tmp,$outmask
616         vmr             $outhead,$tmp
617         stvx            $inout,0,$out
618         addi            $out,$out,16
619         bge             Lcbc_enc
620
621         b               Lcbc_done
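
# In scalar terms the loop above is the textbook CBC encrypt
# recurrence, roughly (an illustrative C sketch; xor16 and
# aes_encrypt_block are hypothetical helpers):
#
#	for (i = 0; i < len/16; i++) {
#		xor16(tmp, in + 16*i, iv);
#		aes_encrypt_block(out + 16*i, tmp, rkeys);
#		memcpy(iv, out + 16*i, 16);
#	}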
622
623 .align  4
624 Lcbc_dec:
625         ${UCMP}i        $len,128
626         bge             _aesp8_cbc_decrypt8x
627         vmr             $tmp,$inptail
628         lvx             $inptail,0,$inp
629         addi            $inp,$inp,16
630         mtctr           $rounds
631         subi            $len,$len,16            # len-=16
632
633         lvx             $rndkey0,0,$key
634          vperm          $tmp,$tmp,$inptail,$inpperm
635         lvx             $rndkey1,$idx,$key
636         addi            $idx,$idx,16
637         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
638         vxor            $inout,$tmp,$rndkey0
639         lvx             $rndkey0,$idx,$key
640         addi            $idx,$idx,16
641
642 Loop_cbc_dec:
643         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
644         vncipher        $inout,$inout,$rndkey1
645         lvx             $rndkey1,$idx,$key
646         addi            $idx,$idx,16
647         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
648         vncipher        $inout,$inout,$rndkey0
649         lvx             $rndkey0,$idx,$key
650         addi            $idx,$idx,16
651         bdnz            Loop_cbc_dec
652
653         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
654         vncipher        $inout,$inout,$rndkey1
655         lvx             $rndkey1,$idx,$key
656         li              $idx,16
657         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
658         vncipherlast    $inout,$inout,$rndkey0
659         ${UCMP}i        $len,16
660
661         vxor            $inout,$inout,$ivec
662         vmr             $ivec,$tmp
663         vperm           $tmp,$inout,$inout,$outperm
664         vsel            $inout,$outhead,$tmp,$outmask
665         vmr             $outhead,$tmp
666         stvx            $inout,0,$out
667         addi            $out,$out,16
668         bge             Lcbc_dec
669
670 Lcbc_done:
671         addi            $out,$out,-1
672         lvx             $inout,0,$out           # redundant in aligned case
673         vsel            $inout,$outhead,$inout,$outmask
674         stvx            $inout,0,$out
675
676         neg             $enc,$ivp               # write [unaligned] iv
677         li              $idx,15                 # 15 is not a typo
678         vxor            $rndkey0,$rndkey0,$rndkey0
679         vspltisb        $outmask,-1
680         le?vspltisb     $tmp,0x0f
681         ?lvsl           $outperm,0,$enc
682         ?vperm          $outmask,$rndkey0,$outmask,$outperm
683         le?vxor         $outperm,$outperm,$tmp
684         lvx             $outhead,0,$ivp
685         vperm           $ivec,$ivec,$ivec,$outperm
686         vsel            $inout,$outhead,$ivec,$outmask
687         lvx             $inptail,$idx,$ivp
688         stvx            $inout,0,$ivp
689         vsel            $inout,$ivec,$inptail,$outmask
690         stvx            $inout,$idx,$ivp
691
692         mtspr           256,$vrsave
693         blr
694         .long           0
695         .byte           0,12,0x14,0,0,0,6,0
696         .long           0
697 ___
698 #########################################################################
699 {{      # Optimized CBC decrypt procedure                               #
700 my $key_="r11";
701 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
702 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
703 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
704 my $rndkey0="v23";      # v24-v25 rotating buffer for the first round keys
705                         # v26-v31 last 6 round keys
706 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
707
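# Unlike encryption, CBC decryption has no serial dependency between
# blocks: pt[i] = D(ct[i]) ^ ct[i-1] with every ciphertext known up
# front, so eight blocks are processed in parallel to hide the vncipher
# latency. An illustrative C sketch (xor16 and aes_decrypt_block are
# hypothetical helpers):
#
#	for (i = 0; i < nblocks; i++) {		/* D() calls independent */
#		aes_decrypt_block(tmp, ct + 16*i, rkeys);
#		xor16(pt + 16*i, tmp, i ? ct + 16*(i-1) : iv);
#	}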
708 $code.=<<___;
709 .align  5
710 _aesp8_cbc_decrypt8x:
711         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
712         li              r10,`$FRAME+8*16+15`
713         li              r11,`$FRAME+8*16+31`
714         stvx            v20,r10,$sp             # ABI says so
715         addi            r10,r10,32
716         stvx            v21,r11,$sp
717         addi            r11,r11,32
718         stvx            v22,r10,$sp
719         addi            r10,r10,32
720         stvx            v23,r11,$sp
721         addi            r11,r11,32
722         stvx            v24,r10,$sp
723         addi            r10,r10,32
724         stvx            v25,r11,$sp
725         addi            r11,r11,32
726         stvx            v26,r10,$sp
727         addi            r10,r10,32
728         stvx            v27,r11,$sp
729         addi            r11,r11,32
730         stvx            v28,r10,$sp
731         addi            r10,r10,32
732         stvx            v29,r11,$sp
733         addi            r11,r11,32
734         stvx            v30,r10,$sp
735         stvx            v31,r11,$sp
736         li              r0,-1
737         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
738         li              $x10,0x10
739         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
740         li              $x20,0x20
741         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
742         li              $x30,0x30
743         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
744         li              $x40,0x40
745         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
746         li              $x50,0x50
747         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
748         li              $x60,0x60
749         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
750         li              $x70,0x70
751         mtspr           256,r0
752
753         subi            $rounds,$rounds,3       # -4 in total
754         subi            $len,$len,128           # bias
755
756         lvx             $rndkey0,$x00,$key      # load key schedule
757         lvx             v30,$x10,$key
758         addi            $key,$key,0x20
759         lvx             v31,$x00,$key
760         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
761         addi            $key_,$sp,$FRAME+15
762         mtctr           $rounds
763
764 Load_cbc_dec_key:
765         ?vperm          v24,v30,v31,$keyperm
766         lvx             v30,$x10,$key
767         addi            $key,$key,0x20
768         stvx            v24,$x00,$key_          # off-load round[1]
769         ?vperm          v25,v31,v30,$keyperm
770         lvx             v31,$x00,$key
771         stvx            v25,$x10,$key_          # off-load round[2]
772         addi            $key_,$key_,0x20
773         bdnz            Load_cbc_dec_key
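
# The schedule is staged through an aligned stack buffer at $key_ so the
# inner loop can refill the rotating v24/v25 pair with plain (aligned)
# lvx loads, while v26-v31 keep the final rounds resident.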
774
775         lvx             v26,$x10,$key
776         ?vperm          v24,v30,v31,$keyperm
777         lvx             v27,$x20,$key
778         stvx            v24,$x00,$key_          # off-load round[3]
779         ?vperm          v25,v31,v26,$keyperm
780         lvx             v28,$x30,$key
781         stvx            v25,$x10,$key_          # off-load round[4]
782         addi            $key_,$sp,$FRAME+15     # rewind $key_
783         ?vperm          v26,v26,v27,$keyperm
784         lvx             v29,$x40,$key
785         ?vperm          v27,v27,v28,$keyperm
786         lvx             v30,$x50,$key
787         ?vperm          v28,v28,v29,$keyperm
788         lvx             v31,$x60,$key
789         ?vperm          v29,v29,v30,$keyperm
790         lvx             $out0,$x70,$key         # borrow $out0
791         ?vperm          v30,v30,v31,$keyperm
792         lvx             v24,$x00,$key_          # pre-load round[1]
793         ?vperm          v31,v31,$out0,$keyperm
794         lvx             v25,$x10,$key_          # pre-load round[2]
795
796         #lvx            $inptail,0,$inp         # "caller" already did this
797 #addi           $inp,$inp,15            # 15 is not a typo
798         subi            $inp,$inp,15            # undo "caller"
799
800          le?li          $idx,8
801         lvx_u           $in0,$x00,$inp          # load first 8 "words"
802          le?lvsl        $inpperm,0,$idx
803          le?vspltisb    $tmp,0x0f
804         lvx_u           $in1,$x10,$inp
805          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
806         lvx_u           $in2,$x20,$inp
807          le?vperm       $in0,$in0,$in0,$inpperm
808         lvx_u           $in3,$x30,$inp
809          le?vperm       $in1,$in1,$in1,$inpperm
810         lvx_u           $in4,$x40,$inp
811          le?vperm       $in2,$in2,$in2,$inpperm
812         vxor            $out0,$in0,$rndkey0
813         lvx_u           $in5,$x50,$inp
814          le?vperm       $in3,$in3,$in3,$inpperm
815         vxor            $out1,$in1,$rndkey0
816         lvx_u           $in6,$x60,$inp
817          le?vperm       $in4,$in4,$in4,$inpperm
818         vxor            $out2,$in2,$rndkey0
819         lvx_u           $in7,$x70,$inp
820         addi            $inp,$inp,0x80
821          le?vperm       $in5,$in5,$in5,$inpperm
822         vxor            $out3,$in3,$rndkey0
823          le?vperm       $in6,$in6,$in6,$inpperm
824         vxor            $out4,$in4,$rndkey0
825          le?vperm       $in7,$in7,$in7,$inpperm
826         vxor            $out5,$in5,$rndkey0
827         vxor            $out6,$in6,$rndkey0
828         vxor            $out7,$in7,$rndkey0
829
830         mtctr           $rounds
831         b               Loop_cbc_dec8x
832 .align  5
833 Loop_cbc_dec8x:
834         vncipher        $out0,$out0,v24
835         vncipher        $out1,$out1,v24
836         vncipher        $out2,$out2,v24
837         vncipher        $out3,$out3,v24
838         vncipher        $out4,$out4,v24
839         vncipher        $out5,$out5,v24
840         vncipher        $out6,$out6,v24
841         vncipher        $out7,$out7,v24
842         lvx             v24,$x20,$key_          # round[3]
843         addi            $key_,$key_,0x20
844
845         vncipher        $out0,$out0,v25
846         vncipher        $out1,$out1,v25
847         vncipher        $out2,$out2,v25
848         vncipher        $out3,$out3,v25
849         vncipher        $out4,$out4,v25
850         vncipher        $out5,$out5,v25
851         vncipher        $out6,$out6,v25
852         vncipher        $out7,$out7,v25
853         lvx             v25,$x10,$key_          # round[4]
854         bdnz            Loop_cbc_dec8x
855
856         subic           $len,$len,128           # $len-=128
857         vncipher        $out0,$out0,v24
858         vncipher        $out1,$out1,v24
859         vncipher        $out2,$out2,v24
860         vncipher        $out3,$out3,v24
861         vncipher        $out4,$out4,v24
862         vncipher        $out5,$out5,v24
863         vncipher        $out6,$out6,v24
864         vncipher        $out7,$out7,v24
865
866         subfe.          r0,r0,r0                # borrow?-1:0
867         vncipher        $out0,$out0,v25
868         vncipher        $out1,$out1,v25
869         vncipher        $out2,$out2,v25
870         vncipher        $out3,$out3,v25
871         vncipher        $out4,$out4,v25
872         vncipher        $out5,$out5,v25
873         vncipher        $out6,$out6,v25
874         vncipher        $out7,$out7,v25
875
876         and             r0,r0,$len
877         vncipher        $out0,$out0,v26
878         vncipher        $out1,$out1,v26
879         vncipher        $out2,$out2,v26
880         vncipher        $out3,$out3,v26
881         vncipher        $out4,$out4,v26
882         vncipher        $out5,$out5,v26
883         vncipher        $out6,$out6,v26
884         vncipher        $out7,$out7,v26
885
886         add             $inp,$inp,r0            # $inp is adjusted in such
887                                                 # a way that, at exit from
888                                                 # the loop, inX-in7 are
889                                                 # loaded with the last "words"
890         vncipher        $out0,$out0,v27
891         vncipher        $out1,$out1,v27
892         vncipher        $out2,$out2,v27
893         vncipher        $out3,$out3,v27
894         vncipher        $out4,$out4,v27
895         vncipher        $out5,$out5,v27
896         vncipher        $out6,$out6,v27
897         vncipher        $out7,$out7,v27
898
899         addi            $key_,$sp,$FRAME+15     # rewind $key_
900         vncipher        $out0,$out0,v28
901         vncipher        $out1,$out1,v28
902         vncipher        $out2,$out2,v28
903         vncipher        $out3,$out3,v28
904         vncipher        $out4,$out4,v28
905         vncipher        $out5,$out5,v28
906         vncipher        $out6,$out6,v28
907         vncipher        $out7,$out7,v28
908         lvx             v24,$x00,$key_          # re-pre-load round[1]
909
910         vncipher        $out0,$out0,v29
911         vncipher        $out1,$out1,v29
912         vncipher        $out2,$out2,v29
913         vncipher        $out3,$out3,v29
914         vncipher        $out4,$out4,v29
915         vncipher        $out5,$out5,v29
916         vncipher        $out6,$out6,v29
917         vncipher        $out7,$out7,v29
918         lvx             v25,$x10,$key_          # re-pre-load round[2]
919
920         vncipher        $out0,$out0,v30
921          vxor           $ivec,$ivec,v31         # xor with last round key
922         vncipher        $out1,$out1,v30
923          vxor           $in0,$in0,v31
924         vncipher        $out2,$out2,v30
925          vxor           $in1,$in1,v31
926         vncipher        $out3,$out3,v30
927          vxor           $in2,$in2,v31
928         vncipher        $out4,$out4,v30
929          vxor           $in3,$in3,v31
930         vncipher        $out5,$out5,v30
931          vxor           $in4,$in4,v31
932         vncipher        $out6,$out6,v30
933          vxor           $in5,$in5,v31
934         vncipher        $out7,$out7,v30
935          vxor           $in6,$in6,v31
936
937         vncipherlast    $out0,$out0,$ivec
938         vncipherlast    $out1,$out1,$in0
939          lvx_u          $in0,$x00,$inp          # load next input block
940         vncipherlast    $out2,$out2,$in1
941          lvx_u          $in1,$x10,$inp
942         vncipherlast    $out3,$out3,$in2
943          le?vperm       $in0,$in0,$in0,$inpperm
944          lvx_u          $in2,$x20,$inp
945         vncipherlast    $out4,$out4,$in3
946          le?vperm       $in1,$in1,$in1,$inpperm
947          lvx_u          $in3,$x30,$inp
948         vncipherlast    $out5,$out5,$in4
949          le?vperm       $in2,$in2,$in2,$inpperm
950          lvx_u          $in4,$x40,$inp
951         vncipherlast    $out6,$out6,$in5
952          le?vperm       $in3,$in3,$in3,$inpperm
953          lvx_u          $in5,$x50,$inp
954         vncipherlast    $out7,$out7,$in6
955          le?vperm       $in4,$in4,$in4,$inpperm
956          lvx_u          $in6,$x60,$inp
957         vmr             $ivec,$in7
958          le?vperm       $in5,$in5,$in5,$inpperm
959          lvx_u          $in7,$x70,$inp
960          addi           $inp,$inp,0x80
961
962         le?vperm        $out0,$out0,$out0,$inpperm
963         le?vperm        $out1,$out1,$out1,$inpperm
964         stvx_u          $out0,$x00,$out
965          le?vperm       $in6,$in6,$in6,$inpperm
966          vxor           $out0,$in0,$rndkey0
967         le?vperm        $out2,$out2,$out2,$inpperm
968         stvx_u          $out1,$x10,$out
969          le?vperm       $in7,$in7,$in7,$inpperm
970          vxor           $out1,$in1,$rndkey0
971         le?vperm        $out3,$out3,$out3,$inpperm
972         stvx_u          $out2,$x20,$out
973          vxor           $out2,$in2,$rndkey0
974         le?vperm        $out4,$out4,$out4,$inpperm
975         stvx_u          $out3,$x30,$out
976          vxor           $out3,$in3,$rndkey0
977         le?vperm        $out5,$out5,$out5,$inpperm
978         stvx_u          $out4,$x40,$out
979          vxor           $out4,$in4,$rndkey0
980         le?vperm        $out6,$out6,$out6,$inpperm
981         stvx_u          $out5,$x50,$out
982          vxor           $out5,$in5,$rndkey0
983         le?vperm        $out7,$out7,$out7,$inpperm
984         stvx_u          $out6,$x60,$out
985          vxor           $out6,$in6,$rndkey0
986         stvx_u          $out7,$x70,$out
987         addi            $out,$out,0x80
988          vxor           $out7,$in7,$rndkey0
989
990         mtctr           $rounds
991         beq             Loop_cbc_dec8x          # did $len-=128 borrow?
992
993         addic.          $len,$len,128
994         beq             Lcbc_dec8x_done
995         nop
996         nop
997
998 Loop_cbc_dec8x_tail:                            # up to 7 "words" tail...
999         vncipher        $out1,$out1,v24
1000         vncipher        $out2,$out2,v24
1001         vncipher        $out3,$out3,v24
1002         vncipher        $out4,$out4,v24
1003         vncipher        $out5,$out5,v24
1004         vncipher        $out6,$out6,v24
1005         vncipher        $out7,$out7,v24
1006         lvx             v24,$x20,$key_          # round[3]
1007         addi            $key_,$key_,0x20
1008
1009         vncipher        $out1,$out1,v25
1010         vncipher        $out2,$out2,v25
1011         vncipher        $out3,$out3,v25
1012         vncipher        $out4,$out4,v25
1013         vncipher        $out5,$out5,v25
1014         vncipher        $out6,$out6,v25
1015         vncipher        $out7,$out7,v25
1016         lvx             v25,$x10,$key_          # round[4]
1017         bdnz            Loop_cbc_dec8x_tail
1018
1019         vncipher        $out1,$out1,v24
1020         vncipher        $out2,$out2,v24
1021         vncipher        $out3,$out3,v24
1022         vncipher        $out4,$out4,v24
1023         vncipher        $out5,$out5,v24
1024         vncipher        $out6,$out6,v24
1025         vncipher        $out7,$out7,v24
1026
1027         vncipher        $out1,$out1,v25
1028         vncipher        $out2,$out2,v25
1029         vncipher        $out3,$out3,v25
1030         vncipher        $out4,$out4,v25
1031         vncipher        $out5,$out5,v25
1032         vncipher        $out6,$out6,v25
1033         vncipher        $out7,$out7,v25
1034
1035         vncipher        $out1,$out1,v26
1036         vncipher        $out2,$out2,v26
1037         vncipher        $out3,$out3,v26
1038         vncipher        $out4,$out4,v26
1039         vncipher        $out5,$out5,v26
1040         vncipher        $out6,$out6,v26
1041         vncipher        $out7,$out7,v26
1042
1043         vncipher        $out1,$out1,v27
1044         vncipher        $out2,$out2,v27
1045         vncipher        $out3,$out3,v27
1046         vncipher        $out4,$out4,v27
1047         vncipher        $out5,$out5,v27
1048         vncipher        $out6,$out6,v27
1049         vncipher        $out7,$out7,v27
1050
1051         vncipher        $out1,$out1,v28
1052         vncipher        $out2,$out2,v28
1053         vncipher        $out3,$out3,v28
1054         vncipher        $out4,$out4,v28
1055         vncipher        $out5,$out5,v28
1056         vncipher        $out6,$out6,v28
1057         vncipher        $out7,$out7,v28
1058
1059         vncipher        $out1,$out1,v29
1060         vncipher        $out2,$out2,v29
1061         vncipher        $out3,$out3,v29
1062         vncipher        $out4,$out4,v29
1063         vncipher        $out5,$out5,v29
1064         vncipher        $out6,$out6,v29
1065         vncipher        $out7,$out7,v29
1066
1067         vncipher        $out1,$out1,v30
1068          vxor           $ivec,$ivec,v31         # last round key
1069         vncipher        $out2,$out2,v30
1070          vxor           $in1,$in1,v31
1071         vncipher        $out3,$out3,v30
1072          vxor           $in2,$in2,v31
1073         vncipher        $out4,$out4,v30
1074          vxor           $in3,$in3,v31
1075         vncipher        $out5,$out5,v30
1076          vxor           $in4,$in4,v31
1077         vncipher        $out6,$out6,v30
1078          vxor           $in5,$in5,v31
1079         vncipher        $out7,$out7,v30
1080          vxor           $in6,$in6,v31
1081
1082         cmplwi          $len,32                 # switch($len)
1083         blt             Lcbc_dec8x_one
1084         nop
1085         beq             Lcbc_dec8x_two
1086         cmplwi          $len,64
1087         blt             Lcbc_dec8x_three
1088         nop
1089         beq             Lcbc_dec8x_four
1090         cmplwi          $len,96
1091         blt             Lcbc_dec8x_five
1092         nop
1093         beq             Lcbc_dec8x_six
1094
1095 Lcbc_dec8x_seven:
1096         vncipherlast    $out1,$out1,$ivec
1097         vncipherlast    $out2,$out2,$in1
1098         vncipherlast    $out3,$out3,$in2
1099         vncipherlast    $out4,$out4,$in3
1100         vncipherlast    $out5,$out5,$in4
1101         vncipherlast    $out6,$out6,$in5
1102         vncipherlast    $out7,$out7,$in6
1103         vmr             $ivec,$in7
1104
1105         le?vperm        $out1,$out1,$out1,$inpperm
1106         le?vperm        $out2,$out2,$out2,$inpperm
1107         stvx_u          $out1,$x00,$out
1108         le?vperm        $out3,$out3,$out3,$inpperm
1109         stvx_u          $out2,$x10,$out
1110         le?vperm        $out4,$out4,$out4,$inpperm
1111         stvx_u          $out3,$x20,$out
1112         le?vperm        $out5,$out5,$out5,$inpperm
1113         stvx_u          $out4,$x30,$out
1114         le?vperm        $out6,$out6,$out6,$inpperm
1115         stvx_u          $out5,$x40,$out
1116         le?vperm        $out7,$out7,$out7,$inpperm
1117         stvx_u          $out6,$x50,$out
1118         stvx_u          $out7,$x60,$out
1119         addi            $out,$out,0x70
1120         b               Lcbc_dec8x_done
1121
1122 .align  5
1123 Lcbc_dec8x_six:
1124         vncipherlast    $out2,$out2,$ivec
1125         vncipherlast    $out3,$out3,$in2
1126         vncipherlast    $out4,$out4,$in3
1127         vncipherlast    $out5,$out5,$in4
1128         vncipherlast    $out6,$out6,$in5
1129         vncipherlast    $out7,$out7,$in6
1130         vmr             $ivec,$in7
1131
1132         le?vperm        $out2,$out2,$out2,$inpperm
1133         le?vperm        $out3,$out3,$out3,$inpperm
1134         stvx_u          $out2,$x00,$out
1135         le?vperm        $out4,$out4,$out4,$inpperm
1136         stvx_u          $out3,$x10,$out
1137         le?vperm        $out5,$out5,$out5,$inpperm
1138         stvx_u          $out4,$x20,$out
1139         le?vperm        $out6,$out6,$out6,$inpperm
1140         stvx_u          $out5,$x30,$out
1141         le?vperm        $out7,$out7,$out7,$inpperm
1142         stvx_u          $out6,$x40,$out
1143         stvx_u          $out7,$x50,$out
1144         addi            $out,$out,0x60
1145         b               Lcbc_dec8x_done
1146
1147 .align  5
1148 Lcbc_dec8x_five:
1149         vncipherlast    $out3,$out3,$ivec
1150         vncipherlast    $out4,$out4,$in3
1151         vncipherlast    $out5,$out5,$in4
1152         vncipherlast    $out6,$out6,$in5
1153         vncipherlast    $out7,$out7,$in6
1154         vmr             $ivec,$in7
1155
1156         le?vperm        $out3,$out3,$out3,$inpperm
1157         le?vperm        $out4,$out4,$out4,$inpperm
1158         stvx_u          $out3,$x00,$out
1159         le?vperm        $out5,$out5,$out5,$inpperm
1160         stvx_u          $out4,$x10,$out
1161         le?vperm        $out6,$out6,$out6,$inpperm
1162         stvx_u          $out5,$x20,$out
1163         le?vperm        $out7,$out7,$out7,$inpperm
1164         stvx_u          $out6,$x30,$out
1165         stvx_u          $out7,$x40,$out
1166         addi            $out,$out,0x50
1167         b               Lcbc_dec8x_done
1168
1169 .align  5
1170 Lcbc_dec8x_four:
1171         vncipherlast    $out4,$out4,$ivec
1172         vncipherlast    $out5,$out5,$in4
1173         vncipherlast    $out6,$out6,$in5
1174         vncipherlast    $out7,$out7,$in6
1175         vmr             $ivec,$in7
1176
1177         le?vperm        $out4,$out4,$out4,$inpperm
1178         le?vperm        $out5,$out5,$out5,$inpperm
1179         stvx_u          $out4,$x00,$out
1180         le?vperm        $out6,$out6,$out6,$inpperm
1181         stvx_u          $out5,$x10,$out
1182         le?vperm        $out7,$out7,$out7,$inpperm
1183         stvx_u          $out6,$x20,$out
1184         stvx_u          $out7,$x30,$out
1185         addi            $out,$out,0x40
1186         b               Lcbc_dec8x_done
1187
1188 .align  5
1189 Lcbc_dec8x_three:
1190         vncipherlast    $out5,$out5,$ivec
1191         vncipherlast    $out6,$out6,$in5
1192         vncipherlast    $out7,$out7,$in6
1193         vmr             $ivec,$in7
1194
1195         le?vperm        $out5,$out5,$out5,$inpperm
1196         le?vperm        $out6,$out6,$out6,$inpperm
1197         stvx_u          $out5,$x00,$out
1198         le?vperm        $out7,$out7,$out7,$inpperm
1199         stvx_u          $out6,$x10,$out
1200         stvx_u          $out7,$x20,$out
1201         addi            $out,$out,0x30
1202         b               Lcbc_dec8x_done
1203
1204 .align  5
1205 Lcbc_dec8x_two:
1206         vncipherlast    $out6,$out6,$ivec
1207         vncipherlast    $out7,$out7,$in6
1208         vmr             $ivec,$in7
1209
1210         le?vperm        $out6,$out6,$out6,$inpperm
1211         le?vperm        $out7,$out7,$out7,$inpperm
1212         stvx_u          $out6,$x00,$out
1213         stvx_u          $out7,$x10,$out
1214         addi            $out,$out,0x20
1215         b               Lcbc_dec8x_done
1216
1217 .align  5
1218 Lcbc_dec8x_one:
1219         vncipherlast    $out7,$out7,$ivec
1220         vmr             $ivec,$in7
1221
1222         le?vperm        $out7,$out7,$out7,$inpperm
1223         stvx_u          $out7,0,$out
1224         addi            $out,$out,0x10
1225
1226 Lcbc_dec8x_done:
1227         le?vperm        $ivec,$ivec,$ivec,$inpperm
1228         stvx_u          $ivec,0,$ivp            # write [unaligned] iv
1229
1230         li              r10,`$FRAME+15`
1231         li              r11,`$FRAME+31`
1232         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1233         addi            r10,r10,32
1234         stvx            $inpperm,r11,$sp
1235         addi            r11,r11,32
1236         stvx            $inpperm,r10,$sp
1237         addi            r10,r10,32
1238         stvx            $inpperm,r11,$sp
1239         addi            r11,r11,32
1240         stvx            $inpperm,r10,$sp
1241         addi            r10,r10,32
1242         stvx            $inpperm,r11,$sp
1243         addi            r11,r11,32
1244         stvx            $inpperm,r10,$sp
1245         addi            r10,r10,32
1246         stvx            $inpperm,r11,$sp
1247         addi            r11,r11,32
1248
1249         mtspr           256,$vrsave
1250         lvx             v20,r10,$sp             # ABI says so
1251         addi            r10,r10,32
1252         lvx             v21,r11,$sp
1253         addi            r11,r11,32
1254         lvx             v22,r10,$sp
1255         addi            r10,r10,32
1256         lvx             v23,r11,$sp
1257         addi            r11,r11,32
1258         lvx             v24,r10,$sp
1259         addi            r10,r10,32
1260         lvx             v25,r11,$sp
1261         addi            r11,r11,32
1262         lvx             v26,r10,$sp
1263         addi            r10,r10,32
1264         lvx             v27,r11,$sp
1265         addi            r11,r11,32
1266         lvx             v28,r10,$sp
1267         addi            r10,r10,32
1268         lvx             v29,r11,$sp
1269         addi            r11,r11,32
1270         lvx             v30,r10,$sp
1271         lvx             v31,r11,$sp
1272         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1273         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1274         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1275         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1276         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1277         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1278         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1279         blr
1280         .long           0
1281         .byte           0,12,0x14,0,0x80,6,6,0
1282         .long           0
1283 .size   .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1284 ___
1285 }}      }}}
1286
1287 #########################################################################
1288 {{{     # CTR procedure[s]                                              #
1289
1290 ####################### WARNING: Here be dragons! #######################
1291 #
1292 # This code is written as 'ctr32', based on a 32-bit counter used
1293 # upstream. The kernel does *not* use a 32-bit counter. The kernel uses
1294 # a 128-bit counter.
1295 #
1296 # This leads to subtle changes from the upstream code: the counter
1297 # is incremented with vadduqm rather than vadduwm. This occurs in
1298 # both the bulk (8 blocks at a time) path and in the individual block
1299 # path. Be aware of this when doing updates.
1300 #
1301 # See:
1302 # 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
1303 # 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
1304 # https://github.com/openssl/openssl/pull/8942
1305 #
1306 #########################################################################
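# An illustrative C sketch of the difference (big-endian counter
# layout; get_be32/put_be32 stand in for byte-order accessors):
#
#	/* upstream 'ctr32' (vadduwm): only the low 32-bit word wraps */
#	put_be32(ctr + 12, get_be32(ctr + 12) + 1);
#
#	/* kernel (vadduqm): the carry ripples across all 16 bytes */
#	for (i = 15; i >= 0 && ++ctr[i] == 0; i--)
#		;
#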
1307 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1308 my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
1309 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1310                                                 map("v$_",(4..11));
1311 my $dat=$tmp;
1312
1313 $code.=<<___;
1314 .globl  .${prefix}_ctr32_encrypt_blocks
1315         ${UCMP}i        $len,1
1316         bltlr-
1317
1318         lis             r0,0xfff0
1319         mfspr           $vrsave,256
1320         mtspr           256,r0
1321
1322         li              $idx,15
1323         vxor            $rndkey0,$rndkey0,$rndkey0
1324         le?vspltisb     $tmp,0x0f
1325
1326         lvx             $ivec,0,$ivp            # load [unaligned] iv
1327         lvsl            $inpperm,0,$ivp
1328         lvx             $inptail,$idx,$ivp
1329          vspltisb       $one,1
1330         le?vxor         $inpperm,$inpperm,$tmp
1331         vperm           $ivec,$ivec,$inptail,$inpperm
1332          vsldoi         $one,$rndkey0,$one,1
1333
1334         neg             r11,$inp
1335         ?lvsl           $keyperm,0,$key         # prepare for unaligned key
1336         lwz             $rounds,240($key)
1337
1338         lvsr            $inpperm,0,r11          # prepare for unaligned load
1339         lvx             $inptail,0,$inp
1340         addi            $inp,$inp,15            # 15 is not a typo
1341         le?vxor         $inpperm,$inpperm,$tmp
1342
1343         srwi            $rounds,$rounds,1
1344         li              $idx,16
1345         subi            $rounds,$rounds,1
1346
1347         ${UCMP}i        $len,8
1348         bge             _aesp8_ctr32_encrypt8x
1349
1350         ?lvsr           $outperm,0,$out         # prepare for unaligned store
1351         vspltisb        $outmask,-1
1352         lvx             $outhead,0,$out
1353         ?vperm          $outmask,$rndkey0,$outmask,$outperm
1354         le?vxor         $outperm,$outperm,$tmp
1355
1356         lvx             $rndkey0,0,$key
1357         mtctr           $rounds
1358         lvx             $rndkey1,$idx,$key
1359         addi            $idx,$idx,16
1360         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1361         vxor            $inout,$ivec,$rndkey0
1362         lvx             $rndkey0,$idx,$key
1363         addi            $idx,$idx,16
1364         b               Loop_ctr32_enc
1365
1366 .align  5
1367 Loop_ctr32_enc:
1368         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1369         vcipher         $inout,$inout,$rndkey1
1370         lvx             $rndkey1,$idx,$key
1371         addi            $idx,$idx,16
1372         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1373         vcipher         $inout,$inout,$rndkey0
1374         lvx             $rndkey0,$idx,$key
1375         addi            $idx,$idx,16
1376         bdnz            Loop_ctr32_enc
1377
1378         vadduqm         $ivec,$ivec,$one        # Kernel change for 128-bit
1379          vmr            $dat,$inptail
1380          lvx            $inptail,0,$inp
1381          addi           $inp,$inp,16
1382          subic.         $len,$len,1             # blocks--
1383
1384         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1385         vcipher         $inout,$inout,$rndkey1
1386         lvx             $rndkey1,$idx,$key
1387          vperm          $dat,$dat,$inptail,$inpperm
1388          li             $idx,16
1389         ?vperm          $rndkey1,$rndkey0,$rndkey1,$keyperm
1390          lvx            $rndkey0,0,$key
1391         vxor            $dat,$dat,$rndkey1      # last round key
1392         vcipherlast     $inout,$inout,$dat
1393
1394          lvx            $rndkey1,$idx,$key
1395          addi           $idx,$idx,16
1396         vperm           $inout,$inout,$inout,$outperm
1397         vsel            $dat,$outhead,$inout,$outmask
1398          mtctr          $rounds
1399          ?vperm         $rndkey0,$rndkey0,$rndkey1,$keyperm
1400         vmr             $outhead,$inout
1401          vxor           $inout,$ivec,$rndkey0
1402          lvx            $rndkey0,$idx,$key
1403          addi           $idx,$idx,16
1404         stvx            $dat,0,$out
1405         addi            $out,$out,16
1406         bne             Loop_ctr32_enc
1407
1408         addi            $out,$out,-1
1409         lvx             $inout,0,$out           # redundant in aligned case
1410         vsel            $inout,$outhead,$inout,$outmask
1411         stvx            $inout,0,$out
1412
1413         mtspr           256,$vrsave
1414         blr
1415         .long           0
1416         .byte           0,12,0x14,0,0,0,6,0
1417         .long           0
1418 ___
1419 #########################################################################
1420 {{      # Optimized CTR procedure                                       #
1421 my $key_="r11";
1422 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1423 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1424 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1425 my $rndkey0="v23";      # v24-v25 rotating buffer for the first few round keys
1426                         # v26-v31 last 6 round keys
1427 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1428 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1429
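# Orientation note: the 8x path below materializes eight consecutive
# 128-bit counter values per iteration via vadduqm with $one/$two. A
# scalar Perl sketch of that add, for reference only (never called by
# this generator):
sub _ref_ctr128_add {                   # ctr + $n with full 128-bit carry
        my ($ctr,$n) = @_;
        my @b = unpack("C16",$ctr);
        for (my $i=15; $i>=0 && $n; $i--) {
                my $s = $b[$i]+$n;
                $b[$i] = $s&0xff;
                $n = $s>>8;
        }
        return pack("C16",@b);
}
# e.g., eight blocks from a hypothetical initial counter string $iv0:
# my @ctrs = map { _ref_ctr128_add($iv0,$_) } (0..7);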
1430 $code.=<<___;
1431 .align  5
1432 _aesp8_ctr32_encrypt8x:
1433         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1434         li              r10,`$FRAME+8*16+15`
1435         li              r11,`$FRAME+8*16+31`
1436         stvx            v20,r10,$sp             # ABI says so
1437         addi            r10,r10,32
1438         stvx            v21,r11,$sp
1439         addi            r11,r11,32
1440         stvx            v22,r10,$sp
1441         addi            r10,r10,32
1442         stvx            v23,r11,$sp
1443         addi            r11,r11,32
1444         stvx            v24,r10,$sp
1445         addi            r10,r10,32
1446         stvx            v25,r11,$sp
1447         addi            r11,r11,32
1448         stvx            v26,r10,$sp
1449         addi            r10,r10,32
1450         stvx            v27,r11,$sp
1451         addi            r11,r11,32
1452         stvx            v28,r10,$sp
1453         addi            r10,r10,32
1454         stvx            v29,r11,$sp
1455         addi            r11,r11,32
1456         stvx            v30,r10,$sp
1457         stvx            v31,r11,$sp
1458         li              r0,-1
1459         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1460         li              $x10,0x10
1461         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1462         li              $x20,0x20
1463         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1464         li              $x30,0x30
1465         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1466         li              $x40,0x40
1467         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1468         li              $x50,0x50
1469         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1470         li              $x60,0x60
1471         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1472         li              $x70,0x70
1473         mtspr           256,r0
1474
1475         subi            $rounds,$rounds,3       # -4 in total
1476
1477         lvx             $rndkey0,$x00,$key      # load key schedule
1478         lvx             v30,$x10,$key
1479         addi            $key,$key,0x20
1480         lvx             v31,$x00,$key
1481         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
1482         addi            $key_,$sp,$FRAME+15
1483         mtctr           $rounds
1484
1485 Load_ctr32_enc_key:
1486         ?vperm          v24,v30,v31,$keyperm
1487         lvx             v30,$x10,$key
1488         addi            $key,$key,0x20
1489         stvx            v24,$x00,$key_          # off-load round[1]
1490         ?vperm          v25,v31,v30,$keyperm
1491         lvx             v31,$x00,$key
1492         stvx            v25,$x10,$key_          # off-load round[2]
1493         addi            $key_,$key_,0x20
1494         bdnz            Load_ctr32_enc_key
1495
1496         lvx             v26,$x10,$key
1497         ?vperm          v24,v30,v31,$keyperm
1498         lvx             v27,$x20,$key
1499         stvx            v24,$x00,$key_          # off-load round[3]
1500         ?vperm          v25,v31,v26,$keyperm
1501         lvx             v28,$x30,$key
1502         stvx            v25,$x10,$key_          # off-load round[4]
1503         addi            $key_,$sp,$FRAME+15     # rewind $key_
1504         ?vperm          v26,v26,v27,$keyperm
1505         lvx             v29,$x40,$key
1506         ?vperm          v27,v27,v28,$keyperm
1507         lvx             v30,$x50,$key
1508         ?vperm          v28,v28,v29,$keyperm
1509         lvx             v31,$x60,$key
1510         ?vperm          v29,v29,v30,$keyperm
1511         lvx             $out0,$x70,$key         # borrow $out0
1512         ?vperm          v30,v30,v31,$keyperm
1513         lvx             v24,$x00,$key_          # pre-load round[1]
1514         ?vperm          v31,v31,$out0,$keyperm
1515         lvx             v25,$x10,$key_          # pre-load round[2]
1516
1517         vadduqm         $two,$one,$one
1518         subi            $inp,$inp,15            # undo "caller"
1519         $SHL            $len,$len,4
1520
1521         vadduqm         $out1,$ivec,$one        # counter values ...
1522         vadduqm         $out2,$ivec,$two        # (do all ctr adds as 128-bit)
1523         vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1524          le?li          $idx,8
1525         vadduqm         $out3,$out1,$two
1526         vxor            $out1,$out1,$rndkey0
1527          le?lvsl        $inpperm,0,$idx
1528         vadduqm         $out4,$out2,$two
1529         vxor            $out2,$out2,$rndkey0
1530          le?vspltisb    $tmp,0x0f
1531         vadduqm         $out5,$out3,$two
1532         vxor            $out3,$out3,$rndkey0
1533          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1534         vadduqm         $out6,$out4,$two
1535         vxor            $out4,$out4,$rndkey0
1536         vadduqm         $out7,$out5,$two
1537         vxor            $out5,$out5,$rndkey0
1538         vadduqm         $ivec,$out6,$two        # next counter value
1539         vxor            $out6,$out6,$rndkey0
1540         vxor            $out7,$out7,$rndkey0
1541
1542         mtctr           $rounds
1543         b               Loop_ctr32_enc8x
1544 .align  5
1545 Loop_ctr32_enc8x:
1546         vcipher         $out0,$out0,v24
1547         vcipher         $out1,$out1,v24
1548         vcipher         $out2,$out2,v24
1549         vcipher         $out3,$out3,v24
1550         vcipher         $out4,$out4,v24
1551         vcipher         $out5,$out5,v24
1552         vcipher         $out6,$out6,v24
1553         vcipher         $out7,$out7,v24
1554 Loop_ctr32_enc8x_middle:
1555         lvx             v24,$x20,$key_          # round[3]
1556         addi            $key_,$key_,0x20
1557
1558         vcipher         $out0,$out0,v25
1559         vcipher         $out1,$out1,v25
1560         vcipher         $out2,$out2,v25
1561         vcipher         $out3,$out3,v25
1562         vcipher         $out4,$out4,v25
1563         vcipher         $out5,$out5,v25
1564         vcipher         $out6,$out6,v25
1565         vcipher         $out7,$out7,v25
1566         lvx             v25,$x10,$key_          # round[4]
1567         bdnz            Loop_ctr32_enc8x
1568
1569         subic           r11,$len,256            # $len-256, borrow $key_
1570         vcipher         $out0,$out0,v24
1571         vcipher         $out1,$out1,v24
1572         vcipher         $out2,$out2,v24
1573         vcipher         $out3,$out3,v24
1574         vcipher         $out4,$out4,v24
1575         vcipher         $out5,$out5,v24
1576         vcipher         $out6,$out6,v24
1577         vcipher         $out7,$out7,v24
1578
1579         subfe           r0,r0,r0                # borrow?-1:0
1580         vcipher         $out0,$out0,v25
1581         vcipher         $out1,$out1,v25
1582         vcipher         $out2,$out2,v25
1583         vcipher         $out3,$out3,v25
1584         vcipher         $out4,$out4,v25
1585         vcipher         $out5,$out5,v25
1586         vcipher         $out6,$out6,v25
1587         vcipher         $out7,$out7,v25
1588
1589         and             r0,r0,r11
1590         addi            $key_,$sp,$FRAME+15     # rewind $key_
1591         vcipher         $out0,$out0,v26
1592         vcipher         $out1,$out1,v26
1593         vcipher         $out2,$out2,v26
1594         vcipher         $out3,$out3,v26
1595         vcipher         $out4,$out4,v26
1596         vcipher         $out5,$out5,v26
1597         vcipher         $out6,$out6,v26
1598         vcipher         $out7,$out7,v26
1599         lvx             v24,$x00,$key_          # re-pre-load round[1]
1600
1601         subic           $len,$len,129           # $len-=129
1602         vcipher         $out0,$out0,v27
1603         addi            $len,$len,1             # $len-=128 really
1604         vcipher         $out1,$out1,v27
1605         vcipher         $out2,$out2,v27
1606         vcipher         $out3,$out3,v27
1607         vcipher         $out4,$out4,v27
1608         vcipher         $out5,$out5,v27
1609         vcipher         $out6,$out6,v27
1610         vcipher         $out7,$out7,v27
1611         lvx             v25,$x10,$key_          # re-pre-load round[2]
1612
1613         vcipher         $out0,$out0,v28
1614          lvx_u          $in0,$x00,$inp          # load input
1615         vcipher         $out1,$out1,v28
1616          lvx_u          $in1,$x10,$inp
1617         vcipher         $out2,$out2,v28
1618          lvx_u          $in2,$x20,$inp
1619         vcipher         $out3,$out3,v28
1620          lvx_u          $in3,$x30,$inp
1621         vcipher         $out4,$out4,v28
1622          lvx_u          $in4,$x40,$inp
1623         vcipher         $out5,$out5,v28
1624          lvx_u          $in5,$x50,$inp
1625         vcipher         $out6,$out6,v28
1626          lvx_u          $in6,$x60,$inp
1627         vcipher         $out7,$out7,v28
1628          lvx_u          $in7,$x70,$inp
1629          addi           $inp,$inp,0x80
1630
1631         vcipher         $out0,$out0,v29
1632          le?vperm       $in0,$in0,$in0,$inpperm
1633         vcipher         $out1,$out1,v29
1634          le?vperm       $in1,$in1,$in1,$inpperm
1635         vcipher         $out2,$out2,v29
1636          le?vperm       $in2,$in2,$in2,$inpperm
1637         vcipher         $out3,$out3,v29
1638          le?vperm       $in3,$in3,$in3,$inpperm
1639         vcipher         $out4,$out4,v29
1640          le?vperm       $in4,$in4,$in4,$inpperm
1641         vcipher         $out5,$out5,v29
1642          le?vperm       $in5,$in5,$in5,$inpperm
1643         vcipher         $out6,$out6,v29
1644          le?vperm       $in6,$in6,$in6,$inpperm
1645         vcipher         $out7,$out7,v29
1646          le?vperm       $in7,$in7,$in7,$inpperm
1647
1648         add             $inp,$inp,r0            # $inp is adjusted in such
1649                                                 # a way that at exit from the
1650                                                 # loop inX-in7 are loaded
1651                                                 # with last "words"
1652         subfe.          r0,r0,r0                # borrow?-1:0
1653         vcipher         $out0,$out0,v30
1654          vxor           $in0,$in0,v31           # xor with last round key
1655         vcipher         $out1,$out1,v30
1656          vxor           $in1,$in1,v31
1657         vcipher         $out2,$out2,v30
1658          vxor           $in2,$in2,v31
1659         vcipher         $out3,$out3,v30
1660          vxor           $in3,$in3,v31
1661         vcipher         $out4,$out4,v30
1662          vxor           $in4,$in4,v31
1663         vcipher         $out5,$out5,v30
1664          vxor           $in5,$in5,v31
1665         vcipher         $out6,$out6,v30
1666          vxor           $in6,$in6,v31
1667         vcipher         $out7,$out7,v30
1668          vxor           $in7,$in7,v31
1669
1670         bne             Lctr32_enc8x_break      # did $len-129 borrow?
1671
1672         vcipherlast     $in0,$out0,$in0
1673         vcipherlast     $in1,$out1,$in1
1674          vadduqm        $out1,$ivec,$one        # counter values ...
1675         vcipherlast     $in2,$out2,$in2
1676          vadduqm        $out2,$ivec,$two
1677          vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1678         vcipherlast     $in3,$out3,$in3
1679          vadduqm        $out3,$out1,$two
1680          vxor           $out1,$out1,$rndkey0
1681         vcipherlast     $in4,$out4,$in4
1682          vadduqm        $out4,$out2,$two
1683          vxor           $out2,$out2,$rndkey0
1684         vcipherlast     $in5,$out5,$in5
1685          vadduqm        $out5,$out3,$two
1686          vxor           $out3,$out3,$rndkey0
1687         vcipherlast     $in6,$out6,$in6
1688          vadduqm        $out6,$out4,$two
1689          vxor           $out4,$out4,$rndkey0
1690         vcipherlast     $in7,$out7,$in7
1691          vadduqm        $out7,$out5,$two
1692          vxor           $out5,$out5,$rndkey0
1693         le?vperm        $in0,$in0,$in0,$inpperm
1694          vadduqm        $ivec,$out6,$two        # next counter value
1695          vxor           $out6,$out6,$rndkey0
1696         le?vperm        $in1,$in1,$in1,$inpperm
1697          vxor           $out7,$out7,$rndkey0
1698         mtctr           $rounds
1699
1700          vcipher        $out0,$out0,v24
1701         stvx_u          $in0,$x00,$out
1702         le?vperm        $in2,$in2,$in2,$inpperm
1703          vcipher        $out1,$out1,v24
1704         stvx_u          $in1,$x10,$out
1705         le?vperm        $in3,$in3,$in3,$inpperm
1706          vcipher        $out2,$out2,v24
1707         stvx_u          $in2,$x20,$out
1708         le?vperm        $in4,$in4,$in4,$inpperm
1709          vcipher        $out3,$out3,v24
1710         stvx_u          $in3,$x30,$out
1711         le?vperm        $in5,$in5,$in5,$inpperm
1712          vcipher        $out4,$out4,v24
1713         stvx_u          $in4,$x40,$out
1714         le?vperm        $in6,$in6,$in6,$inpperm
1715          vcipher        $out5,$out5,v24
1716         stvx_u          $in5,$x50,$out
1717         le?vperm        $in7,$in7,$in7,$inpperm
1718          vcipher        $out6,$out6,v24
1719         stvx_u          $in6,$x60,$out
1720          vcipher        $out7,$out7,v24
1721         stvx_u          $in7,$x70,$out
1722         addi            $out,$out,0x80
1723
1724         b               Loop_ctr32_enc8x_middle
1725
1726 .align  5
1727 Lctr32_enc8x_break:
1728         cmpwi           $len,-0x60
1729         blt             Lctr32_enc8x_one
1730         nop
1731         beq             Lctr32_enc8x_two
1732         cmpwi           $len,-0x40
1733         blt             Lctr32_enc8x_three
1734         nop
1735         beq             Lctr32_enc8x_four
1736         cmpwi           $len,-0x20
1737         blt             Lctr32_enc8x_five
1738         nop
1739         beq             Lctr32_enc8x_six
1740         cmpwi           $len,0x00
1741         blt             Lctr32_enc8x_seven
1742
1743 Lctr32_enc8x_eight:
1744         vcipherlast     $out0,$out0,$in0
1745         vcipherlast     $out1,$out1,$in1
1746         vcipherlast     $out2,$out2,$in2
1747         vcipherlast     $out3,$out3,$in3
1748         vcipherlast     $out4,$out4,$in4
1749         vcipherlast     $out5,$out5,$in5
1750         vcipherlast     $out6,$out6,$in6
1751         vcipherlast     $out7,$out7,$in7
1752
1753         le?vperm        $out0,$out0,$out0,$inpperm
1754         le?vperm        $out1,$out1,$out1,$inpperm
1755         stvx_u          $out0,$x00,$out
1756         le?vperm        $out2,$out2,$out2,$inpperm
1757         stvx_u          $out1,$x10,$out
1758         le?vperm        $out3,$out3,$out3,$inpperm
1759         stvx_u          $out2,$x20,$out
1760         le?vperm        $out4,$out4,$out4,$inpperm
1761         stvx_u          $out3,$x30,$out
1762         le?vperm        $out5,$out5,$out5,$inpperm
1763         stvx_u          $out4,$x40,$out
1764         le?vperm        $out6,$out6,$out6,$inpperm
1765         stvx_u          $out5,$x50,$out
1766         le?vperm        $out7,$out7,$out7,$inpperm
1767         stvx_u          $out6,$x60,$out
1768         stvx_u          $out7,$x70,$out
1769         addi            $out,$out,0x80
1770         b               Lctr32_enc8x_done
1771
1772 .align  5
1773 Lctr32_enc8x_seven:
1774         vcipherlast     $out0,$out0,$in1
1775         vcipherlast     $out1,$out1,$in2
1776         vcipherlast     $out2,$out2,$in3
1777         vcipherlast     $out3,$out3,$in4
1778         vcipherlast     $out4,$out4,$in5
1779         vcipherlast     $out5,$out5,$in6
1780         vcipherlast     $out6,$out6,$in7
1781
1782         le?vperm        $out0,$out0,$out0,$inpperm
1783         le?vperm        $out1,$out1,$out1,$inpperm
1784         stvx_u          $out0,$x00,$out
1785         le?vperm        $out2,$out2,$out2,$inpperm
1786         stvx_u          $out1,$x10,$out
1787         le?vperm        $out3,$out3,$out3,$inpperm
1788         stvx_u          $out2,$x20,$out
1789         le?vperm        $out4,$out4,$out4,$inpperm
1790         stvx_u          $out3,$x30,$out
1791         le?vperm        $out5,$out5,$out5,$inpperm
1792         stvx_u          $out4,$x40,$out
1793         le?vperm        $out6,$out6,$out6,$inpperm
1794         stvx_u          $out5,$x50,$out
1795         stvx_u          $out6,$x60,$out
1796         addi            $out,$out,0x70
1797         b               Lctr32_enc8x_done
1798
1799 .align  5
1800 Lctr32_enc8x_six:
1801         vcipherlast     $out0,$out0,$in2
1802         vcipherlast     $out1,$out1,$in3
1803         vcipherlast     $out2,$out2,$in4
1804         vcipherlast     $out3,$out3,$in5
1805         vcipherlast     $out4,$out4,$in6
1806         vcipherlast     $out5,$out5,$in7
1807
1808         le?vperm        $out0,$out0,$out0,$inpperm
1809         le?vperm        $out1,$out1,$out1,$inpperm
1810         stvx_u          $out0,$x00,$out
1811         le?vperm        $out2,$out2,$out2,$inpperm
1812         stvx_u          $out1,$x10,$out
1813         le?vperm        $out3,$out3,$out3,$inpperm
1814         stvx_u          $out2,$x20,$out
1815         le?vperm        $out4,$out4,$out4,$inpperm
1816         stvx_u          $out3,$x30,$out
1817         le?vperm        $out5,$out5,$out5,$inpperm
1818         stvx_u          $out4,$x40,$out
1819         stvx_u          $out5,$x50,$out
1820         addi            $out,$out,0x60
1821         b               Lctr32_enc8x_done
1822
1823 .align  5
1824 Lctr32_enc8x_five:
1825         vcipherlast     $out0,$out0,$in3
1826         vcipherlast     $out1,$out1,$in4
1827         vcipherlast     $out2,$out2,$in5
1828         vcipherlast     $out3,$out3,$in6
1829         vcipherlast     $out4,$out4,$in7
1830
1831         le?vperm        $out0,$out0,$out0,$inpperm
1832         le?vperm        $out1,$out1,$out1,$inpperm
1833         stvx_u          $out0,$x00,$out
1834         le?vperm        $out2,$out2,$out2,$inpperm
1835         stvx_u          $out1,$x10,$out
1836         le?vperm        $out3,$out3,$out3,$inpperm
1837         stvx_u          $out2,$x20,$out
1838         le?vperm        $out4,$out4,$out4,$inpperm
1839         stvx_u          $out3,$x30,$out
1840         stvx_u          $out4,$x40,$out
1841         addi            $out,$out,0x50
1842         b               Lctr32_enc8x_done
1843
1844 .align  5
1845 Lctr32_enc8x_four:
1846         vcipherlast     $out0,$out0,$in4
1847         vcipherlast     $out1,$out1,$in5
1848         vcipherlast     $out2,$out2,$in6
1849         vcipherlast     $out3,$out3,$in7
1850
1851         le?vperm        $out0,$out0,$out0,$inpperm
1852         le?vperm        $out1,$out1,$out1,$inpperm
1853         stvx_u          $out0,$x00,$out
1854         le?vperm        $out2,$out2,$out2,$inpperm
1855         stvx_u          $out1,$x10,$out
1856         le?vperm        $out3,$out3,$out3,$inpperm
1857         stvx_u          $out2,$x20,$out
1858         stvx_u          $out3,$x30,$out
1859         addi            $out,$out,0x40
1860         b               Lctr32_enc8x_done
1861
1862 .align  5
1863 Lctr32_enc8x_three:
1864         vcipherlast     $out0,$out0,$in5
1865         vcipherlast     $out1,$out1,$in6
1866         vcipherlast     $out2,$out2,$in7
1867
1868         le?vperm        $out0,$out0,$out0,$inpperm
1869         le?vperm        $out1,$out1,$out1,$inpperm
1870         stvx_u          $out0,$x00,$out
1871         le?vperm        $out2,$out2,$out2,$inpperm
1872         stvx_u          $out1,$x10,$out
1873         stvx_u          $out2,$x20,$out
1874         addi            $out,$out,0x30
1875         b               Lctr32_enc8x_done
1876
1877 .align  5
1878 Lctr32_enc8x_two:
1879         vcipherlast     $out0,$out0,$in6
1880         vcipherlast     $out1,$out1,$in7
1881
1882         le?vperm        $out0,$out0,$out0,$inpperm
1883         le?vperm        $out1,$out1,$out1,$inpperm
1884         stvx_u          $out0,$x00,$out
1885         stvx_u          $out1,$x10,$out
1886         addi            $out,$out,0x20
1887         b               Lctr32_enc8x_done
1888
1889 .align  5
1890 Lctr32_enc8x_one:
1891         vcipherlast     $out0,$out0,$in7
1892
1893         le?vperm        $out0,$out0,$out0,$inpperm
1894         stvx_u          $out0,0,$out
1895         addi            $out,$out,0x10
1896
1897 Lctr32_enc8x_done:
1898         li              r10,`$FRAME+15`
1899         li              r11,`$FRAME+31`
1900         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1901         addi            r10,r10,32
1902         stvx            $inpperm,r11,$sp
1903         addi            r11,r11,32
1904         stvx            $inpperm,r10,$sp
1905         addi            r10,r10,32
1906         stvx            $inpperm,r11,$sp
1907         addi            r11,r11,32
1908         stvx            $inpperm,r10,$sp
1909         addi            r10,r10,32
1910         stvx            $inpperm,r11,$sp
1911         addi            r11,r11,32
1912         stvx            $inpperm,r10,$sp
1913         addi            r10,r10,32
1914         stvx            $inpperm,r11,$sp
1915         addi            r11,r11,32
1916
1917         mtspr           256,$vrsave
1918         lvx             v20,r10,$sp             # ABI says so
1919         addi            r10,r10,32
1920         lvx             v21,r11,$sp
1921         addi            r11,r11,32
1922         lvx             v22,r10,$sp
1923         addi            r10,r10,32
1924         lvx             v23,r11,$sp
1925         addi            r11,r11,32
1926         lvx             v24,r10,$sp
1927         addi            r10,r10,32
1928         lvx             v25,r11,$sp
1929         addi            r11,r11,32
1930         lvx             v26,r10,$sp
1931         addi            r10,r10,32
1932         lvx             v27,r11,$sp
1933         addi            r11,r11,32
1934         lvx             v28,r10,$sp
1935         addi            r10,r10,32
1936         lvx             v29,r11,$sp
1937         addi            r11,r11,32
1938         lvx             v30,r10,$sp
1939         lvx             v31,r11,$sp
1940         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1941         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1942         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1943         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1944         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1945         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1946         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1947         blr
1948         .long           0
1949         .byte           0,12,0x14,0,0x80,6,6,0
1950         .long           0
1951 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1952 ___
1953 }}      }}}
1954
1955 #########################################################################
1956 {{{     # XTS procedures                                                #
1957 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,   #
1958 #                             const AES_KEY *key1, const AES_KEY *key2, #
1959 #                             [const] unsigned char iv[16]);            #
1960 # If $key2 is NULL, a "tweak chaining" mode is engaged, in which the    #
1961 # input tweak value is assumed to be already encrypted, and the last    #
1962 # tweak value, suitable for a consecutive call on the same chunk of     #
1963 # data, is written back to the original buffer. In addition, in "tweak  #
1964 # chaining" mode only complete input blocks are processed.              #
1965
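# Throughout the XTS code the "next tweak value" sequence (vsrab,
# vaddubm, vsldoi, vand with the 0x870101..01 constant, vxor)
# multiplies the tweak by x in GF(2^128), reducing x^128 to
# x^7+x^2+x+1 (0x87). A scalar Perl model of that doubling over the
# standard XTS little-endian tweak layout, for reference only (never
# called by this generator):
sub _ref_xts_mul_x {
        my @t = unpack("C16",$_[0]);
        my $carry = 0;
        for (my $i=0; $i<16; $i++) {
                my $c = $t[$i]>>7;              # bit shifted out of byte $i
                $t[$i] = (($t[$i]<<1)|$carry)&0xff;
                $carry = $c;
        }
        $t[0] ^= 0x87 if $carry;                # fold x^128 back in
        return pack("C16",@t);
}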
1966 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =     map("r$_",(3..10));
1967 my ($rndkey0,$rndkey1,$inout) =                         map("v$_",(0..2));
1968 my ($output,$inptail,$inpperm,$leperm,$keyperm) =       map("v$_",(3..7));
1969 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =              map("v$_",(8..12));
1970 my $taillen = $key2;
1971
1972    ($inp,$idx) = ($idx,$inp);                           # reassign
1973
1974 $code.=<<___;
1975 .globl  .${prefix}_xts_encrypt
1976         mr              $inp,r3                         # reassign
1977         li              r3,-1
1978         ${UCMP}i        $len,16
1979         bltlr-
1980
1981         lis             r0,0xfff0
1982         mfspr           r12,256                         # save vrsave
1983         li              r11,0
1984         mtspr           256,r0
1985
1986         vspltisb        $seven,0x07                     # 0x070707..07
1987         le?lvsl         $leperm,r11,r11
1988         le?vspltisb     $tmp,0x0f
1989         le?vxor         $leperm,$leperm,$seven
1990
1991         li              $idx,15
1992         lvx             $tweak,0,$ivp                   # load [unaligned] iv
1993         lvsl            $inpperm,0,$ivp
1994         lvx             $inptail,$idx,$ivp
1995         le?vxor         $inpperm,$inpperm,$tmp
1996         vperm           $tweak,$tweak,$inptail,$inpperm
1997
1998         neg             r11,$inp
1999         lvsr            $inpperm,0,r11                  # prepare for unaligned load
2000         lvx             $inout,0,$inp
2001         addi            $inp,$inp,15                    # 15 is not a typo
2002         le?vxor         $inpperm,$inpperm,$tmp
2003
2004         ${UCMP}i        $key2,0                         # key2==NULL?
2005         beq             Lxts_enc_no_key2
2006
2007         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
2008         lwz             $rounds,240($key2)
2009         srwi            $rounds,$rounds,1
2010         subi            $rounds,$rounds,1
2011         li              $idx,16
2012
2013         lvx             $rndkey0,0,$key2
2014         lvx             $rndkey1,$idx,$key2
2015         addi            $idx,$idx,16
2016         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2017         vxor            $tweak,$tweak,$rndkey0
2018         lvx             $rndkey0,$idx,$key2
2019         addi            $idx,$idx,16
2020         mtctr           $rounds
2021
2022 Ltweak_xts_enc:
2023         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2024         vcipher         $tweak,$tweak,$rndkey1
2025         lvx             $rndkey1,$idx,$key2
2026         addi            $idx,$idx,16
2027         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2028         vcipher         $tweak,$tweak,$rndkey0
2029         lvx             $rndkey0,$idx,$key2
2030         addi            $idx,$idx,16
2031         bdnz            Ltweak_xts_enc
2032
2033         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2034         vcipher         $tweak,$tweak,$rndkey1
2035         lvx             $rndkey1,$idx,$key2
2036         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2037         vcipherlast     $tweak,$tweak,$rndkey0
2038
2039         li              $ivp,0                          # don't chain the tweak
2040         b               Lxts_enc
2041
2042 Lxts_enc_no_key2:
2043         li              $idx,-16
2044         and             $len,$len,$idx                  # in "tweak chaining"
2045                                                         # mode only complete
2046                                                         # blocks are processed
2047 Lxts_enc:
2048         lvx             $inptail,0,$inp
2049         addi            $inp,$inp,16
2050
2051         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2052         lwz             $rounds,240($key1)
2053         srwi            $rounds,$rounds,1
2054         subi            $rounds,$rounds,1
2055         li              $idx,16
2056
2057         vslb            $eighty7,$seven,$seven          # 0x808080..80
2058         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2059         vspltisb        $tmp,1                          # 0x010101..01
2060         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2061
2062         ${UCMP}i        $len,96
2063         bge             _aesp8_xts_encrypt6x
2064
2065         andi.           $taillen,$len,15
2066         subic           r0,$len,32
2067         subi            $taillen,$taillen,16
2068         subfe           r0,r0,r0
2069         and             r0,r0,$taillen
2070         add             $inp,$inp,r0
2071
2072         lvx             $rndkey0,0,$key1
2073         lvx             $rndkey1,$idx,$key1
2074         addi            $idx,$idx,16
2075         vperm           $inout,$inout,$inptail,$inpperm
2076         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2077         vxor            $inout,$inout,$tweak
2078         vxor            $inout,$inout,$rndkey0
2079         lvx             $rndkey0,$idx,$key1
2080         addi            $idx,$idx,16
2081         mtctr           $rounds
2082         b               Loop_xts_enc
2083
2084 .align  5
2085 Loop_xts_enc:
2086         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2087         vcipher         $inout,$inout,$rndkey1
2088         lvx             $rndkey1,$idx,$key1
2089         addi            $idx,$idx,16
2090         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2091         vcipher         $inout,$inout,$rndkey0
2092         lvx             $rndkey0,$idx,$key1
2093         addi            $idx,$idx,16
2094         bdnz            Loop_xts_enc
2095
2096         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2097         vcipher         $inout,$inout,$rndkey1
2098         lvx             $rndkey1,$idx,$key1
2099         li              $idx,16
2100         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2101         vxor            $rndkey0,$rndkey0,$tweak
2102         vcipherlast     $output,$inout,$rndkey0
2103
2104         le?vperm        $tmp,$output,$output,$leperm
2105         be?nop
2106         le?stvx_u       $tmp,0,$out
2107         be?stvx_u       $output,0,$out
2108         addi            $out,$out,16
2109
2110         subic.          $len,$len,16
2111         beq             Lxts_enc_done
2112
2113         vmr             $inout,$inptail
2114         lvx             $inptail,0,$inp
2115         addi            $inp,$inp,16
2116         lvx             $rndkey0,0,$key1
2117         lvx             $rndkey1,$idx,$key1
2118         addi            $idx,$idx,16
2119
2120         subic           r0,$len,32
2121         subfe           r0,r0,r0
2122         and             r0,r0,$taillen
2123         add             $inp,$inp,r0
2124
2125         vsrab           $tmp,$tweak,$seven              # next tweak value
2126         vaddubm         $tweak,$tweak,$tweak
2127         vsldoi          $tmp,$tmp,$tmp,15
2128         vand            $tmp,$tmp,$eighty7
2129         vxor            $tweak,$tweak,$tmp
2130
2131         vperm           $inout,$inout,$inptail,$inpperm
2132         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2133         vxor            $inout,$inout,$tweak
2134         vxor            $output,$output,$rndkey0        # just in case $len<16
2135         vxor            $inout,$inout,$rndkey0
2136         lvx             $rndkey0,$idx,$key1
2137         addi            $idx,$idx,16
2138
2139         mtctr           $rounds
2140         ${UCMP}i        $len,16
2141         bge             Loop_xts_enc
2142
2143         vxor            $output,$output,$tweak
2144         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2145         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2146         vspltisb        $tmp,-1
2147         vperm           $inptail,$inptail,$tmp,$inpperm
2148         vsel            $inout,$inout,$output,$inptail
2149
2150         subi            r11,$out,17
2151         subi            $out,$out,16
2152         mtctr           $len
2153         li              $len,16
2154 Loop_xts_enc_steal:
2155         lbzu            r0,1(r11)
2156         stb             r0,16(r11)
2157         bdnz            Loop_xts_enc_steal
2158
2159         mtctr           $rounds
2160         b               Loop_xts_enc                    # one more time...
2161
2162 Lxts_enc_done:
2163         ${UCMP}i        $ivp,0
2164         beq             Lxts_enc_ret
2165
2166         vsrab           $tmp,$tweak,$seven              # next tweak value
2167         vaddubm         $tweak,$tweak,$tweak
2168         vsldoi          $tmp,$tmp,$tmp,15
2169         vand            $tmp,$tmp,$eighty7
2170         vxor            $tweak,$tweak,$tmp
2171
2172         le?vperm        $tweak,$tweak,$tweak,$leperm
2173         stvx_u          $tweak,0,$ivp
2174
2175 Lxts_enc_ret:
2176         mtspr           256,r12                         # restore vrsave
2177         li              r3,0
2178         blr
2179         .long           0
2180         .byte           0,12,0x04,0,0x80,6,6,0
2181         .long           0
2182 .size   .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2183
2184 .globl  .${prefix}_xts_decrypt
2185         mr              $inp,r3                         # reassign
2186         li              r3,-1
2187         ${UCMP}i        $len,16
2188         bltlr-
2189
2190         lis             r0,0xfff8
2191         mfspr           r12,256                         # save vrsave
2192         li              r11,0
2193         mtspr           256,r0
2194
2195         andi.           r0,$len,15
2196         neg             r0,r0
2197         andi.           r0,r0,16
2198         sub             $len,$len,r0
2199
2200         vspltisb        $seven,0x07                     # 0x070707..07
2201         le?lvsl         $leperm,r11,r11
2202         le?vspltisb     $tmp,0x0f
2203         le?vxor         $leperm,$leperm,$seven
2204
2205         li              $idx,15
2206         lvx             $tweak,0,$ivp                   # load [unaligned] iv
2207         lvsl            $inpperm,0,$ivp
2208         lvx             $inptail,$idx,$ivp
2209         le?vxor         $inpperm,$inpperm,$tmp
2210         vperm           $tweak,$tweak,$inptail,$inpperm
2211
2212         neg             r11,$inp
2213         lvsr            $inpperm,0,r11                  # prepare for unaligned load
2214         lvx             $inout,0,$inp
2215         addi            $inp,$inp,15                    # 15 is not a typo
2216         le?vxor         $inpperm,$inpperm,$tmp
2217
2218         ${UCMP}i        $key2,0                         # key2==NULL?
2219         beq             Lxts_dec_no_key2
2220
2221         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
2222         lwz             $rounds,240($key2)
2223         srwi            $rounds,$rounds,1
2224         subi            $rounds,$rounds,1
2225         li              $idx,16
2226
2227         lvx             $rndkey0,0,$key2
2228         lvx             $rndkey1,$idx,$key2
2229         addi            $idx,$idx,16
2230         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2231         vxor            $tweak,$tweak,$rndkey0
2232         lvx             $rndkey0,$idx,$key2
2233         addi            $idx,$idx,16
2234         mtctr           $rounds
2235
2236 Ltweak_xts_dec:
2237         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2238         vcipher         $tweak,$tweak,$rndkey1
2239         lvx             $rndkey1,$idx,$key2
2240         addi            $idx,$idx,16
2241         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2242         vcipher         $tweak,$tweak,$rndkey0
2243         lvx             $rndkey0,$idx,$key2
2244         addi            $idx,$idx,16
2245         bdnz            Ltweak_xts_dec
2246
2247         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2248         vcipher         $tweak,$tweak,$rndkey1
2249         lvx             $rndkey1,$idx,$key2
2250         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2251         vcipherlast     $tweak,$tweak,$rndkey0
2252
2253         li              $ivp,0                          # don't chain the tweak
2254         b               Lxts_dec
2255
2256 Lxts_dec_no_key2:
2257         neg             $idx,$len
2258         andi.           $idx,$idx,15
2259         add             $len,$len,$idx                  # in "tweak chaining"
2260                                                         # mode only complete
2261                                                         # blocks are processed
2262 Lxts_dec:
2263         lvx             $inptail,0,$inp
2264         addi            $inp,$inp,16
2265
2266         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2267         lwz             $rounds,240($key1)
2268         srwi            $rounds,$rounds,1
2269         subi            $rounds,$rounds,1
2270         li              $idx,16
2271
2272         vslb            $eighty7,$seven,$seven          # 0x808080..80
2273         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2274         vspltisb        $tmp,1                          # 0x010101..01
2275         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2276
2277         ${UCMP}i        $len,96
2278         bge             _aesp8_xts_decrypt6x
2279
2280         lvx             $rndkey0,0,$key1
2281         lvx             $rndkey1,$idx,$key1
2282         addi            $idx,$idx,16
2283         vperm           $inout,$inout,$inptail,$inpperm
2284         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2285         vxor            $inout,$inout,$tweak
2286         vxor            $inout,$inout,$rndkey0
2287         lvx             $rndkey0,$idx,$key1
2288         addi            $idx,$idx,16
2289         mtctr           $rounds
2290
2291         ${UCMP}i        $len,16
2292         blt             Ltail_xts_dec
2293         be?b            Loop_xts_dec
2294
2295 .align  5
2296 Loop_xts_dec:
2297         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2298         vncipher        $inout,$inout,$rndkey1
2299         lvx             $rndkey1,$idx,$key1
2300         addi            $idx,$idx,16
2301         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2302         vncipher        $inout,$inout,$rndkey0
2303         lvx             $rndkey0,$idx,$key1
2304         addi            $idx,$idx,16
2305         bdnz            Loop_xts_dec
2306
2307         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2308         vncipher        $inout,$inout,$rndkey1
2309         lvx             $rndkey1,$idx,$key1
2310         li              $idx,16
2311         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2312         vxor            $rndkey0,$rndkey0,$tweak
2313         vncipherlast    $output,$inout,$rndkey0
2314
2315         le?vperm        $tmp,$output,$output,$leperm
2316         be?nop
2317         le?stvx_u       $tmp,0,$out
2318         be?stvx_u       $output,0,$out
2319         addi            $out,$out,16
2320
2321         subic.          $len,$len,16
2322         beq             Lxts_dec_done
2323
2324         vmr             $inout,$inptail
2325         lvx             $inptail,0,$inp
2326         addi            $inp,$inp,16
2327         lvx             $rndkey0,0,$key1
2328         lvx             $rndkey1,$idx,$key1
2329         addi            $idx,$idx,16
2330
2331         vsrab           $tmp,$tweak,$seven              # next tweak value
2332         vaddubm         $tweak,$tweak,$tweak
2333         vsldoi          $tmp,$tmp,$tmp,15
2334         vand            $tmp,$tmp,$eighty7
2335         vxor            $tweak,$tweak,$tmp
2336
2337         vperm           $inout,$inout,$inptail,$inpperm
2338         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2339         vxor            $inout,$inout,$tweak
2340         vxor            $inout,$inout,$rndkey0
2341         lvx             $rndkey0,$idx,$key1
2342         addi            $idx,$idx,16
2343
2344         mtctr           $rounds
2345         ${UCMP}i        $len,16
2346         bge             Loop_xts_dec
2347
2348 Ltail_xts_dec:
2349         vsrab           $tmp,$tweak,$seven              # next tweak value
2350         vaddubm         $tweak1,$tweak,$tweak
2351         vsldoi          $tmp,$tmp,$tmp,15
2352         vand            $tmp,$tmp,$eighty7
2353         vxor            $tweak1,$tweak1,$tmp
2354
2355         subi            $inp,$inp,16
2356         add             $inp,$inp,$len
2357
2358         vxor            $inout,$inout,$tweak            # :-(
2359         vxor            $inout,$inout,$tweak1           # :-)
2360
2361 Loop_xts_dec_short:
2362         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2363         vncipher        $inout,$inout,$rndkey1
2364         lvx             $rndkey1,$idx,$key1
2365         addi            $idx,$idx,16
2366         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2367         vncipher        $inout,$inout,$rndkey0
2368         lvx             $rndkey0,$idx,$key1
2369         addi            $idx,$idx,16
2370         bdnz            Loop_xts_dec_short
2371
2372         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2373         vncipher        $inout,$inout,$rndkey1
2374         lvx             $rndkey1,$idx,$key1
2375         li              $idx,16
2376         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2377         vxor            $rndkey0,$rndkey0,$tweak1
2378         vncipherlast    $output,$inout,$rndkey0
2379
2380         le?vperm        $tmp,$output,$output,$leperm
2381         be?nop
2382         le?stvx_u       $tmp,0,$out
2383         be?stvx_u       $output,0,$out
2384
2385         vmr             $inout,$inptail
2386         lvx             $inptail,0,$inp
2387         #addi           $inp,$inp,16
2388         lvx             $rndkey0,0,$key1
2389         lvx             $rndkey1,$idx,$key1
2390         addi            $idx,$idx,16
2391         vperm           $inout,$inout,$inptail,$inpperm
2392         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2393
2394         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2395         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2396         vspltisb        $tmp,-1
2397         vperm           $inptail,$inptail,$tmp,$inpperm
2398         vsel            $inout,$inout,$output,$inptail
2399
2400         vxor            $rndkey0,$rndkey0,$tweak
2401         vxor            $inout,$inout,$rndkey0
2402         lvx             $rndkey0,$idx,$key1
2403         addi            $idx,$idx,16
2404
2405         subi            r11,$out,1
2406         mtctr           $len
2407         li              $len,16
2408 Loop_xts_dec_steal:
2409         lbzu            r0,1(r11)
2410         stb             r0,16(r11)
2411         bdnz            Loop_xts_dec_steal
2412
2413         mtctr           $rounds
2414         b               Loop_xts_dec                    # one more time...
2415
2416 Lxts_dec_done:
2417         ${UCMP}i        $ivp,0
2418         beq             Lxts_dec_ret
2419
2420         vsrab           $tmp,$tweak,$seven              # next tweak value
2421         vaddubm         $tweak,$tweak,$tweak
2422         vsldoi          $tmp,$tmp,$tmp,15
2423         vand            $tmp,$tmp,$eighty7
2424         vxor            $tweak,$tweak,$tmp
2425
2426         le?vperm        $tweak,$tweak,$tweak,$leperm
2427         stvx_u          $tweak,0,$ivp
2428
2429 Lxts_dec_ret:
2430         mtspr           256,r12                         # restore vrsave
2431         li              r3,0
2432         blr
2433         .long           0
2434         .byte           0,12,0x04,0,0x80,6,6,0
2435         .long           0
2436 .size   .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2437 ___
2438 #########################################################################
2439 {{      # Optimized XTS procedures                                      #
2440 my $key_=$key2;
2441 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2442     $x00=0 if ($flavour =~ /osx/);
2443 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
2444 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2445 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2446 my $rndkey0="v23";      # v24-v25 rotating buffer for the first few round keys
2447                         # v26-v31 last 6 round keys
2448 my ($keyperm)=($out0);  # aliases with "caller", redundant assignment
2449 my $taillen=$x70;
2450
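# The subic/subfe/and/add groups in the code below are a branchless
# "adjust only on the final pass" idiom: subfe after subic leaves an
# all-ones mask exactly when the subtraction borrowed, so the tail
# adjustment is masked in without a branch. A scalar equivalent, for
# reference only (never called by this generator):
sub _ref_borrow_mask_adjust {
        my ($len,$n,$adjust) = @_;
        my $mask = ($len<$n) ? -1 : 0;          # subic + subfe
        return $mask ? $adjust : 0;             # and; caller adds result
}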
2451 $code.=<<___;
2452 .align  5
2453 _aesp8_xts_encrypt6x:
2454         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2455         mflr            r11
2456         li              r7,`$FRAME+8*16+15`
2457         li              r3,`$FRAME+8*16+31`
2458         $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2459         stvx            v20,r7,$sp              # ABI says so
2460         addi            r7,r7,32
2461         stvx            v21,r3,$sp
2462         addi            r3,r3,32
2463         stvx            v22,r7,$sp
2464         addi            r7,r7,32
2465         stvx            v23,r3,$sp
2466         addi            r3,r3,32
2467         stvx            v24,r7,$sp
2468         addi            r7,r7,32
2469         stvx            v25,r3,$sp
2470         addi            r3,r3,32
2471         stvx            v26,r7,$sp
2472         addi            r7,r7,32
2473         stvx            v27,r3,$sp
2474         addi            r3,r3,32
2475         stvx            v28,r7,$sp
2476         addi            r7,r7,32
2477         stvx            v29,r3,$sp
2478         addi            r3,r3,32
2479         stvx            v30,r7,$sp
2480         stvx            v31,r3,$sp
2481         li              r0,-1
2482         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
2483         li              $x10,0x10
2484         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2485         li              $x20,0x20
2486         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2487         li              $x30,0x30
2488         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2489         li              $x40,0x40
2490         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2491         li              $x50,0x50
2492         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2493         li              $x60,0x60
2494         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2495         li              $x70,0x70
2496         mtspr           256,r0
2497
2498         subi            $rounds,$rounds,3       # -4 in total
2499
2500         lvx             $rndkey0,$x00,$key1     # load key schedule
2501         lvx             v30,$x10,$key1
2502         addi            $key1,$key1,0x20
2503         lvx             v31,$x00,$key1
2504         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
2505         addi            $key_,$sp,$FRAME+15
2506         mtctr           $rounds
2507
2508 Load_xts_enc_key:
2509         ?vperm          v24,v30,v31,$keyperm
2510         lvx             v30,$x10,$key1
2511         addi            $key1,$key1,0x20
2512         stvx            v24,$x00,$key_          # off-load round[1]
2513         ?vperm          v25,v31,v30,$keyperm
2514         lvx             v31,$x00,$key1
2515         stvx            v25,$x10,$key_          # off-load round[2]
2516         addi            $key_,$key_,0x20
2517         bdnz            Load_xts_enc_key
2518
2519         lvx             v26,$x10,$key1
2520         ?vperm          v24,v30,v31,$keyperm
2521         lvx             v27,$x20,$key1
2522         stvx            v24,$x00,$key_          # off-load round[3]
2523         ?vperm          v25,v31,v26,$keyperm
2524         lvx             v28,$x30,$key1
2525         stvx            v25,$x10,$key_          # off-load round[4]
2526         addi            $key_,$sp,$FRAME+15     # rewind $key_
2527         ?vperm          v26,v26,v27,$keyperm
2528         lvx             v29,$x40,$key1
2529         ?vperm          v27,v27,v28,$keyperm
2530         lvx             v30,$x50,$key1
2531         ?vperm          v28,v28,v29,$keyperm
2532         lvx             v31,$x60,$key1
2533         ?vperm          v29,v29,v30,$keyperm
2534         lvx             $twk5,$x70,$key1        # borrow $twk5
2535         ?vperm          v30,v30,v31,$keyperm
2536         lvx             v24,$x00,$key_          # pre-load round[1]
2537         ?vperm          v31,v31,$twk5,$keyperm
2538         lvx             v25,$x10,$key_          # pre-load round[2]
2539
2540          vperm          $in0,$inout,$inptail,$inpperm
2541          subi           $inp,$inp,31            # undo "caller"
2542         vxor            $twk0,$tweak,$rndkey0
2543         vsrab           $tmp,$tweak,$seven      # next tweak value
2544         vaddubm         $tweak,$tweak,$tweak
2545         vsldoi          $tmp,$tmp,$tmp,15
2546         vand            $tmp,$tmp,$eighty7
2547          vxor           $out0,$in0,$twk0
2548         vxor            $tweak,$tweak,$tmp
2549
2550          lvx_u          $in1,$x10,$inp
2551         vxor            $twk1,$tweak,$rndkey0
2552         vsrab           $tmp,$tweak,$seven      # next tweak value
2553         vaddubm         $tweak,$tweak,$tweak
2554         vsldoi          $tmp,$tmp,$tmp,15
2555          le?vperm       $in1,$in1,$in1,$leperm
2556         vand            $tmp,$tmp,$eighty7
2557          vxor           $out1,$in1,$twk1
2558         vxor            $tweak,$tweak,$tmp
2559
2560          lvx_u          $in2,$x20,$inp
2561          andi.          $taillen,$len,15
2562         vxor            $twk2,$tweak,$rndkey0
2563         vsrab           $tmp,$tweak,$seven      # next tweak value
2564         vaddubm         $tweak,$tweak,$tweak
2565         vsldoi          $tmp,$tmp,$tmp,15
2566          le?vperm       $in2,$in2,$in2,$leperm
2567         vand            $tmp,$tmp,$eighty7
2568          vxor           $out2,$in2,$twk2
2569         vxor            $tweak,$tweak,$tmp
2570
2571          lvx_u          $in3,$x30,$inp
2572          sub            $len,$len,$taillen
2573         vxor            $twk3,$tweak,$rndkey0
2574         vsrab           $tmp,$tweak,$seven      # next tweak value
2575         vaddubm         $tweak,$tweak,$tweak
2576         vsldoi          $tmp,$tmp,$tmp,15
2577          le?vperm       $in3,$in3,$in3,$leperm
2578         vand            $tmp,$tmp,$eighty7
2579          vxor           $out3,$in3,$twk3
2580         vxor            $tweak,$tweak,$tmp
2581
2582          lvx_u          $in4,$x40,$inp
2583          subi           $len,$len,0x60
2584         vxor            $twk4,$tweak,$rndkey0
2585         vsrab           $tmp,$tweak,$seven      # next tweak value
2586         vaddubm         $tweak,$tweak,$tweak
2587         vsldoi          $tmp,$tmp,$tmp,15
2588          le?vperm       $in4,$in4,$in4,$leperm
2589         vand            $tmp,$tmp,$eighty7
2590          vxor           $out4,$in4,$twk4
2591         vxor            $tweak,$tweak,$tmp
2592
2593          lvx_u          $in5,$x50,$inp
2594          addi           $inp,$inp,0x60
2595         vxor            $twk5,$tweak,$rndkey0
2596         vsrab           $tmp,$tweak,$seven      # next tweak value
2597         vaddubm         $tweak,$tweak,$tweak
2598         vsldoi          $tmp,$tmp,$tmp,15
2599          le?vperm       $in5,$in5,$in5,$leperm
2600         vand            $tmp,$tmp,$eighty7
2601          vxor           $out5,$in5,$twk5
2602         vxor            $tweak,$tweak,$tmp
2603
2604         vxor            v31,v31,$rndkey0
2605         mtctr           $rounds
2606         b               Loop_xts_enc6x
2607
2608 .align  5
2609 Loop_xts_enc6x:
2610         vcipher         $out0,$out0,v24
2611         vcipher         $out1,$out1,v24
2612         vcipher         $out2,$out2,v24
2613         vcipher         $out3,$out3,v24
2614         vcipher         $out4,$out4,v24
2615         vcipher         $out5,$out5,v24
2616         lvx             v24,$x20,$key_          # round[3]
2617         addi            $key_,$key_,0x20
2618
2619         vcipher         $out0,$out0,v25
2620         vcipher         $out1,$out1,v25
2621         vcipher         $out2,$out2,v25
2622         vcipher         $out3,$out3,v25
2623         vcipher         $out4,$out4,v25
2624         vcipher         $out5,$out5,v25
2625         lvx             v25,$x10,$key_          # round[4]
2626         bdnz            Loop_xts_enc6x
2627
2628         subic           $len,$len,96            # $len-=96
2629          vxor           $in0,$twk0,v31          # xor with last round key
2630         vcipher         $out0,$out0,v24
2631         vcipher         $out1,$out1,v24
2632          vsrab          $tmp,$tweak,$seven      # next tweak value
2633          vxor           $twk0,$tweak,$rndkey0
2634          vaddubm        $tweak,$tweak,$tweak
2635         vcipher         $out2,$out2,v24
2636         vcipher         $out3,$out3,v24
2637          vsldoi         $tmp,$tmp,$tmp,15
2638         vcipher         $out4,$out4,v24
2639         vcipher         $out5,$out5,v24
2640
2641         subfe.          r0,r0,r0                # borrow ? -1 : 0
2642          vand           $tmp,$tmp,$eighty7
2643         vcipher         $out0,$out0,v25
2644         vcipher         $out1,$out1,v25
2645          vxor           $tweak,$tweak,$tmp
2646         vcipher         $out2,$out2,v25
2647         vcipher         $out3,$out3,v25
2648          vxor           $in1,$twk1,v31
2649          vsrab          $tmp,$tweak,$seven      # next tweak value
2650          vxor           $twk1,$tweak,$rndkey0
2651         vcipher         $out4,$out4,v25
2652         vcipher         $out5,$out5,v25
2653
2654         and             r0,r0,$len
2655          vaddubm        $tweak,$tweak,$tweak
2656          vsldoi         $tmp,$tmp,$tmp,15
2657         vcipher         $out0,$out0,v26
2658         vcipher         $out1,$out1,v26
2659          vand           $tmp,$tmp,$eighty7
2660         vcipher         $out2,$out2,v26
2661         vcipher         $out3,$out3,v26
2662          vxor           $tweak,$tweak,$tmp
2663         vcipher         $out4,$out4,v26
2664         vcipher         $out5,$out5,v26
2665
2666         add             $inp,$inp,r0            # $inp is adjusted so that,
2667                                                 # at exit from the loop,
2668                                                 # inX-in5 hold the last
2669                                                 # input blocks
2670          vxor           $in2,$twk2,v31
2671          vsrab          $tmp,$tweak,$seven      # next tweak value
2672          vxor           $twk2,$tweak,$rndkey0
2673          vaddubm        $tweak,$tweak,$tweak
2674         vcipher         $out0,$out0,v27
2675         vcipher         $out1,$out1,v27
2676          vsldoi         $tmp,$tmp,$tmp,15
2677         vcipher         $out2,$out2,v27
2678         vcipher         $out3,$out3,v27
2679          vand           $tmp,$tmp,$eighty7
2680         vcipher         $out4,$out4,v27
2681         vcipher         $out5,$out5,v27
2682
2683         addi            $key_,$sp,$FRAME+15     # rewind $key_
2684          vxor           $tweak,$tweak,$tmp
2685         vcipher         $out0,$out0,v28
2686         vcipher         $out1,$out1,v28
2687          vxor           $in3,$twk3,v31
2688          vsrab          $tmp,$tweak,$seven      # next tweak value
2689          vxor           $twk3,$tweak,$rndkey0
2690         vcipher         $out2,$out2,v28
2691         vcipher         $out3,$out3,v28
2692          vaddubm        $tweak,$tweak,$tweak
2693          vsldoi         $tmp,$tmp,$tmp,15
2694         vcipher         $out4,$out4,v28
2695         vcipher         $out5,$out5,v28
2696         lvx             v24,$x00,$key_          # re-pre-load round[1]
2697          vand           $tmp,$tmp,$eighty7
2698
2699         vcipher         $out0,$out0,v29
2700         vcipher         $out1,$out1,v29
2701          vxor           $tweak,$tweak,$tmp
2702         vcipher         $out2,$out2,v29
2703         vcipher         $out3,$out3,v29
2704          vxor           $in4,$twk4,v31
2705          vsrab          $tmp,$tweak,$seven      # next tweak value
2706          vxor           $twk4,$tweak,$rndkey0
2707         vcipher         $out4,$out4,v29
2708         vcipher         $out5,$out5,v29
2709         lvx             v25,$x10,$key_          # re-pre-load round[2]
2710          vaddubm        $tweak,$tweak,$tweak
2711          vsldoi         $tmp,$tmp,$tmp,15
2712
2713         vcipher         $out0,$out0,v30
2714         vcipher         $out1,$out1,v30
2715          vand           $tmp,$tmp,$eighty7
2716         vcipher         $out2,$out2,v30
2717         vcipher         $out3,$out3,v30
2718          vxor           $tweak,$tweak,$tmp
2719         vcipher         $out4,$out4,v30
2720         vcipher         $out5,$out5,v30
2721          vxor           $in5,$twk5,v31
2722          vsrab          $tmp,$tweak,$seven      # next tweak value
2723          vxor           $twk5,$tweak,$rndkey0
2724
2725         vcipherlast     $out0,$out0,$in0
2726          lvx_u          $in0,$x00,$inp          # load next input block
2727          vaddubm        $tweak,$tweak,$tweak
2728          vsldoi         $tmp,$tmp,$tmp,15
2729         vcipherlast     $out1,$out1,$in1
2730          lvx_u          $in1,$x10,$inp
2731         vcipherlast     $out2,$out2,$in2
2732          le?vperm       $in0,$in0,$in0,$leperm
2733          lvx_u          $in2,$x20,$inp
2734          vand           $tmp,$tmp,$eighty7
2735         vcipherlast     $out3,$out3,$in3
2736          le?vperm       $in1,$in1,$in1,$leperm
2737          lvx_u          $in3,$x30,$inp
2738         vcipherlast     $out4,$out4,$in4
2739          le?vperm       $in2,$in2,$in2,$leperm
2740          lvx_u          $in4,$x40,$inp
2741          vxor           $tweak,$tweak,$tmp
2742         vcipherlast     $tmp,$out5,$in5         # last block might be needed
2743                                                 # in stealing mode
2744          le?vperm       $in3,$in3,$in3,$leperm
2745          lvx_u          $in5,$x50,$inp
2746          addi           $inp,$inp,0x60
2747          le?vperm       $in4,$in4,$in4,$leperm
2748          le?vperm       $in5,$in5,$in5,$leperm
2749
2750         le?vperm        $out0,$out0,$out0,$leperm
2751         le?vperm        $out1,$out1,$out1,$leperm
2752         stvx_u          $out0,$x00,$out         # store output
2753          vxor           $out0,$in0,$twk0
2754         le?vperm        $out2,$out2,$out2,$leperm
2755         stvx_u          $out1,$x10,$out
2756          vxor           $out1,$in1,$twk1
2757         le?vperm        $out3,$out3,$out3,$leperm
2758         stvx_u          $out2,$x20,$out
2759          vxor           $out2,$in2,$twk2
2760         le?vperm        $out4,$out4,$out4,$leperm
2761         stvx_u          $out3,$x30,$out
2762          vxor           $out3,$in3,$twk3
2763         le?vperm        $out5,$tmp,$tmp,$leperm
2764         stvx_u          $out4,$x40,$out
2765          vxor           $out4,$in4,$twk4
2766         le?stvx_u       $out5,$x50,$out
2767         be?stvx_u       $tmp,$x50,$out
2768          vxor           $out5,$in5,$twk5
2769         addi            $out,$out,0x60
2770
2771         mtctr           $rounds
2772         beq             Loop_xts_enc6x          # did $len-=96 borrow?
2773
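# Fewer than six blocks remain: undo the 0x60 bias on $len and
# dispatch to the path that handles exactly that many blocks.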
2774         addic.          $len,$len,0x60
2775         beq             Lxts_enc6x_zero
2776         cmpwi           $len,0x20
2777         blt             Lxts_enc6x_one
2778         nop
2779         beq             Lxts_enc6x_two
2780         cmpwi           $len,0x40
2781         blt             Lxts_enc6x_three
2782         nop
2783         beq             Lxts_enc6x_four
2784
2785 Lxts_enc6x_five:
2786         vxor            $out0,$in1,$twk0
2787         vxor            $out1,$in2,$twk1
2788         vxor            $out2,$in3,$twk2
2789         vxor            $out3,$in4,$twk3
2790         vxor            $out4,$in5,$twk4
2791
2792         bl              _aesp8_xts_enc5x
2793
2794         le?vperm        $out0,$out0,$out0,$leperm
2795         vmr             $twk0,$twk5             # unused tweak
2796         le?vperm        $out1,$out1,$out1,$leperm
2797         stvx_u          $out0,$x00,$out         # store output
2798         le?vperm        $out2,$out2,$out2,$leperm
2799         stvx_u          $out1,$x10,$out
2800         le?vperm        $out3,$out3,$out3,$leperm
2801         stvx_u          $out2,$x20,$out
2802         vxor            $tmp,$out4,$twk5        # last block prep for stealing
2803         le?vperm        $out4,$out4,$out4,$leperm
2804         stvx_u          $out3,$x30,$out
2805         stvx_u          $out4,$x40,$out
2806         addi            $out,$out,0x50
2807         bne             Lxts_enc6x_steal
2808         b               Lxts_enc6x_done
2809
2810 .align  4
2811 Lxts_enc6x_four:
2812         vxor            $out0,$in2,$twk0
2813         vxor            $out1,$in3,$twk1
2814         vxor            $out2,$in4,$twk2
2815         vxor            $out3,$in5,$twk3
2816         vxor            $out4,$out4,$out4
2817
2818         bl              _aesp8_xts_enc5x
2819
2820         le?vperm        $out0,$out0,$out0,$leperm
2821         vmr             $twk0,$twk4             # unused tweak
2822         le?vperm        $out1,$out1,$out1,$leperm
2823         stvx_u          $out0,$x00,$out         # store output
2824         le?vperm        $out2,$out2,$out2,$leperm
2825         stvx_u          $out1,$x10,$out
2826         vxor            $tmp,$out3,$twk4        # last block prep for stealing
2827         le?vperm        $out3,$out3,$out3,$leperm
2828         stvx_u          $out2,$x20,$out
2829         stvx_u          $out3,$x30,$out
2830         addi            $out,$out,0x40
2831         bne             Lxts_enc6x_steal
2832         b               Lxts_enc6x_done
2833
2834 .align  4
2835 Lxts_enc6x_three:
2836         vxor            $out0,$in3,$twk0
2837         vxor            $out1,$in4,$twk1
2838         vxor            $out2,$in5,$twk2
2839         vxor            $out3,$out3,$out3
2840         vxor            $out4,$out4,$out4
2841
2842         bl              _aesp8_xts_enc5x
2843
2844         le?vperm        $out0,$out0,$out0,$leperm
2845         vmr             $twk0,$twk3             # unused tweak
2846         le?vperm        $out1,$out1,$out1,$leperm
2847         stvx_u          $out0,$x00,$out         # store output
2848         vxor            $tmp,$out2,$twk3        # last block prep for stealing
2849         le?vperm        $out2,$out2,$out2,$leperm
2850         stvx_u          $out1,$x10,$out
2851         stvx_u          $out2,$x20,$out
2852         addi            $out,$out,0x30
2853         bne             Lxts_enc6x_steal
2854         b               Lxts_enc6x_done
2855
2856 .align  4
2857 Lxts_enc6x_two:
2858         vxor            $out0,$in4,$twk0
2859         vxor            $out1,$in5,$twk1
2860         vxor            $out2,$out2,$out2
2861         vxor            $out3,$out3,$out3
2862         vxor            $out4,$out4,$out4
2863
2864         bl              _aesp8_xts_enc5x
2865
2866         le?vperm        $out0,$out0,$out0,$leperm
2867         vmr             $twk0,$twk2             # unused tweak
2868         vxor            $tmp,$out1,$twk2        # last block prep for stealing
2869         le?vperm        $out1,$out1,$out1,$leperm
2870         stvx_u          $out0,$x00,$out         # store output
2871         stvx_u          $out1,$x10,$out
2872         addi            $out,$out,0x20
2873         bne             Lxts_enc6x_steal
2874         b               Lxts_enc6x_done
2875
2876 .align  4
2877 Lxts_enc6x_one:
2878         vxor            $out0,$in5,$twk0
2879         nop
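# Single-block path; also re-entered from Lxts_enc6x_steal to
# encrypt the merged block produced by ciphertext stealing.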
2880 Loop_xts_enc1x:
2881         vcipher         $out0,$out0,v24
2882         lvx             v24,$x20,$key_          # round[3]
2883         addi            $key_,$key_,0x20
2884
2885         vcipher         $out0,$out0,v25
2886         lvx             v25,$x10,$key_          # round[4]
2887         bdnz            Loop_xts_enc1x
2888
2889         add             $inp,$inp,$taillen
2890         cmpwi           $taillen,0
2891         vcipher         $out0,$out0,v24
2892
2893         subi            $inp,$inp,16
2894         vcipher         $out0,$out0,v25
2895
2896         lvsr            $inpperm,0,$taillen
2897         vcipher         $out0,$out0,v26
2898
2899         lvx_u           $in0,0,$inp
2900         vcipher         $out0,$out0,v27
2901
2902         addi            $key_,$sp,$FRAME+15     # rewind $key_
2903         vcipher         $out0,$out0,v28
2904         lvx             v24,$x00,$key_          # re-pre-load round[1]
2905
2906         vcipher         $out0,$out0,v29
2907         lvx             v25,$x10,$key_          # re-pre-load round[2]
2908          vxor           $twk0,$twk0,v31
2909
2910         le?vperm        $in0,$in0,$in0,$leperm
2911         vcipher         $out0,$out0,v30
2912
2913         vperm           $in0,$in0,$in0,$inpperm
2914         vcipherlast     $out0,$out0,$twk0
2915
2916         vmr             $twk0,$twk1             # unused tweak
2917         vxor            $tmp,$out0,$twk1        # last block prep for stealing
2918         le?vperm        $out0,$out0,$out0,$leperm
2919         stvx_u          $out0,$x00,$out         # store output
2920         addi            $out,$out,0x10
2921         bne             Lxts_enc6x_steal
2922         b               Lxts_enc6x_done
2923
2924 .align  4
2925 Lxts_enc6x_zero:
2926         cmpwi           $taillen,0
2927         beq             Lxts_enc6x_done
2928
2929         add             $inp,$inp,$taillen
2930         subi            $inp,$inp,16
2931         lvx_u           $in0,0,$inp
2932         lvsr            $inpperm,0,$taillen     # $in5 is no more
2933         le?vperm        $in0,$in0,$in0,$leperm
2934         vperm           $in0,$in0,$in0,$inpperm
2935         vxor            $tmp,$tmp,$twk0
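# Ciphertext stealing: merge the partial plaintext tail with the
# trailing bytes of the last full ciphertext block kept in $tmp,
# copy that block's leading $taillen bytes into the short final
# position, then re-encrypt the merged block in its place via
# Loop_xts_enc1x.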
2936 Lxts_enc6x_steal:
2937         vxor            $in0,$in0,$twk0
2938         vxor            $out0,$out0,$out0
2939         vspltisb        $out1,-1
2940         vperm           $out0,$out0,$out1,$inpperm
2941         vsel            $out0,$in0,$tmp,$out0   # $tmp is last block, remember?
2942
2943         subi            r30,$out,17
2944         subi            $out,$out,16
2945         mtctr           $taillen
2946 Loop_xts_enc6x_steal:
2947         lbzu            r0,1(r30)
2948         stb             r0,16(r30)
2949         bdnz            Loop_xts_enc6x_steal
2950
2951         li              $taillen,0
2952         mtctr           $rounds
2953         b               Loop_xts_enc1x          # one more time...
2954
2955 .align  4
2956 Lxts_enc6x_done:
2957         ${UCMP}i        $ivp,0
2958         beq             Lxts_enc6x_ret
2959
2960         vxor            $tweak,$twk0,$rndkey0
2961         le?vperm        $tweak,$tweak,$tweak,$leperm
2962         stvx_u          $tweak,0,$ivp
2963
2964 Lxts_enc6x_ret:
2965         mtlr            r11
2966         li              r10,`$FRAME+15`
2967         li              r11,`$FRAME+31`
2968         stvx            $seven,r10,$sp          # wipe copies of round keys
2969         addi            r10,r10,32
2970         stvx            $seven,r11,$sp
2971         addi            r11,r11,32
2972         stvx            $seven,r10,$sp
2973         addi            r10,r10,32
2974         stvx            $seven,r11,$sp
2975         addi            r11,r11,32
2976         stvx            $seven,r10,$sp
2977         addi            r10,r10,32
2978         stvx            $seven,r11,$sp
2979         addi            r11,r11,32
2980         stvx            $seven,r10,$sp
2981         addi            r10,r10,32
2982         stvx            $seven,r11,$sp
2983         addi            r11,r11,32
2984
2985         mtspr           256,$vrsave
2986         lvx             v20,r10,$sp             # ABI says so
2987         addi            r10,r10,32
2988         lvx             v21,r11,$sp
2989         addi            r11,r11,32
2990         lvx             v22,r10,$sp
2991         addi            r10,r10,32
2992         lvx             v23,r11,$sp
2993         addi            r11,r11,32
2994         lvx             v24,r10,$sp
2995         addi            r10,r10,32
2996         lvx             v25,r11,$sp
2997         addi            r11,r11,32
2998         lvx             v26,r10,$sp
2999         addi            r10,r10,32
3000         lvx             v27,r11,$sp
3001         addi            r11,r11,32
3002         lvx             v28,r10,$sp
3003         addi            r10,r10,32
3004         lvx             v29,r11,$sp
3005         addi            r11,r11,32
3006         lvx             v30,r10,$sp
3007         lvx             v31,r11,$sp
3008         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3009         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3010         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3011         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3012         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3013         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3014         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3015         blr
3016         .long           0
3017         .byte           0,12,0x04,1,0x80,6,6,0
3018         .long           0
3019
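# _aesp8_xts_enc5x: run the remaining rounds on five prepared
# blocks. The last round key is folded into each tweak, so
# $out0-$out4 return finished ciphertext; the possible stealing
# tail is speculatively loaded into $in0 along the way.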
3020 .align  5
3021 _aesp8_xts_enc5x:
3022         vcipher         $out0,$out0,v24
3023         vcipher         $out1,$out1,v24
3024         vcipher         $out2,$out2,v24
3025         vcipher         $out3,$out3,v24
3026         vcipher         $out4,$out4,v24
3027         lvx             v24,$x20,$key_          # round[3]
3028         addi            $key_,$key_,0x20
3029
3030         vcipher         $out0,$out0,v25
3031         vcipher         $out1,$out1,v25
3032         vcipher         $out2,$out2,v25
3033         vcipher         $out3,$out3,v25
3034         vcipher         $out4,$out4,v25
3035         lvx             v25,$x10,$key_          # round[4]
3036         bdnz            _aesp8_xts_enc5x
3037
3038         add             $inp,$inp,$taillen
3039         cmpwi           $taillen,0
3040         vcipher         $out0,$out0,v24
3041         vcipher         $out1,$out1,v24
3042         vcipher         $out2,$out2,v24
3043         vcipher         $out3,$out3,v24
3044         vcipher         $out4,$out4,v24
3045
3046         subi            $inp,$inp,16
3047         vcipher         $out0,$out0,v25
3048         vcipher         $out1,$out1,v25
3049         vcipher         $out2,$out2,v25
3050         vcipher         $out3,$out3,v25
3051         vcipher         $out4,$out4,v25
3052          vxor           $twk0,$twk0,v31
3053
3054         vcipher         $out0,$out0,v26
3055         lvsr            $inpperm,r0,$taillen    # $in5 is no more
3056         vcipher         $out1,$out1,v26
3057         vcipher         $out2,$out2,v26
3058         vcipher         $out3,$out3,v26
3059         vcipher         $out4,$out4,v26
3060          vxor           $in1,$twk1,v31
3061
3062         vcipher         $out0,$out0,v27
3063         lvx_u           $in0,0,$inp
3064         vcipher         $out1,$out1,v27
3065         vcipher         $out2,$out2,v27
3066         vcipher         $out3,$out3,v27
3067         vcipher         $out4,$out4,v27
3068          vxor           $in2,$twk2,v31
3069
3070         addi            $key_,$sp,$FRAME+15     # rewind $key_
3071         vcipher         $out0,$out0,v28
3072         vcipher         $out1,$out1,v28
3073         vcipher         $out2,$out2,v28
3074         vcipher         $out3,$out3,v28
3075         vcipher         $out4,$out4,v28
3076         lvx             v24,$x00,$key_          # re-pre-load round[1]
3077          vxor           $in3,$twk3,v31
3078
3079         vcipher         $out0,$out0,v29
3080         le?vperm        $in0,$in0,$in0,$leperm
3081         vcipher         $out1,$out1,v29
3082         vcipher         $out2,$out2,v29
3083         vcipher         $out3,$out3,v29
3084         vcipher         $out4,$out4,v29
3085         lvx             v25,$x10,$key_          # re-pre-load round[2]
3086          vxor           $in4,$twk4,v31
3087
3088         vcipher         $out0,$out0,v30
3089         vperm           $in0,$in0,$in0,$inpperm
3090         vcipher         $out1,$out1,v30
3091         vcipher         $out2,$out2,v30
3092         vcipher         $out3,$out3,v30
3093         vcipher         $out4,$out4,v30
3094
3095         vcipherlast     $out0,$out0,$twk0
3096         vcipherlast     $out1,$out1,$in1
3097         vcipherlast     $out2,$out2,$in2
3098         vcipherlast     $out3,$out3,$in3
3099         vcipherlast     $out4,$out4,$in4
3100         blr
3101         .long           0
3102         .byte           0,12,0x14,0,0,0,0,0
3103
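# XTS decryption mirrors the encryption path above, using
# vncipher/vncipherlast. The stealing code differs: the last two
# tweaks are applied in swapped order, as XTS requires on decrypt.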
3104 .align  5
3105 _aesp8_xts_decrypt6x:
3106         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3107         mflr            r11
3108         li              r7,`$FRAME+8*16+15`
3109         li              r3,`$FRAME+8*16+31`
3110         $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3111         stvx            v20,r7,$sp              # ABI says so
3112         addi            r7,r7,32
3113         stvx            v21,r3,$sp
3114         addi            r3,r3,32
3115         stvx            v22,r7,$sp
3116         addi            r7,r7,32
3117         stvx            v23,r3,$sp
3118         addi            r3,r3,32
3119         stvx            v24,r7,$sp
3120         addi            r7,r7,32
3121         stvx            v25,r3,$sp
3122         addi            r3,r3,32
3123         stvx            v26,r7,$sp
3124         addi            r7,r7,32
3125         stvx            v27,r3,$sp
3126         addi            r3,r3,32
3127         stvx            v28,r7,$sp
3128         addi            r7,r7,32
3129         stvx            v29,r3,$sp
3130         addi            r3,r3,32
3131         stvx            v30,r7,$sp
3132         stvx            v31,r3,$sp
3133         li              r0,-1
3134         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
3135         li              $x10,0x10
3136         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3137         li              $x20,0x20
3138         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3139         li              $x30,0x30
3140         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3141         li              $x40,0x40
3142         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3143         li              $x50,0x50
3144         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3145         li              $x60,0x60
3146         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3147         li              $x70,0x70
3148         mtspr           256,r0
3149
3150         subi            $rounds,$rounds,3       # -4 in total
3151
3152         lvx             $rndkey0,$x00,$key1     # load key schedule
3153         lvx             v30,$x10,$key1
3154         addi            $key1,$key1,0x20
3155         lvx             v31,$x00,$key1
3156         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
3157         addi            $key_,$sp,$FRAME+15
3158         mtctr           $rounds
3159
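# Copy the (possibly unaligned) key schedule to an aligned stack
# buffer so the round loops can stream it with plain lvx.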
3160 Load_xts_dec_key:
3161         ?vperm          v24,v30,v31,$keyperm
3162         lvx             v30,$x10,$key1
3163         addi            $key1,$key1,0x20
3164         stvx            v24,$x00,$key_          # off-load round[1]
3165         ?vperm          v25,v31,v30,$keyperm
3166         lvx             v31,$x00,$key1
3167         stvx            v25,$x10,$key_          # off-load round[2]
3168         addi            $key_,$key_,0x20
3169         bdnz            Load_xts_dec_key
3170
3171         lvx             v26,$x10,$key1
3172         ?vperm          v24,v30,v31,$keyperm
3173         lvx             v27,$x20,$key1
3174         stvx            v24,$x00,$key_          # off-load round[3]
3175         ?vperm          v25,v31,v26,$keyperm
3176         lvx             v28,$x30,$key1
3177         stvx            v25,$x10,$key_          # off-load round[4]
3178         addi            $key_,$sp,$FRAME+15     # rewind $key_
3179         ?vperm          v26,v26,v27,$keyperm
3180         lvx             v29,$x40,$key1
3181         ?vperm          v27,v27,v28,$keyperm
3182         lvx             v30,$x50,$key1
3183         ?vperm          v28,v28,v29,$keyperm
3184         lvx             v31,$x60,$key1
3185         ?vperm          v29,v29,v30,$keyperm
3186         lvx             $twk5,$x70,$key1        # borrow $twk5
3187         ?vperm          v30,v30,v31,$keyperm
3188         lvx             v24,$x00,$key_          # pre-load round[1]
3189         ?vperm          v31,v31,$twk5,$keyperm
3190         lvx             v25,$x10,$key_          # pre-load round[2]
3191
3192          vperm          $in0,$inout,$inptail,$inpperm
3193          subi           $inp,$inp,31            # undo "caller"
3194         vxor            $twk0,$tweak,$rndkey0
3195         vsrab           $tmp,$tweak,$seven      # next tweak value
3196         vaddubm         $tweak,$tweak,$tweak
3197         vsldoi          $tmp,$tmp,$tmp,15
3198         vand            $tmp,$tmp,$eighty7
3199          vxor           $out0,$in0,$twk0
3200         vxor            $tweak,$tweak,$tmp
3201
3202          lvx_u          $in1,$x10,$inp
3203         vxor            $twk1,$tweak,$rndkey0
3204         vsrab           $tmp,$tweak,$seven      # next tweak value
3205         vaddubm         $tweak,$tweak,$tweak
3206         vsldoi          $tmp,$tmp,$tmp,15
3207          le?vperm       $in1,$in1,$in1,$leperm
3208         vand            $tmp,$tmp,$eighty7
3209          vxor           $out1,$in1,$twk1
3210         vxor            $tweak,$tweak,$tmp
3211
3212          lvx_u          $in2,$x20,$inp
3213          andi.          $taillen,$len,15
3214         vxor            $twk2,$tweak,$rndkey0
3215         vsrab           $tmp,$tweak,$seven      # next tweak value
3216         vaddubm         $tweak,$tweak,$tweak
3217         vsldoi          $tmp,$tmp,$tmp,15
3218          le?vperm       $in2,$in2,$in2,$leperm
3219         vand            $tmp,$tmp,$eighty7
3220          vxor           $out2,$in2,$twk2
3221         vxor            $tweak,$tweak,$tmp
3222
3223          lvx_u          $in3,$x30,$inp
3224          sub            $len,$len,$taillen
3225         vxor            $twk3,$tweak,$rndkey0
3226         vsrab           $tmp,$tweak,$seven      # next tweak value
3227         vaddubm         $tweak,$tweak,$tweak
3228         vsldoi          $tmp,$tmp,$tmp,15
3229          le?vperm       $in3,$in3,$in3,$leperm
3230         vand            $tmp,$tmp,$eighty7
3231          vxor           $out3,$in3,$twk3
3232         vxor            $tweak,$tweak,$tmp
3233
3234          lvx_u          $in4,$x40,$inp
3235          subi           $len,$len,0x60
3236         vxor            $twk4,$tweak,$rndkey0
3237         vsrab           $tmp,$tweak,$seven      # next tweak value
3238         vaddubm         $tweak,$tweak,$tweak
3239         vsldoi          $tmp,$tmp,$tmp,15
3240          le?vperm       $in4,$in4,$in4,$leperm
3241         vand            $tmp,$tmp,$eighty7
3242          vxor           $out4,$in4,$twk4
3243         vxor            $tweak,$tweak,$tmp
3244
3245          lvx_u          $in5,$x50,$inp
3246          addi           $inp,$inp,0x60
3247         vxor            $twk5,$tweak,$rndkey0
3248         vsrab           $tmp,$tweak,$seven      # next tweak value
3249         vaddubm         $tweak,$tweak,$tweak
3250         vsldoi          $tmp,$tmp,$tmp,15
3251          le?vperm       $in5,$in5,$in5,$leperm
3252         vand            $tmp,$tmp,$eighty7
3253          vxor           $out5,$in5,$twk5
3254         vxor            $tweak,$tweak,$tmp
3255
3256         vxor            v31,v31,$rndkey0
3257         mtctr           $rounds
3258         b               Loop_xts_dec6x
3259
3260 .align  5
3261 Loop_xts_dec6x:
3262         vncipher        $out0,$out0,v24
3263         vncipher        $out1,$out1,v24
3264         vncipher        $out2,$out2,v24
3265         vncipher        $out3,$out3,v24
3266         vncipher        $out4,$out4,v24
3267         vncipher        $out5,$out5,v24
3268         lvx             v24,$x20,$key_          # round[3]
3269         addi            $key_,$key_,0x20
3270
3271         vncipher        $out0,$out0,v25
3272         vncipher        $out1,$out1,v25
3273         vncipher        $out2,$out2,v25
3274         vncipher        $out3,$out3,v25
3275         vncipher        $out4,$out4,v25
3276         vncipher        $out5,$out5,v25
3277         lvx             v25,$x10,$key_          # round[4]
3278         bdnz            Loop_xts_dec6x
3279
3280         subic           $len,$len,96            # $len-=96
3281          vxor           $in0,$twk0,v31          # xor with last round key
3282         vncipher        $out0,$out0,v24
3283         vncipher        $out1,$out1,v24
3284          vsrab          $tmp,$tweak,$seven      # next tweak value
3285          vxor           $twk0,$tweak,$rndkey0
3286          vaddubm        $tweak,$tweak,$tweak
3287         vncipher        $out2,$out2,v24
3288         vncipher        $out3,$out3,v24
3289          vsldoi         $tmp,$tmp,$tmp,15
3290         vncipher        $out4,$out4,v24
3291         vncipher        $out5,$out5,v24
3292
3293         subfe.          r0,r0,r0                # borrow ? -1 : 0
3294          vand           $tmp,$tmp,$eighty7
3295         vncipher        $out0,$out0,v25
3296         vncipher        $out1,$out1,v25
3297          vxor           $tweak,$tweak,$tmp
3298         vncipher        $out2,$out2,v25
3299         vncipher        $out3,$out3,v25
3300          vxor           $in1,$twk1,v31
3301          vsrab          $tmp,$tweak,$seven      # next tweak value
3302          vxor           $twk1,$tweak,$rndkey0
3303         vncipher        $out4,$out4,v25
3304         vncipher        $out5,$out5,v25
3305
3306         and             r0,r0,$len
3307          vaddubm        $tweak,$tweak,$tweak
3308          vsldoi         $tmp,$tmp,$tmp,15
3309         vncipher        $out0,$out0,v26
3310         vncipher        $out1,$out1,v26
3311          vand           $tmp,$tmp,$eighty7
3312         vncipher        $out2,$out2,v26
3313         vncipher        $out3,$out3,v26
3314          vxor           $tweak,$tweak,$tmp
3315         vncipher        $out4,$out4,v26
3316         vncipher        $out5,$out5,v26
3317
3318         add             $inp,$inp,r0            # $inp is adjusted so that,
3319                                                 # at exit from the loop,
3320                                                 # inX-in5 hold the last
3321                                                 # input blocks
3322          vxor           $in2,$twk2,v31
3323          vsrab          $tmp,$tweak,$seven      # next tweak value
3324          vxor           $twk2,$tweak,$rndkey0
3325          vaddubm        $tweak,$tweak,$tweak
3326         vncipher        $out0,$out0,v27
3327         vncipher        $out1,$out1,v27
3328          vsldoi         $tmp,$tmp,$tmp,15
3329         vncipher        $out2,$out2,v27
3330         vncipher        $out3,$out3,v27
3331          vand           $tmp,$tmp,$eighty7
3332         vncipher        $out4,$out4,v27
3333         vncipher        $out5,$out5,v27
3334
3335         addi            $key_,$sp,$FRAME+15     # rewind $key_
3336          vxor           $tweak,$tweak,$tmp
3337         vncipher        $out0,$out0,v28
3338         vncipher        $out1,$out1,v28
3339          vxor           $in3,$twk3,v31
3340          vsrab          $tmp,$tweak,$seven      # next tweak value
3341          vxor           $twk3,$tweak,$rndkey0
3342         vncipher        $out2,$out2,v28
3343         vncipher        $out3,$out3,v28
3344          vaddubm        $tweak,$tweak,$tweak
3345          vsldoi         $tmp,$tmp,$tmp,15
3346         vncipher        $out4,$out4,v28
3347         vncipher        $out5,$out5,v28
3348         lvx             v24,$x00,$key_          # re-pre-load round[1]
3349          vand           $tmp,$tmp,$eighty7
3350
3351         vncipher        $out0,$out0,v29
3352         vncipher        $out1,$out1,v29
3353          vxor           $tweak,$tweak,$tmp
3354         vncipher        $out2,$out2,v29
3355         vncipher        $out3,$out3,v29
3356          vxor           $in4,$twk4,v31
3357          vsrab          $tmp,$tweak,$seven      # next tweak value
3358          vxor           $twk4,$tweak,$rndkey0
3359         vncipher        $out4,$out4,v29
3360         vncipher        $out5,$out5,v29
3361         lvx             v25,$x10,$key_          # re-pre-load round[2]
3362          vaddubm        $tweak,$tweak,$tweak
3363          vsldoi         $tmp,$tmp,$tmp,15
3364
3365         vncipher        $out0,$out0,v30
3366         vncipher        $out1,$out1,v30
3367          vand           $tmp,$tmp,$eighty7
3368         vncipher        $out2,$out2,v30
3369         vncipher        $out3,$out3,v30
3370          vxor           $tweak,$tweak,$tmp
3371         vncipher        $out4,$out4,v30
3372         vncipher        $out5,$out5,v30
3373          vxor           $in5,$twk5,v31
3374          vsrab          $tmp,$tweak,$seven      # next tweak value
3375          vxor           $twk5,$tweak,$rndkey0
3376
3377         vncipherlast    $out0,$out0,$in0
3378          lvx_u          $in0,$x00,$inp          # load next input block
3379          vaddubm        $tweak,$tweak,$tweak
3380          vsldoi         $tmp,$tmp,$tmp,15
3381         vncipherlast    $out1,$out1,$in1
3382          lvx_u          $in1,$x10,$inp
3383         vncipherlast    $out2,$out2,$in2
3384          le?vperm       $in0,$in0,$in0,$leperm
3385          lvx_u          $in2,$x20,$inp
3386          vand           $tmp,$tmp,$eighty7
3387         vncipherlast    $out3,$out3,$in3
3388          le?vperm       $in1,$in1,$in1,$leperm
3389          lvx_u          $in3,$x30,$inp
3390         vncipherlast    $out4,$out4,$in4
3391          le?vperm       $in2,$in2,$in2,$leperm
3392          lvx_u          $in4,$x40,$inp
3393          vxor           $tweak,$tweak,$tmp
3394         vncipherlast    $out5,$out5,$in5
3395          le?vperm       $in3,$in3,$in3,$leperm
3396          lvx_u          $in5,$x50,$inp
3397          addi           $inp,$inp,0x60
3398          le?vperm       $in4,$in4,$in4,$leperm
3399          le?vperm       $in5,$in5,$in5,$leperm
3400
3401         le?vperm        $out0,$out0,$out0,$leperm
3402         le?vperm        $out1,$out1,$out1,$leperm
3403         stvx_u          $out0,$x00,$out         # store output
3404          vxor           $out0,$in0,$twk0
3405         le?vperm        $out2,$out2,$out2,$leperm
3406         stvx_u          $out1,$x10,$out
3407          vxor           $out1,$in1,$twk1
3408         le?vperm        $out3,$out3,$out3,$leperm
3409         stvx_u          $out2,$x20,$out
3410          vxor           $out2,$in2,$twk2
3411         le?vperm        $out4,$out4,$out4,$leperm
3412         stvx_u          $out3,$x30,$out
3413          vxor           $out3,$in3,$twk3
3414         le?vperm        $out5,$out5,$out5,$leperm
3415         stvx_u          $out4,$x40,$out
3416          vxor           $out4,$in4,$twk4
3417         stvx_u          $out5,$x50,$out
3418          vxor           $out5,$in5,$twk5
3419         addi            $out,$out,0x60
3420
3421         mtctr           $rounds
3422         beq             Loop_xts_dec6x          # did $len-=96 borrow?
3423
3424         addic.          $len,$len,0x60
3425         beq             Lxts_dec6x_zero
3426         cmpwi           $len,0x20
3427         blt             Lxts_dec6x_one
3428         nop
3429         beq             Lxts_dec6x_two
3430         cmpwi           $len,0x40
3431         blt             Lxts_dec6x_three
3432         nop
3433         beq             Lxts_dec6x_four
3434
3435 Lxts_dec6x_five:
3436         vxor            $out0,$in1,$twk0
3437         vxor            $out1,$in2,$twk1
3438         vxor            $out2,$in3,$twk2
3439         vxor            $out3,$in4,$twk3
3440         vxor            $out4,$in5,$twk4
3441
3442         bl              _aesp8_xts_dec5x
3443
3444         le?vperm        $out0,$out0,$out0,$leperm
3445         vmr             $twk0,$twk5             # unused tweak
3446         vxor            $twk1,$tweak,$rndkey0
3447         le?vperm        $out1,$out1,$out1,$leperm
3448         stvx_u          $out0,$x00,$out         # store output
3449         vxor            $out0,$in0,$twk1
3450         le?vperm        $out2,$out2,$out2,$leperm
3451         stvx_u          $out1,$x10,$out
3452         le?vperm        $out3,$out3,$out3,$leperm
3453         stvx_u          $out2,$x20,$out
3454         le?vperm        $out4,$out4,$out4,$leperm
3455         stvx_u          $out3,$x30,$out
3456         stvx_u          $out4,$x40,$out
3457         addi            $out,$out,0x50
3458         bne             Lxts_dec6x_steal
3459         b               Lxts_dec6x_done
3460
3461 .align  4
3462 Lxts_dec6x_four:
3463         vxor            $out0,$in2,$twk0
3464         vxor            $out1,$in3,$twk1
3465         vxor            $out2,$in4,$twk2
3466         vxor            $out3,$in5,$twk3
3467         vxor            $out4,$out4,$out4
3468
3469         bl              _aesp8_xts_dec5x
3470
3471         le?vperm        $out0,$out0,$out0,$leperm
3472         vmr             $twk0,$twk4             # unused tweak
3473         vmr             $twk1,$twk5
3474         le?vperm        $out1,$out1,$out1,$leperm
3475         stvx_u          $out0,$x00,$out         # store output
3476         vxor            $out0,$in0,$twk5
3477         le?vperm        $out2,$out2,$out2,$leperm
3478         stvx_u          $out1,$x10,$out
3479         le?vperm        $out3,$out3,$out3,$leperm
3480         stvx_u          $out2,$x20,$out
3481         stvx_u          $out3,$x30,$out
3482         addi            $out,$out,0x40
3483         bne             Lxts_dec6x_steal
3484         b               Lxts_dec6x_done
3485
3486 .align  4
3487 Lxts_dec6x_three:
3488         vxor            $out0,$in3,$twk0
3489         vxor            $out1,$in4,$twk1
3490         vxor            $out2,$in5,$twk2
3491         vxor            $out3,$out3,$out3
3492         vxor            $out4,$out4,$out4
3493
3494         bl              _aesp8_xts_dec5x
3495
3496         le?vperm        $out0,$out0,$out0,$leperm
3497         vmr             $twk0,$twk3             # unused tweak
3498         vmr             $twk1,$twk4
3499         le?vperm        $out1,$out1,$out1,$leperm
3500         stvx_u          $out0,$x00,$out         # store output
3501         vxor            $out0,$in0,$twk4
3502         le?vperm        $out2,$out2,$out2,$leperm
3503         stvx_u          $out1,$x10,$out
3504         stvx_u          $out2,$x20,$out
3505         addi            $out,$out,0x30
3506         bne             Lxts_dec6x_steal
3507         b               Lxts_dec6x_done
3508
3509 .align  4
3510 Lxts_dec6x_two:
3511         vxor            $out0,$in4,$twk0
3512         vxor            $out1,$in5,$twk1
3513         vxor            $out2,$out2,$out2
3514         vxor            $out3,$out3,$out3
3515         vxor            $out4,$out4,$out4
3516
3517         bl              _aesp8_xts_dec5x
3518
3519         le?vperm        $out0,$out0,$out0,$leperm
3520         vmr             $twk0,$twk2             # unused tweak
3521         vmr             $twk1,$twk3
3522         le?vperm        $out1,$out1,$out1,$leperm
3523         stvx_u          $out0,$x00,$out         # store output
3524         vxor            $out0,$in0,$twk3
3525         stvx_u          $out1,$x10,$out
3526         addi            $out,$out,0x20
3527         bne             Lxts_dec6x_steal
3528         b               Lxts_dec6x_done
3529
3530 .align  4
3531 Lxts_dec6x_one:
3532         vxor            $out0,$in5,$twk0
3533         nop
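# Single-block decrypt path; also re-entered from the stealing
# code to decrypt the merged block under $twk0.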
3534 Loop_xts_dec1x:
3535         vncipher        $out0,$out0,v24
3536         lvx             v24,$x20,$key_          # round[3]
3537         addi            $key_,$key_,0x20
3538
3539         vncipher        $out0,$out0,v25
3540         lvx             v25,$x10,$key_          # round[4]
3541         bdnz            Loop_xts_dec1x
3542
3543         subi            r0,$taillen,1
3544         vncipher        $out0,$out0,v24
3545
3546         andi.           r0,r0,16
3547         cmpwi           $taillen,0
3548         vncipher        $out0,$out0,v25
3549
3550         sub             $inp,$inp,r0
3551         vncipher        $out0,$out0,v26
3552
3553         lvx_u           $in0,0,$inp
3554         vncipher        $out0,$out0,v27
3555
3556         addi            $key_,$sp,$FRAME+15     # rewind $key_
3557         vncipher        $out0,$out0,v28
3558         lvx             v24,$x00,$key_          # re-pre-load round[1]
3559
3560         vncipher        $out0,$out0,v29
3561         lvx             v25,$x10,$key_          # re-pre-load round[2]
3562          vxor           $twk0,$twk0,v31
3563
3564         le?vperm        $in0,$in0,$in0,$leperm
3565         vncipher        $out0,$out0,v30
3566
3567         mtctr           $rounds
3568         vncipherlast    $out0,$out0,$twk0
3569
3570         vmr             $twk0,$twk1             # unused tweak
3571         vmr             $twk1,$twk2
3572         le?vperm        $out0,$out0,$out0,$leperm
3573         stvx_u          $out0,$x00,$out         # store output
3574         addi            $out,$out,0x10
3575         vxor            $out0,$in0,$twk2
3576         bne             Lxts_dec6x_steal
3577         b               Lxts_dec6x_done
3578
3579 .align  4
3580 Lxts_dec6x_zero:
3581         cmpwi           $taillen,0
3582         beq             Lxts_dec6x_done
3583
3584         lvx_u           $in0,0,$inp
3585         le?vperm        $in0,$in0,$in0,$leperm
3586         vxor            $out0,$in0,$twk1
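# Ciphertext stealing, decrypt side: the last full ciphertext
# block is decrypted here under the following tweak ($twk1), its
# bytes are merged with the short tail, and the combined block is
# decrypted once more under $twk0 via Loop_xts_dec1x.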
3587 Lxts_dec6x_steal:
3588         vncipher        $out0,$out0,v24
3589         lvx             v24,$x20,$key_          # round[3]
3590         addi            $key_,$key_,0x20
3591
3592         vncipher        $out0,$out0,v25
3593         lvx             v25,$x10,$key_          # round[4]
3594         bdnz            Lxts_dec6x_steal
3595
3596         add             $inp,$inp,$taillen
3597         vncipher        $out0,$out0,v24
3598
3599         cmpwi           $taillen,0
3600         vncipher        $out0,$out0,v25
3601
3602         lvx_u           $in0,0,$inp
3603         vncipher        $out0,$out0,v26
3604
3605         lvsr            $inpperm,0,$taillen     # $in5 is no more
3606         vncipher        $out0,$out0,v27
3607
3608         addi            $key_,$sp,$FRAME+15     # rewind $key_
3609         vncipher        $out0,$out0,v28
3610         lvx             v24,$x00,$key_          # re-pre-load round[1]
3611
3612         vncipher        $out0,$out0,v29
3613         lvx             v25,$x10,$key_          # re-pre-load round[2]
3614          vxor           $twk1,$twk1,v31
3615
3616         le?vperm        $in0,$in0,$in0,$leperm
3617         vncipher        $out0,$out0,v30
3618
3619         vperm           $in0,$in0,$in0,$inpperm
3620         vncipherlast    $tmp,$out0,$twk1
3621
3622         le?vperm        $out0,$tmp,$tmp,$leperm
3623         le?stvx_u       $out0,0,$out
3624         be?stvx_u       $tmp,0,$out
3625
3626         vxor            $out0,$out0,$out0
3627         vspltisb        $out1,-1
3628         vperm           $out0,$out0,$out1,$inpperm
3629         vsel            $out0,$in0,$tmp,$out0
3630         vxor            $out0,$out0,$twk0
3631
3632         subi            r30,$out,1
3633         mtctr           $taillen
3634 Loop_xts_dec6x_steal:
3635         lbzu            r0,1(r30)
3636         stb             r0,16(r30)
3637         bdnz            Loop_xts_dec6x_steal
3638
3639         li              $taillen,0
3640         mtctr           $rounds
3641         b               Loop_xts_dec1x          # one more time...
3642
3643 .align  4
3644 Lxts_dec6x_done:
3645         ${UCMP}i        $ivp,0
3646         beq             Lxts_dec6x_ret
3647
3648         vxor            $tweak,$twk0,$rndkey0
3649         le?vperm        $tweak,$tweak,$tweak,$leperm
3650         stvx_u          $tweak,0,$ivp
3651
3652 Lxts_dec6x_ret:
3653         mtlr            r11
3654         li              r10,`$FRAME+15`
3655         li              r11,`$FRAME+31`
3656         stvx            $seven,r10,$sp          # wipe copies of round keys
3657         addi            r10,r10,32
3658         stvx            $seven,r11,$sp
3659         addi            r11,r11,32
3660         stvx            $seven,r10,$sp
3661         addi            r10,r10,32
3662         stvx            $seven,r11,$sp
3663         addi            r11,r11,32
3664         stvx            $seven,r10,$sp
3665         addi            r10,r10,32
3666         stvx            $seven,r11,$sp
3667         addi            r11,r11,32
3668         stvx            $seven,r10,$sp
3669         addi            r10,r10,32
3670         stvx            $seven,r11,$sp
3671         addi            r11,r11,32
3672
3673         mtspr           256,$vrsave
3674         lvx             v20,r10,$sp             # ABI says so
3675         addi            r10,r10,32
3676         lvx             v21,r11,$sp
3677         addi            r11,r11,32
3678         lvx             v22,r10,$sp
3679         addi            r10,r10,32
3680         lvx             v23,r11,$sp
3681         addi            r11,r11,32
3682         lvx             v24,r10,$sp
3683         addi            r10,r10,32
3684         lvx             v25,r11,$sp
3685         addi            r11,r11,32
3686         lvx             v26,r10,$sp
3687         addi            r10,r10,32
3688         lvx             v27,r11,$sp
3689         addi            r11,r11,32
3690         lvx             v28,r10,$sp
3691         addi            r10,r10,32
3692         lvx             v29,r11,$sp
3693         addi            r11,r11,32
3694         lvx             v30,r10,$sp
3695         lvx             v31,r11,$sp
3696         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3697         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3698         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3699         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3700         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3701         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3702         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3703         blr
3704         .long           0
3705         .byte           0,12,0x04,1,0x80,6,6,0
3706         .long           0
3707
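# _aesp8_xts_dec5x: decrypt-side twin of _aesp8_xts_enc5x.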
3708 .align  5
3709 _aesp8_xts_dec5x:
3710         vncipher        $out0,$out0,v24
3711         vncipher        $out1,$out1,v24
3712         vncipher        $out2,$out2,v24
3713         vncipher        $out3,$out3,v24
3714         vncipher        $out4,$out4,v24
3715         lvx             v24,$x20,$key_          # round[3]
3716         addi            $key_,$key_,0x20
3717
3718         vncipher        $out0,$out0,v25
3719         vncipher        $out1,$out1,v25
3720         vncipher        $out2,$out2,v25
3721         vncipher        $out3,$out3,v25
3722         vncipher        $out4,$out4,v25
3723         lvx             v25,$x10,$key_          # round[4]
3724         bdnz            _aesp8_xts_dec5x
3725
3726         subi            r0,$taillen,1
3727         vncipher        $out0,$out0,v24
3728         vncipher        $out1,$out1,v24
3729         vncipher        $out2,$out2,v24
3730         vncipher        $out3,$out3,v24
3731         vncipher        $out4,$out4,v24
3732
3733         andi.           r0,r0,16
3734         cmpwi           $taillen,0
3735         vncipher        $out0,$out0,v25
3736         vncipher        $out1,$out1,v25
3737         vncipher        $out2,$out2,v25
3738         vncipher        $out3,$out3,v25
3739         vncipher        $out4,$out4,v25
3740          vxor           $twk0,$twk0,v31
3741
3742         sub             $inp,$inp,r0
3743         vncipher        $out0,$out0,v26
3744         vncipher        $out1,$out1,v26
3745         vncipher        $out2,$out2,v26
3746         vncipher        $out3,$out3,v26
3747         vncipher        $out4,$out4,v26
3748          vxor           $in1,$twk1,v31
3749
3750         vncipher        $out0,$out0,v27
3751         lvx_u           $in0,0,$inp
3752         vncipher        $out1,$out1,v27
3753         vncipher        $out2,$out2,v27
3754         vncipher        $out3,$out3,v27
3755         vncipher        $out4,$out4,v27
3756          vxor           $in2,$twk2,v31
3757
3758         addi            $key_,$sp,$FRAME+15     # rewind $key_
3759         vncipher        $out0,$out0,v28
3760         vncipher        $out1,$out1,v28
3761         vncipher        $out2,$out2,v28
3762         vncipher        $out3,$out3,v28
3763         vncipher        $out4,$out4,v28
3764         lvx             v24,$x00,$key_          # re-pre-load round[1]
3765          vxor           $in3,$twk3,v31
3766
3767         vncipher        $out0,$out0,v29
3768         le?vperm        $in0,$in0,$in0,$leperm
3769         vncipher        $out1,$out1,v29
3770         vncipher        $out2,$out2,v29
3771         vncipher        $out3,$out3,v29
3772         vncipher        $out4,$out4,v29
3773         lvx             v25,$x10,$key_          # re-pre-load round[2]
3774          vxor           $in4,$twk4,v31
3775
3776         vncipher        $out0,$out0,v30
3777         vncipher        $out1,$out1,v30
3778         vncipher        $out2,$out2,v30
3779         vncipher        $out3,$out3,v30
3780         vncipher        $out4,$out4,v30
3781
3782         vncipherlast    $out0,$out0,$twk0
3783         vncipherlast    $out1,$out1,$in1
3784         vncipherlast    $out2,$out2,$in2
3785         vncipherlast    $out3,$out3,$in3
3786         vncipherlast    $out4,$out4,$in4
3787         mtctr           $rounds
3788         blr
3789         .long           0
3790         .byte           0,12,0x14,0,0,0,0,0
3791 ___
3792 }}      }}}
3793
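# Post-process the generated text: evaluate backticked arithmetic,
# emit the constants table in the endianness the flavour asks for,
# and rewrite '?'-prefixed endian-sensitive instructions for
# little- or big-endian assembly.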
3794 my $consts=1;
3795 foreach(split("\n",$code)) {
3796         s/\`([^\`]*)\`/eval($1)/geo;
3797
3798         # constants table endian-specific conversion
3799         if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3800             my $conv=$3;
3801             my @bytes=();
3802
3803             # convert to endian-agnostic format
3804             if ($1 eq "long") {
3805               foreach (split(/,\s*/,$2)) {
3806                 my $l = /^0/?oct:int;
3807                 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3808               }
3809             } else {
3810                 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3811             }
3812
3813             # little-endian conversion
3814             if ($flavour =~ /le$/o) {
3815                 SWITCH: for($conv)  {
3816                     /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
3817                     /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
3818                 }
3819             }
3820
3821             # emit
3822             print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3823             next;
3824         }
3825         $consts=0 if (m/Lconsts:/o);    # end of table
3826
3827         # instructions prefixed with '?' are endian-specific and need
3828         # to be adjusted accordingly...
3829         if ($flavour =~ /le$/o) {       # little-endian
3830             s/le\?//o           or
3831             s/be\?/#be#/o       or
3832             s/\?lvsr/lvsl/o     or
3833             s/\?lvsl/lvsr/o     or
3834             s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3835             s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3836             s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3837         } else {                        # big-endian
3838             s/le\?/#le#/o       or
3839             s/be\?//o           or
3840             s/\?([a-z]+)/$1/o;
3841         }
3842
3843         print $_,"\n";
3844 }
3845
3846 close STDOUT or die "error closing STDOUT: $!";