3 # ====================================================================
4 # Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
5 # <appro@openssl.org>. The module is licensed under 2-clause BSD
6 # license. October 2012. All rights reserved.
7 # ====================================================================
9 ######################################################################
12 # AES round instructions complete in 3 cycles and can be issued every
13 # cycle. It means that round calculations should take 4*rounds cycles,
14 # because any given round instruction depends on result of *both*
15 # previous instructions:
23 # Provided that fxor [with IV] takes 3 cycles to complete, critical
24 # path length for CBC encrypt would be 3+4*rounds, or in other words
25 # it should process one byte in at least (3+4*rounds)/16 cycles. This
26 # estimate doesn't account for "collateral" instructions, such as
27 # fetching input from memory, xor-ing it with zero-round key and
28 # storing the result. Yet, *measured* performance [for data aligned
29 # at 64-bit boundary!] deviates from this equation by less than 0.5%:
31 # 128-bit key 192- 256-
32 # CBC encrypt 2.70/2.90(*) 3.20/3.40 3.70/3.90
33 # (*) numbers after slash are for
36 # Out-of-order execution logic managed to fully overlap "collateral"
37 # instructions with those on critical path. Amazing!
39 # As with Intel AES-NI, question is if it's possible to improve
40 # performance of parallelizable modes by interleaving round
41 # instructions. Provided round instruction latency and throughput
42 # optimal interleave factor is 2. But can we expect 2x performance
43 # improvement? Well, as round instructions can be issued one per
44 # cycle, they don't saturate the 2-way issue pipeline and therefore
45 # there is room for "collateral" calculations... Yet, 2x speed-up
46 # over CBC encrypt remains unattainable:
48 # 128-bit key 192- 256-
49 # CBC decrypt 1.64/2.11 1.89/2.37 2.23/2.61
50 # CTR 1.64/2.08(*) 1.89/2.33 2.23/2.61
51 # (*) numbers after slash are for
54 # Estimates based on amount of instructions under assumption that
55 # round instructions are not pairable with any other instruction
56 # suggest that latter is the actual case and pipeline runs
57 # underutilized. It should be noted that T4 out-of-order execution
58 # logic is so capable that performance gain from 2x interleave is
59 # not even impressive, ~7-13% over non-interleaved code, largest
62 # To anchor to something else, software implementation processes
63 # one byte in 29 cycles with 128-bit key on same processor. Intel
64 # Sandy Bridge encrypts byte in 5.07 cycles in CBC mode and decrypts
65 # in 0.93, naturally with AES-NI.
# Locate this script's own directory and make the shared SPARCv9 perlasm
# helpers reachable; sparcv9_modes.pl supplies the &alg_*_implement
# generators invoked further down.
# NOTE(review): the m// result is not checked -- if $0 carries no
# directory component, $dir inherits a stale $1 (long-standing perlasm idiom).
67 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
68 push(@INC,"${dir}","${dir}../../perlasm");
69 require "sparcv9_modes.pl";
73 $::evp=1; # if $evp is set to 0, script generates module with
74 # AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry
75 # points. These however are not fully compatible with openssl/aes.h,
76 # because they expect AES_KEY to be aligned at 64-bit boundary. When
77 # used through EVP, alignment is arranged at EVP layer. Second thing
78 # that is arranged by EVP is at least 32-bit alignment of IV.
80 ######################################################################
81 # single-round subroutines
# Register aliases for the single-block routines: the six %o argument/
# scratch registers are named per their use as $inp, $out, $key, $rounds,
# $tmp and $mask.
84 my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));
# Under the 64-bit ABI, %g2/%g3 must be declared as scratch registers.
86 $code.=<<___ if ($::abibits==64);
87 .register %g2,#scratch
88 .register %g3,#scratch
97 andcc $inp, 7, %g1 ! is input aligned?
106 ldx [$inp + 16], $inp
116 ld [$key + 240], $rounds
117 ldd [$key + 16], %f12
118 ldd [$key + 24], %f14
123 srl $rounds, 1, $rounds
124 ldd [$key + 32], %f16
125 sub $rounds, 1, $rounds
126 ldd [$key + 40], %f18
130 aes_eround01 %f12, %f0, %f2, %f4
131 aes_eround23 %f14, %f0, %f2, %f2
134 sub $rounds,1,$rounds
135 aes_eround01 %f16, %f4, %f2, %f0
136 aes_eround23 %f18, %f4, %f2, %f2
137 ldd [$key + 16], %f16
138 ldd [$key + 24], %f18
139 brnz,pt $rounds, .Lenc
142 andcc $out, 7, $tmp ! is output aligned?
143 aes_eround01 %f12, %f0, %f2, %f4
144 aes_eround23 %f14, %f0, %f2, %f2
145 aes_eround01_l %f16, %f4, %f2, %f0
146 aes_eround23_l %f18, %f4, %f2, %f2
155 2: alignaddrl $out, %g0, $out
157 srl $mask, $tmp, $mask
159 faligndata %f0, %f0, %f4
160 faligndata %f0, %f2, %f6
161 faligndata %f2, %f2, %f8
163 stda %f4, [$out + $mask]0xc0 ! partial store
166 orn %g0, $mask, $mask
168 stda %f8, [$out + $mask]0xc0 ! partial store
169 .type aes_t4_encrypt,#function
170 .size aes_t4_encrypt,.-aes_t4_encrypt
172 .globl aes_t4_decrypt
175 andcc $inp, 7, %g1 ! is input aligned?
184 ldx [$inp + 16], $inp
194 ld [$key + 240], $rounds
195 ldd [$key + 16], %f12
196 ldd [$key + 24], %f14
201 srl $rounds, 1, $rounds
202 ldd [$key + 32], %f16
203 sub $rounds, 1, $rounds
204 ldd [$key + 40], %f18
208 aes_dround01 %f12, %f0, %f2, %f4
209 aes_dround23 %f14, %f0, %f2, %f2
212 sub $rounds,1,$rounds
213 aes_dround01 %f16, %f4, %f2, %f0
214 aes_dround23 %f18, %f4, %f2, %f2
215 ldd [$key + 16], %f16
216 ldd [$key + 24], %f18
217 brnz,pt $rounds, .Ldec
220 andcc $out, 7, $tmp ! is output aligned?
221 aes_dround01 %f12, %f0, %f2, %f4
222 aes_dround23 %f14, %f0, %f2, %f2
223 aes_dround01_l %f16, %f4, %f2, %f0
224 aes_dround23_l %f18, %f4, %f2, %f2
233 2: alignaddrl $out, %g0, $out
235 srl $mask, $tmp, $mask
237 faligndata %f0, %f0, %f4
238 faligndata %f0, %f2, %f6
239 faligndata %f2, %f2, %f8
241 stda %f4, [$out + $mask]0xc0 ! partial store
244 orn %g0, $mask, $mask
246 stda %f8, [$out + $mask]0xc0 ! partial store
247 .type aes_t4_decrypt,#function
248 .size aes_t4_decrypt,.-aes_t4_decrypt
252 ######################################################################
253 # key setup subroutines
# Key-setup argument registers: map() yields six %o names but only the
# first four are bound ($inp, $bits, $out, $tmp); the surplus is dropped.
256 my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
258 .globl aes_t4_set_encrypt_key
260 aes_t4_set_encrypt_key:
263 alignaddr $inp, %g0, $inp
271 brz,pt $tmp, .L256aligned
275 faligndata %f0, %f2, %f0
276 faligndata %f2, %f4, %f2
277 faligndata %f4, %f6, %f4
278 faligndata %f6, %f8, %f6
# Key-expansion loop, 32-byte stride -- appears to be the 256-bit branch
# (cf. .L256aligned above).  Each iteration stores four key double-words
# and derives the next group via aes_kexpand1/aes_kexpand2/aes_kexpand0.
281 for ($i=0; $i<6; $i++) {
283 std %f0, [$out + `32*$i+0`]
284 aes_kexpand1 %f0, %f6, $i, %f0
285 std %f2, [$out + `32*$i+8`]
286 aes_kexpand2 %f2, %f0, %f2
287 std %f4, [$out + `32*$i+16`]
288 aes_kexpand0 %f4, %f2, %f4
289 std %f6, [$out + `32*$i+24`]
290 aes_kexpand2 %f6, %f4, %f6
294 std %f0, [$out + `32*$i+0`]
295 aes_kexpand1 %f0, %f6, $i, %f0
296 std %f2, [$out + `32*$i+8`]
297 aes_kexpand2 %f2, %f0, %f2
298 std %f4, [$out + `32*$i+16`]
299 std %f6, [$out + `32*$i+24`]
300 std %f0, [$out + `32*$i+32`]
301 std %f2, [$out + `32*$i+40`]
304 st $tmp, [$out + 240]
310 brz,pt $tmp, .L192aligned
314 faligndata %f0, %f2, %f0
315 faligndata %f2, %f4, %f2
316 faligndata %f4, %f6, %f4
# 192-bit key-expansion loop (cf. .L192aligned above): 24-byte stride,
# three key double-words stored per iteration.
319 for ($i=0; $i<7; $i++) {
321 std %f0, [$out + `24*$i+0`]
322 aes_kexpand1 %f0, %f4, $i, %f0
323 std %f2, [$out + `24*$i+8`]
324 aes_kexpand2 %f2, %f0, %f2
325 std %f4, [$out + `24*$i+16`]
326 aes_kexpand2 %f4, %f2, %f4
330 std %f0, [$out + `24*$i+0`]
331 aes_kexpand1 %f0, %f4, $i, %f0
332 std %f2, [$out + `24*$i+8`]
333 aes_kexpand2 %f2, %f0, %f2
334 std %f4, [$out + `24*$i+16`]
335 std %f0, [$out + `24*$i+24`]
336 std %f2, [$out + `24*$i+32`]
339 st $tmp, [$out + 240]
345 brz,pt $tmp, .L128aligned
349 faligndata %f0, %f2, %f0
350 faligndata %f2, %f4, %f2
# 128-bit key-expansion loop (cf. .L128aligned above): 16-byte stride,
# two key double-words stored per iteration.
353 for ($i=0; $i<10; $i++) {
355 std %f0, [$out + `16*$i+0`]
356 aes_kexpand1 %f0, %f2, $i, %f0
357 std %f2, [$out + `16*$i+8`]
358 aes_kexpand2 %f2, %f0, %f2
362 std %f0, [$out + `16*$i+0`]
363 std %f2, [$out + `16*$i+8`]
366 st $tmp, [$out + 240]
369 .type aes_t4_set_encrypt_key,#function
370 .size aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key
372 .globl aes_t4_set_decrypt_key
374 aes_t4_set_decrypt_key:
376 call .Lset_encrypt_key
380 sll $tmp, 4, $inp ! $tmp is number of rounds
382 add $out, $inp, $inp ! $inp=$out+16*rounds
383 srl $tmp, 2, $tmp ! $tmp=(rounds+2)/4
392 ldd [$inp - 16], %f12
401 std %f12, [$out + 16]
402 std %f14, [$out + 24]
404 brnz $tmp, .Lkey_flip
409 .type aes_t4_set_decrypt_key,#function
410 .size aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
# Bulk-mode register aliases: arguments arrive in the %i (register-window
# input) registers, loop/offset temporaries live in %l registers.
# NOTE(review): map() over (1..7) yields seven names for five variables;
# the surplus is dropped.
415 my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
416 my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));
422 for ($i=0; $i<4; $i++) {
424 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4
425 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
426 aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0
427 aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2
431 aes_eround01 %f48, %f0, %f2, %f4
432 aes_eround23 %f50, %f0, %f2, %f2
433 aes_eround01_l %f52, %f4, %f2, %f0
435 aes_eround23_l %f54, %f4, %f2, %f2
436 .type _aes128_encrypt_1x,#function
437 .size _aes128_encrypt_1x,.-_aes128_encrypt_1x
442 for ($i=0; $i<4; $i++) {
444 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8
445 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
446 aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10
447 aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6
448 aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0
449 aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2
450 aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4
451 aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6
455 aes_eround01 %f48, %f0, %f2, %f8
456 aes_eround23 %f50, %f0, %f2, %f2
457 aes_eround01 %f48, %f4, %f6, %f10
458 aes_eround23 %f50, %f4, %f6, %f6
459 aes_eround01_l %f52, %f8, %f2, %f0
460 aes_eround23_l %f54, %f8, %f2, %f2
461 aes_eround01_l %f52, %f10, %f6, %f4
463 aes_eround23_l %f54, %f10, %f6, %f6
464 .type _aes128_encrypt_2x,#function
465 .size _aes128_encrypt_2x,.-_aes128_encrypt_2x
# Load the AES-128 key schedule into float registers: i=2..21 fills
# %f16..%f54 from $key+16 onward (see the ldd on the following line).
472 for ($i=2; $i<22;$i++) { # load key schedule
474 ldd [$key + `8*$i`], %f`12+2*$i`
480 .type _aes128_loadkey,#function
481 .size _aes128_loadkey,.-_aes128_loadkey
482 _aes128_load_enckey=_aes128_loadkey
483 _aes128_load_deckey=_aes128_loadkey
# Emit the AES-128 bulk-mode entry points (CBC encrypt/decrypt, CTR32,
# XTS en/de) via the generators provided by sparcv9_modes.pl.
487 &alg_cbc_encrypt_implement("aes",128);
489 &alg_ctr32_implement("aes",128);
490 &alg_xts_implement("aes",128,"en");
491 &alg_xts_implement("aes",128,"de");
493 &alg_cbc_decrypt_implement("aes",128);
499 for ($i=0; $i<4; $i++) {
501 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
502 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
503 aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
504 aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
508 aes_dround01 %f48, %f0, %f2, %f4
509 aes_dround23 %f50, %f0, %f2, %f2
510 aes_dround01_l %f52, %f4, %f2, %f0
512 aes_dround23_l %f54, %f4, %f2, %f2
513 .type _aes128_decrypt_1x,#function
514 .size _aes128_decrypt_1x,.-_aes128_decrypt_1x
519 for ($i=0; $i<4; $i++) {
521 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
522 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
523 aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
524 aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
525 aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
526 aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
527 aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
528 aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
532 aes_dround01 %f48, %f0, %f2, %f8
533 aes_dround23 %f50, %f0, %f2, %f2
534 aes_dround01 %f48, %f4, %f6, %f10
535 aes_dround23 %f50, %f4, %f6, %f6
536 aes_dround01_l %f52, %f8, %f2, %f0
537 aes_dround23_l %f54, %f8, %f2, %f2
538 aes_dround01_l %f52, %f10, %f6, %f4
540 aes_dround23_l %f54, %f10, %f6, %f6
541 .type _aes128_decrypt_2x,#function
542 .size _aes128_decrypt_2x,.-_aes128_decrypt_2x
549 for ($i=0; $i<5; $i++) {
551 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4
552 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
553 aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0
554 aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2
558 aes_eround01 %f56, %f0, %f2, %f4
559 aes_eround23 %f58, %f0, %f2, %f2
560 aes_eround01_l %f60, %f4, %f2, %f0
562 aes_eround23_l %f62, %f4, %f2, %f2
563 .type _aes192_encrypt_1x,#function
564 .size _aes192_encrypt_1x,.-_aes192_encrypt_1x
569 for ($i=0; $i<5; $i++) {
571 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8
572 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
573 aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10
574 aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6
575 aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0
576 aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2
577 aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4
578 aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6
582 aes_eround01 %f56, %f0, %f2, %f8
583 aes_eround23 %f58, %f0, %f2, %f2
584 aes_eround01 %f56, %f4, %f6, %f10
585 aes_eround23 %f58, %f4, %f6, %f6
586 aes_eround01_l %f60, %f8, %f2, %f0
587 aes_eround23_l %f62, %f8, %f2, %f2
588 aes_eround01_l %f60, %f10, %f6, %f4
590 aes_eround23_l %f62, %f10, %f6, %f6
591 .type _aes192_encrypt_2x,#function
592 .size _aes192_encrypt_2x,.-_aes192_encrypt_2x
596 aes_eround01 %f16, %f0, %f2, %f4
597 aes_eround23 %f18, %f0, %f2, %f2
598 ldd [$key + 208], %f16
599 ldd [$key + 216], %f18
600 aes_eround01 %f20, %f4, %f2, %f0
601 aes_eround23 %f22, %f4, %f2, %f2
602 ldd [$key + 224], %f20
603 ldd [$key + 232], %f22
605 for ($i=1; $i<6; $i++) {
607 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4
608 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
609 aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0
610 aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2
614 aes_eround01 %f16, %f0, %f2, %f4
615 aes_eround23 %f18, %f0, %f2, %f2
616 ldd [$key + 16], %f16
617 ldd [$key + 24], %f18
618 aes_eround01_l %f20, %f4, %f2, %f0
619 aes_eround23_l %f22, %f4, %f2, %f2
620 ldd [$key + 32], %f20
622 ldd [$key + 40], %f22
623 .type _aes256_encrypt_1x,#function
624 .size _aes256_encrypt_1x,.-_aes256_encrypt_1x
628 aes_eround01 %f16, %f0, %f2, %f8
629 aes_eround23 %f18, %f0, %f2, %f2
630 aes_eround01 %f16, %f4, %f6, %f10
631 aes_eround23 %f18, %f4, %f6, %f6
632 ldd [$key + 208], %f16
633 ldd [$key + 216], %f18
634 aes_eround01 %f20, %f8, %f2, %f0
635 aes_eround23 %f22, %f8, %f2, %f2
636 aes_eround01 %f20, %f10, %f6, %f4
637 aes_eround23 %f22, %f10, %f6, %f6
638 ldd [$key + 224], %f20
639 ldd [$key + 232], %f22
641 for ($i=1; $i<6; $i++) {
643 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8
644 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
645 aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10
646 aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6
647 aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0
648 aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2
649 aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4
650 aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6
654 aes_eround01 %f16, %f0, %f2, %f8
655 aes_eround23 %f18, %f0, %f2, %f2
656 aes_eround01 %f16, %f4, %f6, %f10
657 aes_eround23 %f18, %f4, %f6, %f6
658 ldd [$key + 16], %f16
659 ldd [$key + 24], %f18
660 aes_eround01_l %f20, %f8, %f2, %f0
661 aes_eround23_l %f22, %f8, %f2, %f2
662 aes_eround01_l %f20, %f10, %f6, %f4
663 aes_eround23_l %f22, %f10, %f6, %f6
664 ldd [$key + 32], %f20
666 ldd [$key + 40], %f22
667 .type _aes256_encrypt_2x,#function
668 .size _aes256_encrypt_2x,.-_aes256_encrypt_2x
# Load the longer (192/256-bit) key schedule: i=2..25 fills %f16..%f62
# (see the ldd on the following line).
675 for ($i=2; $i<26;$i++) { # load key schedule
677 ldd [$key + `8*$i`], %f`12+2*$i`
683 .type _aes192_loadkey,#function
684 .size _aes192_loadkey,.-_aes192_loadkey
685 _aes256_loadkey=_aes192_loadkey
686 _aes192_load_enckey=_aes192_loadkey
687 _aes192_load_deckey=_aes192_loadkey
688 _aes256_load_enckey=_aes192_loadkey
689 _aes256_load_deckey=_aes192_loadkey
# Emit the AES-192 and AES-256 bulk-mode entry points via the
# sparcv9_modes.pl generators.
# NOTE(review): XTS is generated only for the 256-bit key length here.
692 &alg_cbc_encrypt_implement("aes",256);
693 &alg_cbc_encrypt_implement("aes",192);
695 &alg_ctr32_implement("aes",256);
696 &alg_xts_implement("aes",256,"en");
697 &alg_xts_implement("aes",256,"de");
698 &alg_ctr32_implement("aes",192);
700 &alg_cbc_decrypt_implement("aes",192);
701 &alg_cbc_decrypt_implement("aes",256);
706 aes_dround01 %f16, %f0, %f2, %f4
707 aes_dround23 %f18, %f0, %f2, %f2
708 ldd [$key + 208], %f16
709 ldd [$key + 216], %f18
710 aes_dround01 %f20, %f4, %f2, %f0
711 aes_dround23 %f22, %f4, %f2, %f2
712 ldd [$key + 224], %f20
713 ldd [$key + 232], %f22
715 for ($i=1; $i<6; $i++) {
717 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
718 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
719 aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
720 aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
724 aes_dround01 %f16, %f0, %f2, %f4
725 aes_dround23 %f18, %f0, %f2, %f2
726 ldd [$key + 16], %f16
727 ldd [$key + 24], %f18
728 aes_dround01_l %f20, %f4, %f2, %f0
729 aes_dround23_l %f22, %f4, %f2, %f2
730 ldd [$key + 32], %f20
732 ldd [$key + 40], %f22
733 .type _aes256_decrypt_1x,#function
734 .size _aes256_decrypt_1x,.-_aes256_decrypt_1x
738 aes_dround01 %f16, %f0, %f2, %f8
739 aes_dround23 %f18, %f0, %f2, %f2
740 aes_dround01 %f16, %f4, %f6, %f10
741 aes_dround23 %f18, %f4, %f6, %f6
742 ldd [$key + 208], %f16
743 ldd [$key + 216], %f18
744 aes_dround01 %f20, %f8, %f2, %f0
745 aes_dround23 %f22, %f8, %f2, %f2
746 aes_dround01 %f20, %f10, %f6, %f4
747 aes_dround23 %f22, %f10, %f6, %f6
748 ldd [$key + 224], %f20
749 ldd [$key + 232], %f22
751 for ($i=1; $i<6; $i++) {
753 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
754 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
755 aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
756 aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
757 aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
758 aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
759 aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
760 aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
764 aes_dround01 %f16, %f0, %f2, %f8
765 aes_dround23 %f18, %f0, %f2, %f2
766 aes_dround01 %f16, %f4, %f6, %f10
767 aes_dround23 %f18, %f4, %f6, %f6
768 ldd [$key + 16], %f16
769 ldd [$key + 24], %f18
770 aes_dround01_l %f20, %f8, %f2, %f0
771 aes_dround23_l %f22, %f8, %f2, %f2
772 aes_dround01_l %f20, %f10, %f6, %f4
773 aes_dround23_l %f22, %f10, %f6, %f6
774 ldd [$key + 32], %f20
776 ldd [$key + 40], %f22
777 .type _aes256_decrypt_2x,#function
778 .size _aes256_decrypt_2x,.-_aes256_decrypt_2x
783 for ($i=0; $i<5; $i++) {
785 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
786 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
787 aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
788 aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
792 aes_dround01 %f56, %f0, %f2, %f4
793 aes_dround23 %f58, %f0, %f2, %f2
794 aes_dround01_l %f60, %f4, %f2, %f0
796 aes_dround23_l %f62, %f4, %f2, %f2
797 .type _aes192_decrypt_1x,#function
798 .size _aes192_decrypt_1x,.-_aes192_decrypt_1x
803 for ($i=0; $i<5; $i++) {
805 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
806 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
807 aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
808 aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
809 aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
810 aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
811 aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
812 aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
816 aes_dround01 %f56, %f0, %f2, %f8
817 aes_dround23 %f58, %f0, %f2, %f2
818 aes_dround01 %f56, %f4, %f6, %f10
819 aes_dround23 %f58, %f4, %f6, %f6
820 aes_dround01_l %f60, %f8, %f2, %f0
821 aes_dround23_l %f62, %f8, %f2, %f2
822 aes_dround01_l %f60, %f10, %f6, %f4
824 aes_dround23_l %f62, %f10, %f6, %f6
825 .type _aes192_decrypt_2x,#function
826 .size _aes192_decrypt_2x,.-_aes192_decrypt_2x
833 AES_encrypt=aes_t4_encrypt
835 AES_decrypt=aes_t4_decrypt
836 .global AES_set_encrypt_key
839 andcc %o2, 7, %g0 ! check alignment
846 andncc %o1, 0x1c0, %g0
852 b aes_t4_set_encrypt_key
856 .type AES_set_encrypt_key,#function
857 .size AES_set_encrypt_key,.-AES_set_encrypt_key
859 .global AES_set_decrypt_key
862 andcc %o2, 7, %g0 ! check alignment
869 andncc %o1, 0x1c0, %g0
875 b aes_t4_set_decrypt_key
879 .type AES_set_decrypt_key,#function
880 .size AES_set_decrypt_key,.-AES_set_decrypt_key
# Argument registers (%o0..%o5) for the AES_cbc_encrypt compatibility
# wrapper; $enc selects the encrypt vs. decrypt dispatch below.
883 my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));
886 .globl AES_cbc_encrypt
891 brz $enc, .Lcbc_decrypt
894 bl,pt %icc, aes128_t4_cbc_encrypt
896 be,pn %icc, aes192_t4_cbc_encrypt
898 ba aes256_t4_cbc_encrypt
902 bl,pt %icc, aes128_t4_cbc_decrypt
904 be,pn %icc, aes192_t4_cbc_decrypt
906 ba aes256_t4_cbc_decrypt
908 .type AES_cbc_encrypt,#function
909 .size AES_cbc_encrypt,.-AES_cbc_encrypt
913 .asciz "AES for SPARC T4, David S. Miller, Andy Polyakov"