2 # Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by David S. Miller and Andy Polyakov.
12 # The module is licensed under 2-clause BSD
13 # license. October 2012. All rights reserved.
14 # ====================================================================
16 ######################################################################
17 # Camellia for SPARC T4.
19 # As with AES, the results below [for aligned data] are virtually identical
20 # to critical path lengths for 3-cycle instruction latency:
22 # 128-bit key 192/256-
23 # CBC encrypt 4.14/4.21(*) 5.46/5.52
24 # (*) numbers after slash are for
27 # As with Intel AES-NI, the question is whether it's possible to improve
28 # performance of parallelizable modes by interleaving round
29 # instructions. In Camellia every instruction depends on the
30 # previous one, which means that there is room for 2 additional ones
31 # between two dependent instructions. Can we expect a 3x performance
32 # improvement? At least one can argue that it should be possible to break
33 # the 2x barrier... For some reason not even 2x appears to be possible:
35 # 128-bit key 192/256-
36 # CBC decrypt 2.21/2.74 2.99/3.40
37 # CTR 2.15/2.68(*) 2.93/3.34
38 # (*) numbers after slash are for
41 # This is for 2x interleave. But compared to 1x interleave, CBC decrypt
42 # improved by ... 0% for 128-bit key, and 11% for 192/256-bit one.
43 # So out-of-order execution logic can take non-interleaved code
44 # to 1.87x, but can't take the 2x interleaved one any further. There
45 # surely is some explanation... As a result, 3x interleave was not even
46 # attempted. Instead an effort was made to share specific mode
47 # implementations with the AES module (hence sparcv9_modes.pl).
49 # For comparison, the software C implementation processes
50 # one byte in 38 cycles with a 128-bit key on the same processor.
52 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # $dir = everything in this script's path up to and including the last / or \
53 push(@INC,"${dir}","${dir}../../perlasm"); # let require search the script's own directory and ../../perlasm relative to it
54 require "sparcv9_modes.pl";
56 $output = pop and (open(STDOUT,">",$output) or die "can't open $output: $!"); # last argument, if any, names the output file; fail loudly if STDOUT cannot be redirected to it
58 $::evp=1; # if $::evp is set to 0, the script generates a module with
59 # Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
60 # entry points. These are fully compatible with openssl/camellia.h.
62 ######################################################################
63 # single-round subroutines
66 my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5)); # symbolic names for registers %o0..%o5, in that order
69 #include "sparc_arch.h"
73 .globl cmll_t4_encrypt
76 andcc $inp, 7, %g1 ! is input aligned?
95 ld [$key + 272], $rounds ! grandRounds, 3 or 4
100 ldd [$key + 32], %f16
101 ldd [$key + 40], %f18
104 ldd [$key + 48], %f20
105 ldd [$key + 56], %f22
106 sub $rounds, 1, $rounds
107 ldd [$key + 64], %f24
108 ldd [$key + 72], %f26
112 camellia_f %f12, %f2, %f0, %f2
114 sub $rounds,1,$rounds
115 camellia_f %f14, %f0, %f2, %f0
117 camellia_f %f16, %f2, %f0, %f2
118 ldd [$key + 16], %f16
119 camellia_f %f18, %f0, %f2, %f0
120 ldd [$key + 24], %f18
121 camellia_f %f20, %f2, %f0, %f2
122 ldd [$key + 32], %f20
123 camellia_f %f22, %f0, %f2, %f0
124 ldd [$key + 40], %f22
125 camellia_fl %f24, %f0, %f0
126 ldd [$key + 48], %f24
127 camellia_fli %f26, %f2, %f2
128 ldd [$key + 56], %f26
129 brnz,pt $rounds, .Lenc
132 andcc $out, 7, $tmp ! is output aligned?
133 camellia_f %f12, %f2, %f0, %f2
134 camellia_f %f14, %f0, %f2, %f0
135 camellia_f %f16, %f2, %f0, %f2
136 camellia_f %f18, %f0, %f2, %f0
137 camellia_f %f20, %f2, %f0, %f4
138 camellia_f %f22, %f0, %f4, %f2
149 2: alignaddrl $out, %g0, $out
151 srl $mask, $tmp, $mask
153 faligndata %f0, %f0, %f4
154 faligndata %f0, %f2, %f6
155 faligndata %f2, %f2, %f8
157 stda %f4, [$out + $mask]0xc0 ! partial store
160 orn %g0, $mask, $mask
162 stda %f8, [$out + $mask]0xc0 ! partial store
163 .type cmll_t4_encrypt,#function
164 .size cmll_t4_encrypt,.-cmll_t4_encrypt
166 .globl cmll_t4_decrypt
169 ld [$key + 272], $rounds ! grandRounds, 3 or 4
170 andcc $inp, 7, %g1 ! is input aligned?
173 sll $rounds, 6, $rounds
174 add $rounds, $key, $key
179 ldx [$inp + 16], $inp
192 ldd [$key - 16], %f14
195 ldd [$key - 24], %f16
196 ldd [$key - 32], %f18
199 ldd [$key - 40], %f20
200 ldd [$key - 48], %f22
201 sub $rounds, 64, $rounds
202 ldd [$key - 56], %f24
203 ldd [$key - 64], %f26
207 camellia_f %f12, %f2, %f0, %f2
209 sub $rounds, 64, $rounds
210 camellia_f %f14, %f0, %f2, %f0
211 ldd [$key - 16], %f14
212 camellia_f %f16, %f2, %f0, %f2
213 ldd [$key - 24], %f16
214 camellia_f %f18, %f0, %f2, %f0
215 ldd [$key - 32], %f18
216 camellia_f %f20, %f2, %f0, %f2
217 ldd [$key - 40], %f20
218 camellia_f %f22, %f0, %f2, %f0
219 ldd [$key - 48], %f22
220 camellia_fl %f24, %f0, %f0
221 ldd [$key - 56], %f24
222 camellia_fli %f26, %f2, %f2
223 ldd [$key - 64], %f26
224 brnz,pt $rounds, .Ldec
227 andcc $out, 7, $tmp ! is output aligned?
228 camellia_f %f12, %f2, %f0, %f2
229 camellia_f %f14, %f0, %f2, %f0
230 camellia_f %f16, %f2, %f0, %f2
231 camellia_f %f18, %f0, %f2, %f0
232 camellia_f %f20, %f2, %f0, %f4
233 camellia_f %f22, %f0, %f4, %f2
244 2: alignaddrl $out, %g0, $out
246 srl $mask, $tmp, $mask
248 faligndata %f0, %f0, %f4
249 faligndata %f0, %f2, %f6
250 faligndata %f2, %f2, %f8
252 stda %f4, [$out + $mask]0xc0 ! partial store
255 orn %g0, $mask, $mask
257 stda %f8, [$out + $mask]0xc0 ! partial store
258 .type cmll_t4_decrypt,#function
259 .size cmll_t4_decrypt,.-cmll_t4_decrypt
263 ######################################################################
264 # key setup subroutines
270 "srlx %o4, 64-$rot, %g4\n\t". # %g4 = %o4 logically shifted right by (64-$rot): the top $rot bits of %o4
271 "sllx %o4, $rot, %o4\n\t". # %o4 shifted left by $rot in place
272 "srlx %o5, 64-$rot, %g5\n\t". # %g5 = %o5 logically shifted right by (64-$rot): the top $rot bits of %o5
273 "sllx %o5, $rot, %o5\n\t". # %o5 shifted left by $rot in place
274 "or %o4, %g5, %o4\n\t". # merge the bits shifted out of %o5 into the low end of %o4
278 my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5)); # names bound to %o0..%o3; the "%o4" and "%o5" strings produced by map are left unassigned
280 .globl cmll_t4_set_key
284 alignaddr $inp, %g0, $inp
293 brz,pt $tmp, .L256aligned
297 faligndata %f0, %f2, %f0
298 faligndata %f2, %f4, %f2
299 faligndata %f4, %f6, %f4
301 faligndata %f6, %f8, %f6
305 brz,a,pt $tmp, .L256aligned
310 faligndata %f0, %f2, %f0
311 faligndata %f2, %f4, %f2
312 faligndata %f4, %f6, %f4
316 std %f0, [$out + 0] ! k[0, 1]
318 std %f2, [$out + 8] ! k[2, 3]
326 brz,pt $tmp, .L128aligned
331 faligndata %f0, %f2, %f0
332 faligndata %f2, %f4, %f2
335 std %f0, [$out + 0] ! k[0, 1]
337 std %f2, [$out + 8] ! k[2, 3]
343 add %o7, SIGMA-1b, %o4
351 camellia_f %f16, %f2, %f0, %f2
352 camellia_f %f18, %f0, %f2, %f0
355 camellia_f %f20, %f2, %f0, %f2
356 camellia_f %f22, %f0, %f2, %f0
358 bge,pn %icc, .L256key
360 std %f0, [$out + 0x10] ! k[ 4, 5]
361 std %f2, [$out + 0x18] ! k[ 6, 7]
366 stx %o4, [$out + 0x30] ! k[12, 13]
367 stx %o5, [$out + 0x38] ! k[14, 15]
369 stx %o4, [$out + 0x40] ! k[16, 17]
370 stx %o5, [$out + 0x48] ! k[18, 19]
372 stx %o4, [$out + 0x60] ! k[24, 25]
374 stx %o4, [$out + 0x70] ! k[28, 29]
375 stx %o5, [$out + 0x78] ! k[30, 31]
377 stx %o4, [$out + 0xa0] ! k[40, 41]
378 stx %o5, [$out + 0xa8] ! k[42, 43]
380 stx %o4, [$out + 0xc0] ! k[48, 49]
381 stx %o5, [$out + 0xc8] ! k[50, 51]
383 movdtox %f28, %o4 ! k[ 0, 1]
384 movdtox %f30, %o5 ! k[ 2, 3]
386 stx %o4, [$out + 0x20] ! k[ 8, 9]
387 stx %o5, [$out + 0x28] ! k[10, 11]
389 stx %o4, [$out + 0x50] ! k[20, 21]
390 stx %o5, [$out + 0x58] ! k[22, 23]
392 stx %o5, [$out + 0x68] ! k[26, 27]
394 stx %o4, [$out + 0x80] ! k[32, 33]
395 stx %o5, [$out + 0x88] ! k[34, 35]
397 stx %o4, [$out + 0x90] ! k[36, 37]
398 stx %o5, [$out + 0x98] ! k[38, 39]
400 stx %o4, [$out + 0xb0] ! k[44, 45]
401 stx %o5, [$out + 0xb8] ! k[46, 47]
404 st $tmp, [$out + 0x110]
413 std %f0, [$out + 0x30] ! k[12, 13]
414 std %f2, [$out + 0x38] ! k[14, 15]
418 camellia_f %f24, %f2, %f0, %f2
419 camellia_f %f26, %f0, %f2, %f0
421 std %f0, [$out + 0x10] ! k[ 4, 5]
422 std %f2, [$out + 0x18] ! k[ 6, 7]
427 stx %o4, [$out + 0x50] ! k[20, 21]
428 stx %o5, [$out + 0x58] ! k[22, 23]
430 stx %o4, [$out + 0xa0] ! k[40, 41]
431 stx %o5, [$out + 0xa8] ! k[42, 43]
433 stx %o4, [$out + 0x100] ! k[64, 65]
434 stx %o5, [$out + 0x108] ! k[66, 67]
436 movdtox %f4, %o4 ! k[ 8, 9]
437 movdtox %f6, %o5 ! k[10, 11]
439 stx %o4, [$out + 0x20] ! k[ 8, 9]
440 stx %o5, [$out + 0x28] ! k[10, 11]
442 stx %o4, [$out + 0x40] ! k[16, 17]
443 stx %o5, [$out + 0x48] ! k[18, 19]
445 stx %o4, [$out + 0x90] ! k[36, 37]
446 stx %o5, [$out + 0x98] ! k[38, 39]
448 stx %o4, [$out + 0xd0] ! k[52, 53]
449 stx %o5, [$out + 0xd8] ! k[54, 55]
450 ldx [$out + 0x30], %o4 ! k[12, 13]
451 ldx [$out + 0x38], %o5 ! k[14, 15]
453 stx %o4, [$out + 0x30] ! k[12, 13]
454 stx %o5, [$out + 0x38] ! k[14, 15]
456 stx %o4, [$out + 0x70] ! k[28, 29]
457 stx %o5, [$out + 0x78] ! k[30, 31]
460 st %o4, [$out + 0xc0] ! k[48]
461 st %g5, [$out + 0xc4] ! k[49]
462 st %o5, [$out + 0xc8] ! k[50]
463 st %g4, [$out + 0xcc] ! k[51]
465 stx %o4, [$out + 0xe0] ! k[56, 57]
466 stx %o5, [$out + 0xe8] ! k[58, 59]
468 movdtox %f28, %o4 ! k[ 0, 1]
469 movdtox %f30, %o5 ! k[ 2, 3]
471 stx %o4, [$out + 0x60] ! k[24, 25]
472 stx %o5, [$out + 0x68] ! k[26, 27]
474 stx %o4, [$out + 0x80] ! k[32, 33]
475 stx %o5, [$out + 0x88] ! k[34, 35]
477 stx %o4, [$out + 0xb0] ! k[44, 45]
478 stx %o5, [$out + 0xb8] ! k[46, 47]
480 stx %o4, [$out + 0xf0] ! k[60, 61]
481 stx %o5, [$out + 0xf8] ! k[62, 63]
484 st $tmp, [$out + 0x110]
487 .type cmll_t4_set_key,#function
488 .size cmll_t4_set_key,.-cmll_t4_set_key
491 .long 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
492 .long 0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
493 .long 0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
496 .asciz "Camellia for SPARC T4, David S. Miller, Andy Polyakov"
501 my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5)); # symbolic names for registers %i0..%i5, in that order
502 my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7)); # names bound to %l1..%l5 (numbering starts at 1, so %l0 gets no name here); the "%l6"/"%l7" strings are left unassigned
506 _cmll128_load_enckey:
510 for ($i=2; $i<26;$i++) { # load key schedule: 24 iterations, $i = 2..25; the ldd template uses offset 8*$i (16..200) and register number 12+2*$i (16..62). NOTE(review): the step that evaluates the backquoted expressions is not visible here
512 ldd [$key + `8*$i`], %f`12+2*$i`
518 .type _cmll128_load_enckey,#function
519 .size _cmll128_load_enckey,.-_cmll128_load_enckey
520 _cmll256_load_enckey=_cmll128_load_enckey
523 _cmll256_load_deckey:
524 ldd [$key + 64], %f62
525 ldd [$key + 72], %f60
528 _cmll128_load_deckey:
533 for ($i=2; $i<24;$i++) { # load key schedule: 22 iterations, $i = 2..23; the ldd template uses offset 8*$i (16..184) and register number 62-2*$i (58 down to 16), i.e. registers are filled in descending order. NOTE(review): the step that evaluates the backquoted expressions is not visible here
535 ldd [$key + `8*$i`], %f`62-2*$i`
539 ldx [$key + 192], %g4
541 ldx [$key + 200], %g5
542 .type _cmll256_load_deckey,#function
543 .size _cmll256_load_deckey,.-_cmll256_load_deckey
548 for ($i=0; $i<3; $i++) {
550 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
551 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
552 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
553 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
555 $code.=<<___ if ($i<2);
556 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
557 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
558 camellia_fl %f`16+16*$i+12`, %f0, %f0
559 camellia_fli %f`16+16*$i+14`, %f2, %f2
563 camellia_f %f56, %f2, %f0, %f4
564 camellia_f %f58, %f0, %f4, %f2
568 .type _cmll128_encrypt_1x,#function
569 .size _cmll128_encrypt_1x,.-_cmll128_encrypt_1x
570 _cmll128_decrypt_1x=_cmll128_encrypt_1x
575 for ($i=0; $i<3; $i++) {
577 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
578 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
579 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
580 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
581 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
582 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
583 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
584 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
586 $code.=<<___ if ($i<2);
587 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
588 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
589 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
590 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
591 camellia_fl %f`16+16*$i+12`, %f0, %f0
592 camellia_fl %f`16+16*$i+12`, %f4, %f4
593 camellia_fli %f`16+16*$i+14`, %f2, %f2
594 camellia_fli %f`16+16*$i+14`, %f6, %f6
598 camellia_f %f56, %f2, %f0, %f8
599 camellia_f %f56, %f6, %f4, %f10
600 camellia_f %f58, %f0, %f8, %f2
601 camellia_f %f58, %f4, %f10, %f6
607 .type _cmll128_encrypt_2x,#function
608 .size _cmll128_encrypt_2x,.-_cmll128_encrypt_2x
609 _cmll128_decrypt_2x=_cmll128_encrypt_2x
613 camellia_f %f16, %f2, %f0, %f2
614 camellia_f %f18, %f0, %f2, %f0
615 ldd [$key + 208], %f16
616 ldd [$key + 216], %f18
617 camellia_f %f20, %f2, %f0, %f2
618 camellia_f %f22, %f0, %f2, %f0
619 ldd [$key + 224], %f20
620 ldd [$key + 232], %f22
621 camellia_f %f24, %f2, %f0, %f2
622 camellia_f %f26, %f0, %f2, %f0
623 ldd [$key + 240], %f24
624 ldd [$key + 248], %f26
625 camellia_fl %f28, %f0, %f0
626 camellia_fli %f30, %f2, %f2
627 ldd [$key + 256], %f28
628 ldd [$key + 264], %f30
630 for ($i=1; $i<3; $i++) {
632 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
633 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
634 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
635 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
636 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
637 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
638 camellia_fl %f`16+16*$i+12`, %f0, %f0
639 camellia_fli %f`16+16*$i+14`, %f2, %f2
643 camellia_f %f16, %f2, %f0, %f2
644 camellia_f %f18, %f0, %f2, %f0
645 ldd [$key + 16], %f16
646 ldd [$key + 24], %f18
647 camellia_f %f20, %f2, %f0, %f2
648 camellia_f %f22, %f0, %f2, %f0
649 ldd [$key + 32], %f20
650 ldd [$key + 40], %f22
651 camellia_f %f24, %f2, %f0, %f4
652 camellia_f %f26, %f0, %f4, %f2
653 ldd [$key + 48], %f24
654 ldd [$key + 56], %f26
657 ldd [$key + 64], %f28
659 ldd [$key + 72], %f30
660 .type _cmll256_encrypt_1x,#function
661 .size _cmll256_encrypt_1x,.-_cmll256_encrypt_1x
665 camellia_f %f16, %f2, %f0, %f2
666 camellia_f %f16, %f6, %f4, %f6
667 camellia_f %f18, %f0, %f2, %f0
668 camellia_f %f18, %f4, %f6, %f4
669 ldd [$key + 208], %f16
670 ldd [$key + 216], %f18
671 camellia_f %f20, %f2, %f0, %f2
672 camellia_f %f20, %f6, %f4, %f6
673 camellia_f %f22, %f0, %f2, %f0
674 camellia_f %f22, %f4, %f6, %f4
675 ldd [$key + 224], %f20
676 ldd [$key + 232], %f22
677 camellia_f %f24, %f2, %f0, %f2
678 camellia_f %f24, %f6, %f4, %f6
679 camellia_f %f26, %f0, %f2, %f0
680 camellia_f %f26, %f4, %f6, %f4
681 ldd [$key + 240], %f24
682 ldd [$key + 248], %f26
683 camellia_fl %f28, %f0, %f0
684 camellia_fl %f28, %f4, %f4
685 camellia_fli %f30, %f2, %f2
686 camellia_fli %f30, %f6, %f6
687 ldd [$key + 256], %f28
688 ldd [$key + 264], %f30
690 for ($i=1; $i<3; $i++) {
692 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
693 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
694 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
695 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
696 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
697 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
698 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
699 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
700 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
701 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
702 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
703 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
704 camellia_fl %f`16+16*$i+12`, %f0, %f0
705 camellia_fl %f`16+16*$i+12`, %f4, %f4
706 camellia_fli %f`16+16*$i+14`, %f2, %f2
707 camellia_fli %f`16+16*$i+14`, %f6, %f6
711 camellia_f %f16, %f2, %f0, %f2
712 camellia_f %f16, %f6, %f4, %f6
713 camellia_f %f18, %f0, %f2, %f0
714 camellia_f %f18, %f4, %f6, %f4
715 ldd [$key + 16], %f16
716 ldd [$key + 24], %f18
717 camellia_f %f20, %f2, %f0, %f2
718 camellia_f %f20, %f6, %f4, %f6
719 camellia_f %f22, %f0, %f2, %f0
720 camellia_f %f22, %f4, %f6, %f4
721 ldd [$key + 32], %f20
722 ldd [$key + 40], %f22
723 camellia_f %f24, %f2, %f0, %f8
724 camellia_f %f24, %f6, %f4, %f10
725 camellia_f %f26, %f0, %f8, %f2
726 camellia_f %f26, %f4, %f10, %f6
727 ldd [$key + 48], %f24
728 ldd [$key + 56], %f26
733 ldd [$key + 64], %f28
735 ldd [$key + 72], %f30
736 .type _cmll256_encrypt_2x,#function
737 .size _cmll256_encrypt_2x,.-_cmll256_encrypt_2x
741 camellia_f %f16, %f2, %f0, %f2
742 camellia_f %f18, %f0, %f2, %f0
744 ldd [$key - 16], %f18
745 camellia_f %f20, %f2, %f0, %f2
746 camellia_f %f22, %f0, %f2, %f0
747 ldd [$key - 24], %f20
748 ldd [$key - 32], %f22
749 camellia_f %f24, %f2, %f0, %f2
750 camellia_f %f26, %f0, %f2, %f0
751 ldd [$key - 40], %f24
752 ldd [$key - 48], %f26
753 camellia_fl %f28, %f0, %f0
754 camellia_fli %f30, %f2, %f2
755 ldd [$key - 56], %f28
756 ldd [$key - 64], %f30
758 for ($i=1; $i<3; $i++) {
760 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
761 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
762 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
763 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
764 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
765 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
766 camellia_fl %f`16+16*$i+12`, %f0, %f0
767 camellia_fli %f`16+16*$i+14`, %f2, %f2
771 camellia_f %f16, %f2, %f0, %f2
772 camellia_f %f18, %f0, %f2, %f0
773 ldd [$key + 184], %f16
774 ldd [$key + 176], %f18
775 camellia_f %f20, %f2, %f0, %f2
776 camellia_f %f22, %f0, %f2, %f0
777 ldd [$key + 168], %f20
778 ldd [$key + 160], %f22
779 camellia_f %f24, %f2, %f0, %f4
780 camellia_f %f26, %f0, %f4, %f2
781 ldd [$key + 152], %f24
782 ldd [$key + 144], %f26
785 ldd [$key + 136], %f28
787 ldd [$key + 128], %f30
788 .type _cmll256_decrypt_1x,#function
789 .size _cmll256_decrypt_1x,.-_cmll256_decrypt_1x
793 camellia_f %f16, %f2, %f0, %f2
794 camellia_f %f16, %f6, %f4, %f6
795 camellia_f %f18, %f0, %f2, %f0
796 camellia_f %f18, %f4, %f6, %f4
798 ldd [$key - 16], %f18
799 camellia_f %f20, %f2, %f0, %f2
800 camellia_f %f20, %f6, %f4, %f6
801 camellia_f %f22, %f0, %f2, %f0
802 camellia_f %f22, %f4, %f6, %f4
803 ldd [$key - 24], %f20
804 ldd [$key - 32], %f22
805 camellia_f %f24, %f2, %f0, %f2
806 camellia_f %f24, %f6, %f4, %f6
807 camellia_f %f26, %f0, %f2, %f0
808 camellia_f %f26, %f4, %f6, %f4
809 ldd [$key - 40], %f24
810 ldd [$key - 48], %f26
811 camellia_fl %f28, %f0, %f0
812 camellia_fl %f28, %f4, %f4
813 camellia_fli %f30, %f2, %f2
814 camellia_fli %f30, %f6, %f6
815 ldd [$key - 56], %f28
816 ldd [$key - 64], %f30
818 for ($i=1; $i<3; $i++) {
820 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
821 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
822 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
823 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
824 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
825 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
826 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
827 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
828 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
829 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
830 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
831 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
832 camellia_fl %f`16+16*$i+12`, %f0, %f0
833 camellia_fl %f`16+16*$i+12`, %f4, %f4
834 camellia_fli %f`16+16*$i+14`, %f2, %f2
835 camellia_fli %f`16+16*$i+14`, %f6, %f6
839 camellia_f %f16, %f2, %f0, %f2
840 camellia_f %f16, %f6, %f4, %f6
841 camellia_f %f18, %f0, %f2, %f0
842 camellia_f %f18, %f4, %f6, %f4
843 ldd [$key + 184], %f16
844 ldd [$key + 176], %f18
845 camellia_f %f20, %f2, %f0, %f2
846 camellia_f %f20, %f6, %f4, %f6
847 camellia_f %f22, %f0, %f2, %f0
848 camellia_f %f22, %f4, %f6, %f4
849 ldd [$key + 168], %f20
850 ldd [$key + 160], %f22
851 camellia_f %f24, %f2, %f0, %f8
852 camellia_f %f24, %f6, %f4, %f10
853 camellia_f %f26, %f0, %f8, %f2
854 camellia_f %f26, %f4, %f10, %f6
855 ldd [$key + 152], %f24
856 ldd [$key + 144], %f26
861 ldd [$key + 136], %f28
863 ldd [$key + 128], %f30
864 .type _cmll256_decrypt_2x,#function
865 .size _cmll256_decrypt_2x,.-_cmll256_decrypt_2x
868 &alg_cbc_encrypt_implement("cmll",128); # helper is not defined in the visible part of this file (presumably sparcv9_modes.pl, confirm); args: name prefix "cmll", variant 128
869 &alg_cbc_encrypt_implement("cmll",256); # same helper, variant 256
871 &alg_cbc_decrypt_implement("cmll",128); # CBC decrypt counterpart, variant 128
872 &alg_cbc_decrypt_implement("cmll",256); # CBC decrypt counterpart, variant 256
875 &alg_ctr32_implement("cmll",128); # ctr32 helper, variant 128
876 &alg_ctr32_implement("cmll",256); # ctr32 helper, variant 256
882 .global Camellia_encrypt
883 Camellia_encrypt=cmll_t4_encrypt
884 .global Camellia_decrypt
885 Camellia_decrypt=cmll_t4_decrypt
886 .global Camellia_set_key
889 andcc %o2, 7, %g0 ! double-check alignment
896 andncc %o1, 0x1c0, %g0
906 .type Camellia_set_key,#function
907 .size Camellia_set_key,.-Camellia_set_key
910 my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5)); # symbolic names for registers %o0..%o5, in that order
913 .globl Camellia_cbc_encrypt
915 Camellia_cbc_encrypt:
918 brz $enc, .Lcbc_decrypt
921 be,pt %icc, cmll128_t4_cbc_encrypt
923 ba cmll256_t4_cbc_encrypt
927 be,pt %icc, cmll128_t4_cbc_decrypt
929 ba cmll256_t4_cbc_decrypt
931 .type Camellia_cbc_encrypt,#function
932 .size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
938 close STDOUT or die "error closing STDOUT: $!"; # buffered write errors surface at close; include the OS error so a truncated output file is diagnosable