3 # ====================================================================
4 # Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
5 # <appro@openssl.org>. The module is licensed under 2-clause BSD
6 # license. March 2013. All rights reserved.
7 # ====================================================================
9 ######################################################################
12 # As with other hardware-assisted ciphers CBC encrypt results [for
13 # aligned data] are virtually identical to critical path lengths:
16 # CBC encrypt 4.14/4.15(*) 11.7/11.7
17 # CBC decrypt 1.77/4.11(**) 6.42/7.47
19 # (*) numbers after slash are for
21 # (**) this is result for largest
22 # block size, unlike all other
23 # cases smaller blocks results
# Locate the directory this script was invoked from (the part of $0 up to
# and including the last '/' or '\') and add it, together with the
# ../../perlasm directory relative to it, to @INC so that the shared
# "sparcv9_modes.pl" helper can be loaded with require.
# NOTE(review): $dir is assigned from $1 without checking that the match
# succeeded; presumably $0 always carries a directory component -- confirm.
26 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
27 push(@INC,"${dir}","${dir}../../perlasm");
28 require "sparcv9_modes.pl";
# Only when generating for a 64-bit ABI ($::abibits==64): append .register
# directives marking %g2 and %g3 as scratch. Those two registers are the
# ones bound to $iright and $omask by the CBC routines further down.
32 $code.=<<___ if ($::abibits==64);
33 .register %g2,#scratch
34 .register %g3,#scratch
# des_t4_key_expand
#
# Register usage: $inp is %o0 (source pointer), $out is %o1 (destination
# pointer).
#
# The emitted code reads two doublewords from $inp (after alignaddr) and
# merges them with faligndata into %f0; the local label "1:" is presumably
# the join point for input that needs no realignment -- the branch itself
# is not visible here, confirm. From %f0 a dependent chain of des_kexpand
# instructions produces %f0..%f30, and sixteen doublewords are stored to
# $out + 0x00 .. $out + 0x78.
#
# The std instructions are deliberately interleaved with the des_kexpand
# chain and are therefore not issued in ascending offset order; every
# register is still stored to the offset matching its index (%f0 -> 0x00,
# %f2 -> 0x08, ... %f30 -> 0x78). Do not reorder without re-checking the
# register/offset pairing.
#
# Presumably the 0x80-byte result is the key schedule that the CBC
# routines load from $key -- TODO confirm against the callers.
41 { my ($inp,$out)=("%o0","%o1");
45 .globl des_t4_key_expand
46 .type des_t4_key_expand,#function
49 alignaddr $inp, %g0, $inp
51 ldd [$inp + 0x00], %f0
52 ldd [$inp + 0x08], %f2
53 faligndata %f0, %f2, %f0
54 1: des_kexpand %f0, 0, %f0
55 des_kexpand %f0, 1, %f2
56 std %f0, [$out + 0x00]
57 des_kexpand %f2, 3, %f6
58 std %f2, [$out + 0x08]
59 des_kexpand %f2, 2, %f4
60 des_kexpand %f6, 3, %f10
61 std %f6, [$out + 0x18]
62 des_kexpand %f6, 2, %f8
63 std %f4, [$out + 0x10]
64 des_kexpand %f10, 3, %f14
65 std %f10, [$out + 0x28]
66 des_kexpand %f10, 2, %f12
67 std %f8, [$out + 0x20]
68 des_kexpand %f14, 1, %f16
69 std %f14, [$out + 0x38]
70 des_kexpand %f16, 3, %f20
71 std %f12, [$out + 0x30]
72 des_kexpand %f16, 2, %f18
73 std %f16, [$out + 0x40]
74 des_kexpand %f20, 3, %f24
75 std %f20, [$out + 0x50]
76 des_kexpand %f20, 2, %f22
77 std %f18, [$out + 0x48]
78 des_kexpand %f24, 3, %f28
79 std %f24, [$out + 0x60]
80 des_kexpand %f24, 2, %f26
81 std %f22, [$out + 0x58]
82 des_kexpand %f28, 1, %f30
83 std %f28, [$out + 0x70]
84 std %f26, [$out + 0x68]
86 std %f30, [$out + 0x78]
87 .size des_t4_key_expand,.-des_t4_key_expand
# des_t4_cbc_encrypt / des_t4_cbc_decrypt
#
# Register map shared by the CBC routines that follow:
#   $inp  = %o0   $out = %o1   $len = %o2   $key = %o3   $ivec = %o4
#   $ileft = %g1  $iright = %g2  $omask = %g3
#
# Common structure of both routines, as far as it is visible here:
#   * An early "be,pn ... .Lcbc_abort" exit; the compare feeding it is not
#     visible -- presumably a zero-length check, confirm.
#   * $ileft is scaled by 8 (sll ..., 3) and $iright is its negation; the
#     loop uses them as the sllx/srlx shift counts applied to %g4/%g5,
#     i.e. to the two input words when the input is misaligned.
#   * Sixteen doublewords of key schedule ($key + 0x00 .. 0x78) are loaded
#     into %f4..%f34 and consumed pairwise by eight des_round
#     instructions operating in place on %f0.
#   * The local label "2:" is the unaligned-output path: the block is
#     rotated with faligndata and written with two partial stores
#     (stda ... 0xc0), the second one using the complemented $omask
#     (orn %g0, $omask, $omask); the mask is complemented again in the
#     branch delay slot so it is restored for the next iteration.
#   * From "2:" the loop is re-entered at <loop label>+4, one instruction
#     past the label; the ldxa at "2:" presumably stands in for the
#     skipped first load -- confirm against the full loop body.
#
# Differences between the two directions:
#   * encrypt keeps the chaining value in %f0, XORs the input block (%f2)
#     into it *before* the rounds and writes %f0 back to $ivec;
#   * decrypt keeps the chaining value in %f2, XORs it into %f0 *after*
#     the rounds and writes %f2 back to $ivec;
#   * decrypt loads the same schedule offsets in descending order
#     (0x78 down to 0x00), so the identical des_round sequence runs with
#     the round keys reversed.
90 { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
91 my ($ileft,$iright,$omask) = map("%g$_",(1..3));
94 .globl des_t4_cbc_encrypt
98 be,pn $::size_t_cc, .Lcbc_abort
100 ld [$ivec + 0], %f0 ! load ivec
105 sll $ileft, 3, $ileft
108 prefetch [$inp + 63], 20
109 sub %g0, $ileft, $iright
111 alignaddrl $out, %g0, $out
112 srl $omask, %g4, $omask
117 ldd [$key + 0x00], %f4 ! load key schedule
118 ldd [$key + 0x08], %f6
119 ldd [$key + 0x10], %f8
120 ldd [$key + 0x18], %f10
121 ldd [$key + 0x20], %f12
122 ldd [$key + 0x28], %f14
123 ldd [$key + 0x30], %f16
124 ldd [$key + 0x38], %f18
125 ldd [$key + 0x40], %f20
126 ldd [$key + 0x48], %f22
127 ldd [$key + 0x50], %f24
128 ldd [$key + 0x58], %f26
129 ldd [$key + 0x60], %f28
130 ldd [$key + 0x68], %f30
131 ldd [$key + 0x70], %f32
132 ldd [$key + 0x78], %f34
140 sllx %g4, $ileft, %g4
141 srlx %g5, $iright, %g5
145 prefetch [$inp + 8+63], 20
147 fxor %f2, %f0, %f0 ! ^= ivec
148 prefetch [$out + 63], 22
151 des_round %f4, %f6, %f0, %f0
152 des_round %f8, %f10, %f0, %f0
153 des_round %f12, %f14, %f0, %f0
154 des_round %f16, %f18, %f0, %f0
155 des_round %f20, %f22, %f0, %f0
156 des_round %f24, %f26, %f0, %f0
157 des_round %f28, %f30, %f0, %f0
158 des_round %f32, %f34, %f0, %f0
165 brnz,pt $len, .Ldes_cbc_enc_loop
168 st %f0, [$ivec + 0] ! write out ivec
176 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
177 ! and ~4x deterioration
179 faligndata %f0, %f0, %f2 ! handle unaligned output
181 stda %f2, [$out + $omask]0xc0 ! partial store
183 orn %g0, $omask, $omask
184 stda %f2, [$out + $omask]0xc0 ! partial store
186 brnz,pt $len, .Ldes_cbc_enc_loop+4
187 orn %g0, $omask, $omask
189 st %f0, [$ivec + 0] ! write out ivec
192 .type des_t4_cbc_encrypt,#function
193 .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
195 .globl des_t4_cbc_decrypt
199 be,pn $::size_t_cc, .Lcbc_abort
201 ld [$ivec + 0], %f2 ! load ivec
206 sll $ileft, 3, $ileft
209 prefetch [$inp + 63], 20
210 sub %g0, $ileft, $iright
212 alignaddrl $out, %g0, $out
213 srl $omask, %g4, $omask
218 ldd [$key + 0x78], %f4 ! load key schedule
219 ldd [$key + 0x70], %f6
220 ldd [$key + 0x68], %f8
221 ldd [$key + 0x60], %f10
222 ldd [$key + 0x58], %f12
223 ldd [$key + 0x50], %f14
224 ldd [$key + 0x48], %f16
225 ldd [$key + 0x40], %f18
226 ldd [$key + 0x38], %f20
227 ldd [$key + 0x30], %f22
228 ldd [$key + 0x28], %f24
229 ldd [$key + 0x20], %f26
230 ldd [$key + 0x18], %f28
231 ldd [$key + 0x10], %f30
232 ldd [$key + 0x08], %f32
233 ldd [$key + 0x00], %f34
241 sllx %g4, $ileft, %g4
242 srlx %g5, $iright, %g5
246 prefetch [$inp + 8+63], 20
248 prefetch [$out + 63], 22
251 des_round %f4, %f6, %f0, %f0
252 des_round %f8, %f10, %f0, %f0
253 des_round %f12, %f14, %f0, %f0
254 des_round %f16, %f18, %f0, %f0
255 des_round %f20, %f22, %f0, %f0
256 des_round %f24, %f26, %f0, %f0
257 des_round %f28, %f30, %f0, %f0
258 des_round %f32, %f34, %f0, %f0
261 fxor %f2, %f0, %f0 ! ^= ivec
268 brnz,pt $len, .Ldes_cbc_dec_loop
271 st %f2, [$ivec + 0] ! write out ivec
276 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
277 ! and ~4x deterioration
279 faligndata %f0, %f0, %f0 ! handle unaligned output
281 stda %f0, [$out + $omask]0xc0 ! partial store
283 orn %g0, $omask, $omask
284 stda %f0, [$out + $omask]0xc0 ! partial store
286 brnz,pt $len, .Ldes_cbc_dec_loop+4
287 orn %g0, $omask, $omask
289 st %f2, [$ivec + 0] ! write out ivec
292 .type des_t4_cbc_decrypt,#function
293 .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
# des_t4_ede3_cbc_encrypt / des_t4_ede3_cbc_decrypt
#
# Both routines follow the same outline as the single-DES CBC code (early
# exit to .Lcbc_abort, $ileft/$iright shift counts for misaligned input,
# "2:" path with faligndata plus two partial stores for unaligned output,
# loop re-entry at <loop label>+4), but run three groups of eight
# des_round instructions per block, each des_round consuming two
# schedule entries.
#
# Key schedule access, as visible in the loads:
#   * encrypt: $key + 0x00 .. 0x78 is loaded once, ascending, into
#     %f4..%f34 before the loop. Inside the loop $key + 0x100-0x08 down to
#     0x100-0x80 (descending) and then $key + 0x100+0x00 .. 0x100+0x78
#     (ascending) are loaded into %f36..%f62.
#   * decrypt: the mirror image -- $key + 0x100+0x78 down to 0x100+0x00 is
#     loaded once into %f4..%f34, then inside the loop $key + 0x80+0x00 ..
#     0x80+0x78 (ascending) and $key + 0x80-0x08 down to 0x80-0x80
#     (descending).
#   Presumably $key holds three consecutive 0x80-byte schedules at
#   +0x000, +0x080 and +0x100 -- TODO confirm against the key-setup caller.
#
# %f36..%f62 hold only 14 registers, so %f36/%f38 and %f40/%f42 are
# reloaded while earlier rounds are still executing; the ldd instructions
# are interleaved with des_round for that reason. The order is
# significant: each reload must come after the des_round that last used
# the old value.
#
# As in the single-DES code, encrypt XORs the chaining value in before the
# rounds and writes %f0 back to $ivec, while decrypt XORs %f2 in after the
# rounds and writes %f2 back.
#
296 # One might wonder why there are back-to-back des_iip/des_ip
297 # pairs between EDE passes. Indeed, aren't they inverses of each other?
298 # They almost are. The outcome of the pair is the 32-bit words being
299 # swapped in the target register. Consider the des_iip/des_ip pair as a
300 # way to perform the due swap; it's actually the fastest way in this case.
303 .globl des_t4_ede3_cbc_encrypt
305 des_t4_ede3_cbc_encrypt:
307 be,pn $::size_t_cc, .Lcbc_abort
309 ld [$ivec + 0], %f0 ! load ivec
314 sll $ileft, 3, $ileft
317 prefetch [$inp + 63], 20
318 sub %g0, $ileft, $iright
320 alignaddrl $out, %g0, $out
321 srl $omask, %g4, $omask
326 ldd [$key + 0x00], %f4 ! load key schedule
327 ldd [$key + 0x08], %f6
328 ldd [$key + 0x10], %f8
329 ldd [$key + 0x18], %f10
330 ldd [$key + 0x20], %f12
331 ldd [$key + 0x28], %f14
332 ldd [$key + 0x30], %f16
333 ldd [$key + 0x38], %f18
334 ldd [$key + 0x40], %f20
335 ldd [$key + 0x48], %f22
336 ldd [$key + 0x50], %f24
337 ldd [$key + 0x58], %f26
338 ldd [$key + 0x60], %f28
339 ldd [$key + 0x68], %f30
340 ldd [$key + 0x70], %f32
341 ldd [$key + 0x78], %f34
343 .Ldes_ede3_cbc_enc_loop:
349 sllx %g4, $ileft, %g4
350 srlx %g5, $iright, %g5
354 prefetch [$inp + 8+63], 20
356 fxor %f2, %f0, %f0 ! ^= ivec
357 prefetch [$out + 63], 22
360 des_round %f4, %f6, %f0, %f0
361 des_round %f8, %f10, %f0, %f0
362 des_round %f12, %f14, %f0, %f0
363 des_round %f16, %f18, %f0, %f0
364 ldd [$key + 0x100-0x08], %f36
365 ldd [$key + 0x100-0x10], %f38
366 des_round %f20, %f22, %f0, %f0
367 ldd [$key + 0x100-0x18], %f40
368 ldd [$key + 0x100-0x20], %f42
369 des_round %f24, %f26, %f0, %f0
370 ldd [$key + 0x100-0x28], %f44
371 ldd [$key + 0x100-0x30], %f46
372 des_round %f28, %f30, %f0, %f0
373 ldd [$key + 0x100-0x38], %f48
374 ldd [$key + 0x100-0x40], %f50
375 des_round %f32, %f34, %f0, %f0
376 ldd [$key + 0x100-0x48], %f52
377 ldd [$key + 0x100-0x50], %f54
380 ldd [$key + 0x100-0x58], %f56
381 ldd [$key + 0x100-0x60], %f58
383 ldd [$key + 0x100-0x68], %f60
384 ldd [$key + 0x100-0x70], %f62
385 des_round %f36, %f38, %f0, %f0
386 ldd [$key + 0x100-0x78], %f36
387 ldd [$key + 0x100-0x80], %f38
388 des_round %f40, %f42, %f0, %f0
389 des_round %f44, %f46, %f0, %f0
390 des_round %f48, %f50, %f0, %f0
391 ldd [$key + 0x100+0x00], %f40
392 ldd [$key + 0x100+0x08], %f42
393 des_round %f52, %f54, %f0, %f0
394 ldd [$key + 0x100+0x10], %f44
395 ldd [$key + 0x100+0x18], %f46
396 des_round %f56, %f58, %f0, %f0
397 ldd [$key + 0x100+0x20], %f48
398 ldd [$key + 0x100+0x28], %f50
399 des_round %f60, %f62, %f0, %f0
400 ldd [$key + 0x100+0x30], %f52
401 ldd [$key + 0x100+0x38], %f54
402 des_round %f36, %f38, %f0, %f0
403 ldd [$key + 0x100+0x40], %f56
404 ldd [$key + 0x100+0x48], %f58
407 ldd [$key + 0x100+0x50], %f60
408 ldd [$key + 0x100+0x58], %f62
410 ldd [$key + 0x100+0x60], %f36
411 ldd [$key + 0x100+0x68], %f38
412 des_round %f40, %f42, %f0, %f0
413 ldd [$key + 0x100+0x70], %f40
414 ldd [$key + 0x100+0x78], %f42
415 des_round %f44, %f46, %f0, %f0
416 des_round %f48, %f50, %f0, %f0
417 des_round %f52, %f54, %f0, %f0
418 des_round %f56, %f58, %f0, %f0
419 des_round %f60, %f62, %f0, %f0
420 des_round %f36, %f38, %f0, %f0
421 des_round %f40, %f42, %f0, %f0
428 brnz,pt $len, .Ldes_ede3_cbc_enc_loop
431 st %f0, [$ivec + 0] ! write out ivec
436 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
437 ! and ~2x deterioration
439 faligndata %f0, %f0, %f2 ! handle unaligned output
441 stda %f2, [$out + $omask]0xc0 ! partial store
443 orn %g0, $omask, $omask
444 stda %f2, [$out + $omask]0xc0 ! partial store
446 brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
447 orn %g0, $omask, $omask
449 st %f0, [$ivec + 0] ! write out ivec
452 .type des_t4_ede3_cbc_encrypt,#function
453 .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
455 .globl des_t4_ede3_cbc_decrypt
457 des_t4_ede3_cbc_decrypt:
459 be,pn $::size_t_cc, .Lcbc_abort
461 ld [$ivec + 0], %f2 ! load ivec
466 sll $ileft, 3, $ileft
469 prefetch [$inp + 63], 20
470 sub %g0, $ileft, $iright
472 alignaddrl $out, %g0, $out
473 srl $omask, %g4, $omask
478 ldd [$key + 0x100+0x78], %f4 ! load key schedule
479 ldd [$key + 0x100+0x70], %f6
480 ldd [$key + 0x100+0x68], %f8
481 ldd [$key + 0x100+0x60], %f10
482 ldd [$key + 0x100+0x58], %f12
483 ldd [$key + 0x100+0x50], %f14
484 ldd [$key + 0x100+0x48], %f16
485 ldd [$key + 0x100+0x40], %f18
486 ldd [$key + 0x100+0x38], %f20
487 ldd [$key + 0x100+0x30], %f22
488 ldd [$key + 0x100+0x28], %f24
489 ldd [$key + 0x100+0x20], %f26
490 ldd [$key + 0x100+0x18], %f28
491 ldd [$key + 0x100+0x10], %f30
492 ldd [$key + 0x100+0x08], %f32
493 ldd [$key + 0x100+0x00], %f34
495 .Ldes_ede3_cbc_dec_loop:
501 sllx %g4, $ileft, %g4
502 srlx %g5, $iright, %g5
506 prefetch [$inp + 8+63], 20
508 prefetch [$out + 63], 22
511 des_round %f4, %f6, %f0, %f0
512 des_round %f8, %f10, %f0, %f0
513 des_round %f12, %f14, %f0, %f0
514 des_round %f16, %f18, %f0, %f0
515 ldd [$key + 0x80+0x00], %f36
516 ldd [$key + 0x80+0x08], %f38
517 des_round %f20, %f22, %f0, %f0
518 ldd [$key + 0x80+0x10], %f40
519 ldd [$key + 0x80+0x18], %f42
520 des_round %f24, %f26, %f0, %f0
521 ldd [$key + 0x80+0x20], %f44
522 ldd [$key + 0x80+0x28], %f46
523 des_round %f28, %f30, %f0, %f0
524 ldd [$key + 0x80+0x30], %f48
525 ldd [$key + 0x80+0x38], %f50
526 des_round %f32, %f34, %f0, %f0
527 ldd [$key + 0x80+0x40], %f52
528 ldd [$key + 0x80+0x48], %f54
531 ldd [$key + 0x80+0x50], %f56
532 ldd [$key + 0x80+0x58], %f58
534 ldd [$key + 0x80+0x60], %f60
535 ldd [$key + 0x80+0x68], %f62
536 des_round %f36, %f38, %f0, %f0
537 ldd [$key + 0x80+0x70], %f36
538 ldd [$key + 0x80+0x78], %f38
539 des_round %f40, %f42, %f0, %f0
540 des_round %f44, %f46, %f0, %f0
541 des_round %f48, %f50, %f0, %f0
542 ldd [$key + 0x80-0x08], %f40
543 ldd [$key + 0x80-0x10], %f42
544 des_round %f52, %f54, %f0, %f0
545 ldd [$key + 0x80-0x18], %f44
546 ldd [$key + 0x80-0x20], %f46
547 des_round %f56, %f58, %f0, %f0
548 ldd [$key + 0x80-0x28], %f48
549 ldd [$key + 0x80-0x30], %f50
550 des_round %f60, %f62, %f0, %f0
551 ldd [$key + 0x80-0x38], %f52
552 ldd [$key + 0x80-0x40], %f54
553 des_round %f36, %f38, %f0, %f0
554 ldd [$key + 0x80-0x48], %f56
555 ldd [$key + 0x80-0x50], %f58
558 ldd [$key + 0x80-0x58], %f60
559 ldd [$key + 0x80-0x60], %f62
561 ldd [$key + 0x80-0x68], %f36
562 ldd [$key + 0x80-0x70], %f38
563 des_round %f40, %f42, %f0, %f0
564 ldd [$key + 0x80-0x78], %f40
565 ldd [$key + 0x80-0x80], %f42
566 des_round %f44, %f46, %f0, %f0
567 des_round %f48, %f50, %f0, %f0
568 des_round %f52, %f54, %f0, %f0
569 des_round %f56, %f58, %f0, %f0
570 des_round %f60, %f62, %f0, %f0
571 des_round %f36, %f38, %f0, %f0
572 des_round %f40, %f42, %f0, %f0
575 fxor %f2, %f0, %f0 ! ^= ivec
582 brnz,pt $len, .Ldes_ede3_cbc_dec_loop
585 st %f2, [$ivec + 0] ! write out ivec
590 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
591 ! and ~3x deterioration
593 faligndata %f0, %f0, %f0 ! handle unaligned output
595 stda %f0, [$out + $omask]0xc0 ! partial store
597 orn %g0, $omask, $omask
598 stda %f0, [$out + $omask]0xc0 ! partial store
600 brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
601 orn %g0, $omask, $omask
603 st %f2, [$ivec + 0] ! write out ivec
606 .type des_t4_ede3_cbc_decrypt,#function
607 .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
611 .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"