3 # ====================================================================
4 # Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
5 # <appro@openssl.org>. The module is licensed under 2-clause BSD
6 # license. March 2013. All rights reserved.
7 # ====================================================================
9 ######################################################################
12 # As with other hardware-assisted ciphers CBC encrypt results [for
13 # aligned data] are virtually identical to critical path lengths:
16 # CBC encrypt 4.14/4.15(*) 11.7/11.7
17 # CBC decrypt 1.77/4.11(**) 6.42/7.47
19 # (*) numbers after slash are for
21 # (**) this is result for largest
22 # block size, unlike all other
23 # cases smaller blocks results
26 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # $dir = leading directory part of the script path, up to the last / or \
27 push(@INC,"${dir}","${dir}../../perlasm"); # let require search the script directory and ../../perlasm relative to it
28 require "sparcv9_modes.pl"; # helper module; its contents are not visible in this file
32 $code.=<<___ if ($::abibits==64); # append the two .register directives only when $::abibits is 64
33 .register %g2,#scratch
34 .register %g3,#scratch
41 { my ($inp,$out)=("%o0","%o1"); # inp is %o0 (key bytes), out is %o1 (expanded output)
45 .globl des_t4_key_expand ! key expansion: stores 16 doublewords (offsets 0x00-0x78) derived from the 8 key bytes at the input pointer
46 .type des_t4_key_expand,#function
49 alignaddr $inp, %g0, $inp ! round input pointer down to 8 bytes; the byte offset is kept in GSR for faligndata
51 ldd [$inp + 0x00], %f0 ! aligned doubleword holding the start of the key
52 ldd [$inp + 0x08], %f2 ! next doubleword, holding the rest when the key is misaligned
53 faligndata %f0, %f2, %f0 ! extract the 8 key bytes from the pair
54 1: des_kexpand %f0, 0, %f0 ! label 1: presumably where aligned input joins (the branch is not visible in this excerpt)
55 des_kexpand %f0, 1, %f2 ! every later expansion takes an earlier result as its source
56 std %f0, [$out + 0x00] ! results are stored as they become available, so offsets are not written in ascending order
57 des_kexpand %f2, 3, %f6
58 std %f2, [$out + 0x08]
59 des_kexpand %f2, 2, %f4
60 des_kexpand %f6, 3, %f10
61 std %f6, [$out + 0x18]
62 des_kexpand %f6, 2, %f8
63 std %f4, [$out + 0x10]
64 des_kexpand %f10, 3, %f14
65 std %f10, [$out + 0x28]
66 des_kexpand %f10, 2, %f12
67 std %f8, [$out + 0x20]
68 des_kexpand %f14, 1, %f16
69 std %f14, [$out + 0x38]
70 des_kexpand %f16, 3, %f20
71 std %f12, [$out + 0x30]
72 des_kexpand %f16, 2, %f18
73 std %f16, [$out + 0x40]
74 des_kexpand %f20, 3, %f24
75 std %f20, [$out + 0x50]
76 des_kexpand %f20, 2, %f22
77 std %f18, [$out + 0x48]
78 des_kexpand %f24, 3, %f28
79 std %f24, [$out + 0x60]
80 des_kexpand %f24, 2, %f26
81 std %f22, [$out + 0x58]
82 des_kexpand %f28, 1, %f30
83 std %f28, [$out + 0x70]
84 std %f26, [$out + 0x68]
86 std %f30, [$out + 0x78] ! last of the 16 doublewords (0x80 bytes in total)
87 .size des_t4_key_expand,.-des_t4_key_expand
90 { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4)); # inp, out, len, key, ivec name %o0..%o4 in that order
91 my ($ileft,$iright,$omask) = map("%g$_",(1..3)); # ileft, iright, omask name %g1..%g3
94 .globl des_t4_cbc_encrypt ! single-key CBC encryption; only part of the routine is visible in this excerpt
97 ld [$ivec + 0], %f0 ! load ivec
102 sll $ileft, 3, $ileft ! presumably the input misalignment in bytes, scaled to a bit count for the shifts below
105 prefetch [$inp + 63], 20
106 sub %g0, $ileft, $iright ! negated left count, used as the complementary right-shift count
108 alignaddrl $out, %g0, $out ! round output pointer down to 8 bytes; records the alignment in GSR for the faligndata at label 2
109 srl $omask, %g4, $omask ! presumably the byte mask for the first partial store (initial mask and %g4 are set on lines not visible here)
114 ldd [$key + 0x00], %f4 ! load key schedule
115 ldd [$key + 0x08], %f6
116 ldd [$key + 0x10], %f8
117 ldd [$key + 0x18], %f10
118 ldd [$key + 0x20], %f12
119 ldd [$key + 0x28], %f14
120 ldd [$key + 0x30], %f16
121 ldd [$key + 0x38], %f18
122 ldd [$key + 0x40], %f20
123 ldd [$key + 0x48], %f22
124 ldd [$key + 0x50], %f24
125 ldd [$key + 0x58], %f26
126 ldd [$key + 0x60], %f28
127 ldd [$key + 0x68], %f30
128 ldd [$key + 0x70], %f32
129 ldd [$key + 0x78], %f34
137 sllx %g4, $ileft, %g4 ! misaligned input: leading part of the block from the first doubleword
138 srlx %g5, $iright, %g5 ! trailing part of the block from the following doubleword
142 prefetch [$inp + 8+63], 20
144 fxor %f2, %f0, %f0 ! ^= ivec
145 prefetch [$out + 63], 22
148 des_round %f4, %f6, %f0, %f0 ! eight round instructions, each consuming two of the sixteen key doublewords, in ascending order
149 des_round %f8, %f10, %f0, %f0
150 des_round %f12, %f14, %f0, %f0
151 des_round %f16, %f18, %f0, %f0
152 des_round %f20, %f22, %f0, %f0
153 des_round %f24, %f26, %f0, %f0
154 des_round %f28, %f30, %f0, %f0
155 des_round %f32, %f34, %f0, %f0
162 brnz,pt $len, .Ldes_cbc_enc_loop ! loop while the block count is non-zero
165 st %f0, [$ivec + 0] ! write out ivec
170 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
171 ! and ~4x deterioration
173 faligndata %f0, %f0, %f2 ! handle unaligned output; the rotated copy goes to %f2 because %f0 must survive as the next chaining value
175 stda %f2, [$out + $omask]0xc0 ! partial store of the rotated ciphertext in %f2 (the %f8-%f34 registers hold key schedule and must never be stored)
177 orn %g0, $omask, $omask ! invert the byte mask to select the remaining bytes
178 stda %f2, [$out + $omask]0xc0 ! partial store of the other part of the same rotated block
180 brnz,pt $len, .Ldes_cbc_enc_loop+4 ! target is one instruction past the loop label; the load at label 2 presumably stands in for the skipped instruction
181 orn %g0, $omask, $omask ! invert the mask back for the next block
183 st %f0, [$ivec + 0] ! write out ivec
186 .type des_t4_cbc_encrypt,#function
187 .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
189 .globl des_t4_cbc_decrypt ! single-key CBC decryption; only part of the routine is visible in this excerpt
192 ld [$ivec + 0], %f2 ! load ivec
197 sll $ileft, 3, $ileft ! presumably the input misalignment in bytes, scaled to a bit count for the shifts below
200 prefetch [$inp + 63], 20
201 sub %g0, $ileft, $iright ! negated left count, used as the complementary right-shift count
203 alignaddrl $out, %g0, $out ! round output pointer down to 8 bytes; records the alignment in GSR for the faligndata at label 2
204 srl $omask, %g4, $omask ! presumably the byte mask for the first partial store (initial mask and %g4 are set on lines not visible here)
209 ldd [$key + 0x78], %f4 ! load key schedule, highest offset first, so the rounds below consume it in reverse order
210 ldd [$key + 0x70], %f6
211 ldd [$key + 0x68], %f8
212 ldd [$key + 0x60], %f10
213 ldd [$key + 0x58], %f12
214 ldd [$key + 0x50], %f14
215 ldd [$key + 0x48], %f16
216 ldd [$key + 0x40], %f18
217 ldd [$key + 0x38], %f20
218 ldd [$key + 0x30], %f22
219 ldd [$key + 0x28], %f24
220 ldd [$key + 0x20], %f26
221 ldd [$key + 0x18], %f28
222 ldd [$key + 0x10], %f30
223 ldd [$key + 0x08], %f32
224 ldd [$key + 0x00], %f34
232 sllx %g4, $ileft, %g4 ! misaligned input: leading part of the block from the first doubleword
233 srlx %g5, $iright, %g5 ! trailing part of the block from the following doubleword
237 prefetch [$inp + 8+63], 20
239 prefetch [$out + 63], 22
242 des_round %f4, %f6, %f0, %f0 ! eight round instructions, each consuming two of the sixteen key doublewords
243 des_round %f8, %f10, %f0, %f0
244 des_round %f12, %f14, %f0, %f0
245 des_round %f16, %f18, %f0, %f0
246 des_round %f20, %f22, %f0, %f0
247 des_round %f24, %f26, %f0, %f0
248 des_round %f28, %f30, %f0, %f0
249 des_round %f32, %f34, %f0, %f0
252 fxor %f2, %f0, %f0 ! ^= ivec
259 brnz,pt $len, .Ldes_cbc_dec_loop ! loop while the block count is non-zero
262 st %f2, [$ivec + 0] ! write out ivec
267 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
268 ! and ~4x deterioration
270 faligndata %f0, %f0, %f0 ! handle unaligned output; rotating %f0 in place is fine here because the chaining value is kept in %f2
272 stda %f0, [$out + $omask]0xc0 ! partial store
274 orn %g0, $omask, $omask ! invert the byte mask to select the remaining bytes
275 stda %f0, [$out + $omask]0xc0 ! partial store
277 brnz,pt $len, .Ldes_cbc_dec_loop+4 ! target is one instruction past the loop label; the load at label 2 presumably stands in for the skipped instruction
278 orn %g0, $omask, $omask ! invert the mask back for the next block
280 st %f2, [$ivec + 0] ! write out ivec
283 .type des_t4_cbc_decrypt,#function
284 .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
286 .globl des_t4_ede3_cbc_encrypt ! three-pass (EDE3) CBC encryption; only part of the routine is visible in this excerpt
288 des_t4_ede3_cbc_encrypt:
289 ld [$ivec + 0], %f0 ! load ivec
294 sll $ileft, 3, $ileft ! presumably the input misalignment in bytes, scaled to a bit count for the shifts below
297 prefetch [$inp + 63], 20
298 sub %g0, $ileft, $iright ! negated left count, used as the complementary right-shift count
300 alignaddrl $out, %g0, $out ! round output pointer down to 8 bytes; records the alignment in GSR for the faligndata at label 2
301 srl $omask, %g4, $omask ! presumably the byte mask for the first partial store (initial mask and %g4 are set on lines not visible here)
306 ldd [$key + 0x00], %f4 ! load key schedule (first 0x80 bytes only; these registers stay loaded across iterations)
307 ldd [$key + 0x08], %f6
308 ldd [$key + 0x10], %f8
309 ldd [$key + 0x18], %f10
310 ldd [$key + 0x20], %f12
311 ldd [$key + 0x28], %f14
312 ldd [$key + 0x30], %f16
313 ldd [$key + 0x38], %f18
314 ldd [$key + 0x40], %f20
315 ldd [$key + 0x48], %f22
316 ldd [$key + 0x50], %f24
317 ldd [$key + 0x58], %f26
318 ldd [$key + 0x60], %f28
319 ldd [$key + 0x68], %f30
320 ldd [$key + 0x70], %f32
321 ldd [$key + 0x78], %f34
323 .Ldes_ede3_cbc_enc_loop:
329 sllx %g4, $ileft, %g4 ! misaligned input: leading part of the block from the first doubleword
330 srlx %g5, $iright, %g5 ! trailing part of the block from the following doubleword
334 prefetch [$inp + 8+63], 20
336 fxor %f2, %f0, %f0 ! ^= ivec
337 prefetch [$out + 63], 22
340 des_round %f4, %f6, %f0, %f0 ! pass 1: key doublewords 0x00-0x78 in ascending order
341 des_round %f8, %f10, %f0, %f0
342 des_round %f12, %f14, %f0, %f0
343 des_round %f16, %f18, %f0, %f0
344 ldd [$key + 0x100-0x08], %f36 ! pass 2 keys are fetched inside the loop, interleaved with pass 1: offsets 0xf8 down to 0x80
345 ldd [$key + 0x100-0x10], %f38
346 des_round %f20, %f22, %f0, %f0
347 ldd [$key + 0x100-0x18], %f40
348 ldd [$key + 0x100-0x20], %f42
349 des_round %f24, %f26, %f0, %f0
350 ldd [$key + 0x100-0x28], %f44
351 ldd [$key + 0x100-0x30], %f46
352 des_round %f28, %f30, %f0, %f0
353 ldd [$key + 0x100-0x38], %f48
354 ldd [$key + 0x100-0x40], %f50
355 des_round %f32, %f34, %f0, %f0
356 ldd [$key + 0x100-0x48], %f52
357 ldd [$key + 0x100-0x50], %f54
360 ldd [$key + 0x100-0x58], %f56
361 ldd [$key + 0x100-0x60], %f58
363 ldd [$key + 0x100-0x68], %f60
364 ldd [$key + 0x100-0x70], %f62
365 des_round %f36, %f38, %f0, %f0 ! pass 2: second 0x80 bytes of the schedule, consumed in descending order
366 ldd [$key + 0x100-0x78], %f36 ! %f36/%f38 are free again, reload them with the last pair of pass 2
367 ldd [$key + 0x100-0x80], %f38
368 des_round %f40, %f42, %f0, %f0
369 des_round %f44, %f46, %f0, %f0
370 des_round %f48, %f50, %f0, %f0
371 ldd [$key + 0x100+0x00], %f40 ! pass 3 keys: offsets 0x100-0x178 ascending, loaded into registers pass 2 has finished with
372 ldd [$key + 0x100+0x08], %f42
373 des_round %f52, %f54, %f0, %f0
374 ldd [$key + 0x100+0x10], %f44
375 ldd [$key + 0x100+0x18], %f46
376 des_round %f56, %f58, %f0, %f0
377 ldd [$key + 0x100+0x20], %f48
378 ldd [$key + 0x100+0x28], %f50
379 des_round %f60, %f62, %f0, %f0
380 ldd [$key + 0x100+0x30], %f52
381 ldd [$key + 0x100+0x38], %f54
382 des_round %f36, %f38, %f0, %f0 ! final pair of pass 2
383 ldd [$key + 0x100+0x40], %f56
384 ldd [$key + 0x100+0x48], %f58
387 ldd [$key + 0x100+0x50], %f60
388 ldd [$key + 0x100+0x58], %f62
390 ldd [$key + 0x100+0x60], %f36
391 ldd [$key + 0x100+0x68], %f38
392 des_round %f40, %f42, %f0, %f0 ! pass 3: third 0x80 bytes of the schedule in ascending order
393 ldd [$key + 0x100+0x70], %f40 ! %f40/%f42 are free again, reload them with the last pair of pass 3
394 ldd [$key + 0x100+0x78], %f42
395 des_round %f44, %f46, %f0, %f0
396 des_round %f48, %f50, %f0, %f0
397 des_round %f52, %f54, %f0, %f0
398 des_round %f56, %f58, %f0, %f0
399 des_round %f60, %f62, %f0, %f0
400 des_round %f36, %f38, %f0, %f0
401 des_round %f40, %f42, %f0, %f0
408 brnz,pt $len, .Ldes_ede3_cbc_enc_loop ! loop while the block count is non-zero
411 st %f0, [$ivec + 0] ! write out ivec
416 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
417 ! and ~2x deterioration
419 faligndata %f0, %f0, %f2 ! handle unaligned output; the rotated copy goes to %f2 because %f0 must survive as the next chaining value
421 stda %f2, [$out + $omask]0xc0 ! partial store
423 orn %g0, $omask, $omask ! invert the byte mask to select the remaining bytes
424 stda %f2, [$out + $omask]0xc0 ! partial store
426 brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4 ! target is one instruction past the loop label; the load at label 2 presumably stands in for the skipped instruction
427 orn %g0, $omask, $omask ! invert the mask back for the next block
429 st %f0, [$ivec + 0] ! write out ivec
432 .type des_t4_ede3_cbc_encrypt,#function
433 .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
435 .globl des_t4_ede3_cbc_decrypt ! three-pass (EDE3) CBC decryption; only part of the routine is visible in this excerpt
437 des_t4_ede3_cbc_decrypt:
438 ld [$ivec + 0], %f2 ! load ivec
443 sll $ileft, 3, $ileft ! presumably the input misalignment in bytes, scaled to a bit count for the shifts below
446 prefetch [$inp + 63], 20
447 sub %g0, $ileft, $iright ! negated left count, used as the complementary right-shift count
449 alignaddrl $out, %g0, $out ! round output pointer down to 8 bytes; records the alignment in GSR for the faligndata at label 2
450 srl $omask, %g4, $omask ! presumably the byte mask for the first partial store (initial mask and %g4 are set on lines not visible here)
455 ldd [$key + 0x100+0x78], %f4 ! load key schedule (third 0x80 bytes, highest offset first; these registers stay loaded across iterations)
456 ldd [$key + 0x100+0x70], %f6
457 ldd [$key + 0x100+0x68], %f8
458 ldd [$key + 0x100+0x60], %f10
459 ldd [$key + 0x100+0x58], %f12
460 ldd [$key + 0x100+0x50], %f14
461 ldd [$key + 0x100+0x48], %f16
462 ldd [$key + 0x100+0x40], %f18
463 ldd [$key + 0x100+0x38], %f20
464 ldd [$key + 0x100+0x30], %f22
465 ldd [$key + 0x100+0x28], %f24
466 ldd [$key + 0x100+0x20], %f26
467 ldd [$key + 0x100+0x18], %f28
468 ldd [$key + 0x100+0x10], %f30
469 ldd [$key + 0x100+0x08], %f32
470 ldd [$key + 0x100+0x00], %f34
472 .Ldes_ede3_cbc_dec_loop:
478 sllx %g4, $ileft, %g4 ! misaligned input: leading part of the block from the first doubleword
479 srlx %g5, $iright, %g5 ! trailing part of the block from the following doubleword
483 prefetch [$inp + 8+63], 20
485 prefetch [$out + 63], 22
488 des_round %f4, %f6, %f0, %f0 ! pass 1: key doublewords 0x178 down to 0x100
489 des_round %f8, %f10, %f0, %f0
490 des_round %f12, %f14, %f0, %f0
491 des_round %f16, %f18, %f0, %f0
492 ldd [$key + 0x80+0x00], %f36 ! pass 2 keys are fetched inside the loop, interleaved with pass 1: offsets 0x80-0xf8 ascending
493 ldd [$key + 0x80+0x08], %f38
494 des_round %f20, %f22, %f0, %f0
495 ldd [$key + 0x80+0x10], %f40
496 ldd [$key + 0x80+0x18], %f42
497 des_round %f24, %f26, %f0, %f0
498 ldd [$key + 0x80+0x20], %f44
499 ldd [$key + 0x80+0x28], %f46
500 des_round %f28, %f30, %f0, %f0
501 ldd [$key + 0x80+0x30], %f48
502 ldd [$key + 0x80+0x38], %f50
503 des_round %f32, %f34, %f0, %f0
504 ldd [$key + 0x80+0x40], %f52
505 ldd [$key + 0x80+0x48], %f54
508 ldd [$key + 0x80+0x50], %f56
509 ldd [$key + 0x80+0x58], %f58
511 ldd [$key + 0x80+0x60], %f60
512 ldd [$key + 0x80+0x68], %f62
513 des_round %f36, %f38, %f0, %f0 ! pass 2: second 0x80 bytes of the schedule in ascending order
514 ldd [$key + 0x80+0x70], %f36 ! %f36/%f38 are free again, reload them with the last pair of pass 2
515 ldd [$key + 0x80+0x78], %f38
516 des_round %f40, %f42, %f0, %f0
517 des_round %f44, %f46, %f0, %f0
518 des_round %f48, %f50, %f0, %f0
519 ldd [$key + 0x80-0x08], %f40 ! pass 3 keys: offsets 0x78 down to 0x00, loaded into registers pass 2 has finished with
520 ldd [$key + 0x80-0x10], %f42
521 des_round %f52, %f54, %f0, %f0
522 ldd [$key + 0x80-0x18], %f44
523 ldd [$key + 0x80-0x20], %f46
524 des_round %f56, %f58, %f0, %f0
525 ldd [$key + 0x80-0x28], %f48
526 ldd [$key + 0x80-0x30], %f50
527 des_round %f60, %f62, %f0, %f0
528 ldd [$key + 0x80-0x38], %f52
529 ldd [$key + 0x80-0x40], %f54
530 des_round %f36, %f38, %f0, %f0 ! final pair of pass 2
531 ldd [$key + 0x80-0x48], %f56
532 ldd [$key + 0x80-0x50], %f58
535 ldd [$key + 0x80-0x58], %f60
536 ldd [$key + 0x80-0x60], %f62
538 ldd [$key + 0x80-0x68], %f36
539 ldd [$key + 0x80-0x70], %f38
540 des_round %f40, %f42, %f0, %f0 ! pass 3: first 0x80 bytes of the schedule, consumed in descending order
541 ldd [$key + 0x80-0x78], %f40 ! %f40/%f42 are free again, reload them with the last pair of pass 3
542 ldd [$key + 0x80-0x80], %f42
543 des_round %f44, %f46, %f0, %f0
544 des_round %f48, %f50, %f0, %f0
545 des_round %f52, %f54, %f0, %f0
546 des_round %f56, %f58, %f0, %f0
547 des_round %f60, %f62, %f0, %f0
548 des_round %f36, %f38, %f0, %f0
549 des_round %f40, %f42, %f0, %f0
552 fxor %f2, %f0, %f0 ! ^= ivec
559 brnz,pt $len, .Ldes_ede3_cbc_dec_loop ! loop while the block count is non-zero
562 st %f2, [$ivec + 0] ! write out ivec
567 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
568 ! and ~3x deterioration
570 faligndata %f0, %f0, %f0 ! handle unaligned output; rotating %f0 in place is fine here because the chaining value is kept in %f2
572 stda %f0, [$out + $omask]0xc0 ! partial store
574 orn %g0, $omask, $omask ! invert the byte mask to select the remaining bytes
575 stda %f0, [$out + $omask]0xc0 ! partial store
577 brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4 ! target is one instruction past the loop label; the load at label 2 presumably stands in for the skipped instruction
578 orn %g0, $omask, $omask ! invert the mask back for the next block
580 st %f2, [$ivec + 0] ! write out ivec
583 .type des_t4_ede3_cbc_decrypt,#function
584 .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
588 .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov" ! NUL-terminated identification string emitted into the generated assembly