3 # ====================================================================
4 # Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
5 # <appro@openssl.org>. The module is licensed under 2-clause BSD
6 # license. March 2013. All rights reserved.
7 # ====================================================================
9 ######################################################################
12 # As with other hardware-assisted ciphers CBC encrypt results [for
13 # aligned data] are virtually identical to critical path lengths:
16 # CBC encrypt 4.14/4.15(*) 11.7/11.7
17 # CBC decrypt 1.77/4.11(**) 6.42/7.47
19 # (*) numbers after slash are for
21 # (**) this is result for largest
22 # block size, unlike all other
23 # cases smaller blocks results
26 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
27 push(@INC,"${dir}","${dir}../../perlasm");
28 require "sparcv9_modes.pl";
32 $code.=<<___ if ($::abibits==64);
33 .register %g2,#scratch
34 .register %g3,#scratch
41 { my ($inp,$out)=("%o0","%o1");
45 .globl des_t4_key_expand
46 .type des_t4_key_expand,#function
49 alignaddr $inp, %g0, $inp
51 ldd [$inp + 0x00], %f0
52 ldd [$inp + 0x08], %f2
53 faligndata %f0, %f2, %f0
54 1: des_kexpand %f0, 0, %f0
55 des_kexpand %f0, 1, %f2
56 std %f0, [$out + 0x00]
57 des_kexpand %f2, 3, %f6
58 std %f2, [$out + 0x08]
59 des_kexpand %f2, 2, %f4
60 des_kexpand %f6, 3, %f10
61 std %f6, [$out + 0x18]
62 des_kexpand %f6, 2, %f8
63 std %f4, [$out + 0x10]
64 des_kexpand %f10, 3, %f14
65 std %f10, [$out + 0x28]
66 des_kexpand %f10, 2, %f12
67 std %f8, [$out + 0x20]
68 des_kexpand %f14, 1, %f16
69 std %f14, [$out + 0x38]
70 des_kexpand %f16, 3, %f20
71 std %f12, [$out + 0x30]
72 des_kexpand %f16, 2, %f18
73 std %f16, [$out + 0x40]
74 des_kexpand %f20, 3, %f24
75 std %f20, [$out + 0x50]
76 des_kexpand %f20, 2, %f22
77 std %f18, [$out + 0x48]
78 des_kexpand %f24, 3, %f28
79 std %f24, [$out + 0x60]
80 des_kexpand %f24, 2, %f26
81 std %f22, [$out + 0x58]
82 des_kexpand %f28, 1, %f30
83 std %f28, [$out + 0x70]
84 std %f26, [$out + 0x68]
86 std %f30, [$out + 0x78]
87 .size des_t4_key_expand,.-des_t4_key_expand
90 { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
91 my ($ileft,$iright,$omask) = map("%g$_",(1..3));
94 .globl des_t4_cbc_encrypt
97 ld [$ivec + 0], %f0 ! load ivec
102 sll $ileft, 3, $ileft
105 prefetch [$inp + 63], 20
106 sub %g0, $ileft, $iright
108 alignaddrl $out, %g0, $out
109 srl $omask, %g4, $omask
114 ldd [$key + 0x00], %f4 ! load key schedule
115 ldd [$key + 0x08], %f6
116 ldd [$key + 0x10], %f8
117 ldd [$key + 0x18], %f10
118 ldd [$key + 0x20], %f12
119 ldd [$key + 0x28], %f14
120 ldd [$key + 0x30], %f16
121 ldd [$key + 0x38], %f18
122 ldd [$key + 0x40], %f20
123 ldd [$key + 0x48], %f22
124 ldd [$key + 0x50], %f24
125 ldd [$key + 0x58], %f26
126 ldd [$key + 0x60], %f28
127 ldd [$key + 0x68], %f30
128 ldd [$key + 0x70], %f32
129 ldd [$key + 0x78], %f34
137 sllx %g4, $ileft, %g4
138 srlx %g5, $iright, %g5
142 prefetch [$inp + 8+63], 20
144 fxor %f2, %f0, %f0 ! ^= ivec
145 prefetch [$out + 63], 22
148 des_round %f4, %f6, %f0, %f0
149 des_round %f8, %f10, %f0, %f0
150 des_round %f12, %f14, %f0, %f0
151 des_round %f16, %f18, %f0, %f0
152 des_round %f20, %f22, %f0, %f0
153 des_round %f24, %f26, %f0, %f0
154 des_round %f28, %f30, %f0, %f0
155 des_round %f32, %f34, %f0, %f0
162 brnz,pt $len, .Ldes_cbc_enc_loop
165 st %f0, [$ivec + 0] ! write out ivec
170 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
171 ! and ~4x deterioration
173 faligndata %f0, %f0, %f2 ! handle unaligned output, result in %f2
175 stda %f2, [$out + $omask]0xc0 ! partial store
177 orn %g0, $omask, $omask
178 stda %f2, [$out + $omask]0xc0 ! partial store
180 brnz,pt $len, .Ldes_cbc_enc_loop+4
181 orn %g0, $omask, $omask
183 st %f0, [$ivec + 0] ! write out ivec
186 .type des_t4_cbc_encrypt,#function
187 .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
189 .globl des_t4_cbc_decrypt
192 ld [$ivec + 0], %f2 ! load ivec
197 sll $ileft, 3, $ileft
200 prefetch [$inp + 63], 20
201 sub %g0, $ileft, $iright
203 alignaddrl $out, %g0, $out
204 srl $omask, %g4, $omask
209 ldd [$key + 0x78], %f4 ! load key schedule
210 ldd [$key + 0x70], %f6
211 ldd [$key + 0x68], %f8
212 ldd [$key + 0x60], %f10
213 ldd [$key + 0x58], %f12
214 ldd [$key + 0x50], %f14
215 ldd [$key + 0x48], %f16
216 ldd [$key + 0x40], %f18
217 ldd [$key + 0x38], %f20
218 ldd [$key + 0x30], %f22
219 ldd [$key + 0x28], %f24
220 ldd [$key + 0x20], %f26
221 ldd [$key + 0x18], %f28
222 ldd [$key + 0x10], %f30
223 ldd [$key + 0x08], %f32
224 ldd [$key + 0x00], %f34
232 sllx %g4, $ileft, %g4
233 srlx %g5, $iright, %g5
237 prefetch [$inp + 8+63], 20
239 prefetch [$out + 63], 22
242 des_round %f4, %f6, %f0, %f0
243 des_round %f8, %f10, %f0, %f0
244 des_round %f12, %f14, %f0, %f0
245 des_round %f16, %f18, %f0, %f0
246 des_round %f20, %f22, %f0, %f0
247 des_round %f24, %f26, %f0, %f0
248 des_round %f28, %f30, %f0, %f0
249 des_round %f32, %f34, %f0, %f0
252 fxor %f2, %f0, %f0 ! ^= ivec
259 brnz,pt $len, .Ldes_cbc_dec_loop
262 st %f2, [$ivec + 0] ! write out ivec
267 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
268 ! and ~4x deterioration
270 faligndata %f0, %f0, %f0 ! handle unaligned output
272 stda %f0, [$out + $omask]0xc0 ! partial store
274 orn %g0, $omask, $omask
275 stda %f0, [$out + $omask]0xc0 ! partial store
277 brnz,pt $len, .Ldes_cbc_dec_loop+4
278 orn %g0, $omask, $omask
280 st %f2, [$ivec + 0] ! write out ivec
283 .type des_t4_cbc_decrypt,#function
284 .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
287 # One might wonder why does one have back-to-back des_iip/des_ip
288 # pairs between EDE passes. Indeed, aren't they inverse of each other?
289 # They almost are. Outcome of the pair is 32-bit words being swapped
290 # in target register. Consider pair of des_iip/des_ip as a way to
291 # perform the due swap, it's actually fastest way in this case.
294 .globl des_t4_ede3_cbc_encrypt
296 des_t4_ede3_cbc_encrypt:
297 ld [$ivec + 0], %f0 ! load ivec
302 sll $ileft, 3, $ileft
305 prefetch [$inp + 63], 20
306 sub %g0, $ileft, $iright
308 alignaddrl $out, %g0, $out
309 srl $omask, %g4, $omask
314 ldd [$key + 0x00], %f4 ! load key schedule
315 ldd [$key + 0x08], %f6
316 ldd [$key + 0x10], %f8
317 ldd [$key + 0x18], %f10
318 ldd [$key + 0x20], %f12
319 ldd [$key + 0x28], %f14
320 ldd [$key + 0x30], %f16
321 ldd [$key + 0x38], %f18
322 ldd [$key + 0x40], %f20
323 ldd [$key + 0x48], %f22
324 ldd [$key + 0x50], %f24
325 ldd [$key + 0x58], %f26
326 ldd [$key + 0x60], %f28
327 ldd [$key + 0x68], %f30
328 ldd [$key + 0x70], %f32
329 ldd [$key + 0x78], %f34
331 .Ldes_ede3_cbc_enc_loop:
337 sllx %g4, $ileft, %g4
338 srlx %g5, $iright, %g5
342 prefetch [$inp + 8+63], 20
344 fxor %f2, %f0, %f0 ! ^= ivec
345 prefetch [$out + 63], 22
348 des_round %f4, %f6, %f0, %f0
349 des_round %f8, %f10, %f0, %f0
350 des_round %f12, %f14, %f0, %f0
351 des_round %f16, %f18, %f0, %f0
352 ldd [$key + 0x100-0x08], %f36
353 ldd [$key + 0x100-0x10], %f38
354 des_round %f20, %f22, %f0, %f0
355 ldd [$key + 0x100-0x18], %f40
356 ldd [$key + 0x100-0x20], %f42
357 des_round %f24, %f26, %f0, %f0
358 ldd [$key + 0x100-0x28], %f44
359 ldd [$key + 0x100-0x30], %f46
360 des_round %f28, %f30, %f0, %f0
361 ldd [$key + 0x100-0x38], %f48
362 ldd [$key + 0x100-0x40], %f50
363 des_round %f32, %f34, %f0, %f0
364 ldd [$key + 0x100-0x48], %f52
365 ldd [$key + 0x100-0x50], %f54
368 ldd [$key + 0x100-0x58], %f56
369 ldd [$key + 0x100-0x60], %f58
371 ldd [$key + 0x100-0x68], %f60
372 ldd [$key + 0x100-0x70], %f62
373 des_round %f36, %f38, %f0, %f0
374 ldd [$key + 0x100-0x78], %f36
375 ldd [$key + 0x100-0x80], %f38
376 des_round %f40, %f42, %f0, %f0
377 des_round %f44, %f46, %f0, %f0
378 des_round %f48, %f50, %f0, %f0
379 ldd [$key + 0x100+0x00], %f40
380 ldd [$key + 0x100+0x08], %f42
381 des_round %f52, %f54, %f0, %f0
382 ldd [$key + 0x100+0x10], %f44
383 ldd [$key + 0x100+0x18], %f46
384 des_round %f56, %f58, %f0, %f0
385 ldd [$key + 0x100+0x20], %f48
386 ldd [$key + 0x100+0x28], %f50
387 des_round %f60, %f62, %f0, %f0
388 ldd [$key + 0x100+0x30], %f52
389 ldd [$key + 0x100+0x38], %f54
390 des_round %f36, %f38, %f0, %f0
391 ldd [$key + 0x100+0x40], %f56
392 ldd [$key + 0x100+0x48], %f58
395 ldd [$key + 0x100+0x50], %f60
396 ldd [$key + 0x100+0x58], %f62
398 ldd [$key + 0x100+0x60], %f36
399 ldd [$key + 0x100+0x68], %f38
400 des_round %f40, %f42, %f0, %f0
401 ldd [$key + 0x100+0x70], %f40
402 ldd [$key + 0x100+0x78], %f42
403 des_round %f44, %f46, %f0, %f0
404 des_round %f48, %f50, %f0, %f0
405 des_round %f52, %f54, %f0, %f0
406 des_round %f56, %f58, %f0, %f0
407 des_round %f60, %f62, %f0, %f0
408 des_round %f36, %f38, %f0, %f0
409 des_round %f40, %f42, %f0, %f0
416 brnz,pt $len, .Ldes_ede3_cbc_enc_loop
419 st %f0, [$ivec + 0] ! write out ivec
424 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
425 ! and ~2x deterioration
427 faligndata %f0, %f0, %f2 ! handle unaligned output
429 stda %f2, [$out + $omask]0xc0 ! partial store
431 orn %g0, $omask, $omask
432 stda %f2, [$out + $omask]0xc0 ! partial store
434 brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
435 orn %g0, $omask, $omask
437 st %f0, [$ivec + 0] ! write out ivec
440 .type des_t4_ede3_cbc_encrypt,#function
441 .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
443 .globl des_t4_ede3_cbc_decrypt
445 des_t4_ede3_cbc_decrypt:
446 ld [$ivec + 0], %f2 ! load ivec
451 sll $ileft, 3, $ileft
454 prefetch [$inp + 63], 20
455 sub %g0, $ileft, $iright
457 alignaddrl $out, %g0, $out
458 srl $omask, %g4, $omask
463 ldd [$key + 0x100+0x78], %f4 ! load key schedule
464 ldd [$key + 0x100+0x70], %f6
465 ldd [$key + 0x100+0x68], %f8
466 ldd [$key + 0x100+0x60], %f10
467 ldd [$key + 0x100+0x58], %f12
468 ldd [$key + 0x100+0x50], %f14
469 ldd [$key + 0x100+0x48], %f16
470 ldd [$key + 0x100+0x40], %f18
471 ldd [$key + 0x100+0x38], %f20
472 ldd [$key + 0x100+0x30], %f22
473 ldd [$key + 0x100+0x28], %f24
474 ldd [$key + 0x100+0x20], %f26
475 ldd [$key + 0x100+0x18], %f28
476 ldd [$key + 0x100+0x10], %f30
477 ldd [$key + 0x100+0x08], %f32
478 ldd [$key + 0x100+0x00], %f34
480 .Ldes_ede3_cbc_dec_loop:
486 sllx %g4, $ileft, %g4
487 srlx %g5, $iright, %g5
491 prefetch [$inp + 8+63], 20
493 prefetch [$out + 63], 22
496 des_round %f4, %f6, %f0, %f0
497 des_round %f8, %f10, %f0, %f0
498 des_round %f12, %f14, %f0, %f0
499 des_round %f16, %f18, %f0, %f0
500 ldd [$key + 0x80+0x00], %f36
501 ldd [$key + 0x80+0x08], %f38
502 des_round %f20, %f22, %f0, %f0
503 ldd [$key + 0x80+0x10], %f40
504 ldd [$key + 0x80+0x18], %f42
505 des_round %f24, %f26, %f0, %f0
506 ldd [$key + 0x80+0x20], %f44
507 ldd [$key + 0x80+0x28], %f46
508 des_round %f28, %f30, %f0, %f0
509 ldd [$key + 0x80+0x30], %f48
510 ldd [$key + 0x80+0x38], %f50
511 des_round %f32, %f34, %f0, %f0
512 ldd [$key + 0x80+0x40], %f52
513 ldd [$key + 0x80+0x48], %f54
516 ldd [$key + 0x80+0x50], %f56
517 ldd [$key + 0x80+0x58], %f58
519 ldd [$key + 0x80+0x60], %f60
520 ldd [$key + 0x80+0x68], %f62
521 des_round %f36, %f38, %f0, %f0
522 ldd [$key + 0x80+0x70], %f36
523 ldd [$key + 0x80+0x78], %f38
524 des_round %f40, %f42, %f0, %f0
525 des_round %f44, %f46, %f0, %f0
526 des_round %f48, %f50, %f0, %f0
527 ldd [$key + 0x80-0x08], %f40
528 ldd [$key + 0x80-0x10], %f42
529 des_round %f52, %f54, %f0, %f0
530 ldd [$key + 0x80-0x18], %f44
531 ldd [$key + 0x80-0x20], %f46
532 des_round %f56, %f58, %f0, %f0
533 ldd [$key + 0x80-0x28], %f48
534 ldd [$key + 0x80-0x30], %f50
535 des_round %f60, %f62, %f0, %f0
536 ldd [$key + 0x80-0x38], %f52
537 ldd [$key + 0x80-0x40], %f54
538 des_round %f36, %f38, %f0, %f0
539 ldd [$key + 0x80-0x48], %f56
540 ldd [$key + 0x80-0x50], %f58
543 ldd [$key + 0x80-0x58], %f60
544 ldd [$key + 0x80-0x60], %f62
546 ldd [$key + 0x80-0x68], %f36
547 ldd [$key + 0x80-0x70], %f38
548 des_round %f40, %f42, %f0, %f0
549 ldd [$key + 0x80-0x78], %f40
550 ldd [$key + 0x80-0x80], %f42
551 des_round %f44, %f46, %f0, %f0
552 des_round %f48, %f50, %f0, %f0
553 des_round %f52, %f54, %f0, %f0
554 des_round %f56, %f58, %f0, %f0
555 des_round %f60, %f62, %f0, %f0
556 des_round %f36, %f38, %f0, %f0
557 des_round %f40, %f42, %f0, %f0
560 fxor %f2, %f0, %f0 ! ^= ivec
567 brnz,pt $len, .Ldes_ede3_cbc_dec_loop
570 st %f2, [$ivec + 0] ! write out ivec
575 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
576 ! and ~3x deterioration
578 faligndata %f0, %f0, %f0 ! handle unaligned output
580 stda %f0, [$out + $omask]0xc0 ! partial store
582 orn %g0, $omask, $omask
583 stda %f0, [$out + $omask]0xc0 ! partial store
585 brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
586 orn %g0, $omask, $omask
588 st %f2, [$ivec + 0] ! write out ivec
591 .type des_t4_ede3_cbc_decrypt,#function
592 .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
596 .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"