2 # Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by David S. Miller and Andy Polyakov.
12 # The module is licensed under 2-clause BSD
13 # license. March 2013. All rights reserved.
14 # ====================================================================
16 ######################################################################
19 # As with other hardware-assisted ciphers CBC encrypt results [for
20 # aligned data] are virtually identical to critical path lengths:
23 # CBC encrypt 4.14/4.15(*) 11.7/11.7
24 # CBC decrypt 1.77/4.11(**) 6.42/7.47
26 # (*) numbers after slash are for misaligned input and output;
28 # (**) this is the result for the largest
29 # block size; unlike all other
30 # cases, results for smaller blocks are inferior.
# Driver preamble.
#   1. Capture the directory part of $0 (everything up to and including the
#      last '/' or '\') and append both that directory and its
#      ../../perlasm relative to @INC, so the helper required below can be
#      located independently of the current working directory.
#   2. Load sparcv9_modes.pl.
#   3. If the last command-line argument is a true value, treat it as the
#      output file name and redirect STDOUT to it; otherwise STDOUT is left
#      as it is.  The three-argument form of open is used so the name is
#      never interpreted as a mode, and a failed open is fatal instead of
#      silently discarding the generated output.
33 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
34 push(@INC,"${dir}","${dir}../../perlasm");
35 require "sparcv9_modes.pl";
37 $output=pop and (open(STDOUT,">",$output) or die "can't open $output: $!");
40 #include "sparc_arch.h"
43 .register %g2,#scratch
44 .register %g3,#scratch
# des_t4_key_expand -- expand a DES key into a 16-entry key schedule.
#
# Perl-level register names used in the assembly text:
#   $inp = %o0   address the key is read from
#   $out = %o1   address the schedule is written to
#
# What the instructions below do:
#   - alignaddr + two ldd + faligndata: the doublewords at [$inp + 0x00] and
#     [$inp + 0x08] are loaded and merged into %f0, which is the usual VIS
#     sequence for fetching 8 bytes from an address that may be misaligned.
#   - des_kexpand is issued 16 times.  Each one takes a doubleword that was
#     produced earlier in the chain plus an immediate (0, 1, 2 or 3) and
#     leaves a new doubleword in one of the even registers %f0 .. %f30.
#   - Every result is written with std; register %f(2k) always goes to
#     [$out + 8*k], so the 16 stores fill [$out + 0x00] .. [$out + 0x78]
#     (128 bytes).  The stores are interleaved with the expansions and are
#     therefore not issued in ascending address order.
50 { my ($inp,$out)=("%o0","%o1");
54 .globl des_t4_key_expand
55 .type des_t4_key_expand,#function
58 alignaddr $inp, %g0, $inp
60 ldd [$inp + 0x00], %f0
61 ldd [$inp + 0x08], %f2
62 faligndata %f0, %f2, %f0
63 1: des_kexpand %f0, 0, %f0
64 des_kexpand %f0, 1, %f2
65 std %f0, [$out + 0x00]
66 des_kexpand %f2, 3, %f6
67 std %f2, [$out + 0x08]
68 des_kexpand %f2, 2, %f4
69 des_kexpand %f6, 3, %f10
70 std %f6, [$out + 0x18]
71 des_kexpand %f6, 2, %f8
72 std %f4, [$out + 0x10]
73 des_kexpand %f10, 3, %f14
74 std %f10, [$out + 0x28]
75 des_kexpand %f10, 2, %f12
76 std %f8, [$out + 0x20]
77 des_kexpand %f14, 1, %f16
78 std %f14, [$out + 0x38]
79 des_kexpand %f16, 3, %f20
80 std %f12, [$out + 0x30]
81 des_kexpand %f16, 2, %f18
82 std %f16, [$out + 0x40]
83 des_kexpand %f20, 3, %f24
84 std %f20, [$out + 0x50]
85 des_kexpand %f20, 2, %f22
86 std %f18, [$out + 0x48]
87 des_kexpand %f24, 3, %f28
88 std %f24, [$out + 0x60]
89 des_kexpand %f24, 2, %f26
90 std %f22, [$out + 0x58]
91 des_kexpand %f28, 1, %f30
92 std %f28, [$out + 0x70]
93 std %f26, [$out + 0x68]
95 std %f30, [$out + 0x78]
96 .size des_t4_key_expand,.-des_t4_key_expand
# Single-DES CBC entry points: des_t4_cbc_encrypt and des_t4_cbc_decrypt.
#
# Perl-level register names shared by the routines in this scope:
#   $inp, $out, $len, $key, $ivec  = %o0 .. %o4
#   $ileft, $iright, $omask        = %g1 .. %g3
#
# Structure common to both routines, as shown by the instructions below:
#   - Early exit: `be,pn $::size_t_cc, .Lcbc_abort`, with
#     `srln $len, 0, $len` in its delay slot.
#   - IV handling: the word at [$ivec + 0] is loaded on entry and stored
#     back on both exit paths.  Encrypt keeps the chaining value in %f0,
#     decrypt keeps it in %f2.
#   - Misaligned input: $ileft is scaled by 8 (sll by 3) and $iright is set
#     to 0 - $ileft; inside the loop they are the shift counts applied to
#     %g4 (sllx) and %g5 (srlx).
#   - Key schedule: 16 doublewords at [$key + 0x00] .. [$key + 0x78] are
#     loaded once into %f4 .. %f34.  Encrypt loads them in ascending
#     address order; decrypt loads them in descending order (0x78 first),
#     so the identical des_round sequence consumes the schedule backwards.
#   - Per block: eight des_round instructions, each naming two consecutive
#     key registers and using %f0 as both source and destination.
#   - Chaining: `fxor %f2, %f0, %f0` sits before the rounds in encrypt and
#     after the rounds in decrypt.
#   - Looping: `brnz,pt $len, <loop label>` repeats while $len is non-zero.
#   - Label 2 (taken for misaligned output): `ldxa [$inp]0x82, %g4` comes
#     first, then faligndata shifts the result, which is written by two
#     partial stores (stda with ASI 0xc0): one using $omask and one using
#     its bitwise complement (`orn %g0, $omask, $omask`).  A second orn in
#     the branch delay slot restores the original mask.  This path
#     re-enters the loop at <loop label>+4, i.e. one instruction past the
#     label -- presumably because the ldxa above has already fetched the
#     next input word into %g4; confirm against the loop head.
#     Encrypt writes the faligndata result to %f2 and so leaves %f0 intact;
#     decrypt shifts %f0 in place.
#   - prefetch is issued for [$inp + 63], [$inp + 8+63] (function code 20)
#     and [$out + 63] (function code 22).
99 { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
100 my ($ileft,$iright,$omask) = map("%g$_",(1..3));
103 .globl des_t4_cbc_encrypt
107 be,pn $::size_t_cc, .Lcbc_abort
108 srln $len, 0, $len ! needed on v8+, "nop" on v9
109 ld [$ivec + 0], %f0 ! load ivec
114 sll $ileft, 3, $ileft
117 prefetch [$inp + 63], 20
118 sub %g0, $ileft, $iright
120 alignaddrl $out, %g0, $out
121 srl $omask, %g4, $omask
126 ldd [$key + 0x00], %f4 ! load key schedule
127 ldd [$key + 0x08], %f6
128 ldd [$key + 0x10], %f8
129 ldd [$key + 0x18], %f10
130 ldd [$key + 0x20], %f12
131 ldd [$key + 0x28], %f14
132 ldd [$key + 0x30], %f16
133 ldd [$key + 0x38], %f18
134 ldd [$key + 0x40], %f20
135 ldd [$key + 0x48], %f22
136 ldd [$key + 0x50], %f24
137 ldd [$key + 0x58], %f26
138 ldd [$key + 0x60], %f28
139 ldd [$key + 0x68], %f30
140 ldd [$key + 0x70], %f32
141 ldd [$key + 0x78], %f34
149 sllx %g4, $ileft, %g4
150 srlx %g5, $iright, %g5
154 prefetch [$inp + 8+63], 20
156 fxor %f2, %f0, %f0 ! ^= ivec
157 prefetch [$out + 63], 22
160 des_round %f4, %f6, %f0, %f0
161 des_round %f8, %f10, %f0, %f0
162 des_round %f12, %f14, %f0, %f0
163 des_round %f16, %f18, %f0, %f0
164 des_round %f20, %f22, %f0, %f0
165 des_round %f24, %f26, %f0, %f0
166 des_round %f28, %f30, %f0, %f0
167 des_round %f32, %f34, %f0, %f0
174 brnz,pt $len, .Ldes_cbc_enc_loop
177 st %f0, [$ivec + 0] ! write out ivec
185 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
186 ! and ~4x deterioration
188 faligndata %f0, %f0, %f2 ! handle unaligned output
190 stda %f2, [$out + $omask]0xc0 ! partial store
192 orn %g0, $omask, $omask
193 stda %f2, [$out + $omask]0xc0 ! partial store
195 brnz,pt $len, .Ldes_cbc_enc_loop+4
196 orn %g0, $omask, $omask
198 st %f0, [$ivec + 0] ! write out ivec
201 .type des_t4_cbc_encrypt,#function
202 .size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
204 .globl des_t4_cbc_decrypt
208 be,pn $::size_t_cc, .Lcbc_abort
209 srln $len, 0, $len ! needed on v8+, "nop" on v9
210 ld [$ivec + 0], %f2 ! load ivec
215 sll $ileft, 3, $ileft
218 prefetch [$inp + 63], 20
219 sub %g0, $ileft, $iright
221 alignaddrl $out, %g0, $out
222 srl $omask, %g4, $omask
227 ldd [$key + 0x78], %f4 ! load key schedule
228 ldd [$key + 0x70], %f6
229 ldd [$key + 0x68], %f8
230 ldd [$key + 0x60], %f10
231 ldd [$key + 0x58], %f12
232 ldd [$key + 0x50], %f14
233 ldd [$key + 0x48], %f16
234 ldd [$key + 0x40], %f18
235 ldd [$key + 0x38], %f20
236 ldd [$key + 0x30], %f22
237 ldd [$key + 0x28], %f24
238 ldd [$key + 0x20], %f26
239 ldd [$key + 0x18], %f28
240 ldd [$key + 0x10], %f30
241 ldd [$key + 0x08], %f32
242 ldd [$key + 0x00], %f34
250 sllx %g4, $ileft, %g4
251 srlx %g5, $iright, %g5
255 prefetch [$inp + 8+63], 20
257 prefetch [$out + 63], 22
260 des_round %f4, %f6, %f0, %f0
261 des_round %f8, %f10, %f0, %f0
262 des_round %f12, %f14, %f0, %f0
263 des_round %f16, %f18, %f0, %f0
264 des_round %f20, %f22, %f0, %f0
265 des_round %f24, %f26, %f0, %f0
266 des_round %f28, %f30, %f0, %f0
267 des_round %f32, %f34, %f0, %f0
270 fxor %f2, %f0, %f0 ! ^= ivec
277 brnz,pt $len, .Ldes_cbc_dec_loop
280 st %f2, [$ivec + 0] ! write out ivec
285 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
286 ! and ~4x deterioration
288 faligndata %f0, %f0, %f0 ! handle unaligned output
290 stda %f0, [$out + $omask]0xc0 ! partial store
292 orn %g0, $omask, $omask
293 stda %f0, [$out + $omask]0xc0 ! partial store
295 brnz,pt $len, .Ldes_cbc_dec_loop+4
296 orn %g0, $omask, $omask
298 st %f2, [$ivec + 0] ! write out ivec
301 .type des_t4_cbc_decrypt,#function
302 .size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
# Triple-DES (EDE3) CBC entry points: des_t4_ede3_cbc_encrypt and
# des_t4_ede3_cbc_decrypt.
#
# Both routines have the same outline as their single-DES counterparts
# (early-out branch to .Lcbc_abort, IV word loaded from and stored back to
# [$ivec + 0], $ileft/$iright shift counts for misaligned input, $omask
# partial stores for misaligned output, re-entry at <loop label>+4 from
# label 2), but each block goes through three passes of eight des_round
# instructions, 24 in total.
#
# Key-schedule layout as addressed below: three 128-byte schedules at
# [$key + 0x00], [$key + 0x80] and [$key + 0x100].
#   - The schedule for the first pass is loaded once, ahead of the loop,
#     into %f4 .. %f34.
#   - The schedules for the second and third passes are re-loaded on every
#     iteration into %f36 .. %f62, with the ldd instructions interleaved
#     between des_round instructions; %f36/%f38 (and later %f40/%f42) are
#     reloaded as soon as the round that consumed them has been issued.
#     Presumably this is because three complete schedules (48 doublewords)
#     cannot all be resident in the FP register file at once -- confirm.
#
# Order in which the schedules are read:
#   encrypt: [$key + 0x00 .. 0x78] ascending, then [$key + 0x100-0x08] down
#            to [$key + 0x100-0x80] (second schedule, descending), then
#            [$key + 0x100+0x00 .. +0x78] ascending.
#   decrypt: [$key + 0x100+0x78] down to [$key + 0x100+0x00], then
#            [$key + 0x80+0x00 .. +0x78] ascending, then [$key + 0x80-0x08]
#            down to [$key + 0x80-0x80] (first schedule, descending).
#
# Chaining: encrypt applies `fxor %f2, %f0, %f0` before the first pass and
# writes %f0 back to the IV; decrypt applies it after the last pass and
# writes %f2 back to the IV.
#
# The final .asciz line is an identification string.
#
305 # One might wonder why there are back-to-back des_iip/des_ip
306 # pairs between EDE passes. Indeed, aren't they inverses of each other?
307 # They almost are: the outcome of the pair is that the 32-bit words are
308 # swapped in the target register. Consider the des_iip/des_ip pair as a way
309 # to perform the due swap; it's actually the fastest way in this case.
312 .globl des_t4_ede3_cbc_encrypt
314 des_t4_ede3_cbc_encrypt:
316 be,pn $::size_t_cc, .Lcbc_abort
317 srln $len, 0, $len ! needed on v8+, "nop" on v9
318 ld [$ivec + 0], %f0 ! load ivec
323 sll $ileft, 3, $ileft
326 prefetch [$inp + 63], 20
327 sub %g0, $ileft, $iright
329 alignaddrl $out, %g0, $out
330 srl $omask, %g4, $omask
335 ldd [$key + 0x00], %f4 ! load key schedule
336 ldd [$key + 0x08], %f6
337 ldd [$key + 0x10], %f8
338 ldd [$key + 0x18], %f10
339 ldd [$key + 0x20], %f12
340 ldd [$key + 0x28], %f14
341 ldd [$key + 0x30], %f16
342 ldd [$key + 0x38], %f18
343 ldd [$key + 0x40], %f20
344 ldd [$key + 0x48], %f22
345 ldd [$key + 0x50], %f24
346 ldd [$key + 0x58], %f26
347 ldd [$key + 0x60], %f28
348 ldd [$key + 0x68], %f30
349 ldd [$key + 0x70], %f32
350 ldd [$key + 0x78], %f34
352 .Ldes_ede3_cbc_enc_loop:
358 sllx %g4, $ileft, %g4
359 srlx %g5, $iright, %g5
363 prefetch [$inp + 8+63], 20
365 fxor %f2, %f0, %f0 ! ^= ivec
366 prefetch [$out + 63], 22
369 des_round %f4, %f6, %f0, %f0
370 des_round %f8, %f10, %f0, %f0
371 des_round %f12, %f14, %f0, %f0
372 des_round %f16, %f18, %f0, %f0
373 ldd [$key + 0x100-0x08], %f36
374 ldd [$key + 0x100-0x10], %f38
375 des_round %f20, %f22, %f0, %f0
376 ldd [$key + 0x100-0x18], %f40
377 ldd [$key + 0x100-0x20], %f42
378 des_round %f24, %f26, %f0, %f0
379 ldd [$key + 0x100-0x28], %f44
380 ldd [$key + 0x100-0x30], %f46
381 des_round %f28, %f30, %f0, %f0
382 ldd [$key + 0x100-0x38], %f48
383 ldd [$key + 0x100-0x40], %f50
384 des_round %f32, %f34, %f0, %f0
385 ldd [$key + 0x100-0x48], %f52
386 ldd [$key + 0x100-0x50], %f54
389 ldd [$key + 0x100-0x58], %f56
390 ldd [$key + 0x100-0x60], %f58
392 ldd [$key + 0x100-0x68], %f60
393 ldd [$key + 0x100-0x70], %f62
394 des_round %f36, %f38, %f0, %f0
395 ldd [$key + 0x100-0x78], %f36
396 ldd [$key + 0x100-0x80], %f38
397 des_round %f40, %f42, %f0, %f0
398 des_round %f44, %f46, %f0, %f0
399 des_round %f48, %f50, %f0, %f0
400 ldd [$key + 0x100+0x00], %f40
401 ldd [$key + 0x100+0x08], %f42
402 des_round %f52, %f54, %f0, %f0
403 ldd [$key + 0x100+0x10], %f44
404 ldd [$key + 0x100+0x18], %f46
405 des_round %f56, %f58, %f0, %f0
406 ldd [$key + 0x100+0x20], %f48
407 ldd [$key + 0x100+0x28], %f50
408 des_round %f60, %f62, %f0, %f0
409 ldd [$key + 0x100+0x30], %f52
410 ldd [$key + 0x100+0x38], %f54
411 des_round %f36, %f38, %f0, %f0
412 ldd [$key + 0x100+0x40], %f56
413 ldd [$key + 0x100+0x48], %f58
416 ldd [$key + 0x100+0x50], %f60
417 ldd [$key + 0x100+0x58], %f62
419 ldd [$key + 0x100+0x60], %f36
420 ldd [$key + 0x100+0x68], %f38
421 des_round %f40, %f42, %f0, %f0
422 ldd [$key + 0x100+0x70], %f40
423 ldd [$key + 0x100+0x78], %f42
424 des_round %f44, %f46, %f0, %f0
425 des_round %f48, %f50, %f0, %f0
426 des_round %f52, %f54, %f0, %f0
427 des_round %f56, %f58, %f0, %f0
428 des_round %f60, %f62, %f0, %f0
429 des_round %f36, %f38, %f0, %f0
430 des_round %f40, %f42, %f0, %f0
437 brnz,pt $len, .Ldes_ede3_cbc_enc_loop
440 st %f0, [$ivec + 0] ! write out ivec
445 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
446 ! and ~2x deterioration
448 faligndata %f0, %f0, %f2 ! handle unaligned output
450 stda %f2, [$out + $omask]0xc0 ! partial store
452 orn %g0, $omask, $omask
453 stda %f2, [$out + $omask]0xc0 ! partial store
455 brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
456 orn %g0, $omask, $omask
458 st %f0, [$ivec + 0] ! write out ivec
461 .type des_t4_ede3_cbc_encrypt,#function
462 .size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
464 .globl des_t4_ede3_cbc_decrypt
466 des_t4_ede3_cbc_decrypt:
468 be,pn $::size_t_cc, .Lcbc_abort
469 srln $len, 0, $len ! needed on v8+, "nop" on v9
470 ld [$ivec + 0], %f2 ! load ivec
475 sll $ileft, 3, $ileft
478 prefetch [$inp + 63], 20
479 sub %g0, $ileft, $iright
481 alignaddrl $out, %g0, $out
482 srl $omask, %g4, $omask
487 ldd [$key + 0x100+0x78], %f4 ! load key schedule
488 ldd [$key + 0x100+0x70], %f6
489 ldd [$key + 0x100+0x68], %f8
490 ldd [$key + 0x100+0x60], %f10
491 ldd [$key + 0x100+0x58], %f12
492 ldd [$key + 0x100+0x50], %f14
493 ldd [$key + 0x100+0x48], %f16
494 ldd [$key + 0x100+0x40], %f18
495 ldd [$key + 0x100+0x38], %f20
496 ldd [$key + 0x100+0x30], %f22
497 ldd [$key + 0x100+0x28], %f24
498 ldd [$key + 0x100+0x20], %f26
499 ldd [$key + 0x100+0x18], %f28
500 ldd [$key + 0x100+0x10], %f30
501 ldd [$key + 0x100+0x08], %f32
502 ldd [$key + 0x100+0x00], %f34
504 .Ldes_ede3_cbc_dec_loop:
510 sllx %g4, $ileft, %g4
511 srlx %g5, $iright, %g5
515 prefetch [$inp + 8+63], 20
517 prefetch [$out + 63], 22
520 des_round %f4, %f6, %f0, %f0
521 des_round %f8, %f10, %f0, %f0
522 des_round %f12, %f14, %f0, %f0
523 des_round %f16, %f18, %f0, %f0
524 ldd [$key + 0x80+0x00], %f36
525 ldd [$key + 0x80+0x08], %f38
526 des_round %f20, %f22, %f0, %f0
527 ldd [$key + 0x80+0x10], %f40
528 ldd [$key + 0x80+0x18], %f42
529 des_round %f24, %f26, %f0, %f0
530 ldd [$key + 0x80+0x20], %f44
531 ldd [$key + 0x80+0x28], %f46
532 des_round %f28, %f30, %f0, %f0
533 ldd [$key + 0x80+0x30], %f48
534 ldd [$key + 0x80+0x38], %f50
535 des_round %f32, %f34, %f0, %f0
536 ldd [$key + 0x80+0x40], %f52
537 ldd [$key + 0x80+0x48], %f54
540 ldd [$key + 0x80+0x50], %f56
541 ldd [$key + 0x80+0x58], %f58
543 ldd [$key + 0x80+0x60], %f60
544 ldd [$key + 0x80+0x68], %f62
545 des_round %f36, %f38, %f0, %f0
546 ldd [$key + 0x80+0x70], %f36
547 ldd [$key + 0x80+0x78], %f38
548 des_round %f40, %f42, %f0, %f0
549 des_round %f44, %f46, %f0, %f0
550 des_round %f48, %f50, %f0, %f0
551 ldd [$key + 0x80-0x08], %f40
552 ldd [$key + 0x80-0x10], %f42
553 des_round %f52, %f54, %f0, %f0
554 ldd [$key + 0x80-0x18], %f44
555 ldd [$key + 0x80-0x20], %f46
556 des_round %f56, %f58, %f0, %f0
557 ldd [$key + 0x80-0x28], %f48
558 ldd [$key + 0x80-0x30], %f50
559 des_round %f60, %f62, %f0, %f0
560 ldd [$key + 0x80-0x38], %f52
561 ldd [$key + 0x80-0x40], %f54
562 des_round %f36, %f38, %f0, %f0
563 ldd [$key + 0x80-0x48], %f56
564 ldd [$key + 0x80-0x50], %f58
567 ldd [$key + 0x80-0x58], %f60
568 ldd [$key + 0x80-0x60], %f62
570 ldd [$key + 0x80-0x68], %f36
571 ldd [$key + 0x80-0x70], %f38
572 des_round %f40, %f42, %f0, %f0
573 ldd [$key + 0x80-0x78], %f40
574 ldd [$key + 0x80-0x80], %f42
575 des_round %f44, %f46, %f0, %f0
576 des_round %f48, %f50, %f0, %f0
577 des_round %f52, %f54, %f0, %f0
578 des_round %f56, %f58, %f0, %f0
579 des_round %f60, %f62, %f0, %f0
580 des_round %f36, %f38, %f0, %f0
581 des_round %f40, %f42, %f0, %f0
584 fxor %f2, %f0, %f0 ! ^= ivec
591 brnz,pt $len, .Ldes_ede3_cbc_dec_loop
594 st %f2, [$ivec + 0] ! write out ivec
599 2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
600 ! and ~3x deterioration
602 faligndata %f0, %f0, %f0 ! handle unaligned output
604 stda %f0, [$out + $omask]0xc0 ! partial store
606 orn %g0, $omask, $omask
607 stda %f0, [$out + $omask]0xc0 ! partial store
609 brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
610 orn %g0, $omask, $omask
612 st %f2, [$ivec + 0] ! write out ivec
615 .type des_t4_ede3_cbc_decrypt,#function
616 .size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
620 .asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"