2 ! des_enc.S (generated from des_enc.m4)
4 ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
6 ! Version 1.0. 32-bit version.
10 ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
15 ! Assembler version: Copyright Svend Olaf Mikkelsen.
17 ! Original C code: Copyright Eric A. Young.
19 ! This code can be freely used by LibDES/SSLeay/OpenSSL users.
21 ! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
23 ! This version can be redistributed.
25 ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
27 ! Global registers 1 to 5 are used. This is the same as done by the
28 ! cc compiler. The UltraSPARC load/store little endian feature is used.
30 ! Instruction grouping often refers to one CPU cycle.
32 ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
34 ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
36 ! Performance improvement according to './apps/openssl speed des'
39 ! 23% faster than cc-5.2 -xarch=v8plus -xO5
40 ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
42 ! 50% faster than cc-5.2 -xarch=v9 -xO5
43 ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
46 .ident "des_enc.m4 2.1"
48 #if defined(__SUNPRO_C) && defined(__sparcv9)
49 # define ABI64 /* They've said -xarch=v9 at command line */
50 #elif defined(__GNUC__) && defined(__arch64__)
51 # define ABI64 /* They've said -m64 at command line */
55 .register %g2,#scratch
56 .register %g3,#scratch
63 # ifndef OPENSSL_SYSNAME_ULTRASPARC
64 # define OPENSSL_SYSNAME_ULTRASPARC
121 ! The logic used in initial and final permutations is the same as in
122 ! the C code. The permutations are done with a clever shift, xor, and
125 ! The macro also loads address sbox 1 to 5 to global 1 to 5, address
126 ! sbox 6 to local6, and address sbox 8 to out3.
128 ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
130 ! Loads key first round from address in parameter 5 to out0, out1.
132 ! After the original LibDES initial permutation, the resulting left
133 ! is in the variable initially used for right and vice versa. The macro
134 ! implements the possibility to keep the halves in the original registers.
138 ! parameter 3 result left (modify in first round)
139 ! parameter 4 result right (use in first round)
140 ! parameter 5 key address
141 ! parameter 6 1/2 for include encryption/decryption
142 ! parameter 7 1 for move in1 to in3
143 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
144 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
149 ! $1 $2 $4 $3 $5 $6 $7 $8 $9
! Initial permutation body. The bit masks are read from the constant
! words stored after the .des_and table, addressed through out2.
! Sbox addresses are derived from global1 in steps of 256 bytes.
151 ld [out2+256], local1 ! mask 0x0f0f0f0f (table offset 256)
154 xor local4, $1, local4
155 ifelse($7,1,{mov in1, in3},{nop}) ! parameter 7 = 1: key address in1 to in3
157 ld [out2+260], local2 ! mask 0x0000ffff (table offset 260)
158 and local4, local1, local4
159 ifelse($8,1,{mov in3, in4},{}) ! parameter 8 = 1: in3 to in4
160 ifelse($8,2,{mov in4, in3},{}) ! parameter 8 = 2: in4 to in3
162 ld [out2+280], out4 ! loop counter
163 sll local4, 4, local1
166 ld [out2+264], local3 ! mask 0x33333333 (table offset 264)
170 ifelse($9,1,{LDPTR KS3, in4},{}) ! parameter 9 = 1: load ks3 to in4
171 xor local4, $2, local4
172 nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
174 ifelse($9,1,{LDPTR KS2, in3},{}) ! parameter 9 = 1: load ks2 to in3
175 and local4, local2, local4
176 nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
178 sll local4, 16, local1
184 sethi %hi(16711680), local5 ! local5 = 0x00ff0000
185 xor local4, $1, local4
187 and local4, local3, local4
188 or local5, 255, local5 ! local5 = 0x00ff00ff
190 sll local4, 2, local2
196 xor local4, $2, local4
197 add global1, 768, global4 ! address sbox 4
199 and local4, local5, local4
200 add global1, 1024, global5 ! address sbox 5
202 ld [out2+272], local7 ! mask 0x55555555 (table offset 272)
203 sll local4, 8, local1
209 ld [$5], out0 ! key 7531
210 xor local4, $1, local4
211 add global1, 256, global2 ! address sbox 2
213 ld [$5+4], out1 ! key 8642
214 and local4, local7, local4
215 add global1, 512, global3 ! address sbox 3
217 sll local4, 1, local1
224 add global1, 1280, local6 ! address sbox 6
227 add global1, 1792, out3 ! address sbox 8
230 or local4, local3, $4
232 or local2, local1, $3
236 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
237 or local2, local1, $3
241 and local1, 252, local1 ! and 0xFC: sbox 1 table offset
247 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
248 or local2, local1, $3
252 and local1, 252, local1 ! and 0xFC: sbox 1 table offset
260 ! The logic used in the DES rounds is the same as in the C code,
261 ! except that calculations for sbox 1 and sbox 5 begin before
262 ! the previous round is finished.
264 ! In each round one half (work) is modified based on key and the
267 ! In this version we do two rounds in a loop repeated 7 times
268 ! and two rounds separately.
270 ! One half has the bits for the sboxes in the following positions:
272 ! 777777xx555555xx333333xx111111xx
274 ! 88xx666666xx444444xx222222xx8888
276 ! The bits for each sbox are xor-ed with the key bits for that box.
277 ! The above xx bits are cleared, and the result used for lookup in
278 ! the sbox table. Each sbox entry contains the 4 output bits permuted
279 ! into 32 bits according to the P permutation.
281 ! In the description of DES, left and right are switched after
282 ! each round, except after last round. In this code the original
283 ! left and right are kept in the same register in all rounds, meaning
284 ! that after the 16 rounds the result for right is in the register
285 ! originally used for left.
287 ! parameter 1 first work (left in first round)
288 ! parameter 2 first use (right in first round)
289 ! parameter 3 enc/dec 1/-1
290 ! parameter 4 loop label
291 ! parameter 5 key address register
292 ! parameter 6 optional address for key next encryption/decryption
293 ! parameter 7 not empty for include retl
295 ! also compares in2 to 8
297 define(rounds_macro, {
! Body of the 16 DES rounds: a loop doing two rounds per iteration
! followed by two unrolled rounds. Numbers in trailing comments name
! the sbox a line works on. Parameter 3 is 1 or -1 and selects the
! direction in which the key pointer in parameter 5 moves.
300 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
304 ld [out2+284], local5 ! 0x0000FC00
306 and local1, 252, local1
311 ! local6 is address sbox 6
312 ! out3 is address sbox 8
313 ! out4 is loop counter
315 ld [global1+local1], local1
316 xor $2, out1, out1 ! 8642
317 xor $2, out0, out0 ! 7531
318 fmovs %f0, %f0 ! FP register self-move, used for alignment (comment formerly said fxor)
320 srl out1, 4, local0 ! rotate 4 right
321 and out0, local5, local3 ! 3
324 ld [$5+$3*8], local7 ! key 7531 next round
325 srl local3, 8, local3 ! 3
326 and local0, 252, local2 ! 2
329 ld [global3+local3],local3 ! 3
330 sll out1, 28, out1 ! rotate
331 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
333 ld [global2+local2], local2 ! 2
334 srl out0, 24, local1 ! 7
335 or out1, local0, out1 ! rotate
337 ldub [out2+local1], local1 ! 7 (and 0xFC)
338 srl out1, 24, local0 ! 8
339 and out1, local5, local4 ! 4
341 ldub [out2+local0], local0 ! 8 (and 0xFC)
342 srl local4, 8, local4 ! 4
343 xor $1, local2, $1 ! 2 finished local2 now sbox 6
345 ld [global4+local4],local4 ! 4
346 srl out1, 16, local2 ! 6
347 xor $1, local3, $1 ! 3 finished local3 now sbox 5
349 ld [out3+local0],local0 ! 8
350 and local2, 252, local2 ! 6
351 add global1, 1536, local5 ! address sbox 7
353 ld [local6+local2], local2 ! 6
354 srl out0, 16, local3 ! 5
355 xor $1, local4, $1 ! 4 finished
357 ld [local5+local1],local1 ! 7
358 and local3, 252, local3 ! 5
359 xor $1, local0, $1 ! 8 finished
361 ld [global5+local3],local3 ! 5
362 xor $1, local2, $1 ! 6 finished
365 ld [$5+$3*8+4], out0 ! key 8642 next round
366 xor $1, local7, local2 ! sbox 5 next round
367 xor $1, local1, $1 ! 7 finished
369 srl local2, 16, local2 ! sbox 5 next round
370 xor $1, local3, $1 ! 5 finished
372 ld [$5+$3*16+4], out1 ! key 8642 next round again
373 and local2, 252, local2 ! sbox5 next round
375 xor $1, local7, local7 ! 7531
377 ld [global5+local2], local2 ! 5
378 srl local7, 24, local3 ! 7
379 xor $1, out0, out0 ! 8642
381 ldub [out2+local3], local3 ! 7 (and 0xFC)
382 srl out0, 4, local0 ! rotate 4 right
383 and local7, 252, local1 ! 1
385 sll out0, 28, out0 ! rotate
386 xor $2, local2, $2 ! 5 finished local2 used
388 srl local0, 8, local4 ! 4
389 and local0, 252, local2 ! 2
390 ld [local5+local3], local3 ! 7
392 srl local0, 16, local5 ! 6
393 or out0, local0, out0 ! rotate
394 ld [global2+local2], local2 ! 2
397 ld [$5+$3*16], out0 ! key 7531 next round
398 and local4, 252, local4 ! 4
400 and local5, 252, local5 ! 6
401 ld [global4+local4], local4 ! 4
402 xor $2, local3, $2 ! 7 finished local3 used
404 and local0, 252, local0 ! 8
405 ld [local6+local5], local5 ! 6
406 xor $2, local2, $2 ! 2 finished local2 now sbox 3
408 srl local7, 8, local2 ! 3 start
409 ld [out3+local0], local0 ! 8
410 xor $2, local4, $2 ! 4 finished
412 and local2, 252, local2 ! 3
413 ld [global1+local1], local1 ! 1
414 xor $2, local5, $2 ! 6 finished local5 used
416 ld [global3+local2], local2 ! 3
417 xor $2, local0, $2 ! 8 finished
418 add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two 8-byte round keys)
420 ld [out2+284], local5 ! 0x0000FC00
421 xor $2, out0, local4 ! sbox 1 next round
422 xor $2, local1, $2 ! 1 finished
424 xor $2, local2, $2 ! 3 finished
425 #ifdef OPENSSL_SYSNAME_ULTRASPARC
430 and local4, 252, local1 ! sbox 1 next round
! The lines below repeat the round pattern of the loop body once more,
! without advancing the key pointer.
434 ld [global1+local1], local1 ! 1
438 srl out1, 4, local0 ! rotate
439 and out0, local5, local3 ! 3
441 ld [$5+$3*8], local7 ! key 7531
442 srl local3, 8, local3 ! 3
443 and local0, 252, local2 ! 2
445 ld [global3+local3],local3 ! 3
446 sll out1, 28, out1 ! rotate
447 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
449 ld [global2+local2], local2 ! 2
451 or out1, local0, out1 ! rotate
453 ldub [out2+local1], local1 ! 7 (and 0xFC)
455 and out1, local5, local4 ! 4
457 ldub [out2+local0], local0 ! 8 (and 0xFC)
458 srl local4, 8, local4 ! 4
459 xor $1, local2, $1 ! 2 finished local2 now sbox 6
461 ld [global4+local4],local4 ! 4
463 xor $1, local3, $1 ! 3 finished local3 now sbox 5
465 ld [out3+local0],local0 ! 8
466 and local2, 252, local2 ! 6
467 add global1, 1536, local5 ! address sbox 7
469 ld [local6+local2], local2 ! 6
471 xor $1, local4, $1 ! 4 finished
473 ld [local5+local1],local1 ! 7
474 and local3, 252, local3 ! 5
477 ld [global5+local3],local3 ! 5
478 xor $1, local2, $1 ! 6 finished
481 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
482 xor $1, local7, local2 ! sbox 5 next round
483 xor $1, local1, $1 ! 7 finished
486 srl local2, 16, local2 ! sbox 5 next round
487 xor $1, local3, $1 ! 5 finished
489 and local2, 252, local2 ! sbox 5 next round
490 ! next round (two rounds more)
491 xor $1, local7, local7 ! 7531
493 ld [global5+local2], local2 ! 5
494 srl local7, 24, local3 ! 7
495 xor $1, out0, out0 ! 8642
497 ldub [out2+local3], local3 ! 7 (and 0xFC)
498 srl out0, 4, local0 ! rotate
499 and local7, 252, local1 ! 1
501 sll out0, 28, out0 ! rotate
502 xor $2, local2, $2 ! 5 finished local2 used
504 srl local0, 8, local4 ! 4
505 and local0, 252, local2 ! 2
506 ld [local5+local3], local3 ! 7
508 srl local0, 16, local5 ! 6
509 or out0, local0, out0 ! rotate
510 ld [global2+local2], local2 ! 2
513 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
514 and local4, 252, local4 ! 4
516 and local5, 252, local5 ! 6
517 ld [global4+local4], local4 ! 4
518 xor $2, local3, $2 ! 7 finished local3 used
520 and local0, 252, local0 ! 8
521 ld [local6+local5], local5 ! 6
522 xor $2, local2, $2 ! 2 finished local2 now sbox 3
524 srl local7, 8, local2 ! 3 start
525 ld [out3+local0], local0 ! 8
528 and local2, 252, local2 ! 3
529 ld [global1+local1], local1 ! 1
530 xor $2, local5, $2 ! 6 finished local5 used
532 ld [global3+local2], local2 ! 3
536 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
540 ifelse($7,{}, {}, {retl}) ! parameter 7 not empty: emit retl
547 ! parameter 1 right (original left)
548 ! parameter 2 left (original right)
549 ! parameter 3 1 for optional store to [in0]
550 ! parameter 4 1 for load input/output address to local5/7
552 ! The final permutation logic switches the halves, meaning that
553 ! left and right end up in the registers originally used.
558 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
560 ! initially undo the rotate 3 left done after initial permutation
561 ! original left is received shifted 3 right and 29 left in local3/4
! Final permutation body. Each stage masks with one constant built by a
! sethi/or pair; the constants are interleaved with the permutation
! arithmetic of the previous stage.
564 or local3, local4, $1 ! merge the two shifted parts: undo rotate 3 left
567 sethi %hi(0x55555555), local2
570 or local2, %lo(0x55555555), local2 ! local2 = 0x55555555
573 sethi %hi(0x00ff00ff), local1
574 xor local3, $1, local3
575 or local1, %lo(0x00ff00ff), local1 ! local1 = 0x00ff00ff
576 and local3, local2, local3
577 sethi %hi(0x33333333), local4
578 sll local3, 1, local2
584 xor local3, $2, local3
585 or local4, %lo(0x33333333), local4 ! local4 = 0x33333333
586 and local3, local1, local3
587 sethi %hi(0x0000ffff), local1
588 sll local3, 8, local2
594 xor local3, $1, local3
595 or local1, %lo(0x0000ffff), local1 ! local1 = 0x0000ffff
596 and local3, local4, local3
597 sethi %hi(0x0f0f0f0f), local4
598 sll local3, 2, local2
600 ifelse($4,1, {LDPTR INPUT, local5}) ! parameter 4 = 1: input address to local5
603 ifelse($4,1, {LDPTR OUTPUT, local7}) ! parameter 4 = 1: output address to local7
606 xor local3, $2, local3
607 or local4, %lo(0x0f0f0f0f), local4 ! local4 = 0x0f0f0f0f
608 and local3, local1, local3
609 sll local3, 16, local2
611 xor $2, local3, local1
613 srl local1, 4, local3
615 xor local3, $1, local3
616 and local3, local4, local3
617 sll local3, 4, local2
623 ifelse($3,1, {st $1, [in0]}) ! parameter 3 = 1: store first result word
625 xor local1, local2, $2
627 ifelse($3,1, {st $2, [in0+4]}) ! parameter 3 = 1: store second result word
634 ! Does initial permutation for next block mixed with
635 ! final permutation for current block.
637 ! parameter 1 original left
638 ! parameter 2 original right
639 ! parameter 3 left ip
640 ! parameter 4 right ip
641 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
644 ! also adds -8 to length in2 and loads loop counter to out4
646 define(fp_ip_macro, {
! Final permutation of the current block interleaved with the initial
! permutation of the next block. The nested definitions below give
! symbolic names to the scratch and mask registers used by this body.
649 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
651 define({temp1},{out4})
652 define({temp2},{local3})
654 define({ip1},{local1})
655 define({ip2},{local2})
656 define({ip4},{local4})
657 define({ip5},{local5})
659 ! $1 in local3, local4
663 or local3, local4, $1
666 ifelse($5,2,{mov in4, in3}) ! parameter 5 = 2: move in4 to in3
675 and temp1, ip5, temp1
676 xor local0, $3, local0
681 and local0, ip1, local0
684 sll local0, 4, local7
694 xor local0, $4, local0
695 and temp1, ip4, temp1
696 and local0, ip2, local0
699 sll local0, 16, local7
705 ld [out2+264], temp2 ! ip3
709 xor local0, $3, local0
710 and temp1, temp2, temp1
711 and local0, temp2, local0
714 sll local0, 2, local7
722 xor local0, $4, local0
723 and temp1, ip2, temp1
724 and local0, ip4, local0
726 xor $2, temp1, local4
727 sll local0, 8, local7
734 xor local0, $3, local0
737 and local0, ip5, local0
739 sll local0, 1, local7
746 and temp1, ip1, temp1
751 ifelse($5,1,{LDPTR KS2, in4}) ! parameter 5 = 1: load ks2 to in4
753 xor local4, temp2, $2
755 ! reload since used as temporary:
757 ld [out2+280], out4 ! loop counter
760 ifelse($5,1,{add in4, 120, in4}) ! parameter 5 = 1: add 120 to the ks2 address
762 ifelse($5,1,{LDPTR KS1, in3}) ! parameter 5 = 1: load ks1 to in3
765 or local0, local5, $4
766 or local2, local7, $3
772 ! {load_little_endian}
774 ! parameter 1 address
775 ! parameter 2 destination left
776 ! parameter 3 destination right
777 ! parameter 4 temporary
780 define(load_little_endian, {
782 ! {load_little_endian}
783 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
785 ! first in memory to rightmost in register
787 #ifdef OPENSSL_SYSNAME_ULTRASPARC
833 ! {load_little_endian_inc}
835 ! parameter 1 address
836 ! parameter 2 destination left
837 ! parameter 3 destination right
838 ! parameter 4 temporary
843 define(load_little_endian_inc, {
845 ! {load_little_endian_inc}
846 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
848 ! first in memory to rightmost in register
850 #ifdef OPENSSL_SYSNAME_ULTRASPARC
899 ! Loads 1 to 7 bytes little endian
900 ! Remaining bytes are zeroed.
902 ! parameter 1 address
904 ! parameter 3 destination register left
905 ! parameter 4 destination register right
909 ! parameter 8 return label
911 define(load_n_bytes, {
914 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
919 add %o7,$7.jmp.table-$7.0,$5
971 ! {store_little_endian}
973 ! parameter 1 address
974 ! parameter 2 source left
975 ! parameter 3 source right
976 ! parameter 4 temporary
978 define(store_little_endian, {
980 ! {store_little_endian}
981 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
983 ! rightmost in register to first in memory
985 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1034 ! Stores 1 to 7 bytes little endian
1036 ! parameter 1 address
1037 ! parameter 2 length
1038 ! parameter 3 source register left
1039 ! parameter 4 source register right
1043 ! parameter 8 return label
1045 define(store_n_bytes, {
1048 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
1053 add %o7,$7.jmp.table-$7.0,$5
! Constant written by {register_init} into every register passed to it.
1106 define(testvalue,{1})
1108 define(register_init, {
! Test helper: builds the test constant in local0 and copies it into
! each of up to eight registers given as parameters. Empty parameters
! are skipped.
1110 ! For test purposes:
1112 sethi %hi(testvalue), local0
1113 or local0, %lo(testvalue), local0 ! local0 = test constant
1115 ifelse($1,{},{}, {mov local0, $1})
1116 ifelse($2,{},{}, {mov local0, $2})
1117 ifelse($3,{},{}, {mov local0, $3})
1118 ifelse($4,{},{}, {mov local0, $4})
1119 ifelse($5,{},{}, {mov local0, $5})
1120 ifelse($6,{},{}, {mov local0, $6})
1121 ifelse($7,{},{}, {mov local0, $7})
1122 ifelse($8,{},{}, {mov local0, $8})
! Encryption rounds: work half in5, use half out5, direction 1,
! key address in in3, and retl emitted at the end.
1152 ! loads key next encryption/decryption first round from [in4]
1154 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
! Decryption rounds: direction -1, key address in in4, and retl
! emitted at the end.
1161 ! implemented with out5 as first parameter to avoid
1162 ! register exchange in ede modes
1165 ! loads key next encryption/decryption first round from [in3]
1167 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1171 ! void DES_encrypt1(data, ks, enc)
1172 ! *******************************
! Reads the two data words from [in0] and [in0+4], runs initial
! permutation, 16 rounds and final permutation, and stores the result
! back to [in0]. in1 is the key schedule; the enc argument selects the
! encryption or decryption path.
1175 .global DES_encrypt1
1176 .type DES_encrypt1,#function
1180 save %sp, FRAME, %sp
1183 mov .PIC.me.up-(.-4),out0 ! offset consumed by the .PIC.me.up helper
1185 ld [in0], in5 ! left
1188 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1189 be,pn %icc, .encrypt.dec ! enc/dec
1193 ld [in0+4], out5 ! right
1195 ! parameter 6 1/2 for include encryption/decryption
1196 ! parameter 7 1 for move in1 to in3
1197 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1199 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1201 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1203 fp_macro(in5, out5, 1) ! 1 for store to [in0]
! Decryption path: the schedule is walked backwards, so start at the
! last 8-byte subkey (offset 120).
1210 add in1, 120, in3 ! use last subkey for first round
1212 ! parameter 6 1/2 for include encryption/decryption
1213 ! parameter 7 1 for move in1 to in3
1214 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1216 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
1218 fp_macro(out5, in5, 1) ! 1 for store to [in0]
1224 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1227 ! void DES_encrypt2(data, ks, enc)
1228 !*********************************
1230 ! encrypts/decrypts without initial/final permutation
! Sets up the sbox address registers and the first round key by hand,
! since no initial permutation is run here. The data pointer in0 is
! saved to this function's own stack frame and reloaded afterwards.
1233 .global DES_encrypt2
1234 .type DES_encrypt2,#function
1238 save %sp, FRAME, %sp
1241 mov .PIC.me.up-(.-4),out0 ! offset consumed by the .PIC.me.up helper
1243 ! Set sbox address 1 to 6 and 8 and rotate halves 3 left
1244 ! Errors caught by destest? Yes. Still? *NO*
1246 !sethi %hi(DES_SPtrans), global1 ! address sbox 1
1248 !or global1, %lo(DES_SPtrans), global1 ! sbox 1
1250 add global1, 256, global2 ! sbox 2
1251 add global1, 512, global3 ! sbox 3
1253 ld [in0], out5 ! right
1254 add global1, 768, global4 ! sbox 4
1255 add global1, 1024, global5 ! sbox 5
1257 ld [in0+4], in5 ! left
1258 add global1, 1280, local6 ! sbox 6
1259 add global1, 1792, out3 ! sbox 8
1264 mov in1, in3 ! key address to in3
1270 add in5, local5, in5
1272 add out5, local7, out5
1275 ! we use our own stackframe
1277 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1278 be,pn %icc, .encrypt2.dec ! decryption
1282 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! save data pointer in0
1284 ld [in3], out0 ! key 7531 first round
1285 mov LOOPS, out4 ! loop counter
1287 ld [in3+4], out1 ! key 8642 first round
1288 sethi %hi(0x0000FC00), local5 ! local5 = 0x0000FC00
1299 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! restore data pointer in0
1312 ld [in4], out0 ! key 7531 first round
1313 mov LOOPS, out4 ! loop counter
1315 ld [in4+4], out1 ! key 8642 first round
1316 sethi %hi(0x0000FC00), local5 ! local5 = 0x0000FC00
1318 mov in5, local1 ! left expected in out5
1332 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! restore data pointer in0
1341 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1344 ! void DES_encrypt3(data, ks1, ks2, ks3)
1345 ! **************************************
! Triple-DES encryption of the two words at [in0]. The ks2 pointer is
! advanced by 120 to its last 8-byte subkey, and ks3 is parked in in2
! until it is needed.
1348 .global DES_encrypt3
1349 .type DES_encrypt3,#function
1353 save %sp, FRAME, %sp
1356 mov .PIC.me.up-(.-4),out0 ! offset consumed by the .PIC.me.up helper
1358 ld [in0], in5 ! left
1359 add in2, 120, in4 ! ks2
1361 ld [in0+4], out5 ! right
1362 mov in3, in2 ! save ks3
1364 ! parameter 6 1/2 for include encryption/decryption
1365 ! parameter 7 1 for mov in1 to in3
1366 ! parameter 8 1 for mov in3 to in4
1367 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1369 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1372 mov in2, in3 ! preload ks3
1377 fp_macro(in5, out5, 1)
1383 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1386 ! void DES_decrypt3(data, ks1, ks2, ks3)
1387 ! **************************************
! Triple-DES decryption of the two words at [in0]. The first stage
! uses ks3 starting at its last 8-byte subkey (offset 120); ks1 is set
! up the same way afterwards.
1390 .global DES_decrypt3
1391 .type DES_decrypt3,#function
1395 save %sp, FRAME, %sp
1398 mov .PIC.me.up-(.-4),out0 ! offset consumed by the .PIC.me.up helper
1400 ld [in0], in5 ! left
1401 add in3, 120, in4 ! ks3
1403 ld [in0+4], out5 ! right
1406 ! parameter 6 1/2 for include encryption/decryption
1407 ! parameter 7 1 for mov in1 to in3
1408 ! parameter 8 1 for mov in3 to in4
1409 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1411 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1414 add in1, 120, in4 ! preload ks1
1419 fp_macro(out5, in5, 1)
1425 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1428 .type .des_and,#object
1433 ! This table is used for AND 0xFC when it is known that register
1434 ! bits 8-31 are zero. Makes it possible to do three arithmetic
1435 ! operations in one cycle.
! Entry i holds the value i with its two low bits cleared, so a byte
! load indexed by i replaces an explicit and-with-252 instruction.
1437 .byte 0, 0, 0, 0, 4, 4, 4, 4
1438 .byte 8, 8, 8, 8, 12, 12, 12, 12
1439 .byte 16, 16, 16, 16, 20, 20, 20, 20
1440 .byte 24, 24, 24, 24, 28, 28, 28, 28
1441 .byte 32, 32, 32, 32, 36, 36, 36, 36
1442 .byte 40, 40, 40, 40, 44, 44, 44, 44
1443 .byte 48, 48, 48, 48, 52, 52, 52, 52
1444 .byte 56, 56, 56, 56, 60, 60, 60, 60
1445 .byte 64, 64, 64, 64, 68, 68, 68, 68
1446 .byte 72, 72, 72, 72, 76, 76, 76, 76
1447 .byte 80, 80, 80, 80, 84, 84, 84, 84
1448 .byte 88, 88, 88, 88, 92, 92, 92, 92
1449 .byte 96, 96, 96, 96, 100, 100, 100, 100
1450 .byte 104, 104, 104, 104, 108, 108, 108, 108
1451 .byte 112, 112, 112, 112, 116, 116, 116, 116
1452 .byte 120, 120, 120, 120, 124, 124, 124, 124
1453 .byte 128, 128, 128, 128, 132, 132, 132, 132
1454 .byte 136, 136, 136, 136, 140, 140, 140, 140
1455 .byte 144, 144, 144, 144, 148, 148, 148, 148
1456 .byte 152, 152, 152, 152, 156, 156, 156, 156
1457 .byte 160, 160, 160, 160, 164, 164, 164, 164
1458 .byte 168, 168, 168, 168, 172, 172, 172, 172
1459 .byte 176, 176, 176, 176, 180, 180, 180, 180
1460 .byte 184, 184, 184, 184, 188, 188, 188, 188
1461 .byte 192, 192, 192, 192, 196, 196, 196, 196
1462 .byte 200, 200, 200, 200, 204, 204, 204, 204
1463 .byte 208, 208, 208, 208, 212, 212, 212, 212
1464 .byte 216, 216, 216, 216, 220, 220, 220, 220
1465 .byte 224, 224, 224, 224, 228, 228, 228, 228
1466 .byte 232, 232, 232, 232, 236, 236, 236, 236
1467 .byte 240, 240, 240, 240, 244, 244, 244, 244
1468 .byte 248, 248, 248, 248, 252, 252, 252, 252
1470 ! 5 numbers for initial/final permutation
1472 .word 0x0f0f0f0f ! offset 256
1473 .word 0x0000ffff ! 260
1474 .word 0x33333333 ! 264
1475 .word 0x00ff00ff ! 268
1476 .word 0x55555555 ! 272
1480 .word 0x0000FC00 ! 284
! Displacement to DES_SPtrans, used for position independent addressing.
1482 .word %r_disp32(DES_SPtrans)
! Helper that resolves the table addresses for the caller. Several
! alternative code paths follow; which one is assembled depends on
! preprocessor conditions.
1484 ! input: out0 offset between .PIC.me.up and caller
1485 ! output: out0 pointer to .PIC.me.up
1486 ! out2 pointer to .des_and
1487 ! global1 pointer to DES_SPtrans
1490 add out0,%o7,out0 ! pointer to .PIC.me.up
! Variant using a stored displacement word relative to this helper.
1492 ld [out0+(.PIC.DES_SPtrans-.PIC.me.up)],global1
1493 add global1,(.PIC.DES_SPtrans-.PIC.me.up),global1
1494 add global1,out0,global1 ! global1 = pointer to DES_SPtrans
1497 ! In case anybody wonders why this code is same for both ABI.
1498 ! To start with it is not. Do note LDPTR below. But of course
1499 ! you must be wondering why the rest of it does not contain
1500 ! things like %hh, %hm and %lm. Well, those are needed only
1501 ! if OpenSSL library *itself* will become larger than 4GB,
1502 ! which is not going to happen any time soon.
! Variant loading the pointer through the global offset table.
1503 sethi %hi(DES_SPtrans),global1
1504 or global1,%lo(DES_SPtrans),global1
1505 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1506 add global1,out0,global1
1507 add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1508 LDPTR [out2+global1],global1
! Variants using absolute addressing.
1510 setn DES_SPtrans,out2,global1 ! synthetic instruction !
1511 # elif defined(ABI64)
1512 sethi %hh(DES_SPtrans),out2
1513 or out2,%hm(DES_SPtrans),out2
1514 sethi %lm(DES_SPtrans),global1
1515 or global1,%lo(DES_SPtrans),global1
1517 or out2,global1,global1
1519 sethi %hi(DES_SPtrans),global1
1520 or global1,%lo(DES_SPtrans),global1
1524 add out0,.des_and-.PIC.me.up,out2 ! out2 = pointer to .des_and
1526 ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1527 ! *****************************************************************
! CBC mode with a single key schedule. The byte count in2 is kept as
! bytes remaining after the current block, so a negative value marks a
! trailing partial block of 1 to 7 bytes. On the encryption path the
! final permutation of one block is merged with the initial
! permutation of the next whenever another full block follows.
1531 .global DES_ncbc_encrypt
1532 .type DES_ncbc_encrypt,#function
1536 save %sp, FRAME, %sp
! Stack slots in this frame for the input, output and ivec pointers.
1538 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
1539 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1540 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1543 mov .PIC.me.up-(.-4),out0 ! offset consumed by the .PIC.me.up helper
1547 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1548 be,pn %icc, .ncbc.dec
1554 ! addr left right temp label
1555 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
1557 addcc in2, -8, in2 ! bytes missing when first block done
1559 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1560 bl,pn %icc, .ncbc.enc.seven.or.less
1562 bl .ncbc.enc.seven.or.less
1564 mov in3, in4 ! schedule
1566 .ncbc.enc.next.block:
1568 load_little_endian(in0, out4, global4, local3, .LLE2) ! block
1570 .ncbc.enc.next.block_1:
1572 xor in5, out4, in5 ! iv xor
1573 xor out5, global4, out5 ! iv xor
1575 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1576 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1578 .ncbc.enc.next.block_2:
1580 !// call .des_enc ! compares in2 to 8
1581 ! rounds inlined for alignment purposes
1583 add global1, 768, global4 ! address sbox 4 since register used below
1585 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
1587 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1588 bl,pn %icc, .ncbc.enc.next.block_fp
1590 bl .ncbc.enc.next.block_fp
1592 add in0, 8, in0 ! input address
1594 ! If 8 or more bytes are to be encrypted after this block,
1595 ! we combine final permutation for this block with initial
1596 ! permutation for next block. Load next block:
1598 load_little_endian(in0, global3, global4, local5, .LLE12)
1600 ! parameter 1 original left
1601 ! parameter 2 original right
1602 ! parameter 3 left ip
1603 ! parameter 4 right ip
1604 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1607 ! also adds -8 to length in2 and loads loop counter to out4
1609 fp_ip_macro(out0, out1, global3, global4, 2)
1611 store_little_endian(in1, out0, out1, local3, .SLE10) ! block
1613 ld [in3], out0 ! key 7531 first round next block
1615 xor global3, out5, in5 ! iv xor next block
1617 ld [in3+4], out1 ! key 8642
1618 add global1, 512, global3 ! address sbox 3 since register used
1619 xor global4, local1, out5 ! iv xor next block
1621 ba .ncbc.enc.next.block_2
1622 add in1, 8, in1 ! output address
1624 .ncbc.enc.next.block_fp:
1628 store_little_endian(in1, in5, out5, local3, .SLE1) ! block
1630 addcc in2, -8, in2 ! bytes missing when next block done
1632 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1633 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
1635 bpos .ncbc.enc.next.block
1639 .ncbc.enc.seven.or.less:
1643 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1644 ble,pt %icc, .ncbc.enc.finish
1646 ble .ncbc.enc.finish
1650 add in2, 8, local1 ! bytes to load
1652 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1653 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1655 ! Loads 1 to 7 bytes little endian to global4, out4
1661 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
! Decryption path starts here.
1673 LDPTR IVEC, local7 ! ivec
1674 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1675 ble,pn %icc, .ncbc.dec.finish
1677 ble .ncbc.dec.finish
1679 mov in3, in4 ! schedule
1682 mov in0, local5 ! input
1684 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
1686 .ncbc.dec.next.block:
1688 load_little_endian(local5, in5, out5, local3, .LLE4) ! block
1690 ! parameter 6 1/2 for include encryption/decryption
1691 ! parameter 7 1 for mov in1 to in3
1692 ! parameter 8 1 for mov in3 to in4
1694 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
1696 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1698 ! in2 is bytes left to be stored
1699 ! in2 is compared to 8 in the rounds
1701 xor out5, in0, out4 ! iv xor
1702 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1703 bl,pn %icc, .ncbc.dec.seven.or.less
1705 bl .ncbc.dec.seven.or.less
1707 xor in5, in1, global4 ! iv xor
1709 ! Load ivec next block now, since input and output address might be the same.
1711 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
1713 store_little_endian(local7, out4, global4, local3, .SLE3)
1716 add local7, 8, local7 ! advance output address
1719 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1720 bg,pt %icc, .ncbc.dec.next.block
1722 bg .ncbc.dec.next.block
1724 STPTR local7, OUTPUT
1729 LDPTR IVEC, local4 ! ivec
1730 store_little_endian(local4, in0, in1, local5, .SLE4)
1737 .ncbc.dec.seven.or.less:
1739 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
1741 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1744 .DES_ncbc_encrypt.end:
1745 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1748 ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1749 ! **************************************************************************
! Triple-DES CBC mode. Encryption runs the round bodies as encrypt with
! ks1, decrypt with ks2, encrypt with ks3; decryption starts with the
! decrypt rounds on ks3 and then calls encrypt with ks2 and decrypt
! with ks1. The byte count in2 is kept as bytes remaining after the
! current block, so a negative value marks a trailing partial block of
! 1 to 7 bytes. The seventh and eighth arguments (ivec, enc) are read
! from the stack frame of the caller through %fp.
1753 .global DES_ede3_cbc_encrypt
1754 .type DES_ede3_cbc_encrypt,#function
1756 DES_ede3_cbc_encrypt:
1758 save %sp, FRAME, %sp
! Stack slots in this frame for the three key schedule pointers.
1760 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1761 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1762 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1765 mov .PIC.me.up-(.-4),out0 ! offset consumed by the .PIC.me.up helper
1767 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
1768 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1771 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1772 be,pn %icc, .ede3.dec
1780 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
1782 addcc in2, -8, in2 ! bytes missing after next block
1784 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1785 bl,pn %icc, .ede3.enc.seven.or.less
1787 bl .ede3.enc.seven.or.less
1791 .ede3.enc.next.block:
1793 load_little_endian(in0, out4, global4, local3, .LLE7)
1795 .ede3.enc.next.block_1:
1798 xor in5, out4, in5 ! iv xor
1799 xor out5, global4, out5 ! iv xor
1802 add in4, 120, in4 ! for decryption we use last subkey first
1805 ip_macro(in5, out5, in5, out5, in3)
1807 .ede3.enc.next.block_2:
1809 call .des_enc ! ks1 in3
1812 call .des_dec ! ks2 in4
1815 call .des_enc ! ks3 in3 compares in2 to 8
1818 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1819 bl,pn %icc, .ede3.enc.next.block_fp
1821 bl .ede3.enc.next.block_fp
1825 ! If 8 or more bytes are to be encrypted after this block,
1826 ! we combine final permutation for this block with initial
1827 ! permutation for next block. Load next block:
1829 load_little_endian(in0, global3, global4, local5, .LLE11)
1831 ! parameter 1 original left
1832 ! parameter 2 original right
1833 ! parameter 3 left ip
1834 ! parameter 4 right ip
1835 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1838 ! also adds -8 to length in2 and loads loop counter to out4
1840 fp_ip_macro(out0, out1, global3, global4, 1)
1842 store_little_endian(in1, out0, out1, local3, .SLE9) ! block
1845 xor global3, out5, in5 ! iv xor next block
1847 ld [in3], out0 ! key 7531
1848 add global1, 512, global3 ! address sbox 3
1849 xor global4, local1, out5 ! iv xor next block
1851 ld [in3+4], out1 ! key 8642
1852 add global1, 768, global4 ! address sbox 4
1853 ba .ede3.enc.next.block_2
1856 .ede3.enc.next.block_fp:
1860 store_little_endian(in1, in5, out5, local3, .SLE5) ! block
1862 addcc in2, -8, in2 ! bytes missing when next block done
1864 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1865 bpos,pt %icc, .ede3.enc.next.block
1867 bpos .ede3.enc.next.block
1871 .ede3.enc.seven.or.less:
1875 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1876 ble,pt %icc, .ede3.enc.finish
1878 ble .ede3.enc.finish
1882 add in2, 8, local1 ! bytes to load
1884 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1885 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1889 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1890 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
! Decryption path starts here. A call with nothing to decrypt is the
! unlikely case, so the branch carries the same not-taken hint as the
! matching branch in the single-key CBC routine.
1907 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1908 ble,pn %icc, .ede3.dec.finish
1910 ble .ede3.dec.finish
1914 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
1915 load_little_endian(local7, in0, in1, local3, .LLE8)
1917 .ede3.dec.next.block:
1919 load_little_endian(local5, in5, out5, local3, .LLE9)
1921 ! parameter 6 1/2 for include encryption/decryption
1922 ! parameter 7 1 for mov in1 to in3
1923 ! parameter 8 1 for mov in3 to in4
1924 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1926 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1928 call .des_enc ! ks2 in3
1931 call .des_dec ! ks1 in4
1934 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
1936 ! in2 is bytes left to be stored
1937 ! in2 is compared to 8 in the rounds
1940 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1941 bl,pn %icc, .ede3.dec.seven.or.less
1943 bl .ede3.dec.seven.or.less
1945 xor in5, in1, global4 ! iv xor
1947 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
1949 store_little_endian(local7, out4, global4, local3, .SLE7) ! block
1953 add local7, 8, local7 ! advance output address
1955 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1956 bg,pt %icc, .ede3.dec.next.block
1958 bg .ede3.dec.next.block
1960 STPTR local7, OUTPUT
1964 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1965 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
1972 .ede3.dec.seven.or.less:
1974 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
1976 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1979 .DES_ede3_cbc_encrypt.end:
1980 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt