2 ! des_enc.S (generated from des_enc.m4)
4 ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
6 ! Version 1.0. 32-bit version.
10 ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
15 ! Assembler version: Copyright Svend Olaf Mikkelsen.
17 ! Original C code: Copyright Eric A. Young.
19 ! This code can be freely used by LibDES/SSLeay/OpenSSL users.
21 ! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
23 ! This version can be redistributed.
25 ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
27 ! Global registers 1 to 5 are used. This is the same as done by the
28 ! cc compiler. The UltraSPARC load/store little endian feature is used.
30 ! Instruction grouping often refers to one CPU cycle.
32 ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
34 ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
36 ! Performance improvement according to './apps/openssl speed des'
39 ! 23% faster than cc-5.2 -xarch=v8plus -xO5
40 ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
42 ! 50% faster than cc-5.2 -xarch=v9 -xO5
43 ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
46 .ident "des_enc.m4 2.0"
48 #if defined(__SUNPRO_C) && defined(__sparcv9)
49 # define ABI64 /* They've said -xarch=v9 at command line */
50 #elif defined(__GNUC__) && defined(__arch64__)
51 # define ABI64 /* They've said -m64 at command line */
55 .register %g2,#scratch
56 .register %g3,#scratch
63 # ifndef OPENSSL_SYSNAME_ULTRASPARC
64 # define OPENSSL_SYSNAME_ULTRASPARC
120 ! The logic used in initial and final permutations is the same as in
121 ! the C code. The permutations are done with a clever shift, xor, and
124 ! The macro also loads address sbox 1 to 5 to global 1 to 5, address
125 ! sbox 6 to local6, and address sbox 8 to out3.
127 ! Rotates the halves 3 left to bring the sbox bits into convenient positions.
129 ! Loads key first round from address in parameter 5 to out0, out1.
131 ! After the original LibDES initial permutation, the resulting left
132 ! is in the variable initially used for right and vice versa. The macro
133 ! implements the possibility to keep the halves in the original registers.
137 ! parameter 3 result left (modify in first round)
138 ! parameter 4 result right (use in first round)
139 ! parameter 5 key address
140 ! parameter 6 1/2 for include encryption/decryption
141 ! parameter 7 1 for move in1 to in3
142 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
143 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
148 ! $1 $2 $4 $3 $5 $6 $7 $8 $9
150 ld [out2+256], local1
153 xor local4, $1, local4
154 ifelse($7,1,{mov in1, in3},{nop})
156 ld [out2+260], local2
157 and local4, local1, local4
158 ifelse($8,1,{mov in3, in4},{})
159 ifelse($8,2,{mov in4, in3},{})
161 ld [out2+280], out4 ! loop counter
162 sll local4, 4, local1
165 ld [out2+264], local3
169 ifelse($9,1,{LDPTR KS3, in4},{})
170 xor local4, $2, local4
171 nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
173 ifelse($9,1,{LDPTR KS2, in3},{})
174 and local4, local2, local4
175 nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
177 sll local4, 16, local1
183 sethi %hi(16711680), local5
184 xor local4, $1, local4
186 and local4, local3, local4
187 or local5, 255, local5
189 sll local4, 2, local2
195 xor local4, $2, local4
196 add global1, 768, global4
198 and local4, local5, local4
199 add global1, 1024, global5
201 ld [out2+272], local7
202 sll local4, 8, local1
208 ld [$5], out0 ! key 7531
209 xor local4, $1, local4
210 add global1, 256, global2
212 ld [$5+4], out1 ! key 8642
213 and local4, local7, local4
214 add global1, 512, global3
216 sll local4, 1, local1
223 add global1, 1280, local6 ! address sbox 6
226 add global1, 1792, out3 ! address sbox 8
229 or local4, local3, $4
231 or local2, local1, $3
235 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
236 or local2, local1, $3
240 and local1, 252, local1
246 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
247 or local2, local1, $3
251 and local1, 252, local1
259 ! The logic used in the DES rounds is the same as in the C code,
260 ! except that calculations for sbox 1 and sbox 5 begin before
261 ! the previous round is finished.
263 ! In each round one half (work) is modified based on key and the
266 ! In this version we do two rounds in a loop repeated 7 times
267 ! and two rounds separately.
269 ! One half has the bits for the sboxes in the following positions:
271 ! 777777xx555555xx333333xx111111xx
273 ! 88xx666666xx444444xx222222xx8888
275 ! The bits for each sbox are xor-ed with the key bits for that box.
276 ! The above xx bits are cleared, and the result used for lookup in
277 ! the sbox table. Each sbox entry contains the 4 output bits permuted
278 ! into 32 bits according to the P permutation.
280 ! In the description of DES, left and right are switched after
281 ! each round, except after last round. In this code the original
282 ! left and right are kept in the same register in all rounds, meaning
283 ! that after the 16 rounds the result for right is in the register
284 ! originally used for left.
286 ! parameter 1 first work (left in first round)
287 ! parameter 2 first use (right in first round)
288 ! parameter 3 enc/dec 1/-1
289 ! parameter 4 loop label
290 ! parameter 5 key address register
291 ! parameter 6 optional address for key next encryption/decryption
292 ! parameter 7 not empty for include retl
294 ! also compares in2 to 8
296 define(rounds_macro, {
299 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
303 ld [out2+284], local5 ! 0x0000FC00
305 and local1, 252, local1
310 ! local6 is address sbox 6
311 ! out3 is address sbox 8
312 ! out4 is loop counter
314 ld [global1+local1], local1
315 xor $2, out1, out1 ! 8642
316 xor $2, out0, out0 ! 7531
317 fmovs %f0, %f0 ! fmovs of a register onto itself, used only for alignment
319 srl out1, 4, local0 ! rotate 4 right
320 and out0, local5, local3 ! 3
323 ld [$5+$3*8], local7 ! key 7531 next round
324 srl local3, 8, local3 ! 3
325 and local0, 252, local2 ! 2
328 ld [global3+local3],local3 ! 3
329 sll out1, 28, out1 ! rotate
330 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
332 ld [global2+local2], local2 ! 2
333 srl out0, 24, local1 ! 7
334 or out1, local0, out1 ! rotate
336 ldub [out2+local1], local1 ! 7 (and 0xFC)
337 srl out1, 24, local0 ! 8
338 and out1, local5, local4 ! 4
340 ldub [out2+local0], local0 ! 8 (and 0xFC)
341 srl local4, 8, local4 ! 4
342 xor $1, local2, $1 ! 2 finished local2 now sbox 6
344 ld [global4+local4],local4 ! 4
345 srl out1, 16, local2 ! 6
346 xor $1, local3, $1 ! 3 finished local3 now sbox 5
348 ld [out3+local0],local0 ! 8
349 and local2, 252, local2 ! 6
350 add global1, 1536, local5 ! address sbox 7
352 ld [local6+local2], local2 ! 6
353 srl out0, 16, local3 ! 5
354 xor $1, local4, $1 ! 4 finished
356 ld [local5+local1],local1 ! 7
357 and local3, 252, local3 ! 5
358 xor $1, local0, $1 ! 8 finished
360 ld [global5+local3],local3 ! 5
361 xor $1, local2, $1 ! 6 finished
364 ld [$5+$3*8+4], out0 ! key 8642 next round
365 xor $1, local7, local2 ! sbox 5 next round
366 xor $1, local1, $1 ! 7 finished
368 srl local2, 16, local2 ! sbox 5 next round
369 xor $1, local3, $1 ! 5 finished
371 ld [$5+$3*16+4], out1 ! key 8642 next round again
372 and local2, 252, local2 ! sbox5 next round
374 xor $1, local7, local7 ! 7531
376 ld [global5+local2], local2 ! 5
377 srl local7, 24, local3 ! 7
378 xor $1, out0, out0 ! 8642
380 ldub [out2+local3], local3 ! 7 (and 0xFC)
381 srl out0, 4, local0 ! rotate 4 right
382 and local7, 252, local1 ! 1
384 sll out0, 28, out0 ! rotate
385 xor $2, local2, $2 ! 5 finished local2 used
387 srl local0, 8, local4 ! 4
388 and local0, 252, local2 ! 2
389 ld [local5+local3], local3 ! 7
391 srl local0, 16, local5 ! 6
392 or out0, local0, out0 ! rotate
393 ld [global2+local2], local2 ! 2
396 ld [$5+$3*16], out0 ! key 7531 next round
397 and local4, 252, local4 ! 4
399 and local5, 252, local5 ! 6
400 ld [global4+local4], local4 ! 4
401 xor $2, local3, $2 ! 7 finished local3 used
403 and local0, 252, local0 ! 8
404 ld [local6+local5], local5 ! 6
405 xor $2, local2, $2 ! 2 finished local2 now sbox 3
407 srl local7, 8, local2 ! 3 start
408 ld [out3+local0], local0 ! 8
409 xor $2, local4, $2 ! 4 finished
411 and local2, 252, local2 ! 3
412 ld [global1+local1], local1 ! 1
413 xor $2, local5, $2 ! 6 finished local5 used
415 ld [global3+local2], local2 ! 3
416 xor $2, local0, $2 ! 8 finished
417 add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer: two 8-byte round keys per pass
419 ld [out2+284], local5 ! 0x0000FC00
420 xor $2, out0, local4 ! sbox 1 next round
421 xor $2, local1, $2 ! 1 finished
423 xor $2, local2, $2 ! 3 finished
424 #ifdef OPENSSL_SYSNAME_ULTRASPARC
429 and local4, 252, local1 ! sbox 1 next round
433 ld [global1+local1], local1
437 srl out1, 4, local0 ! rotate
438 and out0, local5, local3
440 ld [$5+$3*8], local7 ! key 7531
441 srl local3, 8, local3
442 and local0, 252, local2
444 ld [global3+local3],local3
445 sll out1, 28, out1 ! rotate
446 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
448 ld [global2+local2], local2
450 or out1, local0, out1 ! rotate
452 ldub [out2+local1], local1
454 and out1, local5, local4
456 ldub [out2+local0], local0
457 srl local4, 8, local4
458 xor $1, local2, $1 ! 2 finished local2 now sbox 6
460 ld [global4+local4],local4
462 xor $1, local3, $1 ! 3 finished local3 now sbox 5
464 ld [out3+local0],local0
465 and local2, 252, local2
466 add global1, 1536, local5 ! address sbox 7
468 ld [local6+local2], local2
470 xor $1, local4, $1 ! 4 finished
472 ld [local5+local1],local1
473 and local3, 252, local3
476 ld [global5+local3],local3
477 xor $1, local2, $1 ! 6 finished
480 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
481 xor $1, local7, local2 ! sbox 5 next round
482 xor $1, local1, $1 ! 7 finished
485 srl local2, 16, local2 ! sbox 5 next round
486 xor $1, local3, $1 ! 5 finished
488 and local2, 252, local2
489 ! next round (two rounds more)
490 xor $1, local7, local7 ! 7531
492 ld [global5+local2], local2
493 srl local7, 24, local3
494 xor $1, out0, out0 ! 8642
496 ldub [out2+local3], local3
497 srl out0, 4, local0 ! rotate
498 and local7, 252, local1
500 sll out0, 28, out0 ! rotate
501 xor $2, local2, $2 ! 5 finished local2 used
503 srl local0, 8, local4
504 and local0, 252, local2
505 ld [local5+local3], local3
507 srl local0, 16, local5
508 or out0, local0, out0 ! rotate
509 ld [global2+local2], local2
512 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
513 and local4, 252, local4
515 and local5, 252, local5
516 ld [global4+local4], local4
517 xor $2, local3, $2 ! 7 finished local3 used
519 and local0, 252, local0
520 ld [local6+local5], local5
521 xor $2, local2, $2 ! 2 finished local2 now sbox 3
523 srl local7, 8, local2 ! 3 start
524 ld [out3+local0], local0
527 and local2, 252, local2
528 ld [global1+local1], local1
529 xor $2, local5, $2 ! 6 finished local5 used
531 ld [global3+local2], local2
535 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
539 ifelse($7,{}, {}, {retl})
546 ! parameter 1 right (original left)
547 ! parameter 2 left (original right)
548 ! parameter 3 1 for optional store to [in0]
549 ! parameter 4 1 for load input/output address to local5/7
551 ! The final permutation logic switches the halves, meaning that
552 ! left and right end up in the registers originally used.
557 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
559 ! initially undo the rotate 3 left done after initial permutation
560 ! original left is received shifted 3 right and 29 left in local3/4
563 or local3, local4, $1
566 sethi %hi(0x55555555), local2
569 or local2, %lo(0x55555555), local2
572 sethi %hi(0x00ff00ff), local1
573 xor local3, $1, local3
574 or local1, %lo(0x00ff00ff), local1
575 and local3, local2, local3
576 sethi %hi(0x33333333), local4
577 sll local3, 1, local2
583 xor local3, $2, local3
584 or local4, %lo(0x33333333), local4
585 and local3, local1, local3
586 sethi %hi(0x0000ffff), local1
587 sll local3, 8, local2
593 xor local3, $1, local3
594 or local1, %lo(0x0000ffff), local1
595 and local3, local4, local3
596 sethi %hi(0x0f0f0f0f), local4
597 sll local3, 2, local2
599 ifelse($4,1, {LDPTR INPUT, local5})
602 ifelse($4,1, {LDPTR OUTPUT, local7})
605 xor local3, $2, local3
606 or local4, %lo(0x0f0f0f0f), local4
607 and local3, local1, local3
608 sll local3, 16, local2
610 xor $2, local3, local1
612 srl local1, 4, local3
614 xor local3, $1, local3
615 and local3, local4, local3
616 sll local3, 4, local2
622 ifelse($3,1, {st $1, [in0]})
624 xor local1, local2, $2
626 ifelse($3,1, {st $2, [in0+4]})
633 ! Does initial permutation for next block mixed with
634 ! final permutation for current block.
636 ! parameter 1 original left
637 ! parameter 2 original right
638 ! parameter 3 left ip
639 ! parameter 4 right ip
640 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
643 ! also adds -8 to length in2 and loads loop counter to out4
645 define(fp_ip_macro, {
648 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
650 define({temp1},{out4})
651 define({temp2},{local3})
653 define({ip1},{local1})
654 define({ip2},{local2})
655 define({ip4},{local4})
656 define({ip5},{local5})
658 ! $1 in local3, local4
662 or local3, local4, $1
665 ifelse($5,2,{mov in4, in3})
674 and temp1, ip5, temp1
675 xor local0, $3, local0
680 and local0, ip1, local0
683 sll local0, 4, local7
693 xor local0, $4, local0
694 and temp1, ip4, temp1
695 and local0, ip2, local0
698 sll local0, 16, local7
704 ld [out2+264], temp2 ! ip3
708 xor local0, $3, local0
709 and temp1, temp2, temp1
710 and local0, temp2, local0
713 sll local0, 2, local7
721 xor local0, $4, local0
722 and temp1, ip2, temp1
723 and local0, ip4, local0
725 xor $2, temp1, local4
726 sll local0, 8, local7
733 xor local0, $3, local0
736 and local0, ip5, local0
738 sll local0, 1, local7
745 and temp1, ip1, temp1
750 ifelse($5,1,{LDPTR KS2, in4})
752 xor local4, temp2, $2
754 ! reload since used as temporary:
756 ld [out2+280], out4 ! loop counter
759 ifelse($5,1,{add in4, 120, in4})
761 ifelse($5,1,{LDPTR KS1, in3})
764 or local0, local5, $4
765 or local2, local7, $3
771 ! {load_little_endian}
773 ! parameter 1 address
774 ! parameter 2 destination left
775 ! parameter 3 destination right
776 ! parameter 4 temporary
779 define(load_little_endian, {
781 ! {load_little_endian}
782 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
784 ! first in memory to rightmost in register
786 #ifdef OPENSSL_SYSNAME_ULTRASPARC
832 ! {load_little_endian_inc}
834 ! parameter 1 address
835 ! parameter 2 destination left
836 ! parameter 3 destination right
837 ! parameter 4 temporary
842 define(load_little_endian_inc, {
844 ! {load_little_endian_inc}
845 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
847 ! first in memory to rightmost in register
849 #ifdef OPENSSL_SYSNAME_ULTRASPARC
898 ! Loads 1 to 7 bytes little endian
899 ! Remaining bytes are zeroed.
901 ! parameter 1 address
903 ! parameter 3 destination register left
904 ! parameter 4 destination register right
908 ! parameter 8 return label
910 define(load_n_bytes, {
913 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
918 add %o7,$7.jmp.table-$7.0,$5
970 ! {store_little_endian}
972 ! parameter 1 address
973 ! parameter 2 source left
974 ! parameter 3 source right
975 ! parameter 4 temporary
977 define(store_little_endian, {
979 ! {store_little_endian}
980 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
982 ! rightmost in register to first in memory
984 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1033 ! Stores 1 to 7 bytes little endian
1035 ! parameter 1 address
1036 ! parameter 2 length
1037 ! parameter 3 source register left
1038 ! parameter 4 source register right
1042 ! parameter 8 return label
1044 define(store_n_bytes, {
1047 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
1052 add %o7,$7.jmp.table-$7.0,$5
1105 define(testvalue,{1})
1107 define(register_init, {
1109 ! For test purposes: seed one register with the test constant and copy it to every register passed as a non-empty argument.
1111 sethi %hi(testvalue), local0
1112 or local0, %lo(testvalue), local0
1114 ifelse($1,{},{}, {mov local0, $1})
1115 ifelse($2,{},{}, {mov local0, $2})
1116 ifelse($3,{},{}, {mov local0, $3})
1117 ifelse($4,{},{}, {mov local0, $4})
1118 ifelse($5,{},{}, {mov local0, $5})
1119 ifelse($6,{},{}, {mov local0, $6})
1120 ifelse($7,{},{}, {mov local0, $7})
1121 ifelse($8,{},{}, {mov local0, $8})
1151 ! loads key next encryption/decryption first round from [in4]
1153 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1160 ! implemented with out5 as first parameter to avoid
1161 ! register exchange in ede modes
1164 ! loads key next encryption/decryption first round from [in3]
1166 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1170 ! void DES_encrypt1(data, ks, enc)
1171 ! *******************************
1174 .global DES_encrypt1
1175 .type DES_encrypt1,#function
1179 save %sp, FRAME, %sp
1182 mov .PIC.me.up-(.-4),out0
1184 ld [in0], in5 ! left
1187 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1188 be,pn %icc, .encrypt.dec ! enc/dec
1192 ld [in0+4], out5 ! right
1194 ! parameter 6 1/2 for include encryption/decryption
1195 ! parameter 7 1 for move in1 to in3
1196 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1198 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1200 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1202 fp_macro(in5, out5, 1) ! 1 for store to [in0]
1209 add in1, 120, in3 ! use last subkey for first round
1211 ! parameter 6 1/2 for include encryption/decryption
1212 ! parameter 7 1 for move in1 to in3
1213 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1215 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
1217 fp_macro(out5, in5, 1) ! 1 for store to [in0]
1223 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1226 ! void DES_encrypt2(data, ks, enc)
1227 !*********************************
1229 ! encrypts/decrypts without initial/final permutation
1232 .global DES_encrypt2
1233 .type DES_encrypt2,#function
1237 save %sp, FRAME, %sp
1240 mov .PIC.me.up-(.-4),out0
1242 ! Set sbox addresses 1 to 6 and 8 and rotate halves 3 left
1243 ! Errors caught by destest? Yes. Still? *NO*
1245 !sethi %hi(DES_SPtrans), global1 ! address sbox 1
1247 !or global1, %lo(DES_SPtrans), global1 ! sbox 1
1249 add global1, 256, global2 ! sbox 2
1250 add global1, 512, global3 ! sbox 3
1252 ld [in0], out5 ! right
1253 add global1, 768, global4 ! sbox 4
1254 add global1, 1024, global5 ! sbox 5
1256 ld [in0+4], in5 ! left
1257 add global1, 1280, local6 ! sbox 6
1258 add global1, 1792, out3 ! sbox 8
1263 mov in1, in3 ! key address to in3
1269 add in5, local5, in5
1271 add out5, local7, out5
1274 ! we use our own stackframe
1276 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1277 be,pn %icc, .encrypt2.dec ! decryption
1281 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ]
1283 ld [in3], out0 ! key 7531 first round
1284 mov LOOPS, out4 ! loop counter
1286 ld [in3+4], out1 ! key 8642 first round
1287 sethi %hi(0x0000FC00), local5
1298 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
1311 ld [in4], out0 ! key 7531 first round
1312 mov LOOPS, out4 ! loop counter
1314 ld [in4+4], out1 ! key 8642 first round
1315 sethi %hi(0x0000FC00), local5
1317 mov in5, local1 ! left expected in out5
1331 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
1340 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1343 ! void DES_encrypt3(data, ks1, ks2, ks3)
1344 ! **************************************
1347 .global DES_encrypt3
1348 .type DES_encrypt3,#function
1352 save %sp, FRAME, %sp
1355 mov .PIC.me.up-(.-4),out0
1357 ld [in0], in5 ! left
1358 add in2, 120, in4 ! ks2
1360 ld [in0+4], out5 ! right
1361 mov in3, in2 ! save ks3
1363 ! parameter 6 1/2 for include encryption/decryption
1364 ! parameter 7 1 for mov in1 to in3
1365 ! parameter 8 1 for mov in3 to in4
1366 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1368 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1371 mov in2, in3 ! preload ks3
1376 fp_macro(in5, out5, 1)
1382 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1385 ! void DES_decrypt3(data, ks1, ks2, ks3)
1386 ! **************************************
1389 .global DES_decrypt3
1390 .type DES_decrypt3,#function
1394 save %sp, FRAME, %sp
1397 mov .PIC.me.up-(.-4),out0
1399 ld [in0], in5 ! left
1400 add in3, 120, in4 ! ks3
1402 ld [in0+4], out5 ! right
1405 ! parameter 6 1/2 for include encryption/decryption
1406 ! parameter 7 1 for mov in1 to in3
1407 ! parameter 8 1 for mov in3 to in4
1408 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1410 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1413 add in1, 120, in4 ! preload ks1
1418 fp_macro(out5, in5, 1)
1424 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1426 ! input: out0 offset between .PIC.me.up and caller
1427 ! output: out0 pointer to .PIC.me.up
1428 ! out2 pointer to .des_and
1429 ! global1 pointer to DES_SPtrans
1432 add out0,%o7,out0 ! pointer to .PIC.me.up
1435 sethi %hi(DES_SPtrans),global1
1436 or global1,%lo(DES_SPtrans),global1
1437 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1438 add global1,out0,global1
1439 add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1440 LDPTR [out2+global1],global1
1442 setn DES_SPtrans,out2,global1 ! synthetic instruction !
1446 add out0,.des_and-.PIC.me.up,out2
1449 .type .des_and,#object
1454 ! This table is used for AND 0xFC when it is known that register
1455 ! bits 8-31 are zero. Makes it possible to do three arithmetic
1456 ! operations in one cycle.
1458 .byte 0, 0, 0, 0, 4, 4, 4, 4
1459 .byte 8, 8, 8, 8, 12, 12, 12, 12
1460 .byte 16, 16, 16, 16, 20, 20, 20, 20
1461 .byte 24, 24, 24, 24, 28, 28, 28, 28
1462 .byte 32, 32, 32, 32, 36, 36, 36, 36
1463 .byte 40, 40, 40, 40, 44, 44, 44, 44
1464 .byte 48, 48, 48, 48, 52, 52, 52, 52
1465 .byte 56, 56, 56, 56, 60, 60, 60, 60
1466 .byte 64, 64, 64, 64, 68, 68, 68, 68
1467 .byte 72, 72, 72, 72, 76, 76, 76, 76
1468 .byte 80, 80, 80, 80, 84, 84, 84, 84
1469 .byte 88, 88, 88, 88, 92, 92, 92, 92
1470 .byte 96, 96, 96, 96, 100, 100, 100, 100
1471 .byte 104, 104, 104, 104, 108, 108, 108, 108
1472 .byte 112, 112, 112, 112, 116, 116, 116, 116
1473 .byte 120, 120, 120, 120, 124, 124, 124, 124
1474 .byte 128, 128, 128, 128, 132, 132, 132, 132
1475 .byte 136, 136, 136, 136, 140, 140, 140, 140
1476 .byte 144, 144, 144, 144, 148, 148, 148, 148
1477 .byte 152, 152, 152, 152, 156, 156, 156, 156
1478 .byte 160, 160, 160, 160, 164, 164, 164, 164
1479 .byte 168, 168, 168, 168, 172, 172, 172, 172
1480 .byte 176, 176, 176, 176, 180, 180, 180, 180
1481 .byte 184, 184, 184, 184, 188, 188, 188, 188
1482 .byte 192, 192, 192, 192, 196, 196, 196, 196
1483 .byte 200, 200, 200, 200, 204, 204, 204, 204
1484 .byte 208, 208, 208, 208, 212, 212, 212, 212
1485 .byte 216, 216, 216, 216, 220, 220, 220, 220
1486 .byte 224, 224, 224, 224, 228, 228, 228, 228
1487 .byte 232, 232, 232, 232, 236, 236, 236, 236
1488 .byte 240, 240, 240, 240, 244, 244, 244, 244
1489 .byte 248, 248, 248, 248, 252, 252, 252, 252
1491 ! 5 numbers for initial/final permutation
1493 .word 0x0f0f0f0f ! offset 256
1494 .word 0x0000ffff ! 260
1495 .word 0x33333333 ! 264
1496 .word 0x00ff00ff ! 268
1497 .word 0x55555555 ! 272
1501 .word 0x0000FC00 ! 284
1503 ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1504 ! *****************************************************************
1508 .global DES_ncbc_encrypt
1509 .type DES_ncbc_encrypt,#function
1513 save %sp, FRAME, %sp
1515 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
1516 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1517 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1520 mov .PIC.me.up-(.-4),out0
1524 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1525 be,pn %icc, .ncbc.dec
1531 ! addr left right temp label
1532 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
1534 addcc in2, -8, in2 ! bytes missing when first block done
1536 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1537 bl,pn %icc, .ncbc.enc.seven.or.less
1539 bl .ncbc.enc.seven.or.less
1541 mov in3, in4 ! schedule
1543 .ncbc.enc.next.block:
1545 load_little_endian(in0, out4, global4, local3, .LLE2) ! block
1547 .ncbc.enc.next.block_1:
1549 xor in5, out4, in5 ! iv xor
1550 xor out5, global4, out5 ! iv xor
1552 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1553 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1555 .ncbc.enc.next.block_2:
1557 !// call .des_enc ! compares in2 to 8
1558 ! rounds inlined for alignment purposes
1560 add global1, 768, global4 ! address sbox 4 since register used below
1562 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
1564 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1565 bl,pn %icc, .ncbc.enc.next.block_fp
1567 bl .ncbc.enc.next.block_fp
1569 add in0, 8, in0 ! input address
1571 ! If 8 or more bytes are to be encrypted after this block,
1572 ! we combine final permutation for this block with initial
1573 ! permutation for next block. Load next block:
1575 load_little_endian(in0, global3, global4, local5, .LLE12)
1577 ! parameter 1 original left
1578 ! parameter 2 original right
1579 ! parameter 3 left ip
1580 ! parameter 4 right ip
1581 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1584 ! also adds -8 to length in2 and loads loop counter to out4
1586 fp_ip_macro(out0, out1, global3, global4, 2)
1588 store_little_endian(in1, out0, out1, local3, .SLE10) ! block
1590 ld [in3], out0 ! key 7531 first round next block
1592 xor global3, out5, in5 ! iv xor next block
1594 ld [in3+4], out1 ! key 8642
1595 add global1, 512, global3 ! address sbox 3 since register used
1596 xor global4, local1, out5 ! iv xor next block
1598 ba .ncbc.enc.next.block_2
1599 add in1, 8, in1 ! output address, advanced in the branch delay slot
1601 .ncbc.enc.next.block_fp:
1605 store_little_endian(in1, in5, out5, local3, .SLE1) ! block
1607 addcc in2, -8, in2 ! bytes missing when next block done
1609 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1610 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
1612 bpos .ncbc.enc.next.block
1616 .ncbc.enc.seven.or.less:
1620 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1621 ble,pt %icc, .ncbc.enc.finish
1623 ble .ncbc.enc.finish
1627 add in2, 8, local1 ! bytes to load
1629 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1630 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1632 ! Loads 1 to 7 bytes little endian to global4, out4
1638 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
1650 LDPTR IVEC, local7 ! ivec
1651 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1652 ble,pn %icc, .ncbc.dec.finish
1654 ble .ncbc.dec.finish
1656 mov in3, in4 ! schedule
1659 mov in0, local5 ! input
1661 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
1663 .ncbc.dec.next.block:
1665 load_little_endian(local5, in5, out5, local3, .LLE4) ! block
1667 ! parameter 6 1/2 for include encryption/decryption
1668 ! parameter 7 1 for mov in1 to in3
1669 ! parameter 8 1 for mov in3 to in4
1671 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
1673 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1675 ! in2 is bytes left to be stored
1676 ! in2 is compared to 8 in the rounds
1678 xor out5, in0, out4 ! iv xor
1679 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1680 bl,pn %icc, .ncbc.dec.seven.or.less
1682 bl .ncbc.dec.seven.or.less
1684 xor in5, in1, global4 ! iv xor
1686 ! Load ivec next block now, since input and output address might be the same.
1688 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
1690 store_little_endian(local7, out4, global4, local3, .SLE3)
1693 add local7, 8, local7
1696 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1697 bg,pt %icc, .ncbc.dec.next.block
1699 bg .ncbc.dec.next.block
1701 STPTR local7, OUTPUT
1706 LDPTR IVEC, local4 ! ivec
1707 store_little_endian(local4, in0, in1, local5, .SLE4)
1714 .ncbc.dec.seven.or.less:
1716 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
1718 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1721 .DES_ncbc_encrypt.end:
1722 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1725 ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1726 ! **************************************************************************
1730 .global DES_ede3_cbc_encrypt
1731 .type DES_ede3_cbc_encrypt,#function
1733 DES_ede3_cbc_encrypt:
1735 save %sp, FRAME, %sp
1737 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1738 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1739 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1742 mov .PIC.me.up-(.-4),out0
1744 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
1745 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1748 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1749 be,pn %icc, .ede3.dec
1757 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
1759 addcc in2, -8, in2 ! bytes missing after next block
1761 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1762 bl,pn %icc, .ede3.enc.seven.or.less
1764 bl .ede3.enc.seven.or.less
1768 .ede3.enc.next.block:
1770 load_little_endian(in0, out4, global4, local3, .LLE7)
1772 .ede3.enc.next.block_1:
1775 xor in5, out4, in5 ! iv xor
1776 xor out5, global4, out5 ! iv xor
1779 add in4, 120, in4 ! for decryption we use last subkey first
1782 ip_macro(in5, out5, in5, out5, in3)
1784 .ede3.enc.next.block_2:
1786 call .des_enc ! ks1 in3
1789 call .des_dec ! ks2 in4
1792 call .des_enc ! ks3 in3 compares in2 to 8
1795 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1796 bl,pn %icc, .ede3.enc.next.block_fp
1798 bl .ede3.enc.next.block_fp
1802 ! If 8 or more bytes are to be encrypted after this block,
1803 ! we combine final permutation for this block with initial
1804 ! permutation for next block. Load next block:
1806 load_little_endian(in0, global3, global4, local5, .LLE11)
1808 ! parameter 1 original left
1809 ! parameter 2 original right
1810 ! parameter 3 left ip
1811 ! parameter 4 right ip
1812 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1815 ! also adds -8 to length in2 and loads loop counter to out4
1817 fp_ip_macro(out0, out1, global3, global4, 1)
1819 store_little_endian(in1, out0, out1, local3, .SLE9) ! block
1822 xor global3, out5, in5 ! iv xor next block
1824 ld [in3], out0 ! key 7531
1825 add global1, 512, global3 ! address sbox 3
1826 xor global4, local1, out5 ! iv xor next block
1828 ld [in3+4], out1 ! key 8642
1829 add global1, 768, global4 ! address sbox 4
1830 ba .ede3.enc.next.block_2
1833 .ede3.enc.next.block_fp:
1837 store_little_endian(in1, in5, out5, local3, .SLE5) ! block
1839 addcc in2, -8, in2 ! bytes missing when next block done
1841 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1842 bpos,pt %icc, .ede3.enc.next.block
1844 bpos .ede3.enc.next.block
1848 .ede3.enc.seven.or.less:
1852 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1853 ble,pt %icc, .ede3.enc.finish
1855 ble .ede3.enc.finish
1859 add in2, 8, local1 ! bytes to load
1861 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1862 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1866 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1867 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
1884 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1885 ble,pn %icc, .ede3.dec.finish ! predicted not taken: same hint as the ncbc decrypt path
1887 ble .ede3.dec.finish
1891 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
1892 load_little_endian(local7, in0, in1, local3, .LLE8)
1894 .ede3.dec.next.block:
1896 load_little_endian(local5, in5, out5, local3, .LLE9)
1898 ! parameter 6 1/2 for include encryption/decryption
1899 ! parameter 7 1 for mov in1 to in3
1900 ! parameter 8 1 for mov in3 to in4
1901 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1903 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1905 call .des_enc ! ks2 in3
1908 call .des_dec ! ks1 in4
1911 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
1913 ! in2 is bytes left to be stored
1914 ! in2 is compared to 8 in the rounds
1917 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1918 bl,pn %icc, .ede3.dec.seven.or.less
1920 bl .ede3.dec.seven.or.less
1922 xor in5, in1, global4
1924 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
1926 store_little_endian(local7, out4, global4, local3, .SLE7) ! block
1930 add local7, 8, local7
1932 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1933 bg,pt %icc, .ede3.dec.next.block
1935 bg .ede3.dec.next.block
1937 STPTR local7, OUTPUT
1941 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1942 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
1949 .ede3.dec.seven.or.less:
1951 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
1953 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1956 .DES_ede3_cbc_encrypt.end:
1957 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt