2 ! des_enc.S (generated from des_enc.m4)
4 ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
6 ! Version 1.0. 32-bit version.
10 ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
15 ! Assembler version: Copyright Svend Olaf Mikkelsen.
17 ! Original C code: Copyright Eric A. Young.
19 ! This code can be freely used by LibDES/SSLeay/OpenSSL users.
21 ! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
23 ! This version can be redistributed.
25 ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
27 ! Global registers 1 to 5 are used. This is the same as done by the
28 ! cc compiler. The UltraSPARC load/store little endian feature is used.
30 ! Instruction grouping often refers to one CPU cycle.
32 ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
34 ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
36 ! Performance improvement according to './apps/openssl speed des'
39 ! 23% faster than cc-5.2 -xarch=v8plus -xO5
40 ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
42 ! 50% faster than cc-5.2 -xarch=v9 -xO5
43 ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
46 .ident "des_enc.m4 2.0"
! ABI selection: ABI64 is defined when the compiler driver targets the 64-bit SPARC ABI.
48 #if defined(__SUNPRO_C) && defined(__sparcv9)
49 # define ABI64 /* They've said -xarch=v9 at command line */
50 #elif defined(__GNUC__) && defined(__arch64__)
51 # define ABI64 /* They've said -m64 at command line */
! Declare %g2 and %g3 to the assembler as scratch registers.
55 .register %g2,#scratch
56 .register %g3,#scratch
! Define the UltraSPARC feature macro unless it is already defined.
63 # ifndef OPENSSL_SYSNAME_ULTRASPARC
64 # define OPENSSL_SYSNAME_ULTRASPARC
121 ! The logic used in initial and final permutations is the same as in
122 ! the C code. The permutations are done with a clever shift, xor, and
125 ! The macro also loads address sbox 1 to 5 to global 1 to 5, address
126 ! sbox 6 to local6, and address sbox 8 to out3.
128 ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
130 ! Loads key first round from address in parameter 5 to out0, out1.
132 ! After the original LibDES initial permutation, the resulting left
133 ! is in the variable initially used for right and vice versa. The macro
134 ! implements the possibility to keep the halves in the original registers.
138 ! parameter 3 result left (modify in first round)
139 ! parameter 4 result right (use in first round)
140 ! parameter 5 key address
141 ! parameter 6 1/2 for include encryption/decryption
142 ! parameter 7 1 for move in1 to in3
143 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
144 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
! Body of {ip_macro}: initial permutation plus register setup for the rounds.
! The mask words and the loop counter are read from the table addressed by out2.
149 ! $1 $2 $4 $3 $5 $6 $7 $8 $9
151 ld [out2+256], local1 ! 0x0f0f0f0f (table offset 256)
154 xor local4, $1, local4
155 ifelse($7,1,{mov in1, in3},{nop})
157 ld [out2+260], local2 ! 0x0000ffff (table offset 260)
158 and local4, local1, local4
159 ifelse($8,1,{mov in3, in4},{})
160 ifelse($8,2,{mov in4, in3},{})
162 ld [out2+280], out4 ! loop counter
163 sll local4, 4, local1
166 ld [out2+264], local3 ! 0x33333333 (table offset 264)
170 ifelse($9,1,{LDPTR KS3, in4},{})
171 xor local4, $2, local4
172 nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
174 ifelse($9,1,{LDPTR KS2, in3},{})
175 and local4, local2, local4
176 nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
178 sll local4, 16, local1
184 sethi %hi(16711680), local5 ! upper part of 0x00ff00ff
185 xor local4, $1, local4
187 and local4, local3, local4
188 or local5, 255, local5 ! local5 = 0x00ff00ff
190 sll local4, 2, local2
196 xor local4, $2, local4
197 add global1, 768, global4 ! address sbox 4
199 and local4, local5, local4
200 add global1, 1024, global5 ! address sbox 5
202 ld [out2+272], local7 ! 0x55555555 (table offset 272)
203 sll local4, 8, local1
209 ld [$5], out0 ! key 7531
210 xor local4, $1, local4
211 add global1, 256, global2 ! address sbox 2
213 ld [$5+4], out1 ! key 8642
214 and local4, local7, local4
215 add global1, 512, global3 ! address sbox 3
217 sll local4, 1, local1
224 add global1, 1280, local6 ! address sbox 6
227 add global1, 1792, out3 ! address sbox 8
230 or local4, local3, $4
232 or local2, local1, $3
236 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
237 or local2, local1, $3
241 and local1, 252, local1
247 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
248 or local2, local1, $3
252 and local1, 252, local1
260 ! The logic used in the DES rounds is the same as in the C code,
261 ! except that calculations for sbox 1 and sbox 5 begin before
262 ! the previous round is finished.
264 ! In each round one half (work) is modified based on key and the
267 ! In this version we do two rounds in a loop repeated 7 times
268 ! and two rounds separately.
270 ! One half has the bits for the sboxes in the following positions:
272 ! 777777xx555555xx333333xx111111xx
274 ! 88xx666666xx444444xx222222xx8888
276 ! The bits for each sbox are xor-ed with the key bits for that box.
277 ! The above xx bits are cleared, and the result used for lookup in
278 ! the sbox table. Each sbox entry contains the 4 output bits permuted
279 ! into 32 bits according to the P permutation.
281 ! In the description of DES, left and right are switched after
282 ! each round, except after last round. In this code the original
283 ! left and right are kept in the same register in all rounds, meaning
284 ! that after the 16 rounds the result for right is in the register
285 ! originally used for left.
287 ! parameter 1 first work (left in first round)
288 ! parameter 2 first use (right in first round)
289 ! parameter 3 enc/dec 1/-1
290 ! parameter 4 loop label
291 ! parameter 5 key address register
292 ! parameter 6 optional address for key next encryption/decryption
293 ! parameter 7 not empty for include retl
295 ! also compares in2 to 8
! {rounds_macro}: the DES rounds, two rounds per loop pass plus two rounds done separately.
! Each sbox lookup is a load from one of the eight 256-byte tables that start at global1.
297 define(rounds_macro, {
300 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
304 ld [out2+284], local5 ! 0x0000FC00
306 and local1, 252, local1
311 ! local6 is address sbox 6
312 ! out3 is address sbox 8
313 ! out4 is loop counter
315 ld [global1+local1], local1
316 xor $2, out1, out1 ! 8642
317 xor $2, out0, out0 ! 7531
318 fmovs %f0, %f0 ! register-to-itself FP move, used for alignment
320 srl out1, 4, local0 ! rotate 4 right
321 and out0, local5, local3 ! 3
324 ld [$5+$3*8], local7 ! key 7531 next round
325 srl local3, 8, local3 ! 3
326 and local0, 252, local2 ! 2
329 ld [global3+local3],local3 ! 3
330 sll out1, 28, out1 ! rotate
331 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
333 ld [global2+local2], local2 ! 2
334 srl out0, 24, local1 ! 7
335 or out1, local0, out1 ! rotate
337 ldub [out2+local1], local1 ! 7 (and 0xFC)
338 srl out1, 24, local0 ! 8
339 and out1, local5, local4 ! 4
341 ldub [out2+local0], local0 ! 8 (and 0xFC)
342 srl local4, 8, local4 ! 4
343 xor $1, local2, $1 ! 2 finished local2 now sbox 6
345 ld [global4+local4],local4 ! 4
346 srl out1, 16, local2 ! 6
347 xor $1, local3, $1 ! 3 finished local3 now sbox 5
349 ld [out3+local0],local0 ! 8
350 and local2, 252, local2 ! 6
351 add global1, 1536, local5 ! address sbox 7
353 ld [local6+local2], local2 ! 6
354 srl out0, 16, local3 ! 5
355 xor $1, local4, $1 ! 4 finished
357 ld [local5+local1],local1 ! 7
358 and local3, 252, local3 ! 5
359 xor $1, local0, $1 ! 8 finished
361 ld [global5+local3],local3 ! 5
362 xor $1, local2, $1 ! 6 finished
365 ld [$5+$3*8+4], out0 ! key 8642 next round
366 xor $1, local7, local2 ! sbox 5 next round
367 xor $1, local1, $1 ! 7 finished
369 srl local2, 16, local2 ! sbox 5 next round
370 xor $1, local3, $1 ! 5 finished
372 ld [$5+$3*16+4], out1 ! key 8642 next round again
373 and local2, 252, local2 ! sbox5 next round
375 xor $1, local7, local7 ! 7531
377 ld [global5+local2], local2 ! 5
378 srl local7, 24, local3 ! 7
379 xor $1, out0, out0 ! 8642
381 ldub [out2+local3], local3 ! 7 (and 0xFC)
382 srl out0, 4, local0 ! rotate 4 right
383 and local7, 252, local1 ! 1
385 sll out0, 28, out0 ! rotate
386 xor $2, local2, $2 ! 5 finished local2 used
388 srl local0, 8, local4 ! 4
389 and local0, 252, local2 ! 2
390 ld [local5+local3], local3 ! 7
392 srl local0, 16, local5 ! 6
393 or out0, local0, out0 ! rotate
394 ld [global2+local2], local2 ! 2
397 ld [$5+$3*16], out0 ! key 7531 next round
398 and local4, 252, local4 ! 4
400 and local5, 252, local5 ! 6
401 ld [global4+local4], local4 ! 4
402 xor $2, local3, $2 ! 7 finished local3 used
404 and local0, 252, local0 ! 8
405 ld [local6+local5], local5 ! 6
406 xor $2, local2, $2 ! 2 finished local2 now sbox 3
408 srl local7, 8, local2 ! 3 start
409 ld [out3+local0], local0 ! 8
410 xor $2, local4, $2 ! 4 finished
412 and local2, 252, local2 ! 3
413 ld [global1+local1], local1 ! 1
414 xor $2, local5, $2 ! 6 finished local5 used
416 ld [global3+local2], local2 ! 3
417 xor $2, local0, $2 ! 8 finished
418 add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (8 bytes per round key, two rounds)
420 ld [out2+284], local5 ! 0x0000FC00
421 xor $2, out0, local4 ! sbox 1 next round
422 xor $2, local1, $2 ! 1 finished
424 xor $2, local2, $2 ! 3 finished
425 #ifdef OPENSSL_SYSNAME_ULTRASPARC
430 and local4, 252, local1 ! sbox 1 next round
434 ld [global1+local1], local1
438 srl out1, 4, local0 ! rotate
439 and out0, local5, local3
441 ld [$5+$3*8], local7 ! key 7531
442 srl local3, 8, local3
443 and local0, 252, local2
445 ld [global3+local3],local3
446 sll out1, 28, out1 ! rotate
447 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
449 ld [global2+local2], local2
451 or out1, local0, out1 ! rotate
453 ldub [out2+local1], local1
455 and out1, local5, local4
457 ldub [out2+local0], local0
458 srl local4, 8, local4
459 xor $1, local2, $1 ! 2 finished local2 now sbox 6
461 ld [global4+local4],local4
463 xor $1, local3, $1 ! 3 finished local3 now sbox 5
465 ld [out3+local0],local0
466 and local2, 252, local2
467 add global1, 1536, local5 ! address sbox 7
469 ld [local6+local2], local2
471 xor $1, local4, $1 ! 4 finished
473 ld [local5+local1],local1
474 and local3, 252, local3
477 ld [global5+local3],local3
478 xor $1, local2, $1 ! 6 finished
481 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
482 xor $1, local7, local2 ! sbox 5 next round
483 xor $1, local1, $1 ! 7 finished
486 srl local2, 16, local2 ! sbox 5 next round
487 xor $1, local3, $1 ! 5 finished
489 and local2, 252, local2
490 ! next round (two rounds more)
491 xor $1, local7, local7 ! 7531
493 ld [global5+local2], local2
494 srl local7, 24, local3
495 xor $1, out0, out0 ! 8642
497 ldub [out2+local3], local3
498 srl out0, 4, local0 ! rotate
499 and local7, 252, local1
501 sll out0, 28, out0 ! rotate
502 xor $2, local2, $2 ! 5 finished local2 used
504 srl local0, 8, local4
505 and local0, 252, local2
506 ld [local5+local3], local3
508 srl local0, 16, local5
509 or out0, local0, out0 ! rotate
510 ld [global2+local2], local2
513 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
514 and local4, 252, local4
516 and local5, 252, local5
517 ld [global4+local4], local4
518 xor $2, local3, $2 ! 7 finished local3 used
520 and local0, 252, local0
521 ld [local6+local5], local5
522 xor $2, local2, $2 ! 2 finished local2 now sbox 3
524 srl local7, 8, local2 ! 3 start
525 ld [out3+local0], local0
528 and local2, 252, local2
529 ld [global1+local1], local1
530 xor $2, local5, $2 ! 6 finished local5 used
532 ld [global3+local2], local2
536 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
540 ifelse($7,{}, {}, {retl})
547 ! parameter 1 right (original left)
548 ! parameter 2 left (original right)
549 ! parameter 3 1 for optional store to [in0]
550 ! parameter 4 1 for load input/output address to local5/7
552 ! The final permutation logic switches the halves, meaning that
553 ! left and right end up in the registers originally used.
! Body of {fp_macro}: final permutation built from xor, and, shift steps with five mask constants.
558 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
560 ! initially undo the rotate 3 left done after initial permutation
561 ! original left is received shifted 3 right and 29 left in local3/4
564 or local3, local4, $1
567 sethi %hi(0x55555555), local2
570 or local2, %lo(0x55555555), local2 ! local2 = 0x55555555
573 sethi %hi(0x00ff00ff), local1
574 xor local3, $1, local3
575 or local1, %lo(0x00ff00ff), local1 ! local1 = 0x00ff00ff
576 and local3, local2, local3
577 sethi %hi(0x33333333), local4
578 sll local3, 1, local2
584 xor local3, $2, local3
585 or local4, %lo(0x33333333), local4 ! local4 = 0x33333333
586 and local3, local1, local3
587 sethi %hi(0x0000ffff), local1
588 sll local3, 8, local2
594 xor local3, $1, local3
595 or local1, %lo(0x0000ffff), local1 ! local1 = 0x0000ffff
596 and local3, local4, local3
597 sethi %hi(0x0f0f0f0f), local4
598 sll local3, 2, local2
600 ifelse($4,1, {LDPTR INPUT, local5})
603 ifelse($4,1, {LDPTR OUTPUT, local7})
606 xor local3, $2, local3
607 or local4, %lo(0x0f0f0f0f), local4 ! local4 = 0x0f0f0f0f
608 and local3, local1, local3
609 sll local3, 16, local2
611 xor $2, local3, local1
613 srl local1, 4, local3
615 xor local3, $1, local3
616 and local3, local4, local3
617 sll local3, 4, local2
623 ifelse($3,1, {st $1, [in0]})
625 xor local1, local2, $2
627 ifelse($3,1, {st $2, [in0+4]})
634 ! Does initial permutation for next block mixed with
635 ! final permutation for current block.
637 ! parameter 1 original left
638 ! parameter 2 original right
639 ! parameter 3 left ip
640 ! parameter 4 right ip
641 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
644 ! also adds -8 to length in2 and loads loop counter to out4
! {fp_ip_macro}: final permutation of the current block interleaved with the
! initial permutation of the next block. out4 is borrowed as a temporary and is
! reloaded with the loop counter before the macro ends.
646 define(fp_ip_macro, {
649 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
651 define({temp1},{out4})
652 define({temp2},{local3})
654 define({ip1},{local1})
655 define({ip2},{local2})
656 define({ip4},{local4})
657 define({ip5},{local5})
659 ! $1 in local3, local4
663 or local3, local4, $1
666 ifelse($5,2,{mov in4, in3})
675 and temp1, ip5, temp1
676 xor local0, $3, local0
681 and local0, ip1, local0
684 sll local0, 4, local7
694 xor local0, $4, local0
695 and temp1, ip4, temp1
696 and local0, ip2, local0
699 sll local0, 16, local7
705 ld [out2+264], temp2 ! ip3
709 xor local0, $3, local0
710 and temp1, temp2, temp1
711 and local0, temp2, local0
714 sll local0, 2, local7
722 xor local0, $4, local0
723 and temp1, ip2, temp1
724 and local0, ip4, local0
726 xor $2, temp1, local4
727 sll local0, 8, local7
734 xor local0, $3, local0
737 and local0, ip5, local0
739 sll local0, 1, local7
746 and temp1, ip1, temp1
751 ifelse($5,1,{LDPTR KS2, in4})
753 xor local4, temp2, $2
755 ! reload since used as temporary:
757 ld [out2+280], out4 ! loop counter
760 ifelse($5,1,{add in4, 120, in4})
762 ifelse($5,1,{LDPTR KS1, in3})
765 or local0, local5, $4
766 or local2, local7, $3
772 ! {load_little_endian}
! Loads one 8-byte block as two 32-bit halves, little endian.
774 ! parameter 1 address
775 ! parameter 2 destination left
776 ! parameter 3 destination right
777 ! parameter 4 temporary
780 define(load_little_endian, {
782 ! {load_little_endian}
783 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
785 ! first in memory to rightmost in register
! The UltraSPARC build takes the branch selected by the conditional below.
787 #ifdef OPENSSL_SYSNAME_ULTRASPARC
833 ! {load_little_endian_inc}
! Same parameter layout as {load_little_endian}.
! NOTE(review): the name suggests the address register is also advanced; the
! instructions that would show this are not among the lines reviewed - confirm.
835 ! parameter 1 address
836 ! parameter 2 destination left
837 ! parameter 3 destination right
838 ! parameter 4 temporary
843 define(load_little_endian_inc, {
845 ! {load_little_endian_inc}
846 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
848 ! first in memory to rightmost in register
850 #ifdef OPENSSL_SYSNAME_ULTRASPARC
899 ! Loads 1 to 7 bytes little endian
900 ! Remaining bytes are zeroed.
! Callers in this file pass: address, length, dest left, dest right, temp, temp2,
! label prefix, return label.
902 ! parameter 1 address
904 ! parameter 3 destination register left
905 ! parameter 4 destination register right
909 ! parameter 8 return label
911 define(load_n_bytes, {
914 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
! Form the address of the jump table of this expansion relative to %o7;
! parameter 7 is the label prefix and parameter 5 receives the result.
919 add %o7,$7.jmp.table-$7.0,$5
971 ! {store_little_endian}
! Stores one 8-byte block from two 32-bit halves, little endian.
973 ! parameter 1 address
974 ! parameter 2 source left
975 ! parameter 3 source right
976 ! parameter 4 temporary
978 define(store_little_endian, {
980 ! {store_little_endian}
981 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
983 ! rightmost in register to first in memory
985 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1034 ! Stores 1 to 7 bytes little endian
! Callers in this file pass: address, length, source left, source right, temp,
! temp2, label prefix, return label.
1036 ! parameter 1 address
1037 ! parameter 2 length
1038 ! parameter 3 source register left
1039 ! parameter 4 source register right
1043 ! parameter 8 return label
1045 define(store_n_bytes, {
1048 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
! Form the address of the jump table of this expansion relative to %o7;
! parameter 7 is the label prefix and parameter 5 receives the result.
1053 add %o7,$7.jmp.table-$7.0,$5
! Test aid: a constant and a macro that copies it into every register named
! in the (up to eight) macro arguments. Empty arguments are skipped.
1106 define(testvalue,{1})
1108 define(register_init, {
1110 ! For test purposes:
1112 sethi %hi(testvalue), local0
1113 or local0, %lo(testvalue), local0
1115 ifelse($1,{},{}, {mov local0, $1})
1116 ifelse($2,{},{}, {mov local0, $2})
1117 ifelse($3,{},{}, {mov local0, $3})
1118 ifelse($4,{},{}, {mov local0, $4})
1119 ifelse($5,{},{}, {mov local0, $5})
1120 ifelse($6,{},{}, {mov local0, $6})
1121 ifelse($7,{},{}, {mov local0, $7})
1122 ifelse($8,{},{}, {mov local0, $8})
! Shared round routines reached by call from the ede3 code.
! Encryption: work half in5, key address register in3, direction 1, retl included.
1152 ! loads key next encryption/decryption first round from [in4]
1154 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
! Decryption: work half out5, key address register in4, direction -1, retl included.
1161 ! implemented with out5 as first parameter to avoid
1162 ! register exchange in ede modes
1165 ! loads key next encryption/decryption first round from [in3]
1167 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1171 ! void DES_encrypt1(data, ks, enc)
1172 ! *******************************
! Single DES on one block in place: in0 = data, in1 = key schedule, in2 = enc flag.
! Both halves are loaded from [in0] and stored back to [in0] by the final permutation.
1175 .global DES_encrypt1
1176 .type DES_encrypt1,#function
1180 save %sp, FRAME, %sp ! new register window and stack frame
1183 mov .PIC.me.up-(.-4),out0 ! offset handed to .PIC.me.up in out0
1185 ld [in0], in5 ! left
1188 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1189 be,pn %icc, .encrypt.dec ! enc/dec
1193 ld [in0+4], out5 ! right
1195 ! parameter 6 1/2 for include encryption/decryption
1196 ! parameter 7 1 for move in1 to in3
1197 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1199 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1201 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1203 fp_macro(in5, out5, 1) ! 1 for store to [in0]
! Decryption path: the key schedule is walked backwards from its last subkey.
1210 add in1, 120, in3 ! use last subkey for first round
1212 ! parameter 6 1/2 for include encryption/decryption
1213 ! parameter 7 1 for move in1 to in3
1214 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1216 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
1218 fp_macro(out5, in5, 1) ! 1 for store to [in0]
1224 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1227 ! void DES_encrypt2(data, ks, enc)
1228 !*********************************
1230 ! encrypts/decrypts without initial/final permutation
! Because no permutation macro runs here, the sbox base registers are set up by hand.
1233 .global DES_encrypt2
1234 .type DES_encrypt2,#function
1238 save %sp, FRAME, %sp ! new register window and stack frame
1241 mov .PIC.me.up-(.-4),out0 ! offset handed to .PIC.me.up in out0
1243 ! Set sbox address 1 to 6 and rotate halfs 3 left
1244 ! Errors caught by destest? Yes. Still? *NO*
1246 !sethi %hi(DES_SPtrans), global1 ! address sbox 1
1248 !or global1, %lo(DES_SPtrans), global1 ! sbox 1
1250 add global1, 256, global2 ! sbox 2
1251 add global1, 512, global3 ! sbox 3
1253 ld [in0], out5 ! right
1254 add global1, 768, global4 ! sbox 4
1255 add global1, 1024, global5 ! sbox 5
1257 ld [in0+4], in5 ! left
1258 add global1, 1280, local6 ! sbox 6
1259 add global1, 1792, out3 ! sbox 8
1264 mov in1, in3 ! key address to in3
1270 add in5, local5, in5
1272 add out5, local7, out5
1275 ! we use our own stackframe
1277 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1278 be,pn %icc, .encrypt2.dec ! decryption
1282 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! keep the data pointer in our frame
1284 ld [in3], out0 ! key 7531 first round
1285 mov LOOPS, out4 ! loop counter
1287 ld [in3+4], out1 ! key 8642 first round
1288 sethi %hi(0x0000FC00), local5 ! mask 0x0000FC00 used in the rounds
1299 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! reload the data pointer saved above
1312 ld [in4], out0 ! key 7531 first round
1313 mov LOOPS, out4 ! loop counter
1315 ld [in4+4], out1 ! key 8642 first round
1316 sethi %hi(0x0000FC00), local5 ! mask 0x0000FC00 used in the rounds
1318 mov in5, local1 ! left expected in out5
1332 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! reload the data pointer saved above
1341 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1344 ! void DES_encrypt3(data, ks1, ks2, ks3)
1345 ! **************************************
! Triple DES encryption of one block in place: in0 = data, in1..in3 = ks1..ks3.
! The block is loaded from [in0] and stored back to [in0] by the final permutation.
1348 .global DES_encrypt3
1349 .type DES_encrypt3,#function
1353 save %sp, FRAME, %sp ! new register window and stack frame
1356 mov .PIC.me.up-(.-4),out0 ! offset handed to .PIC.me.up in out0
1358 ld [in0], in5 ! left
1359 add in2, 120, in4 ! ks2
1361 ld [in0+4], out5 ! right
1362 mov in3, in2 ! save ks3
1364 ! parameter 6 1/2 for include encryption/decryption
1365 ! parameter 7 1 for mov in1 to in3
1366 ! parameter 8 1 for mov in3 to in4
1367 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1369 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1372 mov in2, in3 ! preload ks3
1377 fp_macro(in5, out5, 1)
1383 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1386 ! void DES_decrypt3(data, ks1, ks2, ks3)
1387 ! **************************************
! Triple DES decryption of one block in place: in0 = data, in1..in3 = ks1..ks3.
! Decryption starts with ks3, addressed from its last subkey (offset 120).
1390 .global DES_decrypt3
1391 .type DES_decrypt3,#function
1395 save %sp, FRAME, %sp ! new register window and stack frame
1398 mov .PIC.me.up-(.-4),out0 ! offset handed to .PIC.me.up in out0
1400 ld [in0], in5 ! left
1401 add in3, 120, in4 ! ks3
1403 ld [in0+4], out5 ! right
1406 ! parameter 6 1/2 for include encryption/decryption
1407 ! parameter 7 1 for mov in1 to in3
1408 ! parameter 8 1 for mov in3 to in4
1409 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1411 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1414 add in1, 120, in4 ! preload ks1
1419 fp_macro(out5, in5, 1)
1425 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
! Address setup helper. Every entry point in this file loads the offset
! expression into out0 first; presumably it then reaches this code by a call,
! since %o7 is added to that offset below.
1427 ! input: out0 offset between .PIC.me.up and caller
1428 ! output: out0 pointer to .PIC.me.up
1429 ! out2 pointer to .des_and
1430 ! global1 pointer to DES_SPtrans
1433 add out0,%o7,out0 ! pointer to .PIC.me.up
1436 ! In case anybody wonders why this code is same for both ABI.
1437 ! To start with it's not. Do note LDPTR below. But of course
1438 ! you must be wondering why the rest of it doesn't contain
1439 ! things like %hh, %hm and %lm. Well, those are needed only
1440 ! if OpenSSL library *itself* will become larger than 4GB,
1441 ! which is not going to happen any time soon.
! Variant that fetches the DES_SPtrans address through the global offset table.
1442 sethi %hi(DES_SPtrans),global1
1443 or global1,%lo(DES_SPtrans),global1
1444 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1445 add global1,out0,global1
1446 add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1447 LDPTR [out2+global1],global1
1449 setn DES_SPtrans,out2,global1 ! synthetic instruction !
1450 #elif defined(ABI64)
! 64-bit absolute address assembled from the hh/hm and lm/lo parts.
1451 sethi %hh(DES_SPtrans),out2
1452 or out2,%hm(DES_SPtrans),out2
1453 sethi %lm(DES_SPtrans),global1
1454 or global1,%lo(DES_SPtrans),global1
1456 or out2,global1,global1 ! merge both parts into global1
! 32-bit absolute address.
1458 sethi %hi(DES_SPtrans),global1
1459 or global1,%lo(DES_SPtrans),global1
1463 add out0,.des_and-.PIC.me.up,out2 ! out2 = address of .des_and
1466 .type .des_and,#object
1471 ! This table is used for AND 0xFC when it is known that register
1472 ! bits 8-31 are zero. Makes it possible to do three arithmetic
1473 ! operations in one cycle.
! The byte at index i holds i with its two low bits cleared (i AND 0xFC), 256 entries.
1475 .byte 0, 0, 0, 0, 4, 4, 4, 4
1476 .byte 8, 8, 8, 8, 12, 12, 12, 12
1477 .byte 16, 16, 16, 16, 20, 20, 20, 20
1478 .byte 24, 24, 24, 24, 28, 28, 28, 28
1479 .byte 32, 32, 32, 32, 36, 36, 36, 36
1480 .byte 40, 40, 40, 40, 44, 44, 44, 44
1481 .byte 48, 48, 48, 48, 52, 52, 52, 52
1482 .byte 56, 56, 56, 56, 60, 60, 60, 60
1483 .byte 64, 64, 64, 64, 68, 68, 68, 68
1484 .byte 72, 72, 72, 72, 76, 76, 76, 76
1485 .byte 80, 80, 80, 80, 84, 84, 84, 84
1486 .byte 88, 88, 88, 88, 92, 92, 92, 92
1487 .byte 96, 96, 96, 96, 100, 100, 100, 100
1488 .byte 104, 104, 104, 104, 108, 108, 108, 108
1489 .byte 112, 112, 112, 112, 116, 116, 116, 116
1490 .byte 120, 120, 120, 120, 124, 124, 124, 124
1491 .byte 128, 128, 128, 128, 132, 132, 132, 132
1492 .byte 136, 136, 136, 136, 140, 140, 140, 140
1493 .byte 144, 144, 144, 144, 148, 148, 148, 148
1494 .byte 152, 152, 152, 152, 156, 156, 156, 156
1495 .byte 160, 160, 160, 160, 164, 164, 164, 164
1496 .byte 168, 168, 168, 168, 172, 172, 172, 172
1497 .byte 176, 176, 176, 176, 180, 180, 180, 180
1498 .byte 184, 184, 184, 184, 188, 188, 188, 188
1499 .byte 192, 192, 192, 192, 196, 196, 196, 196
1500 .byte 200, 200, 200, 200, 204, 204, 204, 204
1501 .byte 208, 208, 208, 208, 212, 212, 212, 212
1502 .byte 216, 216, 216, 216, 220, 220, 220, 220
1503 .byte 224, 224, 224, 224, 228, 228, 228, 228
1504 .byte 232, 232, 232, 232, 236, 236, 236, 236
1505 .byte 240, 240, 240, 240, 244, 244, 244, 244
1506 .byte 248, 248, 248, 248, 252, 252, 252, 252
1508 ! 5 numbers for initial/final permutation
1510 .word 0x0f0f0f0f ! offset 256
1511 .word 0x0000ffff ! 260
1512 .word 0x33333333 ! 264
1513 .word 0x00ff00ff ! 268
1514 .word 0x55555555 ! 272
1518 .word 0x0000FC00 ! 284
1520 ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1521 ! *****************************************************************
! CBC mode single DES. Incoming registers: in0 = input, in1 = output,
! in2 = length, in3 = schedule, in4 = ivec, in5 = enc.
! The encrypt path chains each ciphertext block into the next block by xor;
! the decrypt path keeps the previous ciphertext block in in0/in1 as the iv.
1525 .global DES_ncbc_encrypt
1526 .type DES_ncbc_encrypt,#function
1530 save %sp, FRAME, %sp ! new register window and stack frame
! Frame slots used to park the input, output and ivec pointers.
1532 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
1533 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1534 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1537 mov .PIC.me.up-(.-4),out0 ! offset handed to .PIC.me.up in out0
1541 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1542 be,pn %icc, .ncbc.dec
1548 ! addr left right temp label
1549 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
1551 addcc in2, -8, in2 ! bytes missing when first block done
1553 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1554 bl,pn %icc, .ncbc.enc.seven.or.less
1556 bl .ncbc.enc.seven.or.less
1558 mov in3, in4 ! schedule
1560 .ncbc.enc.next.block:
1562 load_little_endian(in0, out4, global4, local3, .LLE2) ! block
1564 .ncbc.enc.next.block_1:
1566 xor in5, out4, in5 ! iv xor
1567 xor out5, global4, out5 ! iv xor
1569 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1570 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1572 .ncbc.enc.next.block_2:
1574 !// call .des_enc ! compares in2 to 8
1575 ! rounds inlined for alignment purposes
1577 add global1, 768, global4 ! address sbox 4 since register used below
1579 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
1581 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1582 bl,pn %icc, .ncbc.enc.next.block_fp
1584 bl .ncbc.enc.next.block_fp
1586 add in0, 8, in0 ! input address
1588 ! If 8 or more bytes are to be encrypted after this block,
1589 ! we combine final permutation for this block with initial
1590 ! permutation for next block. Load next block:
1592 load_little_endian(in0, global3, global4, local5, .LLE12)
1594 ! parameter 1 original left
1595 ! parameter 2 original right
1596 ! parameter 3 left ip
1597 ! parameter 4 right ip
1598 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1601 ! also adds -8 to length in2 and loads loop counter to out4
1603 fp_ip_macro(out0, out1, global3, global4, 2)
1605 store_little_endian(in1, out0, out1, local3, .SLE10) ! block
1607 ld [in3], out0 ! key 7531 first round next block
1609 xor global3, out5, in5 ! iv xor next block
1611 ld [in3+4], out1 ! key 8642
1612 add global1, 512, global3 ! address sbox 3 since register used
1613 xor global4, local1, out5 ! iv xor next block
1615 ba .ncbc.enc.next.block_2
1616 add in1, 8, in1 ! output address
1618 .ncbc.enc.next.block_fp:
1622 store_little_endian(in1, in5, out5, local3, .SLE1) ! block
1624 addcc in2, -8, in2 ! bytes missing when next block done
1626 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1627 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
1629 bpos .ncbc.enc.next.block
1633 .ncbc.enc.seven.or.less:
1637 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1638 ble,pt %icc, .ncbc.enc.finish
1640 ble .ncbc.enc.finish
1644 add in2, 8, local1 ! bytes to load
1646 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1647 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1649 ! Loads 1 to 7 bytes little endian to global4, out4
1655 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
1667 LDPTR IVEC, local7 ! ivec
1668 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1669 ble,pn %icc, .ncbc.dec.finish
1671 ble .ncbc.dec.finish
1673 mov in3, in4 ! schedule
1676 mov in0, local5 ! input
1678 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
1680 .ncbc.dec.next.block:
1682 load_little_endian(local5, in5, out5, local3, .LLE4) ! block
1684 ! parameter 6 1/2 for include encryption/decryption
1685 ! parameter 7 1 for mov in1 to in3
1686 ! parameter 8 1 for mov in3 to in4
1688 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
1690 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1692 ! in2 is bytes left to be stored
1693 ! in2 is compared to 8 in the rounds
1695 xor out5, in0, out4 ! iv xor
1696 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1697 bl,pn %icc, .ncbc.dec.seven.or.less
1699 bl .ncbc.dec.seven.or.less
1701 xor in5, in1, global4 ! iv xor
1703 ! Load ivec next block now, since input and output address might be the same.
1705 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
1707 store_little_endian(local7, out4, global4, local3, .SLE3)
1710 add local7, 8, local7
1713 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1714 bg,pt %icc, .ncbc.dec.next.block
1716 bg .ncbc.dec.next.block
1718 STPTR local7, OUTPUT
1723 LDPTR IVEC, local4 ! ivec
1724 store_little_endian(local4, in0, in1, local5, .SLE4)
1731 .ncbc.dec.seven.or.less:
1733 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
1735 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1738 .DES_ncbc_encrypt.end:
1739 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1742 ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1743 ! **************************************************************************
! CBC mode triple DES. Incoming registers: in0 = input, in1 = output,
! in2 = length, in3 = ks1, in4 = ks2, in5 = ks3. The ivec and enc arguments
! are read from the argument area of the caller through %fp.
! Encryption runs enc(ks1), dec(ks2), enc(ks3) per block; decryption runs
! dec(ks3), enc(ks2), dec(ks1) and keeps the previous ciphertext in in0/in1.
1747 .global DES_ede3_cbc_encrypt
1748 .type DES_ede3_cbc_encrypt,#function
1750 DES_ede3_cbc_encrypt:
1752 save %sp, FRAME, %sp ! new register window and stack frame
! Frame slots used to park the three key schedule pointers.
1754 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1755 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1756 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1759 mov .PIC.me.up-(.-4),out0 ! offset handed to .PIC.me.up in out0
1761 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
1762 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1765 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1766 be,pn %icc, .ede3.dec
1774 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
1776 addcc in2, -8, in2 ! bytes missing after next block
1778 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1779 bl,pn %icc, .ede3.enc.seven.or.less
1781 bl .ede3.enc.seven.or.less
1785 .ede3.enc.next.block:
1787 load_little_endian(in0, out4, global4, local3, .LLE7)
1789 .ede3.enc.next.block_1:
1792 xor in5, out4, in5 ! iv xor
1793 xor out5, global4, out5 ! iv xor
1796 add in4, 120, in4 ! for decryption we use last subkey first
1799 ip_macro(in5, out5, in5, out5, in3)
1801 .ede3.enc.next.block_2:
1803 call .des_enc ! ks1 in3
1806 call .des_dec ! ks2 in4
1809 call .des_enc ! ks3 in3 compares in2 to 8
1812 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1813 bl,pn %icc, .ede3.enc.next.block_fp
1815 bl .ede3.enc.next.block_fp
1819 ! If 8 or more bytes are to be encrypted after this block,
1820 ! we combine final permutation for this block with initial
1821 ! permutation for next block. Load next block:
1823 load_little_endian(in0, global3, global4, local5, .LLE11)
1825 ! parameter 1 original left
1826 ! parameter 2 original right
1827 ! parameter 3 left ip
1828 ! parameter 4 right ip
1829 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1832 ! also adds -8 to length in2 and loads loop counter to out4
1834 fp_ip_macro(out0, out1, global3, global4, 1)
1836 store_little_endian(in1, out0, out1, local3, .SLE9) ! block
1839 xor global3, out5, in5 ! iv xor next block
1841 ld [in3], out0 ! key 7531
1842 add global1, 512, global3 ! address sbox 3
1843 xor global4, local1, out5 ! iv xor next block
1845 ld [in3+4], out1 ! key 8642
1846 add global1, 768, global4 ! address sbox 4
1847 ba .ede3.enc.next.block_2
1850 .ede3.enc.next.block_fp:
1854 store_little_endian(in1, in5, out5, local3, .SLE5) ! block
1856 addcc in2, -8, in2 ! bytes missing when next block done
1858 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1859 bpos,pt %icc, .ede3.enc.next.block
1861 bpos .ede3.enc.next.block
1865 .ede3.enc.seven.or.less:
1869 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1870 ble,pt %icc, .ede3.enc.finish
1872 ble .ede3.enc.finish
1876 add in2, 8, local1 ! bytes to load
1878 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1879 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1883 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1884 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
1901 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1902 ble,pn %icc, .ede3.dec.finish ! predicted not taken, same hint as the ncbc.dec branch
1904 ble .ede3.dec.finish
1908 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
1909 load_little_endian(local7, in0, in1, local3, .LLE8)
1911 .ede3.dec.next.block:
1913 load_little_endian(local5, in5, out5, local3, .LLE9)
1915 ! parameter 6 1/2 for include encryption/decryption
1916 ! parameter 7 1 for mov in1 to in3
1917 ! parameter 8 1 for mov in3 to in4
1918 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1920 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1922 call .des_enc ! ks2 in3
1925 call .des_dec ! ks1 in4
1928 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
1930 ! in2 is bytes left to be stored
1931 ! in2 is compared to 8 in the rounds
1934 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1935 bl,pn %icc, .ede3.dec.seven.or.less
1937 bl .ede3.dec.seven.or.less
1939 xor in5, in1, global4 ! iv xor
1941 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
1943 store_little_endian(local7, out4, global4, local3, .SLE7) ! block
1947 add local7, 8, local7
1949 #ifdef OPENSSL_SYSNAME_ULTRASPARC
1950 bg,pt %icc, .ede3.dec.next.block
1952 bg .ede3.dec.next.block
1954 STPTR local7, OUTPUT
1958 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1959 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
1966 .ede3.dec.seven.or.less:
1968 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
1970 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1973 .DES_ede3_cbc_encrypt.end:
1974 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt