-! des_enc.m4
-! des_enc.S (generated from des_enc.m4)
+! Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
!
-! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
-!
-! Version 1.0. 32-bit version.
-!
-! June 8, 2000.
-!
-! Assembler version: Copyright Svend Olaf Mikkelsen.
-!
-! Original C code: Copyright Eric A. Young.
-!
-! This code can be freely used by LibDES/SSLeay/OpenSSL users.
-!
-! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
-!
-! This version can be redistributed.
+! Licensed under the OpenSSL license (the "License"). You may not use
+! this file except in compliance with the License. You can obtain a copy
+! in the file LICENSE in the source distribution or at
+! https://www.openssl.org/source/license.html
!
! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
!
! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
!
! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
+!
+! Performance improvement according to './apps/openssl speed des'
+!
+! 32-bit build:
+! 23% faster than cc-5.2 -xarch=v8plus -xO5
+! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
+! 64-bit build:
+! 50% faster than cc-5.2 -xarch=v9 -xO5
+! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
+!
-
-.ident "des_enc.m4 1.0"
+.ident "des_enc.m4 2.1"
+.file "des_enc-sparc.S"
+
+#include <openssl/opensslconf.h>
+
+#ifdef OPENSSL_FIPSCANISTER
+#include <openssl/fipssyms.h>
+#endif
+
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+# define ABI64 /* They've said -xarch=v9 at command line */
+#elif defined(__GNUC__) && defined(__arch64__)
+# define ABI64 /* They've said -m64 at command line */
+#endif
+
+#ifdef ABI64
+ .register %g2,#scratch
+ .register %g3,#scratch
+# define FRAME -192
+# define BIAS 2047
+# define LDPTR ldx
+# define STPTR stx
+# define ARG0 128
+# define ARGSZ 8
+#else
+# define FRAME -96
+# define BIAS 0
+# define LDPTR ld
+# define STPTR st
+# define ARG0 68
+# define ARGSZ 4
+#endif
#define LOOPS 7
#define out6 %o6
#define out7 %o7
+#define stub stb
changequote({,})
srl $1, 16, local4
xor $2, local1, $2
- ifelse($9,1,{ld KS3, in4},{})
+ ifelse($9,1,{LDPTR KS3, in4},{})
xor local4, $2, local4
- sethi %hi(des_SPtrans), global1 ! sbox addr
+ nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
- ifelse($9,1,{ld KS2, in3},{})
+ ifelse($9,1,{LDPTR KS2, in3},{})
and local4, local2, local4
- or global1, %lo(des_SPtrans), global1 ! sbox addr
+ nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
sll local4, 16, local1
xor $2, local4, $2
! other half (use).
!
! In this version we do two rounds in a loop repeated 7 times
-! and two rounds seperately.
+! and two rounds separately.
!
! One half has the bits for the sboxes in the following positions:
!
xor $2, out0, local1
ld [out2+284], local5 ! 0x0000FC00
- ba,pt %icc, $4
+ ba $4
and local1, 252, local1
.align 32
ld [global1+local1], local1
xor $2, out1, out1 ! 8642
xor $2, out0, out0 ! 7531
- fxor %f0, %f0, %f0 ! fxor used for alignment
+ ! fmovs %f0, %f0 ! fxor used for alignment
srl out1, 4, local0 ! rotate 4 right
and out0, local5, local3 ! 3
- fxor %f0, %f0, %f0
+ ! fmovs %f0, %f0
ld [$5+$3*8], local7 ! key 7531 next round
srl local3, 8, local3 ! 3
and local0, 252, local2 ! 2
- fxor %f0, %f0, %f0
+ ! fmovs %f0, %f0
ld [global3+local3],local3 ! 3
sll out1, 28, out1 ! rotate
xor $2, local1, $2 ! 1 finished
xor $2, local2, $2 ! 3 finished
- bne,pt %icc, $4
+ bne $4
and local4, 252, local1 ! sbox 1 next round
! two rounds more:
sethi %hi(0x0f0f0f0f), local4
sll local3, 2, local2
- ifelse($4,1, {ld INPUT, local5})
+ ifelse($4,1, {LDPTR INPUT, local5})
xor $1, local3, $1
- ifelse($4,1, {ld OUTPUT, local7})
+ ifelse($4,1, {LDPTR OUTPUT, local7})
srl $1, 16, local3
xor $2, local2, $2
xor local3, $2, local3
sll temp1, 4, temp2
xor $1, temp1, $1
- ifelse($5,1,{ld KS2, in4})
+ ifelse($5,1,{LDPTR KS2, in4})
sll $4, 3, local2
xor local4, temp2, $2
srl $3, 29, local0
ifelse($5,1,{add in4, 120, in4})
- ifelse($5,1,{ld KS1, in3})
+ ifelse($5,1,{LDPTR KS1, in3})
srl $4, 29, local7
or local0, local5, $4
! first in memory to rightmost in register
- andcc $1, 3, global0
- bne,pn %icc, $5
- nop
-
- lda [$1] 0x88, $2
- add $1, 4, $4
-
- ba,pt %icc, $5a
- lda [$4] 0x88, $3
-
$5:
ldub [$1+3], $2
! first in memory to rightmost in register
- andcc $1, 3, global0
- bne,pn %icc, $5
- nop
-
- lda [$1] 0x88, $2
- add $1, 4, $1
-
- lda [$1] 0x88, $3
- ba,pt %icc, $5a
- add $1, 4, $1
-
$5:
ldub [$1+3], $2
! {load_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9
+$7.0: call .+8
sll $2, 2, $6
- sethi %hi($7.jmp.table), $5
- or $5, %lo($7.jmp.table), $5
+ add %o7,$7.jmp.table-$7.0,$5
add $5, $6, $5
mov 0, $4
ld [$5], $5
- jmp $5
+ jmp %o7+$5
mov 0, $3
$7.7:
or $4, $5, $4
$7.1:
ldub [$1+0], $5
- ba,pt %icc, $8
+ ba $8
or $4, $5, $4
.align 4
$7.jmp.table:
.word 0
- .word $7.1
- .word $7.2
- .word $7.3
- .word $7.4
- .word $7.5
- .word $7.6
- .word $7.7
+ .word $7.1-$7.0
+ .word $7.2-$7.0
+ .word $7.3-$7.0
+ .word $7.4-$7.0
+ .word $7.5-$7.0
+ .word $7.6-$7.0
+ .word $7.7-$7.0
})
! rightmost in register to first in memory
- andcc $1, 3, global0
- bne,pn %icc, $5
- nop
-
- sta $2, [$1] 0x88
- add $1, 4, $4
-
- ba,pt %icc, $5a
- sta $3, [$4] 0x88
-
$5:
and $2, 255, $4
stub $4, [$1+0]
! {store_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9
+$7.0: call .+8
sll $2, 2, $6
- sethi %hi($7.jmp.table), $5
- or $5, %lo($7.jmp.table), $5
+ add %o7,$7.jmp.table-$7.0,$5
add $5, $6, $5
ld [$5], $5
- jmp $5
+
+ jmp %o7+$5
nop
$7.7:
and $4, 0xff, $5
- ba,pt %icc, $8
+ ba $8
stub $5, [$1]
.align 4
$7.jmp.table:
.word 0
- .word $7.1
- .word $7.2
- .word $7.3
- .word $7.4
- .word $7.5
- .word $7.6
- .word $7.7
+ .word $7.1-$7.0
+ .word $7.2-$7.0
+ .word $7.3-$7.0
+ .word $7.4-$7.0
+ .word $7.5-$7.0
+ .word $7.6-$7.0
+ .word $7.7-$7.0
})
})
- .global .des_and
-.section ".rodata"
- .align 8
- .type .des_and,#object
- .size .des_and,284
-
-.des_and:
-
-! This table is used for AND 0xFC when it is known that register
-! bits 8-31 are zero. Makes it possible to do three arithmetic
-! operations in one cycle.
-
- .byte 0, 0, 0, 0, 4, 4, 4, 4
- .byte 8, 8, 8, 8, 12, 12, 12, 12
- .byte 16, 16, 16, 16, 20, 20, 20, 20
- .byte 24, 24, 24, 24, 28, 28, 28, 28
- .byte 32, 32, 32, 32, 36, 36, 36, 36
- .byte 40, 40, 40, 40, 44, 44, 44, 44
- .byte 48, 48, 48, 48, 52, 52, 52, 52
- .byte 56, 56, 56, 56, 60, 60, 60, 60
- .byte 64, 64, 64, 64, 68, 68, 68, 68
- .byte 72, 72, 72, 72, 76, 76, 76, 76
- .byte 80, 80, 80, 80, 84, 84, 84, 84
- .byte 88, 88, 88, 88, 92, 92, 92, 92
- .byte 96, 96, 96, 96, 100, 100, 100, 100
- .byte 104, 104, 104, 104, 108, 108, 108, 108
- .byte 112, 112, 112, 112, 116, 116, 116, 116
- .byte 120, 120, 120, 120, 124, 124, 124, 124
- .byte 128, 128, 128, 128, 132, 132, 132, 132
- .byte 136, 136, 136, 136, 140, 140, 140, 140
- .byte 144, 144, 144, 144, 148, 148, 148, 148
- .byte 152, 152, 152, 152, 156, 156, 156, 156
- .byte 160, 160, 160, 160, 164, 164, 164, 164
- .byte 168, 168, 168, 168, 172, 172, 172, 172
- .byte 176, 176, 176, 176, 180, 180, 180, 180
- .byte 184, 184, 184, 184, 188, 188, 188, 188
- .byte 192, 192, 192, 192, 196, 196, 196, 196
- .byte 200, 200, 200, 200, 204, 204, 204, 204
- .byte 208, 208, 208, 208, 212, 212, 212, 212
- .byte 216, 216, 216, 216, 220, 220, 220, 220
- .byte 224, 224, 224, 224, 228, 228, 228, 228
- .byte 232, 232, 232, 232, 236, 236, 236, 236
- .byte 240, 240, 240, 240, 244, 244, 244, 244
- .byte 248, 248, 248, 248, 252, 252, 252, 252
-
- ! 5 numbers for initil/final permutation
-
- .word 0x0f0f0f0f ! offset 256
- .word 0x0000ffff ! 260
- .word 0x33333333 ! 264
- .word 0x00ff00ff ! 268
- .word 0x55555555 ! 272
-
- .word 0 ! 276
- .word LOOPS ! 280
- .word 0x0000FC00 ! 284
-
-
.section ".text"
.align 32
-! void des_encrypt(data, ks, enc)
+! void DES_encrypt1(data, ks, enc)
! *******************************
.align 32
- .global des_encrypt
- .type des_encrypt,#function
+ .global DES_encrypt1
+ .type DES_encrypt1,#function
+
+DES_encrypt1:
-des_encrypt:
+ save %sp, FRAME, %sp
- save %sp, -96, %sp
+ sethi %hi(.PIC.DES_SPtrans-1f),global1
+ or global1,%lo(.PIC.DES_SPtrans-1f),global1
+1: call .+8
+ add %o7,global1,global1
+ sub global1,.PIC.DES_SPtrans-.des_and,out2
ld [in0], in5 ! left
- sethi %hi(.des_and), out2 ! address constants
cmp in2, 0 ! enc
+ be .encrypt.dec
ld [in0+4], out5 ! right
- be,pn %icc, .encrypt.dec ! enc/dec
- or out2, %lo(.des_and), out2 ! address constants
! parameter 6 1/2 for include encryption/decryption
! parameter 7 1 for move in1 to in3
ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
- rounds_macro(in5, out5, 1, .des_encrypt.1, in3, in4) ! in4 not used
+ rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
fp_macro(in5, out5, 1) ! 1 for store to [in0]
- return in7+8
- nop
+ ret
+ restore
.encrypt.dec:
fp_macro(out5, in5, 1) ! 1 for store to [in0]
- return in7+8
- nop
+ ret
+ restore
-.des_encrypt.end:
- .size des_encrypt,.des_encrypt.end-des_encrypt
+.DES_encrypt1.end:
+ .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
-! void des_encrypt2(data, ks, enc)
+! void DES_encrypt2(data, ks, enc)
!*********************************
! encrypts/decrypts without initial/final permutation
.align 32
- .global des_encrypt2
- .type des_encrypt2,#function
+ .global DES_encrypt2
+ .type DES_encrypt2,#function
-des_encrypt2:
+DES_encrypt2:
- save %sp, -112, %sp
+ save %sp, FRAME, %sp
+
+ sethi %hi(.PIC.DES_SPtrans-1f),global1
+ or global1,%lo(.PIC.DES_SPtrans-1f),global1
+1: call .+8
+ add %o7,global1,global1
+ sub global1,.PIC.DES_SPtrans-.des_and,out2
! Set sbox address 1 to 6 and rotate halfs 3 left
! Errors caught by destest? Yes. Still? *NO*
- sethi %hi(des_SPtrans), global1 ! address sbox 1
- sethi %hi(.des_and), out2 ! address constants
+ !sethi %hi(DES_SPtrans), global1 ! address sbox 1
- or global1, %lo(des_SPtrans), global1 ! sbox 1
- or out2, %lo(.des_and), out2 ! adress constants
+ !or global1, %lo(DES_SPtrans), global1 ! sbox 1
add global1, 256, global2 ! sbox 2
add global1, 512, global3 ! sbox 3
! we use our own stackframe
- be,pn %icc, .encrypt2.dec ! decryption
- st in0, [%sp+68]
+ be .encrypt2.dec
+ STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ]
ld [in3], out0 ! key 7531 first round
mov LOOPS, out4 ! loop counter
sll out5, 29, in1
add in5, in0, in5
srl out5, 3, out5
- ld [%sp+68], in0
+ LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
add out5, in1, out5
st in5, [in0]
st out5, [in0+4]
- return in7+8
- nop
+ ret
+ restore
.encrypt2.dec:
sll out5, 29, in1
add in5, in0, in5
srl out5, 3, out5
- ld [%sp+68], in0
+ LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
add out5, in1, out5
st out5, [in0]
st in5, [in0+4]
- return in7+8
- nop
+ ret
+ restore
-.des_encrypt2.end:
- .size des_encrypt2, .des_encrypt2.end-des_encrypt2
+.DES_encrypt2.end:
+ .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
-! void des_encrypt3(data, ks1, ks2, ks3)
+! void DES_encrypt3(data, ks1, ks2, ks3)
! **************************************
.align 32
- .global des_encrypt3
- .type des_encrypt3,#function
+ .global DES_encrypt3
+ .type DES_encrypt3,#function
-des_encrypt3:
+DES_encrypt3:
- save %sp, -96, %sp
+ save %sp, FRAME, %sp
+ sethi %hi(.PIC.DES_SPtrans-1f),global1
+ or global1,%lo(.PIC.DES_SPtrans-1f),global1
+1: call .+8
+ add %o7,global1,global1
+ sub global1,.PIC.DES_SPtrans-.des_and,out2
+
ld [in0], in5 ! left
add in2, 120, in4 ! ks2
- sethi %hi(.des_and), out2 ! address constants
ld [in0+4], out5 ! right
mov in3, in2 ! save ks3
- or out2, %lo(.des_and), out2 ! address constants
! parameter 6 1/2 for include encryption/decryption
! parameter 7 1 for mov in1 to in3
fp_macro(in5, out5, 1)
- return in7+8
- nop
+ ret
+ restore
-.des_encrypt3.end:
- .size des_encrypt3,.des_encrypt3.end-des_encrypt3
+.DES_encrypt3.end:
+ .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
-! void des_decrypt3(data, ks1, ks2, ks3)
+! void DES_decrypt3(data, ks1, ks2, ks3)
! **************************************
.align 32
- .global des_decrypt3
- .type des_decrypt3,#function
+ .global DES_decrypt3
+ .type DES_decrypt3,#function
-des_decrypt3:
+DES_decrypt3:
- save %sp, -96, %sp
+ save %sp, FRAME, %sp
+ sethi %hi(.PIC.DES_SPtrans-1f),global1
+ or global1,%lo(.PIC.DES_SPtrans-1f),global1
+1: call .+8
+ add %o7,global1,global1
+ sub global1,.PIC.DES_SPtrans-.des_and,out2
+
ld [in0], in5 ! left
add in3, 120, in4 ! ks3
- sethi %hi(.des_and), out2
ld [in0+4], out5 ! right
mov in2, in3 ! ks2
- or out2, %lo(.des_and), out2
! parameter 6 1/2 for include encryption/decryption
! parameter 7 1 for mov in1 to in3
fp_macro(out5, in5, 1)
- return in7+8
- nop
-
-.des_decrypt3.end:
- .size des_decrypt3,.des_decrypt3.end-des_decrypt3
+ ret
+ restore
+.DES_decrypt3.end:
+ .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
-
-! void des_ncbc_encrypt(input, output, length, schedule, ivec, enc)
+! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
! *****************************************************************
.align 32
- .global des_ncbc_encrypt
- .type des_ncbc_encrypt,#function
+ .global DES_ncbc_encrypt
+ .type DES_ncbc_encrypt,#function
-des_ncbc_encrypt:
+DES_ncbc_encrypt:
- save %sp, -96, %sp
+ save %sp, FRAME, %sp
- define({INPUT}, { [%sp+68] })
- define({OUTPUT}, { [%sp+72] })
- define({IVEC}, { [%sp+84] })
+ define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
+ define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
+ define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
+
+ sethi %hi(.PIC.DES_SPtrans-1f),global1
+ or global1,%lo(.PIC.DES_SPtrans-1f),global1
+1: call .+8
+ add %o7,global1,global1
+ sub global1,.PIC.DES_SPtrans-.des_and,out2
cmp in5, 0 ! enc
- sethi %hi(.des_and), out2 ! address constants
- be,pn %icc, .ncbc.dec
- st in4, IVEC
+ be .ncbc.dec
+ STPTR in4, IVEC
! addr left right temp label
load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
addcc in2, -8, in2 ! bytes missing when first block done
+ bl .ncbc.enc.seven.or.less
mov in3, in4 ! schedule
- bl,pn %icc, .ncbc.enc.seven.or.less
- or out2, %lo(.des_and), out2
.ncbc.enc.next.block:
rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
- bl,pn %icc, .ncbc.enc.next.block_fp
+ bl .ncbc.enc.next.block_fp
add in0, 8, in0 ! input address
! If 8 or more bytes are to be encrypted after this block,
add global1, 512, global3 ! address sbox 3 since register used
xor global4, local1, out5 ! iv xor next block
- ba,pt %icc, .ncbc.enc.next.block_2
- add in1, 8, in1 ! output adress
+ ba .ncbc.enc.next.block_2
+ add in1, 8, in1 ! output address
.ncbc.enc.next.block_fp:
addcc in2, -8, in2 ! bytes missing when next block done
- bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
+ bpos .ncbc.enc.next.block
add in1, 8, in1
.ncbc.enc.seven.or.less:
cmp in2, -8
- ble,pt %icc, .ncbc.enc.finish
+ ble .ncbc.enc.finish
nop
add in2, 8, local1 ! bytes to load
.ncbc.enc.finish:
- ld IVEC, local4
+ LDPTR IVEC, local4
store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
- return in7+8
- nop
+ ret
+ restore
.ncbc.dec:
- st in0, INPUT
+ STPTR in0, INPUT
cmp in2, 0 ! length
add in3, 120, in3
- ld IVEC, local7 ! ivec
- ble,pn %icc, .ncbc.dec.finish
+ LDPTR IVEC, local7 ! ivec
+ ble .ncbc.dec.finish
mov in3, in4 ! schedule
- st in1, OUTPUT
- or out2, %lo(.des_and), out2 ! address constants low part
+ STPTR in1, OUTPUT
mov in0, local5 ! input
load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
! in2 is compared to 8 in the rounds
xor out5, in0, out4 ! iv xor
- bl,pn %icc, .ncbc.dec.seven.or.less
+ bl .ncbc.dec.seven.or.less
xor in5, in1, global4 ! iv xor
! Load ivec next block now, since input and output address might be the same.
store_little_endian(local7, out4, global4, local3, .SLE3)
- st local5, INPUT
+ STPTR local5, INPUT
add local7, 8, local7
addcc in2, -8, in2
- bg,pt %icc, .ncbc.dec.next.block
- st local7, OUTPUT
+ bg .ncbc.dec.next.block
+ STPTR local7, OUTPUT
.ncbc.dec.store.iv:
- ld IVEC, local4 ! ivec
+ LDPTR IVEC, local4 ! ivec
store_little_endian(local4, in0, in1, local5, .SLE4)
.ncbc.dec.finish:
- return in7+8
- nop
+ ret
+ restore
.ncbc.dec.seven.or.less:
store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
-.des_ncbc_encrypt.end:
- .size des_ncbc_encrypt, .des_ncbc_encrypt.end-des_ncbc_encrypt
+.DES_ncbc_encrypt.end:
+ .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
-! void des_ede3_cbc_encrypt(input, output, lenght, ks1, ks2, ks3, ivec, enc)
+! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
! **************************************************************************
.align 32
- .global des_ede3_cbc_encrypt
- .type des_ede3_cbc_encrypt,#function
+ .global DES_ede3_cbc_encrypt
+ .type DES_ede3_cbc_encrypt,#function
-des_ede3_cbc_encrypt:
+DES_ede3_cbc_encrypt:
- save %sp, -96, %sp
+ save %sp, FRAME, %sp
- define({LENGTH},{ [%sp+76] })
- define({KS1}, { [%sp+80] })
- define({KS2}, { [%sp+84] })
- define({KS3}, { [%sp+88] })
+ define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
+ define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
+ define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
- ld [%fp+96], local3 ! enc
- sethi %hi(.des_and), out2
+ sethi %hi(.PIC.DES_SPtrans-1f),global1
+ or global1,%lo(.PIC.DES_SPtrans-1f),global1
+1: call .+8
+ add %o7,global1,global1
+ sub global1,.PIC.DES_SPtrans-.des_and,out2
- ld [%fp+92], local4 ! ivec
- or out2, %lo(.des_and), out2
+ LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
+ LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
cmp local3, 0 ! enc
- be,pn %icc, .ede3.dec
- st in4, KS2
+ be .ede3.dec
+ STPTR in4, KS2
- st in5, KS3
+ STPTR in5, KS3
load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
addcc in2, -8, in2 ! bytes missing after next block
- bl,pn %icc, .ede3.enc.seven.or.less
- st in3, KS1
+ bl .ede3.enc.seven.or.less
+ STPTR in3, KS1
.ede3.enc.next.block:
.ede3.enc.next.block_1:
- ld KS2, in4
+ LDPTR KS2, in4
xor in5, out4, in5 ! iv xor
xor out5, global4, out5 ! iv xor
- ld KS1, in3
+ LDPTR KS1, in3
add in4, 120, in4 ! for decryption we use last subkey first
nop
nop
call .des_dec ! ks2 in4
- ld KS3, in3
+ LDPTR KS3, in3
call .des_enc ! ks3 in3 compares in2 to 8
nop
- bl,pn %icc, .ede3.enc.next.block_fp
+ bl .ede3.enc.next.block_fp
add in0, 8, in0
! If 8 or more bytes are to be encrypted after this block,
ld [in3+4], out1 ! key 8642
add global1, 768, global4 ! address sbox 4
- ba,pt %icc, .ede3.enc.next.block_2
+ ba .ede3.enc.next.block_2
add in1, 8, in1
.ede3.enc.next.block_fp:
addcc in2, -8, in2 ! bytes missing when next block done
- bpos,pt %icc, .ede3.enc.next.block
+ bpos .ede3.enc.next.block
add in1, 8, in1
.ede3.enc.seven.or.less:
cmp in2, -8
- ble,pt %icc, .ede3.enc.finish
+ ble .ede3.enc.finish
nop
add in2, 8, local1 ! bytes to load
.ede3.enc.finish:
- ld [%fp+92], local4 ! ivec
+ LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
- return in7+8
- nop
-
+ ret
+ restore
.ede3.dec:
- st in0, INPUT
+ STPTR in0, INPUT
add in5, 120, in5
- st in1, OUTPUT
+ STPTR in1, OUTPUT
mov in0, local5
add in3, 120, in3
- st in3, KS1
+ STPTR in3, KS1
cmp in2, 0
- ble %icc, .ede3.dec.finish
- st in5, KS3
+ ble .ede3.dec.finish
+ STPTR in5, KS3
- ld [%fp+92], local7 ! iv
+ LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
load_little_endian(local7, in0, in1, local3, .LLE8)
.ede3.dec.next.block:
ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
call .des_enc ! ks2 in3
- ld KS1, in4
+ LDPTR KS1, in4
call .des_dec ! ks1 in4
nop
! in2 is compared to 8 in the rounds
xor out5, in0, out4
- bl,pn %icc, .ede3.dec.seven.or.less
+ bl .ede3.dec.seven.or.less
xor in5, in1, global4
load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
store_little_endian(local7, out4, global4, local3, .SLE7) ! block
- st local5, INPUT
+ STPTR local5, INPUT
addcc in2, -8, in2
add local7, 8, local7
- bg,pt %icc, .ede3.dec.next.block
- st local7, OUTPUT
+ bg .ede3.dec.next.block
+ STPTR local7, OUTPUT
.ede3.dec.store.iv:
- ld [%fp+92], local4 ! ivec
+ LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
.ede3.dec.finish:
- return in7+8
- nop
-
+ ret
+ restore
.ede3.dec.seven.or.less:
store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
-.des_ede3_cbc_encrypt.end:
- .size des_ede3_cbc_encrypt,.des_ede3_cbc_encrypt.end-des_ede3_cbc_encrypt
+.DES_ede3_cbc_encrypt.end:
+ .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
+
+ .align 256
+ .type .des_and,#object
+ .size .des_and,288
+
+.des_and:
+
+! Constant block addressed through one base register.
+!
+! Bytes 0-255: this table is used for AND 0xFC when it is known that
+! register bits 8-31 are zero. Entry i holds i with its two low bits
+! cleared. Makes it possible to do three arithmetic operations in one
+! cycle.
+!
+! Bytes 256-287: eight constant words, see the offsets noted below.
+! Total object size is 256 + 8*4 = 288 bytes; the last word starts at
+! offset 284.
+
+ .byte 0, 0, 0, 0, 4, 4, 4, 4
+ .byte 8, 8, 8, 8, 12, 12, 12, 12
+ .byte 16, 16, 16, 16, 20, 20, 20, 20
+ .byte 24, 24, 24, 24, 28, 28, 28, 28
+ .byte 32, 32, 32, 32, 36, 36, 36, 36
+ .byte 40, 40, 40, 40, 44, 44, 44, 44
+ .byte 48, 48, 48, 48, 52, 52, 52, 52
+ .byte 56, 56, 56, 56, 60, 60, 60, 60
+ .byte 64, 64, 64, 64, 68, 68, 68, 68
+ .byte 72, 72, 72, 72, 76, 76, 76, 76
+ .byte 80, 80, 80, 80, 84, 84, 84, 84
+ .byte 88, 88, 88, 88, 92, 92, 92, 92
+ .byte 96, 96, 96, 96, 100, 100, 100, 100
+ .byte 104, 104, 104, 104, 108, 108, 108, 108
+ .byte 112, 112, 112, 112, 116, 116, 116, 116
+ .byte 120, 120, 120, 120, 124, 124, 124, 124
+ .byte 128, 128, 128, 128, 132, 132, 132, 132
+ .byte 136, 136, 136, 136, 140, 140, 140, 140
+ .byte 144, 144, 144, 144, 148, 148, 148, 148
+ .byte 152, 152, 152, 152, 156, 156, 156, 156
+ .byte 160, 160, 160, 160, 164, 164, 164, 164
+ .byte 168, 168, 168, 168, 172, 172, 172, 172
+ .byte 176, 176, 176, 176, 180, 180, 180, 180
+ .byte 184, 184, 184, 184, 188, 188, 188, 188
+ .byte 192, 192, 192, 192, 196, 196, 196, 196
+ .byte 200, 200, 200, 200, 204, 204, 204, 204
+ .byte 208, 208, 208, 208, 212, 212, 212, 212
+ .byte 216, 216, 216, 216, 220, 220, 220, 220
+ .byte 224, 224, 224, 224, 228, 228, 228, 228
+ .byte 232, 232, 232, 232, 236, 236, 236, 236
+ .byte 240, 240, 240, 240, 244, 244, 244, 244
+ .byte 248, 248, 248, 248, 252, 252, 252, 252
+
+ ! 5 numbers for initial/final permutation
+
+ .word 0x0f0f0f0f ! offset 256
+ .word 0x0000ffff ! 260
+ .word 0x33333333 ! 264
+ .word 0x00ff00ff ! 268
+ .word 0x55555555 ! 272
+
+ .word 0 ! 276
+ .word LOOPS ! 280
+ .word 0x0000FC00 ! 284
+
+ .global DES_SPtrans
+ .type DES_SPtrans,#object
+ .size DES_SPtrans,2048
+.align 64
+DES_SPtrans:
+.PIC.DES_SPtrans:
+ ! nibble 0
+ .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
+ .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
+ .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
+ .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
+ .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
+ .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
+ .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
+ .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
+ .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
+ .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
+ .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
+ .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
+ .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
+ .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
+ .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
+ .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
+ ! nibble 1
+ .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
+ .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
+ .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
+ .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
+ .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
+ .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
+ .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
+ .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
+ .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
+ .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
+ .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
+ .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
+ .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
+ .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
+ .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
+ .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
+ ! nibble 2
+ .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
+ .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
+ .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
+ .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
+ .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
+ .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
+ .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
+ .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
+ .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
+ .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
+ .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
+ .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
+ .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
+ .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
+ .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
+ .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
+ ! nibble 3
+ .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
+ .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
+ .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
+ .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
+ .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
+ .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
+ .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
+ .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
+ .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
+ .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
+ .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
+ .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
+ .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
+ .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
+ .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
+ .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
+ ! nibble 4
+ .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
+ .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
+ .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
+ .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
+ .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
+ .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
+ .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
+ .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
+ .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
+ .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
+ .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
+ .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
+ .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
+ .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
+ .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
+ .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
+ ! nibble 5
+ .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
+ .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
+ .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
+ .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
+ .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
+ .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
+ .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
+ .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
+ .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
+ .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
+ .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
+ .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
+ .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
+ .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
+ .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
+ .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
+ ! nibble 6
+ .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
+ .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
+ .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
+ .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
+ .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
+ .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
+ .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
+ .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
+ .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
+ .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
+ .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
+ .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
+ .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
+ .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
+ .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
+ .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
+ ! nibble 7
+ .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
+ .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
+ .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
+ .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
+ .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
+ .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
+ .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
+ .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
+ .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
+ .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
+ .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
+ .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
+ .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
+ .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
+ .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
+ .word 0x20000000, 0x20800080, 0x00020000, 0x00820080
+