"linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
#
# TI_CGT_C6000_7.3.x is required. Note that --strip_coff_underscore is
# dropped: the assembly modules now resolve COFF vs. EABI symbol naming
# themselves with .asg aliases.
-"linux-c64xplus","cl6x:--linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",
+"linux-c64xplus","cl6x:--linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT::-D_REENTRANT:::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:dlfcn:linux-shared:--pic:-z --sysv --shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):true",
# Android: linux-* but without -DTERMIO, and with pointers to Android headers and libs.
"android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
*** linux-c64xplus
$cc = cl6x
-$cflags = --linux --strip_coff_underscore -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT
+$cflags = --linux -ea=.s -eo=.o -mv6400+ -o2 -ox -ms -pden -DOPENSSL_SMALL_FOOTPRINT
$unistd =
$thread_cflag = -D_REENTRANT
$sys_id =
.text
.if __TI_EABI__
.nocmp
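+	;; COFF prefixes C symbols with an underscore while EABI does not,
+	;; so under EABI alias each underscored label used in this module
+	;; to its plain, EABI-visible name: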
+ .asg AES_encrypt,_AES_encrypt
+ .asg AES_decrypt,_AES_decrypt
+ .asg AES_set_encrypt_key,_AES_set_encrypt_key
+ .asg AES_set_decrypt_key,_AES_set_decrypt_key
+ .asg AES_ctr32_encrypt,_AES_ctr32_encrypt
.endif
.asg B3,RA
}
# Tables are kept in an endian-neutral manner
$code.=<<___;
+ .if __TI_EABI__
+ .sect ".text:aes_asm.const"
+ .else
.sect ".const:aes_asm"
+ .endif
.align 128
AES_Te:
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84
___
print $code;
+close STDOUT;
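A minimal C sketch of what "endian-neutral" means for AES_Te above, assuming
the standard AES Te0 construction (the names below are illustrative, not
OpenSSL code): each 32-bit entry is emitted byte-wise as {2*S[x], S[x], S[x],
3*S[x]} over GF(2^8), so a word load sees the same lane order on big- and
little-endian parts.

#include <stdint.h>
#include <stdio.h>

/* GF(2^8) multiply-by-x ("xtime"), reduction polynomial 0x11b */
static uint8_t xtime(uint8_t b)
{
    return (uint8_t)((b << 1) ^ ((b >> 7) * 0x1b));
}

int main(void)
{
    uint8_t s = 0x63;   /* AES S-box[0] */
    uint8_t te[4] = { xtime(s), s, s, (uint8_t)(xtime(s) ^ s) };

    printf("%02x,%02x,%02x,%02x\n", te[0], te[1], te[2], te[3]);
    return 0;           /* prints c6,63,63,a5 -- the first AES_Te entry */
}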
;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
;;====================================================================
.text
+ .if __TI_EABI__
+ .asg bn_mul_add_words,_bn_mul_add_words
+ .asg bn_mul_words,_bn_mul_words
+ .asg bn_sqr_words,_bn_sqr_words
+ .asg bn_add_words,_bn_add_words
+ .asg bn_sub_words,_bn_sub_words
+ .asg bn_div_words,_bn_div_words
+ .asg bn_sqr_comba8,_bn_sqr_comba8
+ .asg bn_mul_comba8,_bn_mul_comba8
+ .asg bn_sqr_comba4,_bn_sqr_comba4
+ .asg bn_mul_comba4,_bn_mul_comba4
+ .endif
.asg B3,RA
.asg A4,ARG0
.endasmfunc
.global _bn_div_words
- .global __divull
_bn_div_words:
.asmfunc
- CALLP __divull,A3 ; jump to rts64plus.lib
-|| MV ARG0,A5
-|| MV ARG1,ARG0
-|| MV ARG2,ARG1
-|| ZERO B5
+ LMBD 1,A6,A0 ; leading zero bits in dv
+ LMBD 1,A4,A1 ; leading zero bits in hi
+|| MVK 32,B0
+ CMPLTU A1,A0,A2
+|| ADD A0,B0,B0
+ [ A2] BNOP RA
+||[ A2] MVK -1,A4 ; return overflow
+||[!A2] MV A4,A3 ; reassign hi
+ [!A2] MV B4,A4 ; reassign lo, will be quotient
+||[!A2] MVC B0,ILC
+ [!A2] SHL A6,A0,A6 ; normalize dv
+|| MVK 1,A1
+
+ [!A2] CMPLTU A3,A6,A1 ; hi<dv?
+||[!A2] SHL A4,1,A5:A4 ; lo<<1
+ [!A1] SUB A3,A6,A3 ; hi-=dv
+||[!A1] OR 1,A4,A4
+ [!A2] SHRU A3,31,A1 ; upper bit
+||[!A2] ADDAH A5,A3,A3 ; hi<<1|lo>>31
+
+ SPLOOP 3
+ [!A1] CMPLTU A3,A6,A1 ; hi<dv?
+||[ A1] ZERO A1
+|| SHL A4,1,A5:A4 ; lo<<1
+ [!A1] SUB A3,A6,A3 ; hi-=dv
+||[!A1] OR 1,A4,A4 ; quotient
+ SHRU A3,31,A1 ; upper bit
+|| ADDAH A5,A3,A3 ; hi<<1|lo>>31
+ SPKERNEL
+
+ BNOP RA,5
.endasmfunc
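For readers tracing the SPLOOP above: a hedged C model of the same restoring
shift-subtract division. It is not instruction-accurate; the function name is
invented for illustration, a 32-bit BN_ULONG and the hi < dv entry contract
are assumed, and GCC's __builtin_clz stands in for LMBD.

#include <stdint.h>

/* dv must be non-zero; returns the quotient of the 64-bit hi:lo by dv */
uint32_t bn_div_words_ref(uint32_t hi, uint32_t lo, uint32_t dv)
{
    if (hi >= dv)
        return (uint32_t)-1;            /* quotient would overflow 32 bits */

    int n = __builtin_clz(dv);          /* cf. LMBD 1,A6,A0 */
    dv <<= n;                           /* normalize divisor */

    for (int i = 0; i < 32 + n; i++) {  /* cf. MVC B0,ILC with B0 = 32+n */
        uint32_t carry = hi >> 31;      /* bit about to fall off hi */
        hi = (hi << 1) | (lo >> 31);    /* hi:lo <<= 1 */
        lo <<= 1;
        if (carry || hi >= dv) {        /* 33-bit compare against dv */
            hi -= dv;                   /* keep the remainder in hi */
            lo |= 1;                    /* record a quotient bit */
        }
    }
    return lo;                          /* lo = quotient, hi = rem << n */
}

This replaces the former CALLP to __divull in rts64plus.lib; that helper's
name is COFF-specific (EABI run-time libraries name their division helpers
differently), which is presumably why the library dependency was dropped.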
;;====================================================================
|| LDW *A5++,B6 ; ap[0]
|| MV A0,A3 ; const A3=M
.else
- ;; This alternative is exercise in fully unrolled Comba
+ ;; This alternative is an exercise in fully unrolled Comba
;; algorithm implementation that operates at n*(n+1)+12, or
;; as little as 32 cycles...
LDW *ARG1[0],B16 ; a[0]
}
$code.=<<___;
.text
+ .if __TI_EABI__
+ .asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
+ .endif
.global _bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
$code.=<<___;
.text
+ .if __TI_EABI__
+ .asg OPENSSL_rdtsc,_OPENSSL_rdtsc
+ .asg OPENSSL_cleanse,_OPENSSL_cleanse
+ .asg OPENSSL_atomic_add,_OPENSSL_atomic_add
+ .asg OPENSSL_wipe_cpu,_OPENSSL_wipe_cpu
+ .asg OPENSSL_instrument_bus,_OPENSSL_instrument_bus
+ .asg OPENSSL_instrument_bus2,_OPENSSL_instrument_bus2
+ .endif
.asg B3,RA
$code.=<<___;
.text
+ .if __TI_EABI__
+ .asg gcm_gmult_1bit,_gcm_gmult_1bit
+ .asg gcm_gmult_4bit,_gcm_gmult_4bit
+ .asg gcm_ghash_4bit,_gcm_ghash_4bit
+ .endif
.asg B3,RA
# (per-cycle functional-unit scheduling diagram elided in this excerpt)
$code.=<<___;
- XORMPY $H0,$xia,$H0x ; 0 ; H·Xi[i]
+ XORMPY $H0,$xia,$H0x ; 0 ; H·(Xi[i]<<1)
|| XORMPY $H01u,$xib,$H01y
|| [A0] LDBU *--${xip},$x0
XORMPY $H1,$xia,$H1x ; 1
XORMPY $H3,$xia,$H3x ; 3
|| XORMPY $H3u,$xib,$H3y
||[!A0] MVK.D 15,A0 ; *--${xip} counter
- XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·Xi[i]
+ XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·(Xi[i]<<1)
|| [A0] SUB.S A0,1,A0
XOR.L $H1x,$Z1,$Z1 ; 5
|| AND.D $H01y,$FF000000,$H0z
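Context for the comment fix above: XORMPY is a carry-less (GF(2)[x]) multiply,
and the Xi byte enters it pre-shifted, hence H·(Xi[i]<<1) rather than H·Xi[i].
A generic C sketch of carry-less multiplication (the function name and operand
widths are assumptions of this sketch, not the real instruction's encoding):

#include <stdint.h>
#include <stdio.h>

/* XOR-accumulate a shifted copy of h for every set bit of x, instead of
 * adding partial products the way an integer multiply would. */
static uint64_t clmul_8x32(uint8_t x, uint32_t h)
{
    uint64_t acc = 0;
    for (int i = 0; i < 8; i++)
        if ((x >> i) & 1)
            acc ^= (uint64_t)h << i;
    return acc;
}

int main(void)
{
    printf("%llx\n", (unsigned long long)clmul_8x32(0x03, 0x05));
    return 0;   /* 0x03*0x05 carry-less = 0x5 ^ 0xa = 0xf */
}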
$code=<<___;
.text
+ .if __TI_EABI__
+ .asg sha1_block_data_order,_sha1_block_data_order
+ .endif
.asg B3,RA
.asg A15,FP
.text
.if __TI_EABI__
.nocmp
+ .asg sha256_block_data_order,_sha256_block_data_order
.endif
.asg B3,RA
|| STW $H,*${CTXB}[7]
.endasmfunc
+ .if __TI_EABI__
+ .sect ".text:sha_asm.const"
+ .else
.sect ".const:sha_asm"
+ .endif
.align 128
K256:
.uword 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
___
print $code;
+close STDOUT;
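The K256 words emitted above are the first 32 bits of the fractional parts of
the cube roots of the first 64 primes. A standalone C check of the first four
entries (illustrative, not OpenSSL code; link with -lm):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    static const int primes[4] = { 2, 3, 5, 7 };

    for (int i = 0; i < 4; i++) {
        double c = cbrt((double)primes[i]);
        uint32_t k = (uint32_t)((c - floor(c)) * 4294967296.0);
        printf("0x%08x%s", k, i == 3 ? "\n" : ", ");
    }
    return 0;   /* 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 */
}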
.text
.if __TI_EABI__
.nocmp
+ .asg sha512_block_data_order,_sha512_block_data_order
.endif
.asg B3,RA
NOP 2 ; wait till FP is committed
.endasmfunc
+ .if __TI_EABI__
+ .sect ".text:sha_asm.const"
+ .else
.sect ".const:sha_asm"
+ .endif
.align 128
K512:
.uword 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
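;; Each K512 entry is the 64-bit analogue of K256 (the fractional parts of
;; the same cube roots, taken to 64 bits), emitted as two .uwords with the
;; more significant word first: 0x428a2f98,0xd728ae22 is K512[0], i.e.
;; 0x428a2f98d728ae22.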