+;; Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+;;
+;; Licensed under the OpenSSL license (the "License"). You may not use
+;; this file except in compliance with the License. You can obtain a copy
+;; in the file LICENSE in the source distribution or at
+;; https://www.openssl.org/source/license.html
+;;
;;====================================================================
;; Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
;; project.
;; SPLOOPs spin at ... 2*n cycles [plus epilogue].
;;====================================================================
.text
+
+ .if .ASSEMBLER_VERSION<7000000
+ .asg 0,__TI_EABI__
+ .endif
.if __TI_EABI__
.asg bn_mul_add_words,_bn_mul_add_words
.asg bn_mul_words,_bn_mul_words
.if 0
BNOP sploopNxM?,3
;; Above mentioned m*2*(n+1)+10 does not apply in n=m=4 case,
- ;; because of read-after-write penalties, it's rather
- ;; n*2*(n+3)+10, or 66 cycles [plus various overheads]...
+ ;; because of low-counter effect, when prologue phase finishes
+ ;; before SPKERNEL instruction is reached. As result it's 25%
+ ;; slower than expected...
MVK 4,B0 ; N, RILC
|| MVK 4,A0 ; M, outer loop counter
|| MV ARG1,A5 ; copy ap