+;; Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+;;
+;; Licensed under the OpenSSL license (the "License"). You may not use
+;; this file except in compliance with the License. You can obtain a copy
+;; in the file LICENSE in the source distribution or at
+;; https://www.openssl.org/source/license.html
+;;
;;====================================================================
;; Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
;; project.
.if 0
BNOP sploopNxM?,3
;; The above-mentioned m*2*(n+1)+10 does not apply in the n=m=4 case,
- ;; because of read-after-write penalties, it's rather
- ;; n*2*(n+3)+10, or 66 cycles [plus various overheads]...
+ ;; because of the low-counter effect, when the prologue phase
+ ;; finishes before the SPKERNEL instruction is reached. As a
+ ;; result it's 25% slower than expected...
MVK 4,B0 ; N, RILC
|| MVK 4,A0 ; M, outer loop counter
|| MV ARG1,A5 ; copy ap