X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Fbn%2Fasm%2Fmips-mont.pl;h=a907571bec3f48af164c1401db8f4cc170c3b93b;hb=77b072504ec464eac5e0f9aab19cadb9c4e311d1;hp=e2395f4b5ddab09599717eb21f41f3a66454b5b1;hpb=0985473636b8bb998eb887c28489cfe5e57905a5;p=oweals%2Fopenssl.git

diff --git a/crypto/bn/asm/mips-mont.pl b/crypto/bn/asm/mips-mont.pl
index e2395f4b5d..a907571bec 100644
--- a/crypto/bn/asm/mips-mont.pl
+++ b/crypto/bn/asm/mips-mont.pl
@@ -1,19 +1,30 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
 #
 # ====================================================================
-# Written by Andy Polyakov for the OpenSSL
+# Written by Andy Polyakov for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 # ====================================================================
 
 # This module doesn't present direct interest for OpenSSL, because it
-# doesn't provide better performance for longer keys. While 512-bit
-# RSA private key operations are 40% faster, 1024-bit ones are hardly
-# faster at all, while longer key operations are slower by up to 20%.
-# It might be of interest to embedded system developers though, as
-# it's smaller than 1KB, yet offers ~3x improvement over compiler
-# generated code.
+# doesn't provide better performance for longer keys, at least not on
+# in-order-execution cores. While 512-bit RSA sign operations can be
+# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
+# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
+# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
+# verify:-( All comparisons are against bn_mul_mont-free assembler.
+# The module might be of interest to embedded system developers, as
+# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
+# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
+# code.
 
 ######################################################################
 # There is a number of MIPS ABI in use, O32 and N32/64 are most
@@ -42,7 +53,7 @@
 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
 #
-$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
+$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
 
 if ($flavour =~ /64|n32/i) {
 	$PTR_ADD="dadd";	# incidentally works even on n32
@@ -63,7 +74,7 @@ $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
 #
 ######################################################################
 
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
 if ($flavour =~ /64|n32/i) {
@@ -126,9 +137,12 @@ $code.=<<___ if ($flavour =~ /o32/i);
 ___
 $code.=<<___;
 	slt	$at,$num,4
-	beqzl	$at,bn_mul_mont_internal
+	bnez	$at,1f
 	li	$t0,0
-	jr	$ra
+	slt	$at,$num,17	# on in-order CPU
+	bnez	$at,bn_mul_mont_internal
+	nop
+1:	jr	$ra
 	li	$a0,0
 	.end	bn_mul_mont
 