X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha1-s390x.pl;h=4de9f384c11bea258f1f8242a9e2b14ee01f4a71;hb=32be631ca1f2b73c92e4f7f5d23f1c3aee80ec69;hp=e22e86fa1400d5afcc4dc3698e6d71a59ad3549d;hpb=8626230a0227b15c0e0542f5a65f802ee32772b6;p=oweals%2Fopenssl.git diff --git a/crypto/sha/asm/sha1-s390x.pl b/crypto/sha/asm/sha1-s390x.pl index e22e86fa14..4de9f384c1 100644 --- a/crypto/sha/asm/sha1-s390x.pl +++ b/crypto/sha/asm/sha1-s390x.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -21,10 +28,32 @@ # instructions to favour dual-issue z10 pipeline. On z10 hardware is # "only" ~2.3x faster than software. +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. On z990 it was measured to perform +# 23% better than code generated by gcc 4.3. + $kimdfunc=1; # magic function code for kimd instruction -$output=shift; -open STDOUT,">$output"; +# $output is the last argument if it looks like a file (it has an extension) +# $flavour is the first argument if it doesn't look like a file +$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; +$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + +$output and open STDOUT,">$output"; $K_00_39="%r0"; $K=$K_00_39; $K_40_79="%r1"; @@ -42,13 +71,14 @@ $t1="%r11"; @X=("%r12","%r13","%r14"); $sp="%r15"; -$frame=160+16*4; +$stdframe=16*$SIZE_T+4*8; +$frame=$stdframe+16*4; sub Xupdate { my $i=shift; $code.=<<___ if ($i==15); - lg $prefetch,160($sp) ### Xupdate(16) warm-up + lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up lr $X[0],$X[2] ___ return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle @@ -58,8 +88,8 @@ $code.=<<___ if ($i<16); ___ $code.=<<___ if ($i>=16); xgr $X[0],$prefetch ### Xupdate($i) - lg $prefetch,`160+4*(($i+2)%16)`($sp) - xg $X[0],`160+4*(($i+8)%16)`($sp) + lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp) + xg $X[0],`$stdframe+4*(($i+8)%16)`($sp) xgr $X[0],$prefetch rll $X[0],$X[0],1 rllg $X[1],$X[0],32 @@ -68,7 +98,7 @@ $code.=<<___ if ($i>=16); lr $X[2],$X[1] # feedback ___ $code.=<<___ if ($i<=70); - stg $X[0],`160+4*($i%16)`($sp) + stg $X[0],`$stdframe+4*($i%16)`($sp) ___ unshift(@X,pop(@X)); } @@ -132,6 +162,8 @@ ___ } $code.=<<___; +#include "s390x_arch.h" + .text .align 64 .type Ktable,\@object @@ -143,10 +175,8 @@ Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6 sha1_block_data_order: ___ $code.=<<___ if ($kimdfunc); - lghi %r0,0 - la %r1,16($sp) - .long 0xb93e0002 # kimd %r0,%r2 - lg %r0,16($sp) + larl %r1,OPENSSL_s390xcap_P + lg %r0,S390X_KIMD(%r1) # check kimd capabilities tmhh %r0,`0x8000>>$kimdfunc` jz .Lsoftware lghi %r0,$kimdfunc @@ -156,15 +186,16 @@ $code.=<<___ if ($kimdfunc); .long 0xb93e0002 # kimd %r0,%r2 brc 1,.-4 # pay attention to "partial completion" br %r14 +.align 16 .Lsoftware: ___ $code.=<<___; lghi %r1,-$frame - stg $ctx,16($sp) - stmg %r6,%r15,48($sp) + st${g} $ctx,`2*$SIZE_T`($sp) + stm${g} %r6,%r15,`6*$SIZE_T`($sp) lgr %r0,$sp la $sp,0(%r1,$sp) - stg %r0,0($sp) + st${g} %r0,0($sp) larl $t0,Ktable llgf $A,0($ctx) @@ -194,7 +225,7 @@ ___ for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } $code.=<<___; - lg $ctx,`$frame+16`($sp) + l${g} $ctx,`$frame+2*$SIZE_T`($sp) la $inp,64($inp) al $A,0($ctx) al $B,4($ctx) @@ -206,9 +237,9 @@ $code.=<<___; st $C,8($ctx) st $D,12($ctx) st $E,16($ctx) - brct $len,.Lloop + brct${g} $len,.Lloop - lmg %r6,%r15,`$frame+48`($sp) + lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) br %r14 .size sha1_block_data_order,.-sha1_block_data_order .string "SHA1 block transform for s390x, CRYPTOGAMS by " @@ -217,4 +248,4 @@ ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; -close STDOUT; +close STDOUT or die "error closing STDOUT";