X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Fsha%2Fasm%2Fsha512-s390x.pl;h=5c6786d85c07c1080e50f2aee1b45fe9a96c09f9;hb=a21314dbbc56cd30580123d74b3106a628540965;hp=e4f1812c68a269a4733069964ecc74ea722af2e1;hpb=8626230a0227b15c0e0542f5a65f802ee32772b6;p=oweals%2Fopenssl.git diff --git a/crypto/sha/asm/sha512-s390x.pl b/crypto/sha/asm/sha512-s390x.pl index e4f1812c68..5c6786d85c 100644 --- a/crypto/sha/asm/sha512-s390x.pl +++ b/crypto/sha/asm/sha512-s390x.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -26,6 +33,29 @@ # favour dual-issue z10 pipeline. Hardware SHA256/512 is ~4.7x faster # than software. +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. On z990 SHA256 was measured to +# perform 2.4x and SHA512 - 13x better than code generated by gcc 4.3. + +# $output is the last argument if it looks like a file (it has an extension) +# $flavour is the first argument if it doesn't look like a file +$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; +$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + $t0="%r0"; $t1="%r1"; $ctx="%r2"; $t2="%r2"; @@ -44,7 +74,6 @@ $tbl="%r13"; $T1="%r14"; $sp="%r15"; -$output=shift; open STDOUT,">$output"; if ($output =~ /512/) { @@ -78,7 +107,8 @@ if ($output =~ /512/) { } $Func="sha${label}_block_data_order"; $Table="K${label}"; -$frame=160+16*$SZ; +$stdframe=16*$SIZE_T+4*8; +$frame=$stdframe+16*$SZ; sub BODY_00_15 { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; @@ -93,9 +123,9 @@ $code.=<<___; xgr $t0,$t1 $ROT $t1,$t1,`$Sigma1[2]-$Sigma1[1]` xgr $t2,$g - $ST $T1,`160+$SZ*($i%16)`($sp) + $ST $T1,`$stdframe+$SZ*($i%16)`($sp) xgr $t0,$t1 # Sigma1(e) - la $T1,0($T1,$h) # T1+=h + algr $T1,$h # T1+=h ngr $t2,$e lgr $t1,$a algr $T1,$t0 # T1+=Sigma1(e) @@ -113,7 +143,7 @@ $code.=<<___; ngr $t2,$b algr $h,$T1 # h+=T1 ogr $t2,$t1 # Maj(a,b,c) - la $d,0($d,$T1) # d+=T1 + algr $d,$T1 # d+=T1 algr $h,$t2 # h+=Maj(a,b,c) ___ } @@ -122,19 +152,19 @@ sub BODY_16_XX { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___; - $LD $T1,`160+$SZ*(($i+1)%16)`($sp) ### $i - $LD $t1,`160+$SZ*(($i+14)%16)`($sp) + $LD $T1,`$stdframe+$SZ*(($i+1)%16)`($sp) ### $i + $LD $t1,`$stdframe+$SZ*(($i+14)%16)`($sp) $ROT $t0,$T1,$sigma0[0] $SHR $T1,$sigma0[2] $ROT $t2,$t0,`$sigma0[1]-$sigma0[0]` xgr $T1,$t0 $ROT $t0,$t1,$sigma1[0] - xgr $T1,$t2 # sigma0(X[i+1]) + xgr $T1,$t2 # sigma0(X[i+1]) $SHR $t1,$sigma1[2] - $ADD $T1,`160+$SZ*($i%16)`($sp) # +=X[i] + $ADD $T1,`$stdframe+$SZ*($i%16)`($sp) # +=X[i] xgr $t1,$t0 $ROT $t0,$t0,`$sigma1[1]-$sigma1[0]` - $ADD $T1,`160+$SZ*(($i+9)%16)`($sp) # +=X[i+9] + $ADD $T1,`$stdframe+$SZ*(($i+9)%16)`($sp) # +=X[i+9] xgr $t1,$t0 # sigma1(X[i+14]) algr $T1,$t1 # +=sigma1(X[i+14]) ___ @@ -142,6 +172,8 @@ ___ } $code.=<<___; +#include "s390x_arch.h" + .text .align 64 .type $Table,\@object @@ -212,31 +244,30 @@ $code.=<<___; .globl $Func .type $Func,\@function $Func: + sllg $len,$len,`log(16*$SZ)/log(2)` ___ $code.=<<___ if ($kimdfunc); - lghi %r0,0 - la %r1,16($sp) - .long 0xb93e0002 # kimd %r0,%r2 - lg %r0,16($sp) + larl %r1,OPENSSL_s390xcap_P + lg %r0,S390X_KIMD(%r1) # check kimd capabilities tmhh %r0,`0x8000>>$kimdfunc` jz .Lsoftware lghi %r0,$kimdfunc lgr %r1,$ctx lgr %r2,$inp - sllg %r3,$len,`log(16*$SZ)/log(2)` + lgr %r3,$len .long 0xb93e0002 # kimd %r0,%r2 brc 1,.-4 # pay attention to "partial completion" br %r14 +.align 16 .Lsoftware: ___ $code.=<<___; - sllg $len,$len,`log(16*$SZ)/log(2)` lghi %r1,-$frame - agr $len,$inp - stmg $ctx,%r15,16($sp) + la $len,0($len,$inp) + stm${g} $ctx,%r15,`2*$SIZE_T`($sp) lgr %r0,$sp la $sp,0(%r1,$sp) - stg %r0,0($sp) + st${g} %r0,0($sp) larl $tbl,$Table $LD $A,`0*$SZ`($ctx) @@ -260,7 +291,7 @@ $code.=<<___; clgr $len,$t0 jne .Lrounds_16_xx - lg $ctx,`$frame+16`($sp) + l${g} $ctx,`$frame+2*$SIZE_T`($sp) la $inp,`16*$SZ`($inp) $ADD $A,`0*$SZ`($ctx) $ADD $B,`1*$SZ`($ctx) @@ -278,10 +309,10 @@ $code.=<<___; $ST $F,`5*$SZ`($ctx) $ST $G,`6*$SZ`($ctx) $ST $H,`7*$SZ`($ctx) - clg $inp,`$frame+32`($sp) + cl${g} $inp,`$frame+4*$SIZE_T`($sp) jne .Lloop - lmg %r6,%r15,`$frame+48`($sp) + lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) br %r14 .size $Func,.-$Func .string "SHA${label} block transform for s390x, CRYPTOGAMS by " @@ -292,4 +323,4 @@ $code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/(srlg\s+)(%r[0-9]+),/$1$2,$2,/gm; print $code; -close STDOUT; +close STDOUT or die "error closing STDOUT: $!";