X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Faes%2Fasm%2Faes-s390x.pl;h=4f55a6be82e9c00b87338e5440066d6d66c81727;hb=c918d8e2831eb4ff067855023036873ce15bd0df;hp=231a29982cee314cd77e8662ea9d9cbaa29c7d69;hpb=670ad0fbf6ebcf113e278d8174081a7e2d2fa44c;p=oweals%2Fopenssl.git diff --git a/crypto/aes/asm/aes-s390x.pl b/crypto/aes/asm/aes-s390x.pl index 231a29982c..4f55a6be82 100644 --- a/crypto/aes/asm/aes-s390x.pl +++ b/crypto/aes/asm/aes-s390x.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -37,7 +44,7 @@ # minimize/avoid Address Generation Interlock hazard and to favour # dual-issue z10 pipeline. This gave ~25% improvement on z10 and # almost 50% on z9. The gain is smaller on z10, because being dual- -# issue z10 makes it improssible to eliminate the interlock condition: +# issue z10 makes it impossible to eliminate the interlock condition: # critial path is not long enough. Yet it spends ~24 cycles per byte # processed with 128-bit key. # @@ -122,6 +129,8 @@ sub _data_word() } $code=<<___; +#include "s390x_arch.h" + .text .type AES_Te,\@object @@ -397,7 +406,7 @@ _s390x_AES_encrypt: or $s1,$t1 or $t2,$i2 or $t3,$i3 - + srlg $i1,$s2,`8-3` # i0 srlg $i2,$s2,`16-3` # i1 nr $i1,$mask @@ -450,7 +459,7 @@ _s390x_AES_encrypt: x $s2,24($key) x $s3,28($key) - br $ra + br $ra .size _s390x_AES_encrypt,.-_s390x_AES_encrypt ___ @@ -772,7 +781,7 @@ _s390x_AES_decrypt: x $s2,24($key) x $s3,28($key) - br $ra + br $ra .size _s390x_AES_decrypt,.-_s390x_AES_decrypt ___ @@ -806,7 +815,7 @@ _s390x_AES_set_encrypt_key: .Lproceed: ___ $code.=<<___ if (!$softonly); - # convert bits to km code, [128,192,256]->[18,19,20] + # convert bits to km(c) code, [128,192,256]->[18,19,20] lhi %r5,-128 lhi %r0,18 ar %r5,$bits @@ -814,13 +823,10 @@ $code.=<<___ if (!$softonly); ar %r5,%r0 larl %r1,OPENSSL_s390xcap_P - lg %r0,0(%r1) - tmhl %r0,0x4000 # check for message-security assist - jz .Lekey_internal - llihh %r0,0x8000 srlg %r0,%r0,0(%r5) - ng %r0,48(%r1) # check kmc capability vector + ng %r0,S390X_KM(%r1) # check availability of both km... + ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length jz .Lekey_internal lmg %r0,%r1,0($inp) # just copy 128 bits... @@ -835,7 +841,7 @@ $code.=<<___ if (!$softonly); stg %r1,24($key) 1: st $bits,236($key) # save bits [for debugging purposes] lgr $t0,%r5 - st %r5,240($key) # save km code + st %r5,240($key) # save km(c) code lghi %r2,0 br %r14 ___ @@ -1080,7 +1086,7 @@ $code.=<<___ if (!$softonly); lhi $t1,16 cr $t0,$t1 jl .Lgo - oill $t0,0x80 # set "decrypt" bit + oill $t0,S390X_DECRYPT # set "decrypt" bit st $t0,240($key) br $ra ___ @@ -1219,7 +1225,7 @@ $code.=<<___ if (!$softonly); .align 16 .Lkmc_truncated: ahi $key,-1 # it's the way it's encoded in mvc - tmll %r0,0x80 + tmll %r0,S390X_DECRYPT jnz .Lkmc_truncated_dec lghi %r1,0 stg %r1,16*$SIZE_T($sp) @@ -1290,7 +1296,7 @@ $code.=<<___; .Lcbc_enc_done: l${g} $ivp,6*$SIZE_T($sp) st $s0,0($ivp) - st $s1,4($ivp) + st $s1,4($ivp) st $s2,8($ivp) st $s3,12($ivp) @@ -1399,7 +1405,61 @@ $code.=<<___ if (!$softonly); clr %r0,%r1 jl .Lctr32_software - stm${g} %r6,$s3,6*$SIZE_T($sp) + st${g} $s2,10*$SIZE_T($sp) + st${g} $s3,11*$SIZE_T($sp) + + clr $len,%r1 # does work even in 64-bit mode + jle .Lctr32_nokma # kma is slower for <= 16 blocks + + larl %r1,OPENSSL_s390xcap_P + lr $s2,%r0 + llihh $s3,0x8000 + srlg $s3,$s3,0($s2) + ng $s3,S390X_KMA(%r1) # check kma capability vector + jz .Lctr32_nokma + + l${g}hi %r1,-$stdframe-112 + l${g}r $s3,$sp + la $sp,0(%r1,$sp) # prepare parameter block + + lhi %r1,0x0600 + sllg $len,$len,4 + or %r0,%r1 # set HS and LAAD flags + + st${g} $s3,0($sp) # backchain + la %r1,$stdframe($sp) + + lmg $s2,$s3,0($key) # copy key + stg $s2,$stdframe+80($sp) + stg $s3,$stdframe+88($sp) + lmg $s2,$s3,16($key) + stg $s2,$stdframe+96($sp) + stg $s3,$stdframe+104($sp) + + lmg $s2,$s3,0($ivp) # copy iv + stg $s2,$stdframe+64($sp) + ahi $s3,-1 # kma requires counter-1 + stg $s3,$stdframe+72($sp) + st $s3,$stdframe+12($sp) # copy counter + + lghi $s2,0 # no AAD + lghi $s3,0 + + .long 0xb929a042 # kma $out,$s2,$inp + brc 1,.-4 # pay attention to "partial completion" + + stg %r0,$stdframe+80($sp) # wipe key + stg %r0,$stdframe+88($sp) + stg %r0,$stdframe+96($sp) + stg %r0,$stdframe+104($sp) + la $sp,$stdframe+112($sp) + + lm${g} $s2,$s3,10*$SIZE_T($sp) + br $ra + +.align 16 +.Lctr32_nokma: + stm${g} %r6,$s1,6*$SIZE_T($sp) slgr $out,$inp la %r1,0($key) # %r1 is permanent copy of $key @@ -1432,18 +1492,13 @@ $code.=<<___ if (!$softonly); .Lctr32_hw_switch: ___ -$code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower - larl $s0,OPENSSL_s390xcap_P - lg $s0,8($s0) - tmhh $s0,0x0004 # check for message_security-assist-4 - jz .Lctr32_km_loop - +$code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower llgfr $s0,%r0 lgr $s1,%r1 larl %r1,OPENSSL_s390xcap_P llihh %r0,0x8000 # check if kmctr supports the function code srlg %r0,%r0,0($s0) - ng %r0,64(%r1) # check kmctr capability vector + ng %r0,S390X_KMCTR(%r1) # check kmctr capability vector lgr %r0,$s0 lgr %r1,$s1 jz .Lctr32_km_loop @@ -1481,7 +1536,7 @@ $code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower br $ra .align 16 ___ -$code.=<<___; +$code.=<<___ if (!$softonly); .Lctr32_km_loop: la $s2,16($sp) lgr $s3,$fp @@ -1568,8 +1623,8 @@ ___ } ######################################################################## -# void AES_xts_encrypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2, +# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out, +# size_t len, const AES_KEY *key1, const AES_KEY *key2, # const unsigned char iv[16]); # { @@ -1593,7 +1648,7 @@ $code.=<<___ if(1); larl %r1,OPENSSL_s390xcap_P llihh %r0,0x8000 srlg %r0,%r0,32($s1) # check for 32+function code - ng %r0,32(%r1) # check km capability vector + ng %r0,S390X_KM(%r1) # check km capability vector lgr %r0,$s0 # restore the function code la %r1,0($key1) # restore $key1 jz .Lxts_km_vanilla @@ -1628,7 +1683,7 @@ $code.=<<___ if(1); llgc $len,2*$SIZE_T-1($sp) nill $len,0x0f # $len%=16 br $ra - + .align 16 .Lxts_km_vanilla: ___ @@ -1855,7 +1910,7 @@ $code.=<<___; xgr $s1,%r1 lrvgr $s1,$s1 # flip byte order lrvgr $s3,$s3 - srlg $s0,$s1,32 # smash the tweak to 4x32-bits + srlg $s0,$s1,32 # smash the tweak to 4x32-bits stg $s1,$tweak+0($sp) # save the tweak llgfr $s1,$s1 srlg $s2,$s3,32 @@ -1906,7 +1961,7 @@ $code.=<<___; xgr $s1,%r1 lrvgr $s1,$s1 # flip byte order lrvgr $s3,$s3 - srlg $s0,$s1,32 # smash the tweak to 4x32-bits + srlg $s0,$s1,32 # smash the tweak to 4x32-bits stg $s1,$tweak+0($sp) # save the tweak llgfr $s1,$s1 srlg $s2,$s3,32 @@ -1937,8 +1992,8 @@ $code.=<<___; br $ra .size AES_xts_encrypt,.-AES_xts_encrypt ___ -# void AES_xts_decrypt(const char *inp,char *out,size_t len, -# const AES_KEY *key1, const AES_KEY *key2, +# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out, +# size_t len, const AES_KEY *key1, const AES_KEY *key2, # const unsigned char iv[16]); # $code.=<<___; @@ -2098,7 +2153,7 @@ $code.=<<___; xgr $s1,%r1 lrvgr $s1,$s1 # flip byte order lrvgr $s3,$s3 - srlg $s0,$s1,32 # smash the tweak to 4x32-bits + srlg $s0,$s1,32 # smash the tweak to 4x32-bits stg $s1,$tweak+0($sp) # save the tweak llgfr $s1,$s1 srlg $s2,$s3,32 @@ -2220,7 +2275,6 @@ ___ } $code.=<<___; .string "AES for s390x, CRYPTOGAMS by " -.comm OPENSSL_s390xcap_P,80,8 ___ $code =~ s/\`([^\`]*)\`/eval $1/gem;