X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Faes%2Fasm%2Faest4-sparcv9.pl;h=c9a2ff69d35e96a4db25988c0d280a4431fda456;hb=32be631ca1f2b73c92e4f7f5d23f1c3aee80ec69;hp=558acd603b82570b4f53288249d3018dd37e8056;hpb=8ed11a815ee62472fc197d1a1a3dcdb6c0681342;p=oweals%2Fopenssl.git diff --git a/crypto/aes/asm/aest4-sparcv9.pl b/crypto/aes/asm/aest4-sparcv9.pl index 558acd603b..c9a2ff69d3 100644 --- a/crypto/aes/asm/aest4-sparcv9.pl +++ b/crypto/aes/asm/aest4-sparcv9.pl @@ -1,9 +1,16 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== -# Written by David S. Miller and Andy Polyakov -# . The module is licensed under 2-clause BSD -# license. October 2012. All rights reserved. +# Written by David S. Miller and Andy Polyakov. +# The module is licensed under 2-clause BSD license. October 2012. +# All rights reserved. # ==================================================================== ###################################################################### @@ -37,7 +44,7 @@ # instructions with those on critical path. Amazing! # # As with Intel AES-NI, question is if it's possible to improve -# performance of parallelizeable modes by interleaving round +# performance of parallelizable modes by interleaving round # instructions. Provided round instruction latency and throughput # optimal interleave factor is 2. But can we expect 2x performance # improvement? Well, as round instructions can be issued one per @@ -68,7 +75,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "sparcv9_modes.pl"; -&asm_init(@ARGV); +$output = pop and open STDOUT,">$output"; $::evp=1; # if $evp is set to 0, script generates module with # AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry @@ -83,7 +90,14 @@ $::evp=1; # if $evp is set to 0, script generates module with { my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5)); -$code=<<___; +$code.=<<___; +#include "sparc_arch.h" + +#ifdef __arch64__ +.register %g2,#scratch +.register %g3,#scratch +#endif + .text .globl aes_t4_encrypt @@ -411,24 +425,6 @@ my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5)); my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7)); $code.=<<___; -.align 32 -_aes128_loadkey: - ldx [$key + 0], %g4 - ldx [$key + 8], %g5 -___ -for ($i=2; $i<22;$i++) { # load key schedule - $code.=<<___; - ldd [$key + `8*$i`], %f`12+2*$i` -___ -} -$code.=<<___; - retl - nop -.type _aes128_loadkey,#function -.size _aes128_loadkey,.-_aes128_loadkey -_aes128_load_enckey=_aes128_loadkey -_aes128_load_deckey=_aes128_loadkey - .align 32 _aes128_encrypt_1x: ___ @@ -477,6 +473,35 @@ $code.=<<___; .type _aes128_encrypt_2x,#function .size _aes128_encrypt_2x,.-_aes128_encrypt_2x +.align 32 +_aes128_loadkey: + ldx [$key + 0], %g4 + ldx [$key + 8], %g5 +___ +for ($i=2; $i<22;$i++) { # load key schedule + $code.=<<___; + ldd [$key + `8*$i`], %f`12+2*$i` +___ +} +$code.=<<___; + retl + nop +.type _aes128_loadkey,#function +.size _aes128_loadkey,.-_aes128_loadkey +_aes128_load_enckey=_aes128_loadkey +_aes128_load_deckey=_aes128_loadkey + +___ + +&alg_cbc_encrypt_implement("aes",128); +if ($::evp) { + &alg_ctr32_implement("aes",128); + &alg_xts_implement("aes",128,"en"); + &alg_xts_implement("aes",128,"de"); +} +&alg_cbc_decrypt_implement("aes",128); + +$code.=<<___; .align 32 _aes128_decrypt_1x: ___ @@ -524,28 +549,9 @@ $code.=<<___; aes_dround23_l %f54, %f10, %f6, %f6 .type _aes128_decrypt_2x,#function .size _aes128_decrypt_2x,.-_aes128_decrypt_2x - -.align 32 -_aes192_loadkey: -_aes256_loadkey: - ldx [$key + 0], %g4 - ldx [$key + 8], %g5 -___ -for ($i=2; $i<26;$i++) { # load key schedule - $code.=<<___; - ldd [$key + `8*$i`], %f`12+2*$i` ___ -} -$code.=<<___; - retl - nop -.type _aes192_loadkey,#function -.size _aes192_loadkey,.-_aes192_loadkey -_aes192_load_enckey=_aes192_loadkey -_aes192_load_deckey=_aes192_loadkey -_aes256_load_enckey=_aes192_loadkey -_aes256_load_deckey=_aes192_loadkey +$code.=<<___; .align 32 _aes192_encrypt_1x: ___ @@ -594,54 +600,6 @@ $code.=<<___; .type _aes192_encrypt_2x,#function .size _aes192_encrypt_2x,.-_aes192_encrypt_2x -.align 32 -_aes192_decrypt_1x: -___ -for ($i=0; $i<5; $i++) { - $code.=<<___; - aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 - aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 - aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 - aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 -___ -} -$code.=<<___; - aes_dround01 %f56, %f0, %f2, %f4 - aes_dround23 %f58, %f0, %f2, %f2 - aes_dround01_l %f60, %f4, %f2, %f0 - retl - aes_dround23_l %f62, %f4, %f2, %f2 -.type _aes192_decrypt_1x,#function -.size _aes192_decrypt_1x,.-_aes192_decrypt_1x - -.align 32 -_aes192_decrypt_2x: -___ -for ($i=0; $i<5; $i++) { - $code.=<<___; - aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8 - aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 - aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10 - aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6 - aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0 - aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2 - aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4 - aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6 -___ -} -$code.=<<___; - aes_dround01 %f56, %f0, %f2, %f8 - aes_dround23 %f58, %f0, %f2, %f2 - aes_dround01 %f56, %f4, %f6, %f10 - aes_dround23 %f58, %f4, %f6, %f6 - aes_dround01_l %f60, %f8, %f2, %f0 - aes_dround23_l %f62, %f8, %f2, %f2 - aes_dround01_l %f60, %f10, %f6, %f4 - retl - aes_dround23_l %f62, %f10, %f6, %f6 -.type _aes192_decrypt_2x,#function -.size _aes192_decrypt_2x,.-_aes192_decrypt_2x - .align 32 _aes256_encrypt_1x: aes_eround01 %f16, %f0, %f2, %f4 @@ -718,6 +676,40 @@ $code.=<<___; .type _aes256_encrypt_2x,#function .size _aes256_encrypt_2x,.-_aes256_encrypt_2x +.align 32 +_aes192_loadkey: + ldx [$key + 0], %g4 + ldx [$key + 8], %g5 +___ +for ($i=2; $i<26;$i++) { # load key schedule + $code.=<<___; + ldd [$key + `8*$i`], %f`12+2*$i` +___ +} +$code.=<<___; + retl + nop +.type _aes192_loadkey,#function +.size _aes192_loadkey,.-_aes192_loadkey +_aes256_loadkey=_aes192_loadkey +_aes192_load_enckey=_aes192_loadkey +_aes192_load_deckey=_aes192_loadkey +_aes256_load_enckey=_aes192_loadkey +_aes256_load_deckey=_aes192_loadkey +___ + +&alg_cbc_encrypt_implement("aes",256); +&alg_cbc_encrypt_implement("aes",192); +if ($::evp) { + &alg_ctr32_implement("aes",256); + &alg_xts_implement("aes",256,"en"); + &alg_xts_implement("aes",256,"de"); + &alg_ctr32_implement("aes",192); +} +&alg_cbc_decrypt_implement("aes",192); +&alg_cbc_decrypt_implement("aes",256); + +$code.=<<___; .align 32 _aes256_decrypt_1x: aes_dround01 %f16, %f0, %f2, %f4 @@ -793,21 +785,55 @@ $code.=<<___; ldd [$key + 40], %f22 .type _aes256_decrypt_2x,#function .size _aes256_decrypt_2x,.-_aes256_decrypt_2x -___ -&alg_cbc_encrypt_implement("aes",128); -&alg_cbc_encrypt_implement("aes",192); -&alg_cbc_encrypt_implement("aes",256); - -&alg_cbc_decrypt_implement("aes",128); -&alg_cbc_decrypt_implement("aes",192); -&alg_cbc_decrypt_implement("aes",256); +.align 32 +_aes192_decrypt_1x: +___ +for ($i=0; $i<5; $i++) { + $code.=<<___; + aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 + aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 + aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 +___ +} +$code.=<<___; + aes_dround01 %f56, %f0, %f2, %f4 + aes_dround23 %f58, %f0, %f2, %f2 + aes_dround01_l %f60, %f4, %f2, %f0 + retl + aes_dround23_l %f62, %f4, %f2, %f2 +.type _aes192_decrypt_1x,#function +.size _aes192_decrypt_1x,.-_aes192_decrypt_1x -if ($::evp) { - &alg_ctr32_implement("aes",128); - &alg_ctr32_implement("aes",192); - &alg_ctr32_implement("aes",256); +.align 32 +_aes192_decrypt_2x: +___ +for ($i=0; $i<5; $i++) { + $code.=<<___; + aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8 + aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10 + aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6 + aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0 + aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2 + aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4 + aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6 +___ } +$code.=<<___; + aes_dround01 %f56, %f0, %f2, %f8 + aes_dround23 %f58, %f0, %f2, %f2 + aes_dround01 %f56, %f4, %f6, %f10 + aes_dround23 %f58, %f4, %f6, %f6 + aes_dround01_l %f60, %f8, %f2, %f0 + aes_dround23_l %f62, %f8, %f2, %f2 + aes_dround01_l %f60, %f10, %f6, %f4 + retl + aes_dround23_l %f62, %f10, %f6, %f6 +.type _aes192_decrypt_2x,#function +.size _aes192_decrypt_2x,.-_aes192_decrypt_2x +___ }}} if (!$::evp) { @@ -899,4 +925,4 @@ ___ &emit_assembler(); -close STDOUT; +close STDOUT or die "error closing STDOUT";