2 # Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # This module implements Poly1305 hash for s390x.
# ~6.6/2.3 cpb on z10/z196+, a >2x improvement over compiler-generated
# code. With older compilers the improvement coefficient is >3x, because
# in that case base 2^64 and base 2^32 implementations are compared.
# On a side note, z13 enables a vector base 2^26 implementation...
29 if ($flavour =~ /3[12]/) {
# Scan the remaining command-line arguments for the first one that looks
# like an output file name (it must end in "name.ext"); every argument in
# front of it is discarded.  $output is left false when no such argument
# exists.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Send the generated text to that file.  When no file name was given the
# script keeps writing to the STDOUT it inherited.  A file that cannot be
# created is reported as a hard error rather than being silently ignored,
# and the 3-arg form of open keeps the name from being parsed as a mode.
$output and (open STDOUT, '>', $output or die "can't open $output: $!");
# Symbolic names for general-purpose registers %r2..%r5; the assembly
# text interpolates these instead of hard-coding register numbers.
my ($ctx, $inp, $len, $padbit) = qw(%r2 %r3 %r4 %r5);
48 .type poly1305_init,\@function
53 stg %r0,0($ctx) # zero hash value
60 lrvg %r4,0($inp) # load little-endian key
63 nihl %r1,0xffc0 # 0xffffffc0ffffffff
64 srlg %r0,%r1,4 # 0x0ffffffc0fffffff
66 nill %r1,0xfffc # 0x0ffffffc0ffffffc
77 .size poly1305_init,.-poly1305_init
# Register names interpolated by the poly1305_blocks assembly text:
#   %r6..%r14 -> $d0hi,$d0lo,$d1hi,$d1lo,$t0,$h0,$t1,$h1,$h2
#   %r0..%r2  -> $r0,$r1,$s1
# Note that $s1 lands on %r2, the register that is also named $ctx.
my ($d0hi, $d0lo, $d1hi, $d1lo, $t0, $h0, $t1, $h1, $h2) =
    map { '%r' . $_ } 6 .. 14;
my ($r0, $r1, $s1) = map { '%r' . $_ } 0 .. 2;
84 .globl poly1305_blocks
85 .type poly1305_blocks,\@function
88 srl${g} $len,4 # fixed-up in 64-bit build
93 stm${g} %r6,%r14,`6*$SIZE_T`($sp)
95 llgfr $padbit,$padbit # clear upper half, much needed with
97 lg $r0,32($ctx) # load key
100 lg $h0,0($ctx) # load hash value
104 st$g $ctx,`2*$SIZE_T`($sp) # off-load $ctx
106 algr $s1,$r1 # s1 = r1 + r1>>2
111 lrvg $d0lo,0($inp) # load little-endian input
115 algr $d0lo,$h0 # accumulate input
119 mlgr $d0hi,$r0 # h0*r0 -> $d0hi:$d0lo
121 mlgr $d1hi,$s1 # h1*5*r1 -> $d1hi:$d1lo
123 mlgr $t0,$r1 # h0*r1 -> $t0:$h0
124 mlgr $t1,$r0 # h1*r0 -> $t1:$h1
135 msgr $d1lo,$s1 # h2*s1
139 alcgr $t1,$d1hi # $d1hi is zero
144 lghi $h0,-4 # final reduction step
152 alcgr $h1,$d1hi # $d1hi is still zero
153 alcgr $h2,$d1hi # $d1hi is still zero
157 l$g $ctx,`2*$SIZE_T`($sp) # restore $ctx
159 stg $h0,0($ctx) # store hash value
163 lm${g} %r6,%r14,`6*$SIZE_T`($sp)
166 .size poly1305_blocks,.-poly1305_blocks
# Register names interpolated by the poly1305_emit assembly text.  $mac and
# $nonce are new names for the registers held in $inp and $len; the
# working values use %r5..%r9.
my $mac   = $inp;
my $nonce = $len;
my ($h0, $h1, $h2, $d0, $d1) = map { sprintf('%%r%d', $_) } 5 .. 9;
175 .type poly1305_emit,\@function
178 stm${g} %r6,%r9,`6*$SIZE_T`($sp)
189 algr $h0,%r0 # compare to modulus
193 srlg $h2,$h2,2 # did it borrow/carry?
194 slgr %r1,$h2 # 0-$h2>>2
195 lg $h2,0($nonce) # load nonce
205 rllg $d0,$h2,32 # flip nonce words
209 algr $h0,$d0 # accumulate nonce
212 strvg $h0,0($mac) # write little-endian result
215 lm${g} %r6,%r9,`6*$SIZE_T`($sp)
217 .size poly1305_emit,.-poly1305_emit
219 .string "Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
# Post-process the generated text held in $code.
# 1. Replace every `...` span with the value of the Perl expression
#    written between the backticks.
$code =~ s{\`([^\`]*)\`}{eval $1}gem;
# 2. Rewrite two-operand "srlg %rN,<count>" as the three-operand form
#    "srlg %rN,%rN,<count>" by repeating the register operand.
$code =~ s{\b(srlg\s+)(%r[0-9]+\s*,)\s*([0-9]+)}{$1$2$2$3}gm;