3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # SHA1 block procedure for s390x.
14 # Performance is >30% better than gcc 3.3 generated code. But the real
15 # twist is that SHA1 hardware support is detected and utilized, in
16 # which case performance can improve by a further >4.5x for larger chunks.
20 # Optimize Xupdate to reduce the number of memory references and
21 # reschedule instructions to favour the dual-issue z10 pipeline. On z10,
22 # the hardware is "only" ~2.3x faster than software.
# Function code used for the KIMD instruction. A false value suppresses
# generation of the hardware-assisted code path, which is only appended
# to $code when $kimdfunc is true.
24 $kimdfunc=1; # magic function code for kimd instruction
# Send the generated assembly to the file named by $output. When no name
# was supplied STDOUT is left untouched; a named file that cannot be
# opened is a fatal error instead of being silently ignored.
27 if (defined($output) && length($output)) { open(STDOUT,'>',$output) or die "can't open $output: $!"; }
# Register holding the round constant for rounds 0..39. $K names the
# constant register currently in use and is switched to $K_40_79 before
# round 40 is generated.
29 $K_00_39="%r0"; $K=$K_00_39;
# $ctx and $prefetch deliberately share %r2: the register is reused for
# Xupdate prefetch loads, and $ctx is reloaded from the stack frame
# before it is needed again at the end of the block loop.
31 $ctx="%r2"; $prefetch="%r2";
# @V lists the five working-variable registers; the round loops rotate
# it after every round so each BODY_* call sees the roles shifted by one.
39 $E="%r9"; @V=($A,$B,$C,$D,$E);
# Scratch registers used by the Xload/Xupdate code.
42 @X=("%r12","%r13","%r14");
50 $code.=<<___ if ($i==15);
51 lg $prefetch,160($sp) ### Xupdate(16) warm-up
54 return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle
55 $code.=<<___ if ($i<16);
56 lg $X[0],`$i*4`($inp) ### Xload($i)
59 $code.=<<___ if ($i>=16);
60 xgr $X[0],$prefetch ### Xupdate($i)
61 lg $prefetch,`160+4*(($i+2)%16)`($sp)
62 xg $X[0],`160+4*(($i+8)%16)`($sp)
68 lr $X[2],$X[1] # feedback
70 $code.=<<___ if ($i<=70);
71 stg $X[0],`160+4*($i%16)`($sp)
77 my ($i,$a,$b,$c,$d,$e)=@_;
96 my ($i,$a,$b,$c,$d,$e)=@_;
114 my ($i,$a,$b,$c,$d,$e)=@_;
137 .type Ktable,\@object
138 Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6
139 .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0
140 .size Ktable,.-Ktable
141 .globl sha1_block_data_order
142 .type sha1_block_data_order,\@function
143 sha1_block_data_order:
145 $code.=<<___ if ($kimdfunc);
146 larl %r1,OPENSSL_s390xcap_P
148 tmhl %r0,0x4000 # check for message-security assist
152 .long 0xb93e0002 # kimd %r0,%r2
154 tmhh %r0,`0x8000>>$kimdfunc`
160 .long 0xb93e0002 # kimd %r0,%r2
161 brc 1,.-4 # pay attention to "partial completion"
169 stmg %r6,%r15,48($sp)
185 rllg $K_00_39,$K_00_39,32
187 for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } # rounds 0..19; @V is rotated right by one after every round
189 rllg $K_00_39,$K_00_39,32
191 for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } # rounds 20..39; $i carries over from the previous loop
192 $code.=<<___; $K=$K_40_79;
193 rllg $K_40_79,$K_40_79,32
195 for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } # rounds 40..59, generated with $K set to $K_40_79
197 rllg $K_40_79,$K_40_79,32
199 for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } # rounds 60..79 reuse the BODY_20_39 generator
202 lg $ctx,`$frame+16`($sp)
216 lmg %r6,%r15,`$frame+48`($sp)
218 .size sha1_block_data_order,.-sha1_block_data_order
219 .string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
220 .comm OPENSSL_s390xcap_P,8,8
223 $code =~ s/\`([^\`]*)\`/eval $1/gem; # replace every backtick-quoted expression in $code with the result of evaluating it as Perl