2 # Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
21 # Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
22 # For reference, the [4x] unrolled loop is >40% faster than the folded one.
23 # It's possible to unroll the loop 8 times on PA-RISC 2.0, but the improvement
24 # is not believed to be sufficient to justify the effort...
26 # Special thanks to polarhome.com for providing HP-UX account.
28 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# $dir = script path up to and including the last '/' or '\'; prefixes the opensslconf.h path opened further down
32 open STDOUT,">$output";	# redirect STDOUT to the file named by $output; NOTE(review): the result of open is not checked here
34 if ($flavour =~ /64/) {
54 $FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker
55 # [+ argument transfer]
56 $SZ=1; # defaults to RC4_CHAR
57 if (open CONF,"<${dir}../../opensslconf.h") {
59 if (m/#\s*define\s+RC4_INT\s+(.*)/) {
60 $SZ = ($1=~/char$/) ? 1 : 4;
67 if ($SZ==1) { # RC4_CHAR
72 } else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
97 sub unrolledloopbody {
98 for ($i=0;$i<4;$i++) {
101 `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
102 and $mask,$XX[1],$XX[1]
105 $LDX $XX[1]($key),$TX[1]
108 comclr,<> $XX[1],$YY,%r0 ; conditional
109 copy $TX[0],$TX[1] ; move
110 `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
117 push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
121 my ($label,$count)=@_;
131 ldbx $inp($out),$dat1
133 and $mask,$XX[0],$XX[0]
135 $LDX $XX[0]($key),$TX[0]
140 addib,<> -1,$count,$label ; $count is always small
148 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
150 .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
153 .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
155 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
156 $PUSHMA %r3,$FRAME(%sp)
157 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
158 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
159 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
161 cmpib,*= 0,$len,L\$abort
162 sub $inp,$out,$inp ; distance between $inp and $out
164 $LD `0*$SZ`($key),$XX[0]
165 $LD `1*$SZ`($key),$YY
166 ldo `2*$SZ`($key),$key
171 ldo 1($XX[0]),$XX[0] ; warm up loop
172 and $mask,$XX[0],$XX[0]
173 $LDX $XX[0]($key),$TX[0]
175 cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
178 and,<> $out,$dat0,$rem ; is $out aligned?
183 &foldedloop("L\$alignout",$rem); # process till $out is aligned
186 L\$alignedout ; $len is at least 4 here
187 and,<> $inp,$dat0,$acc ; is $inp aligned?
189 sub $inp,$acc,$rem ; align $inp
191 sh3addl $acc,%r0,$acc
193 mtctl $acc,%cr11 ; load %sar with vshd align factor
194 ldwx $rem($out),$dat0
201 ldwx $rem($out),$dat1
203 or $ix,$acc,$acc ; last piece, no need to dep
204 vshd $dat0,$dat1,$iy ; align data
208 cmpib,*<< 3,$len,L\$oop4misalignedinp
210 cmpib,*= 0,$len,L\$done
221 ldwx $inp($out),$dat0
223 or $ix,$acc,$acc ; last piece, no need to dep
226 cmpib,*<< 3,$len,L\$oop4
228 cmpib,*= 0,$len,L\$done
231 &foldedloop("L\$oop1",$len);
234 $POP `-$FRAME-$SAVED_RP`(%sp),%r2
235 ldo -1($XX[0]),$XX[0] ; chill out loop
237 and $mask,$XX[0],$XX[0]
239 $ST $XX[0],`-2*$SZ`($key)
240 $ST $YY,`-1*$SZ`($key)
241 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
242 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
243 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
247 $POPMB -$FRAME(%sp),%r3
253 .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
259 $ST %r0,`0*$SZ`($key)
260 $ST %r0,`1*$SZ`($key)
261 ldo `2*$SZ`($key),$key
266 bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
269 ldo `-256*$SZ`($key),$key ; rewind $key
270 addl $len,$inp,$inp ; $inp to point at the end
271 sub %r0,$len,%r23 ; inverse index
277 $LDX @XX[0]($key),@TX[0]
278 ldbx %r23($inp),@TX[1]
279 addi,nuv 1,%r23,%r23 ; increment and conditional
280 sub %r0,$len,%r23 ; inverse index
281 addl @TX[0],@XX[1],@XX[1]
282 addl @TX[1],@XX[1],@XX[1]
283 and $mask,@XX[1],@XX[1]
285 $LDX @XX[1]($key),@TX[1]
289 bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
297 .EXPORT RC4_options,ENTRY
309 ldo L\$opts-L\$pic(%r28),%r28
313 .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
314 .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
316 $code =~ s/\`([^\`]*)\`/eval $1/gem;	# replace every `...` span in $code with the value of eval'ing its contents as Perl
317 $code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);	# when $SIZE_T is 4: rewrite each "cmpib,*" as "comib,"
318 $code =~ s/\bbv\b/bve/gm if ($SIZE_T==8);	# when $SIZE_T is 8: rewrite each whole-word "bv" as "bve"