2 # Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
21 # Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
22 # For reference, the [4x] unrolled loop is >40% faster than the folded one.
23 # It's possible to unroll the loop 8 times on PA-RISC 2.0, but the
24 # improvement is not believed to be sufficient to justify the effort...
26 # Special thanks to polarhome.com for providing HP-UX account.
# Directory part of this script's own path, including the trailing slash or
# backslash; it is used further down to locate opensslconf.h.
28 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
30 # $output is the last argument if it looks like a file (it has an extension)
31 # $flavour is the first argument if it doesn't look like a file
32 $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
33 $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
# Send the generated assembly to $output when one was given.  The
# three-argument open keeps the file name from being parsed as a mode, and a
# failed open is fatal instead of silently leaving the output on the original
# STDOUT (consistent with the checked close at the end of the script).
35 $output and (open(STDOUT, '>', $output) or die "can't open $output: $!");
# Flavours whose name contains "64" take this branch.
37 if ($flavour =~ /64/) {
57 $FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker
58 # [+ argument transfer]
# $SZ scales every offset into the key structure used by the generated code:
# 1 when RC4_INT is a char type, 4 otherwise.
59 $SZ=1; # defaults to RC4_CHAR
# Consult opensslconf.h, two directories above this script, for the configured
# RC4_INT type.  If the file cannot be opened the default above is kept.
60 if (open CONF,"<${dir}../../opensslconf.h") {
# A "#define RC4_INT <type>" line decides the size: a definition ending in
# "char" selects 1, anything else selects 4.
62 if (m/#\s*define\s+RC4_INT\s+(.*)/) {
63 $SZ = ($1=~/char$/) ? 1 : 4;
# The remaining setup depends on the element size chosen above.
70 if ($SZ==1) { # RC4_CHAR
75 } else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
# Generates the unrolled loop body: the template below is produced once for
# each $i in 0..3.  The backquoted sprintf fragments contribute an instruction
# only when $i>0, and the @TX/@XX register lists are rotated so that the
# [0]/[1] slots name different registers from one copy to the next.
100 sub unrolledloopbody {
101 for ($i=0;$i<4;$i++) {
104 `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
105 and $mask,$XX[1],$XX[1]
108 $LDX $XX[1]($key),$TX[1]
111 comclr,<> $XX[1],$YY,%r0 ; conditional
112 copy $TX[0],$TX[1] ; move
113 `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
120 push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
# Arguments: $label is the branch target that closes the loop; $count is the
# register that the closing addib decrements by one on every pass.
124 my ($label,$count)=@_;
134 ldbx $inp($out),$dat1
136 and $mask,$XX[0],$XX[0]
138 $LDX $XX[0]($key),$TX[0]
143 addib,<> -1,$count,$label ; $count is always small
151 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
153 .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
156 .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
158 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
159 $PUSHMA %r3,$FRAME(%sp)
160 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
161 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
162 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
164 cmpib,*= 0,$len,L\$abort
165 sub $inp,$out,$inp ; distance between $inp and $out
167 $LD `0*$SZ`($key),$XX[0]
168 $LD `1*$SZ`($key),$YY
169 ldo `2*$SZ`($key),$key
174 ldo 1($XX[0]),$XX[0] ; warm up loop
175 and $mask,$XX[0],$XX[0]
176 $LDX $XX[0]($key),$TX[0]
178 cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
181 and,<> $out,$dat0,$rem ; is $out aligned?
186 &foldedloop("L\$alignout",$rem); # process till $out is aligned
189 L\$alignedout ; $len is at least 4 here
190 and,<> $inp,$dat0,$acc ; is $inp aligned?
192 sub $inp,$acc,$rem ; align $inp
194 sh3addl $acc,%r0,$acc
196 mtctl $acc,%cr11 ; load %sar with vshd align factor
197 ldwx $rem($out),$dat0
204 ldwx $rem($out),$dat1
206 or $ix,$acc,$acc ; last piece, no need to dep
207 vshd $dat0,$dat1,$iy ; align data
211 cmpib,*<< 3,$len,L\$oop4misalignedinp
213 cmpib,*= 0,$len,L\$done
224 ldwx $inp($out),$dat0
226 or $ix,$acc,$acc ; last piece, no need to dep
229 cmpib,*<< 3,$len,L\$oop4
231 cmpib,*= 0,$len,L\$done
234 &foldedloop("L\$oop1",$len);
237 $POP `-$FRAME-$SAVED_RP`(%sp),%r2
238 ldo -1($XX[0]),$XX[0] ; chill out loop
240 and $mask,$XX[0],$XX[0]
242 $ST $XX[0],`-2*$SZ`($key)
243 $ST $YY,`-1*$SZ`($key)
244 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
245 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
246 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
250 $POPMB -$FRAME(%sp),%r3
256 .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
262 $ST %r0,`0*$SZ`($key)
263 $ST %r0,`1*$SZ`($key)
264 ldo `2*$SZ`($key),$key
269 bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
272 ldo `-256*$SZ`($key),$key ; rewind $key
273 addl $len,$inp,$inp ; $inp to point at the end
274 sub %r0,$len,%r23 ; inverse index
280 $LDX @XX[0]($key),@TX[0]
281 ldbx %r23($inp),@TX[1]
282 addi,nuv 1,%r23,%r23 ; increment and conditional
283 sub %r0,$len,%r23 ; inverse index
284 addl @TX[0],@XX[1],@XX[1]
285 addl @TX[1],@XX[1],@XX[1]
286 and $mask,@XX[1],@XX[1]
288 $LDX @XX[1]($key),@TX[1]
292 bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
300 .EXPORT RC4_options,ENTRY
312 ldo L\$opts-L\$pic(%r28),%r28
316 .STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
317 .STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
# Probe the toolchain: run $ENV{CC} with "-Wa,-v" on an empty assembler input
# and look for "GNU assembler" in its combined stdout/stderr output.
# NOTE(review): presumably the result is recorded in $gnuas, which the
# substitutions below test -- confirm against the body of this branch.
320 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
321 =~ /GNU assembler/) {
# Post-process the accumulated $code one line at a time.
325 foreach(split("\n",$code)) {
# Replace every `...` span with the value of the Perl expression inside it.
326 s/\`([^\`]*)\`/eval $1/ge;
# With $gnuas set and $SIZE_T==8: spell ".LEVEL 2.0W" with a lower-case "w",
# turn ".SPACE $TEXT$" into ".text", and delete ".SUBSPA" directives through
# the end of the line.
328 s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
329 s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
330 s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
# $SIZE_T==4: rewrite the "cmpib,*" mnemonic prefix as "comib,".
331 s/cmpib,\*/comib,/ if ($SIZE_T==4);
# $SIZE_T==8: rewrite the whole word "bv" as "bve".
332 s/\bbv\b/bve/ if ($SIZE_T==8);
# The close is checked so that a failure while finishing the output aborts
# the script instead of passing unnoticed.
336 close STDOUT or die "error closing STDOUT";