2 # Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # SHA256/512 block procedure for PA-RISC.
21 # SHA256 performance is >75% better than gcc 3.2 generated code on
22 # PA-7100LC. Compared to code generated by the vendor compiler, this
23 # implementation is almost 70% faster in a 64-bit build, but delivers
24 # virtually the same performance in a 32-bit build on PA-8600.
26 # SHA512 performance is >2.9x better than gcc 3.2 generated code on
27 # PA-7100LC, a PA-RISC 1.1 processor. The implementation also detects
28 # if the code is executed on a PA-RISC 2.0 processor and switches to a
29 # 64-bit code path, delivering adequate performance even in a "blended"
30 # 32-bit build. However, the 64-bit code is not any faster than code
31 # generated by the vendor compiler on PA-8600...
33 # Special thanks to polarhome.com for providing HP-UX account.
35 # $output is the last argument if it looks like a file (it has an extension)
36 # $flavour is the first argument if it doesn't look like a file
37 $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
38 $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
# Redirect STDOUT to the requested output file, if one was given.  The
# three-argument form of open keeps the file name from being parsed as part
# of the mode, and the explicit check reports the OS error as soon as the
# file cannot be opened instead of carrying on without a usable STDOUT.
40 $output and (open(STDOUT, '>', $output) or die "can't open $output: $!");
42 if ($flavour =~ /64/) {
62 if ($output =~ /512/) {
63 $func="sha512_block_data_order";
75 $func="sha256_block_data_order";
88 $FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
89 # [+ argument transfer]
90 $XOFF=16*$SZ+32; # local variables
92 $XOFF+=$FRAME_MARKER; # distance between %sp and local variables
94 $ctx="%r26"; # zapped by $a0
95 $inp="%r25"; # zapped by $a1
96 $num="%r24"; # zapped by $t0
# Register allocation for the generic code path.  @V names the registers that
# hold the eight working variables $A..$H, which are loaded from and stored
# back to the context at $ctx.
104 @V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");
# @X names the registers for the sixteen input/schedule words.  $inp is
# appended as a 17th element so that the input-loading code can address one
# extra word as @X[16] when it aligns the data.
106 @X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
107 "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);
110 my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
112 _ror $e,$Sigma1[0],$a0
114 _ror $e,$Sigma1[1],$a1
118 _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
119 or $t0,$t1,$t1 ; Ch(e,f,g)
121 xor $a0,$a1,$a1 ; Sigma1(e)
123 _ror $a,$Sigma0[0],$a0
126 _ror $a,$Sigma0[1],$a1
130 _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
133 xor $a0,$a1,$a1 ; Sigma0(a)
135 xor $t1,$t0,$t0 ; Maj(a,b,c)
136 `"$LDM $SZ($Tbl),$t1" if ($i<15)`
144 my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
147 _ror @X[($i+1)%16],$sigma0[0],$a0
148 _ror @X[($i+1)%16],$sigma0[1],$a1
149 addl @X[($i+9)%16],@X[$i],@X[$i]
150 _ror @X[($i+14)%16],$sigma1[0],$t0
151 _ror @X[($i+14)%16],$sigma1[1],$t1
153 _shr @X[($i+1)%16],$sigma0[2],$a1
155 _shr @X[($i+14)%16],$sigma1[2],$t1
156 xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
157 xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
159 addl $a0,@X[$i],@X[$i]
160 addl $t0,@X[$i],@X[$i]
162 $code.=<<___ if ($i==15);
164 comiclr,<> $LAST10BITS,$a1,%r0
165 ldo 1($Tbl),$Tbl ; signal end of $Tbl
167 &ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
173 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
178 $code.=<<___ if ($SZ==8);
179 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
180 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
181 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
182 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
183 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
184 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
185 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
186 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
187 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
188 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
189 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
190 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
191 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
192 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
193 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
194 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
195 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
196 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
197 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
198 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
199 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
200 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
201 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
202 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
203 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
204 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
205 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
206 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
207 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
208 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
209 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
210 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
211 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
212 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
213 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
214 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
215 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
216 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
217 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
218 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
220 $code.=<<___ if ($SZ==4);
221 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
222 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
223 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
224 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
225 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
226 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
227 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
228 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
229 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
230 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
231 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
232 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
233 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
234 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
235 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
236 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
240 .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
244 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
246 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
247 $PUSHMA %r3,$FRAME(%sp)
248 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
249 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
250 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
251 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
252 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
253 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
254 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
255 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
256 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
257 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
258 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
259 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
260 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
261 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
262 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
264 _shl $num,`log(16*$SZ)/log(2)`,$num
265 addl $inp,$num,$num ; $num to point at the end of $inp
267 $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
268 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
269 $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
274 andcm $Tbl,$t1,$Tbl ; wipe privilege level
275 ldo L\$table-L\$pic($Tbl),$Tbl
277 $code.=<<___ if ($SZ==8 && $SIZE_T==4);
280 extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
285 $LD `0*$SZ`($ctx),$A ; load context
294 extru $inp,31,`log($SZ)/log(2)`,$t0
297 mtctl $t0,%cr11 ; load %sar with align factor
302 andcm $inp,$t0,$t0 ; align $inp
304 for ($i=0;$i<15;$i++) { # load input block
305 $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
307 cmpb,*= $inp,$t0,L\$aligned
308 $LD `$SZ*15`($t0),@X[15]
309 $LD `$SZ*16`($t0),@X[16]
311 for ($i=0;$i<16;$i++) { # align data
312 $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
315 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
318 for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
321 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
323 for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
325 bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
328 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
329 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
330 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
331 ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
333 $LD `0*$SZ`($ctx),@X[0] ; load context
334 $LD `1*$SZ`($ctx),@X[1]
335 $LD `2*$SZ`($ctx),@X[2]
336 $LD `3*$SZ`($ctx),@X[3]
337 $LD `4*$SZ`($ctx),@X[4]
338 $LD `5*$SZ`($ctx),@X[5]
340 $LD `6*$SZ`($ctx),@X[6]
342 $LD `7*$SZ`($ctx),@X[7]
343 ldo `16*$SZ`($inp),$inp ; advance $inp
345 $ST $A,`0*$SZ`($ctx) ; save context
360 cmpb,*<>,n $inp,$num,L\$oop
361 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
363 if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
373 @V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
374 $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
375 ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
376 "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
387 @X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
389 sub ROUND_00_15_pa1 {
390 my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
391 $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
392 my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
394 $code.=<<___ if (!$flag);
395 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
396 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
399 shd $ehi,$elo,$Sigma1[0],$t0
401 shd $elo,$ehi,$Sigma1[0],$t1
402 addc $Xhi,$hhi,$hhi ; h += X[i]
403 shd $ehi,$elo,$Sigma1[1],$t2
405 shd $elo,$ehi,$Sigma1[1],$t3
406 ldw -4($Tbl),$Xlo ; load K[i]
411 shd $ehi,$elo,$Sigma1[2],$t2
413 shd $elo,$ehi,$Sigma1[2],$t3
416 xor $t3,$t1,$t1 ; Sigma1(e)
419 addc $Xhi,$hhi,$hhi ; h += K[i]
420 xor $a3,$a1,$a1 ; Ch(e,f,g)
423 shd $ahi,$alo,$Sigma0[0],$t0
424 addc $t1,$hhi,$hhi ; h += Sigma1(e)
425 shd $alo,$ahi,$Sigma0[0],$t1
427 shd $ahi,$alo,$Sigma0[1],$t2
428 addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
429 shd $alo,$ahi,$Sigma0[1],$t3
433 shd $ahi,$alo,$Sigma0[2],$t2
435 shd $alo,$ahi,$Sigma0[2],$t3
438 xor $t3,$t1,$t1 ; Sigma0(a)
445 addc $hhi,$dhi,$dhi ; d += h
449 addc $t1,$hhi,$hhi ; h += Sigma0(a)
452 xor $a3,$a1,$a1 ; Maj(a,b,c)
453 addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
456 $code.=<<___ if ($i==15 && $flag);
457 extru $Xlo,31,10,$Xlo
458 comiclr,= $LAST10BITS,$Xlo,%r0
462 push(@X,shift(@X)); push(@X,shift(@X));
465 sub ROUND_16_xx_pa1 {
466 my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
470 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
471 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
472 ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
473 ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
474 ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
475 ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
476 shd $Xnhi,$Xnlo,$sigma0[0],$t0
477 shd $Xnlo,$Xnhi,$sigma0[0],$t1
479 shd $Xnhi,$Xnlo,$sigma0[1],$t2
481 shd $Xnlo,$Xnhi,$sigma0[1],$t3
483 shd $Xnhi,$Xnlo,$sigma0[2],$t2
485 extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
487 shd $a3,$a2,$sigma1[0],$a0
488 xor $t3,$t1,$t1 ; sigma0(X[(i+1)&0x0f])
489 shd $a2,$a3,$sigma1[0],$a1
491 shd $a3,$a2,$sigma1[1],$t2
493 shd $a2,$a3,$sigma1[1],$t3
495 shd $a3,$a2,$sigma1[2],$t2
497 extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
499 xor $t3,$a1,$a1 ; sigma1(X[(i+14)&0x0f])
503 stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
504 stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
506 &ROUND_00_15_pa1($i,@_,1);
509 ldw `0*4`($ctx),$Ahi ; load context
519 ldw `10*4`($ctx),$Fhi
520 ldw `11*4`($ctx),$Flo
521 ldw `12*4`($ctx),$Ghi
522 ldw `13*4`($ctx),$Glo
523 ldw `14*4`($ctx),$Hhi
524 ldw `15*4`($ctx),$Hlo
529 mtctl $t0,%cr11 ; load %sar with align factor
533 comib,= 0,$a3,L\$aligned_pa1
536 ldw `0*4`($inp),$X[0]
537 ldw `1*4`($inp),$X[1]
544 vshd $X[0],$X[1],$X[0]
546 stw $X[0],`-$XOFF+0*4`(%sp)
549 stw $X[1],`-$XOFF+1*4`(%sp)
554 my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
555 for ($i=2;$i<=(128/4-8);$i++) {
557 stw $t[0],`-$XOFF+$i*4`(%sp)
558 ldw `(8+$i)*4`($inp),$t[0]
559 vshd $t[1],$t[2],$t[1]
563 for (;$i<(128/4-1);$i++) {
565 stw $t[0],`-$XOFF+$i*4`(%sp)
566 vshd $t[1],$t[2],$t[1]
572 stw $t[0],`-$XOFF+$i*4`(%sp)
578 ldw `0*4`($inp),$X[0]
579 ldw `1*4`($inp),$X[1]
586 stw $X[0],`-$XOFF+0*4`(%sp)
588 stw $X[1],`-$XOFF+1*4`(%sp)
592 my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
593 for ($i=2;$i<(128/4-8);$i++) {
595 stw $t[0],`-$XOFF+$i*4`(%sp)
596 ldw `(8+$i)*4`($inp),$t[0]
600 for (;$i<128/4;$i++) {
602 stw $t[0],`-$XOFF+$i*4`(%sp)
606 $code.="L\$collected_pa1\n";
609 for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
610 $code.="L\$rounds_pa1\n";
611 for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
614 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
615 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
616 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
617 ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
619 ldw `0*4`($ctx),$t1 ; update context
658 ldo `16*$SZ`($inp),$inp ; advance $inp
662 stw $Fhi,`10*4`($ctx)
663 stw $Flo,`11*4`($ctx)
664 stw $Ghi,`12*4`($ctx)
665 stw $Glo,`13*4`($ctx)
666 stw $Hhi,`14*4`($ctx)
667 comb,= $inp,$num,L\$done
668 stw $Hlo,`15*4`($ctx)
670 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
675 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
676 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
677 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
678 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
679 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
680 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
681 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
682 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
683 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
684 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
685 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
686 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
687 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
688 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
689 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
690 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
693 $POPMB -$FRAME(%sp),%r3
695 .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
698 # Explicitly encode PA-RISC 2.0 instructions used in this module, so
699 # that it can be compiled with .LEVEL 1.0. It should be noted that I
700 # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
704 my ($mod,$args) = @_;
705 my $orig = "ldd$mod\t$args";
707 if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
708 { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
709 $opcode|=(1<<3) if ($mod =~ /^,m/);
710 $opcode|=(1<<2) if ($mod =~ /^,mb/);
711 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
717 my ($mod,$args) = @_;
718 my $orig = "std$mod\t$args";
720 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
721 { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
722 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
728 my ($mod,$args) = @_;
729 my $orig = "extrd$mod\t$args";
731 # I only have ",u" completer, it's implicitly encoded...
732 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
733 { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
735 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
736 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
737 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
739 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
740 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
742 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
743 $opcode |= (1<<13) if ($mod =~ /,\**=/);
744 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
750 my ($mod,$args) = @_;
751 my $orig = "shrpd$mod\t$args";
753 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
754 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
756 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
757 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
759 elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
760 { sprintf "\t.WORD\t0x%08x\t; %s",
761 (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
767 my ($mnemonic,$mod,$args)=@_;
768 my $opcode = eval("\$$mnemonic");
770 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
773 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
774 =~ /GNU assembler/) {
# Post-process the accumulated assembly text in $code one line at a time.
778 foreach (split("\n",$code)) {
# Evaluate every `...` span as a Perl expression and splice in its result.
779 s/\`([^\`]*)\`/eval $1/ge;
# Rewrite shd with an immediate count: a count above 31 is emitted with the
# two source registers swapped and the count reduced by 32.  The captured
# register names already begin with '%', so "%$1" hands sprintf a "%%"
# escape that prints as a single literal '%'.  The substitutions below are
# chained with "or", so at most one of them is applied to a given line.
781 s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
782 $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32
783 : sprintf("shd\t%$1,%$2,%d",$3)/e or
784 # translate made up instructions: _ror, _shr, _align, _shl
785 s/_ror(\s+)(%r[0-9]+),/
786 ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or
788 s/_shr(\s+%r[0-9]+),([0-9]+),/
789 $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
790 : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or
792 s/_align(\s+%r[0-9]+,%r[0-9]+),/
793 ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or
795 s/_shl(\s+%r[0-9]+),([0-9]+),/
796 $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
797 : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
# In 32-bit builds, route each instruction through assemble(), which calls
# an encoder code reference named after the mnemonic when one exists and
# otherwise rebuilds the instruction text from its parts.
799 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);
# Directive adjustments applied only when $gnuas is set in a 64-bit build:
# lower-case the trailing W of ".LEVEL 2.0W", replace ".SPACE $TEXT$" with
# ".text", and blank out .SUBSPA directives.
801 s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
802 s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
803 s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
# 32-bit builds: rewrite "cmpb,*" as "comb,".
804 s/cmpb,\*/comb,/ if ($SIZE_T==4);
# 64-bit builds: rewrite the standalone word "bv" as "bve".
805 s/\bbv\b/bve/ if ($SIZE_T==8);
810 close STDOUT or die "error closing STDOUT";