2 # Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # SHA256/512 block procedure for PA-RISC.
21 # SHA256 performance is >75% better than gcc 3.2 generated code on
22 # PA-7100LC. Compared to code generated by the vendor compiler, this
23 # implementation is almost 70% faster in a 64-bit build, but delivers
24 # virtually the same performance in a 32-bit build on PA-8600.
26 # SHA512 performance is >2.9x better than gcc 3.2 generated code on
27 # PA-7100LC, a PA-RISC 1.1 processor. The implementation then detects
28 # whether the code is executed on a PA-RISC 2.0 processor and switches
29 # to the 64-bit code path, delivering adequate performance even in a
30 # "blended" 32-bit build. The 64-bit code, however, is not any faster
31 # than code generated by the vendor compiler on PA-8600...
33 # Special thanks to polarhome.com for providing an HP-UX account.
# Redirect STDOUT to the requested output file. When no output file is
# named, STDOUT is left untouched so the generated code goes to the
# inherited stream; failing to open a named file is fatal rather than
# silently ignored.
if (defined($output) && $output ne "") {
    open(STDOUT, ">", $output) or die "can't open $output: $!";
}
39 if ($flavour =~ /64/) {
59 if ($output =~ /512/) {
60 $func="sha512_block_data_order";
72 $func="sha256_block_data_order";
85 $FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
86 # [+ argument transfer]
87 $XOFF=16*$SZ+32; # local variables
89 $XOFF+=$FRAME_MARKER; # distance between %sp and local variables
91 $ctx="%r26"; # zapped by $a0
92 $inp="%r25"; # zapped by $a1
93 $num="%r24"; # zapped by $t0
# Registers assigned to the eight state words A..H. @V is the list that
# the round drivers further down pass to the round generators.
101 @V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");
# Sixteen registers for the message words, followed by $inp as a 17th
# entry; that extra entry is referenced as @X[16] by the input-loading
# and _align code.
103 @X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
104 "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);
107 my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
109 _ror $e,$Sigma1[0],$a0
111 _ror $e,$Sigma1[1],$a1
115 _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
116 or $t0,$t1,$t1 ; Ch(e,f,g)
118 xor $a0,$a1,$a1 ; Sigma1(e)
120 _ror $a,$Sigma0[0],$a0
123 _ror $a,$Sigma0[1],$a1
127 _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
130 xor $a0,$a1,$a1 ; Sigma0(a)
132 xor $t1,$t0,$t0 ; Maj(a,b,c)
133 `"$LDM $SZ($Tbl),$t1" if ($i<15)`
141 my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
144 _ror @X[($i+1)%16],$sigma0[0],$a0
145 _ror @X[($i+1)%16],$sigma0[1],$a1
146 addl @X[($i+9)%16],@X[$i],@X[$i]
147 _ror @X[($i+14)%16],$sigma1[0],$t0
148 _ror @X[($i+14)%16],$sigma1[1],$t1
150 _shr @X[($i+1)%16],$sigma0[2],$a1
152 _shr @X[($i+14)%16],$sigma1[2],$t1
153 xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
154 xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
156 addl $a0,@X[$i],@X[$i]
157 addl $t0,@X[$i],@X[$i]
159 $code.=<<___ if ($i==15);
161 comiclr,<> $LAST10BITS,$a1,%r0
162 ldo 1($Tbl),$Tbl ; signal end of $Tbl
164 &ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
170 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
175 $code.=<<___ if ($SZ==8);
176 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
177 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
178 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
179 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
180 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
181 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
182 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
183 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
184 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
185 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
186 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
187 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
188 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
189 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
190 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
191 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
192 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
193 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
194 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
195 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
196 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
197 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
198 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
199 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
200 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
201 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
202 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
203 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
204 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
205 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
206 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
207 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
208 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
209 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
210 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
211 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
212 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
213 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
214 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
215 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
217 $code.=<<___ if ($SZ==4);
218 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
219 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
220 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
221 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
222 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
223 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
224 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
225 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
226 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
227 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
228 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
229 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
230 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
231 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
232 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
233 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
237 .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
241 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
243 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
244 $PUSHMA %r3,$FRAME(%sp)
245 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
246 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
247 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
248 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
249 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
250 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
251 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
252 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
253 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
254 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
255 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
256 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
257 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
258 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
259 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
261 _shl $num,`log(16*$SZ)/log(2)`,$num
262 addl $inp,$num,$num ; $num to point at the end of $inp
264 $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
265 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
266 $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
271 andcm $Tbl,$t1,$Tbl ; wipe privilege level
272 ldo L\$table-L\$pic($Tbl),$Tbl
274 $code.=<<___ if ($SZ==8 && $SIZE_T==4);
277 extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
282 $LD `0*$SZ`($ctx),$A ; load context
291 extru $inp,31,`log($SZ)/log(2)`,$t0
294 mtctl $t0,%cr11 ; load %sar with align factor
299 andcm $inp,$t0,$t0 ; align $inp
# Append one load per message word 0..14, each at offset $SZ*$i from the
# address held in $t0, into the matching @X register.
301 for ($i=0;$i<15;$i++) { # load input block
302 $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
304 cmpb,*= $inp,$t0,L\$aligned
305 $LD `$SZ*15`($t0),@X[15]
306 $LD `$SZ*16`($t0),@X[16]
# Append one _align pseudo-instruction per message word, combining each
# word with its successor; for the last word the successor is @X[16].
308 for ($i=0;$i<16;$i++) { # align data
309 $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
312 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
# Generate rounds 0..15. After each round the last element of @V is moved
# to the front, so the next round receives the registers rotated by one.
315 for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
318 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
# Generate further rounds with ROUND_16_xx until $i reaches 32. The loop
# has no initialiser, so $i continues from its current value; @V is
# rotated by one after every round.
320 for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
322 bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
325 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
326 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
327 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
328 ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
330 $LD `0*$SZ`($ctx),@X[0] ; load context
331 $LD `1*$SZ`($ctx),@X[1]
332 $LD `2*$SZ`($ctx),@X[2]
333 $LD `3*$SZ`($ctx),@X[3]
334 $LD `4*$SZ`($ctx),@X[4]
335 $LD `5*$SZ`($ctx),@X[5]
337 $LD `6*$SZ`($ctx),@X[6]
339 $LD `7*$SZ`($ctx),@X[7]
340 ldo `16*$SZ`($inp),$inp ; advance $inp
342 $ST $A,`0*$SZ`($ctx) ; save context
357 cmpb,*<>,n $inp,$num,L\$oop
358 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
360 if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
370 @V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
371 $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
372 ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
373 "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
384 @X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
386 sub ROUND_00_15_pa1 {
387 my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
388 $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
389 my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
391 $code.=<<___ if (!$flag);
392 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
393 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
396 shd $ehi,$elo,$Sigma1[0],$t0
398 shd $elo,$ehi,$Sigma1[0],$t1
399 addc $Xhi,$hhi,$hhi ; h += X[i]
400 shd $ehi,$elo,$Sigma1[1],$t2
402 shd $elo,$ehi,$Sigma1[1],$t3
403 ldw -4($Tbl),$Xlo ; load K[i]
408 shd $ehi,$elo,$Sigma1[2],$t2
410 shd $elo,$ehi,$Sigma1[2],$t3
413 xor $t3,$t1,$t1 ; Sigma1(e)
416 addc $Xhi,$hhi,$hhi ; h += K[i]
417 xor $a3,$a1,$a1 ; Ch(e,f,g)
420 shd $ahi,$alo,$Sigma0[0],$t0
421 addc $t1,$hhi,$hhi ; h += Sigma1(e)
422 shd $alo,$ahi,$Sigma0[0],$t1
424 shd $ahi,$alo,$Sigma0[1],$t2
425 addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
426 shd $alo,$ahi,$Sigma0[1],$t3
430 shd $ahi,$alo,$Sigma0[2],$t2
432 shd $alo,$ahi,$Sigma0[2],$t3
435 xor $t3,$t1,$t1 ; Sigma0(a)
442 addc $hhi,$dhi,$dhi ; d += h
446 addc $t1,$hhi,$hhi ; h += Sigma0(a)
449 xor $a3,$a1,$a1 ; Maj(a,b,c)
450 addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
453 $code.=<<___ if ($i==15 && $flag);
454 extru $Xlo,31,10,$Xlo
455 comiclr,= $LAST10BITS,$Xlo,%r0
459 push(@X,shift(@X)); push(@X,shift(@X));
462 sub ROUND_16_xx_pa1 {
463 my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
467 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
468 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
469 ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
470 ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
471 ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
472 ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
473 shd $Xnhi,$Xnlo,$sigma0[0],$t0
474 shd $Xnlo,$Xnhi,$sigma0[0],$t1
476 shd $Xnhi,$Xnlo,$sigma0[1],$t2
478 shd $Xnlo,$Xnhi,$sigma0[1],$t3
480 shd $Xnhi,$Xnlo,$sigma0[2],$t2
482 extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
484 shd $a3,$a2,$sigma1[0],$a0
485 xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f])
486 shd $a2,$a3,$sigma1[0],$a1
488 shd $a3,$a2,$sigma1[1],$t2
490 shd $a2,$a3,$sigma1[1],$t3
492 shd $a3,$a2,$sigma1[2],$t2
494 extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
496 xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f])
500 stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
501 stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
503 &ROUND_00_15_pa1($i,@_,1);
506 ldw `0*4`($ctx),$Ahi ; load context
516 ldw `10*4`($ctx),$Fhi
517 ldw `11*4`($ctx),$Flo
518 ldw `12*4`($ctx),$Ghi
519 ldw `13*4`($ctx),$Glo
520 ldw `14*4`($ctx),$Hhi
521 ldw `15*4`($ctx),$Hlo
526 mtctl $t0,%cr11 ; load %sar with align factor
530 comib,= 0,$a3,L\$aligned_pa1
533 ldw `0*4`($inp),$X[0]
534 ldw `1*4`($inp),$X[1]
541 vshd $X[0],$X[1],$X[0]
543 stw $X[0],`-$XOFF+0*4`(%sp)
546 stw $X[1],`-$XOFF+1*4`(%sp)
551 my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
552 for ($i=2;$i<=(128/4-8);$i++) {
554 stw $t[0],`-$XOFF+$i*4`(%sp)
555 ldw `(8+$i)*4`($inp),$t[0]
556 vshd $t[1],$t[2],$t[1]
560 for (;$i<(128/4-1);$i++) {
562 stw $t[0],`-$XOFF+$i*4`(%sp)
563 vshd $t[1],$t[2],$t[1]
569 stw $t[0],`-$XOFF+$i*4`(%sp)
575 ldw `0*4`($inp),$X[0]
576 ldw `1*4`($inp),$X[1]
583 stw $X[0],`-$XOFF+0*4`(%sp)
585 stw $X[1],`-$XOFF+1*4`(%sp)
589 my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
590 for ($i=2;$i<(128/4-8);$i++) {
592 stw $t[0],`-$XOFF+$i*4`(%sp)
593 ldw `(8+$i)*4`($inp),$t[0]
597 for (;$i<128/4;$i++) {
599 stw $t[0],`-$XOFF+$i*4`(%sp)
603 $code.="L\$collected_pa1\n";
# Generate rounds 0..15 of the pa1 code path. In this path @V lists every
# state word as a hi/lo register pair, hence the rotation by two entries
# after each round.
606 for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
# Label placed in front of the ROUND_16_xx_pa1 rounds.
607 $code.="L\$rounds_pa1\n";
# Generate further rounds until $i reaches 32, continuing from the current
# value of $i, with the same two-entry rotation of @V.
608 for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
611 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
612 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
613 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
614 ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
616 ldw `0*4`($ctx),$t1 ; update context
655 ldo `16*$SZ`($inp),$inp ; advance $inp
659 stw $Fhi,`10*4`($ctx)
660 stw $Flo,`11*4`($ctx)
661 stw $Ghi,`12*4`($ctx)
662 stw $Glo,`13*4`($ctx)
663 stw $Hhi,`14*4`($ctx)
664 comb,= $inp,$num,L\$done
665 stw $Hlo,`15*4`($ctx)
667 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
672 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
673 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
674 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
675 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
676 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
677 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
678 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
679 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
680 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
681 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12
682 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13
683 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14
684 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15
685 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16
686 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17
687 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18
690 $POPMB -$FRAME(%sp),%r3
692 .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
695 # Explicitly encode PA-RISC 2.0 instructions used in this module, so
696 # that it can be compiled with .LEVEL 1.0. It should be noted that I
697 # wouldn't have to do this if the GNU assembler understood .ALLOW 2.0
# Encoder for "ldd". $mod is the completer text that follows the mnemonic
# (may be empty) and $args is the operand text.
701 my ($mod,$args) = @_;
# Original instruction text, kept for the trailing comment of the output.
702 my $orig = "ldd$mod\t$args";
# Matches operands of the form disp(%rB),%rT: $1 is the displacement,
# $2 and $3 are the register numbers placed at bits 21 and 16.
704 if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
705 { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
# A completer starting with ",m" sets bit 3; ",mb" additionally sets bit 2.
706 $opcode|=(1<<3) if ($mod =~ /^,m/);
707 $opcode|=(1<<2) if ($mod =~ /^,mb/);
# Result: a .WORD directive with the encoding, annotated with the original text.
708 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Encoder for "std". $mod is the completer text that follows the mnemonic
# (may be empty) and $args is the operand text.
714 my ($mod,$args) = @_;
# Original instruction text, kept for the trailing comment of the output.
715 my $orig = "std$mod\t$args";
# Matches operands of the form %rS,disp(%rB): $1 is the first register,
# $2 the displacement and $3 the register inside the parentheses.
# No completer bits are encoded in the visible lines.
717 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
718 { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
# Result: a .WORD directive with the encoding, annotated with the original text.
719 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Encoder for "extrd". $mod is the completer text that follows the
# mnemonic and $args is the operand text.
725 my ($mod,$args) = @_;
# Original instruction text, kept for the trailing comment of the output.
726 my $orig = "extrd$mod\t$args";
728 # Only the ",u" completer is used here; it is implicitly encoded...
# Fixed-position form: %rS,pos,len,%rT.
# NOTE(review): $len is assigned on a line not visible here; presumably it
# is derived from the length operand - confirm.
729 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
730 { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
732 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
733 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
734 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Variable-position form: %rS,%sar,len,%rT.
736 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
737 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
739 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
# A completer containing "=" (optionally preceded by "*") sets bit 13;
# presumably this is the "=" condition field - confirm against the ISA manual.
740 $opcode |= (1<<13) if ($mod =~ /,\**=/);
741 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Encoder for "shrpd". $mod is the completer text that follows the
# mnemonic and $args is the operand text.
747 my ($mod,$args) = @_;
# Original instruction text, kept for the trailing comment of the output.
748 my $orig = "shrpd$mod\t$args";
# Constant shift amount: %rA,%rB,sa,%rT.
# NOTE(review): $cpos is assigned on a line not visible here; presumably it
# is derived from the shift amount in $3 - confirm.
750 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
751 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
753 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
754 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
# Shift amount taken from %sar: %rA,%rB,%sar,%rT.
756 elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
757 { sprintf "\t.WORD\t0x%08x\t; %s",
758 (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
# $mnemonic is the instruction name, $mod its completer text and $args its
# operand text.
764 my ($mnemonic,$mod,$args)=@_;
# Fetch the value of the variable named after the mnemonic, using a
# string eval of "$<mnemonic>".
765 my $opcode = eval("\$$mnemonic");
# If that value is a code reference, call it with the completer and the
# operands and return its result; otherwise return the instruction
# re-assembled as text.
767 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
# Post-process the generated code one line at a time.
770 foreach (split("\n",$code)) {
# Evaluate every backtick-quoted expression and splice in its value.
771 s/\`([^\`]*)\`/eval $1/ge;
# "shd" with a constant shift: an amount above 31 is expressed by swapping
# the two source registers and subtracting 32. The captured register names
# already contain "%", so "%$1" interpolates to "%%rN", which sprintf
# renders as a literal "%rN".
773 s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
774 $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32
775 : sprintf("shd\t%$1,%$2,%d",$3)/e or
776 # translate made up instructions: _ror, _shr, _align, _shl
# _ror becomes "shd" when $SZ==4 and "shrpd" otherwise, with the source
# register given twice.
777 s/_ror(\s+)(%r[0-9]+),/
778 ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or
# _shr by n becomes extru with position 31-n and length 32-n when $SZ==4,
# otherwise extrd,u with position 63-n and length 64-n.
780 s/_shr(\s+%r[0-9]+),([0-9]+),/
781 $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
782 : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or
# _align becomes vshd when $SZ==4, otherwise shrpd with %sar as the shift
# amount operand.
784 s/_align(\s+%r[0-9]+,%r[0-9]+),/
785 ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or
# _shl by n becomes zdep with position 31-n and length 32-n when
# $SIZE_T==4, otherwise depd,z with position 63-n and length 64-n.
787 s/_shl(\s+%r[0-9]+),([0-9]+),/
788 $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
789 : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
# When $SIZE_T==4, pass mnemonic, completer and operands of every
# indented instruction line to assemble().
791 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);
# When $SIZE_T==4, "cmpb,*" is rewritten to "comb,".
793 s/cmpb,\*/comb,/ if ($SIZE_T==4);
# When $SIZE_T==8, "bv" is rewritten to "bve".
795 s/\bbv\b/bve/ if ($SIZE_T==8);