From: Andy Polyakov Date: Thu, 21 Oct 2010 15:56:55 +0000 (+0000) Subject: Add aes-mips.pl assembler module. X-Git-Tag: OpenSSL-fips-2_0-rc1~957 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=bb550038820e20b730659d28412d0e46bbd01490;p=oweals%2Fopenssl.git Add aes-mips.pl assembler module. --- diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl new file mode 100644 index 0000000000..8431f9fcfb --- /dev/null +++ b/crypto/aes/asm/aes-mips.pl @@ -0,0 +1,1606 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# AES for MIPS + +# October 2010 +# +# Code uses 1K[+256B] S-box and on single-issue core [such as R5000] +# spends ~68 cycles per byte processed with 128-bit key. This is ~16% +# faster than gcc-generated code, which is not very impressive. But +# recall that compressed S-box requires extra processing, namely +# additional rotations. Rotations are implemented with lwl/lwr pairs, +# which is normally used for loading unaligned data. Another cool +# thing about this module is its endian neutrality, which means that +# it processes data without ever changing byte order... + +###################################################################### +# There is a number of MIPS ABI in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 + +if ($flavour =~ /64|n32/i) { + $PTR_ADD="dadd"; # incidentally works even on n32 + $PTR_SUB="dsub"; # incidentally works even on n32 + $REG_S="sd"; + $REG_L="ld"; + $PTR_SLL="dsll"; # incidentally works even on n32 + $SZREG=8; +} else { + $PTR_ADD="add"; + $PTR_SUB="sub"; + $REG_S="sw"; + $REG_L="lw"; + $PTR_SLL="sll"; + $SZREG=4; +} +$pf = ($flavour =~ /nubi/i) ? $t0 : $t2; +# +# +# +###################################################################### + +for (@ARGV) { $big_endian=1 if (/\-DB_ENDIAN/); + $big_endian=0 if (/\-DL_ENDIAN/); + $output=$_ if (/^\w[\w\-]*\.\w+$/); } +open STDOUT,">$output"; + +if (!defined($big_endian)) +{ $big_endian=(unpack('L',pack('N',1))==1); } + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +my ($MSB,$LSB)=(0,3); # automatically converted to little-endian + +$code.=<<___; +.text + +.option pic2 +.set noat +___ + +{{{ +my $FRAMESIZE=16*$SZREG; +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000; + +my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7); +my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2); +my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23)); +my ($key0,$cnt)=($gp,$fp); + +# instuction ordering is "stolen" from output from MIPSpro assembler +# invoked with -mips3 -O3 arguments... +$code.=<<___; +.align 5 +.ent _mips_AES_encrypt +_mips_AES_encrypt: + .frame $sp,0,$ra + .set reorder + lw $t0,0($key) + lw $t1,4($key) + lw $t2,8($key) + lw $t3,12($key) + lw $cnt,240($key) + $PTR_ADD $key0,$key,16 + + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + + sub $cnt,1 + _xtr $i0,$s1,16-2 +.Loop_enc: + _xtr $i1,$s2,16-2 + _xtr $i2,$s3,16-2 + _xtr $i3,$s0,16-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t0,3($i0) # Te1[s1>>16] + lwl $t1,3($i1) # Te1[s2>>16] + lwl $t2,3($i2) # Te1[s3>>16] + lwl $t3,3($i3) # Te1[s0>>16] + lwr $t0,2($i0) # Te1[s1>>16] + lwr $t1,2($i1) # Te1[s2>>16] + lwr $t2,2($i2) # Te1[s3>>16] + lwr $t3,2($i3) # Te1[s0>>16] + + _xtr $i0,$s2,8-2 + _xtr $i1,$s3,8-2 + _xtr $i2,$s0,8-2 + _xtr $i3,$s1,8-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t4,2($i0) # Te2[s2>>8] + lwl $t5,2($i1) # Te2[s3>>8] + lwl $t6,2($i2) # Te2[s0>>8] + lwl $t7,2($i3) # Te2[s1>>8] + lwr $t4,1($i0) # Te2[s2>>8] + lwr $t5,1($i1) # Te2[s3>>8] + lwr $t6,1($i2) # Te2[s0>>8] + lwr $t7,1($i3) # Te2[s1>>8] + + _xtr $i0,$s3,0-2 + _xtr $i1,$s0,0-2 + _xtr $i2,$s1,0-2 + _xtr $i3,$s2,0-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t8,1($i0) # Te3[s3] + lwl $t9,1($i1) # Te3[s0] + lwl $t10,1($i2) # Te3[s1] + lwl $t11,1($i3) # Te3[s2] + lwr $t8,0($i0) # Te3[s3] + lwr $t9,0($i1) # Te3[s0] + lwr $t10,0($i2) # Te3[s1] + lwr $t11,0($i3) # Te3[s2] + + _xtr $i0,$s0,24-2 + _xtr $i1,$s1,24-2 + _xtr $i2,$s2,24-2 + _xtr $i3,$s3,24-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + lw $t4,0($i0) # Te0[s0>>24] + lw $t5,0($i1) # Te0[s1>>24] + lw $t6,0($i2) # Te0[s2>>24] + lw $t7,0($i3) # Te0[s3>>24] + + lw $s0,0($key0) + lw $s1,4($key0) + lw $s2,8($key0) + lw $s3,12($key0) + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + sub $cnt,1 + $PTR_ADD $key0,16 + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + .set noreorder + bnez $cnt,.Loop_enc + _xtr $i0,$s1,16-2 + + .set reorder + _xtr $i1,$s2,16-2 + _xtr $i2,$s3,16-2 + _xtr $i3,$s0,16-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t0,2($i0) # Te4[s1>>16] + lbu $t1,2($i1) # Te4[s2>>16] + lbu $t2,2($i2) # Te4[s3>>16] + lbu $t3,2($i3) # Te4[s0>>16] + + _xtr $i0,$s2,8-2 + _xtr $i1,$s3,8-2 + _xtr $i2,$s0,8-2 + _xtr $i3,$s1,8-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t4,2($i0) # Te4[s2>>8] + lbu $t5,2($i1) # Te4[s3>>8] + lbu $t6,2($i2) # Te4[s0>>8] + lbu $t7,2($i3) # Te4[s1>>8] + + _xtr $i0,$s0,24-2 + _xtr $i1,$s1,24-2 + _xtr $i2,$s2,24-2 + _xtr $i3,$s3,24-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t8,2($i0) # Te4[s0>>24] + lbu $t9,2($i1) # Te4[s1>>24] + lbu $t10,2($i2) # Te4[s2>>24] + lbu $t11,2($i3) # Te4[s3>>24] + + _xtr $i0,$s3,0-2 + _xtr $i1,$s0,0-2 + _xtr $i2,$s1,0-2 + _xtr $i3,$s2,0-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + + _ins $t0,16 + _ins $t1,16 + _ins $t2,16 + _ins $t3,16 + + _ins $t4,8 + _ins $t5,8 + _ins $t6,8 + _ins $t7,8 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t4,2($i0) # Te4[s3] + lbu $t5,2($i1) # Te4[s0] + lbu $t6,2($i2) # Te4[s1] + lbu $t7,2($i3) # Te4[s2] + + _ins $t8,24 + _ins $t9,24 + _ins $t10,24 + _ins $t11,24 + + lw $s0,0($key0) + lw $s1,4($key0) + lw $s2,8($key0) + lw $s3,12($key0) + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + _ins $t4,0 + _ins $t5,0 + _ins $t6,0 + _ins $t7,0 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + + jr $ra +.end _mips_AES_encrypt + +.align 5 +.globl AES_encrypt +.ent AES_encrypt +AES_encrypt: + .frame $sp,$FRAMESIZE,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder +___ +$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification + .cpload $pf +___ +$code.=<<___; + $PTR_SUB $sp,$FRAMESIZE + $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_S $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_S $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_S $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_S $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_S $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_S $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_S $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_S $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S \$15,$FRAMESIZE-11*$SZREG($sp) + $REG_S \$14,$FRAMESIZE-12*$SZREG($sp) + $REG_S \$13,$FRAMESIZE-13*$SZREG($sp) + $REG_S \$12,$FRAMESIZE-14*$SZREG($sp) + $REG_S $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification + .cplocal $Tbl + .cpsetup $pf,$zero,AES_encrypt +___ +$code.=<<___; + .set reorder + la $Tbl,AES_Te # PIC-ified 'load address' + + lwl $s0,0+$MSB($inp) + lwl $s1,4+$MSB($inp) + lwl $s2,8+$MSB($inp) + lwl $s3,12+$MSB($inp) + lwr $s0,0+$LSB($inp) + lwr $s1,4+$LSB($inp) + lwr $s2,8+$LSB($inp) + lwr $s3,12+$LSB($inp) + + bal _mips_AES_encrypt + + swr $s0,0+$LSB($out) + swr $s1,4+$LSB($out) + swr $s2,8+$LSB($out) + swr $s3,12+$LSB($out) + swl $s0,0+$MSB($out) + swl $s1,4+$MSB($out) + swl $s2,8+$MSB($out) + swl $s3,12+$MSB($out) + + .set noreorder + $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_L $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_L $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_L $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_L $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_L $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_L $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_L $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_L $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L \$15,$FRAMESIZE-11*$SZREG($sp) + $REG_L \$14,$FRAMESIZE-12*$SZREG($sp) + $REG_L \$13,$FRAMESIZE-13*$SZREG($sp) + $REG_L \$12,$FRAMESIZE-14*$SZREG($sp) + $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE +.end AES_encrypt +___ + +$code.=<<___; +.align 5 +.ent _mips_AES_decrypt +_mips_AES_decrypt: + .frame $sp,0,$ra + .set reorder + lw $t0,0($key) + lw $t1,4($key) + lw $t2,8($key) + lw $t3,12($key) + lw $cnt,240($key) + $PTR_ADD $key0,$key,16 + + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + + sub $cnt,1 + _xtr $i0,$s3,16-2 +.Loop_dec: + _xtr $i1,$s0,16-2 + _xtr $i2,$s1,16-2 + _xtr $i3,$s2,16-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t0,3($i0) # Td1[s3>>16] + lwl $t1,3($i1) # Td1[s0>>16] + lwl $t2,3($i2) # Td1[s1>>16] + lwl $t3,3($i3) # Td1[s2>>16] + lwr $t0,2($i0) # Td1[s3>>16] + lwr $t1,2($i1) # Td1[s0>>16] + lwr $t2,2($i2) # Td1[s1>>16] + lwr $t3,2($i3) # Td1[s2>>16] + + _xtr $i0,$s2,8-2 + _xtr $i1,$s3,8-2 + _xtr $i2,$s0,8-2 + _xtr $i3,$s1,8-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t4,2($i0) # Td2[s2>>8] + lwl $t5,2($i1) # Td2[s3>>8] + lwl $t6,2($i2) # Td2[s0>>8] + lwl $t7,2($i3) # Td2[s1>>8] + lwr $t4,1($i0) # Td2[s2>>8] + lwr $t5,1($i1) # Td2[s3>>8] + lwr $t6,1($i2) # Td2[s0>>8] + lwr $t7,1($i3) # Td2[s1>>8] + + _xtr $i0,$s1,0-2 + _xtr $i1,$s2,0-2 + _xtr $i2,$s3,0-2 + _xtr $i3,$s0,0-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lwl $t8,1($i0) # Td3[s1] + lwl $t9,1($i1) # Td3[s2] + lwl $t10,1($i2) # Td3[s3] + lwl $t11,1($i3) # Td3[s0] + lwr $t8,0($i0) # Td3[s1] + lwr $t9,0($i1) # Td3[s2] + lwr $t10,0($i2) # Td3[s3] + lwr $t11,0($i3) # Td3[s0] + + _xtr $i0,$s0,24-2 + _xtr $i1,$s1,24-2 + _xtr $i2,$s2,24-2 + _xtr $i3,$s3,24-2 + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + + lw $t4,0($i0) # Td0[s0>>24] + lw $t5,0($i1) # Td0[s1>>24] + lw $t6,0($i2) # Td0[s2>>24] + lw $t7,0($i3) # Td0[s3>>24] + + lw $s0,0($key0) + lw $s1,4($key0) + lw $s2,8($key0) + lw $s3,12($key0) + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + sub $cnt,1 + $PTR_ADD $key0,16 + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + .set noreorder + bnez $cnt,.Loop_dec + _xtr $i0,$s3,16-2 + + .set reorder + lw $t4,1024($Tbl) # prefetch Td4 + lw $t5,1024+32($Tbl) + lw $t6,1024+64($Tbl) + lw $t7,1024+96($Tbl) + lw $t8,1024+128($Tbl) + lw $t9,1024+160($Tbl) + lw $t10,1024+192($Tbl) + lw $t11,1024+224($Tbl) + + _xtr $i0,$s3,16 + _xtr $i1,$s0,16 + _xtr $i2,$s1,16 + _xtr $i3,$s2,16 + and $i0,0xff + and $i1,0xff + and $i2,0xff + and $i3,0xff + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t0,1024($i0) # Td4[s3>>16] + lbu $t1,1024($i1) # Td4[s0>>16] + lbu $t2,1024($i2) # Td4[s1>>16] + lbu $t3,1024($i3) # Td4[s2>>16] + + _xtr $i0,$s2,8 + _xtr $i1,$s3,8 + _xtr $i2,$s0,8 + _xtr $i3,$s1,8 + and $i0,0xff + and $i1,0xff + and $i2,0xff + and $i3,0xff + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t4,1024($i0) # Td4[s2>>8] + lbu $t5,1024($i1) # Td4[s3>>8] + lbu $t6,1024($i2) # Td4[s0>>8] + lbu $t7,1024($i3) # Td4[s1>>8] + + _xtr $i0,$s0,24 + _xtr $i1,$s1,24 + _xtr $i2,$s2,24 + _xtr $i3,$s3,24 + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t8,1024($i0) # Td4[s0>>24] + lbu $t9,1024($i1) # Td4[s1>>24] + lbu $t10,1024($i2) # Td4[s2>>24] + lbu $t11,1024($i3) # Td4[s3>>24] + + _xtr $i0,$s1,0 + _xtr $i1,$s2,0 + _xtr $i2,$s3,0 + _xtr $i3,$s0,0 + + _ins $t0,16 + _ins $t1,16 + _ins $t2,16 + _ins $t3,16 + + _ins $t4,8 + _ins $t5,8 + _ins $t6,8 + _ins $t7,8 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t4,1024($i0) # Td4[s1] + lbu $t5,1024($i1) # Td4[s2] + lbu $t6,1024($i2) # Td4[s3] + lbu $t7,1024($i3) # Td4[s0] + + _ins $t8,24 + _ins $t9,24 + _ins $t10,24 + _ins $t11,24 + + lw $s0,0($key0) + lw $s1,4($key0) + lw $s2,8($key0) + lw $s3,12($key0) + + _ins $t4,0 + _ins $t5,0 + _ins $t6,0 + _ins $t7,0 + + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + + jr $ra +.end _mips_AES_decrypt + +.align 5 +.globl AES_decrypt +.ent AES_decrypt +AES_decrypt: + .frame $sp,$FRAMESIZE,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder +___ +$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification + .cpload $pf +___ +$code.=<<___; + $PTR_SUB $sp,$FRAMESIZE + $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_S $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_S $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_S $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_S $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_S $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_S $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_S $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_S $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S \$15,$FRAMESIZE-11*$SZREG($sp) + $REG_S \$14,$FRAMESIZE-12*$SZREG($sp) + $REG_S \$13,$FRAMESIZE-13*$SZREG($sp) + $REG_S \$12,$FRAMESIZE-14*$SZREG($sp) + $REG_S $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification + .cplocal $Tbl + .cpsetup $pf,$zero,AES_decrypt +___ +$code.=<<___; + .set reorder + la $Tbl,AES_Td # PIC-ified 'load address' + + lwl $s0,0+$MSB($inp) + lwl $s1,4+$MSB($inp) + lwl $s2,8+$MSB($inp) + lwl $s3,12+$MSB($inp) + lwr $s0,0+$LSB($inp) + lwr $s1,4+$LSB($inp) + lwr $s2,8+$LSB($inp) + lwr $s3,12+$LSB($inp) + + bal _mips_AES_decrypt + + swr $s0,0+$LSB($out) + swr $s1,4+$LSB($out) + swr $s2,8+$LSB($out) + swr $s3,12+$LSB($out) + swl $s0,0+$MSB($out) + swl $s1,4+$MSB($out) + swl $s2,8+$MSB($out) + swl $s3,12+$MSB($out) + + .set noreorder + $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) + $REG_L $s11,$FRAMESIZE-3*$SZREG($sp) + $REG_L $s10,$FRAMESIZE-4*$SZREG($sp) + $REG_L $s9,$FRAMESIZE-5*$SZREG($sp) + $REG_L $s8,$FRAMESIZE-6*$SZREG($sp) + $REG_L $s7,$FRAMESIZE-7*$SZREG($sp) + $REG_L $s6,$FRAMESIZE-8*$SZREG($sp) + $REG_L $s5,$FRAMESIZE-9*$SZREG($sp) + $REG_L $s4,$FRAMESIZE-10*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L \$15,$FRAMESIZE-11*$SZREG($sp) + $REG_L \$14,$FRAMESIZE-12*$SZREG($sp) + $REG_L \$13,$FRAMESIZE-13*$SZREG($sp) + $REG_L \$12,$FRAMESIZE-14*$SZREG($sp) + $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE +.end AES_decrypt +___ +}}} + +{{{ +my $FRAMESIZE=8*$SZREG; +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000; + +my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3); +my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); +my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2); +my ($rcon,$cnt)=($gp,$fp); + +$code.=<<___; +.align 5 +.ent _mips_AES_set_encrypt_key +_mips_AES_set_encrypt_key: + .frame $sp,0,$ra + .set noreorder + beqz $inp,.Lekey_done + li $t0,-1 + beqz $key,.Lekey_done + $PTR_ADD $rcon,$Tbl,1024+256 + + .set reorder + lwl $rk0,0+$MSB($inp) # load 128 bits + lwl $rk1,4+$MSB($inp) + lwl $rk2,8+$MSB($inp) + lwl $rk3,12+$MSB($inp) + li $at,128 + lwr $rk0,0+$LSB($inp) + lwr $rk1,4+$LSB($inp) + lwr $rk2,8+$LSB($inp) + lwr $rk3,12+$LSB($inp) + .set noreorder + beq $bits,$at,.L128bits + li $cnt,10 + + .set reorder + lwl $rk4,16+$MSB($inp) # load 192 bits + lwl $rk5,20+$MSB($inp) + li $at,192 + lwr $rk4,16+$LSB($inp) + lwr $rk5,20+$LSB($inp) + .set noreorder + beq $bits,$at,.L192bits + li $cnt,8 + + .set reorder + lwl $rk6,24+$MSB($inp) # load 256 bits + lwl $rk7,28+$MSB($inp) + li $at,256 + lwr $rk6,24+$LSB($inp) + lwr $rk7,28+$LSB($inp) + .set noreorder + beq $bits,$at,.L256bits + li $cnt,7 + + b .Lekey_done + li $t0,-2 + +.align 4 +.L128bits: + .set reorder + srl $i0,$rk3,16 + srl $i1,$rk3,8 + and $i0,0xff + and $i1,0xff + and $i2,$rk3,0xff + srl $i3,$rk3,24 + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $i0,1024($i0) + lbu $i1,1024($i1) + lbu $i2,1024($i2) + lbu $i3,1024($i3) + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + sw $rk3,12($key) + sub $cnt,1 + $PTR_ADD $key,16 + + _bias $i0,24 + _bias $i1,16 + _bias $i2,8 + _bias $i3,0 + + xor $rk0,$i0 + lw $i0,0($rcon) + xor $rk0,$i1 + xor $rk0,$i2 + xor $rk0,$i3 + xor $rk0,$i0 + + xor $rk1,$rk0 + xor $rk2,$rk1 + xor $rk3,$rk2 + + .set noreorder + bnez $cnt,.L128bits + $PTR_ADD $rcon,4 + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + li $cnt,10 + sw $rk3,12($key) + li $t0,0 + sw $cnt,80($key) + b .Lekey_done + $PTR_SUB $key,10*16 + +.align 4 +.L192bits: + .set reorder + srl $i0,$rk5,16 + srl $i1,$rk5,8 + and $i0,0xff + and $i1,0xff + and $i2,$rk5,0xff + srl $i3,$rk5,24 + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $i0,1024($i0) + lbu $i1,1024($i1) + lbu $i2,1024($i2) + lbu $i3,1024($i3) + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + sw $rk3,12($key) + sw $rk4,16($key) + sw $rk5,20($key) + sub $cnt,1 + $PTR_ADD $key,24 + + _bias $i0,24 + _bias $i1,16 + _bias $i2,8 + _bias $i3,0 + + xor $rk0,$i0 + lw $i0,0($rcon) + xor $rk0,$i1 + xor $rk0,$i2 + xor $rk0,$i3 + xor $rk0,$i0 + + xor $rk1,$rk0 + xor $rk2,$rk1 + xor $rk3,$rk2 + xor $rk4,$rk3 + xor $rk5,$rk4 + + .set noreorder + bnez $cnt,.L192bits + $PTR_ADD $rcon,4 + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + li $cnt,12 + sw $rk3,12($key) + li $t0,0 + sw $cnt,48($key) + b .Lekey_done + $PTR_SUB $key,12*16 + +.align 4 +.L256bits: + .set reorder + srl $i0,$rk7,16 + srl $i1,$rk7,8 + and $i0,0xff + and $i1,0xff + and $i2,$rk7,0xff + srl $i3,$rk7,24 + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $i0,1024($i0) + lbu $i1,1024($i1) + lbu $i2,1024($i2) + lbu $i3,1024($i3) + + sw $rk0,0($key) + sw $rk1,4($key) + sw $rk2,8($key) + sw $rk3,12($key) + sw $rk4,16($key) + sw $rk5,20($key) + sw $rk6,24($key) + sw $rk7,28($key) + sub $cnt,1 + + _bias $i0,24 + _bias $i1,16 + _bias $i2,8 + _bias $i3,0 + + xor $rk0,$i0 + lw $i0,0($rcon) + xor $rk0,$i1 + xor $rk0,$i2 + xor $rk0,$i3 + xor $rk0,$i0 + + xor $rk1,$rk0 + xor $rk2,$rk1 + xor $rk3,$rk2 + beqz $cnt,.L256bits_done + + srl $i0,$rk3,24 + srl $i1,$rk3,16 + srl $i2,$rk3,8 + and $i3,$rk3,0xff + and $i1,0xff + and $i2,0xff + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $i0,1024($i0) + lbu $i1,1024($i1) + lbu $i2,1024($i2) + lbu $i3,1024($i3) + sll $i0,24 + sll $i1,16 + sll $i2,8 + + xor $rk4,$i0 + xor $rk4,$i1 + xor $rk4,$i2 + xor $rk4,$i3 + + xor $rk5,$rk4 + xor $rk6,$rk5 + xor $rk7,$rk6 + + $PTR_ADD $key,32 + .set noreorder + b .L256bits + $PTR_ADD $rcon,4 + +.L256bits_done: + sw $rk0,32($key) + sw $rk1,36($key) + sw $rk2,40($key) + li $cnt,14 + sw $rk3,44($key) + li $t0,0 + sw $cnt,48($key) + $PTR_SUB $key,12*16 + +.Lekey_done: + jr $ra + nop +.end _mips_AES_set_encrypt_key + +.globl AES_set_encrypt_key +.ent AES_set_encrypt_key +AES_set_encrypt_key: + .frame $sp,$FRAMESIZE,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder +___ +$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification + .cpload $pf +___ +$code.=<<___; + $PTR_SUB $sp,$FRAMESIZE + $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S $s3,$FRAMESIZE-3*$SZREG($sp) + $REG_S $s2,$FRAMESIZE-4*$SZREG($sp) + $REG_S $s1,$FRAMESIZE-5*$SZREG($sp) + $REG_S $s0,$FRAMESIZE-6*$SZREG($sp) + $REG_S $gp,$FRAMESIZE-7*$SZREG($sp) +___ +$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification + .cplocal $Tbl + .cpsetup $pf,$zero,AES_set_encrypt_key +___ +$code.=<<___; + .set reorder + la $Tbl,AES_Te # PIC-ified 'load address' + + bal _mips_AES_set_encrypt_key + + .set noreorder + move $a0,$t0 + $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s3,$FRAMESIZE-11*$SZREG($sp) + $REG_L $s2,$FRAMESIZE-12*$SZREG($sp) + $REG_L $s1,$FRAMESIZE-13*$SZREG($sp) + $REG_L $s0,$FRAMESIZE-14*$SZREG($sp) + $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE +.end AES_set_encrypt_key +___ + +my ($head,$tail)=($inp,$bits); +my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); +my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); +$code.=<<___; +.align 5 +.globl AES_set_decrypt_key +.ent AES_set_decrypt_key +AES_set_decrypt_key: + .frame $sp,$FRAMESIZE,$ra + .mask $SAVED_REGS_MASK,-$SZREG + .set noreorder +___ +$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification + .cpload $pf +___ +$code.=<<___; + $PTR_SUB $sp,$FRAMESIZE + $REG_S $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_S $fp,$FRAMESIZE-2*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + $REG_S $s3,$FRAMESIZE-3*$SZREG($sp) + $REG_S $s2,$FRAMESIZE-4*$SZREG($sp) + $REG_S $s1,$FRAMESIZE-5*$SZREG($sp) + $REG_S $s0,$FRAMESIZE-6*$SZREG($sp) + $REG_S $gp,$FRAMESIZE-7*$SZREG($sp) +___ +$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification + .cplocal $Tbl + .cpsetup $pf,$zero,AES_set_decrypt_key +___ +$code.=<<___; + .set reorder + la $Tbl,AES_Te # PIC-ified 'load address' + + bal _mips_AES_set_encrypt_key + + bltz $t0,.Ldkey_done + + sll $at,$cnt,4 + $PTR_ADD $head,$key,0 + $PTR_ADD $tail,$key,$at +.align 4 +.Lswap: + lw $rk0,0($head) + lw $rk1,4($head) + lw $rk2,8($head) + lw $rk3,12($head) + lw $rk4,0($tail) + lw $rk5,4($tail) + lw $rk6,8($tail) + lw $rk7,12($tail) + sw $rk0,0($tail) + sw $rk1,4($tail) + sw $rk2,8($tail) + sw $rk3,12($tail) + $PTR_ADD $head,16 + $PTR_SUB $tail,16 + sw $rk4,-16($head) + sw $rk5,-12($head) + sw $rk6,-8($head) + sw $rk7,-4($head) + bne $head,$tail,.Lswap + + lw $tp1,16($key) # modulo-scheduled + lui $x80808080,0x8080 + sub $cnt,1 + or $x80808080,0x8080 + sll $cnt,2 + $PTR_ADD $key,16 + lui $x1b1b1b1b,0x1b1b + nor $x7f7f7f7f,$zero,$x80808080 + or $x1b1b1b1b,0x1b1b +.align 4 +.Lmix: + and $m,$tp1,$x80808080 + and $tp2,$tp1,$x7f7f7f7f + srl $tp4,$m,7 + addu $tp2,$tp2 # tp2<<1 + subu $m,$tp4 + and $m,$x1b1b1b1b + xor $tp2,$m + + and $m,$tp2,$x80808080 + and $tp4,$tp2,$x7f7f7f7f + srl $tp8,$m,7 + addu $tp4,$tp4 # tp4<<1 + subu $m,$tp8 + and $m,$x1b1b1b1b + xor $tp4,$m + + and $m,$tp4,$x80808080 + and $tp8,$tp4,$x7f7f7f7f + srl $tp9,$m,7 + addu $tp8,$tp8 # tp8<<1 + subu $m,$tp9 + and $m,$x1b1b1b1b + xor $tp8,$m + + xor $tp9,$tp8,$tp1 + xor $tpe,$tp8,$tp4 + xor $tpb,$tp9,$tp2 + xor $tpd,$tp9,$tp4 + + _ror $tp1,$tpd,16 + xor $tpe,$tp2 + _ror $tp2,$tpd,-16 + xor $tpe,$tp1 + _ror $tp1,$tp9,8 + xor $tpe,$tp2 + _ror $tp2,$tp9,-24 + xor $tpe,$tp1 + _ror $tp1,$tpb,24 + xor $tpe,$tp2 + _ror $tp2,$tpb,-8 + xor $tpe,$tp1 + lw $tp1,4($key) # modulo-scheduled + xor $tpe,$tp2 + sub $cnt,1 + sw $tpe,0($key) + $PTR_ADD $key,4 + bnez $cnt,.Lmix + + li $t0,0 +.Ldkey_done: + .set noreorder + move $a0,$t0 + $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) + $REG_L $fp,$FRAMESIZE-2*$SZREG($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); + $REG_L $s3,$FRAMESIZE-11*$SZREG($sp) + $REG_L $s2,$FRAMESIZE-12*$SZREG($sp) + $REG_L $s1,$FRAMESIZE-13*$SZREG($sp) + $REG_L $s0,$FRAMESIZE-14*$SZREG($sp) + $REG_L $gp,$FRAMESIZE-15*$SZREG($sp) +___ +$code.=<<___; + jr $ra + $PTR_ADD $sp,$FRAMESIZE +.end AES_set_decrypt_key +___ +}}} + +###################################################################### +# Tables are kept in endian-neutral manner +$code.=<<___; +.rdata +.align 6 +AES_Te: +.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0 +.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d +.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd +.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54 +.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03 +.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d +.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62 +.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a +.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d +.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87 +.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb +.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b +.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67 +.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea +.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7 +.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b +.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c +.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a +.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41 +.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f +.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4 +.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08 +.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73 +.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f +.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52 +.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e +.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1 +.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5 +.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36 +.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d +.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69 +.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f +.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e +.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e +.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2 +.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb +.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d +.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce +.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e +.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97 +.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68 +.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c +.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f +.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed +.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46 +.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b +.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4 +.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a +.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a +.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16 +.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7 +.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94 +.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10 +.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81 +.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44 +.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3 +.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe +.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a +.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc +.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04 +.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1 +.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63 +.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a +.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d +.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14 +.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f +.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2 +.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39 +.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2 +.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47 +.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7 +.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95 +.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98 +.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f +.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e +.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83 +.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29 +.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c +.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2 +.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76 +.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56 +.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e +.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a +.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4 +.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e +.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6 +.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4 +.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b +.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43 +.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7 +.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64 +.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0 +.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa +.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25 +.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e +.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18 +.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88 +.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72 +.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1 +.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51 +.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c +.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21 +.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc +.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85 +.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42 +.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa +.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05 +.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12 +.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f +.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0 +.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58 +.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9 +.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13 +.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33 +.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70 +.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7 +.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22 +.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20 +.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff +.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a +.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8 +.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17 +.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31 +.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8 +.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0 +.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11 +.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc +.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a + +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + +.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon +.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 +.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 +.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 +.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 + +.align 6 +AES_Td: +.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0 +.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 +.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1 +.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93 +.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6 +.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25 +.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7 +.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f +.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67 +.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1 +.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12 +.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6 +.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95 +.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda +.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3 +.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44 +.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78 +.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd +.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17 +.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4 +.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82 +.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45 +.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84 +.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94 +.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19 +.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7 +.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2 +.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a +.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03 +.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5 +.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2 +.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c +.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92 +.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1 +.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5 +.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a +.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0 +.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75 +.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa +.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51 +.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d +.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46 +.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05 +.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff +.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97 +.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77 +.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88 +.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb +.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9 +.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00 +.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48 +.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e +.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56 +.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27 +.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21 +.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a +.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f +.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e +.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2 +.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16 +.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5 +.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d +.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad +.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8 +.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c +.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd +.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc +.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34 +.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc +.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63 +.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10 +.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20 +.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8 +.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d +.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3 +.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0 +.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99 +.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22 +.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a +.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef +.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1 +.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36 +.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28 +.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4 +.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d +.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62 +.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8 +.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5 +.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c +.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3 +.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7 +.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b +.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4 +.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8 +.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e +.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6 +.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce +.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6 +.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31 +.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0 +.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6 +.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15 +.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7 +.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f +.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d +.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf +.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b +.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f +.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d +.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e +.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52 +.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13 +.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a +.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89 +.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35 +.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c +.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f +.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf +.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b +.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86 +.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e +.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f +.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c +.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41 +.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde +.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90 +.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70 +.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 + +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +___ + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + # made-up _instructions, _xtr, _ins, _ror and _bias, cope + # with byte order dependencies... + if (/^\s+_/) { + s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/; + + s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/ + sprintf("srl\t$1,$2,%d",$big_endian ? eval($3) + : eval("24-$3"))/e or + s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/ + sprintf("sll\t$1,$2,%d",$big_endian ? eval($3) + : eval("24-$3"))/e or + s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/ + sprintf("srl\t$1,$2,%d",$big_endian ? eval($3) + : eval("$3*-1"))/e or + s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/ + sprintf("sll\t$1,$2,%d",$big_endian ? eval($3) + : eval("($3-16)&31"))/e; + + s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/ + sprintf("sll\t$1,$2,$3")/e or + s/srl\s+(\$[0-9]+),(\$[0-9]+),0/ + sprintf("and\t$1,$2,0xff")/e or + s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/; + } + + # convert lwl/lwr and swr/swl to little-endian order + if (!$big_endian && /^\s+[sl]w[lr]\s+/) { + s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/ + sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or + s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/ + sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e; + } + + print $_,"\n"; +} + +close STDOUT;