# thing about this module is its endian neutrality, which means that
# it processes data without ever changing byte order...
+# September 2012
+#
+# Add MIPS32R2 code.
+
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
if ($flavour =~ /64|n32/i) {
$PTR_ADD="dadd"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32
+ $PTR_INS="dins";
$REG_S="sd";
$REG_L="ld";
$PTR_SLL="dsll"; # incidentally works even on n32
} else {
$PTR_ADD="add";
$PTR_SUB="sub";
+ $PTR_INS="ins";
$REG_S="sw";
$REG_L="lw";
$PTR_SLL="sll";
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+ lw $t0,0($i0) # Te1[s1>>16]
+ _xtr $i0,$s2,8-2
+ lw $t1,0($i1) # Te1[s2>>16]
+ _xtr $i1,$s3,8-2
+ lw $t2,0($i2) # Te1[s3>>16]
+ _xtr $i2,$s0,8-2
+ lw $t3,0($i3) # Te1[s0>>16]
+ _xtr $i3,$s1,8-2
+#else
lwl $t0,3($i0) # Te1[s1>>16]
lwl $t1,3($i1) # Te1[s2>>16]
lwl $t2,3($i2) # Te1[s3>>16]
_xtr $i2,$s0,8-2
lwr $t3,2($i3) # Te1[s0>>16]
_xtr $i3,$s1,8-2
+#endif
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+ rotr $t0,$t0,8
+ rotr $t1,$t1,8
+ rotr $t2,$t2,8
+ rotr $t3,$t3,8
+# if defined(_MIPSEL)
+ lw $t4,0($i0) # Te1[s1>>16]
+ _xtr $i0,$s3,0-2
+ lw $t5,0($i1) # Te1[s2>>16]
+ _xtr $i1,$s0,0-2
+ lw $t6,0($i2) # Te1[s3>>16]
+ _xtr $i2,$s1,0-2
+ lw $t7,0($i3) # Te1[s0>>16]
+ _xtr $i3,$s2,0-2
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+ lw $t8,0($i0) # Te1[s1>>16]
+ $PTR_INS $i0,$s0,2,8
+ lw $t9,0($i1) # Te1[s2>>16]
+ $PTR_INS $i1,$s1,2,8
+ lw $t10,0($i2) # Te1[s3>>16]
+ $PTR_INS $i2,$s2,2,8
+ lw $t11,0($i3) # Te1[s0>>16]
+ $PTR_INS $i3,$s3,2,8
+# else
+ lw $t4,0($i0) # Te1[s1>>16]
+ $PTR_INS $i0,$s3,2,8
+ lw $t5,0($i1) # Te1[s2>>16]
+ $PTR_INS $i1,$s0,2,8
+ lw $t6,0($i2) # Te1[s3>>16]
+ $PTR_INS $i2,$s1,2,8
+ lw $t7,0($i3) # Te1[s0>>16]
+ $PTR_INS $i3,$s2,2,8
+
+ lw $t8,0($i0) # Te1[s1>>16]
+ _xtr $i0,$s0,24-2
+ lw $t9,0($i1) # Te1[s2>>16]
+ _xtr $i1,$s1,24-2
+ lw $t10,0($i2) # Te1[s3>>16]
+ _xtr $i2,$s2,24-2
+ lw $t11,0($i3) # Te1[s0>>16]
+ _xtr $i3,$s3,24-2
and $i0,0x3fc
and $i1,0x3fc
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+# endif
+ rotr $t4,$t4,16
+ rotr $t5,$t5,16
+ rotr $t6,$t6,16
+ rotr $t7,$t7,16
+
+ rotr $t8,$t8,24
+ rotr $t9,$t9,24
+ rotr $t10,$t10,24
+ rotr $t11,$t11,24
+#else
lwl $t4,2($i0) # Te2[s2>>8]
lwl $t5,2($i1) # Te2[s3>>8]
lwl $t6,2($i2) # Te2[s0>>8]
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#endif
xor $t0,$t4
lw $t4,0($i0) # Te0[s0>>24]
xor $t1,$t5
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+# if defined(_MIPSEL)
+ lbu $t4,2($i0) # Te4[s2>>8]
+ $PTR_INS $i0,$s0,2,8
+ lbu $t5,2($i1) # Te4[s3>>8]
+ $PTR_INS $i1,$s1,2,8
+ lbu $t6,2($i2) # Te4[s0>>8]
+ $PTR_INS $i2,$s2,2,8
+ lbu $t7,2($i3) # Te4[s1>>8]
+ $PTR_INS $i3,$s3,2,8
+
+ lbu $t8,2($i0) # Te4[s0>>24]
+ _xtr $i0,$s3,0-2
+ lbu $t9,2($i1) # Te4[s1>>24]
+ _xtr $i1,$s0,0-2
+ lbu $t10,2($i2) # Te4[s2>>24]
+ _xtr $i2,$s1,0-2
+ lbu $t11,2($i3) # Te4[s3>>24]
+ _xtr $i3,$s2,0-2
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+# else
+ lbu $t4,2($i0) # Te4[s2>>8]
+ _xtr $i0,$s0,24-2
+ lbu $t5,2($i1) # Te4[s3>>8]
+ _xtr $i1,$s1,24-2
+ lbu $t6,2($i2) # Te4[s0>>8]
+ _xtr $i2,$s2,24-2
+ lbu $t7,2($i3) # Te4[s1>>8]
+ _xtr $i3,$s3,24-2
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+ lbu $t8,2($i0) # Te4[s0>>24]
+ $PTR_INS $i0,$s3,2,8
+ lbu $t9,2($i1) # Te4[s1>>24]
+ $PTR_INS $i1,$s0,2,8
+ lbu $t10,2($i2) # Te4[s2>>24]
+ $PTR_INS $i2,$s1,2,8
+ lbu $t11,2($i3) # Te4[s3>>24]
+ $PTR_INS $i3,$s2,2,8
+# endif
+ _ins $t0,16
+ _ins $t1,16
+ _ins $t2,16
+ _ins $t3,16
+
+ _ins2 $t0,$t4,8
+ lbu $t4,2($i0) # Te4[s3]
+ _ins2 $t1,$t5,8
+ lbu $t5,2($i1) # Te4[s0]
+ _ins2 $t2,$t6,8
+ lbu $t6,2($i2) # Te4[s1]
+ _ins2 $t3,$t7,8
+ lbu $t7,2($i3) # Te4[s2]
+
+ _ins2 $t0,$t8,24
+ lw $s0,0($key0)
+ _ins2 $t1,$t9,24
+ lw $s1,4($key0)
+ _ins2 $t2,$t10,24
+ lw $s2,8($key0)
+ _ins2 $t3,$t11,24
+ lw $s3,12($key0)
+
+ _ins2 $t0,$t4,0
+ _ins2 $t1,$t5,0
+ _ins2 $t2,$t6,0
+ _ins2 $t3,$t7,0
+#else
lbu $t4,2($i0) # Te4[s2>>8]
_xtr $i0,$s0,24-2
lbu $t5,2($i1) # Te4[s3>>8]
xor $t1,$t5
xor $t2,$t6
xor $t3,$t7
-
+#endif
xor $s0,$t0
xor $s1,$t1
xor $s2,$t2
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+ lw $t0,0($i0) # Td1[s3>>16]
+ _xtr $i0,$s2,8-2
+ lw $t1,0($i1) # Td1[s0>>16]
+ _xtr $i1,$s3,8-2
+ lw $t2,0($i2) # Td1[s1>>16]
+ _xtr $i2,$s0,8-2
+ lw $t3,0($i3) # Td1[s2>>16]
+ _xtr $i3,$s1,8-2
+#else
lwl $t0,3($i0) # Td1[s3>>16]
lwl $t1,3($i1) # Td1[s0>>16]
lwl $t2,3($i2) # Td1[s1>>16]
_xtr $i2,$s0,8-2
lwr $t3,2($i3) # Td1[s2>>16]
_xtr $i3,$s1,8-2
+#endif
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+ rotr $t0,$t0,8
+ rotr $t1,$t1,8
+ rotr $t2,$t2,8
+ rotr $t3,$t3,8
+# if defined(_MIPSEL)
+ lw $t4,0($i0) # Td2[s2>>8]
+ _xtr $i0,$s1,0-2
+ lw $t5,0($i1) # Td2[s3>>8]
+ _xtr $i1,$s2,0-2
+ lw $t6,0($i2) # Td2[s0>>8]
+ _xtr $i2,$s3,0-2
+ lw $t7,0($i3) # Td2[s1>>8]
+ _xtr $i3,$s0,0-2
and $i0,0x3fc
and $i1,0x3fc
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+ lw $t8,0($i0) # Td3[s1]
+ $PTR_INS $i0,$s0,2,8
+ lw $t9,0($i1) # Td3[s2]
+ $PTR_INS $i1,$s1,2,8
+ lw $t10,0($i2) # Td3[s3]
+ $PTR_INS $i2,$s2,2,8
+ lw $t11,0($i3) # Td3[s0]
+ $PTR_INS $i3,$s3,2,8
+#else
+ lw $t4,0($i0) # Td2[s2>>8]
+ $PTR_INS $i0,$s1,2,8
+ lw $t5,0($i1) # Td2[s3>>8]
+ $PTR_INS $i1,$s2,2,8
+ lw $t6,0($i2) # Td2[s0>>8]
+ $PTR_INS $i2,$s3,2,8
+ lw $t7,0($i3) # Td2[s1>>8]
+ $PTR_INS $i3,$s0,2,8
+
+ lw $t8,0($i0) # Td3[s1]
+ _xtr $i0,$s0,24-2
+ lw $t9,0($i1) # Td3[s2]
+ _xtr $i1,$s1,24-2
+ lw $t10,0($i2) # Td3[s3]
+ _xtr $i2,$s2,24-2
+ lw $t11,0($i3) # Td3[s0]
+ _xtr $i3,$s3,24-2
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+#endif
+ rotr $t4,$t4,16
+ rotr $t5,$t5,16
+ rotr $t6,$t6,16
+ rotr $t7,$t7,16
+
+ rotr $t8,$t8,24
+ rotr $t9,$t9,24
+ rotr $t10,$t10,24
+ rotr $t11,$t11,24
+#else
lwl $t4,2($i0) # Td2[s2>>8]
lwl $t5,2($i1) # Td2[s3>>8]
lwl $t6,2($i2) # Td2[s0>>8]
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#endif
xor $t0,$t4
lw $t4,0($i0) # Td0[s0>>24]
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+# if defined(_MIPSEL)
+ lbu $t4,1024($i0) # Td4[s2>>8]
+ $PTR_INS $i0,$s0,0,8
+ lbu $t5,1024($i1) # Td4[s3>>8]
+ $PTR_INS $i1,$s1,0,8
+ lbu $t6,1024($i2) # Td4[s0>>8]
+ $PTR_INS $i2,$s2,0,8
+ lbu $t7,1024($i3) # Td4[s1>>8]
+ $PTR_INS $i3,$s3,0,8
+
+ lbu $t8,1024($i0) # Td4[s0>>24]
+ _xtr $i0,$s1,0
+ lbu $t9,1024($i1) # Td4[s1>>24]
+ _xtr $i1,$s2,0
+ lbu $t10,1024($i2) # Td4[s2>>24]
+ _xtr $i2,$s3,0
+ lbu $t11,1024($i3) # Td4[s3>>24]
+ _xtr $i3,$s0,0
+
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+# else
+ lbu $t4,1024($i0) # Td4[s2>>8]
+ _xtr $i0,$s0,24
+ lbu $t5,1024($i1) # Td4[s3>>8]
+ _xtr $i1,$s1,24
+ lbu $t6,1024($i2) # Td4[s0>>8]
+ _xtr $i2,$s2,24
+ lbu $t7,1024($i3) # Td4[s1>>8]
+ _xtr $i3,$s3,24
+
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+ lbu $t8,1024($i0) # Td4[s0>>24]
+ $PTR_INS $i0,$s1,0,8
+ lbu $t9,1024($i1) # Td4[s1>>24]
+ $PTR_INS $i1,$s2,0,8
+ lbu $t10,1024($i2) # Td4[s2>>24]
+ $PTR_INS $i2,$s3,0,8
+ lbu $t11,1024($i3) # Td4[s3>>24]
+ $PTR_INS $i3,$s0,0,8
+# endif
+ _ins $t0,16
+ _ins $t1,16
+ _ins $t2,16
+ _ins $t3,16
+
+ _ins2 $t0,$t4,8
+ lbu $t4,1024($i0) # Td4[s1]
+ _ins2 $t1,$t5,8
+ lbu $t5,1024($i1) # Td4[s2]
+ _ins2 $t2,$t6,8
+ lbu $t6,1024($i2) # Td4[s3]
+ _ins2 $t3,$t7,8
+ lbu $t7,1024($i3) # Td4[s0]
+
+ _ins2 $t0,$t8,24
+ lw $s0,0($key0)
+ _ins2 $t1,$t9,24
+ lw $s1,4($key0)
+ _ins2 $t2,$t10,24
+ lw $s2,8($key0)
+ _ins2 $t3,$t11,24
+ lw $s3,12($key0)
+
+ _ins2 $t0,$t4,0
+ _ins2 $t1,$t5,0
+ _ins2 $t2,$t6,0
+ _ins2 $t3,$t7,0
+#else
lbu $t4,1024($i0) # Td4[s2>>8]
_xtr $i0,$s0,24
lbu $t5,1024($i1) # Td4[s3>>8]
xor $t1,$t5
xor $t2,$t6
xor $t3,$t7
+#endif
xor $s0,$t0
xor $s1,$t1
beqz $inp,.Lekey_done
li $t0,-1
beqz $key,.Lekey_done
- $PTR_ADD $rcon,$Tbl,1024+256
+ $PTR_ADD $rcon,$Tbl,256
.set reorder
lwl $rk0,0+$MSB($inp) # load 128 bits
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
- lbu $i0,1024($i0)
- lbu $i1,1024($i1)
- lbu $i2,1024($i2)
- lbu $i3,1024($i3)
+ lbu $i0,0($i0)
+ lbu $i1,0($i1)
+ lbu $i2,0($i2)
+ lbu $i3,0($i3)
sw $rk0,0($key)
sw $rk1,4($key)
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
- lbu $i0,1024($i0)
- lbu $i1,1024($i1)
- lbu $i2,1024($i2)
- lbu $i3,1024($i3)
+ lbu $i0,0($i0)
+ lbu $i1,0($i1)
+ lbu $i2,0($i2)
+ lbu $i3,0($i3)
sw $rk0,0($key)
sw $rk1,4($key)
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
- lbu $i0,1024($i0)
- lbu $i1,1024($i1)
- lbu $i2,1024($i2)
- lbu $i3,1024($i3)
+ lbu $i0,0($i0)
+ lbu $i1,0($i1)
+ lbu $i2,0($i2)
+ lbu $i3,0($i3)
sw $rk0,0($key)
sw $rk1,4($key)
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
- lbu $i0,1024($i0)
- lbu $i1,1024($i1)
- lbu $i2,1024($i2)
- lbu $i3,1024($i3)
+ lbu $i0,0($i0)
+ lbu $i1,0($i1)
+ lbu $i2,0($i2)
+ lbu $i3,0($i3)
sll $i0,24
sll $i1,16
sll $i2,8
___
$code.=<<___;
.set reorder
- la $Tbl,AES_Te # PIC-ified 'load address'
+ la $Tbl,AES_Te4 # PIC-ified 'load address'
bal _mips_AES_set_encrypt_key
___
$code.=<<___;
.set reorder
- la $Tbl,AES_Te # PIC-ified 'load address'
+ la $Tbl,AES_Te4 # PIC-ified 'load address'
bal _mips_AES_set_encrypt_key
xor $tpb,$tp9,$tp2
xor $tpd,$tp9,$tp4
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+ rotr $tp1,$tpd,16
+ xor $tpe,$tp2
+ rotr $tp2,$tp9,8
+ xor $tpe,$tp1
+ rotr $tp4,$tpb,24
+ xor $tpe,$tp2
+ lw $tp1,4($key) # modulo-scheduled
+ xor $tpe,$tp4
+#else
_ror $tp1,$tpd,16
xor $tpe,$tp2
_ror $tp2,$tpd,-16
xor $tpe,$tp1
lw $tp1,4($key) # modulo-scheduled
xor $tpe,$tp2
+#endif
sub $cnt,1
sw $tpe,0($key)
$PTR_ADD $key,4
# Tables are kept in endian-neutral manner
$code.=<<___;
.rdata
-.align 6
+.align 10
AES_Te:
.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
-.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
-.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
-.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
-.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
-.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
-.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
-.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
-.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
-.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
-.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
-.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
-.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
-.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
-.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
-.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
-.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
-.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
-.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
-.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
-.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
-.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
-.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
-.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
-.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
-.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
-.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
-.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
-.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
-.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
-.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
-.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
-.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
-
-.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
-.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
-.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
-.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
-.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
-
-.align 6
AES_Td:
.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+
+AES_Te4:
+.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
+.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+
+.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
+.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
+.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
+.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
+.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
___
\f
foreach (split("\n",$code)) {
s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
: eval("24-$3"))/e or
+ s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
+ sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
+ : eval("24-$3"))/e or
s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
: eval("$3*-1"))/e or
sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
}
+ s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e if(!$big_endian);
+
print $_,"\n";
}