rk0=r8; rk1=r9;
-prsave=r10;
+pfssave=r2; // receives previous ar.pfs from alloc in AES_encrypt/AES_decrypt
+lcsave=r10; // holds ar.lc across AES_encrypt/AES_decrypt
+prsave=r3; // holds pr; r3 carries ip until the bundle that saves pr
maskff=r11;
twenty4=r14;
sixteen=r15;
// Clobber: r16-r31,rk0-rk1,r32-r43
.align 32
_ia64_AES_encrypt:
+ .prologue
+ .altrp b6
+ .body
{ .mmi; alloc r16=ar.pfs,12,0,0,8
LDKEY t0=[rk0],2*KSZ
mov pr.rot=1<<16 }
.skip 16
AES_encrypt:
.prologue
- .save ar.pfs,r2
-{ .mmi; alloc r2=ar.pfs,3,0,12,0
- addl out8=@ltoff(AES_Te#),gp
- .save ar.lc,r3
- mov r3=ar.lc }
-{ .mmi; and out0=3,in0
- ADDP in0=0,in0
- ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
+ .save ar.pfs,pfssave
+{ .mmi; alloc pfssave=ar.pfs,3,0,12,0 // pfssave = previous ar.pfs
+ and out0=3,in0
+ mov r3=ip } // r3 = address of this bundle, i.e. AES_encrypt
+{ .mmi; ADDP in0=0,in0
+ ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
+ .save ar.lc,lcsave
+ mov lcsave=ar.lc };; // lcsave = ar.lc on entry
- .body
-{ .mmi; ld8 out8=[out8] // Te0
- ld4 out11=[out11] // AES_KEY->rounds
+{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
+ add out8=(AES_Te#-AES_encrypt#),r3 // Te0, ip-relative instead of a gp/ltoff load
+ .save pr,prsave
mov prsave=pr }
+ .body
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
{ .mib; cmp.ne p6,p0=out0,r0
add out0=4,in0
ADDP in1=0,in1
(p6) br.spnt .Le_o_unaligned };;
-{ .mii; mov ar.pfs=r2
- mov ar.lc=r3 }
+{ .mii; mov ar.pfs=pfssave // restore ar.pfs saved by alloc in prologue
+ mov ar.lc=lcsave } // restore ar.lc saved in prologue
{ .mmi; st4 [in1]=r16,8 // s0
st4 [in0]=r20,8 // s1
mov pr=prsave,0x1ffff };;
mov pr=prsave,0x1ffff }//;;
{ .mmi; st1 [out1]=r26,4
st1 [out0]=r27,4
- mov ar.pfs=r2 };;
+ mov ar.pfs=pfssave };;
{ .mmi; st1 [out3]=r28
st1 [out2]=r29
- mov ar.lc=r3 }//;;
+ mov ar.lc=lcsave }//;;
{ .mmb; st1 [out1]=r30
st1 [out0]=r31
br.ret.sptk.many b0 };;
// Clobber: r16-r31,rk0-rk1,r32-r43
.align 32
_ia64_AES_decrypt:
+ .prologue
+ .altrp b6
+ .body
{ .mmi; alloc r16=ar.pfs,12,0,0,8
LDKEY t0=[rk0],2*KSZ
mov pr.rot=1<<16 }
.skip 16
AES_decrypt:
.prologue
- .save ar.pfs,r2
-{ .mmi; alloc r2=ar.pfs,3,0,12,0
- addl out8=@ltoff(AES_Td#),gp
- .save ar.lc,r3
- mov r3=ar.lc }
-{ .mmi; and out0=3,in0
- ADDP in0=0,in0
- ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
+ .save ar.pfs,pfssave
+{ .mmi; alloc pfssave=ar.pfs,3,0,12,0 // pfssave = previous ar.pfs
+ and out0=3,in0
+ mov r3=ip } // r3 = address of this bundle, i.e. AES_decrypt
+{ .mmi; ADDP in0=0,in0
+ ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
+ .save ar.lc,lcsave
+ mov lcsave=ar.lc };; // lcsave = ar.lc on entry
- .body
-{ .mmi; ld8 out8=[out8] // Te0
- ld4 out11=[out11] // AES_KEY->rounds
+{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
+ add out8=(AES_Td#-AES_decrypt#),r3 // Td0, ip-relative instead of a gp/ltoff load
+ .save pr,prsave
mov prsave=pr }
+ .body
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
{ .mib; cmp.ne p6,p0=out0,r0
add out0=4,in0
ADDP in1=0,in1
(p6) br.spnt .Ld_o_unaligned };;
-{ .mii; mov ar.pfs=r2
- mov ar.lc=r3 }
+{ .mii; mov ar.pfs=pfssave // restore ar.pfs saved by alloc in prologue
+ mov ar.lc=lcsave } // restore ar.lc saved in prologue
{ .mmi; st4 [in1]=r16,8 // s0
st4 [in0]=r20,8 // s1
mov pr=prsave,0x1ffff };;
mov pr=prsave,0x1ffff }//;;
{ .mmi; st1 [out1]=r26,4
st1 [out0]=r27,4
- mov ar.pfs=r2 };;
+ mov ar.pfs=pfssave };;
{ .mmi; st1 [out3]=r28
st1 [out2]=r29
- mov ar.lc=r3 }//;;
+ mov ar.lc=lcsave }//;;
{ .mmb; st1 [out1]=r30
st1 [out0]=r31
br.ret.sptk.many b0 };;
.explicit
.text
+pfssave=r2;
+lcsave=r3;
prsave=r14;
K=r15;
A=r16; B=r17; C=r18; D=r19;
.align 32
$func:
.prologue
- .save ar.pfs,r2
-{ .mmi; alloc r2=ar.pfs,3,17,0,16
+ .save ar.pfs,pfssave
+{ .mmi; alloc pfssave=ar.pfs,3,17,0,16
$ADDP ctx=0,r32 // 1st arg
- .save ar.lc,r3
- mov r3=ar.lc }
+ .save ar.lc,lcsave
+ mov lcsave=ar.lc }
{ .mmi; $ADDP input=0,r33 // 2nd arg
- addl Ktbl=\@ltoff($TABLE#),gp
+ mov num=r34 // 3rd arg
.save pr,prsave
mov prsave=pr };;
.body
-{ .mii; ld8 Ktbl=[Ktbl]
- mov num=r34 };; // 3rd arg
-
{ .mib; add r8=0*$SZ,ctx
add r9=1*$SZ,ctx
brp.loop.imp .L_first16,.L_first16_ctop
brp.loop.imp .L_rest,.L_rest_ctop
};;
// load A-H
+.Lpic_point:
{ .mmi; $LDW A=[r8],4*$SZ
$LDW B=[r9],4*$SZ
- mov sgm0=$sigma0[2] }
+ mov Ktbl=ip } // Ktbl = address of the bundle at .Lpic_point
{ .mmi; $LDW C=[r10],4*$SZ
$LDW D=[r11],4*$SZ
- mov sgm1=$sigma1[2] };;
+ mov sgm0=$sigma0[2] };;
{ .mmi; $LDW E=[r8]
- $LDW F=[r9] }
+ $LDW F=[r9]
+ add Ktbl=($TABLE#-.Lpic_point),Ktbl } // Ktbl = table address, ip-relative (replaces the gp/ltoff load)
{ .mmi; $LDW G=[r10]
$LDW H=[r11]
cmp.ne p15,p14=0,r35 };; // used in sha256_block
.L_outer:
-{ .mii; mov ar.lc=15
+{ .mii; mov sgm1=$sigma1[2]
+ mov ar.lc=15
mov ar.ec=1 };;
.align 32
.L_first16:
(p6) add Ktbl=-$SZ*$rounds,Ktbl }
{ .mmi; $LDW r38=[r10],-4*$SZ
$LDW r39=[r11],-4*$SZ
-(p7) mov ar.lc=r3 };;
+(p7) mov ar.lc=lcsave };;
{ .mmi; add A=A,r32
add B=B,r33
add C=C,r34 }