Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/8444)
#
# r=1088(*)
#
-# PPC970/G5 14.6/+120%
-# POWER7 10.3/+100%
-# POWER8 11.5/+85%
-# POWER9 9.4/+45%
+# PPC970/G5 14.0/+130%
+# POWER7 9.7/+110%
+# POWER8 10.6/+100%
+# POWER9 8.2/+66%
#
# (*) Corresponds to SHA3-256. Percentage after slash is improvement
# over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
.type dword_le_load,\@function
.align 5
dword_le_load:
- lbzu r0,1(r3)
- lbzu r4,1(r3)
- lbzu r5,1(r3)
+ lbz r0,1(r3)
+ lbz r4,2(r3)
+ lbz r5,3(r3)
insrdi r0,r4,8,48
- lbzu r4,1(r3)
+ lbz r4,4(r3)
insrdi r0,r5,8,40
- lbzu r5,1(r3)
+ lbz r5,5(r3)
insrdi r0,r4,8,32
- lbzu r4,1(r3)
+ lbz r4,6(r3)
insrdi r0,r5,8,24
- lbzu r5,1(r3)
+ lbz r5,7(r3)
insrdi r0,r4,8,16
- lbzu r4,1(r3)
+ lbzu r4,8(r3)
insrdi r0,r5,8,8
insrdi r0,r4,8,0
blr
${UCMP}i $len,8
blt .Lsqueeze_tail
- stbu r0,1($out)
+ stb r0,1($out)
srdi r0,r0,8
- stbu r0,1($out)
+ stb r0,2($out)
srdi r0,r0,8
- stbu r0,1($out)
+ stb r0,3($out)
srdi r0,r0,8
- stbu r0,1($out)
+ stb r0,4($out)
srdi r0,r0,8
- stbu r0,1($out)
+ stb r0,5($out)
srdi r0,r0,8
- stbu r0,1($out)
+ stb r0,6($out)
srdi r0,r0,8
- stbu r0,1($out)
+ stb r0,7($out)
srdi r0,r0,8
- stbu r0,1($out)
+ stbu r0,8($out)
subic. $len,$len,8
beq .Lsqueeze_done