+ beq cr1, Lcbc_dec_loop # $out is aligned, skip the head handling below
+
+ vmr v0, $inptail # v0 = quadword currently held in $inptail
+ lvx $inptail, 0, $inp # load next quadword (lvx ignores the low 4 address bits)
+ addi $inp, $inp, 16 # advance input pointer by one block
+ ?vperm v0, v0, $inptail, $inpperm # pick 16 bytes out of the old/new pair per $inpperm
+ vmr v25, v0 # put aside input, it is copied into v24 after the call
+
+ bl _vpaes_decrypt_core # defined outside this chunk, presumably decrypts v0 in place
+
+ andi. r8, $out, 15 # r8 = low 4 bits of $out (byte offset in 16-byte line)
+ vxor v0, v0, v24 # ^= iv (held in v24)
+ vmr v24, v25 # v24 = input saved above, presumably iv for the next block
+ sub r9, $out, r8 # r9 = $out rounded down to a 16-byte boundary
+ vperm $outhead, v0, v0, $outperm # rotate right/left, presumably to line bytes up for stvebx
+
+Lcbc_dec_head: # byte-wise store loop, r8 runs from ($out & 15) up to 15
+ stvebx $outhead, r8, r9 # store the single byte of $outhead addressed by r9+r8
+ cmpwi r8, 15 # was that offset 15, the last byte of the 16-byte line
+ addi r8, r8, 1 # next byte offset (addi leaves cr0 untouched)
+ bne Lcbc_dec_head # repeat until offset 15 has been stored
+
+ sub. r30, r30, r0 # len -= 16 (r0 presumably holds 16), record form sets cr0
+ addi $out, $out, 16 # advance output pointer by one block, cr0 still from sub.
+ beq Lcbc_unaligned_done # taken when len reached zero
+