.type sha1_multi_block,\@function,3
.align 32
sha1_multi_block:
+.cfi_startproc
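+# The .cfi_* lines added here are DWARF call-frame-information
+# annotations.  .cfi_startproc/.cfi_endproc, .cfi_def_cfa* and
+# .cfi_restore are ordinary GAS directives; .cfi_push and
+# .cfi_cfa_expression are perlasm shorthands which (assumption, noted
+# for the reader's reference) the x86_64 perlasm translator expands
+# into standard CFI: ".cfi_push %reg" records that %reg has just been
+# pushed (noting its save slot and, while the CFA is still
+# %rsp-relative, bumping the CFA offset by 8), and ".cfi_cfa_expression"
+# emits a DW_CFA_def_cfa_expression rule describing the CFA.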
mov OPENSSL_ia32cap_P+4(%rip),%rcx
bt \$61,%rcx # check SHA bit
jc _shaext_shortcut
___
$code.=<<___;
mov %rsp,%rax
+.cfi_def_cfa_register %rax
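+# Track the CFA via the %rax copy of the entry %rsp, so the pushes and
+# the stack realignment below do not invalidate the unwind info.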
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
sub \$`$REG_SZ*18`,%rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
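+# Assumed reading of the expression above: CFA = *(%rsp + REG_SZ*17) + 8,
+# i.e. fetch the saved copy of the entry %rsp from the aligned frame and
+# add 8 for the return address; a fixed register+offset rule cannot be
+# used once %rsp has been realigned by a data-dependent amount.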
.Lbody:
lea K_XX_XX(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
.Ldone:
mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
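+# With the entry %rsp reloaded into %rax, the CFA is again a plain
+# register+offset rule: %rax plus 8 for the return address.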
___
$code.=<<___ if ($win64);
movaps -0xb8(%rax),%xmm6
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
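+# .cfi_restore reverts the unwind rule for the register to its state at
+# function entry, i.e. "not saved, value lives in the register itself".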
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
ret
+.cfi_endproc
.size sha1_multi_block,.-sha1_multi_block
___
{{{
.type sha1_multi_block_shaext,\@function,3
.align 32
sha1_multi_block_shaext:
+.cfi_startproc
_shaext_shortcut:
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_shaext:
ret
+.cfi_endproc
.size sha1_multi_block_shaext,.-sha1_multi_block_shaext
___
}}}
.type sha1_multi_block_avx,\@function,3
.align 32
sha1_multi_block_avx:
+.cfi_startproc
_avx_shortcut:
___
$code.=<<___ if ($avx>1);
___
$code.=<<___;
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody_avx:
lea K_XX_XX(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
.Ldone_avx:
mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
vzeroupper
___
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
ret
+.cfi_endproc
.size sha1_multi_block_avx,.-sha1_multi_block_avx
___
.type sha1_multi_block_avx2,\@function,3
.align 32
sha1_multi_block_avx2:
+.cfi_startproc
_avx2_shortcut:
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody_avx2:
lea K_XX_XX(%rip),$Tbl
shr \$1,$num
.Ldone_avx2:
mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
vzeroupper
___
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
ret
+.cfi_endproc
.size sha1_multi_block_avx2,.-sha1_multi_block_avx2
___
} }}}
.type sha256_multi_block,\@function,3
.align 32
sha256_multi_block:
+.cfi_startproc
mov OPENSSL_ia32cap_P+4(%rip),%rcx
bt \$61,%rcx # check SHA bit
jc _shaext_shortcut
___
$code.=<<___;
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody:
lea K256+128(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
.Ldone:
mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
___
$code.=<<___ if ($win64);
movaps -0xb8(%rax),%xmm6
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
ret
+.cfi_endproc
.size sha256_multi_block,.-sha256_multi_block
___
{{{
.type sha256_multi_block_shaext,\@function,3
.align 32
sha256_multi_block_shaext:
+.cfi_startproc
_shaext_shortcut:
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_shaext:
ret
+.cfi_endproc
.size sha256_multi_block_shaext,.-sha256_multi_block_shaext
___
}}}
.type sha256_multi_block_avx,\@function,3
.align 32
sha256_multi_block_avx:
+.cfi_startproc
_avx_shortcut:
___
$code.=<<___ if ($avx>1);
___
$code.=<<___;
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody_avx:
lea K256+128(%rip),$Tbl
lea `$REG_SZ*16`(%rsp),%rbx
.Ldone_avx:
mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
vzeroupper
___
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
ret
+.cfi_endproc
.size sha256_multi_block_avx,.-sha256_multi_block_avx
___
if ($avx>1) {
.type sha256_multi_block_avx2,\@function,3
.align 32
sha256_multi_block_avx2:
+.cfi_startproc
_avx2_shortcut:
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
lea -0xa8(%rsp),%rsp
sub \$`$REG_SZ*18`, %rsp
and \$-256,%rsp
mov %rax,`$REG_SZ*17`(%rsp) # original %rsp
+.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8
.Lbody_avx2:
lea K256+128(%rip),$Tbl
lea 0x80($ctx),$ctx # size optimization
.Ldone_avx2:
mov `$REG_SZ*17`(%rsp),%rax # original %rsp
+.cfi_def_cfa %rax,8
vzeroupper
___
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
ret
+.cfi_endproc
.size sha256_multi_block_avx2,.-sha256_multi_block_avx2
___
} }}}
$_ctx="16*$SZ+0*8(%rsp)";
$_inp="16*$SZ+1*8(%rsp)";
$_end="16*$SZ+2*8(%rsp)";
-$_rsp="16*$SZ+3*8(%rsp)";
+$_rsp="`16*$SZ+3*8`(%rsp)";
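+# The backticks make Perl evaluate 16*$SZ+3*8 to a plain constant at
+# generation time; presumably the synthetic .cfi_cfa_expression that
+# consumes $_rsp needs a numeric displacement, while the assembler was
+# already happy to evaluate the expression in ordinary memory operands.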
$framesz="16*$SZ+4*8";
.type $func,\@function,3
.align 16
$func:
+.cfi_startproc
___
$code.=<<___ if ($SZ==4 || $avx);
lea OPENSSL_ia32cap_P(%rip),%r11
___
$code.=<<___;
mov %rsp,%rax # copy %rsp
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
shl \$4,%rdx # num*16
sub \$$framesz,%rsp
lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ
mov $inp,$_inp # save inp, 2nd arg
mov %rdx,$_end # save end pointer, "3rd" arg
mov %rax,$_rsp # save copy of %rsp
+.cfi_cfa_expression $_rsp,deref,+8
.Lprologue:
mov $SZ*0($ctx),$A
jb .Lloop
mov $_rsp,%rsi
+.cfi_def_cfa %rsi,8
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
ret
+.cfi_endproc
.size $func,.-$func
___
.type ${func}_ssse3,\@function,3
.align 64
${func}_ssse3:
+.cfi_startproc
.Lssse3_shortcut:
mov %rsp,%rax # copy %rsp
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
shl \$4,%rdx # num*16
sub \$`$framesz+$win64*16*4`,%rsp
lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ
mov $inp,$_inp # save inp, 2nd arg
mov %rdx,$_end # save end pointer, "3rd" arg
mov %rax,$_rsp # save copy of %rsp
+.cfi_cfa_expression $_rsp,deref,+8
___
$code.=<<___ if ($win64);
movaps %xmm6,16*$SZ+32(%rsp)
jb .Lloop_ssse3
mov $_rsp,%rsi
+.cfi_def_cfa %rsi,8
___
$code.=<<___ if ($win64);
movaps 16*$SZ+32(%rsp),%xmm6
___
$code.=<<___;
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_ssse3:
ret
+.cfi_endproc
.size ${func}_ssse3,.-${func}_ssse3
___
}
.type ${func}_xop,\@function,3
.align 64
${func}_xop:
+.cfi_startproc
.Lxop_shortcut:
mov %rsp,%rax # copy %rsp
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
shl \$4,%rdx # num*16
sub \$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ
mov $inp,$_inp # save inp, 2nd arg
mov %rdx,$_end # save end pointer, "3rd" arg
mov %rax,$_rsp # save copy of %rsp
+.cfi_cfa_expression $_rsp,deref,+8
___
$code.=<<___ if ($win64);
movaps %xmm6,16*$SZ+32(%rsp)
jb .Lloop_xop
mov $_rsp,%rsi
+.cfi_def_cfa %rsi,8
vzeroupper
___
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_xop:
ret
+.cfi_endproc
.size ${func}_xop,.-${func}_xop
___
}
.type ${func}_avx,\@function,3
.align 64
${func}_avx:
+.cfi_startproc
.Lavx_shortcut:
mov %rsp,%rax # copy %rsp
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
shl \$4,%rdx # num*16
sub \$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ
mov $inp,$_inp # save inp, 2nd arg
mov %rdx,$_end # save end pointer, "3rd" arg
mov %rax,$_rsp # save copy of %rsp
+.cfi_cfa_expression $_rsp,deref,+8
___
$code.=<<___ if ($win64);
movaps %xmm6,16*$SZ+32(%rsp)
jb .Lloop_avx
mov $_rsp,%rsi
+.cfi_def_cfa %rsi,8
vzeroupper
___
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
ret
+.cfi_endproc
.size ${func}_avx,.-${func}_avx
___
.type ${func}_avx2,\@function,3
.align 64
${func}_avx2:
+.cfi_startproc
.Lavx2_shortcut:
mov %rsp,%rax # copy %rsp
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
sub \$`2*$SZ*$rounds+4*8+$win64*16*($SZ==4?4:6)`,%rsp
shl \$4,%rdx # num*16
and \$-256*$SZ,%rsp # align stack frame
mov $inp,$_inp # save inp, 2nd arg
mov %rdx,$_end # save end pointer, "3rd" arg
mov %rax,$_rsp # save copy of %rsp
+.cfi_cfa_expression $_rsp,deref,+8
___
$code.=<<___ if ($win64);
movaps %xmm6,16*$SZ+32(%rsp)
.Ldone_avx2:
lea ($Tbl),%rsp
mov $_rsp,%rsi
+.cfi_def_cfa %rsi,8
vzeroupper
___
$code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
ret
+.cfi_endproc
.size ${func}_avx2,.-${func}_avx2
___
}}