From e2eabed110b6d88fd15ac3063cab23dcd5090b44 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Tue, 10 Jun 2014 23:08:06 +0200 Subject: [PATCH] aesni-mb-x86_64.pl: add Win64 SEH. --- crypto/aes/asm/aesni-mb-x86_64.pl | 194 +++++++++++++++++++++++++++--- 1 file changed, 177 insertions(+), 17 deletions(-) diff --git a/crypto/aes/asm/aesni-mb-x86_64.pl b/crypto/aes/asm/aesni-mb-x86_64.pl index e6ac9f9ecf..ddfe084cc7 100644 --- a/crypto/aes/asm/aesni-mb-x86_64.pl +++ b/crypto/aes/asm/aesni-mb-x86_64.pl @@ -115,7 +115,7 @@ $code.=<<___; push %r15 ___ $code.=<<___ if ($win64); - lea -0x78(%rsp),%rsp + lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) @@ -123,6 +123,9 @@ $code.=<<___ if ($win64); movaps %xmm10,0x40(%rsp) movaps %xmm11,0x50(%rsp) movaps %xmm12,0x60(%rsp) + movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) ___ $code.=<<___; # stack layout @@ -323,13 +326,16 @@ $code.=<<___; .Lenc4x_done: ___ $code.=<<___ if ($win64); - movaps -0xa8(%rax),%xmm6 - movaps -0x98(%rax),%xmm7 - movaps -0x88(%rax),%xmm8 - movaps -0x78(%rax),%xmm9 - movaps -0x68(%rax),%xmm10 - movaps -0x58(%rax),%xmm11 - movaps -0x48(%rax),%xmm12 + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + #movaps -0x68(%rax),%xmm13 + #movaps -0x58(%rax),%xmm14 + #movaps -0x48(%rax),%xmm15 ___ $code.=<<___; mov -48(%rax),%r15 @@ -339,6 +345,7 @@ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp +.Lenc4x_epilogue: ret .size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt @@ -367,7 +374,7 @@ $code.=<<___; push %r15 ___ $code.=<<___ if ($win64); - lea -0x78(%rsp),%rsp + lea -0xa8(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) @@ -375,6 +382,9 @@ $code.=<<___ if ($win64); movaps %xmm10,0x40(%rsp) movaps %xmm11,0x50(%rsp) movaps %xmm12,0x60(%rsp) + movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) ___ $code.=<<___; # stack layout @@ -451,8 +461,8 @@ $code.=<<___; cmp `32+4*$i`(%rsp),$one aesdec $rndkey,@out[0] aesdec $rndkey,@out[1] - cmovge $sink,@inptr[$i] # cancel input aesdec $rndkey,@out[2] + cmovge $sink,@inptr[$i] # cancel input cmovg $sink,@outptr[$i] # sink output aesdec $rndkey,@out[3] movups `0x40+16*$i-0x78`($key),$rndkey @@ -566,13 +576,16 @@ $code.=<<___; .Ldec4x_done: ___ $code.=<<___ if ($win64); - movaps -0xa8(%rax),%xmm6 - movaps -0x98(%rax),%xmm7 - movaps -0x88(%rax),%xmm8 - movaps -0x78(%rax),%xmm9 - movaps -0x68(%rax),%xmm10 - movaps -0x58(%rax),%xmm11 - movaps -0x48(%rax),%xmm12 + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + #movaps -0x68(%rax),%xmm13 + #movaps -0x58(%rax),%xmm14 + #movaps -0x48(%rax),%xmm15 ___ $code.=<<___; mov -48(%rax),%r15 @@ -582,6 +595,7 @@ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp +.Ldec4x_epilogue: ret .size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt ___ @@ -864,6 +878,7 @@ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp +.Lenc8x_epilogue: ret .size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx @@ -1169,11 +1184,156 @@ $code.=<<___; mov -16(%rax),%rbp mov -8(%rax),%rbx lea (%rax),%rsp +.Ldec8x_epilogue: ret .size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx ___ }}} +if ($win64) { +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lin_prologue + + mov 152($context),%rax # pull context->Rsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lin_prologue + + mov 16(%rax),%rax # pull saved stack pointer + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore cotnext->R12 + mov %r13,224($context) # restore cotnext->R13 + mov %r14,232($context) # restore cotnext->R14 + mov %r15,240($context) # restore cotnext->R15 + + lea -56-10*16(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_aesni_multi_cbc_encrypt + .rva .LSEH_end_aesni_multi_cbc_encrypt + .rva .LSEH_info_aesni_multi_cbc_encrypt + .rva .LSEH_begin_aesni_multi_cbc_decrypt + .rva .LSEH_end_aesni_multi_cbc_decrypt + .rva .LSEH_info_aesni_multi_cbc_decrypt +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_aesni_multi_cbc_encrypt_avx + .rva .LSEH_end_aesni_multi_cbc_encrypt_avx + .rva .LSEH_info_aesni_multi_cbc_encrypt_avx + .rva .LSEH_begin_aesni_multi_cbc_decrypt_avx + .rva .LSEH_end_aesni_multi_cbc_decrypt_avx + .rva .LSEH_info_aesni_multi_cbc_decrypt_avx +___ +$code.=<<___; +.section .xdata +.align 8 +.LSEH_info_aesni_multi_cbc_encrypt: + .byte 9,0,0,0 + .rva se_handler + .rva .Lenc4x_body,.Lenc4x_epilogue # HandlerData[] +.LSEH_info_aesni_multi_cbc_decrypt: + .byte 9,0,0,0 + .rva se_handler + .rva .Ldec4x_body,.Ldec4x_epilogue # HandlerData[] +___ +$code.=<<___ if ($avx); +.LSEH_info_aesni_multi_cbc_encrypt_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .Lenc8x_body,.Lenc8x_epilogue # HandlerData[] +.LSEH_info_aesni_multi_cbc_decrypt_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .Ldec8x_body,.Ldec8x_epilogue # HandlerData[] +___ +} +#################################################################### + sub rex { local *opcode=shift; my ($dst,$src)=@_; -- 2.25.1