SPARCv9 assembler pack: refine CPU detection on Linux, fix for "unaligned
[oweals/openssl.git] / crypto / sha / asm / sha1-x86_64.pl
index f3cb1347fb08ccd12b83734df4f8a00147c3f38e..4edc5ea9ad5562a450073eeea2ec1964f37b01b5 100755 (executable)
 #
 #              gcc 3.4         32-bit asm      cycles/byte
 # Opteron      +45%            +20%            6.8
-# Xeon         +65%            +0%             9.9
+# Xeon P4      +65%            +0%             9.9
+# Core2        +60%            +10%            7.0
 
-$output=shift;
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
 
-open STDOUT,"| $^X $xlate $output";
+open STDOUT,"| $^X $xlate $flavour $output";
 
 $ctx="%rdi";   # 1st arg
 $inp="%rsi";   # 2nd arg
@@ -68,13 +73,14 @@ $func:
        push    %rbx
        push    %rbp
        push    %r12
-       mov     %rsp,%rax
+       mov     %rsp,%r11
        mov     %rdi,$ctx       # reassigned argument
        sub     \$`8+16*4`,%rsp
        mov     %rsi,$inp       # reassigned argument
        and     \$-64,%rsp
        mov     %rdx,$num       # reassigned argument
-       mov     %rax,`16*4`(%rsp)
+       mov     %r11,`16*4`(%rsp)
+.Lprologue:
 
        mov     0($ctx),$A
        mov     4($ctx),$B
@@ -87,10 +93,12 @@ ___
 sub EPILOGUE {
 my $func=shift;
 $code.=<<___;
-       mov     `16*4`(%rsp),%rsp
-       pop     %r12
-       pop     %rbp
-       pop     %rbx
+       mov     `16*4`(%rsp),%rsi
+       mov     (%rsi),%r12
+       mov     8(%rsi),%rbp
+       mov     16(%rsi),%rbx
+       lea     24(%rsi),%rsp
+.Lepilogue:
        ret
 .size  $func,.-$func
 ___
@@ -232,7 +240,109 @@ ___
 &EPILOGUE("sha1_block_data_order");
 $code.=<<___;
 .asciz "SHA1 block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.align 16
+___
+
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern        __imp_RtlVirtualUnwind
+.type  se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       lea     .Lprologue(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip<.Lprologue
+       jb      .Lin_prologue
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       lea     .Lepilogue(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip>=.Lepilogue
+       jae     .Lin_prologue
+
+       mov     `16*4`(%rax),%rax       # pull saved stack pointer
+       lea     24(%rax),%rax
+
+       mov     -8(%rax),%rbx
+       mov     -16(%rax),%rbp
+       mov     -24(%rax),%r12
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+       mov     %r12,216($context)      # restore context->R12
+
+.Lin_prologue:
+       mov     8(%rax),%rdi
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT) in quadwords (rep movsq count)
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp
+       popfq
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  se_handler,.-se_handler
+
+.section       .pdata
+.align 4
+       .rva    .LSEH_begin_sha1_block_data_order
+       .rva    .LSEH_end_sha1_block_data_order
+       .rva    .LSEH_info_sha1_block_data_order
+
+.section       .xdata
+.align 8
+.LSEH_info_sha1_block_data_order:
+       .byte   9,0,0,0
+       .rva    se_handler
 ___
+}
 
 ####################################################################