From 08d62e9f1a122d2a9029a8130b55525f44274d9f Mon Sep 17 00:00:00 2001
From: Andy Polyakov
Date: Sat, 8 Oct 2011 21:37:44 +0000
Subject: [PATCH] e_padlock-x86[_64].pl: SHA fixes, comply with specification and fix bug.

---
Review notes (this section is ignored when the patch is applied):

* Every SHA routine copies the caller's context into a 16-byte-aligned
  scratch area on the stack, points the destination register at it for
  the `rep xsha*` instruction, and copies the result back afterwards.
* e_padlock-x86_64.pl: each routine reserves the scratch area with
  `sub $128+8,%rsp`.  The matching `add $128+8,%rsp` has been added after
  the last load from the scratch area; without it `ret` executes with
  %rsp still pointing at the scratch copy of the context.
* Hunk headers and the diffstat below account for those five added lines;
  the `index` blob ids are unchanged from the original mail.

 engines/asm/e_padlock-x86.pl    | 98 ++++++++++++++++++++++++++++++---
 engines/asm/e_padlock-x86_64.pl | 76 +++++++++++++++++++++++--
 2 files changed, 161 insertions(+), 13 deletions(-)

diff --git a/engines/asm/e_padlock-x86.pl b/engines/asm/e_padlock-x86.pl
index 61e91d889f..e211706ae1 100644
--- a/engines/asm/e_padlock-x86.pl
+++ b/engines/asm/e_padlock-x86.pl
@@ -352,19 +352,34 @@ my ($mode,$opcode) = @_;
 	&push	("edi");
 	&push	("esi");
 	&xor	("eax","eax");
+	&mov	("edi",&wparam(0));
+	&mov	("esi",&wparam(1));
+	&mov	("ecx",&wparam(2));
     if ($::win32 or $::coff) {
 	&push	(&::islabel("_win32_segv_handler"));
 	&data_byte(0x64,0xff,0x30);		# push	%fs:(%eax)
 	&data_byte(0x64,0x89,0x20);		# mov	%esp,%fs:(%eax)
     }
-	&mov	("edi",&wparam(0));
-	&mov	("esi",&wparam(1));
-	&mov	("ecx",&wparam(2));
+	&mov	("edx","esp");			# put aside %esp
+	&add	("esp",-128);			# 32 is enough but spec says 128
+	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
+	&and	("esp",-16);
+	&mov	("eax",&DWP(16,"edi"));
+	&movaps	(&QWP(0,"esp"),"xmm0");
+	&mov	("edi","esp");
+	&mov	(&DWP(16,"esp"),"eax");
+	&xor	("eax","eax");
 	&data_byte(0xf3,0x0f,0xa6,0xc8);	# rep xsha1
+	&movaps	("xmm0",&QWP(0,"esp"));
+	&mov	("eax",&DWP(16,"esp"));
+	&mov	("esp","edx");			# restore %esp
     if ($::win32 or $::coff) {
 	&data_byte(0x64,0x8f,0x05,0,0,0,0);	# pop	%fs:0
 	&lea	("esp",&DWP(4,"esp"));
     }
+	&mov	("edi",&wparam(0));
+	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
+	&mov	(&DWP(16,"edi"),"eax");
 	&pop	("esi");
 	&pop	("edi");
 	&ret	();
@@ -373,12 +388,26 @@ my ($mode,$opcode) = @_;
 &function_begin_B("padlock_sha1_blocks");
 	&push	("edi");
 	&push	("esi");
-	&mov	("eax",-1);
 	&mov	("edi",&wparam(0));
 	&mov	("esi",&wparam(1));
+	&mov	("edx","esp");			# put aside %esp
 	&mov	("ecx",&wparam(2));
+	&add	("esp",-128);
+	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
+	&and	("esp",-16);
+	&mov	("eax",&DWP(16,"edi"));
+	&movaps	(&QWP(0,"esp"),"xmm0");
+	&mov	("edi","esp");
+	&mov	(&DWP(16,"esp"),"eax");
+	&mov	("eax",-1);
 	&data_byte(0xf3,0x0f,0xa6,0xc8);	# rep xsha1
-	&pop ("esi");
+	&movaps	("xmm0",&QWP(0,"esp"));
+	&mov	("eax",&DWP(16,"esp"));
+	&mov	("esp","edx");			# restore %esp
+	&mov	("edi",&wparam(0));
+	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
+	&mov	(&DWP(16,"edi"),"eax");
+	&pop	("esi");
 	&pop	("edi");
 	&ret	();
 &function_end_B("padlock_sha1_blocks");
@@ -387,19 +416,34 @@ my ($mode,$opcode) = @_;
 	&push	("edi");
 	&push	("esi");
 	&xor	("eax","eax");
+	&mov	("edi",&wparam(0));
+	&mov	("esi",&wparam(1));
+	&mov	("ecx",&wparam(2));
     if ($::win32 or $::coff) {
 	&push	(&::islabel("_win32_segv_handler"));
 	&data_byte(0x64,0xff,0x30);		# push	%fs:(%eax)
 	&data_byte(0x64,0x89,0x20);		# mov	%esp,%fs:(%eax)
     }
-	&mov	("edi",&wparam(0));
-	&mov	("esi",&wparam(1));
-	&mov	("ecx",&wparam(2));
+	&mov	("edx","esp");			# put aside %esp
+	&add	("esp",-128);
+	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
+	&and	("esp",-16);
+	&movups	("xmm1",&QWP(16,"edi"));
+	&movaps	(&QWP(0,"esp"),"xmm0");
+	&mov	("edi","esp");
+	&movaps	(&QWP(16,"esp"),"xmm1");
+	&xor	("eax","eax");
 	&data_byte(0xf3,0x0f,0xa6,0xd0);	# rep xsha256
+	&movaps	("xmm0",&QWP(0,"esp"));
+	&movaps	("xmm1",&QWP(16,"esp"));
+	&mov	("esp","edx");			# restore %esp
     if ($::win32 or $::coff) {
 	&data_byte(0x64,0x8f,0x05,0,0,0,0);	# pop	%fs:0
 	&lea	("esp",&DWP(4,"esp"));
     }
+	&mov	("edi",&wparam(0));
+	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
+	&movups	(&QWP(16,"edi"),"xmm1");
 	&pop	("esi");
 	&pop	("edi");
 	&ret	();
@@ -408,11 +452,25 @@ my ($mode,$opcode) = @_;
 &function_begin_B("padlock_sha256_blocks");
 	&push	("edi");
 	&push	("esi");
-	&mov	("eax",-1);
 	&mov	("edi",&wparam(0));
 	&mov	("esi",&wparam(1));
 	&mov	("ecx",&wparam(2));
+	&mov	("edx","esp");			# put aside %esp
+	&add	("esp",-128);
+	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
+	&and	("esp",-16);
+	&movups	("xmm1",&QWP(16,"edi"));
+	&movaps	(&QWP(0,"esp"),"xmm0");
+	&mov	("edi","esp");
+	&movaps	(&QWP(16,"esp"),"xmm1");
+	&mov	("eax",-1);
 	&data_byte(0xf3,0x0f,0xa6,0xd0);	# rep xsha256
+	&movaps	("xmm0",&QWP(0,"esp"));
+	&movaps	("xmm1",&QWP(16,"esp"));
+	&mov	("esp","edx");			# restore %esp
+	&mov	("edi",&wparam(0));
+	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
+	&movups	(&QWP(16,"edi"),"xmm1");
 	&pop	("esi");
 	&pop	("edi");
 	&ret	();
@@ -424,7 +482,29 @@ my ($mode,$opcode) = @_;
 	&mov	("edi",&wparam(0));
 	&mov	("esi",&wparam(1));
 	&mov	("ecx",&wparam(2));
+	&mov	("edx","esp");			# put aside %esp
+	&add	("esp",-128);
+	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
+	&and	("esp",-16);
+	&movups	("xmm1",&QWP(16,"edi"));
+	&movups	("xmm2",&QWP(32,"edi"));
+	&movups	("xmm3",&QWP(48,"edi"));
+	&movaps	(&QWP(0,"esp"),"xmm0");
+	&mov	("edi","esp");
+	&movaps	(&QWP(16,"esp"),"xmm1");
+	&movaps	(&QWP(32,"esp"),"xmm2");
+	&movaps	(&QWP(48,"esp"),"xmm3");
 	&data_byte(0xf3,0x0f,0xa6,0xe0);	# rep xsha512
+	&movaps	("xmm0",&QWP(0,"esp"));
+	&movaps	("xmm1",&QWP(16,"esp"));
+	&movaps	("xmm2",&QWP(32,"esp"));
+	&movaps	("xmm3",&QWP(48,"esp"));
+	&mov	("esp","edx");			# restore %esp
+	&mov	("edi",&wparam(0));
+	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
+	&movups	(&QWP(16,"edi"),"xmm1");
+	&movups	(&QWP(32,"edi"),"xmm2");
+	&movups	(&QWP(48,"edi"),"xmm3");
 	&pop	("esi");
 	&pop	("edi");
 	&ret	();
diff --git a/engines/asm/e_padlock-x86_64.pl b/engines/asm/e_padlock-x86_64.pl
index 13c371be67..ad61974988 100644
--- a/engines/asm/e_padlock-x86_64.pl
+++ b/engines/asm/e_padlock-x86_64.pl
@@ -146,9 +146,21 @@ padlock_xstore:
 .type	padlock_sha1_oneshot,\@function,3
 .align	16
 padlock_sha1_oneshot:
-	xor	%rax,%rax
 	mov	%rdx,%rcx
+	mov	%rdi,%rdx		# put aside %rdi
+	movups	(%rdi),%xmm0		# copy-in context
+	sub	\$128+8,%rsp
+	mov	16(%rdi),%eax
+	movaps	%xmm0,(%rsp)
+	mov	%rsp,%rdi
+	mov	%eax,16(%rsp)
+	xor	%rax,%rax
 	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
+	movaps	(%rsp),%xmm0
+	mov	16(%rsp),%eax
+	add	\$128+8,%rsp		# release scratch frame before ret
+	movups	%xmm0,(%rdx)		# copy-out context
+	mov	%eax,16(%rdx)
 	ret
 .size	padlock_sha1_oneshot,.-padlock_sha1_oneshot
 
@@ -156,9 +168,21 @@ padlock_sha1_oneshot:
 .type	padlock_sha1_blocks,\@function,3
 .align	16
 padlock_sha1_blocks:
-	mov	\$-1,%rax
 	mov	%rdx,%rcx
+	mov	%rdi,%rdx		# put aside %rdi
+	movups	(%rdi),%xmm0		# copy-in context
+	sub	\$128+8,%rsp
+	mov	16(%rdi),%eax
+	movaps	%xmm0,(%rsp)
+	mov	%rsp,%rdi
+	mov	%eax,16(%rsp)
+	mov	\$-1,%rax
 	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
+	movaps	(%rsp),%xmm0
+	mov	16(%rsp),%eax
+	add	\$128+8,%rsp		# release scratch frame before ret
+	movups	%xmm0,(%rdx)		# copy-out context
+	mov	%eax,16(%rdx)
 	ret
 .size	padlock_sha1_blocks,.-padlock_sha1_blocks
 
@@ -166,9 +190,21 @@ padlock_sha1_blocks:
 .type	padlock_sha256_oneshot,\@function,3
 .align	16
 padlock_sha256_oneshot:
-	xor	%rax,%rax
 	mov	%rdx,%rcx
+	mov	%rdi,%rdx		# put aside %rdi
+	movups	(%rdi),%xmm0		# copy-in context
+	sub	\$128+8,%rsp
+	movups	16(%rdi),%xmm1
+	movaps	%xmm0,(%rsp)
+	mov	%rsp,%rdi
+	movaps	%xmm1,16(%rsp)
+	xor	%rax,%rax
 	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
+	movaps	(%rsp),%xmm0
+	movaps	16(%rsp),%xmm1
+	add	\$128+8,%rsp		# release scratch frame before ret
+	movups	%xmm0,(%rdx)		# copy-out context
+	movups	%xmm1,16(%rdx)
 	ret
 .size	padlock_sha256_oneshot,.-padlock_sha256_oneshot
 
@@ -176,9 +212,21 @@ padlock_sha256_oneshot:
 .type	padlock_sha256_blocks,\@function,3
 .align	16
 padlock_sha256_blocks:
-	mov	\$-1,%rax
 	mov	%rdx,%rcx
+	mov	%rdi,%rdx		# put aside %rdi
+	movups	(%rdi),%xmm0		# copy-in context
+	sub	\$128+8,%rsp
+	movups	16(%rdi),%xmm1
+	movaps	%xmm0,(%rsp)
+	mov	%rsp,%rdi
+	movaps	%xmm1,16(%rsp)
+	mov	\$-1,%rax
 	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
+	movaps	(%rsp),%xmm0
+	movaps	16(%rsp),%xmm1
+	add	\$128+8,%rsp		# release scratch frame before ret
+	movups	%xmm0,(%rdx)		# copy-out context
+	movups	%xmm1,16(%rdx)
 	ret
 .size	padlock_sha256_blocks,.-padlock_sha256_blocks
 
@@ -187,7 +235,27 @@ padlock_sha256_blocks:
 .align	16
 padlock_sha512_blocks:
 	mov	%rdx,%rcx
+	mov	%rdi,%rdx		# put aside %rdi
+	movups	(%rdi),%xmm0		# copy-in context
+	sub	\$128+8,%rsp
+	movups	16(%rdi),%xmm1
+	movups	32(%rdi),%xmm2
+	movups	48(%rdi),%xmm3
+	movaps	%xmm0,(%rsp)
+	mov	%rsp,%rdi
+	movaps	%xmm1,16(%rsp)
+	movaps	%xmm2,32(%rsp)
+	movaps	%xmm3,48(%rsp)
 	.byte	0xf3,0x0f,0xa6,0xe0	# rep xha512
+	movaps	(%rsp),%xmm0
+	movaps	16(%rsp),%xmm1
+	movaps	32(%rsp),%xmm2
+	movaps	48(%rsp),%xmm3
+	add	\$128+8,%rsp		# release scratch frame before ret
+	movups	%xmm0,(%rdx)		# copy-out context
+	movups	%xmm1,16(%rdx)
+	movups	%xmm2,32(%rdx)
+	movups	%xmm3,48(%rdx)
 	ret
 .size	padlock_sha512_blocks,.-padlock_sha512_blocks
 ___
-- 
2.25.1