From 6c8ce3c2ffd8aee6d0db6e37a369f64586ad8f31 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Tue, 11 Oct 2011 21:07:53 +0000 Subject: [PATCH] e_padlock-x86[_64].pl: protection against prefetch errata. --- engines/asm/e_padlock-x86.pl | 40 ++++++++++++++++++++------ engines/asm/e_padlock-x86_64.pl | 50 +++++++++++++++++++++++++++------ 2 files changed, 73 insertions(+), 17 deletions(-) diff --git a/engines/asm/e_padlock-x86.pl b/engines/asm/e_padlock-x86.pl index e211706ae1..1b2ba52253 100644 --- a/engines/asm/e_padlock-x86.pl +++ b/engines/asm/e_padlock-x86.pl @@ -37,6 +37,7 @@ require "x86asm.pl"; &asm_init($ARGV[0],$0); +%PADLOCK_MARGIN=(ecb=>128, cbc=>64); # prefetch errata $PADLOCK_CHUNK=512; # Must be a power of 2 larger than 16 $ctx="edx"; @@ -187,6 +188,10 @@ my ($mode,$opcode) = @_; &movq ("mm0",&QWP(-16,$ctx)); # load [upper part of] counter } else { &xor ("ebx","ebx"); + if ($PADLOCK_MARGIN{$mode}) { + &cmp ($len,$PADLOCK_MARGIN{$mode}); + &jbe (&label("${mode}_short")); + } &test (&DWP(0,$ctx),1<<5); # align bit in control word &jnz (&label("${mode}_aligned")); &test ($out,0x0f); @@ -285,20 +290,39 @@ my ($mode,$opcode) = @_; &mov ($chunk,$PADLOCK_CHUNK); &jnz (&label("${mode}_loop")); if ($mode ne "ctr32") { - &test ($out,0x0f); # out_misaligned - &jz (&label("${mode}_done")); + &cmp ("esp","ebp"); + &je (&label("${mode}_done")); } - &mov ($len,"ebp"); - &mov ($out,"esp"); - &sub ($len,"esp"); - &xor ("eax","eax"); - &shr ($len,2); - &data_byte(0xf3,0xab); # rep stosl + &pxor ("xmm0","xmm0"); + &lea ("eax",&DWP(0,"esp")); +&set_label("${mode}_bzero"); + &movaps (&QWP(0,"eax"),"xmm0"); + &lea ("eax",&DWP(16,"eax")); + &cmp ("ebp","eax"); + &ja (&label("${mode}_bzero")); + &set_label("${mode}_done"); &lea ("esp",&DWP(24,"ebp")); if ($mode ne "ctr32") { &jmp (&label("${mode}_exit")); +&set_label("${mode}_short",16); + &xor ("eax","eax"); + &lea ("ebp",&DWP(-24,"esp")); + &sub ("eax",$len); + &lea ("esp",&DWP(0,"eax","ebp")); + &and ("esp",-16); + &xor 
($chunk,$chunk); +&set_label("${mode}_short_copy"); + &movups ("xmm0",&QWP(0,$inp,$chunk)); + &lea ($chunk,&DWP(16,$chunk)); + &cmp ($len,$chunk); + &movaps (&QWP(-16,"esp",$chunk),"xmm0"); + &ja (&label("${mode}_short_copy")); + &mov ($inp,"esp"); + &mov ($chunk,$len); + &jmp (&label("${mode}_loop")); + &set_label("${mode}_aligned",16); &lea ("eax",&DWP(-16,$ctx)); # ivp &lea ("ebx",&DWP(16,$ctx)); # key diff --git a/engines/asm/e_padlock-x86_64.pl b/engines/asm/e_padlock-x86_64.pl index db79a62ad6..5091c7aaca 100644 --- a/engines/asm/e_padlock-x86_64.pl +++ b/engines/asm/e_padlock-x86_64.pl @@ -27,6 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output"; $code=".text\n"; +%PADLOCK_MARGIN=(ecb=>128, cbc=>64, ctr32=>64); # prefetch errata $PADLOCK_CHUNK=512; # Must be a power of 2 between 32 and 2^20 $ctx="%rdx"; @@ -284,6 +285,17 @@ padlock_${mode}_encrypt: lea 16($ctx),$ctx # control word xor %eax,%eax xor %ebx,%ebx +___ +# Formally speaking the correct condition is $len<=$margin and $inp+$margin +# crosses page boundary [and next page is unreadable]. But $inp can +# be unaligned in which case data can be copied to $out if latter is +# aligned, in which case $out+$margin has to be checked. Covering all +# cases appears more complicated than just copying short input... 
+$code.=<<___ if ($PADLOCK_MARGIN{$mode}); + cmp \$$PADLOCK_MARGIN{$mode},$len + jbe .L${mode}_short +___ +$code.=<<___; testl \$`1<<5`,($ctx) # align bit in control word jnz .L${mode}_aligned test \$0x0f,$out @@ -305,6 +317,7 @@ padlock_${mode}_encrypt: lea (%rax,%rbp),%rsp ___ $code.=<<___ if ($mode eq "ctr32"); +.L${mode}_reenter: mov -4($ctx),%eax # pull 32-bit counter bswap %eax neg %eax @@ -373,19 +386,38 @@ $code.=<<___; mov \$$PADLOCK_CHUNK,$chunk jnz .L${mode}_loop - test \$0x0f,$out - jz .L${mode}_done + cmp %rsp,%rbp + je .L${mode}_done + + pxor %xmm0,%xmm0 + lea (%rsp),%rax +.L${mode}_bzero: + movaps %xmm0,(%rax) + lea 16(%rax),%rax + cmp %rax,%rbp + ja .L${mode}_bzero - mov %rbp,$len - mov %rsp,$out - sub %rsp,$len - xor %rax,%rax - shr \$3,$len - .byte 0xf3,0x48,0xab # rep stosq .L${mode}_done: lea (%rbp),%rsp jmp .L${mode}_exit - +___ +$code.=<<___ if ($PADLOCK_MARGIN{$mode}); +.align 16 +.L${mode}_short: + mov %rsp,%rbp + sub $len,%rsp + xor $chunk,$chunk +.L${mode}_short_copy: + movups ($inp,$chunk),%xmm0 + lea 16($chunk),$chunk + cmp $chunk,$len + movaps %xmm0,-16(%rsp,$chunk) + ja .L${mode}_short_copy + mov %rsp,$inp + mov $len,$chunk + jmp .L${mode}_`${mode} eq "ctr32"?"reenter":"loop"` +___ +$code.=<<___; .align 16 .L${mode}_aligned: ___ -- 2.25.1