From: Andy Polyakov Date: Fri, 4 Mar 2016 10:39:11 +0000 (+0100) Subject: bn/asm/x86[_64]-mont*.pl: complement alloca with page-walking. X-Git-Tag: OpenSSL_1_0_1t~31 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=a15971944091fa01d959566b17ce86225346c83c;p=oweals%2Fopenssl.git bn/asm/x86[_64]-mont*.pl: complement alloca with page-walking. Some OSes, *cough*-dows, insist on stack being "wired" to physical memory in strictly sequential manner, i.e. if stack allocation spans two pages, then reference to farmost one can be punishable by SEGV. But page walking can do good even on other OSes, because it guarantees that villain thread hits the guard page before it can make damage to innocent one... Reviewed-by: Rich Salz (cherry picked from commit adc4f1fc25b2cac90076f1e1695b05b7aeeae501) Resolved conflicts: crypto/bn/asm/x86_64-mont.pl crypto/bn/asm/x86_64-mont5.pl Reviewed-by: Richard Levitte --- diff --git a/crypto/bn/asm/x86-mont.pl b/crypto/bn/asm/x86-mont.pl index e8f6b05084..89f4de61e8 100755 --- a/crypto/bn/asm/x86-mont.pl +++ b/crypto/bn/asm/x86-mont.pl @@ -85,6 +85,21 @@ $frame=32; # size of above frame rounded up to 16n &and ("esp",-64); # align to cache line + # Some OSes, *cough*-dows, insist on stack being "wired" to + # physical memory in strictly sequential manner, i.e. if stack + # allocation spans two pages, then reference to farmost one can + # be punishable by SEGV. But page walking can do good even on + # other OSes, because it guarantees that villain thread hits + # the guard page before it can make damage to innocent one... + &mov ("eax","ebp"); + &sub ("eax","esp"); + &and ("eax",-4096); +&set_label("page_walk"); + &mov ("edx",&DWP(0,"esp","eax")); + &sub ("eax",4096); + &data_byte(0x2e); + &jnc (&label("page_walk")); + ################################# load argument block... &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index 17fb94c84c..c8ae019da1 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -91,6 +91,20 @@ bn_mul_mont: mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp .Lmul_body: + # Some OSes, *cough*-dows, insist on stack being "wired" to + # physical memory in strictly sequential manner, i.e. if stack + # allocation spans two pages, then reference to farmost one can + # be punishable by SEGV. But page walking can do good even on + # other OSes, because it guarantees that villain thread hits + # the guard page before it can make damage to innocent one... + sub %rsp,%r11 + and \$-4096,%r11 +.Lmul_page_walk: + mov (%rsp,%r11),%r10 + sub \$4096,%r11 + .byte 0x66,0x2e # predict non-taken + jnc .Lmul_page_walk + mov $bp,%r12 # reassign $bp ___ $bp="%r12"; @@ -296,6 +310,14 @@ bn_mul4x_mont: mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp .Lmul4x_body: + sub %rsp,%r11 + and \$-4096,%r11 +.Lmul4x_page_walk: + mov (%rsp,%r11),%r10 + sub \$4096,%r11 + .byte 0x2e # predict non-taken + jnc .Lmul4x_page_walk + mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp mov %rdx,%r12 # reassign $bp ___ @@ -707,6 +729,7 @@ $code.=<<___; .align 16 bn_sqr4x_mont: .Lsqr4x_enter: + mov %rsp,%rax push %rbx push %rbp push %r12 @@ -715,12 +738,23 @@ bn_sqr4x_mont: push %r15 shl \$3,${num}d # convert $num to bytes - xor %r10,%r10 mov %rsp,%r11 # put aside %rsp - sub $num,%r10 # -$num + neg $num # -$num mov ($n0),$n0 # *n0 - lea -72(%rsp,%r10,2),%rsp # alloca(frame+2*$num) + lea -72(%rsp,$num,2),%rsp # alloca(frame+2*$num) and \$-1024,%rsp # minimize TLB usage + + sub %rsp,%r11 + and \$-4096,%r11 +.Lsqr4x_page_walk: + mov (%rsp,%r11),%r10 + sub \$4096,%r11 + .byte 0x2e # predict non-taken + jnc .Lsqr4x_page_walk + + mov $num,%r10 + neg $num # restore $num + lea -48(%rax),%r11 # restore saved %rsp ############################################################## # Stack layout # diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index 235979181f..99243ae3ec 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -84,6 +84,20 @@ bn_mul_mont_gather5: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp .Lmul_body: + # Some OSes, *cough*-dows, insist on stack being "wired" to + # physical memory in strictly sequential manner, i.e. if stack + # allocation spans two pages, then reference to farmost one can + # be punishable by SEGV. But page walking can do good even on + # other OSes, because it guarantees that villain thread hits + # the guard page before it can make damage to innocent one... + sub %rsp,%rax + and \$-4096,%rax +.Lmul_page_walk: + mov (%rsp,%rax),%r11 + sub \$4096,%rax + .byte 0x2e # predict non-taken + jnc .Lmul_page_walk + lea 128($bp),%r12 # reassign $bp (+size optimization) ___ $bp="%r12"; @@ -407,6 +421,14 @@ bn_mul4x_mont_gather5: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp .Lmul4x_body: + sub %rsp,%rax + and \$-4096,%rax +.Lmul4x_page_walk: + mov (%rsp,%rax),%r11 + sub \$4096,%rax + .byte 0x2e # predict non-taken + jnc .Lmul4x_page_walk + mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp lea 128(%rdx),%r12 # reassign $bp (+size optimization) ___