From e303f55fc7dcfce113d71e0ab9652c69fb1ec36d Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 26 Apr 2009 17:49:41 +0000 Subject: [PATCH] Expand OPENSS_ia32cap to 64 bits. --- crypto/cryptlib.c | 22 +++++++++++++----- crypto/cryptlib.h | 2 +- crypto/perlasm/x86gas.pl | 2 +- crypto/perlasm/x86masm.pl | 2 +- crypto/perlasm/x86nasm.pl | 2 +- doc/crypto/OPENSSL_ia32cap.pod | 41 ++++++++++++++++++++-------------- 6 files changed, 44 insertions(+), 27 deletions(-) diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index 44eb2bbc97..520e42d624 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -659,30 +659,40 @@ const char *CRYPTO_get_lock_name(int type) #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ defined(__INTEL__) || \ - defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) + defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) -unsigned long OPENSSL_ia32cap_P=0; -unsigned long *OPENSSL_ia32cap_loc(void) { return &OPENSSL_ia32cap_P; } +unsigned int OPENSSL_ia32cap_P[2]; +unsigned int *OPENSSL_ia32cap_loc(void) { return OPENSSL_ia32cap_P; } #if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY) #define OPENSSL_CPUID_SETUP +#if defined(_WIN32) +typedef unsigned __int64 IA32CAP; +#define strtoull _strtoui64 +#else +typedef unsigned long long IA32CAP; +#endif void OPENSSL_cpuid_setup(void) { static int trigger=0; - unsigned long OPENSSL_ia32_cpuid(void); + IA32CAP OPENSSL_ia32_cpuid(void); + IA32CAP vec; char *env; if (trigger) return; trigger=1; if ((env=getenv("OPENSSL_ia32cap"))) - OPENSSL_ia32cap_P = strtoul(env,NULL,0)|(1<<10); + vec = strtoull(env,NULL,0); else - OPENSSL_ia32cap_P = OPENSSL_ia32_cpuid()|(1<<10); + vec = OPENSSL_ia32_cpuid(); + /* * |(1<<10) sets a reserved bit to signal that variable * was initialized already... This is to avoid interference * with cpuid snippets in ELF .init segment. 
*/ + OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10); + OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32); } #endif diff --git a/crypto/cryptlib.h b/crypto/cryptlib.h index 5ceaa964b5..4761a894a8 100644 --- a/crypto/cryptlib.h +++ b/crypto/cryptlib.h @@ -99,7 +99,7 @@ extern "C" { #define HEX_SIZE(type) (sizeof(type)*2) void OPENSSL_cpuid_setup(void); -extern unsigned long OPENSSL_ia32cap_P; +extern unsigned int OPENSSL_ia32cap_P[]; void OPENSSL_showfatal(const char *,...); void *OPENSSL_stderr(void); extern int OPENSSL_NONPIC_relocated; diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl index 6eab727fd4..b470507730 100644 --- a/crypto/perlasm/x86gas.pl +++ b/crypto/perlasm/x86gas.pl @@ -150,7 +150,7 @@ sub ::public_label sub ::file_end { if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { - my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,4"; + my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; if ($::elf) { push (@out,"$tmp,4\n"); } else { push (@out,"$tmp\n"); } } diff --git a/crypto/perlasm/x86masm.pl b/crypto/perlasm/x86masm.pl index 4eca7bc367..3365114cd0 100644 --- a/crypto/perlasm/x86masm.pl +++ b/crypto/perlasm/x86masm.pl @@ -127,7 +127,7 @@ ___ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { my $comm=<<___; .bss SEGMENT -COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD +COMM ${nmdecor}OPENSSL_ia32cap_P:QWORD .bss ENDS ___ # comment out OPENSSL_ia32cap_P declarations diff --git a/crypto/perlasm/x86nasm.pl b/crypto/perlasm/x86nasm.pl index ce2bed9bb2..1a384582bf 100644 --- a/crypto/perlasm/x86nasm.pl +++ b/crypto/perlasm/x86nasm.pl @@ -114,7 +114,7 @@ sub ::file_end { if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { my $comm=<<___; ${drdecor}segment .bss -${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 4 +${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 8 ___ # comment out OPENSSL_ia32cap_P declarations grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out; diff --git a/doc/crypto/OPENSSL_ia32cap.pod b/doc/crypto/OPENSSL_ia32cap.pod index 
2e659d34a5..b7d8a7618f 100644 --- a/doc/crypto/OPENSSL_ia32cap.pod +++ b/doc/crypto/OPENSSL_ia32cap.pod @@ -6,28 +6,29 @@ OPENSSL_ia32cap - finding the IA-32 processor capabilities =head1 SYNOPSIS - unsigned long *OPENSSL_ia32cap_loc(void); - #define OPENSSL_ia32cap (*(OPENSSL_ia32cap_loc())) + unsigned int *OPENSSL_ia32cap_loc(void); + #define OPENSSL_ia32cap ((OPENSSL_ia32cap_loc())[0]) =head1 DESCRIPTION Value returned by OPENSSL_ia32cap_loc() is address of a variable -containing IA-32 processor capabilities bit vector as it appears in EDX -register after executing CPUID instruction with EAX=1 input value (see -Intel Application Note #241618). Naturally it's meaningful on IA-32[E] -platforms only. The variable is normally set up automatically upon -toolkit initialization, but can be manipulated afterwards to modify -crypto library behaviour. For the moment of this writing six bits are -significant, namely: - -1. bit #28 denoting Hyperthreading, which is used to distiguish +containing IA-32 processor capabilities bit vector as it appears in +EDX:ECX register pair after executing CPUID instruction with EAX=1 +input value (see Intel Application Note #241618). Naturally it's +meaningful on x86 and x86_64 platforms only. The variable is normally +set up automatically upon toolkit initialization, but can be +manipulated afterwards to modify crypto library behaviour. For the +moment of this writing seven bits are significant, namely: + +1. bit #4 denoting presence of Time-Stamp Counter. +2. bit #20, reserved by Intel, is used to choose between RC4 code + paths; +3. bit #23 denoting MMX support; +4. bit #25 denoting SSE support; +5. bit #26 denoting SSE2 support; +6. bit #28 denoting Hyperthreading, which is used to distinguish cores with shared cache; -2. bit #26 denoting SSE2 support; -3. bit #25 denoting SSE support; -4. bit #23 denoting MMX support; -5. bit #20, reserved by Intel, is used to choose between RC4 code - pathes; -6. 
bit #4 denoting presence of Time-Stamp Counter. +7. bit #57 denoting Intel AES instruction set extension; For example, clearing bit #26 at run-time disables high-performance SSE2 code present in the crypto library. You might have to do this if @@ -40,4 +41,10 @@ OPENSSL_ia32cap=0x12900010 apps/openssl', to achieve same effect without modifying the application source code. Alternatively you can reconfigure the toolkit with no-sse2 option and recompile. +Less intuitive is clearing bit #28. The truth is that it's not copied +from CPUID output verbatim, but is adjusted to reflect whether or not +the data cache is actually shared between logical cores. This in turn +affects the decision on whether or not expensive countermeasures +against cache-timing attacks are applied, most notably in AES assembler +module. =cut -- 2.25.1