From 162f677def843739f4984d674b4ad33eb726e7ea Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 1 Apr 2007 17:28:08 +0000 Subject: [PATCH] Update x86cpuid.pl to correctly detect shared cache and to support new RC4_set_key. --- crypto/x86cpuid.pl | 24 ++++++++++++++++++++++ doc/crypto/OPENSSL_ia32cap.pod | 37 ++++++++++++++++++++-------------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index c53c9bc998..7d924a60b7 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -20,12 +20,36 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &xor ("ecx","eax"); &bt ("ecx",21); &jnc (&label("nocpuid")); + &xor ("eax","eax"); + &cpuid (); + &xor ("eax","eax"); + &cmp ("ebx",0x756e6547); # "Genu" + &setne (&LB("eax")); + &mov ("ebp","eax"); + &cmp ("edx",0x49656e69); # "ineI" + &setne (&LB("eax")); + &or ("ebp","eax"); + &cmp ("ecx",0x6c65746e); # "ntel" + &setne (&LB("eax")); + &or ("ebp","eax"); &mov ("eax",1); &cpuid (); + &bt ("edx",28); # test hyper-threading bit + &jnc (&label("nocpuid")); + &cmp ("ebp",0); + &jne (&label("notintel")); + &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR +&set_label("notintel"); + &shr ("ebx",16); + &cmp (&LB("ebx"),1); # see if cache is shared(*) + &ja (&label("nocpuid")); + &and ("edx",~(1<<28)); # clear hyper-threading bit if not &set_label("nocpuid"); &mov ("eax","edx"); &mov ("edx","ecx"); &function_end("OPENSSL_ia32_cpuid"); +# (*) on Core2 this value is set to 2 denoting the fact that L2 +# cache is shared between cores. &external_label("OPENSSL_ia32cap_P"); diff --git a/doc/crypto/OPENSSL_ia32cap.pod b/doc/crypto/OPENSSL_ia32cap.pod index 03ac6e6834..2e659d34a5 100644 --- a/doc/crypto/OPENSSL_ia32cap.pod +++ b/doc/crypto/OPENSSL_ia32cap.pod @@ -17,20 +17,27 @@ register after executing CPUID instruction with EAX=1 input value (see Intel Application Note #241618). Naturally it's meaningful on IA-32[E] platforms only. 
The variable is normally set up automatically upon toolkit initialization, but can be manipulated afterwards to modify -crypto library behaviour. For the moment of this writing five bits are -significant, namely bit #28 denoting Hyperthreading, which is used to -distinguish Intel P4 core, bit #26 denoting SSE2 support, bit #25 -denoting SSE support, bit #23 denoting MMX support, and bit #4 denoting -presence of Time-Stamp Counter. Clearing bit #26 at run-time for -example disables high-performance SSE2 code present in the crypto -library. You might have to do this if target OpenSSL application is -executed on SSE2 capable CPU, but under control of OS which does not -support SSE2 extentions. Even though you can manipulate the value -programmatically, you most likely will find it more appropriate to set -up an environment variable with the same name prior starting target -application, e.g. 'env OPENSSL_ia32cap=0x12800010 apps/openssl', to -achieve same effect without modifying the application source code. -Alternatively you can reconfigure the toolkit with no-sse2 option and -recompile. +crypto library behaviour. For the moment of this writing six bits are +significant, namely: + +1. bit #28 denoting Hyperthreading, which is used to distinguish + cores with shared cache; +2. bit #26 denoting SSE2 support; +3. bit #25 denoting SSE support; +4. bit #23 denoting MMX support; +5. bit #20, reserved by Intel, is used to choose between RC4 code + paths; +6. bit #4 denoting presence of Time-Stamp Counter. + +For example, clearing bit #26 at run-time disables high-performance +SSE2 code present in the crypto library. You might have to do this if +target OpenSSL application is executed on SSE2 capable CPU, but under +control of OS which does not support SSE2 extensions. Even though you +can manipulate the value programmatically, you most likely will find it +more appropriate to set up an environment variable with the same name +prior to starting target application, e.g. 
on Intel P4 processor 'env +OPENSSL_ia32cap=0x12900010 apps/openssl', to achieve same effect +without modifying the application source code. Alternatively you can +reconfigure the toolkit with no-sse2 option and recompile. =cut -- 2.25.1