From 761393bba79700d48dc1b4b67b928488c9f99397 Mon Sep 17 00:00:00 2001
From: Andy Polyakov
Date: Thu, 14 May 2009 18:17:26 +0000
Subject: [PATCH] x86[_64]cpuid.pl: further refine shared cache detection.

---
Reviewer notes (commentary only; text in this position is dropped by git-am):
 * The EAX result of CPUID leaf 0 is now saved (%r11d in the 64-bit
   file, edi in the 32-bit file) before EAX is reused for the vendor
   string comparison.
 * AMD branch: after querying leaf 0x80000008 it now issues CPUID leaf 1
   itself, clears bit 28 of EDX when EBX[23:16] does not exceed the core
   count, and jumps straight to the "done" label.
 * Non-AMD branch: when the saved leaf-0 value is below 4 the counter is
   set to -1; otherwise CPUID leaf 4 is queried with ECX=0 and bits
   14..25 of EAX are kept as the counter.
 * Final check: bit 28 of EDX stays set only if the counter is non-zero
   and EBX[23:16] is above 1.
 * Line breaks and tabs in this copy were reconstructed from a
   whitespace-flattened source; if a hunk is rejected, retry with
   whitespace-insensitive matching (e.g. git apply --ignore-whitespace).

 crypto/x86_64cpuid.pl | 34 +++++++++++++++++++++++++++++++---
 crypto/x86cpuid.pl    | 38 ++++++++++++++++++++++++++++++++------
 2 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index a8ee099377..862118f285 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -50,6 +50,8 @@ OPENSSL_ia32_cpuid:
 
 	xor	%eax,%eax
 	cpuid
+	mov	%eax,%r11d		# max value for standard query level
+
 	xor	%eax,%eax
 	cmp	\$0x756e6547,%ebx	# "Genu"
 	setne	%al
@@ -60,7 +62,6 @@ OPENSSL_ia32_cpuid:
 	cmp	\$0x6c65746e,%ecx	# "ntel"
 	setne	%al
 	or	%eax,%r9d		# 0 indicates Intel CPU
-	mov	\$1,%r10d		# "number of [AMD] cores"
 	jz	.Lintel
 
 	cmp	\$0x68747541,%ebx	# "Auth"
@@ -74,10 +75,10 @@ OPENSSL_ia32_cpuid:
 	or	%eax,%r10d		# 0 indicates AMD CPU
 	jnz	.Lintel
 
+	# AMD specific
 	mov	\$0x80000000,%eax
 	cpuid
 	cmp	\$0x80000008,%eax
-	mov	\$1,%r10d		# "number of [AMD] cores"
 	jb	.Lintel
 
 	mov	\$0x80000008,%eax
@@ -85,7 +86,29 @@ OPENSSL_ia32_cpuid:
 	movzb	%cl,%r10		# number of cores - 1
 	inc	%r10			# number of cores
 
+	mov	\$1,%eax
+	cpuid
+	bt	\$28,%edx		# test hyper-threading bit
+	jnc	.Ldone
+	shr	\$16,%ebx		# number of logical processors
+	cmp	%r10b,%bl
+	ja	.Ldone
+	and	\$0xefffffff,%edx	# ~(1<<28)
+	jmp	.Ldone
+
 .Lintel:
+	cmp	\$4,%r11d
+	mov	\$-1,%r10d
+	jb	.Lnocacheinfo
+
+	mov	\$4,%eax
+	mov	\$0,%ecx		# query L1D
+	cpuid
+	mov	%eax,%r10d
+	shr	\$14,%r10d
+	and	\$0xfff,%r10d		# number of cores -1 per L1D
+
+.Lnocacheinfo:
 	mov	\$1,%eax
 	cpuid
 	cmp	\$0,%r9d
@@ -98,8 +121,13 @@ OPENSSL_ia32_cpuid:
 .Lnotintel:
 	bt	\$28,%edx		# test hyper-threading bit
 	jnc	.Ldone
+	and	\$0xefffffff,%edx	# ~(1<<28)
+	cmp	\$0,%r10d
+	je	.Ldone
+
+	or	\$0x10000000,%edx	# 1<<28
 	shr	\$16,%ebx
-	cmp	%r10b,%bl		# see if cache is shared
+	cmp	\$1,%bl			# see if cache is shared
 	ja	.Ldone
 	and	\$0xefffffff,%edx	# ~(1<<28)
 .Ldone:
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index 36c79ca01e..e5dcc58124 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -23,6 +23,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&jnc	(&label("done"));
 	&xor	("eax","eax");
 	&cpuid	();
+	&mov	("edi","eax");		# max value for standard query level
+
 	&xor	("eax","eax");
 	&cmp	("ebx",0x756e6547);	# "Genu"
 	&setne	(&LB("eax"));
@@ -33,7 +35,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&cmp	("ecx",0x6c65746e);	# "ntel"
 	&setne	(&LB("eax"));
 	&or	("ebp","eax");		# 0 indicates Intel CPU
-	&mov	("esi",1);		# "number of [AMD] cores"
 	&jz	(&label("intel"));
 
 	&cmp	("ebx",0x68747541);	# "Auth"
@@ -47,10 +48,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&or	("esi","eax");		# 0 indicates AMD CPU
 	&jnz	(&label("intel"));
 
+	# AMD specific
 	&mov	("eax",0x80000000);
 	&cpuid	();
 	&cmp	("eax",0x80000008);
-	&mov	("esi",1);		# "number of [AMD] cores"
 	&jb	(&label("intel"));
 
 	&mov	("eax",0x80000008);
@@ -58,7 +59,30 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 	&movz	("esi",&LB("ecx"));	# number of cores - 1
 	&inc	("esi");		# number of cores
 
+	&mov	("eax",1);
+	&cpuid	();
+	&bt	("edx",28);
+	&jnc	(&label("done"));
+	&shr	("ebx",16);
+	&and	("ebx",0xff);
+	&cmp	("ebx","esi");
+	&ja	(&label("done"));
+	&and	("edx",0xefffffff);	# clear hyper-threading bit
+	&jmp	(&label("done"));
+
 &set_label("intel");
+	&cmp	("edi",4);
+	&mov	("edi",-1);
+	&jb	(&label("nocacheinfo"));
+
+	&mov	("eax",4);
+	&mov	("ecx",0);		# query L1D
+	&cpuid	();
+	&mov	("edi","eax");
+	&shr	("edi",14);
+	&and	("edi",0xfff);		# number of cores -1 per L1D
+
+&set_label("nocacheinfo");
 	&mov	("eax",1);
 	&cpuid	();
 	&cmp	("ebp",0);
@@ -70,17 +94,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 &set_label("notP4");
 	&bt	("edx",28);		# test hyper-threading bit
 	&jnc	(&label("done"));
+	&and	("edx",0xefffffff);
+	&cmp	("edi",0);
+	&je	(&label("done"));
+
+	&or	("edx",0x10000000);
 	&shr	("ebx",16);
-	&and	("ebx",0xff);
-	&cmp	("ebx","esi");		# see if cache is shared(*)
+	&cmp	(&LB("ebx"),1);
 	&ja	(&label("done"));
 	&and	("edx",0xefffffff);	# clear hyper-threading bit if not
 &set_label("done");
 	&mov	("eax","edx");
 	&mov	("edx","ecx");
 &function_end("OPENSSL_ia32_cpuid");
-# (*)	on Core2 this value is set to 2 denoting the fact that L2
-#	cache is shared between cores.
 
 &external_label("OPENSSL_ia32cap_P");
 
-- 
2.25.1