From baa5f5242200bfcf7e8b6a1182a4d46f27a55ecc Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Thu, 14 May 2009 18:25:29 +0000 Subject: [PATCH] x86[_64]cpuid.pl: update from HEAD. --- crypto/x86_64cpuid.pl | 55 +++++++++++++++++++++++++++++++++++++- crypto/x86cpuid.pl | 61 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 111 insertions(+), 5 deletions(-) diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl index c54b9e3681..862118f285 100644 --- a/crypto/x86_64cpuid.pl +++ b/crypto/x86_64cpuid.pl @@ -50,6 +50,8 @@ OPENSSL_ia32_cpuid: xor %eax,%eax cpuid + mov %eax,%r11d # max value for standard query level + xor %eax,%eax cmp \$0x756e6547,%ebx # "Genu" setne %al @@ -59,8 +61,54 @@ OPENSSL_ia32_cpuid: or %eax,%r9d cmp \$0x6c65746e,%ecx # "ntel" setne %al - or %eax,%r9d + or %eax,%r9d # 0 indicates Intel CPU + jz .Lintel + + cmp \$0x68747541,%ebx # "Auth" + setne %al + mov %eax,%r10d + cmp \$0x69746E65,%edx # "enti" + setne %al + or %eax,%r10d + cmp \$0x444D4163,%ecx # "cAMD" + setne %al + or %eax,%r10d # 0 indicates AMD CPU + jnz .Lintel + + # AMD specific + mov \$0x80000000,%eax + cpuid + cmp \$0x80000008,%eax + jb .Lintel + + mov \$0x80000008,%eax + cpuid + movzb %cl,%r10 # number of cores - 1 + inc %r10 # number of cores + + mov \$1,%eax + cpuid + bt \$28,%edx # test hyper-threading bit + jnc .Ldone + shr \$16,%ebx # number of logical processors + cmp %r10b,%bl + ja .Ldone + and \$0xefffffff,%edx # ~(1<<28) + jmp .Ldone +.Lintel: + cmp \$4,%r11d + mov \$-1,%r10d + jb .Lnocacheinfo + + mov \$4,%eax + mov \$0,%ecx # query L1D + cpuid + mov %eax,%r10d + shr \$14,%r10d + and \$0xfff,%r10d # number of cores -1 per L1D + +.Lnocacheinfo: mov \$1,%eax cpuid cmp \$0,%r9d @@ -73,6 +121,11 @@ OPENSSL_ia32_cpuid: .Lnotintel: bt \$28,%edx # test hyper-threading bit jnc .Ldone + and \$0xefffffff,%edx # ~(1<<28) + cmp \$0,%r10d + je .Ldone + + or \$0x10000000,%edx # 1<<28 shr \$16,%ebx cmp \$1,%bl # see if cache is shared ja .Ldone diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index c329111203..e5dcc58124 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -23,6 +23,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &jnc (&label("done")); &xor ("eax","eax"); &cpuid (); + &mov ("edi","eax"); # max value for standard query level + &xor ("eax","eax"); &cmp ("ebx",0x756e6547); # "Genu" &setne (&LB("eax")); @@ -32,7 +34,55 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &or ("ebp","eax"); &cmp ("ecx",0x6c65746e); # "ntel" &setne (&LB("eax")); - &or ("ebp","eax"); + &or ("ebp","eax"); # 0 indicates Intel CPU + &jz (&label("intel")); + + &cmp ("ebx",0x68747541); # "Auth" + &setne (&LB("eax")); + &mov ("esi","eax"); + &cmp ("edx",0x69746E65); # "enti" + &setne (&LB("eax")); + &or ("esi","eax"); + &cmp ("ecx",0x444D4163); # "cAMD" + &setne (&LB("eax")); + &or ("esi","eax"); # 0 indicates AMD CPU + &jnz (&label("intel")); + + # AMD specific + &mov ("eax",0x80000000); + &cpuid (); + &cmp ("eax",0x80000008); + &jb (&label("intel")); + + &mov ("eax",0x80000008); + &cpuid (); + &movz ("esi",&LB("ecx")); # number of cores - 1 + &inc ("esi"); # number of cores + + &mov ("eax",1); + &cpuid (); + &bt ("edx",28); + &jnc (&label("done")); + &shr ("ebx",16); + &and ("ebx",0xff); + &cmp ("ebx","esi"); + &ja (&label("done")); + &and ("edx",0xefffffff); # clear hyper-threading bit + &jmp (&label("done")); + +&set_label("intel"); + &cmp ("edi",4); + &mov ("edi",-1); + &jb (&label("nocacheinfo")); + + &mov ("eax",4); + &mov ("ecx",0); # query L1D + &cpuid (); + &mov ("edi","eax"); + &shr ("edi",14); + &and ("edi",0xfff); # number of cores -1 per L1D + +&set_label("nocacheinfo"); &mov ("eax",1); &cpuid (); &cmp ("ebp",0); @@ -44,16 +94,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &set_label("notP4"); &bt ("edx",28); # test hyper-threading bit &jnc (&label("done")); + &and ("edx",0xefffffff); + &cmp ("edi",0); + &je (&label("done")); + + &or ("edx",0x10000000); &shr ("ebx",16); - &cmp (&LB("ebx"),1); # see if cache is shared(*) + &cmp (&LB("ebx"),1); &ja (&label("done")); &and ("edx",0xefffffff); # clear hyper-threading bit if not &set_label("done"); &mov ("eax","edx"); &mov ("edx","ecx"); &function_end("OPENSSL_ia32_cpuid"); -# (*) on Core2 this value is set to 2 denoting the fact that L2 -# cache is shared between cores. &external_label("OPENSSL_ia32cap_P"); -- 2.25.1