From 095db72024d34163aa327997f5808b8c3a01f3ea Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 11 Nov 2007 19:44:42 +0000 Subject: [PATCH] x86cpuid.pl update [from HEAD]. --- crypto/perlasm/x86unix.pl | 41 ++------------------------------------- crypto/x86cpuid.pl | 32 ++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/crypto/perlasm/x86unix.pl b/crypto/perlasm/x86unix.pl index e71050b6bc..53507b6b84 100644 --- a/crypto/perlasm/x86unix.pl +++ b/crypto/perlasm/x86unix.pl @@ -541,50 +541,13 @@ sub main'set_label sub main'file_end { # try to detect if SSE2 or MMX extensions were used on ELF platform... - if ($main'elf && grep {/%[x]*mm[0-7]/i} @out) { + if ($main'elf && grep {/\b%[x]*mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) { local($tmp); push (@out,"\n.section\t.bss\n"); push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n"); - push (@out,".section\t.init\n"); - # One can argue that it's wasteful to craft every - # SSE/MMX module with this snippet... Well, it's 72 - # bytes long and for the moment we have two modules. - # Let's argue when we have 7 modules or so... - # - # $1<<10 sets a reserved bit to signal that variable - # was initialized already... - &main'picmeup("edx","OPENSSL_ia32cap_P"); - $tmp=<<___; - cmpl \$0,(%edx) - jne 1f - movl \$1<<10,(%edx) - pushf - popl %eax - movl %eax,%ecx - xorl \$1<<21,%eax - pushl %eax - popf - pushf - popl %eax - xorl %ecx,%eax - btl \$21,%eax - jnc 1f - pushl %edi - pushl %ebx - movl %edx,%edi - movl \$1,%eax - .byte 0x0f,0xa2 - orl \$1<<10,%edx - movl %edx,0(%edi) - popl %ebx - popl %edi - jmp 1f - .align $align - 1: -___ - push (@out,$tmp); + return; } if ($const ne "") diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index c53c9bc998..4408ef2936 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -19,13 +19,41 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &pop ("eax"); &xor ("ecx","eax"); &bt ("ecx",21); - &jnc (&label("nocpuid")); + &jnc (&label("done")); + &xor ("eax","eax"); + &cpuid (); + &xor ("eax","eax"); + &cmp ("ebx",0x756e6547); # "Genu" + &data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax")); + &mov ("ebp","eax"); + &cmp ("edx",0x49656e69); # "ineI" + &data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax")); + &or ("ebp","eax"); + &cmp ("ecx",0x6c65746e); # "ntel" + &data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax")); + &or ("ebp","eax"); &mov ("eax",1); &cpuid (); -&set_label("nocpuid"); + &cmp ("ebp",0); + &jne (&label("notP4")); + &and ("eax",15<<8); # familiy ID + &cmp ("eax",15<<8); # P4? + &jne (&label("notP4")); + &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR +&set_label("notP4"); + &bt ("edx",28); # test hyper-threading bit + &jnc (&label("done")); + &shr ("ebx",16); + &and ("ebx",0xff); + &cmp ("ebx",1); # see if cache is shared(*) + &ja (&label("done")); + &and ("edx",0xefffffff); # clear hyper-threading bit if not +&set_label("done"); &mov ("eax","edx"); &mov ("edx","ecx"); &function_end("OPENSSL_ia32_cpuid"); +# (*) on Core2 this value is set to 2 denoting the fact that L2 +# cache is shared between cores. &external_label("OPENSSL_ia32cap_P"); -- 2.25.1