From b56cb7c6eac9613dbbaed3b32cf121d16583810a Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 23 Jul 2007 16:18:36 +0000 Subject: [PATCH] x86*cpuid update [from HEAD]. --- crypto/x86_64cpuid.pl | 99 ++++++++++++++++++++++++++----------------- crypto/x86cpuid.pl | 31 +++++++++++++- 2 files changed, 89 insertions(+), 41 deletions(-) diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl index 4d88ad191b..0c5d3397d5 100644 --- a/crypto/x86_64cpuid.pl +++ b/crypto/x86_64cpuid.pl @@ -1,19 +1,12 @@ #!/usr/bin/env perl $output=shift; -$win64a=1 if ($output =~ /win64a\.[s|asm]/); +$masm=1 if ($output =~ /\.asm/); open STDOUT,">$output" || die "can't open $output: $!"; -print<<___ if(defined($win64a)); +print<<___ if(defined($masm)); _TEXT SEGMENT PUBLIC OPENSSL_rdtsc -ALIGN 16 -OPENSSL_rdtsc PROC - rdtsc - shl rdx,32 - or rax,rdx - ret -OPENSSL_rdtsc ENDP PUBLIC OPENSSL_atomic_add ALIGN 16 @@ -45,35 +38,16 @@ OPENSSL_wipe_cpu PROC lea rax,QWORD PTR[rsp+8] ret OPENSSL_wipe_cpu ENDP - -OPENSSL_ia32_cpuid PROC - mov r8,rbx - mov eax,1 - cpuid - shl rcx,32 - mov eax,edx - mov rbx,r8 - or rax,rcx - ret -OPENSSL_ia32_cpuid ENDP _TEXT ENDS CRT\$XIU SEGMENT EXTRN OPENSSL_cpuid_setup:PROC DQ OPENSSL_cpuid_setup CRT\$XIU ENDS -END + ___ -print<<___ if(!defined($win64a)); +print<<___ if(!defined($masm)); .text -.globl OPENSSL_rdtsc -.align 16 -OPENSSL_rdtsc: - rdtsc - shlq \$32,%rdx - orq %rdx,%rax - ret -.size OPENSSL_rdtsc,.-OPENSSL_rdtsc .globl OPENSSL_atomic_add .type OPENSSL_atomic_add,\@function @@ -120,19 +94,66 @@ OPENSSL_wipe_cpu: ret .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu +.section .init + call OPENSSL_cpuid_setup + +___ + +open STDOUT,"| $^X perlasm/x86_64-xlate.pl $output"; +print<<___; +.text + +.globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,\@abi-omnipotent +.align 16 +OPENSSL_rdtsc: + rdtsc + shl \$32,%rdx + or %rdx,%rax + ret +.size OPENSSL_rdtsc,.-OPENSSL_rdtsc + .globl OPENSSL_ia32_cpuid +.type OPENSSL_ia32_cpuid,\@abi-omnipotent .align 16 OPENSSL_ia32_cpuid: - movq %rbx,%r8 - movl \$1,%eax + mov %rbx,%r8 + + xor %eax,%eax + cpuid + xor %eax,%eax + cmp \$0x756e6547,%ebx # "Genu" + setne %al + mov %eax,%r9d + cmp \$0x49656e69,%edx # "ineI" + setne %al + or %eax,%r9d + cmp \$0x6c65746e,%ecx # "ntel" + setne %al + or %eax,%r9d + + mov \$1,%eax cpuid - shlq \$32,%rcx - movl %edx,%eax - movq %r8,%rbx - orq %rcx,%rax + cmp \$0,%r9d + jne .Lnotintel + or \$1<<20,%edx # use reserved bit to engage RC4_CHAR + and \$15,%ah + cmp \$15,%ah # examine Family ID + je .Lnotintel + or \$1<<30,%edx # use reserved bit to skip unrolled loop +.Lnotintel: + bt \$28,%edx # test hyper-threading bit + jnc .Ldone + shr \$16,%ebx + cmp \$1,%bl # see if cache is shared + ja .Ldone + and \$0xefffffff,%edx # ~(1<<28) +.Ldone: + shl \$32,%rcx + mov %edx,%eax + mov %r8,%rbx + or %rcx,%rax ret .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid - -.section .init - call OPENSSL_cpuid_setup ___ +close STDOUT; # flush diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index c53c9bc998..6bf6a7b57f 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -19,13 +19,40 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &pop ("eax"); &xor ("ecx","eax"); &bt ("ecx",21); - &jnc (&label("nocpuid")); + &jnc (&label("done")); + &xor ("eax","eax"); + &cpuid (); + &xor ("eax","eax"); + &cmp ("ebx",0x756e6547); # "Genu" + &setne (&LB("eax")); + &mov ("ebp","eax"); + &cmp ("edx",0x49656e69); # "ineI" + &setne (&LB("eax")); + &or ("ebp","eax"); + &cmp ("ecx",0x6c65746e); # "ntel" + &setne (&LB("eax")); + &or ("ebp","eax"); &mov ("eax",1); &cpuid (); -&set_label("nocpuid"); + &cmp ("ebp",0); + &jne (&label("notP4")); + &and (&HB("eax"),15); # familiy ID + &cmp (&HB("eax"),15); # P4? + &jne (&label("notP4")); + &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR +&set_label("notP4"); + &bt ("edx",28); # test hyper-threading bit + &jnc (&label("done")); + &shr ("ebx",16); + &cmp (&LB("ebx"),1); # see if cache is shared(*) + &ja (&label("done")); + &and ("edx",0xefffffff); # clear hyper-threading bit if not +&set_label("done"); &mov ("eax","edx"); &mov ("edx","ecx"); &function_end("OPENSSL_ia32_cpuid"); +# (*) on Core2 this value is set to 2 denoting the fact that L2 +# cache is shared between cores. &external_label("OPENSSL_ia32cap_P"); -- 2.25.1