Don't ignore config_name parameter passed to OPENSSL_config(). Use
[oweals/openssl.git] / crypto / x86_64cpuid.pl
index 777d557783d8b6a85862fdb030a329d95feb104a..f9f2827636a58e25eb470e0366105f92baf862a3 100644 (file)
@@ -48,8 +48,37 @@ OPENSSL_wipe_cpu     ENDP
 
 OPENSSL_ia32_cpuid     PROC
        mov     r8,rbx
+
+       xor     eax,eax
+       cpuid
+       xor     eax,eax
+       cmp     ebx,0756e6547h
+       setne   al
+       mov     r9d,eax
+       cmp     edx,049656e69h
+       setne   al
+       or      r9d,eax
+       cmp     ecx,06c65746eh
+       setne   al
+       or      r9d,eax
+
        mov     eax,1
        cpuid
+       bt      edx,28
+       jnc     \$Ldone
+       cmp     r9,0
+       jne     \$Lnotintel
+       or      edx,000100000h
+       and     ah,15
+       cmp     ah,15
+       je      \$Lnotintel
+       or      edx,040000000h
+\$Lnotintel:
+       shr     ebx,16
+       cmp     bl,1
+       ja      \$Ldone
+       and     edx,0efffffffh
+\$Ldone:
        shl     rcx,32
        mov     eax,edx
        mov     rbx,r8
@@ -70,8 +99,8 @@ print<<___ if(!defined($win64a));
 .align 16
 OPENSSL_rdtsc:
        rdtsc
-       shl     \$32,%rdx
-       or      %rdx,%rax
+       shlq    \$32,%rdx               # lift the %rdx half into bits 63:32
+       orq     %rdx,%rax               # merge both halves into one 64-bit value in %rax
        ret
 .size  OPENSSL_rdtsc,.-OPENSSL_rdtsc
 
@@ -80,11 +109,11 @@ OPENSSL_rdtsc:
 .align 16
 OPENSSL_atomic_add:
        movl    (%rdi),%eax
-.Lspin:        lea     (%rsi,%rax),%r8
-lock;  cmpxchg %r8d,(%rdi)
+.Lspin:        leaq    (%rsi,%rax),%r8         # candidate = %rsi + value last seen in %eax
+lock;  cmpxchgl        %r8d,(%rdi)             # store candidate only if (%rdi) still equals %eax
        jne     .Lspin
-       mov     %r8d,%eax
-       cdqe
+       movl    %r8d,%eax                       # return the value that was just stored
+       .byte   0x48,0x98                       # raw bytes standing in for the removed "cdqe" line
        ret
 .size  OPENSSL_atomic_add,.-OPENSSL_atomic_add
 
@@ -108,28 +137,57 @@ OPENSSL_wipe_cpu:
        pxor    %xmm13,%xmm13
        pxor    %xmm14,%xmm14
        pxor    %xmm15,%xmm15
-       xor     %rcx,%rcx
-       xor     %rdx,%rdx
-       xor     %rsi,%rsi
-       xor     %rdi,%rdi
-       xor     %r8,%r8
-       xor     %r9,%r9
-       xor     %r10,%r10
-       xor     %r11,%r11
-       lea     8(%rsp),%rax
+       xorq    %rcx,%rcx
+       xorq    %rdx,%rdx
+       xorq    %rsi,%rsi
+       xorq    %rdi,%rdi
+       xorq    %r8,%r8
+       xorq    %r9,%r9
+       xorq    %r10,%r10
+       xorq    %r11,%r11
+       leaq    8(%rsp),%rax
        ret
 .size  OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
 
 .globl OPENSSL_ia32_cpuid
 .align 16
 OPENSSL_ia32_cpuid:
-       mov     %rbx,%r8
-       mov     \$1,%eax
+       movq    %rbx,%r8                # park %rbx in %r8; it is restored before ret
+
+       xor     %eax,%eax               # select cpuid leaf 0
+       cpuid
+       xor     %eax,%eax               # clear %eax so setne leaves a clean 0/1
+       cmp     \$0x756e6547,%ebx       # "Genu"
+       setne   %al
+       mov     %eax,%r9d               # %r9d = 1 if the first word differs
+       cmp     \$0x49656e69,%edx       # "ineI"
+       setne   %al
+       or      %eax,%r9d               # accumulate mismatch of the second word
+       cmp     \$0x6c65746e,%ecx       # "ntel"
+       setne   %al
+       or      %eax,%r9d               # %r9d ends up 0 only if all three words matched
+
+       movl    \$1,%eax                # select cpuid leaf 1
        cpuid
-       shl     \$32,%rcx
-       mov     %edx,%eax
-       mov     %r8,%rbx
-       or      %rcx,%rax
+       bt      \$28,%edx               # test hyper-threading bit
+       jnc     .Ldone                  # bit clear: leave %edx untouched
+       cmp     \$0,%r9                 # non-zero means the three words above did not all match
+       jne     .Lnotintel
+       or      \$1<<20,%edx            # use reserved bit to engage RC4_CHAR
+       and     \$15,%ah                # keep only the low nibble of %ah
+       cmp     \$15,%ah                # examine Family ID
+       je      .Lnotintel
+       or      \$1<<30,%edx            # use reserved bit to skip unrolled loop
+.Lnotintel:
+       shr     \$16,%ebx               # bring bits 23:16 of %ebx down into %bl
+       cmp     \$1,%bl                 # see if cache is shared
+       ja      .Ldone                  # above 1: keep bit 28 set
+       and     \$~(1<<28),%edx         # otherwise clear bit 28 in the reported flags
+.Ldone:
+       shlq    \$32,%rcx               # %ecx word goes into bits 63:32
+       movl    %edx,%eax               # adjusted %edx word goes into bits 31:0
+       movq    %r8,%rbx                # restore the caller's %rbx
+       orq     %rcx,%rax               # %rax = (%rcx << 32) | %edx
        ret
 .size  OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid