x86cpuid.pl update [from HEAD].
author Andy Polyakov <appro@openssl.org>
Sun, 11 Nov 2007 19:44:42 +0000 (19:44 +0000)
committer Andy Polyakov <appro@openssl.org>
Sun, 11 Nov 2007 19:44:42 +0000 (19:44 +0000)
crypto/perlasm/x86unix.pl
crypto/x86cpuid.pl

index e71050b6bcb512b4f16bdc11b4fd617fd5f32028..53507b6b8432f214b5bfea523b6dea30991ca4af 100644 (file)
@@ -541,50 +541,13 @@ sub main'set_label
 sub main'file_end
        {
        # try to detect if SSE2 or MMX extensions were used on ELF platform...
-       if ($main'elf && grep {/%[x]*mm[0-7]/i} @out) {
+       if ($main'elf && grep {/\b%[x]*mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) {
                local($tmp);
 
                push (@out,"\n.section\t.bss\n");
                push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n");
 
-               push (@out,".section\t.init\n");
-               # One can argue that it's wasteful to craft every
-               # SSE/MMX module with this snippet... Well, it's 72
-               # bytes long and for the moment we have two modules.
-               # Let's argue when we have 7 modules or so...
-               #
-               # $1<<10 sets a reserved bit to signal that variable
-               # was initialized already...
-               &main'picmeup("edx","OPENSSL_ia32cap_P");
-               $tmp=<<___;
-               cmpl    \$0,(%edx)
-               jne     1f
-               movl    \$1<<10,(%edx)
-               pushf
-               popl    %eax
-               movl    %eax,%ecx
-               xorl    \$1<<21,%eax
-               pushl   %eax
-               popf
-               pushf
-               popl    %eax
-               xorl    %ecx,%eax
-               btl     \$21,%eax
-               jnc     1f
-               pushl   %edi
-               pushl   %ebx
-               movl    %edx,%edi
-               movl    \$1,%eax
-               .byte   0x0f,0xa2
-               orl     \$1<<10,%edx
-               movl    %edx,0(%edi)
-               popl    %ebx
-               popl    %edi
-               jmp     1f
-       .align  $align
-       1:
-___
-               push (@out,$tmp);
+               return;
        }
 
        if ($const ne "")
index c53c9bc9980f6c696eb304713d2b79b5a01822e0..4408ef2936ecff908d7965e2fa9ed718ccfe6331 100644 (file)
@@ -19,13 +19,41 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
        &pop    ("eax");
        &xor    ("ecx","eax");
        &bt     ("ecx",21);
-       &jnc    (&label("nocpuid"));
+       &jnc    (&label("done"));
+       &xor    ("eax","eax");
+       &cpuid  ();
+       &xor    ("eax","eax");
+       &cmp    ("ebx",0x756e6547);     # "Genu"
+       &data_byte(0x0f,0x95,0xc0);     #&setne (&LB("eax"));
+       &mov    ("ebp","eax");
+       &cmp    ("edx",0x49656e69);     # "ineI"
+       &data_byte(0x0f,0x95,0xc0);     #&setne (&LB("eax"));
+       &or     ("ebp","eax");
+       &cmp    ("ecx",0x6c65746e);     # "ntel"
+       &data_byte(0x0f,0x95,0xc0);     #&setne (&LB("eax"));
+       &or     ("ebp","eax");
        &mov    ("eax",1);
        &cpuid  ();
-&set_label("nocpuid");
+       &cmp    ("ebp",0);
+       &jne    (&label("notP4"));
+       &and    ("eax",15<<8);          # family ID
+       &cmp    ("eax",15<<8);          # P4?
+       &jne    (&label("notP4"));
+       &or     ("edx",1<<20);          # use reserved bit to engage RC4_CHAR
+&set_label("notP4");
+       &bt     ("edx",28);             # test hyper-threading bit
+       &jnc    (&label("done"));
+       &shr    ("ebx",16);
+       &and    ("ebx",0xff);
+       &cmp    ("ebx",1);              # see if cache is shared(*)
+       &ja     (&label("done"));
+       &and    ("edx",0xefffffff);     # clear hyper-threading bit if not
+&set_label("done");
        &mov    ("eax","edx");
        &mov    ("edx","ecx");
 &function_end("OPENSSL_ia32_cpuid");
+# (*)  on Core2 this value is set to 2 denoting the fact that L2
+#      cache is shared between cores.
 
 &external_label("OPENSSL_ia32cap_P");