defined(__INTEL__) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-unsigned int OPENSSL_ia32cap_P[2];
+extern unsigned int OPENSSL_ia32cap_P[4];
unsigned long *OPENSSL_ia32cap_loc(void)
{ if (sizeof(long)==4)
/*
* is 32-bit.
*/
OPENSSL_ia32cap_P[1]=0;
+
+ OPENSSL_ia32cap_P[2]=0;
+
return (unsigned long *)OPENSSL_ia32cap_P;
}
#endif
void OPENSSL_cpuid_setup(void)
{ static int trigger=0;
- IA32CAP OPENSSL_ia32_cpuid(void);
+ IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
IA32CAP vec;
char *env;
#else
if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0);
#endif
- if (off) vec = OPENSSL_ia32_cpuid()&~vec;
+ if (off) vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P)&~vec;
+ else if (env[0]==':') vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
+
+ /*
+  * The third word is written by OPENSSL_ia32_cpuid() when it was
+  * called above. Reset it only when no ':' suffix is present, so
+  * that a ":~mask" suffix clears just the selected bits instead of
+  * masking a word that has already been zeroed.
+  */
+ if ((env=strchr(env,':'))) {
+ unsigned int vecx;
+ env++;
+ off = (env[0]=='~')?1:0;
+ vecx = strtoul(env+off,NULL,0);
+ if (off) OPENSSL_ia32cap_P[2] &= ~vecx;
+ else OPENSSL_ia32cap_P[2] = vecx;
+ }
+ else {
+ OPENSSL_ia32cap_P[2] = 0;
+ }
}
else
- vec = OPENSSL_ia32_cpuid();
+ vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
/*
* |(1<<10) sets a reserved bit to signal that variable
OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10);
OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32);
}
+#else
+unsigned int OPENSSL_ia32cap_P[4];
#endif
#else
call OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
.text
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
.globl OPENSSL_ia32_cpuid
-.type OPENSSL_ia32_cpuid,\@abi-omnipotent
+.type OPENSSL_ia32_cpuid,\@function,1
.align 16
OPENSSL_ia32_cpuid:
mov %rbx,%r8 # save %rbx
xor %eax,%eax
+ mov %eax,8(%rdi) # clear 3rd word
cpuid
mov %eax,%r11d # max value for standard query level
shr \$14,%r10d
and \$0xfff,%r10d # number of cores -1 per L1D
+ cmp \$7,%r11d
+ jb .Lnocacheinfo
+
+ mov \$7,%eax
+ xor %ecx,%ecx
+ cpuid
+ mov %ebx,8(%rdi)
+
.Lnocacheinfo:
mov \$1,%eax
cpuid
.Lclear_avx:
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
+ andl \$0xffffffdf,8(%rdi) # clear AVX2, ~(1<<5)
.Ldone:
shl \$32,%r9
mov %r10d,%eax
&xor ("eax","eax");
&bt ("ecx",21);
&jnc (&label("nocpuid"));
+ &mov ("esi",&wparam(0));
+ &mov (&DWP(8,"esi"),"eax"); # clear 3rd word
&cpuid ();
&mov ("edi","eax"); # max value for standard query level
&jmp (&label("generic"));
&set_label("intel");
+ &cmp ("edi",7);
+ &jb (&label("cacheinfo"));
+
+ &mov ("esi",&wparam(0));
+ &mov ("eax",7);
+ &xor ("ecx","ecx");
+ &cpuid ();
+ &mov (&DWP(8,"esi"),"ebx");
+
+&set_label("cacheinfo");
&cmp ("edi",4);
&mov ("edi",-1);
&jb (&label("nocacheinfo"));
&and ("esi",0xfeffffff); # clear FXSR
&set_label("clear_avx");
&and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
+ &mov ("edi",&wparam(0));
+ &and (&DWP(8,"edi"),0xffffffdf); # clear AVX2
&set_label("done");
&mov ("eax","esi");
&mov ("edx","ebp");
&function_begin_B("OPENSSL_far_spin");
&pushf ();
- &pop ("eax")
+ &pop ("eax");
&bt ("eax",9);
&jnc (&label("nospin")); # interrupts are disabled