X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=crypto%2Fx86_64cpuid.pl;h=c54b9e3681c6cebba14a9509a43c13865079dccc;hb=cec2af75102df52c2f270b3751e3487f47fccb6c;hp=777d557783d8b6a85862fdb030a329d95feb104a;hpb=5f1841cdcae459924c3d1d92fcaf3110068c7cda;p=oweals%2Fopenssl.git diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl index 777d557783..c54b9e3681 100644 --- a/crypto/x86_64cpuid.pl +++ b/crypto/x86_64cpuid.pl @@ -1,72 +1,39 @@ #!/usr/bin/env perl -$output=shift; -$win64a=1 if ($output =~ /win64a\.[s|asm]/); -open STDOUT,">$output" || die "can't open $output: $!"; +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } -print<<___ if(defined($win64a)); -_TEXT SEGMENT -PUBLIC OPENSSL_rdtsc -ALIGN 16 -OPENSSL_rdtsc PROC - rdtsc - shl rdx,32 - or rax,rdx - ret -OPENSSL_rdtsc ENDP +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); -PUBLIC OPENSSL_atomic_add -ALIGN 16 -OPENSSL_atomic_add PROC - mov eax,DWORD PTR[rcx] -\$Lspin: lea r8,DWORD PTR[rdx+rax] -lock cmpxchg DWORD PTR[rcx],r8d - jne \$Lspin - mov eax,r8d - cdqe - ret -OPENSSL_atomic_add ENDP +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; -PUBLIC OPENSSL_wipe_cpu -ALIGN 16 -OPENSSL_wipe_cpu PROC - pxor xmm0,xmm0 - pxor xmm1,xmm1 - pxor xmm2,xmm2 - pxor xmm3,xmm3 - pxor xmm4,xmm4 - pxor xmm5,xmm5 - xor rcx,rcx - xor rdx,rdx - xor r8,r8 - xor r9,r9 - xor r10,r10 - xor r11,r11 - lea rax,QWORD PTR[rsp+8] - ret -OPENSSL_wipe_cpu ENDP +if ($win64) { $arg1="%rcx"; $arg2="%rdx"; } +else { $arg1="%rdi"; $arg2="%rsi"; } +print<<___; +.extern OPENSSL_cpuid_setup +.section .init + call OPENSSL_cpuid_setup -OPENSSL_ia32_cpuid PROC - mov r8,rbx - mov eax,1 - cpuid - shl rcx,32 - mov eax,edx - mov rbx,r8 - or rax,rcx +.text + +.globl OPENSSL_atomic_add +.type OPENSSL_atomic_add,\@abi-omnipotent +.align 16 +OPENSSL_atomic_add: + movl ($arg1),%eax +.Lspin: leaq ($arg2,%rax),%r8 + .byte 0xf0 # lock + cmpxchgl %r8d,($arg1) + jne .Lspin + movl %r8d,%eax + .byte 0x48,0x98 # cltq/cdqe ret -OPENSSL_ia32_cpuid ENDP -_TEXT ENDS +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add -CRT\$XIU SEGMENT -EXTRN OPENSSL_cpuid_setup:PROC -DQ OPENSSL_cpuid_setup -CRT\$XIU ENDS -END -___ -print<<___ if(!defined($win64a)); -.text .globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,\@abi-omnipotent .align 16 OPENSSL_rdtsc: rdtsc @@ -75,21 +42,85 @@ OPENSSL_rdtsc: ret .size OPENSSL_rdtsc,.-OPENSSL_rdtsc -.globl OPENSSL_atomic_add -.type OPENSSL_atomic_add,\@function +.globl OPENSSL_ia32_cpuid +.type OPENSSL_ia32_cpuid,\@abi-omnipotent .align 16 -OPENSSL_atomic_add: - movl (%rdi),%eax -.Lspin: lea (%rsi,%rax),%r8 -lock; cmpxchg %r8d,(%rdi) - jne .Lspin - mov %r8d,%eax - cdqe +OPENSSL_ia32_cpuid: + mov %rbx,%r8 + + xor %eax,%eax + cpuid + xor %eax,%eax + cmp \$0x756e6547,%ebx # "Genu" + setne %al + mov %eax,%r9d + cmp \$0x49656e69,%edx # "ineI" + setne %al + or %eax,%r9d + cmp \$0x6c65746e,%ecx # "ntel" + setne %al + or %eax,%r9d + + mov \$1,%eax + cpuid + cmp \$0,%r9d + jne .Lnotintel + or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR + and \$15,%ah + cmp \$15,%ah # examine Family ID + je .Lnotintel + or \$0x40000000,%edx # use reserved bit to skip unrolled loop +.Lnotintel: + bt \$28,%edx # test hyper-threading bit + jnc .Ldone + shr \$16,%ebx + cmp \$1,%bl # see if cache is shared + ja .Ldone + and \$0xefffffff,%edx # ~(1<<28) +.Ldone: + shl \$32,%rcx + mov %edx,%eax + mov %r8,%rbx + or %rcx,%rax ret -.size OPENSSL_atomic_add,.-OPENSSL_atomic_add +.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid + +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,\@abi-omnipotent +.align 16 +OPENSSL_cleanse: + xor %rax,%rax + cmp \$15,$arg2 + jae .Lot +.Little: + mov %al,($arg1) + sub \$1,$arg2 + lea 1($arg1),$arg1 + jnz .Little + ret +.align 16 +.Lot: + test \$7,$arg1 + jz .Laligned + mov %al,($arg1) + lea -1($arg2),$arg2 + lea 1($arg1),$arg1 + jmp .Lot +.Laligned: + mov %rax,($arg1) + lea -8($arg2),$arg2 + test \$-8,$arg2 + lea 8($arg1),$arg1 + jnz .Laligned + cmp \$0,$arg2 + jne .Little + ret +.size OPENSSL_cleanse,.-OPENSSL_cleanse +___ +print<<___ if (!$win64); .globl OPENSSL_wipe_cpu -.type OPENSSL_wipe_cpu,\@function +.type OPENSSL_wipe_cpu,\@abi-omnipotent .align 16 OPENSSL_wipe_cpu: pxor %xmm0,%xmm0 @@ -108,31 +139,38 @@ OPENSSL_wipe_cpu: pxor %xmm13,%xmm13 pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 - xor %rcx,%rcx - xor %rdx,%rdx - xor %rsi,%rsi - xor %rdi,%rdi - xor %r8,%r8 - xor %r9,%r9 - xor %r10,%r10 - xor %r11,%r11 - lea 8(%rsp),%rax + xorq %rcx,%rcx + xorq %rdx,%rdx + xorq %rsi,%rsi + xorq %rdi,%rdi + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + leaq 8(%rsp),%rax ret .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu - -.globl OPENSSL_ia32_cpuid +___ +print<<___ if ($win64); +.globl OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,\@abi-omnipotent .align 16 -OPENSSL_ia32_cpuid: - mov %rbx,%r8 - mov \$1,%eax - cpuid - shl \$32,%rcx - mov %edx,%eax - mov %r8,%rbx - or %rcx,%rax +OPENSSL_wipe_cpu: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorq %rcx,%rcx + xorq %rdx,%rdx + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + leaq 8(%rsp),%rax ret -.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid - -.section .init - call OPENSSL_cpuid_setup +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu ___ + +close STDOUT; # flush