From 4e8da09800d5dc753525b2558dc1875a75982d37 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Wed, 25 Jul 2007 12:38:35 +0000 Subject: [PATCH] x86 perlasm updates [from HEAD]. --- crypto/perlasm/x86nasm.pl | 54 ++++++++++++++++++++++++++-------- crypto/perlasm/x86unix.pl | 62 ++++++++++++++++++++++++--------------- 2 files changed, 79 insertions(+), 37 deletions(-) diff --git a/crypto/perlasm/x86nasm.pl b/crypto/perlasm/x86nasm.pl index c93b811e3b..68c89e8466 100644 --- a/crypto/perlasm/x86nasm.pl +++ b/crypto/perlasm/x86nasm.pl @@ -7,6 +7,7 @@ package x86nasm; $lprfx="\@L"; $label="000"; $under=($::netware)?'':'_'; +$initseg=""; sub ::generic { my $opcode=shift; @@ -117,15 +118,10 @@ sub ::function_end_B sub ::file_end { # try to detect if SSE2 or MMX extensions were used on Win32... - if ($::win32 && grep {/\s+[x]*mm[0-7]/i} @out) - { # One can argue that it's wasteful to craft every - # SSE/MMX module with this snippet... Well, it's 72 - # bytes long and for the moment we have two modules. - # Let's argue when we have 7 modules or so... - # - # $1<<10 sets a reserved bit to signal that variable + if ($::win32 && grep {/\b[x]?mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) + { # $1<<10 sets a reserved bit to signal that variable # was initialized already... - my $tmp=<<___; + my $code=<<___; align 16 ${lprfx}OPENSSL_ia32cap_init: lea edx,[${under}OPENSSL_ia32cap_P] @@ -143,27 +139,60 @@ ${lprfx}OPENSSL_ia32cap_init: xor eax,ecx bt eax,21 jnc NEAR ${lprfx}nocpuid + push ebp push edi push ebx mov edi,edx + xor eax,eax + cpuid + xor eax,eax + cmp ebx,'Genu' + setne al + mov ebp,eax + cmp edx,'ineI' + setne al + or ebp,eax + cmp eax,'ntel' + setne al + or ebp,eax mov eax,1 cpuid + cmp ebp,0 + jne ${lprfx}notP4 + and ah,15 + cmp ah,15 + jne ${lprfx}notP4 + or edx,1<<20 +${lprfx}notP4: + bt edx,28 + jnc ${lprfx}done + shr ebx,16 + cmp bl,1 + ja ${lprfx}done + and edx,0xefffffff +${lprfx}done: or edx,1<<10 mov DWORD [edi],edx pop ebx pop edi + pop ebp ${lprfx}nocpuid: ret - segment .CRT\$XCU data align=4 dd ${lprfx}OPENSSL_ia32cap_init +___ + my $data=<<___; segment .bss common ${under}OPENSSL_ia32cap_P 4 ___ + + ##push (@out,$code); + # comment out OPENSSL_ia32cap_P declarations grep {s/(^extern\s+${under}OPENSSL_ia32cap_P)/\;$1/} @out; - push (@out,$tmp); + push (@out,$data) } + push (@out,$initseg) if ($initseg); } sub ::comment { foreach (@_) { push(@out,"\t; $_\n"); } } @@ -216,12 +245,11 @@ sub ::picmeup sub ::initseg { my($f)=$under.shift; if ($::win32) - { my($tmp)=<<___; -segment .CRT\$XCU rdata align=4 + { $initseg=<<___; +segment .CRT\$XCU data align=4 extern $f dd $f ___ - push(@out,$tmp); } } diff --git a/crypto/perlasm/x86unix.pl b/crypto/perlasm/x86unix.pl index 2b9e96c8db..8e3e4bd383 100644 --- a/crypto/perlasm/x86unix.pl +++ b/crypto/perlasm/x86unix.pl @@ -5,8 +5,6 @@ package x86unix; # GAS actually... *out=\@::out; $label="L000"; -$const=""; -$constl=0; $align=($::aout)?"4":"16"; $under=($::aout or $::coff)?"_":""; @@ -189,24 +187,20 @@ sub ::set_label sub ::file_end { # try to detect if SSE2 or MMX extensions were used on ELF platform... - if ($::elf && grep {/%[x]?mm[0-7]/i} @out){ - my $tmp; + if ($::elf && grep {/\b%[x]?mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) { push (@out,"\n.section\t.bss\n"); push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n"); + return; # below is not needed in OpenSSL context + push (@out,".section\t.init\n"); - # One can argue that it's wasteful to craft every - # SSE/MMX module with this snippet... Well, it's 72 - # bytes long and for the moment we have two modules. - # Let's argue when we have 7 modules or so... - # + &::picmeup("edx","OPENSSL_ia32cap_P"); # $1<<10 sets a reserved bit to signal that variable # was initialized already... - &::picmeup("edx","OPENSSL_ia32cap_P"); - $tmp=<<___; + my $code=<<___; cmpl \$0,(%edx) - jne 1f + jne 3f movl \$1<<10,(%edx) pushf popl %eax @@ -218,27 +212,47 @@ sub ::file_end popl %eax xorl %ecx,%eax btl \$21,%eax - jnc 1f + jnc 3f + pushl %ebp pushl %edi pushl %ebx movl %edx,%edi - movl \$1,%eax + xor %eax,%eax + .byte 0x0f,0xa2 + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%ebp + cmpl $1231384169,%edx + setne %al + orl %eax,%ebp + cmpl $1818588270,%ecx + setne %al + orl %eax,%ebp + movl $1,%eax .byte 0x0f,0xa2 - orl \$1<<10,%edx + cmpl $0,%ebp + jne 1f + andb $15,%ah + cmpb $15,%ah + jne 1f + orl $1048576,%edx +1: btl $28,%edx + jnc 2f + shrl $16,%ebx + cmpb $1,%bl + ja 2f + andl $4026531839,%edx +2: orl \$1<<10,%edx movl %edx,0(%edi) popl %ebx popl %edi - jmp 1f + popl %ebp + jmp 3f .align $align - 1: + 3: ___ - push (@out,$tmp); - } - - if ($const ne "") - { push(@out,".section .rodata\n"); - push(@out,$const); - $const=""; + push (@out,$code); } } -- 2.25.1