sub main'cmp { &out2("cmpl",@_); }
sub main'lea { &out2("leal",@_); }
sub main'mul { &out1("mull",@_); }
+sub main'imul { &out2("imull",@_); }	# hands the mnemonic "imull" and the caller's arguments to out2
sub main'div { &out1("divl",@_); }
sub main'jmp { &out1("jmp",@_); }
sub main'jmp_ptr { &out1p("jmp",@_); }
{ if ($label{$i} eq $_[0]) { $pre=''; last; } }
&out1("call",$pre.$_[0]);
}
+sub main'call_ptr { &out1p("call",@_); }	# hands "call" and the caller's arguments to out1p, the helper jmp_ptr also uses
sub main'ret { &out0("ret"); }
sub main'nop { &out0("nop"); }
sub main'test { &out2("testl",@_); }
# SSE2
sub main'emms { &out0("emms"); }
sub main'movd { &out2("movd",@_); }
-sub main'movq { &out2("movq",@_); }
sub main'movdqu { &out2("movdqu",@_); }
sub main'movdqa { &out2("movdqa",@_); }
sub main'movdq2q{ &out2("movdq2q",@_); }
sub main'pxor { &out2("pxor",@_); }
sub main'por { &out2("por",@_); }
sub main'pand { &out2("pand",@_); }
+sub main'movq {	# emit a movq; arguments are ($p1, $p2, $optimize)
+ local($p1,$p2,$optimize)=@_;	# $optimize is an optional third argument; undef (false) when omitted
+ if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/)	# rewrite only when both operands are bare mm0..mm7 names
+ # movq between mmx registers can sink Intel CPUs
+ { push(@out,"\tpshufw\t\$0xe4,%$p2,%$p1\n"); }	# appended to @out directly, $p2 printed before $p1; 0xe4 = 0b11_10_01_00 should make each word select itself (plain copy) - confirm against the PSHUFW reference
+ else { &out2("movq",@_); }	# every other case goes through out2 with the original, unmodified argument list
+ }
# The bswapl instruction is new for the 486. Emulate if i386.
sub main'bswap
sub main'file_end
{
# try to detect if SSE2 or MMX extensions were used on ELF platform...
- if ($main'elf && grep {/%[x]*mm[0-7]/i} @out) {
+ if ($main'elf && grep {/\b%[x]*mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) {
local($tmp);
push (@out,"\n.section\t.bss\n");
push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n");
- push (@out,".section\t.init\n");
- # One can argue that it's wasteful to craft every
- # SSE/MMX module with this snippet... Well, it's 72
- # bytes long and for the moment we have two modules.
- # Let's argue when we have 7 modules or so...
- #
- # $1<<10 sets a reserved bit to signal that variable
- # was initialized already...
- &main'picmeup("edx","OPENSSL_ia32cap_P");
- $tmp=<<___;
- cmpl \$0,(%edx)
- jne 1f
- movl \$1<<10,(%edx)
- pushf
- popl %eax
- movl %eax,%ecx
- xorl \$1<<21,%eax
- pushl %eax
- popf
- pushf
- popl %eax
- xorl %ecx,%eax
- btl \$21,%eax
- jnc 1f
- pushl %edi
- pushl %ebx
- movl %edx,%edi
- movl \$1,%eax
- .byte 0x0f,0xa2
- orl \$1<<10,%edx
- movl %edx,0(%edi)
- popl %ebx
- popl %edi
- jmp 1f
- .align $align
- 1:
-___
- push (@out,$tmp);
+ return;
}
if ($const ne "")
}
}
+sub main'data_byte	# append one ".byte" directive, built from the argument list, to @out
+ {
+ push(@out,"\t.byte\t".join(',',@_)."\n");	# arguments are comma-joined verbatim onto a single output line
+ }
+
sub main'data_word
{
push(@out,"\t.long\t".join(',',@_)."\n");