sub main'shr { &out2("shrl",@_); }
sub main'xor { &out2("xorl",@_); }
sub main'xorb { &out2("xorb",@_); }
-sub main'add { &out2("addl",@_); }
+sub main'add { &out2($_[0]=~/%[a-d][lh]/?"addb":"addl",@_); }
sub main'adc { &out2("adcl",@_); }
sub main'sub { &out2("subl",@_); }
+sub main'sbb { &out2("sbbl",@_); }
sub main'rotl { &out2("roll",@_); }
sub main'rotr { &out2("rorl",@_); }
-sub main'exch { &out2("xchg",@_); }
+sub main'exch { &out2($_[0]=~/%[a-d][lh]/?"xchgb":"xchgl",@_); }
sub main'cmp { &out2("cmpl",@_); }
sub main'lea { &out2("leal",@_); }
sub main'mul { &out1("mull",@_); }
+sub main'imul { &out2("imull",@_); }
sub main'div { &out1("divl",@_); }
sub main'jmp { &out1("jmp",@_); }
sub main'jmp_ptr { &out1p("jmp",@_); }
sub main'jnc { &out1("jnc",@_); }
sub main'jno { &out1("jno",@_); }
sub main'dec { &out1("decl",@_); }
-sub main'inc { &out1("incl",@_); }
+sub main'inc { &out1($_[0]=~/%[a-d][hl]/?"incb":"incl",@_); }
sub main'push { &out1("pushl",@_); $stack+=4; }
sub main'pop { &out1("popl",@_); $stack-=4; }
-sub main'pushf { &out0("pushf"); $stack+=4; }
-sub main'popf { &out0("popf"); $stack-=4; }
+sub main'pushf { &out0("pushfl"); $stack+=4; }
+sub main'popf { &out0("popfl"); $stack-=4; }
sub main'not { &out1("notl",@_); }
-sub main'call { &out1("call",($_[0]=~/^\Q${dot}\EL/?'':$under).$_[0]); }
+sub main'call { my $pre=$under;
+ foreach $i (%label)
+ { if ($label{$i} eq $_[0]) { $pre=''; last; } }
+ &out1("call",$pre.$_[0]);
+ }
+sub main'call_ptr { &out1p("call",@_); }
sub main'ret { &out0("ret"); }
sub main'nop { &out0("nop"); }
sub main'test { &out2("testl",@_); }
sub main'bt { &out2("btl",@_); }
sub main'leave { &out0("leave"); }
-sub main'cpuid { &out0(".word\t0xa20f"); }
-sub main'rdtsc { &out0(".word\t0x310f"); }
+sub main'cpuid { &out0(".byte\t0x0f,0xa2"); }
+sub main'rdtsc { &out0(".byte\t0x0f,0x31"); }
+sub main'halt { &out0("hlt"); }
+sub main'movz { &out2("movzbl",@_); }
+sub main'neg { &out1("negl",@_); }
+sub main'cld { &out0("cld"); }
# SSE2
sub main'emms { &out0("emms"); }
sub main'movd { &out2("movd",@_); }
-sub main'movq { &out2("movq",@_); }
sub main'movdqu { &out2("movdqu",@_); }
sub main'movdqa { &out2("movdqa",@_); }
sub main'movdq2q{ &out2("movdq2q",@_); }
sub main'pxor { &out2("pxor",@_); }
sub main'por { &out2("por",@_); }
sub main'pand { &out2("pand",@_); }
+sub main'movq {
+ local($p1,$p2,$optimize)=@_;
+ if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/)
+ # movq between mmx registers can sink Intel CPUs
+ { push(@out,"\tpshufw\t\$0xe4,%$p2,%$p1\n"); }
+ else { &out2("movq",@_); }
+ }
# The bswapl instruction is new for the 486. Emulate if i386.
sub main'bswap
local($tmp)=<<"EOF";
.text
-.align $align
-.globl $func
+.globl $func
EOF
push(@out,$tmp);
if ($main'cpp)
{ $tmp=push(@out,"TYPE($func,\@function)\n"); }
elsif ($main'coff)
{ $tmp=push(@out,".def\t$func;\t.scl\t2;\t.type\t32;\t.endef\n"); }
- elsif ($main'aout)
+ elsif ($main'aout and !$main'pic)
{ }
else { $tmp=push(@out,".type\t$func,\@function\n"); }
+ push(@out,".align\t$align\n");
push(@out,"$func:\n");
$tmp=<<"EOF";
pushl %ebp
local($tmp)=<<"EOF";
.text
-.align $align
-.globl $func
+.globl $func
EOF
push(@out,$tmp);
if ($main'cpp)
{ push(@out,"TYPE($func,\@function)\n"); }
elsif ($main'coff)
{ $tmp=push(@out,".def\t$func;\t.scl\t2;\t.type\t32;\t.endef\n"); }
- elsif ($main'aout)
+ elsif ($main'aout and !$main'pic)
{ }
else { push(@out,".type $func,\@function\n"); }
+ push(@out,".align\t$align\n");
push(@out,"$func:\n");
$stack=4;
}
if ($main'cpp)
{ push(@out,"SIZE($func,${dot}L_${func}_end-$func)\n"); }
elsif ($main'coff or $main'aout)
- { $tmp=push(@out,".align $align\n"); }
+ { }
else { push(@out,".size\t$func,${dot}L_${func}_end-$func\n"); }
push(@out,".ident \"$func\"\n");
$stack=0;
if ($main'cpp)
{ push(@out,"SIZE($func,${dot}L_${func}_end-$func)\n"); }
elsif ($main'coff or $main'aout)
- { push(@out,".align $align\n"); }
+ { }
else { push(@out,".size\t$func,${dot}L_${func}_end-$func\n"); }
push(@out,".ident \"$func\"\n");
$stack=0;
}
}
+sub main'public_label
+ {
+ $label{$_[0]}="${under}${_[0]}" if (!defined($label{$_[0]}));
+ push(@out,".globl\t$label{$_[0]}\n");
+ }
+
sub main'label
{
if (!defined($label{$_[0]}))
$label{$_[0]}="${dot}${label}${_[0]}";
$label++;
}
- push(@out,".align $align\n") if ($_[1] != 0);
+ if ($_[1]!=0)
+ {
+ if ($_[1]>1) { main'align($_[1]); }
+ else { push(@out,".align $align\n"); }
+ }
push(@out,"$label{$_[0]}:\n");
}
sub main'file_end
{
# try to detect if SSE2 or MMX extensions were used on ELF platform...
- if ($main'elf && grep {/%[x]*mm[0-7]/i} @out) {
+ if ($main'elf && grep {/\b%[x]*mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) {
local($tmp);
- push (@out,"\n.comm\t".$under."OPENSSL_ia32cap,8,4\n");
-
- push (@out,".section\t.init\n");
- # One can argue that it's wasteful to craft every
- # SSE/MMX module with this snippet... Well, it's 72
- # bytes long and for the moment we have two modules.
- # Let's argue when we have 7 modules or so...
- #
- # $1<<10 sets a reserved bit to signal that variable
- # was initialized already...
- &main'picmeup("edx","OPENSSL_ia32cap");
- $tmp=<<___;
- cmpl \$0,(%edx)
- jne 1f
- movl \$1<<10,(%edx)
- pushf
- popl %eax
- movl %eax,%ecx
- xorl \$1<<21,%eax
- pushl %eax
- popf
- pushf
- popl %eax
- xorl %ecx,%eax
- bt \$21,%eax
- jnc 1f
- pushl %edi
- pushl %ebx
- movl %edx,%edi
- movl \$1,%eax
- .word 0xa20f
- orl \$1<<10,%edx
- movl %edx,0(%edi)
- movl %ecx,4(%edi)
- popl %ebx
- popl %edi
- .align 4
- 1:
-___
- push (@out,$tmp);
+ push (@out,"\n.section\t.bss\n");
+ push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n");
+
+ return;
}
if ($const ne "")
}
}
+sub main'data_byte
+ {
+ push(@out,"\t.byte\t".join(',',@_)."\n");
+ }
+
sub main'data_word
{
push(@out,"\t.long\t".join(',',@_)."\n");
$val=$p2-1;
$val.=",0x90";
}
- push(@out,".align $val\n");
+ push(@out,".align\t$val\n");
}
# debug output functions: puts, putx, printf
&main'call(&main'label("PIC_me_up"));
&main'set_label("PIC_me_up");
&main'blindpop($dst);
- &main'add($dst,"\$$under"."_GLOBAL_OFFSET_TABLE_+[.-".
+ &main'add($dst,"\$${under}_GLOBAL_OFFSET_TABLE_+[.-".
&main'label("PIC_me_up") . "]");
- &main'mov($dst,&main'DWP("$under".$sym."\@GOT",$dst));
+ &main'mov($dst,&main'DWP($under.$sym."\@GOT",$dst));
}
else
{
sub main'initseg
{
local($f)=@_;
+ local($tmp);
if ($main'elf)
{
- local($tmp)=<<___;
-.pushsection .init
+ $tmp=<<___;
+.section .init
call $under$f
-.popsection
+ jmp .Linitalign
+.align $align
+.Linitalign:
+___
+ }
+ elsif ($main'coff)
+ {
+ $tmp=<<___; # applies to both Cygwin and Mingw
+.section .ctors
+.long $under$f
___
- push(@out,$tmp);
}
+ elsif ($main'aout)
+ {
+ local($ctor)="${under}_GLOBAL_\$I\$$f";
+ $tmp=".text\n";
+ $tmp.=".type $ctor,\@function\n" if ($main'pic);
+ $tmp.=<<___; # OpenBSD way...
+.globl $ctor
+.align 2
+$ctor:
+ jmp $under$f
+___
+ }
+ push(@out,$tmp) if ($tmp);
}
+
+1;