From 30df206cb0c66848142898115d301a0d80333d0f Mon Sep 17 00:00:00 2001
From: Rich Felker
Date: Tue, 20 Mar 2012 23:29:24 -0400
Subject: [PATCH] x86_64 math asm, long double functions only

this has not been tested heavily, but it's known to at least assemble
and run in basic usage cases. it's nearly identical to the
corresponding i386 code, and thus expected to be just as correct or
just as incorrect.
---
 src/math/x86_64/acosl.s   | 20 +++++++++
 src/math/x86_64/asinl.s   | 12 ++++++
 src/math/x86_64/atan2l.s  |  7 ++++
 src/math/x86_64/atanl.s   |  7 ++++
 src/math/x86_64/ceill.s   |  1 +
 src/math/x86_64/exp2l.s   |  1 +
 src/math/x86_64/expl.s    | 85 +++++++++++++++++++++++++++++++++++++++
 src/math/x86_64/expm1l.s  |  1 +
 src/math/x86_64/floorl.s  | 27 +++++++++++++
 src/math/x86_64/llrintl.s |  7 ++++
 src/math/x86_64/log10l.s  |  7 ++++
 src/math/x86_64/log1pl.s  | 15 +++++++
 src/math/x86_64/log2l.s   |  7 ++++
 src/math/x86_64/logl.s    |  7 ++++
 src/math/x86_64/lrintl.s  |  7 ++++
 src/math/x86_64/modfl.s   | 27 +++++++++++++
 src/math/x86_64/truncl.s  |  1 +
 17 files changed, 239 insertions(+)
 create mode 100644 src/math/x86_64/acosl.s
 create mode 100644 src/math/x86_64/asinl.s
 create mode 100644 src/math/x86_64/atan2l.s
 create mode 100644 src/math/x86_64/atanl.s
 create mode 100644 src/math/x86_64/ceill.s
 create mode 100644 src/math/x86_64/exp2l.s
 create mode 100644 src/math/x86_64/expl.s
 create mode 100644 src/math/x86_64/expm1l.s
 create mode 100644 src/math/x86_64/floorl.s
 create mode 100644 src/math/x86_64/llrintl.s
 create mode 100644 src/math/x86_64/log10l.s
 create mode 100644 src/math/x86_64/log1pl.s
 create mode 100644 src/math/x86_64/log2l.s
 create mode 100644 src/math/x86_64/logl.s
 create mode 100644 src/math/x86_64/lrintl.s
 create mode 100644 src/math/x86_64/modfl.s
 create mode 100644 src/math/x86_64/truncl.s

diff --git a/src/math/x86_64/acosl.s b/src/math/x86_64/acosl.s
new file mode 100644
index 00000000..92c29870
--- /dev/null
+++ b/src/math/x86_64/acosl.s
@@ -0,0 +1,20 @@
+.global acosl		# long double acosl(long double x); x read from 8(%rsp), result left in st0
+.type acosl,@function
+acosl:			# evaluates 2*atan(sqrt((1-x)/(1+x))) on the x87 stack
+	fldt 8(%rsp)	# st0 = x (80-bit load of the stack argument)
+	fld1		# push 1.0
+	fld %st(1)	# push a copy of x
+	fld1		# push 1.0
+	fsubp		# st0 = 1 - x (GAS AT&T reversed form: st0 - st1), pop
+	fxch %st(2)	# st0 = x, st1 = 1.0, st2 = 1 - x
+	faddp		# st0 = 1 + x, st1 = 1 - x
+	fdivp		# st0 = (1 + x)/(1 - x) (GAS AT&T reversed form: st0 / st1), pop
+	fsqrt		# st0 = sqrt((1 + x)/(1 - x))
+	fld1
+	fxch %st(1)	# st0 = the square root, st1 = 1.0
+	fpatan		# st0 = atan(st1/st0) = atan(sqrt((1 - x)/(1 + x)))
+	fld1
+	fld1
+	faddp		# st0 = 2.0
+	fmulp		# double the arctangent
+	ret
diff --git a/src/math/x86_64/asinl.s b/src/math/x86_64/asinl.s
new file mode 100644
index 00000000..83c392f7
--- /dev/null
+++ b/src/math/x86_64/asinl.s
@@ -0,0 +1,12 @@
+.global asinl		# long double asinl(long double x); evaluates atan(x / sqrt(1 - x*x))
+.type asinl,@function
+asinl:
+	fldt 8(%rsp)	# st0 = x
+1:	fld %st(0)	# duplicate x (label 1 is not referenced within this file)
+	fmul %st(0)	# st0 = x*x, st1 = x
+	fld1
+	fsubp %st(1)	# st0 = 1 - x*x (GAS AT&T reversed form: st0 - st1), pop
+	fsqrt		# st0 = sqrt(1 - x*x), st1 = x
+	fpatan		# st0 = atan(st1/st0)
+	ret
+
diff --git a/src/math/x86_64/atan2l.s b/src/math/x86_64/atan2l.s
new file mode 100644
index 00000000..e5f0a3de
--- /dev/null
+++ b/src/math/x86_64/atan2l.s
@@ -0,0 +1,7 @@
+.global atan2l		# long double atan2l(long double y, long double x)
+.type atan2l,@function
+atan2l:
+	fldt 8(%rsp)	# first argument (y)
+	fldt 24(%rsp)	# second argument (x), 16 bytes above the first; st0 = x, st1 = y
+	fpatan		# st0 = atan(st1/st0), i.e. the angle of (x, y)
+	ret
diff --git a/src/math/x86_64/atanl.s b/src/math/x86_64/atanl.s
new file mode 100644
index 00000000..df76de5d
--- /dev/null
+++ b/src/math/x86_64/atanl.s
@@ -0,0 +1,7 @@
+.global atanl		# long double atanl(long double x)
+.type atanl,@function
+atanl:
+	fldt 8(%rsp)	# st0 = x
+	fld1		# st0 = 1.0, st1 = x
+	fpatan		# st0 = atan(x / 1.0)
+	ret
diff --git a/src/math/x86_64/ceill.s b/src/math/x86_64/ceill.s
new file mode 100644
index 00000000..bc29f15c
--- /dev/null
+++ b/src/math/x86_64/ceill.s
@@ -0,0 +1 @@
+# see floorl.s
diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s
new file mode 100644
index 00000000..510c256d
--- /dev/null
+++ b/src/math/x86_64/exp2l.s
@@ -0,0 +1 @@
+# see expl.s
diff --git a/src/math/x86_64/expl.s b/src/math/x86_64/expl.s
new file mode 100644
index 00000000..64c1c78e
--- /dev/null
+++ b/src/math/x86_64/expl.s
@@ -0,0 +1,85 @@
+.global expm1l		# long double expm1l(long double x): e^x - 1
+.type expm1l,@function
+expm1l:
+	fldt 8(%rsp)	# st0 = x
+1:	fldl2e
+	fmulp		# st0 = x * log2(e)
+	fld1
+	fld %st(1)
+	fabs		# st0 = |x*log2(e)|, st1 = 1.0, st2 = x*log2(e)
+	fucom %st(1)	# compare the magnitude with 1.0
+	fnstsw %ax	# x87 status word -> ax
+	fstp %st(0)
+	fstp %st(0)	# pop both temporaries; st0 = x*log2(e) again
+	sahf		# x87 condition bits (in ah) -> CPU flags
+	ja 1f		# magnitude > 1.0: outside f2xm1's input range, take the path below
+	f2xm1		# st0 = 2^(x*log2(e)) - 1
+	ret
+1:	push %rax	# scratch push; given SysV entry alignment, rsp is 16-byte aligned at the call
+	call 1f		# run the exp2l body (next forward label 1) on st0
+	pop %rax
+	fld1
+	fsubrp		# st0 = 2^(x*log2(e)) - 1.0 (GAS AT&T reversed form: st1 - st0), pop
+	ret
+
+.global expl		# long double expl(long double x): e^x
+.type expl,@function
+expl:
+	fldt 8(%rsp)	# st0 = x
+	fldl2e
+	fmulp		# st0 = x * log2(e)
+	jmp 1f		# continue in the exp2l body
+
+.global exp2l		# long double exp2l(long double x): 2^x
+.type exp2l,@function
+exp2l:
+	fldt 8(%rsp)	# st0 = x
+1:	mov $0x467ff000,%eax	# single-precision bit pattern of 16380.0
+	mov %eax,-16(%rsp)	# scratch below rsp; this dword is also bytes 8..11 of the 80-bit slot at -24
+	mov $0x80000000,%eax
+	mov %eax,-20(%rsp)	# high mantissa dword of the slot: only the explicit integer bit set
+	xor %eax,%eax
+	mov %eax,-24(%rsp)	# low mantissa dword of the slot = 0
+	flds -16(%rsp)	# st0 = 16380.0, st1 = x
+	fld %st(1)
+	fabs		# st0 = |x|
+	fucom %st(1)	# compare |x| with 16380.0
+	fnstsw		# status word store with no explicit operand; sahf below consumes ah
+	fstp %st(0)
+	fstp %st(0)	# pop both temporaries; st0 = x
+	sahf
+	ja 2f		# |x| > 16380.0: use the frndint/fscale path at 2:
+	fld %st(0)
+	fistpl -16(%rsp)	# n = x converted to a 32-bit integer (current rounding mode)
+	fildl -16(%rsp)	# st0 = n, st1 = x
+	fxch %st(1)
+	fsub %st(1)	# st0 = x - n, st1 = n
+	mov $0x3fff,%eax
+	add %eax,-16(%rsp)	# n + 0x3fff: its low 16 bits form the sign/exponent field of the slot at -24
+	f2xm1
+	fld1
+	faddp		# st0 = 2^(x-n)
+	fldt -24(%rsp)	# load the constructed 80-bit value 2^n
+	fmulp		# st0 = 2^(x-n) * 2^n, st1 = n
+	fstp %st(1)	# overwrite n with the product and pop; st0 = result
+	ret
+
+2:	fld %st(0)
+	fstpt -24(%rsp)	# spill a copy of x to the scratch slot (bytes -24..-15)
+	mov -15(%rsp),%ah	# top byte of the spilled value: sign bit plus the high 7 exponent bits
+	and $0x7f,%ah	# drop the sign bit
+	cmp $0x7f,%ah
+	jne 1f		# high exponent bits not all ones: keep x as is
+	decb -15(%rsp)	# otherwise lower the exponent of the spilled copy
+	fstp %st(0)
+	fldt -24(%rsp)	# and use the adjusted copy in place of x
+1:	fld %st(0)
+	frndint		# st0 = n = x rounded to an integer, st1 = x
+	fxch %st(1)
+	fsub %st(1)	# st0 = x - n, st1 = n
+	f2xm1
+	fld1
+	faddp		# st0 = 2^(x-n)
+	fscale		# scale st0 by 2^n using n from st1
+	fstp %st(1)	# overwrite n with the result and pop
+	ret
diff --git a/src/math/x86_64/expm1l.s b/src/math/x86_64/expm1l.s
new file mode 100644
index 00000000..89c2d290
--- /dev/null
+++ b/src/math/x86_64/expm1l.s
@@ -0,0 +1 @@
+# see expl.s
diff --git a/src/math/x86_64/floorl.s b/src/math/x86_64/floorl.s
new file mode 100644
index 00000000..80da4660
--- /dev/null
+++ b/src/math/x86_64/floorl.s
@@ -0,0 +1,27 @@
+.global floorl		# long double floorl(long double x)
+.type floorl,@function
+floorl:
+	fldt 8(%rsp)	# st0 = x
+1:	mov $0x7,%al	# high control-word byte: extended precision, round toward -infinity
+1:	fstcw 8(%rsp)	# shared tail (ceill/truncl jump here): save the control word over the argument slot
+	mov 9(%rsp),%ah	# remember the original high byte (precision/rounding fields)
+	mov %al,9(%rsp)	# substitute the byte chosen by the entry point
+	fldcw 8(%rsp)
+	frndint		# round st0 to an integer under the temporary rounding mode
+	mov %ah,9(%rsp)
+	fldcw 8(%rsp)	# restore the caller's control word
+	ret
+
+.global ceill		# long double ceill(long double x)
+.type ceill,@function
+ceill:
+	fldt 8(%rsp)	# st0 = x
+	mov $0xb,%al	# high control-word byte: extended precision, round toward +infinity
+	jmp 1b		# nearest preceding label 1 is the fstcw line
+
+.global truncl		# long double truncl(long double x)
+.type truncl,@function
+truncl:
+	fldt 8(%rsp)	# st0 = x
+	mov $0xf,%al	# high control-word byte: extended precision, round toward zero
+	jmp 1b		# nearest preceding label 1 is the fstcw line
diff --git a/src/math/x86_64/llrintl.s b/src/math/x86_64/llrintl.s
new file mode 100644
index 00000000..3f3b7eea
--- /dev/null
+++ b/src/math/x86_64/llrintl.s
@@ -0,0 +1,7 @@
+.global llrintl		# long long llrintl(long double x)
+.type llrintl,@function
+llrintl:
+	fldt 8(%rsp)	# st0 = x
+	fistpq 8(%rsp)	# convert to a 64-bit integer (current rounding mode), stored over the argument slot
+	mov 8(%rsp),%rax	# return the 64-bit result
+	ret
diff --git a/src/math/x86_64/log10l.s b/src/math/x86_64/log10l.s
new file mode 100644
index 00000000..48ea4af7
--- /dev/null
+++ b/src/math/x86_64/log10l.s
@@ -0,0 +1,7 @@
+.global log10l		# long double log10l(long double x)
+.type log10l,@function
+log10l:
+	fldlg2		# st0 = log10(2)
+	fldt 8(%rsp)	# st0 = x, st1 = log10(2)
+	fyl2x		# st0 = log10(2) * log2(x)
+	ret
diff --git a/src/math/x86_64/log1pl.s b/src/math/x86_64/log1pl.s
new file mode 100644
index 
00000000..955c9dbf
--- /dev/null
+++ b/src/math/x86_64/log1pl.s
@@ -0,0 +1,15 @@
+.global log1pl		# long double log1pl(long double x): ln(1 + x)
+.type log1pl,@function
+log1pl:
+	mov 14(%rsp),%eax	# bytes 6..9 of x: top 16 mantissa bits (low half), sign+exponent (high half)
+	fldln2		# st0 = ln(2)
+	and $0x7fffffff,%eax	# clear the sign bit
+	fldt 8(%rsp)	# st0 = x, st1 = ln(2)
+	cmp $0x3ffd9400,%eax	# magnitude threshold (exponent 0x3ffd, mantissa top bits 0x9400, about 0.289)
+	ja 1f		# larger magnitude: form 1 + x explicitly
+	fyl2xp1		# st0 = ln(2) * log2(x + 1)
+	ret
+1:	fld1
+	faddp		# st0 = x + 1, st1 = ln(2)
+	fyl2x		# st0 = ln(2) * log2(x + 1)
+	ret
diff --git a/src/math/x86_64/log2l.s b/src/math/x86_64/log2l.s
new file mode 100644
index 00000000..ba08b9fb
--- /dev/null
+++ b/src/math/x86_64/log2l.s
@@ -0,0 +1,7 @@
+.global log2l		# long double log2l(long double x)
+.type log2l,@function
+log2l:
+	fld1		# st0 = 1.0
+	fldt 8(%rsp)	# st0 = x, st1 = 1.0
+	fyl2x		# st0 = 1.0 * log2(x)
+	ret
diff --git a/src/math/x86_64/logl.s b/src/math/x86_64/logl.s
new file mode 100644
index 00000000..20dd1f81
--- /dev/null
+++ b/src/math/x86_64/logl.s
@@ -0,0 +1,7 @@
+.global logl		# long double logl(long double x)
+.type logl,@function
+logl:
+	fldln2		# st0 = ln(2)
+	fldt 8(%rsp)	# st0 = x, st1 = ln(2)
+	fyl2x		# st0 = ln(2) * log2(x)
+	ret
diff --git a/src/math/x86_64/lrintl.s b/src/math/x86_64/lrintl.s
new file mode 100644
index 00000000..5d6a15af
--- /dev/null
+++ b/src/math/x86_64/lrintl.s
@@ -0,0 +1,7 @@
+.global lrintl		# long lrintl(long double x); long is 64 bits on x86_64
+.type lrintl,@function
+lrintl:
+	fldt 8(%rsp)	# st0 = x
+	fistpll 8(%rsp)	# convert to a 64-bit integer (current rounding mode); a 32-bit store would truncate
+	mov 8(%rsp),%rax	# load all 64 bits; a load into %eax would zero-extend negative results
+	ret
diff --git a/src/math/x86_64/modfl.s b/src/math/x86_64/modfl.s
new file mode 100644
index 00000000..cd2ec46e
--- /dev/null
+++ b/src/math/x86_64/modfl.s
@@ -0,0 +1,27 @@
+.global modfl		# long double modfl(long double x, long double *iptr); iptr arrives in rdi
+.type modfl,@function
+modfl:
+	fldt 8(%rsp)	# st0 = x
+	fxam		# classify st0 into condition bits C3/C2/C0
+	fnstsw %ax
+	sahf		# C0 -> CF, C2 -> PF, C3 -> ZF
+	jnp 1f		# C2 clear (e.g. zero or NaN): general path
+	jc 2f		# C2 and C0 both set: infinity
+1:	fld1
+	fld %st(1)	# stack: st0 = x, st1 = 1.0, st2 = x
+1:	fprem		# partial remainder of st0 by 1.0
+	fnstsw %ax
+	sahf
+	jp 1b		# C2 set: reduction incomplete, repeat
+	fstp %st(1)	# drop the 1.0; st0 = fractional part, st1 = x
+	fsubr %st(0),%st(1)	# st1 = x - fraction (GAS AT&T reversed form). NOTE(review): yields +0 for -1 < x < 0; confirm the sign of *iptr
+	fxch %st(1)
+	fstpt (%rdi)	# *iptr = integral part; the fractional part stays in st0 as the return value
+	ret
+
+2:	fstpt (%rdi)	# infinity: *iptr = x
+	mov 6(%rdi),%eax	# bytes 6..9 of the stored value; bit 31 is the sign bit
+	and $0x80000000,%eax	# keep only the sign
+	mov %eax,8(%rsp)
+	flds 8(%rsp)	# return a single-precision zero carrying that sign
+	ret
diff --git a/src/math/x86_64/truncl.s b/src/math/x86_64/truncl.s
new file mode 100644
index 00000000..bc29f15c
--- /dev/null
+++ b/src/math/x86_64/truncl.s
@@ -0,0 +1 @@
+# see floorl.s
-- 
2.25.1