From 30df206cb0c66848142898115d301a0d80333d0f Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Tue, 20 Mar 2012 23:29:24 -0400
Subject: [PATCH] x86_64 math asm, long double functions only

this has not been tested heavily, but it's known to at least assemble
and run in basic usage cases. it's nearly identical to the
corresponding i386 code, and thus expected to be just as correct or
just as incorrect.
---
 src/math/x86_64/acosl.s   | 20 +++++++++
 src/math/x86_64/asinl.s   | 12 ++++++
 src/math/x86_64/atan2l.s  |  7 ++++
 src/math/x86_64/atanl.s   |  7 ++++
 src/math/x86_64/ceill.s   |  1 +
 src/math/x86_64/exp2l.s   |  1 +
 src/math/x86_64/expl.s    | 85 +++++++++++++++++++++++++++++++++++++++
 src/math/x86_64/expm1l.s  |  1 +
 src/math/x86_64/floorl.s  | 27 +++++++++++++
 src/math/x86_64/llrintl.s |  7 ++++
 src/math/x86_64/log10l.s  |  7 ++++
 src/math/x86_64/log1pl.s  | 15 +++++++
 src/math/x86_64/log2l.s   |  7 ++++
 src/math/x86_64/logl.s    |  7 ++++
 src/math/x86_64/lrintl.s  |  7 ++++
 src/math/x86_64/modfl.s   | 27 +++++++++++++
 src/math/x86_64/truncl.s  |  1 +
 17 files changed, 239 insertions(+)
 create mode 100644 src/math/x86_64/acosl.s
 create mode 100644 src/math/x86_64/asinl.s
 create mode 100644 src/math/x86_64/atan2l.s
 create mode 100644 src/math/x86_64/atanl.s
 create mode 100644 src/math/x86_64/ceill.s
 create mode 100644 src/math/x86_64/exp2l.s
 create mode 100644 src/math/x86_64/expl.s
 create mode 100644 src/math/x86_64/expm1l.s
 create mode 100644 src/math/x86_64/floorl.s
 create mode 100644 src/math/x86_64/llrintl.s
 create mode 100644 src/math/x86_64/log10l.s
 create mode 100644 src/math/x86_64/log1pl.s
 create mode 100644 src/math/x86_64/log2l.s
 create mode 100644 src/math/x86_64/logl.s
 create mode 100644 src/math/x86_64/lrintl.s
 create mode 100644 src/math/x86_64/modfl.s
 create mode 100644 src/math/x86_64/truncl.s

diff --git a/src/math/x86_64/acosl.s b/src/math/x86_64/acosl.s
new file mode 100644
index 00000000..92c29870
--- /dev/null
+++ b/src/math/x86_64/acosl.s
@@ -0,0 +1,20 @@
+.global acosl
+.type acosl,@function
+acosl:				# acosl(x) via 2*atan(sqrt((1-x)/(1+x))); x read from 8(%rsp), result left in st(0)
+	fldt 8(%rsp)		# x87 stack (top first): [x]
+	fld1			# [1, x]
+	fld %st(1)		# [x, 1, x]
+	fld1			# [1, x, 1, x]
+	fsubp			# [1-x, 1, x]  (assumes GAS AT&T no-operand fsubp yields st0-st1 -- TODO confirm)
+	fxch %st(2)		# [x, 1, 1-x]
+	faddp			# [1+x, 1-x]
+	fdivp			# [(1+x)/(1-x)]  (same AT&T operand-order assumption as fsubp above)
+	fsqrt			# [s] with s = sqrt((1+x)/(1-x))
+	fld1			# [1, s]
+	fxch %st(1)		# [s, 1]
+	fpatan			# [atan(1/s)]  (fpatan computes atan(st1/st0) and pops)
+	fld1
+	fld1
+	faddp			# [2, atan(1/s)]  (constant 2 built as 1+1)
+	fmulp			# [2*atan(1/s)]
+	ret
diff --git a/src/math/x86_64/asinl.s b/src/math/x86_64/asinl.s
new file mode 100644
index 00000000..83c392f7
--- /dev/null
+++ b/src/math/x86_64/asinl.s
@@ -0,0 +1,12 @@
+.global asinl
+.type asinl,@function
+asinl:				# asinl(x) = atan(x / sqrt(1 - x*x)); x read from 8(%rsp), result left in st(0)
+	fldt 8(%rsp)		# x87 stack (top first): [x]
+1:	fld %st(0)		# [x, x]  (label 1 is not referenced inside this file)
+	fmul %st(0)		# [x*x, x]
+	fld1			# [1, x*x, x]
+	fsubp %st(1)		# [1-x*x, x]  (assumes GAS AT&T fsubp yields st0-st1 -- TODO confirm)
+	fsqrt			# [sqrt(1-x*x), x]
+	fpatan			# [atan(x / sqrt(1-x*x))]  (fpatan computes atan(st1/st0) and pops)
+	ret
+
diff --git a/src/math/x86_64/atan2l.s b/src/math/x86_64/atan2l.s
new file mode 100644
index 00000000..e5f0a3de
--- /dev/null
+++ b/src/math/x86_64/atan2l.s
@@ -0,0 +1,7 @@
+.global atan2l
+.type atan2l,@function
+atan2l:				# two long double stack arguments, 16 bytes apart; result left in st(0)
+	fldt 8(%rsp)		# first argument (y)  -> x87 stack: [y]
+	fldt 24(%rsp)		# second argument (x) -> [x, y]
+	fpatan			# atan(st1/st0) = angle of y/x, pops to a single result
+	ret
diff --git a/src/math/x86_64/atanl.s b/src/math/x86_64/atanl.s
new file mode 100644
index 00000000..df76de5d
--- /dev/null
+++ b/src/math/x86_64/atanl.s
@@ -0,0 +1,7 @@
+.global atanl
+.type atanl,@function
+atanl:				# atanl(x) computed as fpatan with a denominator of 1; result left in st(0)
+	fldt 8(%rsp)		# x87 stack (top first): [x]
+	fld1			# [1, x]
+	fpatan			# atan(st1/st0) = atan(x/1)
+	ret
diff --git a/src/math/x86_64/ceill.s b/src/math/x86_64/ceill.s
new file mode 100644
index 00000000..bc29f15c
--- /dev/null
+++ b/src/math/x86_64/ceill.s
@@ -0,0 +1 @@
+# see floorl.s
diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s
new file mode 100644
index 00000000..510c256d
--- /dev/null
+++ b/src/math/x86_64/exp2l.s
@@ -0,0 +1 @@
+# see expl.s
diff --git a/src/math/x86_64/expl.s b/src/math/x86_64/expl.s
new file mode 100644
index 00000000..64c1c78e
--- /dev/null
+++ b/src/math/x86_64/expl.s
@@ -0,0 +1,85 @@
+.global expm1l
+.type expm1l,@function
+expm1l:				# expm1l(x) = 2^(x*log2(e)) - 1; x read from 8(%rsp), result left in st(0)
+	fldt 8(%rsp)		# x87 stack (top first): [x]
+1:	fldl2e			# [log2(e), x]  (this label 1 is not referenced inside this file)
+	fmulp			# [y], y = x*log2(e)
+	fld1			# [1, y]
+	fld %st(1)		# [y, 1, y]
+	fabs			# [|y|, 1, y]
+	fucom %st(1)		# compare |y| with 1
+	fnstsw %ax		# FPU status word -> ax (clobbers rax)
+	fstp %st(0)		# drop |y|
+	fstp %st(0)		# drop 1 -> [y]
+	sahf			# x87 condition bits in ah -> CPU flags
+	ja 1f			# |y| > 1: outside the f2xm1 path, take the general route
+	f2xm1			# [2^y - 1] computed directly
+	ret
+1:	push %rax		# 8 bytes of padding before the call (presumably for 16-byte stack alignment); value unused
+	call 1f			# next "1:" in the file is inside exp2l: returns 2^y for the y in st(0)
+	pop %rax		# discard the padding
+	fld1			# [1, 2^y]
+	fsubrp			# [2^y - 1]  (assumes GAS AT&T fsubrp yields st1-st0 -- TODO confirm)
+	ret
+
+.global expl
+.type expl,@function
+expl:				# expl(x) = 2^(x*log2(e)); scales the argument, then shares exp2l's body
+	fldt 8(%rsp)		# x87 stack (top first): [x]
+	fldl2e			# [log2(e), x]
+	fmulp			# [x*log2(e)]
+	jmp 1f			# continue at the "1:" label just after exp2l's argument load
+
+.global exp2l
+.type exp2l,@function
+exp2l:				# exp2l(x): result 2^x left in st(0); scratch memory is taken below %rsp
+	fldt 8(%rsp)		# x87 stack (top first): [x]
+1:	mov $0x467ff000,%eax	# shared entry (expl jumps here, expm1l calls here); bit pattern of float 16380.0
+	mov %eax,-16(%rsp)	# threshold constant, later reused as the exponent word of a long double
+	mov $0x80000000,%eax
+	mov %eax,-20(%rsp)	# high 32 mantissa bits of the long double at -24(%rsp): only the top bit set
+	xor %eax,%eax
+	mov %eax,-24(%rsp)	# low 32 mantissa bits = 0
+	flds -16(%rsp)		# [16380, x]
+	fld %st(1)		# [x, 16380, x]
+	fabs			# [|x|, 16380, x]
+	fucom %st(1)		# compare |x| with 16380
+	fnstsw			# status word -> ax (operand-less form; the flags are consumed by sahf below)
+	fstp %st(0)		# drop |x|
+	fstp %st(0)		# drop 16380 -> [x]
+	sahf
+	ja 2f			# |x| > 16380: use the fscale-based path
+	fld %st(0)		# [x, x]
+	fistpl -16(%rsp)	# n = x rounded to a 32-bit integer (current rounding mode) -> memory; [x]
+	fildl -16(%rsp)		# [n, x]
+	fxch %st(1)		# [x, n]
+	fsub %st(1)		# [x-n, n]
+	mov $0x3fff,%eax	# long double exponent bias
+	add %eax,-16(%rsp)	# memory now holds n+bias, i.e. the exponent field of 2^n
+	f2xm1			# [2^(x-n) - 1, n]
+	fld1
+	faddp			# [2^(x-n), n]
+	fldt -24(%rsp)		# [2^n, 2^(x-n), n]  (long double assembled by hand above)
+	fmulp			# [2^x, n]
+	fstp %st(1)		# [2^x]
+	ret

+2:	fld %st(0)		# large-magnitude path: [x, x]
+	fstpt -24(%rsp)		# spill x as an 80-bit value; [x]
+	mov -15(%rsp),%ah	# high byte of the sign/exponent word
+	and $0x7f,%ah		# ignore the sign bit
+	cmp $0x7f,%ah		# top exponent byte all ones (includes inf/NaN encodings)?
+	jne 1f
+	decb -15(%rsp)		# lower the exponent so the value is a huge finite number (presumably to avoid inf-inf below -- confirm)
+	fstp %st(0)		# discard the original x
+	fldt -24(%rsp)		# reload the patched value
+1:	fld %st(0)		# [x, x]
+	frndint			# [n, x], n = x rounded to an integer
+	fxch %st(1)		# [x, n]
+	fsub %st(1)		# [x-n, n]
+	f2xm1			# [2^(x-n) - 1, n]
+	fld1
+	faddp			# [2^(x-n), n]
+	fscale			# [2^(x-n) * 2^n, n]
+	fstp %st(1)		# [2^x]
+	ret
diff --git a/src/math/x86_64/expm1l.s b/src/math/x86_64/expm1l.s
new file mode 100644
index 00000000..89c2d290
--- /dev/null
+++ b/src/math/x86_64/expm1l.s
@@ -0,0 +1 @@
+# see expl.s
diff --git a/src/math/x86_64/floorl.s b/src/math/x86_64/floorl.s
new file mode 100644
index 00000000..80da4660
--- /dev/null
+++ b/src/math/x86_64/floorl.s
@@ -0,0 +1,27 @@
+.global floorl
+.type floorl,@function
+floorl:				# round x toward -infinity; also hosts the shared tail used by ceill and truncl
+	fldt 8(%rsp)		# x87 stack: [x]; the argument slot is free to reuse as scratch from here on
+1:	mov $0x7,%al		# new control-word high byte: rounding-control bits = round down (this label is unused here)
+1:	fstcw 8(%rsp)		# shared tail (target of "jmp 1b"): al = control-word high byte to apply; save current control word
+	mov 9(%rsp),%ah		# keep the original high byte in ah
+	mov %al,9(%rsp)		# patch in the requested rounding mode
+	fldcw 8(%rsp)		# activate it
+	frndint			# round st(0) to an integer using that mode
+	mov %ah,9(%rsp)		# put the original high byte back
+	fldcw 8(%rsp)		# restore the caller's control word
+	ret
+
+.global ceill
+.type ceill,@function
+ceill:				# round x toward +infinity using floorl's shared tail
+	fldt 8(%rsp)		# x87 stack: [x]
+	mov $0xb,%al		# control-word high byte with rounding-control bits = round up
+	jmp 1b			# nearest preceding "1:" is the fstcw line in floorl, so al is not overwritten
+
+.global truncl
+.type truncl,@function
+truncl:				# round x toward zero using floorl's shared tail
+	fldt 8(%rsp)		# x87 stack: [x]
+	mov $0xf,%al		# control-word high byte with rounding-control bits = truncate
+	jmp 1b			# nearest preceding "1:" is the fstcw line in floorl, so al is not overwritten
diff --git a/src/math/x86_64/llrintl.s b/src/math/x86_64/llrintl.s
new file mode 100644
index 00000000..3f3b7eea
--- /dev/null
+++ b/src/math/x86_64/llrintl.s
@@ -0,0 +1,7 @@
+.global llrintl
+.type llrintl,@function
+llrintl:			# convert x to a 64-bit integer using the current rounding mode; result in rax
+	fldt 8(%rsp)		# x87 stack: [x]
+	fistpq 8(%rsp)		# store as 64-bit integer over the argument slot and pop
+	mov 8(%rsp),%rax	# return value
+	ret
diff --git a/src/math/x86_64/log10l.s b/src/math/x86_64/log10l.s
new file mode 100644
index 00000000..48ea4af7
--- /dev/null
+++ b/src/math/x86_64/log10l.s
@@ -0,0 +1,7 @@
+.global log10l
+.type log10l,@function
+log10l:				# log10(x) = log10(2) * log2(x); result left in st(0)
+	fldlg2			# x87 stack (top first): [log10(2)]
+	fldt 8(%rsp)		# [x, log10(2)]
+	fyl2x			# st1 * log2(st0), pops to a single result
+	ret
diff --git a/src/math/x86_64/log1pl.s b/src/math/x86_64/log1pl.s
new file mode 100644
index 00000000..955c9dbf
--- /dev/null
+++ b/src/math/x86_64/log1pl.s
@@ -0,0 +1,15 @@
+.global log1pl
+.type log1pl,@function
+log1pl:				# log(1+x): fyl2xp1 for small |x|, otherwise fyl2x on 1+x; result left in st(0)
+	mov 14(%rsp),%eax	# bytes 6..9 of x: top 16 mantissa bits (low half), sign+exponent (high half)
+	fldln2			# x87 stack (top first): [ln(2)]
+	and $0x7fffffff,%eax	# drop the sign bit -> magnitude key for |x|
+	fldt 8(%rsp)		# [x, ln(2)]
+	cmp $0x3ffd9400,%eax	# threshold: exponent 0x3ffd, leading mantissa bits 0x9400 (about 0.289)
+	ja 1f			# |x| above the threshold: take the explicit 1+x route
+	fyl2xp1			# ln(2) * log2(1+x) without forming 1+x
+	ret
+1:	fld1			# [1, x, ln(2)]
+	faddp			# [1+x, ln(2)]
+	fyl2x			# ln(2) * log2(1+x)
+	ret
diff --git a/src/math/x86_64/log2l.s b/src/math/x86_64/log2l.s
new file mode 100644
index 00000000..ba08b9fb
--- /dev/null
+++ b/src/math/x86_64/log2l.s
@@ -0,0 +1,7 @@
+.global log2l
+.type log2l,@function
+log2l:				# log2(x) = 1 * log2(x); result left in st(0)
+	fld1			# x87 stack (top first): [1]
+	fldt 8(%rsp)		# [x, 1]
+	fyl2x			# st1 * log2(st0), pops to a single result
+	ret
diff --git a/src/math/x86_64/logl.s b/src/math/x86_64/logl.s
new file mode 100644
index 00000000..20dd1f81
--- /dev/null
+++ b/src/math/x86_64/logl.s
@@ -0,0 +1,7 @@
+.global logl
+.type logl,@function
+logl:				# ln(x) = ln(2) * log2(x); result left in st(0)
+	fldln2			# x87 stack (top first): [ln(2)]
+	fldt 8(%rsp)		# [x, ln(2)]
+	fyl2x			# st1 * log2(st0), pops to a single result
+	ret
diff --git a/src/math/x86_64/lrintl.s b/src/math/x86_64/lrintl.s
new file mode 100644
index 00000000..5d6a15af
--- /dev/null
+++ b/src/math/x86_64/lrintl.s
@@ -0,0 +1,7 @@
+.global lrintl
+.type lrintl,@function
+lrintl:				# convert x to long using the current rounding mode; result in rax
+	fldt 8(%rsp)		# x87 stack: [x]
+	fistpq 8(%rsp)		# long is 64-bit on x86_64: store all 64 bits over the argument slot and pop
+	mov 8(%rsp),%rax	# full-width load; a 32-bit load would zero-extend and lose the sign/high bits
+	ret
diff --git a/src/math/x86_64/modfl.s b/src/math/x86_64/modfl.s
new file mode 100644
index 00000000..cd2ec46e
--- /dev/null
+++ b/src/math/x86_64/modfl.s
@@ -0,0 +1,27 @@
+.global modfl
+.type modfl,@function
+modfl:				# split x (at 8(%rsp)) into integral part, stored to (%rdi), and fraction, left in st(0)
+	fldt 8(%rsp)		# x87 stack (top first): [x]
+	fxam			# classify x into condition bits C3/C2/C0
+	fnstsw %ax
+	sahf			# C0 -> CF, C2 -> PF, C3 -> ZF
+        jnp 1f		# C2 clear (e.g. NaN or zero): generic path
+        jc 2f		# C2 and C0 both set: infinity, handled separately
+1:      fld1		# [1, x]
+        fld %st(1)	# [x, 1, x]
+1:      fprem		# st0 = partial remainder of st0 / 1 (truncating), keeps the sign of x
+        fnstsw %ax
+        sahf
+        jp 1b		# C2 set: reduction incomplete, iterate fprem again
+        fstp %st(1)	# overwrite the 1 with the fraction and pop -> [frac, x]
+        fsubr %st(0),%st(1)	# [frac, x-frac]  (assumes GAS AT&T fsubr here yields st1-st0 -- TODO confirm)
+        fxch %st(1)	# [x-frac, frac]
+        fstpt (%rdi)	# store integral part; NOTE(review): x-frac looks like +0 for -1<x<0 where -0 is expected -- confirm
+        ret

+2:	fstpt (%rdi)		# infinity: the integral part is x itself
+	mov 6(%rdi),%eax	# bytes 6..9 of the stored value; sign bit of x lands in bit 31
+	and $0x80000000,%eax	# keep only the sign -> bit pattern of a float zero with x's sign
+	mov %eax,8(%rsp)	# argument slot reused as scratch
+	flds 8(%rsp)		# return that signed zero as the fractional part
+	ret
diff --git a/src/math/x86_64/truncl.s b/src/math/x86_64/truncl.s
new file mode 100644
index 00000000..bc29f15c
--- /dev/null
+++ b/src/math/x86_64/truncl.s
@@ -0,0 +1 @@
+# see floorl.s
-- 
2.25.1