From 34660d73bd0db29469d2758e1b48d2360edf3a2f Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Wed, 4 Sep 2013 15:52:23 +0000
Subject: [PATCH] math: fix remaining old long double code (erfl, fmal,
 lgammal, scalbnl)

in lgammal don't handle 1 and 2 specially, in fma use the new ldshape
union instead of ld80 one.
---
 src/math/erfl.c    | 37 ++++++++++++++-----------------------
 src/math/fma.c     | 38 ++++++++++++++------------------------
 src/math/fmal.c    | 30 ++++++++++++++++--------------
 src/math/lgammal.c | 45 +++++++++++++++++----------------------------
 src/math/scalbnl.c |  8 ++++----
 5 files changed, 65 insertions(+), 93 deletions(-)

diff --git a/src/math/erfl.c b/src/math/erfl.c
index 2fd3c440..42bb1a17 100644
--- a/src/math/erfl.c
+++ b/src/math/erfl.c
@@ -253,8 +253,8 @@ static long double erfc1(long double x)
 
 static long double erfc2(uint32_t ix, long double x)
 {
+	union ldshape u;
 	long double s,z,R,S;
-	uint32_t i0,i1;
 
 	if (ix < 0x3fffa000)  /* 0.84375 <= |x| < 1.25 */
 		return erfc1(x);
@@ -288,28 +288,22 @@ static long double erfc2(uint32_t ix, long double x)
 		S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] +
 		     s * (sc[4] + s))));
 	}
-	z = x;
-	GET_LDOUBLE_WORDS(ix, i0, i1, z);
-	i1 = 0;
-	i0 &= 0xffffff00;
-	SET_LDOUBLE_WORDS(z, ix, i0, i1);
+	u.f = x;
+	u.i.m &= -1ULL << 40;
+	z = u.f;
 	return expl(-z*z - 0.5625) * expl((z - x) * (z + x) + R / S) / x;
 }
 
 long double erfl(long double x)
 {
 	long double r, s, z, y;
-	uint32_t i0, i1, ix;
-	int sign;
+	union ldshape u = {x};
+	uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
+	int sign = u.i.se >> 15;
 
-	GET_LDOUBLE_WORDS(ix, i0, i1, x);
-	sign = ix >> 15;
-	ix &= 0x7fff;
-	if (ix == 0x7fff) {
+	if (ix >= 0x7fff0000)
 		/* erf(nan)=nan, erf(+-inf)=+-1 */
 		return 1 - 2*sign + 1/x;
-	}
-	ix = (ix << 16) | (i0 >> 16);
 	if (ix < 0x3ffed800) {  /* |x| < 0.84375 */
 		if (ix < 0x3fde8000) {  /* |x| < 2**-33 */
 			return 0.125 * (8 * x + efx8 * x);  /* avoid underflow */
@@ -332,17 +326,13 @@ long double erfl(long double x)
 long double erfcl(long double x)
 {
 	long double r, s, z, y;
-	uint32_t i0, i1, ix;
-	int sign;
+	union ldshape u = {x};
+	uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
+	int sign = u.i.se >> 15;
 
-	GET_LDOUBLE_WORDS(ix, i0, i1, x);
-	sign = ix>>15;
-	ix &= 0x7fff;
-	if (ix == 0x7fff)
+	if (ix >= 0x7fff0000)
 		/* erfc(nan) = nan, erfc(+-inf) = 0,2 */
 		return 2*sign + 1/x;
-
-	ix = (ix << 16) | (i0 >> 16);
 	if (ix < 0x3ffed800) {  /* |x| < 0.84375 */
 		if (ix < 0x3fbe0000)  /* |x| < 2**-65 */
 			return 1.0 - x;
@@ -358,6 +348,7 @@ long double erfcl(long double x)
 	}
 	if (ix < 0x4005d600)  /* |x| < 107 */
 		return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
-	return sign ? 2 - 0x1p-16382L : 0x1p-16382L*0x1p-16382L;
+	y = 0x1p-16382L;
+	return sign ? 2 - y : y*y;
 }
 #endif
diff --git a/src/math/fma.c b/src/math/fma.c
index 850a4a6c..1b1c1321 100644
--- a/src/math/fma.c
+++ b/src/math/fma.c
@@ -2,16 +2,6 @@
 #include "libm.h"
 
 #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384
-union ld80 {
-	long double x;
-	struct {
-		uint64_t m;
-		uint16_t e : 15;
-		uint16_t s : 1;
-		uint16_t pad;
-	} bits;
-};
-
 /* exact add, assumes exponent_x >= exponent_y */
 static void add(long double *hi, long double *lo, long double x, long double y)
 {
@@ -45,25 +35,25 @@ return an adjusted hi so that rounding it to double (or less) precision is corre
 */
 static long double adjust(long double hi, long double lo)
 {
-	union ld80 uhi, ulo;
+	union ldshape uhi, ulo;
 
 	if (lo == 0)
 		return hi;
-	uhi.x = hi;
-	if (uhi.bits.m & 0x3ff)
+	uhi.f = hi;
+	if (uhi.i.m & 0x3ff)
 		return hi;
-	ulo.x = lo;
-	if (uhi.bits.s == ulo.bits.s)
-		uhi.bits.m++;
+	ulo.f = lo;
+	if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000))
+		uhi.i.m++;
 	else {
-		uhi.bits.m--;
 		/* handle underflow and take care of ld80 implicit msb */
-		if (uhi.bits.m == (uint64_t)-1/2) {
-			uhi.bits.m *= 2;
-			uhi.bits.e--;
+		if (uhi.i.m << 1 == 0) {
+			uhi.i.m = 0;
+			uhi.i.se--;
 		}
+		uhi.i.m--;
 	}
-	return uhi.x;
+	return uhi.f;
 }
 
 /* adjusted add so the result is correct when rounded to double (or less) precision */
@@ -82,9 +72,9 @@ static long double dmul(long double x, long double y)
 
 static int getexp(long double x)
 {
-	union ld80 u;
-	u.x = x;
-	return u.bits.e;
+	union ldshape u;
+	u.f = x;
+	return u.i.se & 0x7fff;
 }
 
 double fma(double x, double y, double z)
diff --git a/src/math/fmal.c b/src/math/fmal.c
index 87e30fcf..c68db255 100644
--- a/src/math/fmal.c
+++ b/src/math/fmal.c
@@ -34,6 +34,13 @@ long double fmal(long double x, long double y, long double z)
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 #include <fenv.h>
+#if LDBL_MANT_DIG == 64
+#define LASTBIT(u) (u.i.m & 1)
+#define SPLIT (0x1p32L + 1)
+#elif LDBL_MANT_DIG == 113
+#define LASTBIT(u) (u.i.lo & 1)
+#define SPLIT (0x1p57L + 1)
+#endif
 
 /*
  * A struct dd represents a floating-point number with twice the precision
@@ -75,12 +82,12 @@ static inline struct dd dd_add(long double a, long double b)
 static inline long double add_adjusted(long double a, long double b)
 {
 	struct dd sum;
-	union IEEEl2bits u;
+	union ldshape u;
 
 	sum = dd_add(a, b);
 	if (sum.lo != 0) {
-		u.e = sum.hi;
-		if ((u.bits.manl & 1) == 0)
+		u.f = sum.hi;
+		if (!LASTBIT(u))
 			sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
 	}
 	return (sum.hi);
@@ -95,7 +102,7 @@ static inline long double add_and_denormalize(long double a, long double b, int
 {
 	struct dd sum;
 	int bits_lost;
-	union IEEEl2bits u;
+	union ldshape u;
 
 	sum = dd_add(a, b);
 
@@ -110,9 +117,9 @@ static inline long double add_and_denormalize(long double a, long double b, int
 	 * break the ties manually.
 	 */
 	if (sum.lo != 0) {
-		u.e = sum.hi;
-		bits_lost = -u.bits.exp - scale + 1;
-		if (bits_lost != 1 ^ (int)(u.bits.manl & 1))
+		u.f = sum.hi;
+		bits_lost = -u.i.se - scale + 1;
+		if ((bits_lost != 1) ^ LASTBIT(u))
 			sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
 	}
 	return scalbnl(sum.hi, scale);
@@ -125,20 +132,15 @@ static inline long double add_and_denormalize(long double a, long double b, int
  */
 static inline struct dd dd_mul(long double a, long double b)
 {
-#if LDBL_MANT_DIG == 64
-	static const long double split = 0x1p32L + 1.0;
-#elif LDBL_MANT_DIG == 113
-	static const long double split = 0x1p57L + 1.0;
-#endif
 	struct dd ret;
 	long double ha, hb, la, lb, p, q;
 
-	p = a * split;
+	p = a * SPLIT;
 	ha = a - p;
 	ha += p;
 	la = a - ha;
 
-	p = b * split;
+	p = b * SPLIT;
 	hb = b - p;
 	hb += p;
 	lb = b - hb;
diff --git a/src/math/lgammal.c b/src/math/lgammal.c
index 56d7866d..5d56358e 100644
--- a/src/math/lgammal.c
+++ b/src/math/lgammal.c
@@ -202,13 +202,11 @@ w7 =  4.885026142432270781165E-3L;
 
 static long double sin_pi(long double x)
 {
+	union ldshape u = {x};
+	uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
 	long double y, z;
-	int n, ix;
-	uint32_t se, i0, i1;
+	int n;
 
-	GET_LDOUBLE_WORDS(se, i0, i1, x);
-	ix = se & 0x7fff;
-	ix = (ix << 16) | (i0 >> 16);
 	if (ix < 0x3ffd8000)  /* 0.25 */
 		return sinl(pi * x);
 	y = -x;  /* x is assume negative */
@@ -229,8 +227,8 @@ static long double sin_pi(long double x)
 		} else {
 			if (ix < 0x403e8000)  /* 2^63 */
 				z = y + two63;  /* exact */
-			GET_LDOUBLE_WORDS(se, i0, i1, z);
-			n = i1 & 1;
+			u.f = z;
+			n = u.i.m & 1;
 			y = n;
 			n <<= 2;
 		}
@@ -261,33 +259,28 @@ static long double sin_pi(long double x)
 
 long double __lgammal_r(long double x, int *sg) {
 	long double t, y, z, nadj, p, p1, p2, q, r, w;
-	int i, ix;
-	uint32_t se, i0, i1;
+	union ldshape u = {x};
+	uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
+	int sign = u.i.se >> 15;
+	int i;
 
 	*sg = 1;
-	GET_LDOUBLE_WORDS(se, i0, i1, x);
-	ix = se & 0x7fff;
-
-	if ((ix | i0 | i1) == 0) {
-		if (se & 0x8000)
-			*sg = -1;
-		return 1.0 / fabsl(x);
-	}
-
-	ix = (ix << 16) | (i0 >> 16);
 
 	/* purge off +-inf, NaN, +-0, and negative arguments */
 	if (ix >= 0x7fff0000)
 		return x * x;
-
+	if (x == 0) {
+		*sg -= 2*sign;
+		return 1.0 / fabsl(x);
+	}
 	if (ix < 0x3fc08000) {  /* |x|<2**-63, return -log(|x|) */
-		if (se & 0x8000) {
+		if (sign) {
 			*sg = -1;
 			return -logl(-x);
 		}
 		return -logl(x);
 	}
-	if (se & 0x8000) {
+	if (sign) {
 		t = sin_pi (x);
 		if (t == 0.0)
 			return 1.0 / fabsl(t); /* -integer */
@@ -297,11 +290,7 @@ long double __lgammal_r(long double x, int *sg) {
 		x = -x;
 	}
 
-	/* purge off 1 and 2 */
-	if ((((ix - 0x3fff8000) | i0 | i1) == 0) ||
-	    (((ix - 0x40008000) | i0 | i1) == 0))
-		r = 0;
-	else if (ix < 0x40008000) {  /* x < 2.0 */
+	if (ix < 0x40008000) {  /* x < 2.0 */
 		if (ix <= 0x3ffee666) {  /* 8.99993896484375e-1 */
 			/* lgamma(x) = lgamma(x+1) - log(x) */
 			r = -logl(x);
@@ -380,7 +369,7 @@ long double __lgammal_r(long double x, int *sg) {
 		r = (x - 0.5) * (t - 1.0) + w;
 	} else /* 2**66 <= x <= inf */
 		r = x * (logl(x) - 1.0);
-	if (se & 0x8000)
+	if (sign)
 		r = nadj - r;
 	return r;
 }
diff --git a/src/math/scalbnl.c b/src/math/scalbnl.c
index 7ad7688b..08a4c587 100644
--- a/src/math/scalbnl.c
+++ b/src/math/scalbnl.c
@@ -8,7 +8,7 @@ long double scalbnl(long double x, int n)
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 long double scalbnl(long double x, int n)
 {
-	union IEEEl2bits scale;
+	union ldshape u;
 
 	if (n > 16383) {
 		x *= 0x1p16383L;
@@ -29,8 +29,8 @@ long double scalbnl(long double x, int n)
 				n = -16382;
 		}
 	}
-	scale.e = 1.0;
-	scale.bits.exp = 0x3fff + n;
-	return x * scale.e;
+	u.f = 1.0;
+	u.i.se = 0x3fff + n;
+	return x * u.f;
 }
 #endif
-- 
2.25.1