From bfda37935867f9bf271d6074db0accf05c63ad10 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 18 May 2013 14:40:22 +0000
Subject: [PATCH] math: sin cos cleanup

* use unsigned arithmetics
* use unsigned to store arg reduction quotient (so n&3 is understood)
* remove z=0.0 variables, use literal 0
* raise underflow and inexact exceptions properly when x is small
* fix spurious underflow in tanl
---
 src/math/cos.c     | 20 +++++++++++---------
 src/math/cosf.c    | 33 +++++++++++++++++++--------------
 src/math/cosl.c    | 22 +++++++++++-----------
 src/math/sin.c     | 15 ++++++++-------
 src/math/sincos.c  | 12 ++++++------
 src/math/sincosf.c | 45 ++++++++++++++++++++++++---------------------
 src/math/sincosl.c | 20 +++++++++++---------
 src/math/sinf.c    | 33 ++++++++++++++++++---------------
 src/math/sinl.c    | 29 ++++++++++++++---------------
 src/math/tanl.c    | 11 ++++++-----
 10 files changed, 128 insertions(+), 112 deletions(-)

diff --git a/src/math/cos.c b/src/math/cos.c
index 76990e7f..ee97f68b 100644
--- a/src/math/cos.c
+++ b/src/math/cos.c
@@ -44,26 +44,28 @@
 
 double cos(double x)
 {
-	double y[2],z=0.0;
-	int32_t n, ix;
+	double y[2];
+	uint32_t ix;
+	unsigned n;
 
 	GET_HIGH_WORD(ix, x);
+	ix &= 0x7fffffff;
 
 	/* |x| ~< pi/4 */
-	ix &= 0x7fffffff;
 	if (ix <= 0x3fe921fb) {
-		if (ix < 0x3e46a09e)  /* if x < 2**-27 * sqrt(2) */
-			/* raise inexact if x != 0 */
-			if ((int)x == 0)
-				return 1.0;
-		return __cos(x, z);
+		if (ix < 0x3e46a09e) {  /* |x| < 2**-27 * sqrt(2) */
+			/* raise inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p120f);
+			return 1.0;
+		}
+		return __cos(x, 0);
 	}
 
 	/* cos(Inf or NaN) is NaN */
 	if (ix >= 0x7ff00000)
 		return x-x;
 
-	/* argument reduction needed */
+	/* argument reduction */
 	n = __rem_pio2(x, y);
 	switch (n&3) {
 	case 0: return  __cos(y[0], y[1]);
diff --git a/src/math/cosf.c b/src/math/cosf.c
index 4d94130f..23f3e5bf 100644
--- a/src/math/cosf.c
+++ b/src/math/cosf.c
@@ -26,34 +26,39 @@ c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
 float cosf(float x)
 {
 	double y;
-	int32_t n, hx, ix;
+	uint32_t ix;
+	unsigned n, sign;
+
+	GET_FLOAT_WORD(ix, x);
+	sign = ix >> 31;
+	ix &= 0x7fffffff;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
 	if (ix <= 0x3f490fda) {  /* |x| ~<= pi/4 */
-		if (ix < 0x39800000)  /* |x| < 2**-12 */
-			if ((int)x == 0)  /* raise inexact if x != 0 */
-				return 1.0;
+		if (ix < 0x39800000) {  /* |x| < 2**-12 */
+			/* raise inexact if x != 0 */
+			FORCE_EVAL(x + 0x1p120f);
+			return 1.0f;
+		}
 		return __cosdf(x);
 	}
 	if (ix <= 0x407b53d1) {  /* |x| ~<= 5*pi/4 */
 		if (ix > 0x4016cbe3)  /* |x|  ~> 3*pi/4 */
-			return -__cosdf(hx > 0 ? x-c2pio2 : x+c2pio2);
+			return -__cosdf(sign ? x+c2pio2 : x-c2pio2);
 		else {
-			if (hx > 0)
-				return __sindf(c1pio2 - x);
-			else
+			if (sign)
 				return __sindf(x + c1pio2);
+			else
+				return __sindf(c1pio2 - x);
 		}
 	}
 	if (ix <= 0x40e231d5) {  /* |x| ~<= 9*pi/4 */
 		if (ix > 0x40afeddf)  /* |x| ~> 7*pi/4 */
-			return __cosdf(hx > 0 ? x-c4pio2 : x+c4pio2);
+			return __cosdf(sign ? x+c4pio2 : x-c4pio2);
 		else {
-			if (hx > 0)
-				return __sindf(x - c3pio2);
+			if (sign)
+				return __sindf(-x - c3pio2);
 			else
-				return __sindf(-c3pio2 - x);
+				return __sindf(x - c3pio2);
 		}
 	}
 
diff --git a/src/math/cosl.c b/src/math/cosl.c
index 25d9005a..0794d284 100644
--- a/src/math/cosl.c
+++ b/src/math/cosl.c
@@ -39,30 +39,30 @@ long double cosl(long double x) {
 long double cosl(long double x)
 {
 	union IEEEl2bits z;
-	int e0;
+	unsigned n;
 	long double y[2];
 	long double hi, lo;
 
 	z.e = x;
 	z.bits.sign = 0;
 
-	/* If x = +-0 or x is a subnormal number, then cos(x) = 1 */
-	if (z.bits.exp == 0)
-		return 1.0;
-
 	/* If x = NaN or Inf, then cos(x) = NaN. */
-	if (z.bits.exp == 32767)
+	if (z.bits.exp == 0x7fff)
 		return (x - x) / (x - x);
 
-	/* Optimize the case where x is already within range. */
-	if (z.e < M_PI_4)
+	/* |x| < (double)pi/4 */
+	if (z.e < M_PI_4) {
+		/* |x| < 0x1p-64 */
+		if (z.bits.exp < 0x3fff - 64)
+			/* raise inexact if x!=0 */
+			return 1.0 + x;
 		return __cosl(z.e, 0);
+	}
 
-	e0 = __rem_pio2l(x, y);
+	n = __rem_pio2l(x, y);
 	hi = y[0];
 	lo = y[1];
-
-	switch (e0 & 3) {
+	switch (n & 3) {
 	case 0:
 		hi = __cosl(hi, lo);
 		break;
diff --git a/src/math/sin.c b/src/math/sin.c
index 8e430f85..055e215b 100644
--- a/src/math/sin.c
+++ b/src/math/sin.c
@@ -44,21 +44,22 @@
 
 double sin(double x)
 {
-	double y[2], z=0.0;
-	int32_t n, ix;
+	double y[2];
+	uint32_t ix;
+	unsigned n;
 
 	/* High word of x. */
 	GET_HIGH_WORD(ix, x);
+	ix &= 0x7fffffff;
 
 	/* |x| ~< pi/4 */
-	ix &= 0x7fffffff;
 	if (ix <= 0x3fe921fb) {
 		if (ix < 0x3e500000) {  /* |x| < 2**-26 */
-			/* raise inexact if x != 0 */
-			if ((int)x == 0)
-				return x;
+			/* raise inexact if x != 0 and underflow if subnormal*/
+			FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
+			return x;
 		}
-		return __sin(x, z, 0);
+		return __sin(x, 0.0, 0);
 	}
 
 	/* sin(Inf or NaN) is NaN */
diff --git a/src/math/sincos.c b/src/math/sincos.c
index 442e285e..49f8a098 100644
--- a/src/math/sincos.c
+++ b/src/math/sincos.c
@@ -15,7 +15,8 @@
 void sincos(double x, double *sin, double *cos)
 {
 	double y[2], s, c;
-	uint32_t n, ix;
+	uint32_t ix;
+	unsigned n;
 
 	GET_HIGH_WORD(ix, x);
 	ix &= 0x7fffffff;
@@ -24,11 +25,10 @@ void sincos(double x, double *sin, double *cos)
 	if (ix <= 0x3fe921fb) {
 		/* if |x| < 2**-27 * sqrt(2) */
 		if (ix < 0x3e46a09e) {
-			/* raise inexact if x != 0 */
-			if ((int)x == 0) {
-				*sin = x;
-				*cos = 1.0;
-			}
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
+			*sin = x;
+			*cos = 1.0;
 			return;
 		}
 		*sin = __sin(x, 0.0, 0);
diff --git a/src/math/sincosf.c b/src/math/sincosf.c
index 5e3b9a41..1b50f01c 100644
--- a/src/math/sincosf.c
+++ b/src/math/sincosf.c
@@ -25,21 +25,23 @@ s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
 
 void sincosf(float x, float *sin, float *cos)
 {
-	double y, s, c;
-	uint32_t n, hx, ix;
+	double y;
+	float_t s, c;
+	uint32_t ix;
+	unsigned n, sign;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_FLOAT_WORD(ix, x);
+	sign = ix >> 31;
+	ix &= 0x7fffffff;
 
 	/* |x| ~<= pi/4 */
 	if (ix <= 0x3f490fda) {
 		/* |x| < 2**-12 */
 		if (ix < 0x39800000) {
-			/* raise inexact if x != 0 */
-			if((int)x == 0) {
-				*sin = x;
-				*cos = 1.0f;
-			}
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
+			*sin = x;
+			*cos = 1.0f;
 			return;
 		}
 		*sin = __sindf(x);
@@ -50,34 +52,35 @@ void sincosf(float x, float *sin, float *cos)
 	/* |x| ~<= 5*pi/4 */
 	if (ix <= 0x407b53d1) {
 		if (ix <= 0x4016cbe3) {  /* |x| ~<= 3pi/4 */
-			if (hx < 0x80000000) {
-				*sin = __cosdf(x - s1pio2);
-				*cos = __sindf(s1pio2 - x);
-			} else {
+			if (sign) {
 				*sin = -__cosdf(x + s1pio2);
 				*cos = __sindf(x + s1pio2);
+			} else {
+				*sin = __cosdf(s1pio2 - x);
+				*cos = __sindf(s1pio2 - x);
 			}
 			return;
 		}
-		*sin = __sindf(hx < 0x80000000 ? s2pio2 - x : -s2pio2 - x);
-		*cos = -__cosdf(hx < 0x80000000 ? x - s2pio2 : x + s2pio2);
+		/* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */
+		*sin = -__sindf(sign ? x + s2pio2 : x - s2pio2);
+		*cos = -__cosdf(sign ? x + s2pio2 : x - s2pio2);
 		return;
 	}
 
 	/* |x| ~<= 9*pi/4 */
 	if (ix <= 0x40e231d5) {
 		if (ix <= 0x40afeddf) {  /* |x| ~<= 7*pi/4 */
-			if (hx < 0x80000000) {
+			if (sign) {
+				*sin = __cosdf(x + s3pio2);
+				*cos = -__sindf(x + s3pio2);
+			} else {
 				*sin = -__cosdf(x - s3pio2);
 				*cos = __sindf(x - s3pio2);
-			} else {
-				*sin = __cosdf(x + s3pio2);
-				*cos = __sindf(-s3pio2 - x);
 			}
 			return;
 		}
-		*sin = __sindf(hx < 0x80000000 ? x - s4pio2 : x + s4pio2);
-		*cos = __cosdf(hx < 0x80000000 ? x - s4pio2 : x + s4pio2);
+		*sin = __sindf(sign ? x + s4pio2 : x - s4pio2);
+		*cos = __cosdf(sign ? x + s4pio2 : x - s4pio2);
 		return;
 	}
 
diff --git a/src/math/sincosl.c b/src/math/sincosl.c
index d632fe6f..5db69bd6 100644
--- a/src/math/sincosl.c
+++ b/src/math/sincosl.c
@@ -10,27 +10,29 @@ void sincosl(long double x, long double *sin, long double *cos)
 void sincosl(long double x, long double *sin, long double *cos)
 {
 	union IEEEl2bits u;
-	int n;
+	unsigned n;
 	long double y[2], s, c;
 
 	u.e = x;
 	u.bits.sign = 0;
 
-	/* x = +-0 or subnormal */
-	if (!u.bits.exp) {
-		*sin = x;
-		*cos = 1.0;
-		return;
-	}
-
 	/* x = nan or inf */
 	if (u.bits.exp == 0x7fff) {
 		*sin = *cos = x - x;
 		return;
 	}
 
-	/* |x| < pi/4 */
+	/* |x| < (double)pi/4 */
 	if (u.e < M_PI_4) {
+		/* |x| < 0x1p-64 */
+		if (u.bits.exp < 0x3fff - 64) {
+			/* raise underflow if subnormal */
+			if (u.bits.exp == 0) FORCE_EVAL(x*0x1p-120f);
+			*sin = x;
+			/* raise inexact if x!=0 */
+			*cos = 1.0 + x;
+			return;
+		}
 		*sin = __sinl(x, 0, 0);
 		*cos = __cosl(x, 0);
 		return;
diff --git a/src/math/sinf.c b/src/math/sinf.c
index dcca67af..64e39f50 100644
--- a/src/math/sinf.c
+++ b/src/math/sinf.c
@@ -26,35 +26,38 @@ s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
 float sinf(float x)
 {
 	double y;
-	int32_t n, hx, ix;
+	uint32_t ix;
+	int n, sign;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_FLOAT_WORD(ix, x);
+	sign = ix >> 31;
+	ix &= 0x7fffffff;
 
 	if (ix <= 0x3f490fda) {  /* |x| ~<= pi/4 */
-		if (ix < 0x39800000)  /* |x| < 2**-12 */
-			/* raise inexact if x != 0 */
-			if((int)x == 0)
-				return x;
+		if (ix < 0x39800000) {  /* |x| < 2**-12 */
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(ix < 0x00800000 ? x/0x1p120f : x+0x1p120f);
+			return x;
+		}
 		return __sindf(x);
 	}
 	if (ix <= 0x407b53d1) {  /* |x| ~<= 5*pi/4 */
 		if (ix <= 0x4016cbe3) {  /* |x| ~<= 3pi/4 */
-			if (hx > 0)
-				return __cosdf(x - s1pio2);
-			else
+			if (sign)
 				return -__cosdf(x + s1pio2);
+			else
+				return __cosdf(x - s1pio2);
 		}
-		return __sindf(hx > 0 ? s2pio2 - x : -s2pio2 - x);
+		return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
 	}
 	if (ix <= 0x40e231d5) {  /* |x| ~<= 9*pi/4 */
 		if (ix <= 0x40afeddf) {  /* |x| ~<= 7*pi/4 */
-			if (hx > 0)
-				return -__cosdf(x - s3pio2);
-			else
+			if (sign)
 				return __cosdf(x + s3pio2);
+			else
+				return -__cosdf(x - s3pio2);
 		}
-		return __sindf(hx > 0 ? x - s4pio2 : x + s4pio2);
+		return __sindf(sign ? x + s4pio2 : x - s4pio2);
 	}
 
 	/* sin(Inf or NaN) is NaN */
diff --git a/src/math/sinl.c b/src/math/sinl.c
index 7e0b44f4..6ca99986 100644
--- a/src/math/sinl.c
+++ b/src/math/sinl.c
@@ -37,33 +37,32 @@ long double sinl(long double x)
 long double sinl(long double x)
 {
 	union IEEEl2bits z;
-	int e0, s;
+	unsigned n;
 	long double y[2];
 	long double hi, lo;
 
 	z.e = x;
-	s = z.bits.sign;
 	z.bits.sign = 0;
 
-	/* If x = +-0 or x is a subnormal number, then sin(x) = x */
-	if (z.bits.exp == 0)
-		return x;
-
 	/* If x = NaN or Inf, then sin(x) = NaN. */
-	if (z.bits.exp == 32767)
+	if (z.bits.exp == 0x7fff)
 		return (x - x) / (x - x);
 
-	/* Optimize the case where x is already within range. */
+	/* |x| < (double)pi/4 */
 	if (z.e < M_PI_4) {
-		hi = __sinl(z.e, 0, 0);
-		return  s ? -hi : hi;
+		/* |x| < 0x1p-64 */
+		if (z.bits.exp < 0x3fff - 64) {
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(z.bits.exp == 0 ? x/0x1p120f : x+0x1p120f);
+			return x;
+		}
+		return __sinl(x, 0.0, 0);
 	}
 
-	e0 = __rem_pio2l(x, y);
+	n = __rem_pio2l(x, y);
 	hi = y[0];
 	lo = y[1];
-
-	switch (e0 & 3) {
+	switch (n & 3) {
 	case 0:
 		hi = __sinl(hi, lo, 1);
 		break;
@@ -71,10 +70,10 @@ long double sinl(long double x)
 		hi = __cosl(hi, lo);
 		break;
 	case 2:
-		hi = - __sinl(hi, lo, 1);
+		hi = -__sinl(hi, lo, 1);
 		break;
 	case 3:
-		hi = - __cosl(hi, lo);
+		hi = -__cosl(hi, lo);
 		break;
 	}
 	return hi;
diff --git a/src/math/tanl.c b/src/math/tanl.c
index 3b51e011..546c7a02 100644
--- a/src/math/tanl.c
+++ b/src/math/tanl.c
@@ -53,11 +53,12 @@ long double tanl(long double x)
 
 	/* |x| < (double)pi/4 */
 	if (z.e < M_PI_4) {
-		/* x = +-0 or x is subnormal */
-		if (z.bits.exp == 0)
-			/* inexact and underflow if x!=0 */
-			return x + x*0x1p-120f;
-		/* can raise spurious underflow */
+		/* |x| < 0x1p-64 */
+		if (z.bits.exp < 0x3fff - 64) {
+			/* raise inexact if x!=0 and underflow if subnormal */
+			FORCE_EVAL(z.bits.exp == 0 ? x/0x1p120f : x+0x1p120f);
+			return x;
+		}
 		return __tanl(x, 0, 0);
 	}
 
-- 
2.25.1