From bfda37935867f9bf271d6074db0accf05c63ad10 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sat, 18 May 2013 14:40:22 +0000 Subject: [PATCH] math: sin cos cleanup * use unsigned arithmetics * use unsigned to store arg reduction quotient (so n&3 is understood) * remove z=0.0 variables, use literal 0 * raise underflow and inexact exceptions properly when x is small * fix spurious underflow in tanl --- src/math/cos.c | 20 +++++++++++--------- src/math/cosf.c | 33 +++++++++++++++++++-------------- src/math/cosl.c | 22 +++++++++++----------- src/math/sin.c | 15 ++++++++------- src/math/sincos.c | 12 ++++++------ src/math/sincosf.c | 45 ++++++++++++++++++++++++--------------------- src/math/sincosl.c | 20 +++++++++++--------- src/math/sinf.c | 33 ++++++++++++++++++--------------- src/math/sinl.c | 29 ++++++++++++++--------------- src/math/tanl.c | 11 ++++++----- 10 files changed, 128 insertions(+), 112 deletions(-) diff --git a/src/math/cos.c b/src/math/cos.c index 76990e7f..ee97f68b 100644 --- a/src/math/cos.c +++ b/src/math/cos.c @@ -44,26 +44,28 @@ double cos(double x) { - double y[2],z=0.0; - int32_t n, ix; + double y[2]; + uint32_t ix; + unsigned n; GET_HIGH_WORD(ix, x); + ix &= 0x7fffffff; /* |x| ~< pi/4 */ - ix &= 0x7fffffff; if (ix <= 0x3fe921fb) { - if (ix < 0x3e46a09e) /* if x < 2**-27 * sqrt(2) */ - /* raise inexact if x != 0 */ - if ((int)x == 0) - return 1.0; - return __cos(x, z); + if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */ + /* raise inexact if x!=0 */ + FORCE_EVAL(x + 0x1p120f); + return 1.0; + } + return __cos(x, 0); } /* cos(Inf or NaN) is NaN */ if (ix >= 0x7ff00000) return x-x; - /* argument reduction needed */ + /* argument reduction */ n = __rem_pio2(x, y); switch (n&3) { case 0: return __cos(y[0], y[1]); diff --git a/src/math/cosf.c b/src/math/cosf.c index 4d94130f..23f3e5bf 100644 --- a/src/math/cosf.c +++ b/src/math/cosf.c @@ -26,34 +26,39 @@ c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ float cosf(float x) { double y; - int32_t n, hx, ix; + uint32_t ix; + unsigned n, sign; + + GET_FLOAT_WORD(ix, x); + sign = ix >> 31; + ix &= 0x7fffffff; - GET_FLOAT_WORD(hx, x); - ix = hx & 0x7fffffff; if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ - if (ix < 0x39800000) /* |x| < 2**-12 */ - if ((int)x == 0) /* raise inexact if x != 0 */ - return 1.0; + if (ix < 0x39800000) { /* |x| < 2**-12 */ + /* raise inexact if x != 0 */ + FORCE_EVAL(x + 0x1p120f); + return 1.0f; + } return __cosdf(x); } if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */ if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */ - return -__cosdf(hx > 0 ? x-c2pio2 : x+c2pio2); + return -__cosdf(sign ? x+c2pio2 : x-c2pio2); else { - if (hx > 0) - return __sindf(c1pio2 - x); - else + if (sign) return __sindf(x + c1pio2); + else + return __sindf(c1pio2 - x); } } if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */ if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */ - return __cosdf(hx > 0 ? x-c4pio2 : x+c4pio2); + return __cosdf(sign ? x+c4pio2 : x-c4pio2); else { - if (hx > 0) - return __sindf(x - c3pio2); + if (sign) + return __sindf(-x - c3pio2); else - return __sindf(-c3pio2 - x); + return __sindf(x - c3pio2); } } diff --git a/src/math/cosl.c b/src/math/cosl.c index 25d9005a..0794d284 100644 --- a/src/math/cosl.c +++ b/src/math/cosl.c @@ -39,30 +39,30 @@ long double cosl(long double x) { long double cosl(long double x) { union IEEEl2bits z; - int e0; + unsigned n; long double y[2]; long double hi, lo; z.e = x; z.bits.sign = 0; - /* If x = +-0 or x is a subnormal number, then cos(x) = 1 */ - if (z.bits.exp == 0) - return 1.0; - /* If x = NaN or Inf, then cos(x) = NaN. */ - if (z.bits.exp == 32767) + if (z.bits.exp == 0x7fff) return (x - x) / (x - x); - /* Optimize the case where x is already within range. */ - if (z.e < M_PI_4) + /* |x| < (double)pi/4 */ + if (z.e < M_PI_4) { + /* |x| < 0x1p-64 */ + if (z.bits.exp < 0x3fff - 64) + /* raise inexact if x!=0 */ + return 1.0 + x; return __cosl(z.e, 0); + } - e0 = __rem_pio2l(x, y); + n = __rem_pio2l(x, y); hi = y[0]; lo = y[1]; - - switch (e0 & 3) { + switch (n & 3) { case 0: hi = __cosl(hi, lo); break; diff --git a/src/math/sin.c b/src/math/sin.c index 8e430f85..055e215b 100644 --- a/src/math/sin.c +++ b/src/math/sin.c @@ -44,21 +44,22 @@ double sin(double x) { - double y[2], z=0.0; - int32_t n, ix; + double y[2]; + uint32_t ix; + unsigned n; /* High word of x. */ GET_HIGH_WORD(ix, x); + ix &= 0x7fffffff; /* |x| ~< pi/4 */ - ix &= 0x7fffffff; if (ix <= 0x3fe921fb) { if (ix < 0x3e500000) { /* |x| < 2**-26 */ - /* raise inexact if x != 0 */ - if ((int)x == 0) - return x; + /* raise inexact if x != 0 and underflow if subnormal*/ + FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f); + return x; } - return __sin(x, z, 0); + return __sin(x, 0.0, 0); } /* sin(Inf or NaN) is NaN */ diff --git a/src/math/sincos.c b/src/math/sincos.c index 442e285e..49f8a098 100644 --- a/src/math/sincos.c +++ b/src/math/sincos.c @@ -15,7 +15,8 @@ void sincos(double x, double *sin, double *cos) { double y[2], s, c; - uint32_t n, ix; + uint32_t ix; + unsigned n; GET_HIGH_WORD(ix, x); ix &= 0x7fffffff; @@ -24,11 +25,10 @@ void sincos(double x, double *sin, double *cos) if (ix <= 0x3fe921fb) { /* if |x| < 2**-27 * sqrt(2) */ if (ix < 0x3e46a09e) { - /* raise inexact if x != 0 */ - if ((int)x == 0) { - *sin = x; - *cos = 1.0; - } + /* raise inexact if x!=0 and underflow if subnormal */ + FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f); + *sin = x; + *cos = 1.0; return; } *sin = __sin(x, 0.0, 0); diff --git a/src/math/sincosf.c b/src/math/sincosf.c index 5e3b9a41..1b50f01c 100644 --- a/src/math/sincosf.c +++ b/src/math/sincosf.c @@ -25,21 +25,23 @@ s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ void sincosf(float x, float *sin, float *cos) { - double y, s, c; - uint32_t n, hx, ix; + double y; + float_t s, c; + uint32_t ix; + unsigned n, sign; - GET_FLOAT_WORD(hx, x); - ix = hx & 0x7fffffff; + GET_FLOAT_WORD(ix, x); + sign = ix >> 31; + ix &= 0x7fffffff; /* |x| ~<= pi/4 */ if (ix <= 0x3f490fda) { /* |x| < 2**-12 */ if (ix < 0x39800000) { - /* raise inexact if x != 0 */ - if((int)x == 0) { - *sin = x; - *cos = 1.0f; - } + /* raise inexact if x!=0 and underflow if subnormal */ + FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f); + *sin = x; + *cos = 1.0f; return; } *sin = __sindf(x); @@ -50,34 +52,35 @@ void sincosf(float x, float *sin, float *cos) /* |x| ~<= 5*pi/4 */ if (ix <= 0x407b53d1) { if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */ - if (hx < 0x80000000) { - *sin = __cosdf(x - s1pio2); - *cos = __sindf(s1pio2 - x); - } else { + if (sign) { *sin = -__cosdf(x + s1pio2); *cos = __sindf(x + s1pio2); + } else { + *sin = __cosdf(s1pio2 - x); + *cos = __sindf(s1pio2 - x); } return; } - *sin = __sindf(hx < 0x80000000 ? s2pio2 - x : -s2pio2 - x); - *cos = -__cosdf(hx < 0x80000000 ? x - s2pio2 : x + s2pio2); + /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ + *sin = -__sindf(sign ? x + s2pio2 : x - s2pio2); + *cos = -__cosdf(sign ? x + s2pio2 : x - s2pio2); return; } /* |x| ~<= 9*pi/4 */ if (ix <= 0x40e231d5) { if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */ - if (hx < 0x80000000) { + if (sign) { + *sin = __cosdf(x + s3pio2); + *cos = -__sindf(x + s3pio2); + } else { *sin = -__cosdf(x - s3pio2); *cos = __sindf(x - s3pio2); - } else { - *sin = __cosdf(x + s3pio2); - *cos = __sindf(-s3pio2 - x); } return; } - *sin = __sindf(hx < 0x80000000 ? x - s4pio2 : x + s4pio2); - *cos = __cosdf(hx < 0x80000000 ? x - s4pio2 : x + s4pio2); + *sin = __sindf(sign ? x + s4pio2 : x - s4pio2); + *cos = __cosdf(sign ? x + s4pio2 : x - s4pio2); return; } diff --git a/src/math/sincosl.c b/src/math/sincosl.c index d632fe6f..5db69bd6 100644 --- a/src/math/sincosl.c +++ b/src/math/sincosl.c @@ -10,27 +10,29 @@ void sincosl(long double x, long double *sin, long double *cos) void sincosl(long double x, long double *sin, long double *cos) { union IEEEl2bits u; - int n; + unsigned n; long double y[2], s, c; u.e = x; u.bits.sign = 0; - /* x = +-0 or subnormal */ - if (!u.bits.exp) { - *sin = x; - *cos = 1.0; - return; - } - /* x = nan or inf */ if (u.bits.exp == 0x7fff) { *sin = *cos = x - x; return; } - /* |x| < pi/4 */ + /* |x| < (double)pi/4 */ if (u.e < M_PI_4) { + /* |x| < 0x1p-64 */ + if (u.bits.exp < 0x3fff - 64) { + /* raise underflow if subnormal */ + if (u.bits.exp == 0) FORCE_EVAL(x*0x1p-120f); + *sin = x; + /* raise inexact if x!=0 */ + *cos = 1.0 + x; + return; + } *sin = __sinl(x, 0, 0); *cos = __cosl(x, 0); return; diff --git a/src/math/sinf.c b/src/math/sinf.c index dcca67af..64e39f50 100644 --- a/src/math/sinf.c +++ b/src/math/sinf.c @@ -26,35 +26,38 @@ s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ float sinf(float x) { double y; - int32_t n, hx, ix; + uint32_t ix; + int n, sign; - GET_FLOAT_WORD(hx, x); - ix = hx & 0x7fffffff; + GET_FLOAT_WORD(ix, x); + sign = ix >> 31; + ix &= 0x7fffffff; if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ - if (ix < 0x39800000) /* |x| < 2**-12 */ - /* raise inexact if x != 0 */ - if((int)x == 0) - return x; + if (ix < 0x39800000) { /* |x| < 2**-12 */ + /* raise inexact if x!=0 and underflow if subnormal */ + FORCE_EVAL(ix < 0x00800000 ? x/0x1p120f : x+0x1p120f); + return x; + } return __sindf(x); } if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */ if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */ - if (hx > 0) - return __cosdf(x - s1pio2); - else + if (sign) return -__cosdf(x + s1pio2); + else + return __cosdf(x - s1pio2); } - return __sindf(hx > 0 ? s2pio2 - x : -s2pio2 - x); + return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2)); } if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */ if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */ - if (hx > 0) - return -__cosdf(x - s3pio2); - else + if (sign) return __cosdf(x + s3pio2); + else + return -__cosdf(x - s3pio2); } - return __sindf(hx > 0 ? x - s4pio2 : x + s4pio2); + return __sindf(sign ? x + s4pio2 : x - s4pio2); } /* sin(Inf or NaN) is NaN */ diff --git a/src/math/sinl.c b/src/math/sinl.c index 7e0b44f4..6ca99986 100644 --- a/src/math/sinl.c +++ b/src/math/sinl.c @@ -37,33 +37,32 @@ long double sinl(long double x) long double sinl(long double x) { union IEEEl2bits z; - int e0, s; + unsigned n; long double y[2]; long double hi, lo; z.e = x; - s = z.bits.sign; z.bits.sign = 0; - /* If x = +-0 or x is a subnormal number, then sin(x) = x */ - if (z.bits.exp == 0) - return x; - /* If x = NaN or Inf, then sin(x) = NaN. */ - if (z.bits.exp == 32767) + if (z.bits.exp == 0x7fff) return (x - x) / (x - x); - /* Optimize the case where x is already within range. */ + /* |x| < (double)pi/4 */ if (z.e < M_PI_4) { - hi = __sinl(z.e, 0, 0); - return s ? -hi : hi; + /* |x| < 0x1p-64 */ + if (z.bits.exp < 0x3fff - 64) { + /* raise inexact if x!=0 and underflow if subnormal */ + FORCE_EVAL(z.bits.exp == 0 ? x/0x1p120f : x+0x1p120f); + return x; + } + return __sinl(x, 0.0, 0); } - e0 = __rem_pio2l(x, y); + n = __rem_pio2l(x, y); hi = y[0]; lo = y[1]; - - switch (e0 & 3) { + switch (n & 3) { case 0: hi = __sinl(hi, lo, 1); break; @@ -71,10 +70,10 @@ long double sinl(long double x) hi = __cosl(hi, lo); break; case 2: - hi = - __sinl(hi, lo, 1); + hi = -__sinl(hi, lo, 1); break; case 3: - hi = - __cosl(hi, lo); + hi = -__cosl(hi, lo); break; } return hi; diff --git a/src/math/tanl.c b/src/math/tanl.c index 3b51e011..546c7a02 100644 --- a/src/math/tanl.c +++ b/src/math/tanl.c @@ -53,11 +53,12 @@ long double tanl(long double x) /* |x| < (double)pi/4 */ if (z.e < M_PI_4) { - /* x = +-0 or x is subnormal */ - if (z.bits.exp == 0) - /* inexact and underflow if x!=0 */ - return x + x*0x1p-120f; - /* can raise spurious underflow */ + /* |x| < 0x1p-64 */ + if (z.bits.exp < 0x3fff - 64) { + /* raise inexact if x!=0 and underflow if subnormal */ + FORCE_EVAL(z.bits.exp == 0 ? x/0x1p120f : x+0x1p120f); + return x; + } return __tanl(x, 0, 0); } -- 2.25.1