From 34660d73bd0db29469d2758e1b48d2360edf3a2f Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Wed, 4 Sep 2013 15:52:23 +0000 Subject: [PATCH] math: fix remaining old long double code (erfl, fmal, lgammal, scalbnl) in lgammal don't handle 1 and 2 specially, in fma use the new ldshape union instead of ld80 one. --- src/math/erfl.c | 37 ++++++++++++++----------------------- src/math/fma.c | 38 ++++++++++++++------------------------ src/math/fmal.c | 30 ++++++++++++++++-------------- src/math/lgammal.c | 45 +++++++++++++++++---------------------------- src/math/scalbnl.c | 8 ++++---- 5 files changed, 65 insertions(+), 93 deletions(-) diff --git a/src/math/erfl.c b/src/math/erfl.c index 2fd3c440..42bb1a17 100644 --- a/src/math/erfl.c +++ b/src/math/erfl.c @@ -253,8 +253,8 @@ static long double erfc1(long double x) static long double erfc2(uint32_t ix, long double x) { + union ldshape u; long double s,z,R,S; - uint32_t i0,i1; if (ix < 0x3fffa000) /* 0.84375 <= |x| < 1.25 */ return erfc1(x); @@ -288,28 +288,22 @@ static long double erfc2(uint32_t ix, long double x) S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] + s * (sc[4] + s)))); } - z = x; - GET_LDOUBLE_WORDS(ix, i0, i1, z); - i1 = 0; - i0 &= 0xffffff00; - SET_LDOUBLE_WORDS(z, ix, i0, i1); + u.f = x; + u.i.m &= -1ULL << 40; + z = u.f; return expl(-z*z - 0.5625) * expl((z - x) * (z + x) + R / S) / x; } long double erfl(long double x) { long double r, s, z, y; - uint32_t i0, i1, ix; - int sign; + union ldshape u = {x}; + uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48; + int sign = u.i.se >> 15; - GET_LDOUBLE_WORDS(ix, i0, i1, x); - sign = ix >> 15; - ix &= 0x7fff; - if (ix == 0x7fff) { + if (ix >= 0x7fff0000) /* erf(nan)=nan, erf(+-inf)=+-1 */ return 1 - 2*sign + 1/x; - } - ix = (ix << 16) | (i0 >> 16); if (ix < 0x3ffed800) { /* |x| < 0.84375 */ if (ix < 0x3fde8000) { /* |x| < 2**-33 */ return 0.125 * (8 * x + efx8 * x); /* avoid underflow */ @@ -332,17 +326,13 @@ long double erfl(long double x) long double erfcl(long double x) { long double r, s, z, y; - uint32_t i0, i1, ix; - int sign; + union ldshape u = {x}; + uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48; + int sign = u.i.se >> 15; - GET_LDOUBLE_WORDS(ix, i0, i1, x); - sign = ix>>15; - ix &= 0x7fff; - if (ix == 0x7fff) + if (ix >= 0x7fff0000) /* erfc(nan) = nan, erfc(+-inf) = 0,2 */ return 2*sign + 1/x; - - ix = (ix << 16) | (i0 >> 16); if (ix < 0x3ffed800) { /* |x| < 0.84375 */ if (ix < 0x3fbe0000) /* |x| < 2**-65 */ return 1.0 - x; @@ -358,6 +348,7 @@ long double erfcl(long double x) } if (ix < 0x4005d600) /* |x| < 107 */ return sign ? 2 - erfc2(ix,x) : erfc2(ix,x); - return sign ? 2 - 0x1p-16382L : 0x1p-16382L*0x1p-16382L; + y = 0x1p-16382L; + return sign ? 2 - y : y*y; } #endif diff --git a/src/math/fma.c b/src/math/fma.c index 850a4a6c..1b1c1321 100644 --- a/src/math/fma.c +++ b/src/math/fma.c @@ -2,16 +2,6 @@ #include "libm.h" #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384 -union ld80 { - long double x; - struct { - uint64_t m; - uint16_t e : 15; - uint16_t s : 1; - uint16_t pad; - } bits; -}; - /* exact add, assumes exponent_x >= exponent_y */ static void add(long double *hi, long double *lo, long double x, long double y) { @@ -45,25 +35,25 @@ return an adjusted hi so that rounding it to double (or less) precision is corre */ static long double adjust(long double hi, long double lo) { - union ld80 uhi, ulo; + union ldshape uhi, ulo; if (lo == 0) return hi; - uhi.x = hi; - if (uhi.bits.m & 0x3ff) + uhi.f = hi; + if (uhi.i.m & 0x3ff) return hi; - ulo.x = lo; - if (uhi.bits.s == ulo.bits.s) - uhi.bits.m++; + ulo.f = lo; + if ((uhi.i.se & 0x8000) == (ulo.i.se & 0x8000)) + uhi.i.m++; else { - uhi.bits.m--; /* handle underflow and take care of ld80 implicit msb */ - if (uhi.bits.m == (uint64_t)-1/2) { - uhi.bits.m *= 2; - uhi.bits.e--; + if (uhi.i.m << 1 == 0) { + uhi.i.m = 0; + uhi.i.se--; } + uhi.i.m--; } - return uhi.x; + return uhi.f; } /* adjusted add so the result is correct when rounded to double (or less) precision */ @@ -82,9 +72,9 @@ static long double dmul(long double x, long double y) static int getexp(long double x) { - union ld80 u; - u.x = x; - return u.bits.e; + union ldshape u; + u.f = x; + return u.i.se & 0x7fff; } double fma(double x, double y, double z) diff --git a/src/math/fmal.c b/src/math/fmal.c index 87e30fcf..c68db255 100644 --- a/src/math/fmal.c +++ b/src/math/fmal.c @@ -34,6 +34,13 @@ long double fmal(long double x, long double y, long double z) } #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 #include +#if LDBL_MANT_DIG == 64 +#define LASTBIT(u) (u.i.m & 1) +#define SPLIT (0x1p32L + 1) +#elif LDBL_MANT_DIG == 113 +#define LASTBIT(u) (u.i.lo & 1) +#define SPLIT (0x1p57L + 1) +#endif /* * A struct dd represents a floating-point number with twice the precision @@ -75,12 +82,12 @@ static inline struct dd dd_add(long double a, long double b) static inline long double add_adjusted(long double a, long double b) { struct dd sum; - union IEEEl2bits u; + union ldshape u; sum = dd_add(a, b); if (sum.lo != 0) { - u.e = sum.hi; - if ((u.bits.manl & 1) == 0) + u.f = sum.hi; + if (!LASTBIT(u)) sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); } return (sum.hi); @@ -95,7 +102,7 @@ static inline long double add_and_denormalize(long double a, long double b, int { struct dd sum; int bits_lost; - union IEEEl2bits u; + union ldshape u; sum = dd_add(a, b); @@ -110,9 +117,9 @@ static inline long double add_and_denormalize(long double a, long double b, int * break the ties manually. */ if (sum.lo != 0) { - u.e = sum.hi; - bits_lost = -u.bits.exp - scale + 1; - if (bits_lost != 1 ^ (int)(u.bits.manl & 1)) + u.f = sum.hi; + bits_lost = -u.i.se - scale + 1; + if ((bits_lost != 1) ^ LASTBIT(u)) sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); } return scalbnl(sum.hi, scale); @@ -125,20 +132,15 @@ static inline long double add_and_denormalize(long double a, long double b, int */ static inline struct dd dd_mul(long double a, long double b) { -#if LDBL_MANT_DIG == 64 - static const long double split = 0x1p32L + 1.0; -#elif LDBL_MANT_DIG == 113 - static const long double split = 0x1p57L + 1.0; -#endif struct dd ret; long double ha, hb, la, lb, p, q; - p = a * split; + p = a * SPLIT; ha = a - p; ha += p; la = a - ha; - p = b * split; + p = b * SPLIT; hb = b - p; hb += p; lb = b - hb; diff --git a/src/math/lgammal.c b/src/math/lgammal.c index 56d7866d..5d56358e 100644 --- a/src/math/lgammal.c +++ b/src/math/lgammal.c @@ -202,13 +202,11 @@ w7 = 4.885026142432270781165E-3L; static long double sin_pi(long double x) { + union ldshape u = {x}; + uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48; long double y, z; - int n, ix; - uint32_t se, i0, i1; + int n; - GET_LDOUBLE_WORDS(se, i0, i1, x); - ix = se & 0x7fff; - ix = (ix << 16) | (i0 >> 16); if (ix < 0x3ffd8000) /* 0.25 */ return sinl(pi * x); y = -x; /* x is assume negative */ @@ -229,8 +227,8 @@ static long double sin_pi(long double x) } else { if (ix < 0x403e8000) /* 2^63 */ z = y + two63; /* exact */ - GET_LDOUBLE_WORDS(se, i0, i1, z); - n = i1 & 1; + u.f = z; + n = u.i.m & 1; y = n; n <<= 2; } @@ -261,33 +259,28 @@ static long double sin_pi(long double x) long double __lgammal_r(long double x, int *sg) { long double t, y, z, nadj, p, p1, p2, q, r, w; - int i, ix; - uint32_t se, i0, i1; + union ldshape u = {x}; + uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48; + int sign = u.i.se >> 15; + int i; *sg = 1; - GET_LDOUBLE_WORDS(se, i0, i1, x); - ix = se & 0x7fff; - - if ((ix | i0 | i1) == 0) { - if (se & 0x8000) - *sg = -1; - return 1.0 / fabsl(x); - } - - ix = (ix << 16) | (i0 >> 16); /* purge off +-inf, NaN, +-0, and negative arguments */ if (ix >= 0x7fff0000) return x * x; - + if (x == 0) { + *sg -= 2*sign; + return 1.0 / fabsl(x); + } if (ix < 0x3fc08000) { /* |x|<2**-63, return -log(|x|) */ - if (se & 0x8000) { + if (sign) { *sg = -1; return -logl(-x); } return -logl(x); } - if (se & 0x8000) { + if (sign) { t = sin_pi (x); if (t == 0.0) return 1.0 / fabsl(t); /* -integer */ @@ -297,11 +290,7 @@ long double __lgammal_r(long double x, int *sg) { x = -x; } - /* purge off 1 and 2 */ - if ((((ix - 0x3fff8000) | i0 | i1) == 0) || - (((ix - 0x40008000) | i0 | i1) == 0)) - r = 0; - else if (ix < 0x40008000) { /* x < 2.0 */ + if (ix < 0x40008000) { /* x < 2.0 */ if (ix <= 0x3ffee666) { /* 8.99993896484375e-1 */ /* lgamma(x) = lgamma(x+1) - log(x) */ r = -logl(x); @@ -380,7 +369,7 @@ long double __lgammal_r(long double x, int *sg) { r = (x - 0.5) * (t - 1.0) + w; } else /* 2**66 <= x <= inf */ r = x * (logl(x) - 1.0); - if (se & 0x8000) + if (sign) r = nadj - r; return r; } diff --git a/src/math/scalbnl.c b/src/math/scalbnl.c index 7ad7688b..08a4c587 100644 --- a/src/math/scalbnl.c +++ b/src/math/scalbnl.c @@ -8,7 +8,7 @@ long double scalbnl(long double x, int n) #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 long double scalbnl(long double x, int n) { - union IEEEl2bits scale; + union ldshape u; if (n > 16383) { x *= 0x1p16383L; @@ -29,8 +29,8 @@ long double scalbnl(long double x, int n) n = -16382; } } - scale.e = 1.0; - scale.bits.exp = 0x3fff + n; - return x * scale.e; + u.f = 1.0; + u.i.se = 0x3fff + n; + return x * u.f; } #endif -- 2.25.1