From ebbaf2180e6e32043837f570982c2ee86cf19eae Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Thu, 21 Nov 2013 01:01:57 +0000 Subject: [PATCH] math: lgamma cleanup (simpler sin(pi*x) for the negative case) * simplify sin_pi(x) (don't care about inexact here, the result is inexact anyway, and x is not so small to underflow) * in lgammal add the previously removed special case for x==1 and x==2 (to fix the sign of zero in downward rounding mode) * only define lgammal on supported long double platforms * change tgamma so the generated code is a bit smaller --- src/math/lgamma_r.c | 92 +++++++++++++++----------------------------- src/math/lgammaf_r.c | 92 +++++++++++++++----------------------------- src/math/lgammal.c | 88 ++++++++++++++---------------------------- src/math/tgamma.c | 40 +++++++++---------- 4 files changed, 110 insertions(+), 202 deletions(-) diff --git a/src/math/lgamma_r.c b/src/math/lgamma_r.c index 82e296f5..fff565d2 100644 --- a/src/math/lgamma_r.c +++ b/src/math/lgamma_r.c @@ -82,7 +82,6 @@ #include "libc.h" static const double -two52= 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */ pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */ a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */ @@ -147,91 +146,62 @@ w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */ w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */ w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ +/* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */ static double sin_pi(double x) { - double y,z; - int n,ix; + int n; - GET_HIGH_WORD(ix, x); - ix &= 0x7fffffff; + /* spurious inexact if odd int */ + x = 2.0*(x*0.5 - floor(x*0.5)); /* x mod 2.0 */ - if (ix < 0x3fd00000) - return __sin(pi*x, 0.0, 0); + n = (int)(x*4.0); + n = (n+1)/2; + x -= n*0.5f; + x *= pi; - y = -x; /* negative x is assumed */ - - /* - * argument reduction, make sure inexact flag not raised if input - * is an integer - */ - z = floor(y); - if (z != y) { /* inexact anyway */ - y *= 0.5; - y = 2.0*(y - floor(y)); /* y = |x| mod 2.0 */ - n = (int)(y*4.0); - } else { - if (ix >= 0x43400000) { - y = 0.0; /* y must be even */ - n = 0; - } else { - if (ix < 0x43300000) - z = y + two52; /* exact */ - GET_LOW_WORD(n, z); - n &= 1; - y = n; - n <<= 2; - } - } switch (n) { - case 0: y = __sin(pi*y, 0.0, 0); break; - case 1: - case 2: y = __cos(pi*(0.5-y), 0.0); break; - case 3: - case 4: y = __sin(pi*(1.0-y), 0.0, 0); break; - case 5: - case 6: y = -__cos(pi*(y-1.5), 0.0); break; - default: y = __sin(pi*(y-2.0), 0.0, 0); break; + default: /* case 4: */ + case 0: return __sin(x, 0.0, 0); + case 1: return __cos(x, 0.0); + case 2: return __sin(-x, 0.0, 0); + case 3: return -__cos(x, 0.0); } - return -y; } - double __lgamma_r(double x, int *signgamp) { - double t,y,z,nadj,p,p1,p2,p3,q,r,w; - int32_t hx; - int i,lx,ix; - - EXTRACT_WORDS(hx, lx, x); + union {double f; uint64_t i;} u = {x}; + double_t t,y,z,nadj,p,p1,p2,p3,q,r,w; + uint32_t ix; + int sign,i; /* purge off +-inf, NaN, +-0, tiny and negative arguments */ *signgamp = 1; - ix = hx & 0x7fffffff; + sign = u.i>>63; + ix = u.i>>32 & 0x7fffffff; if (ix >= 0x7ff00000) return x*x; - if ((ix|lx) == 0) - return 1.0/0.0; - if (ix < 0x3b900000) { /* |x|<2**-70, return -log(|x|) */ - if(hx < 0) { + if (ix < (0x3ff-70)<<20) { /* |x|<2**-70, return -log(|x|) */ + if(sign) { + x = -x; *signgamp = -1; - return -log(-x); } return -log(x); } - if (hx < 0) { - if (ix >= 0x43300000) /* |x|>=2**52, must be -integer */ - return 1.0/0.0; + if (sign) { + x = -x; t = sin_pi(x); if (t == 0.0) /* -integer */ - return 1.0/0.0; - nadj = log(pi/fabs(t*x)); - if (t < 0.0) + return 1.0/(x-x); + if (t > 0.0) *signgamp = -1; - x = -x; + else + t = -t; + nadj = log(pi/(t*x)); } /* purge off 1 and 2 */ - if (((ix - 0x3ff00000)|lx) == 0 || ((ix - 0x40000000)|lx) == 0) + if ((ix == 0x3ff00000 || ix == 0x40000000) && (uint32_t)u.i == 0) r = 0; /* for x < 2.0 */ else if (ix < 0x40000000) { @@ -306,7 +276,7 @@ double __lgamma_r(double x, int *signgamp) r = (x-0.5)*(t-1.0)+w; } else /* 2**58 <= x <= inf */ r = x*(log(x)-1.0); - if (hx < 0) + if (sign) r = nadj - r; return r; } diff --git a/src/math/lgammaf_r.c b/src/math/lgammaf_r.c index dc65bace..c5b43db5 100644 --- a/src/math/lgammaf_r.c +++ b/src/math/lgammaf_r.c @@ -17,7 +17,6 @@ #include "libc.h" static const float -two23= 8.3886080000e+06, /* 0x4b000000 */ pi = 3.1415927410e+00, /* 0x40490fdb */ a0 = 7.7215664089e-02, /* 0x3d9e233f */ a1 = 3.2246702909e-01, /* 0x3ea51a66 */ @@ -82,87 +81,58 @@ w4 = -5.9518753551e-04, /* 0xba1c065c */ w5 = 8.3633989561e-04, /* 0x3a5b3dd2 */ w6 = -1.6309292987e-03; /* 0xbad5c4e8 */ -static float sin_pif(float x) +/* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */ +static float sin_pi(float x) { - float y,z; - int n,ix; + double_t y; + int n; - GET_FLOAT_WORD(ix, x); - ix &= 0x7fffffff; + /* spurious inexact if odd int */ + x = 2*(x*0.5f - floorf(x*0.5f)); /* x mod 2.0 */ - if(ix < 0x3e800000) - return __sindf(pi*x); - - y = -x; /* negative x is assumed */ - - /* - * argument reduction, make sure inexact flag not raised if input - * is an integer - */ - z = floorf(y); - if (z != y) { /* inexact anyway */ - y *= 0.5f; - y = 2.0f*(y - floorf(y)); /* y = |x| mod 2.0 */ - n = (int)(y*4.0f); - } else { - if (ix >= 0x4b800000) { - y = 0.0f; /* y must be even */ - n = 0; - } else { - if (ix < 0x4b000000) - z = y + two23; /* exact */ - GET_FLOAT_WORD(n, z); - n &= 1; - y = n; - n <<= 2; - } - } + n = (int)(x*4); + n = (n+1)/2; + y = x - n*0.5f; + y *= 3.14159265358979323846; switch (n) { - case 0: y = __sindf(pi*y); break; - case 1: - case 2: y = __cosdf(pi*(0.5f - y)); break; - case 3: - case 4: y = __sindf(pi*(1.0f - y)); break; - case 5: - case 6: y = -__cosdf(pi*(y - 1.5f)); break; - default: y = __sindf(pi*(y - 2.0f)); break; + default: /* case 4: */ + case 0: return __sindf(y); + case 1: return __cosdf(y); + case 2: return __sindf(-y); + case 3: return -__cosdf(y); } - return -y; } - float __lgammaf_r(float x, int *signgamp) { + union {float f; uint32_t i;} u = {x}; float t,y,z,nadj,p,p1,p2,p3,q,r,w; - int32_t hx; - int i,ix; - - GET_FLOAT_WORD(hx, x); + uint32_t ix; + int i,sign; /* purge off +-inf, NaN, +-0, tiny and negative arguments */ *signgamp = 1; - ix = hx & 0x7fffffff; + sign = u.i>>31; + ix = u.i & 0x7fffffff; if (ix >= 0x7f800000) return x*x; - if (ix == 0) - return 1.0f/0.0f; if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */ - if (hx < 0) { + if (sign) { *signgamp = -1; - return -logf(-x); + x = -x; } return -logf(x); } - if (hx < 0) { - if (ix >= 0x4b000000) /* |x| >= 2**23, must be -integer */ - return 1.0f/0.0f; - t = sin_pif(x); + if (sign) { + x = -x; + t = sin_pi(x); if (t == 0.0f) /* -integer */ - return 1.0f/0.0f; - nadj = logf(pi/fabsf(t*x)); - if (t < 0.0f) + return 1.0f/(x-x); + if (t > 0.0f) *signgamp = -1; - x = -x; + else + t = -t; + nadj = logf(pi/(t*x)); } /* purge off 1 and 2 */ @@ -241,7 +211,7 @@ float __lgammaf_r(float x, int *signgamp) r = (x-0.5f)*(t-1.0f)+w; } else /* 2**58 <= x <= inf */ r = x*(logf(x)-1.0f); - if (hx < 0) + if (sign) r = nadj - r; return r; } diff --git a/src/math/lgammal.c b/src/math/lgammal.c index 58054e56..55ec5325 100644 --- a/src/math/lgammal.c +++ b/src/math/lgammal.c @@ -99,7 +99,6 @@ long double __lgammal_r(long double x, int *sg) #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 static const long double pi = 3.14159265358979323846264L, -two63 = 9.223372036854775808e18L, /* lgam(1+x) = 0.5 x + x a(x)/b(x) -0.268402099609375 <= x <= 0 @@ -201,61 +200,27 @@ w5 = 8.412723297322498080632E-4L, w6 = -1.880801938119376907179E-3L, w7 = 4.885026142432270781165E-3L; +/* sin(pi*x) assuming x > 2^-1000, if sin(pi*x)==0 the sign is arbitrary */ static long double sin_pi(long double x) { - union ldshape u = {x}; - uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48; - long double y, z; int n; - if (ix < 0x3ffd8000) /* 0.25 */ - return sinl(pi * x); - y = -x; /* x is assume negative */ + /* spurious inexact if odd int */ + x *= 0.5; + x = 2.0*(x - floorl(x)); /* x mod 2.0 */ - /* - * argument reduction, make sure inexact flag not raised if input - * is an integer - */ - z = floorl(y); - if (z != y) { /* inexact anyway */ - y *= 0.5; - y = 2.0*(y - floorl(y));/* y = |x| mod 2.0 */ - n = (int) (y*4.0); - } else { - if (ix >= 0x403f8000) { /* 2^64 */ - y = 0.0; /* y must be even */ - n = 0; - } else { - if (ix < 0x403e8000) /* 2^63 */ - z = y + two63; /* exact */ - u.f = z; - n = u.i.m & 1; - y = n; - n <<= 2; - } - } + n = (int)(x*4.0); + n = (n+1)/2; + x -= n*0.5f; + x *= pi; switch (n) { - case 0: - y = sinl(pi * y); - break; - case 1: - case 2: - y = cosl(pi * (0.5 - y)); - break; - case 3: - case 4: - y = sinl(pi * (1.0 - y)); - break; - case 5: - case 6: - y = -cosl(pi * (y - 1.5)); - break; - default: - y = sinl(pi * (y - 2.0)); - break; + default: /* case 4: */ + case 0: return __sinl(x, 0.0, 0); + case 1: return __cosl(x, 0.0); + case 2: return __sinl(-x, 0.0, 0); + case 3: return -__cosl(x, 0.0); } - return -y; } long double __lgammal_r(long double x, int *sg) { @@ -267,31 +232,32 @@ long double __lgammal_r(long double x, int *sg) { *sg = 1; - /* purge off +-inf, NaN, +-0, and negative arguments */ + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ if (ix >= 0x7fff0000) return x * x; - if (x == 0) { - *sg -= 2*sign; - return 1.0 / fabsl(x); - } if (ix < 0x3fc08000) { /* |x|<2**-63, return -log(|x|) */ if (sign) { *sg = -1; - return -logl(-x); + x = -x; } return -logl(x); } if (sign) { - t = sin_pi (x); + x = -x; + t = sin_pi(x); if (t == 0.0) - return 1.0 / fabsl(t); /* -integer */ - nadj = logl(pi / fabsl(t * x)); - if (t < 0.0) + return 1.0 / (x-x); /* -integer */ + if (t > 0.0) *sg = -1; - x = -x; + else + t = -t; + nadj = logl(pi / (t * x)); } - if (ix < 0x40008000) { /* x < 2.0 */ + /* purge off 1 and 2 (so the sign is ok with downward rounding) */ + if ((ix == 0x3fff8000 || ix == 0x40008000) && u.i.m == 0) { + r = 0; + } else if (ix < 0x40008000) { /* x < 2.0 */ if (ix <= 0x3ffee666) { /* 8.99993896484375e-1 */ /* lgamma(x) = lgamma(x+1) - log(x) */ r = -logl(x); @@ -376,6 +342,7 @@ long double __lgammal_r(long double x, int *sg) { } #endif +#if (LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024) || (LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384) extern int __signgam; long double lgammal(long double x) @@ -384,3 +351,4 @@ long double lgammal(long double x) } weak_alias(__lgammal_r, lgammal_r); +#endif diff --git a/src/math/tgamma.c b/src/math/tgamma.c index f91af735..28f6e0f8 100644 --- a/src/math/tgamma.c +++ b/src/math/tgamma.c @@ -26,7 +26,7 @@ most ideas and constants are from boost and python static const double pi = 3.141592653589793238462643383279502884; -/* sin(pi x) with x > 0 && isnormal(x) assumption */ +/* sin(pi x) with x > 0x1p-100, if sin(pi*x)==0 the sign is arbitrary */ static double sinpi(double x) { int n; @@ -49,8 +49,7 @@ static double sinpi(double x) case 1: return __cos(x, 0); case 2: - /* sin(0-x) and -sin(x) have different sign at 0 */ - return __sin(0-x, 0, 0); + return __sin(-x, 0, 0); case 3: return -__cos(x, 0); } @@ -108,35 +107,33 @@ static double S(double x) double tgamma(double x) { - double absx, y, dy, z, r; + union {double f; uint64_t i;} u = {x}; + double absx, y; + double_t dy, z, r; + uint32_t ix = u.i>>32 & 0x7fffffff; + int sign = u.i>>63; /* special cases */ - if (!isfinite(x)) + if (ix >= 0x7ff00000) /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ return x + INFINITY; + if (ix < (0x3ff-54)<<20) + /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ + return 1/x; /* integer arguments */ /* raise inexact when non-integer */ if (x == floor(x)) { - if (x == 0) - /* tgamma(+-0)=+-inf with divide-by-zero */ - return 1/x; - if (x < 0) + if (sign) return 0/0.0; if (x <= sizeof fact/sizeof *fact) return fact[(int)x - 1]; } - absx = fabs(x); - - /* x ~ 0: tgamma(x) ~ 1/x */ - if (absx < 0x1p-54) - return 1/x; - /* x >= 172: tgamma(x)=inf with overflow */ /* x =< -184: tgamma(x)=+-0 with underflow */ - if (absx >= 184) { - if (x < 0) { + if (ix >= 0x40670000) { /* |x| >= 184 */ + if (sign) { FORCE_EVAL((float)(0x1p-126/x)); if (floor(x) * 0.5 == floor(x * 0.5)) return 0; @@ -146,6 +143,8 @@ double tgamma(double x) return x; } + absx = sign ? -x : x; + /* handle the error of x + g - 0.5 */ y = absx + gmhalf; if (absx > gmhalf) { @@ -160,20 +159,21 @@ double tgamma(double x) r = S(absx) * exp(-y); if (x < 0) { /* reflection formula for negative x */ + /* sinpi(absx) is not 0, integers are already handled */ r = -pi / (sinpi(absx) * absx * r); dy = -dy; z = -z; } r += dy * (gmhalf+0.5) * r / y; z = pow(y, 0.5*z); - r = r * z * z; - return r; + y = r * z * z; + return y; } #if 0 double __lgamma_r(double x, int *sign) { - double r, absx, z, zz, w; + double r, absx; *sign = 1; -- 2.25.1