From e42a977fe5dbe48ba45072ab82886e6b5a694487 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sun, 16 Dec 2012 19:52:42 +0100 Subject: [PATCH] math: tanh.c cleanup similar to sinh, cosh. Comments are kept in the double version of the function. Compared to fdlibm/freebsd, we partition the domain into one more part and select different threshold points: now the [log(5/3)/2,log(3)/2] and [log(3)/2,inf] domains should have <1.5ulp error (so only the last bit may be wrong, assuming good exp, expm1). (Note that log(3)/2 and log(5/3)/2 are the points where tanh changes resolution: tanh(log(3)/2)=0.5, tanh(log(5/3)/2)=0.25.) For some x < log(5/3)/2 (~=0.2554) the error can be >1.5ulp but it should be <2ulp (the freebsd code had some >2ulp errors in [0.255,1]). Even with the extra logic, the new code produces smaller object files. --- src/math/tanh.c | 94 ++++++++++++++++-------------------------------- src/math/tanhf.c | 70 +++++++++++++----------------------- src/math/tanhl.c | 92 ++++++++++++++--------------------------------- 3 files changed, 83 insertions(+), 173 deletions(-) diff --git a/src/math/tanh.c b/src/math/tanh.c index 21138643..0e766c5c 100644 --- a/src/math/tanh.c +++ b/src/math/tanh.c @@ -1,73 +1,41 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_tanh.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* Tanh(x) - * Return the Hyperbolic Tangent of x - * - * Method : - * x -x - * e - e - * 0. tanh(x) is defined to be ----------- - * x -x - * e + e - * 1. reduce x to non-negative by tanh(-x) = -tanh(x). - * 2. 
0 <= x < 2**-28 : tanh(x) := x with inexact if x != 0 - * -t - * 2**-28 <= x < 1 : tanh(x) := -----; t = expm1(-2x) - * t + 2 - * 2 - * 1 <= x < 22 : tanh(x) := 1 - -----; t = expm1(2x) - * t + 2 - * 22 <= x <= INF : tanh(x) := 1. - * - * Special cases: - * tanh(NaN) is NaN; - * only tanh(0)=0 is exact for finite argument. - */ - #include "libm.h" -static const double tiny = 1.0e-300, huge = 1.0e300; - +/* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x)) + * = (exp(2*x) - 1)/(exp(2*x) - 1 + 2) + * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) + */ double tanh(double x) { - double t,z; - int32_t jx,ix; - - GET_HIGH_WORD(jx, x); - ix = jx & 0x7fffffff; + union {double f; uint64_t i;} u = {.f = x}; + uint32_t w; + int sign; + double t; - /* x is INF or NaN */ - if (ix >= 0x7ff00000) { - if (jx >= 0) - return 1.0f/x + 1.0f; /* tanh(+-inf)=+-1 */ - else - return 1.0f/x - 1.0f; /* tanh(NaN) = NaN */ - } + /* x = |x| */ + sign = u.i >> 63; + u.i &= (uint64_t)-1/2; + x = u.f; + w = u.i >> 32; - if (ix < 0x40360000) { /* |x| < 22 */ - if (ix < 0x3e300000) { /* |x| < 2**-28 */ - /* tanh(tiny) = tiny with inexact */ - if (huge+x > 1.0f) - return x; - } - if (ix >= 0x3ff00000) { /* |x| >= 1 */ - t = expm1(2.0f*fabs(x)); - z = 1.0f - 2.0f/(t+2.0f); + if (w > 0x3fe193ea) { + /* |x| > log(3)/2 ~= 0.5493 or nan */ + if (w > 0x40340000) { + /* |x| > 20 or nan */ + /* note: this branch avoids raising overflow */ + /* raise inexact if x!=+-inf and handle nan */ + t = 1 + 0/(x + 0x1p-120f); } else { - t = expm1(-2.0f*fabs(x)); - z= -t/(t+2.0f); + t = expm1(2*x); + t = 1 - 2/(t+2); } - } else { /* |x| >= 22, return +-1 */ - z = 1.0f - tiny; /* raise inexact */ + } else if (w > 0x3fd058ae) { + /* |x| > log(5/3)/2 ~= 0.2554 */ + t = expm1(2*x); + t = t/(t+2); + } else { + /* |x| is small, up to 2ulp error in [0.1,0.2554] */ + t = expm1(-2*x); + t = -t/(t+2); } - return jx >= 0 ? z : -z; + return sign ? 
-t : t; } diff --git a/src/math/tanhf.c b/src/math/tanhf.c index 7cb459d0..8099ec30 100644 --- a/src/math/tanhf.c +++ b/src/math/tanhf.c @@ -1,55 +1,35 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_tanhf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - #include "libm.h" -static const float -tiny = 1.0e-30, -huge = 1.0e30; - float tanhf(float x) { - float t,z; - int32_t jx,ix; + union {float f; uint32_t i;} u = {.f = x}; + uint32_t w; + int sign; + float t; - GET_FLOAT_WORD(jx, x); - ix = jx & 0x7fffffff; + /* x = |x| */ + sign = u.i >> 31; + u.i &= 0x7fffffff; + x = u.f; + w = u.i; - /* x is INF or NaN */ - if(ix >= 0x7f800000) { - if (jx >= 0) - return 1.0f/x + 1.0f; /* tanh(+-inf)=+-1 */ - else - return 1.0f/x - 1.0f; /* tanh(NaN) = NaN */ - } - - if (ix < 0x41100000) { /* |x| < 9 */ - if (ix < 0x39800000) { /* |x| < 2**-12 */ - /* tanh(tiny) = tiny with inexact */ - if (huge+x > 1.0f) - return x; - } - if (ix >= 0x3f800000) { /* |x|>=1 */ - t = expm1f(2.0f*fabsf(x)); - z = 1.0f - 2.0f/(t+2.0f); + if (w > 0x3f0c9f54) { + /* |x| > log(3)/2 ~= 0.5493 or nan */ + if (w > 0x41200000) { + /* |x| > 10 */ + t = 1 + 0/(x + 0x1p-120f); } else { - t = expm1f(-2.0f*fabsf(x)); - z = -t/(t+2.0f); + t = expm1f(2*x); + t = 1 - 2/(t+2); } - } else { /* |x| >= 9, return +-1 */ - z = 1.0f - tiny; /* raise inexact */ + } else if (w > 0x3e82c578) { + /* |x| > log(5/3)/2 ~= 0.2554 */ + t = expm1f(2*x); + t = t/(t+2); + } else { + /* |x| is small */ + t = expm1f(-2*x); + t = -t/(t+2); } - return jx >= 0 ? z : -z; + return sign ? 
-t : t; } diff --git a/src/math/tanhl.c b/src/math/tanhl.c index 92efb20d..66559e9f 100644 --- a/src/math/tanhl.c +++ b/src/math/tanhl.c @@ -1,38 +1,3 @@ -/* origin: OpenBSD /usr/src/lib/libm/src/ld80/s_tanhl.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* tanhl(x) - * Return the Hyperbolic Tangent of x - * - * Method : - * x -x - * e - e - * 0. tanhl(x) is defined to be ----------- - * x -x - * e + e - * 1. reduce x to non-negative by tanhl(-x) = -tanhl(x). - * 2. 0 <= x <= 2**-55 : tanhl(x) := x*(one+x) - * -t - * 2**-55 < x <= 1 : tanhl(x) := -----; t = expm1l(-2x) - * t + 2 - * 2 - * 1 <= x <= 23.0 : tanhl(x) := 1- ----- ; t=expm1l(2x) - * t + 2 - * 23.0 < x <= INF : tanhl(x) := 1. - * - * Special cases: - * tanhl(NaN) is NaN; - * only tanhl(0)=0 is exact for finite argument. - */ - #include "libm.h" #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 @@ -41,43 +6,40 @@ long double tanhl(long double x) return tanh(x); } #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -static const long double tiny = 1.0e-4900L; - long double tanhl(long double x) { - long double t,z; - int32_t se; - uint32_t jj0,jj1,ix; + union { + long double f; + struct{uint64_t m; uint16_t se; uint16_t pad;} i; + } u = {.f = x}; + unsigned ex = u.i.se & 0x7fff; + unsigned sign = u.i.se & 0x8000; + uint32_t w; + long double t; - /* High word of |x|. 
*/ - GET_LDOUBLE_WORDS(se, jj0, jj1, x); - ix = se & 0x7fff; - - /* x is INF or NaN */ - if (ix == 0x7fff) { - /* for NaN it's not important which branch: tanhl(NaN) = NaN */ - if (se & 0x8000) - return 1.0/x-1.0; /* tanhl(-inf)= -1; */ - return 1.0/x+1.0; /* tanhl(+inf)= +1 */ - } + /* x = |x| */ + u.i.se = ex; + x = u.f; + w = u.i.m >> 32; - /* |x| < 23 */ - if (ix < 0x4003 || (ix == 0x4003 && jj0 < 0xb8000000u)) { - if ((ix|jj0|jj1) == 0) /* x == +- 0 */ - return x; - if (ix < 0x3fc8) /* |x| < 2**-55 */ - return x*(1.0+tiny); /* tanh(small) = small */ - if (ix >= 0x3fff) { /* |x| >= 1 */ - t = expm1l(2.0*fabsl(x)); - z = 1.0 - 2.0/(t+2.0); + if (ex > 0x3ffe || (ex == 0x3ffe && w > 0x8c9f53d5)) { + /* |x| > log(3)/2 ~= 0.5493 or nan */ + if (ex >= 0x3fff+5) { + /* |x| >= 32 */ + t = 1 + 0/(x + 0x1p-120f); } else { - t = expm1l(-2.0*fabsl(x)); - z = -t/(t+2.0); + t = expm1l(2*x); + t = 1 - 2/(t+2); } - /* |x| > 23, return +-1 */ + } else if (ex > 0x3ffd || (ex == 0x3ffd && w > 0x82c577d4)) { + /* |x| > log(5/3)/2 ~= 0.2554 */ + t = expm1l(2*x); + t = t/(t+2); } else { - z = 1.0 - tiny; /* raise inexact flag */ + /* |x| is small */ + t = expm1l(-2*x); + t = -t/(t+2); } - return se & 0x8000 ? -z : z; + return sign ? -t : t; } #endif -- 2.25.1