From 6d85096f49fa955e7e1473b7deb30ce55b6c8be0 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Thu, 15 Aug 2013 14:05:19 +0000 Subject: [PATCH] math: clean up atan2.c * remove volatile hacks * don't care about inexact flag for now (removed all the +-tiny) * fix atanl to raise underflow properly * remove signed int arithmetics * use pi/2 instead of pi_o_2 (gcc generates the same code, which is not correct, but it does not matter: we mainly care about nearest rounding) --- src/math/atan2.c | 68 +++++++++++++++++++---------------------------- src/math/atan2f.c | 59 +++++++++++++++++----------------------- src/math/atan2l.c | 36 +++++++++---------------- src/math/atanl.c | 13 ++++----- 4 files changed, 73 insertions(+), 103 deletions(-) diff --git a/src/math/atan2.c b/src/math/atan2.c index 825a38fe..5a1903c6 100644 --- a/src/math/atan2.c +++ b/src/math/atan2.c @@ -39,75 +39,63 @@ #include "libm.h" -// FIXME -static const volatile double -tiny = 1.0e-300; static const double -pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */ -pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */ -pi = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */ -static const volatile double +pi = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */ pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ double atan2(double y, double x) { double z; - int32_t k,m,hx,hy,ix,iy; - uint32_t lx,ly; + uint32_t m,lx,ly,ix,iy; - EXTRACT_WORDS(hx, lx, x); - ix = hx & 0x7fffffff; - EXTRACT_WORDS(hy, ly, y); - iy = hy & 0x7fffffff; - if ((ix|((lx|-lx)>>31)) > 0x7ff00000 || - (iy|((ly|-ly)>>31)) > 0x7ff00000) /* x or y is NaN */ + if (isnan(x) || isnan(y)) return x+y; - if ((hx-0x3ff00000 | lx) == 0) /* x = 1.0 */ + EXTRACT_WORDS(ix, lx, x); + EXTRACT_WORDS(iy, ly, y); + if ((ix-0x3ff00000 | lx) == 0) /* x = 1.0 */ return atan(y); - m = ((hy>>31)&1) | ((hx>>30)&2); /* 2*sign(x)+sign(y) */ + m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */ + ix = ix & 0x7fffffff; + iy = iy & 0x7fffffff; /* when y = 0 */ if ((iy|ly) == 0) { switch(m) { case 0: - case 1: return y; /* atan(+-0,+anything)=+-0 */ - case 2: return pi+tiny; /* atan(+0,-anything) = pi */ - case 3: return -pi-tiny; /* atan(-0,-anything) =-pi */ + case 1: return y; /* atan(+-0,+anything)=+-0 */ + case 2: return pi; /* atan(+0,-anything) = pi */ + case 3: return -pi; /* atan(-0,-anything) =-pi */ } } /* when x = 0 */ if ((ix|lx) == 0) - return hy < 0 ? -pi_o_2-tiny : pi_o_2+tiny; + return m&1 ? -pi/2 : pi/2; /* when x is INF */ if (ix == 0x7ff00000) { if (iy == 0x7ff00000) { switch(m) { - case 0: return pi_o_4+tiny; /* atan(+INF,+INF) */ - case 1: return -pi_o_4-tiny; /* atan(-INF,+INF) */ - case 2: return 3.0*pi_o_4+tiny; /* atan(+INF,-INF) */ - case 3: return -3.0*pi_o_4-tiny; /* atan(-INF,-INF) */ + case 0: return pi/4; /* atan(+INF,+INF) */ + case 1: return -pi/4; /* atan(-INF,+INF) */ + case 2: return 3*pi/4; /* atan(+INF,-INF) */ + case 3: return -3*pi/4; /* atan(-INF,-INF) */ } } else { switch(m) { - case 0: return 0.0; /* atan(+...,+INF) */ - case 1: return -0.0; /* atan(-...,+INF) */ - case 2: return pi+tiny; /* atan(+...,-INF) */ - case 3: return -pi-tiny; /* atan(-...,-INF) */ + case 0: return 0.0; /* atan(+...,+INF) */ + case 1: return -0.0; /* atan(-...,+INF) */ + case 2: return pi; /* atan(+...,-INF) */ + case 3: return -pi; /* atan(-...,-INF) */ } } } - /* when y is INF */ - if (iy == 0x7ff00000) - return hy < 0 ? -pi_o_2-tiny : pi_o_2+tiny; + /* |y/x| > 0x1p64 */ + if (ix+(64<<20) < iy || iy == 0x7ff00000) + return m&1 ? -pi/2 : pi/2; - /* compute y/x */ - k = (iy-ix)>>20; - if (k > 60) { /* |y/x| > 2**60 */ - z = pi_o_2+0.5*pi_lo; - m &= 1; - } else if (hx < 0 && k < -60) /* 0 > |y|/x > -2**-60 */ - z = 0.0; - else /* safe to do y/x */ + /* z = atan(|y/x|) without spurious underflow */ + if ((m&2) && iy+(64<<20) < ix) /* |y/x| < 0x1p-64, x<0 */ + z = 0; + else z = atan(fabs(y/x)); switch (m) { case 0: return z; /* atan(+,+) */ diff --git a/src/math/atan2f.c b/src/math/atan2f.c index 96839cba..c634d00f 100644 --- a/src/math/atan2f.c +++ b/src/math/atan2f.c @@ -15,72 +15,63 @@ #include "libm.h" -static const volatile float -tiny = 1.0e-30; static const float -pi_o_4 = 7.8539818525e-01, /* 0x3f490fdb */ -pi_o_2 = 1.5707963705e+00, /* 0x3fc90fdb */ -pi = 3.1415927410e+00; /* 0x40490fdb */ -static const volatile float +pi = 3.1415927410e+00, /* 0x40490fdb */ pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */ float atan2f(float y, float x) { float z; - int32_t k,m,hx,hy,ix,iy; + uint32_t m,ix,iy; - GET_FLOAT_WORD(hx, x); - ix = hx & 0x7fffffff; - GET_FLOAT_WORD(hy, y); - iy = hy & 0x7fffffff; - if (ix > 0x7f800000 || iy > 0x7f800000) /* x or y is NaN */ + if (isnan(x) || isnan(y)) return x+y; - if (hx == 0x3f800000) /* x=1.0 */ + GET_FLOAT_WORD(ix, x); + GET_FLOAT_WORD(iy, y); + if (ix == 0x3f800000) /* x=1.0 */ return atanf(y); - m = ((hy>>31)&1) | ((hx>>30)&2); /* 2*sign(x)+sign(y) */ + m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */ + ix &= 0x7fffffff; + iy &= 0x7fffffff; /* when y = 0 */ if (iy == 0) { switch (m) { case 0: - case 1: return y; /* atan(+-0,+anything)=+-0 */ - case 2: return pi+tiny; /* atan(+0,-anything) = pi */ - case 3: return -pi-tiny; /* atan(-0,-anything) =-pi */ + case 1: return y; /* atan(+-0,+anything)=+-0 */ + case 2: return pi; /* atan(+0,-anything) = pi */ + case 3: return -pi; /* atan(-0,-anything) =-pi */ } } /* when x = 0 */ if (ix == 0) - return hy < 0 ? -pi_o_2-tiny : pi_o_2+tiny; + return m&1 ? -pi/2 : pi/2; /* when x is INF */ if (ix == 0x7f800000) { if (iy == 0x7f800000) { switch (m) { - case 0: return pi_o_4+tiny; /* atan(+INF,+INF) */ - case 1: return -pi_o_4-tiny; /* atan(-INF,+INF) */ - case 2: return 3.0f*pi_o_4+tiny; /*atan(+INF,-INF)*/ - case 3: return -3.0f*pi_o_4-tiny; /*atan(-INF,-INF)*/ + case 0: return pi/4; /* atan(+INF,+INF) */ + case 1: return -pi/4; /* atan(-INF,+INF) */ + case 2: return 3*pi/4; /*atan(+INF,-INF)*/ + case 3: return -3*pi/4; /*atan(-INF,-INF)*/ } } else { switch (m) { case 0: return 0.0f; /* atan(+...,+INF) */ case 1: return -0.0f; /* atan(-...,+INF) */ - case 2: return pi+tiny; /* atan(+...,-INF) */ - case 3: return -pi-tiny; /* atan(-...,-INF) */ + case 2: return pi; /* atan(+...,-INF) */ + case 3: return -pi; /* atan(-...,-INF) */ } } } - /* when y is INF */ - if (iy == 0x7f800000) - return hy < 0 ? -pi_o_2-tiny : pi_o_2+tiny; + /* |y/x| > 0x1p26 */ + if (ix+(26<<23) < iy || iy == 0x7f800000) + return m&1 ? -pi/2 : pi/2; - /* compute y/x */ - k = (iy-ix)>>23; - if (k > 26) { /* |y/x| > 2**26 */ - z = pi_o_2 + 0.5f*pi_lo; - m &= 1; - } else if (k < -26 && hx < 0) /* 0 > |y|/x > -2**-26 */ + /* z = atan(|y/x|) with correct underflow */ + if ((m&2) && iy+(26<<23) < ix) /*|y/x| < 0x1p-26, x < 0 */ z = 0.0; - else /* safe to do y/x */ + else z = atanf(fabsf(y/x)); switch (m) { case 0: return z; /* atan(+,+) */ diff --git a/src/math/atan2l.c b/src/math/atan2l.c index 7cb42c2f..e0167d09 100644 --- a/src/math/atan2l.c +++ b/src/math/atan2l.c @@ -29,26 +29,22 @@ long double atan2l(long double y, long double x) { union IEEEl2bits ux, uy; long double z; - int32_t k,m; - int16_t exptx, expsignx, expty, expsigny; + int m; + uint16_t exptx, expsignx, expty, expsigny; + if (isnan(x) || isnan(y)) + return x+y; + if (x == 1) + return atanl(y); uy.e = y; expsigny = uy.xbits.expsign; expty = expsigny & 0x7fff; ux.e = x; expsignx = ux.xbits.expsign; exptx = expsignx & 0x7fff; - if ((exptx==0x7fff && - ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)!=0) || /* x is NaN */ - (expty==0x7fff && - ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)!=0)) /* y is NaN */ - return x+y; - if (expsignx==0x3fff && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */ - return atanl(y); m = ((expsigny>>15)&1) | ((expsignx>>14)&2); /* 2*sign(x)+sign(y) */ - /* when y = 0 */ - if (expty==0 && ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)==0) { + if (y == 0) { switch(m) { case 0: case 1: return y; /* atan(+-0,+anything)=+-0 */ @@ -56,9 +52,8 @@ long double atan2l(long double y, long double x) case 3: return -2*pio2_hi-0x1p-120f; /* atan(-0,-anything) =-pi */ } } - /* when x = 0 */ - if (exptx==0 && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) - return expsigny < 0 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f; + if (x == 0) + return m&1 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f; /* when x is INF */ if (exptx == 0x7fff) { if (expty == 0x7fff) { @@ -78,17 +73,12 @@ long double atan2l(long double y, long double x) } } /* when y is INF */ - if (expty == 0x7fff) - return expsigny < 0 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f; + if (exptx+120 < expty || expty == 0x7fff) + return m&1 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f; - /* compute y/x */ - k = expty-exptx; - if(k > LDBL_MANT_DIG+2) { /* |y/x| huge */ - z = pio2_hi+0x1p-120f; - m &= 1; - } else if (expsignx < 0 && k < -LDBL_MANT_DIG-2) /* |y/x| tiny, x<0 */ + if ((m&2) && expty+120 < exptx) /* |y/x| tiny, x<0 */ z = 0.0; - else /* safe to do y/x */ + else z = atanl(fabsl(y/x)); switch (m) { case 0: return z; /* atan(+,+) */ diff --git a/src/math/atanl.c b/src/math/atanl.c index e76693e4..d29e6316 100644 --- a/src/math/atanl.c +++ b/src/math/atanl.c @@ -70,8 +70,8 @@ long double atanl(long double x) union IEEEl2bits u; long double w,s1,s2,z; int id; - int16_t expsign, expt; - int32_t expman; + uint16_t expsign, expt; + uint32_t expman; u.e = x; expsign = u.xbits.expsign; @@ -81,15 +81,16 @@ long double atanl(long double x) ((u.bits.manh&~LDBL_NBIT)|u.bits.manl)!=0) /* NaN */ return x+x; z = atanhi[3] + 0x1p-120f; - return expsign < 0 ? -z : z; + return expsign>>15 ? -z : z; } /* Extract the exponent and the first few bits of the mantissa. */ /* XXX There should be a more convenient way to do this. */ expman = (expt << 8) | ((u.bits.manh >> (LDBL_MANH_SIZE - 9)) & 0xff); if (expman < ((0x3fff - 2) << 8) + 0xc0) { /* |x| < 0.4375 */ if (expt < 0x3fff - 32) { /* if |x| is small, atanl(x)~=x */ - /* raise inexact if x!=0 */ - FORCE_EVAL(x + 0x1p120f); + /* raise underflow if subnormal */ + if (expt == 0) + FORCE_EVAL((float)x); return x; } id = -1; @@ -122,6 +123,6 @@ long double atanl(long double x) if (id < 0) return x - x*(s1+s2); z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); - return expsign < 0 ? -z : z; + return expsign>>15 ? -z : z; } #endif -- 2.25.1