From 8f438115f2c12b40fa7b1884f87db72857af67f6 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Mon, 7 Oct 2013 18:46:43 +0000 Subject: [PATCH] math: fix rare underflow issue in fma the issue is described in commits 1e5eb73545ca6cfe8b918798835aaf6e07af5beb and ffd8ac2dd50f99c3c83d7d9d845df9874ec3e7d5 --- src/math/fma.c | 16 ++++++++++++++-- src/math/fmaf.c | 36 +++++++++++++++++++++++++++--------- src/math/fmal.c | 16 ++++++++++++++-- 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/src/math/fma.c b/src/math/fma.c index 84868be5..02f5c865 100644 --- a/src/math/fma.c +++ b/src/math/fma.c @@ -431,12 +431,24 @@ double fma(double x, double y, double z) /* * There is no need to worry about double rounding in directed * rounding modes. - * TODO: underflow is not raised properly, example in downward rounding: + * But underflow may not be raised properly, example in downward rounding: * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066) */ + double ret; +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) + int e = fetestexcept(FE_INEXACT); + feclearexcept(FE_INEXACT); +#endif fesetround(oround); adj = r.lo + xy.lo; - return scalbn(r.hi + adj, spread); + ret = scalbn(r.hi + adj, spread); +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) + if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT)) + feraiseexcept(FE_UNDERFLOW); + else if (e) + feraiseexcept(FE_INEXACT); +#endif + return ret; } adj = add_adjusted(r.lo, xy.lo); diff --git a/src/math/fmaf.c b/src/math/fmaf.c index 745ee393..aa57feb6 100644 --- a/src/math/fmaf.c +++ b/src/math/fmaf.c @@ -26,7 +26,8 @@ */ #include -#include "libm.h" +#include +#include /* * Fused multiply-add: Compute x * y + z with a single rounding error. @@ -39,21 +40,35 @@ float fmaf(float x, float y, float z) { #pragma STDC FENV_ACCESS ON double xy, result; - uint32_t hr, lr; + union {double f; uint64_t i;} u; + int e; xy = (double)x * y; result = xy + z; - EXTRACT_WORDS(hr, lr, result); + u.f = result; + e = u.i>>52 & 0x7ff; /* Common case: The double precision result is fine. */ - if ((lr & 0x1fffffff) != 0x10000000 || /* not a halfway case */ - (hr & 0x7ff00000) == 0x7ff00000 || /* NaN */ + if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */ + e == 0x7ff || /* NaN */ result - xy == z || /* exact */ fegetround() != FE_TONEAREST) /* not round-to-nearest */ { /* - TODO: underflow is not raised correctly, example in - downward rouding: fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) + underflow may not be raised correctly, example: + fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */ +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) + if (e < 0x3ff-126 && e >= 0x3ff-149 && fetestexcept(FE_INEXACT)) { + feclearexcept(FE_INEXACT); + /* TODO: gcc and clang bug workaround */ + volatile float vz = z; + result = xy + vz; + if (fetestexcept(FE_INEXACT)) + feraiseexcept(FE_UNDERFLOW); + else + feraiseexcept(FE_INEXACT); + } +#endif z = result; return z; } @@ -68,8 +83,11 @@ float fmaf(float x, float y, float z) volatile double vxy = xy; /* XXX work around gcc CSE bug */ double adjusted_result = vxy + z; fesetround(FE_TONEAREST); - if (result == adjusted_result) - SET_LOW_WORD(adjusted_result, lr + 1); + if (result == adjusted_result) { + u.f = adjusted_result; + u.i++; + adjusted_result = u.f; + } z = adjusted_result; return z; } diff --git a/src/math/fmal.c b/src/math/fmal.c index c68db255..4506aac6 100644 --- a/src/math/fmal.c +++ b/src/math/fmal.c @@ -264,12 +264,24 @@ long double fmal(long double x, long double y, long double z) /* * There is no need to worry about double rounding in directed * rounding modes. - * TODO: underflow is not raised correctly, example in downward rounding: + * But underflow may not be raised correctly, example in downward rounding: * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-16440L) */ + long double ret; +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) + int e = fetestexcept(FE_INEXACT); + feclearexcept(FE_INEXACT); +#endif fesetround(oround); adj = r.lo + xy.lo; - return scalbnl(r.hi + adj, spread); + ret = scalbnl(r.hi + adj, spread); +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) + if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT)) + feraiseexcept(FE_UNDERFLOW); + else if (e) + feraiseexcept(FE_INEXACT); +#endif + return ret; } adj = add_adjusted(r.lo, xy.lo); -- 2.25.1