#include <fenv.h>
#include "libm.h" /* musl-internal header; provides EXTRACT_WORD64/INSERT_WORD64 */

#if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384
union ld80 {
	long double x;
	struct {
		uint64_t m;
		uint16_t e : 15;
		uint16_t s : 1;
	} bits;
};
/* exact add, assumes exponent_x >= exponent_y */
static void add(long double *hi, long double *lo, long double x, long double y)
{
	long double r;

	r = x + y;
	*hi = r;
	r -= x;
	*lo = y - r;
}
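/*
a usage sketch (add_demo is a hypothetical helper, values are
illustrative): the pair (hi, lo) is an exact decomposition of x + y,
so nothing is lost to the rounding of the sum
*/
static void add_demo(void)
{
	long double hi, lo;
	add(&hi, &lo, 1.0L, 0x1p-70L);
	/* hi == 1.0L and lo == 0x1p-70L: hi + lo == x + y exactly */
}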
/* exact mul, assumes no over/underflow */
static void mul(long double *hi, long double *lo, long double x, long double y)
{
	static const long double c = 1.0 + 0x1p32L;
	long double cx, xh, xl, cy, yh, yl;

	cx = c*x;
	xh = (x - cx) + cx;
	xl = x - xh;
	cy = c*y;
	yh = (y - cy) + cy;
	yl = y - yh;
	*hi = x*y;
	/* error term: the exact sum of half-products minus the rounded product */
	*lo = (xh*yh - *hi) + xh*yl + xl*yh + xl*yl;
}
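/*
why c = 2^32 + 1 works, sketched with assumed values: rounding c*x
cancels the low half of x, leaving xh with at most 32 significant
bits (e.g. x = 1 + 0x1p-40L splits into xh = 1.0L and xl = 0x1p-40L),
so each of the four partial products above is exact in 64-bit
precision
*/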
/*
assume (long double)(hi+lo) == hi
return an adjusted hi so that rounding it to double (or less) precision is correct
*/
static long double adjust(long double hi, long double lo)
{
	union ld80 uhi, ulo;

	if (lo == 0)
		return hi;
	uhi.x = hi;
	if (uhi.bits.m & 0x3ff) /* low bits of hi are already sticky */
		return hi;
	ulo.x = lo;
	if (uhi.bits.s == ulo.bits.s) /* lo has the same sign: nudge hi up */
		uhi.bits.m++;
	else {
		uhi.bits.m--;
		/* handle underflow and take care of ld80 implicit msb */
		if (uhi.bits.m == (uint64_t)-1/2) {
			uhi.bits.m *= 2;
			uhi.bits.e--;
		}
	}
	return uhi.x;
}
/* adjusted add so the result is correct when rounded to double (or less) precision */
static long double dadd(long double x, long double y)
{
	add(&x, &y, x, y);
	return adjust(x, y);
}
/* adjusted mul so the result is correct when rounded to double (or less) precision */
static long double dmul(long double x, long double y)
{
	mul(&x, &y, x, y);
	return adjust(x, y);
}
static int getexp(long double x)
{
	union ld80 u;
	u.x = x;
	return u.bits.e;
}
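/*
the double-rounding hazard that adjust() defends against, sketched
with assumed values: the exact sum 1 + 2^-53 + 2^-80 first rounds to
1 + 2^-53 in 64-bit precision, and that halfway value then rounds to
1.0 as a double (ties to even), while rounding the exact sum directly
to double gives 1 + 2^-52; making the last long double bit sticky
breaks the spurious tie so the second rounding comes out right
*/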
double fma(double x, double y, double z)
{
	#pragma STDC FENV_ACCESS ON
	long double hi, lo1, lo2, xy;
	int round, ez, exy;
	/* handle +-inf,nan */
	if (!isfinite(x) || !isfinite(y))
		return x*y + z;
	if (!isfinite(z))
		return z;
	/* handle +-0 */
	if (x == 0.0 || y == 0.0)
		return x*y + z;
	round = fegetround();
	if (z == 0.0) {
		if (round == FE_TONEAREST)
			return dmul(x, y);
		return x*y;
	}
	/* exact mul and add require nearest rounding */
	/* spurious inexact exceptions may be raised */
	fesetround(FE_TONEAREST);
	mul(&xy, &lo1, x, y);
	exy = getexp(xy);
	ez = getexp(z);
	if (ez > exy) {
		add(&hi, &lo2, z, xy);
	} else if (ez > exy - 12) {
		add(&hi, &lo2, xy, z);
		if (hi == 0) {
			/*
			xy + z is 0, but it should be computed with the
			original rounding mode so that the sign is correct.
			if the compiler does not support FENV_ACCESS ON, it
			does not know about the changed rounding mode and
			would eliminate the xy + z below without the volatile
			memory access
			*/
			volatile double z_;
			fesetround(round);
			z_ = z;
			return (xy + z_) + lo1;
		}
	} else {
		/*
		ez <= exy - 12: the 12 extra bits (1 guard, 11 round+sticky)
		are needed so that with lo = dadd(lo1, lo2) we get
		elo <= ehi - 11, and we use the last 10 bits in adjust so
		dadd(hi, lo) gives a correct result when rounded to double
		*/
		hi = xy;
		lo2 = z;
	}
	/*
	the result is stored before return for correct precision and exceptions

	one corner case is when the underflow flag should be raised because
	the precise result is an inexact subnormal double, but the calculated
	long double result is an exact subnormal double
	(so rounding it to double does not raise exceptions)

	in nearest rounding mode dadd takes care of this: the last bit of the
	result is adjusted so rounding sees an inexact value when it should

	in non-nearest rounding mode fenv is used for the workaround
	*/
	fesetround(round);
	if (round == FE_TONEAREST)
		z = dadd(hi, dadd(lo1, lo2));
	else {
#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
		int e = fetestexcept(FE_INEXACT);
		feclearexcept(FE_INEXACT);
#endif
		z = hi + (lo1 + lo2);
#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
		/* raise underflow if the result is an inexact subnormal double */
		if (getexp(z) < 0x3fff-1022 && fetestexcept(FE_INEXACT))
			feraiseexcept(FE_UNDERFLOW);
		else if (e)
			feraiseexcept(FE_INEXACT);
#endif
	}
	return z;
}
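/*
a small check of the signed-zero case handled above (signed_zero_demo
is a hypothetical test helper): exact cancellation yields -0.0 only
in downward rounding, which is why xy + z must be recomputed under
the caller's rounding mode
*/
static int signed_zero_demo(void)
{
	int s;
	fesetround(FE_DOWNWARD);
	s = signbit(fma(1.0, 1.0, -1.0)); /* expect a negative zero */
	fesetround(FE_TONEAREST);
	return s;
}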
#else
/* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */
/*-
 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * A struct dd represents a floating-point number with twice the precision
 * of a double. We maintain the invariant that "hi" stores the 53 high-order
 * bits of the result.
 */
struct dd {
	double hi;
	double lo;
};
/*
 * Compute a+b exactly, returning the exact result in a struct dd. We assume
 * that both a and b are finite, but make no assumptions about their relative
 * magnitudes.
 */
static inline struct dd dd_add(double a, double b)
{
	struct dd ret;
	double s;

	ret.hi = a + b;
	s = ret.hi - a;
	ret.lo = (a - (ret.hi - s)) + (b - s);
	return (ret);
}
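/*
 * A usage sketch (dd_add_demo is a hypothetical helper, values are
 * illustrative): 1.0 + 0x1p-60 rounds to 1.0, and the lo word
 * recovers exactly the 0x1p-60 that the rounding discarded.
 */
static int dd_add_demo(void)
{
	struct dd s = dd_add(1.0, 0x1p-60);
	return s.hi == 1.0 && s.lo == 0x1p-60; /* holds in round-to-nearest */
}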
/*
 * Compute a+b, with a small tweak: The least significant bit of the
 * result is adjusted into a sticky bit summarizing all the bits that
 * were lost to rounding. This adjustment negates the effects of double
 * rounding when the result is added to another number with a higher
 * exponent. For an explanation of round and sticky bits, see any reference
 * on FPU design, e.g.,
 *
 *     J. Coonen. An Implementation Guide to a Proposed Standard for
 *     Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
 */
static inline double add_adjusted(double a, double b)
{
	struct dd sum;
	uint64_t hibits, lobits;

	sum = dd_add(a, b);
	if (sum.lo != 0) {
		EXTRACT_WORD64(hibits, sum.hi);
		if ((hibits & 1) == 0) {
			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
			EXTRACT_WORD64(lobits, sum.lo);
			hibits += 1 - ((hibits ^ lobits) >> 62);
			INSERT_WORD64(sum.hi, hibits);
		}
	}
	return (sum.hi);
}
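/*
 * A sketch of the sticky-bit tweak (add_adjusted_demo is a
 * hypothetical helper, values are assumptions): 1.0 + 0x1p-54 rounds
 * to 1.0 and would look exact, so add_adjusted returns the
 * neighboring double 1 + 0x1p-52 instead; its set low bit records
 * that something was lost below, which steers the final rounding.
 */
static int add_adjusted_demo(void)
{
	return add_adjusted(1.0, 0x1p-54) == 1.0 + 0x1p-52;
}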
/*
 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
 * that the result will be subnormal, and care is taken to ensure that
 * double rounding does not occur.
 */
static inline double add_and_denormalize(double a, double b, int scale)
{
	struct dd sum;
	uint64_t hibits, lobits;
	int bits_lost;

	sum = dd_add(a, b);

	/*
	 * If we are losing at least two bits of accuracy to denormalization,
	 * then the first lost bit becomes a round bit, and we adjust the
	 * lowest bit of sum.hi to make it a sticky bit summarizing all the
	 * bits in sum.lo. With the sticky bit adjusted, the hardware will
	 * break any ties in the correct direction.
	 *
	 * If we are losing only one bit to denormalization, however, we must
	 * break the ties manually.
	 */
	if (sum.lo != 0) {
		EXTRACT_WORD64(hibits, sum.hi);
		bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
		if ((bits_lost != 1) ^ (int)(hibits & 1)) {
			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
			EXTRACT_WORD64(lobits, sum.lo);
			hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
			INSERT_WORD64(sum.hi, hibits);
		}
	}
	return scalbn(sum.hi, scale);
}
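/*
 * A sketch under assumed values (add_and_denormalize_demo is a
 * hypothetical helper): scaling 1 + 0x1p-53 down to the subnormal
 * range loses 52 bits, so the sticky adjustment fires and the final
 * scalbn performs the one correct rounding, returning 0x1p-1074.
 */
static int add_and_denormalize_demo(void)
{
	return add_and_denormalize(1.0, 0x1p-53, -1074) == 0x1p-1074;
}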
/*
 * Compute a*b exactly, returning the exact result in a struct dd. We assume
 * that both a and b are normalized, so no underflow or overflow will occur.
 * The current rounding mode must be round-to-nearest.
 */
static inline struct dd dd_mul(double a, double b)
{
	static const double split = 0x1p27 + 1.0;
	struct dd ret;
	double ha, hb, la, lb, p, q;

	p = a * split;
	ha = a - p;
	ha += p;
	la = a - ha;

	p = b * split;
	hb = b - p;
	hb += p;
	lb = b - hb;

	p = ha * hb;
	q = ha * lb + la * hb;

	ret.hi = p + q;
	ret.lo = p - ret.hi + q + la * lb;
	return (ret);
}
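/*
 * A usage sketch (dd_mul_demo is a hypothetical helper, values are
 * illustrative): squaring 1 + 0x1p-30 gives 1 + 0x1p-29 + 0x1p-60
 * exactly; hi is the rounded product and lo the rounding error, so
 * hi + lo reproduces the product exactly.
 */
static int dd_mul_demo(void)
{
	struct dd p = dd_mul(1.0 + 0x1p-30, 1.0 + 0x1p-30);
	return p.hi == 1.0 + 0x1p-29 && p.lo == 0x1p-60;
}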
/*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
 *
 * We use scaling to avoid overflow/underflow, along with the
 * canonical precision-doubling technique adapted from:
 *
 *     Dekker, T. A Floating-Point Technique for Extending the
 *     Available Precision. Numer. Math. 18, 224-242 (1971).
 *
 * This algorithm is sensitive to the rounding precision. FPUs such
 * as the i387 must be set in double-precision mode if variables are
 * to be stored in FP registers in order to avoid incorrect results.
 * This is the default on FreeBSD, but not on many other systems.
 *
 * Hardware instructions should be used on architectures that support it,
 * since this implementation will likely be several times slower.
 */
double fma(double x, double y, double z)
{
	#pragma STDC FENV_ACCESS ON
	double xs, ys, zs, adj;
	struct dd xy, r;
	int oround;
	int ex, ey, ez;
	int spread;
	/*
	 * Handle special cases. The order of operations and the particular
	 * return values here are crucial in handling special cases involving
	 * infinities, NaNs, overflows, and signed zeroes correctly.
	 */
	if (!isfinite(x) || !isfinite(y))
		return (x * y + z);
	if (!isfinite(z))
		return (z);
	if (x == 0.0 || y == 0.0)
		return (x * y + z);
	if (z == 0.0)
		return (x * y);
	xs = frexp(x, &ex);
	ys = frexp(y, &ey);
	zs = frexp(z, &ez);
	oround = fegetround();
	spread = ex + ey - ez;
	/*
	 * If x * y and z are many orders of magnitude apart, the scaling
	 * will overflow, so we handle these cases specially. Rounding
	 * modes other than FE_TONEAREST are painful.
	 */
	if (spread < -DBL_MANT_DIG) {
		feraiseexcept(FE_INEXACT);
		if (!isnormal(z))
			feraiseexcept(FE_UNDERFLOW);
		switch (oround) {
		default: /* FE_TONEAREST */
			return (z);
		case FE_TOWARDZERO:
			if ((x > 0.0) ^ (y < 0.0) ^ (z < 0.0))
				return (z);
			return (nextafter(z, 0));
		case FE_DOWNWARD:
			if ((x > 0.0) ^ (y < 0.0))
				return (z);
			return (nextafter(z, -INFINITY));
		case FE_UPWARD:
			if ((x > 0.0) ^ (y < 0.0))
				return (nextafter(z, INFINITY));
			return (z);
		}
	}
	if (spread <= DBL_MANT_DIG * 2)
		zs = scalbn(zs, -spread);
	else
		zs = copysign(DBL_MIN, zs);
	fesetround(FE_TONEAREST);
	/*
	 * Basic approach for round-to-nearest:
	 *
	 *     (xy.hi, xy.lo) = x * y           (exact)
	 *     (r.hi, r.lo)   = xy.hi + z       (exact)
	 *     adj = xy.lo + r.lo               (inexact; low bit is sticky)
	 *     result = r.hi + adj              (correctly rounded)
	 */
	xy = dd_mul(xs, ys);
	r = dd_add(xy.hi, zs);
	spread = ex + ey;
	if (r.hi == 0.0) {
		/*
		 * When the addends cancel to 0, ensure that the result has
		 * the correct sign.
		 */
		fesetround(oround);
		volatile double vzs = zs; /* XXX gcc CSE bug workaround */
		return xy.hi + vzs + scalbn(xy.lo, spread);
	}
	if (oround != FE_TONEAREST) {
		/*
		 * There is no need to worry about double rounding in directed
		 * rounding modes.
		 * TODO: underflow is not raised properly, example in downward
		 * rounding:
		 * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066)
		 */
		fesetround(oround);
		adj = r.lo + xy.lo;
		return scalbn(r.hi + adj, spread);
	}
	adj = add_adjusted(r.lo, xy.lo);
	if (spread + ilogb(r.hi) > -1023)
		return scalbn(r.hi + adj, spread);
	else
		return add_and_denormalize(r.hi, adj, spread);
}
#endif
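/*
 * A usage sketch of the single-rounding guarantee (prod_err_demo is a
 * hypothetical helper): fma recovers the exact rounding error of a
 * product, which a separate multiply and subtract cannot; e.g. with
 * a = b = 1 + 0x1p-30 the product rounds to 1 + 0x1p-29 and the
 * returned error is 0x1p-60, while computing a*b - p in double
 * gives 0.
 */
static double prod_err_demo(double a, double b)
{
	double p = a*b;
	return fma(a, b, -p); /* p + prod_err_demo(a, b) == a*b exactly */
}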