4 #if LDBL_MANT_DIG==64 && LDBL_MAX_EXP==16384
15 /* exact add, assumes exponent_x >= exponent_y */
/* Stores the rounded sum in *hi and the rounding error in *lo so that
 * hi + lo == x + y exactly (fast two-sum); the exactness requires the
 * exponent ordering stated above.
 * NOTE(review): the function body is elided from this extract. */
16 static void add(long double *hi, long double *lo, long double x, long double y)
26 /* exact mul, assumes no over/underflow */
/* Stores the rounded product in *hi and the rounding error in *lo so
 * that hi + lo == x*y exactly, via splitting each operand into high and
 * low halves (xh/xl, yh/yl).
 * NOTE(review): the splitting statements between the declarations and
 * the final *lo computation are elided from this extract. */
27 static void mul(long double *hi, long double *lo, long double x, long double y)
/* split constant 2^32 + 1 for the 64-bit ld80 mantissa (half of 64 bits) */
29 static const long double c = 1.0 + 0x1p32L;
30 long double cx, xh, xl, cy, yh, yl;
/* error term: exact sum of the partial products minus the rounded product */
39 *lo = (xh*yh - *hi) + xh*yl + xl*yh + xl*yl;
43 assume (long double)(hi+lo) == hi
44 return an adjusted hi so that rounding it to double is correct
/* Nudges the last mantissa bit of hi so that rounding hi alone to double
 * gives the same result as rounding the exact hi + lo would.
 * NOTE(review): most of the body (the union ld80 accesses, the mantissa
 * decrement branch and its underflow handling) is elided from this
 * extract. */
46 static long double adjust(long double hi, long double lo)
/* any of the low 10 mantissa bits set: hi is not on a double rounding
 * boundary, no adjustment needed (cf. "last 10 bits in adjust" in the
 * ld80 fma() comment below) */
53 if (uhi.bits.m & 0x3ff)
/* lo has the same sign as hi: the true value is farther from zero, so
 * the mantissa is bumped up (else-branch, elided, moves it down) */
56 if (uhi.bits.s == ulo.bits.s)
/* NOTE(review): body elided -- presumably add() followed by adjust(), i.e.
 * a sum whose later rounding to double is correct; confirm against the
 * full source. */
63 static long double dadd(long double x, long double y)
/* NOTE(review): body elided -- presumably mul() followed by adjust(), i.e.
 * a product whose later rounding to double is correct; confirm against
 * the full source. */
69 static long double dmul(long double x, long double y)
/* NOTE(review): body elided -- presumably returns the biased exponent
 * field of x via the ld80 union; used in fma() below to order the add()
 * operands by magnitude. */
75 static int getexp(long double x)
/* fma(x, y, z) = x*y + z with a single rounding, for targets whose long
 * double has a 64-bit mantissa (ld80): the product and sum are carried
 * as exact hi+lo pairs and the final rounding to double is corrected by
 * dadd()/adjust().
 * NOTE(review): many interior lines are elided from this extract. */
82 double fma(double x, double y, double z)
84 long double hi, lo1, lo2, xy;
87 /* handle +-inf,nan */
88 if (!isfinite(x) || !isfinite(y))
/* handle +-0 operands (the elided return defers to plain arithmetic) */
93 if (x == 0.0 || y == 0.0)
97 if (round == FE_TONEAREST)
102 /* exact mul and add require nearest rounding */
103 /* spurious inexact exceptions may be raised */
104 fesetround(FE_TONEAREST);
/* xy + lo1 == x*y exactly */
105 mul(&xy, &lo1, x, y);
/* add() requires its larger-exponent operand first, hence the two
 * orderings below chosen by comparing ez (exponent of z) with exy */
109 add(&hi, &lo2, z, xy);
110 } else if (ez > exy - 12) {
111 add(&hi, &lo2, xy, z);
/* (reached when xy + z cancels, per the elided test above) */
114 /* make sure that the sign of 0 is correct */
115 return (xy + z) + lo1;
120 the 12 extra bits (1 guard, 11 round+sticky) are needed so with
122 elo <= ehi - 11, and we use the last 10 bits in adjust so
124 gives correct result when rounded to double
/* final sum; the caller's rounding mode is restored first (elided) */
130 if (round == FE_TONEAREST)
131 return dadd(hi, dadd(lo1, lo2));
132 return hi + (lo1 + lo2);
135 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */
137 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
138 * All rights reserved.
140 * Redistribution and use in source and binary forms, with or without
141 * modification, are permitted provided that the following conditions
143 * 1. Redistributions of source code must retain the above copyright
144 * notice, this list of conditions and the following disclaimer.
145 * 2. Redistributions in binary form must reproduce the above copyright
146 * notice, this list of conditions and the following disclaimer in the
147 * documentation and/or other materials provided with the distribution.
149 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
150 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
151 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
152 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
153 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
154 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
155 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
156 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
157 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
158 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
163 * A struct dd represents a floating-point number with twice the precision
164 * of a double. We maintain the invariant that "hi" stores the 53 high-order
165 * bits of the result.
173 * Compute a+b exactly, returning the exact result in a struct dd. We assume
174 * that both a and b are finite, but make no assumptions about their relative
177 static inline struct dd dd_add(double a, double b)
/* NOTE(review): the lines computing ret.hi (the rounded sum) and the
 * intermediate s are elided; ret.lo below recovers the exact rounding
 * error of that addition (two-sum with no magnitude assumption). */
184 ret.lo = (a - (ret.hi - s)) + (b - s);
189 * Compute a+b, with a small tweak: The least significant bit of the
190 * result is adjusted into a sticky bit summarizing all the bits that
191 * were lost to rounding. This adjustment negates the effects of double
192 * rounding when the result is added to another number with a higher
193 * exponent. For an explanation of round and sticky bits, see any reference
194 * on FPU design, e.g.,
196 * J. Coonen. An Implementation Guide to a Proposed Standard for
197 * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
199 static inline double add_adjusted(double a, double b)
202 uint64_t hibits, lobits;
/* NOTE(review): the dd_add() call producing sum and the guard that
 * skips the adjustment when sum.lo == 0 are elided from this extract. */
206 EXTRACT_WORD64(hibits, sum.hi);
/* only adjust when the low bit is clear: set it as a sticky bit */
207 if ((hibits & 1) == 0) {
208 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
209 EXTRACT_WORD64(lobits, sum.lo);
/* bit trick implementing the copysign note above: moves the low bit of
 * sum.hi toward sum.lo using the words' sign relationship */
210 hibits += 1 - ((hibits ^ lobits) >> 62);
211 INSERT_WORD64(sum.hi, hibits);
218 * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
219 * that the result will be subnormal, and care is taken to ensure that
220 * double rounding does not occur.
222 static inline double add_and_denormalize(double a, double b, int scale)
225 uint64_t hibits, lobits;
/* NOTE(review): the dd_add() call producing sum, the declaration of
 * bits_lost and the sum.lo != 0 guard are elided from this extract. */
231 * If we are losing at least two bits of accuracy to denormalization,
232 * then the first lost bit becomes a round bit, and we adjust the
233 * lowest bit of sum.hi to make it a sticky bit summarizing all the
234 * bits in sum.lo. With the sticky bit adjusted, the hardware will
235 * break any ties in the correct direction.
237 * If we are losing only one bit to denormalization, however, we must
238 * break the ties manually.
241 EXTRACT_WORD64(hibits, sum.hi);
/* number of low-order bits that fall off when scaling into the
 * subnormal range: derived from the biased exponent field and scale */
242 bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
/* precedence note: != binds tighter than ^, so this reads
 * (bits_lost != 1) ^ (lowest bit of sum.hi) */
243 if (bits_lost != 1 ^ (int)(hibits & 1)) {
244 /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
245 EXTRACT_WORD64(lobits, sum.lo);
/* the "& 2" restricts the xor-of-sign-bits trick to exactly +1 or -1 */
246 hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
247 INSERT_WORD64(sum.hi, hibits);
250 return (ldexp(sum.hi, scale));
254 * Compute a*b exactly, returning the exact result in a struct dd. We assume
255 * that both a and b are normalized, so no underflow or overflow will occur.
256 * The current rounding mode must be round-to-nearest.
258 static inline struct dd dd_mul(double a, double b)
/* split constant 2^27 + 1 for the 53-bit double mantissa (Dekker split,
 * per the Dekker citation in the fma() comment below) */
260 static const double split = 0x1p27 + 1.0;
262 double ha, hb, la, lb, p, q;
/* NOTE(review): the statements splitting a and b into ha/la and hb/lb,
 * the product p = ha * hb, and the ret.hi assignment are elided. */
/* cross terms of the half-width products */
275 q = ha * lb + la * hb;
/* error term: exact product minus the rounded ret.hi */
278 ret.lo = p - ret.hi + q + la * lb;
283 * Fused multiply-add: Compute x * y + z with a single rounding error.
285 * We use scaling to avoid overflow/underflow, along with the
286 * canonical precision-doubling technique adapted from:
288 * Dekker, T. A Floating-Point Technique for Extending the
289 * Available Precision. Numer. Math. 18, 224-242 (1971).
291 * This algorithm is sensitive to the rounding precision. FPUs such
292 * as the i387 must be set in double-precision mode if variables are
293 * to be stored in FP registers in order to avoid incorrect results.
294 * This is the default on FreeBSD, but not on many other systems.
296 * Hardware instructions should be used on architectures that support it,
297 * since this implementation will likely be several times slower.
/* NOTE(review): this FreeBSD-derived fma is presumably the alternative
 * branch of the LDBL_MANT_DIG test at the top of the file, used when
 * long double offers no extra precision -- confirm against the full
 * source. Many interior lines are elided from this extract. */
299 double fma(double x, double y, double z)
301 double xs, ys, zs, adj;
308 * Handle special cases. The order of operations and the particular
309 * return values here are crucial in handling special cases involving
310 * infinities, NaNs, overflows, and signed zeroes correctly.
312 if (!isfinite(x) || !isfinite(y))
316 if (x == 0.0 || y == 0.0)
324 oround = fegetround();
/* exponent distance between x*y and z (ex, ey, ez from elided frexp/
 * ilogb-style decompositions of the operands) */
325 spread = ex + ey - ez;
328 * If x * y and z are many orders of magnitude apart, the scaling
329 * will overflow, so we handle these cases specially. Rounding
330 * modes other than FE_TONEAREST are painful.
/* z dominates completely: result is z nudged per rounding mode,
 * with inexact/underflow raised as appropriate (switch partly elided) */
332 if (spread < -DBL_MANT_DIG) {
334 feraiseexcept(FE_INEXACT);
338 feraiseexcept(FE_UNDERFLOW);
341 default: /* FE_TONEAREST */
/* xor chain tests the parity of the operand signs, i.e. whether the
 * discarded x*y term and z point the same way */
345 if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
348 return (nextafter(z, 0));
352 if (x > 0.0 ^ y < 0.0)
355 return (nextafter(z, -INFINITY));
359 if (x > 0.0 ^ y < 0.0)
360 return (nextafter(z, INFINITY));
/* bring z into range of the scaled product; beyond 2*DBL_MANT_DIG its
 * value only matters as a sticky bit, so clamp to DBL_MIN */
366 if (spread <= DBL_MANT_DIG * 2)
367 zs = ldexp(zs, -spread);
369 zs = copysign(DBL_MIN, zs);
371 fesetround(FE_TONEAREST);
374 * Basic approach for round-to-nearest:
376 * (xy.hi, xy.lo) = x * y (exact)
377 * (r.hi, r.lo) = xy.hi + z (exact)
378 * adj = xy.lo + r.lo (inexact; low bit is sticky)
379 * result = r.hi + adj (correctly rounded)
/* exact sum of the product's high word and the scaled z */
382 r = dd_add(xy.hi, zs);
388 * When the addends cancel to 0, ensure that the result has
392 volatile double vzs = zs; /* XXX gcc CSE bug workaround */
393 return (xy.hi + vzs + ldexp(xy.lo, spread));
/* directed rounding: a plain sum suffices (no double-rounding hazard,
 * per the elided remainder of this comment) */
396 if (oround != FE_TONEAREST) {
398 * There is no need to worry about double rounding in directed
403 return (ldexp(r.hi + adj, spread));
/* round-to-nearest: fold the low words with a sticky-bit adjustment */
406 adj = add_adjusted(r.lo, xy.lo);
/* result stays normal after unscaling: a single ldexp rounds once */
407 if (spread + ilogb(r.hi) > -1023)
408 return (ldexp(r.hi + adj, spread));
/* subnormal result: avoid double rounding while denormalizing */
410 return (add_and_denormalize(r.hi, adj, spread));