From ee2ee92d62c43f6658d37ddea4c316d2089d0fe9 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Tue, 3 Sep 2013 04:09:12 +0000
Subject: [PATCH] math: rewrite remainder functions (remainder, remquo, fmod,
 modf)

* results are exact
* modfl follows truncl (raises inexact flag spuriously now)
* modf and modff only had cosmetic cleanup
* remainder is just a wrapper around remquo now
* using iterative shift+subtract for remquo and fmod
* ld80 and ld128 are supported as well
---
 src/math/fmod.c       | 179 ++++++++--------------------
 src/math/fmodf.c      | 139 ++++++++--------------
 src/math/fmodl.c      | 213 +++++++++++++--------------------
 src/math/modf.c       |  31 +++--
 src/math/modff.c      |  29 ++---
 src/math/modfl.c      | 117 ++++++------------
 src/math/remainder.c  |  67 +----------
 src/math/remainderf.c |  60 +---------
 src/math/remquo.c     | 211 ++++++++++-----------------------
 src/math/remquof.c    | 166 ++++++++++----------------
 src/math/remquol.c    | 270 ++++++++++++++++--------------------------
 11 files changed, 472 insertions(+), 1010 deletions(-)

diff --git a/src/math/fmod.c b/src/math/fmod.c
index 84a1b4ac..6849722b 100644
--- a/src/math/fmod.c
+++ b/src/math/fmod.c
@@ -1,145 +1,68 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_fmod.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * fmod(x,y)
- * Return x mod y in exact arithmetic
- * Method: shift and subtract
- */
-
-#include "libm.h"
-
-static const double Zero[] = {0.0, -0.0,};
+#include <math.h>
+#include <stdint.h>
 
 double fmod(double x, double y)
 {
-	int32_t n,hx,hy,hz,ix,iy,sx,i;
-	uint32_t lx,ly,lz;
+	union {double f; uint64_t i;} ux = {x}, uy = {y};
+	int ex = ux.i>>52 & 0x7ff;
+	int ey = uy.i>>52 & 0x7ff;
+	int sx = ux.i>>63;
+	uint64_t i;
 
-	EXTRACT_WORDS(hx, lx, x);
-	EXTRACT_WORDS(hy, ly, y);
-	sx = hx & 0x80000000;  /* sign of x */
-	hx ^= sx;              /* |x| */
-	hy &= 0x7fffffff;      /* |y| */
+	/* in the followings uxi should be ux.i, but then gcc wrongly adds */
+	/* float load/store to inner loops ruining performance and code size */
+	uint64_t uxi = ux.i;
 
-	/* purge off exception values */
-	if ((hy|ly) == 0 || hx >= 0x7ff00000 ||  /* y=0,or x not finite */
-	    (hy|((ly|-ly)>>31)) > 0x7ff00000)    /* or y is NaN */
+	if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
 		return (x*y)/(x*y);
-	if (hx <= hy) {
-		if (hx < hy || lx < ly)  /* |x| < |y| */
-			return x;
-		if (lx == ly)            /* |x| = |y|, return x*0 */
-			return Zero[(uint32_t)sx>>31];
+	if (uxi<<1 <= uy.i<<1) {
+		if (uxi<<1 == uy.i<<1)
+			return 0*x;
+		return x;
 	}
 
-	/* determine ix = ilogb(x) */
-	if (hx < 0x00100000) {  /* subnormal x */
-		if (hx == 0) {
-			for (ix = -1043, i = lx; i > 0; i <<= 1)
-				ix -= 1;
-		} else {
-			for (ix = -1022, i = hx<<11; i > 0; i <<= 1)
-				ix -= 1;
-		}
-	} else
-		ix = (hx>>20) - 1023;
-
-	/* determine iy = ilogb(y) */
-	if (hy < 0x00100000) {  /* subnormal y */
-		if (hy == 0) {
-			for (iy = -1043, i = ly; i > 0; i <<= 1)
-				iy -= 1;
-		} else {
-			for (iy = -1022, i = hy<<11; i > 0; i <<= 1)
-				iy -= 1;
-		}
-	} else
-		iy = (hy>>20) - 1023;
-
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	if (ix >= -1022)
-		hx = 0x00100000|(0x000fffff&hx);
-	else {       /* subnormal x, shift x to normal */
-		n = -1022-ix;
-		if (n <= 31) {
-			hx = (hx<<n)|(lx>>(32-n));
-			lx <<= n;
-		} else {
-			hx = lx<<(n-32);
-			lx = 0;
-		}
+	/* normalize x and y */
+	if (!ex) {
+		for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
+		uxi <<= -ex + 1;
+	} else {
+		uxi &= -1ULL >> 12;
+		uxi |= 1ULL << 52;
 	}
-	if(iy >= -1022)
-		hy = 0x00100000|(0x000fffff&hy);
-	else {       /* subnormal y, shift y to normal */
-		n = -1022-iy;
-		if (n <= 31) {
-			hy = (hy<<n)|(ly>>(32-n));
-			ly <<= n;
-		} else {
-			hy = ly<<(n-32);
-			ly = 0;
-		}
+	if (!ey) {
+		for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
+		uy.i <<= -ey + 1;
+	} else {
+		uy.i &= -1ULL >> 12;
+		uy.i |= 1ULL << 52;
 	}
 
-	/* fix point fmod */
-	n = ix - iy;
-	while (n--) {
-		hz = hx-hy;
-		lz = lx-ly;
-		if (lx < ly)
-			hz -= 1;
-		if (hz < 0) {
-			hx = hx+hx+(lx>>31);
-			lx = lx+lx;
-		} else {
-			if ((hz|lz) == 0)   /* return sign(x)*0 */
-				return Zero[(uint32_t)sx>>31];
-			hx = hz+hz+(lz>>31);
-			lx = lz+lz;
+	/* x mod y */
+	for (; ex > ey; ex--) {
+		i = uxi - uy.i;
+		if (i >> 63 == 0) {
+			if (i == 0)
+				return 0*x;
+			uxi = i;
 		}
+		uxi <<= 1;
 	}
-	hz = hx-hy;
-	lz = lx-ly;
-	if (lx < ly)
-		hz -= 1;
-	if (hz >= 0) {
-		hx = hz;
-		lx = lz;
+	i = uxi - uy.i;
+	if (i >> 63 == 0) {
+		if (i == 0)
+			return 0*x;
+		uxi = i;
 	}
+	for (; uxi>>52 == 0; uxi <<= 1, ex--);
 
-	/* convert back to floating value and restore the sign */
-	if ((hx|lx) == 0)          /* return sign(x)*0 */
-		return Zero[(uint32_t)sx>>31];
-	while (hx < 0x00100000) {  /* normalize x */
-		hx = hx+hx+(lx>>31);
-		lx = lx+lx;
-		iy -= 1;
-	}
-	if (iy >= -1022) {         /* normalize output */
-		hx = ((hx-0x00100000)|((iy+1023)<<20));
-		INSERT_WORDS(x, hx|sx, lx);
-	} else {                   /* subnormal output */
-		n = -1022 - iy;
-		if (n <= 20) {
-			lx = (lx>>n)|((uint32_t)hx<<(32-n));
-			hx >>= n;
-		} else if (n <= 31) {
-			lx = (hx<<(32-n))|(lx>>n);
-			hx = sx;
-		} else {
-			lx = hx>>(n-32); hx = sx;
-		}
-		INSERT_WORDS(x, hx|sx, lx);
+	/* scale result */
+	if (ex > 0) {
+		uxi -= 1ULL << 52;
+		uxi |= (uint64_t)ex << 52;
+	} else {
+		uxi >>= -ex + 1;
 	}
-	return x;  /* exact output */
+	uxi |= (uint64_t)sx << 63;
+	ux.i = uxi;
+	return ux.f;
 }
diff --git a/src/math/fmodf.c b/src/math/fmodf.c
index 837e9371..ff58f933 100644
--- a/src/math/fmodf.c
+++ b/src/math/fmodf.c
@@ -1,104 +1,65 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_fmodf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * fmodf(x,y)
- * Return x mod y in exact arithmetic
- * Method: shift and subtract
- */
-
-#include "libm.h"
-
-static const float Zero[] = {0.0, -0.0,};
+#include <math.h>
+#include <stdint.h>
 
 float fmodf(float x, float y)
 {
-	int32_t n,hx,hy,hz,ix,iy,sx,i;
+	union {float f; uint32_t i;} ux = {x}, uy = {y};
+	int ex = ux.i>>23 & 0xff;
+	int ey = uy.i>>23 & 0xff;
+	uint32_t sx = ux.i & 0x80000000;
+	uint32_t i;
+	uint32_t uxi = ux.i;
 
-	GET_FLOAT_WORD(hx, x);
-	GET_FLOAT_WORD(hy, y);
-	sx = hx & 0x80000000;  /* sign of x */
-	hx ^= sx;              /* |x| */
-	hy &= 0x7fffffff;      /* |y| */
-
-	/* purge off exception values */
-	if (hy == 0 || hx >= 0x7f800000 ||  /* y=0,or x not finite */
-	    hy > 0x7f800000)                /* or y is NaN */
+	if (uy.i<<1 == 0 || isnan(y) || ex == 0xff)
 		return (x*y)/(x*y);
-	if (hx < hy)                        /* |x| < |y| */
+	if (uxi<<1 <= uy.i<<1) {
+		if (uxi<<1 == uy.i<<1)
+			return 0*x;
 		return x;
-	if (hx == hy)                       /* |x| = |y|, return x*0 */
-		return Zero[(uint32_t)sx>>31];
-
-	/* determine ix = ilogb(x) */
-	if (hx < 0x00800000) {     /* subnormal x */
-		for (ix = -126, i = hx<<8; i > 0; i <<= 1)
-			ix -= 1;
-	} else
-		ix = (hx>>23) - 127;
-
-	/* determine iy = ilogb(y) */
-	if (hy < 0x00800000) {     /* subnormal y */
-		for (iy = -126, i = hy<<8; i >= 0; i <<= 1)
-			iy -= 1;
-	} else
-		iy = (hy>>23) - 127;
+	}
 
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	if (ix >= -126)
-		hx = 0x00800000|(0x007fffff&hx);
-	else {          /* subnormal x, shift x to normal */
-		n = -126-ix;
-		hx = hx<<n;
+	/* normalize x and y */
+	if (!ex) {
+		for (i = uxi<<9; i>>31 == 0; ex--, i <<= 1);
+		uxi <<= -ex + 1;
+	} else {
+		uxi &= -1U >> 9;
+		uxi |= 1U << 23;
 	}
-	if (iy >= -126)
-		hy = 0x00800000|(0x007fffff&hy);
-	else {          /* subnormal y, shift y to normal */
-		n = -126-iy;
-		hy = hy<<n;
+	if (!ey) {
+		for (i = uy.i<<9; i>>31 == 0; ey--, i <<= 1);
+		uy.i <<= -ey + 1;
+	} else {
+		uy.i &= -1U >> 9;
+		uy.i |= 1U << 23;
 	}
 
-	/* fix point fmod */
-	n = ix - iy;
-	while (n--) {
-		hz = hx-hy;
-		if (hz<0)
-			hx = hx+hx;
-		else {
-			if(hz == 0)   /* return sign(x)*0 */
-				return Zero[(uint32_t)sx>>31];
-			hx = hz+hz;
+	/* x mod y */
+	for (; ex > ey; ex--) {
+		i = uxi - uy.i;
+		if (i >> 31 == 0) {
+			if (i == 0)
+				return 0*x;
+			uxi = i;
 		}
+		uxi <<= 1;
 	}
-	hz = hx-hy;
-	if (hz >= 0)
-		hx = hz;
-
-	/* convert back to floating value and restore the sign */
-	if (hx == 0)               /* return sign(x)*0 */
-		return Zero[(uint32_t)sx>>31];
-	while (hx < 0x00800000) {  /* normalize x */
-		hx = hx+hx;
-		iy -= 1;
+	i = uxi - uy.i;
+	if (i >> 31 == 0) {
+		if (i == 0)
+			return 0*x;
+		uxi = i;
 	}
-	if (iy >= -126) {          /* normalize output */
-		hx = ((hx-0x00800000)|((iy+127)<<23));
-		SET_FLOAT_WORD(x, hx|sx);
-	} else {                   /* subnormal output */
-		n = -126 - iy;
-		hx >>= n;
-		SET_FLOAT_WORD(x, hx|sx);
+	for (; uxi>>23 == 0; uxi <<= 1, ex--);
+
+	/* scale result up */
+	if (ex > 0) {
+		uxi -= 1U << 23;
+		uxi |= (uint32_t)ex << 23;
+	} else {
+		uxi >>= -ex + 1;
 	}
-	return x;  /* exact output */
+	uxi |= sx;
+	ux.i = uxi;
+	return ux.f;
 }
diff --git a/src/math/fmodl.c b/src/math/fmodl.c
index b930c49d..54af6a3f 100644
--- a/src/math/fmodl.c
+++ b/src/math/fmodl.c
@@ -1,15 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_fmodl.c */
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -18,141 +6,100 @@ long double fmodl(long double x, long double y)
 	return fmod(x, y);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-
-#define BIAS (LDBL_MAX_EXP - 1)
-
-#if LDBL_MANL_SIZE > 32
-typedef uint64_t manl_t;
-#else
-typedef uint32_t manl_t;
-#endif
-
-#if LDBL_MANH_SIZE > 32
-typedef uint64_t manh_t;
-#else
-typedef uint32_t manh_t;
-#endif
-
-/*
- * These macros add and remove an explicit integer bit in front of the
- * fractional mantissa, if the architecture doesn't have such a bit by
- * default already.
- */
-#ifdef LDBL_IMPLICIT_NBIT
-#define SET_NBIT(hx)    ((hx) | (1ULL << LDBL_MANH_SIZE))
-#define HFRAC_BITS      LDBL_MANH_SIZE
-#else
-#define SET_NBIT(hx)    (hx)
-#define HFRAC_BITS      (LDBL_MANH_SIZE - 1)
-#endif
-
-#define MANL_SHIFT      (LDBL_MANL_SIZE - 1)
-
-static const long double Zero[] = {0.0, -0.0,};
-
-/*
- * fmodl(x,y)
- * Return x mod y in exact arithmetic
- * Method: shift and subtract
- *
- * Assumptions:
- * - The low part of the mantissa fits in a manl_t exactly.
- * - The high part of the mantissa fits in an int64_t with enough room
- *   for an explicit integer bit in front of the fractional bits.
- */
 long double fmodl(long double x, long double y)
 {
-	union IEEEl2bits ux, uy;
-	int64_t hx,hz;  /* We need a carry bit even if LDBL_MANH_SIZE is 32. */
-	manh_t hy;
-	manl_t lx,ly,lz;
-	int ix,iy,n,sx;
-
-	ux.e = x;
-	uy.e = y;
-	sx = ux.bits.sign;
+	union ldshape ux = {x}, uy = {y};
+	int ex = ux.i.se & 0x7fff;
+	int ey = uy.i.se & 0x7fff;
+	int sx = ux.i.se & 0x8000;
 
-	/* purge off exception values */
-	if ((uy.bits.exp|uy.bits.manh|uy.bits.manl) == 0 || /* y=0 */
-	    ux.bits.exp == BIAS + LDBL_MAX_EXP ||           /* or x not finite */
-	    (uy.bits.exp == BIAS + LDBL_MAX_EXP &&
-	     ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl) != 0)) /* or y is NaN */
+	if (y == 0 || isnan(y) || ex == 0x7fff)
 		return (x*y)/(x*y);
-	if (ux.bits.exp <= uy.bits.exp) {
-		if (ux.bits.exp < uy.bits.exp ||
-		    (ux.bits.manh<=uy.bits.manh &&
-		     (ux.bits.manh<uy.bits.manh ||
-		      ux.bits.manl<uy.bits.manl)))  /* |x|<|y| return x or x-y */
-			return x;
-		if (ux.bits.manh==uy.bits.manh && ux.bits.manl==uy.bits.manl)
-			return Zero[sx];  /* |x| = |y| return x*0 */
+	ux.i.se = ex;
+	uy.i.se = ey;
+	if (ux.f <= uy.f) {
+		if (ux.f == uy.f)
+			return 0*x;
+		return x;
 	}
 
-	/* determine ix = ilogb(x) */
-	if (ux.bits.exp == 0) {  /* subnormal x */
-		ux.e *= 0x1.0p512;
-		ix = ux.bits.exp - (BIAS + 512);
-	} else {
-		ix = ux.bits.exp - BIAS;
+	/* normalize x and y */
+	if (!ex) {
+		ux.f *= 0x1p120f;
+		ex = ux.i.se - 120;
 	}
-
-	/* determine iy = ilogb(y) */
-	if (uy.bits.exp == 0) {  /* subnormal y */
-		uy.e *= 0x1.0p512;
-		iy = uy.bits.exp - (BIAS + 512);
-	} else {
-		iy = uy.bits.exp - BIAS;
+	if (!ey) {
+		uy.f *= 0x1p120f;
+		ey = uy.i.se - 120;
 	}
 
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	hx = SET_NBIT(ux.bits.manh);
-	hy = SET_NBIT(uy.bits.manh);
-	lx = ux.bits.manl;
-	ly = uy.bits.manl;
-
-	/* fix point fmod */
-	n = ix - iy;
-
-	while (n--) {
-		hz = hx-hy;
-		lz = lx-ly;
-		if (lx < ly)
-			hz -= 1;
-		if (hz < 0) {
-			hx = hx+hx+(lx>>MANL_SHIFT);
-			lx = lx+lx;
+	/* x mod y */
+#if LDBL_MANT_DIG == 64
+	uint64_t i, mx, my;
+	mx = ux.i.m;
+	my = uy.i.m;
+	for (; ex > ey; ex--) {
+		i = mx - my;
+		if (mx >= my) {
+			if (i == 0)
+				return 0*x;
+			mx = 2*i;
+		} else if (2*mx < mx) {
+			mx = 2*mx - my;
 		} else {
-			if ((hz|lz)==0)   /* return sign(x)*0 */
-				return Zero[sx];
-			hx = hz+hz+(lz>>MANL_SHIFT);
-			lx = lz+lz;
+			mx = 2*mx;
 		}
 	}
-	hz = hx-hy;
-	lz = lx-ly;
-	if (lx < ly)
-		hz -= 1;
-	if (hz >= 0) {
-		hx = hz;
-		lx = lz;
+	i = mx - my;
+	if (mx >= my) {
+		if (i == 0)
+			return 0*x;
+		mx = i;
 	}
-
-	/* convert back to floating value and restore the sign */
-	if ((hx|lx) == 0)   /* return sign(x)*0 */
-		return Zero[sx];
-	while (hx < (1ULL<<HFRAC_BITS)) {  /* normalize x */
-		hx = hx+hx+(lx>>MANL_SHIFT);
-		lx = lx+lx;
-		iy -= 1;
+	for (; mx >> 63 == 0; mx *= 2, ex--);
+	ux.i.m = mx;
+#elif LDBL_MANT_DIG == 113
+	uint64_t hi, lo, xhi, xlo, yhi, ylo;
+	xhi = (ux.i2.hi & -1ULL>>16) | 1ULL<<48;
+	yhi = (uy.i2.hi & -1ULL>>16) | 1ULL<<48;
+	xlo = ux.i2.lo;
+	ylo = ux.i2.lo;
+	for (; ex > ey; ex--) {
+		hi = xhi - yhi;
+		lo = xlo - ylo;
+		if (xlo < ylo)
+			hi -= 1;
+		if (hi >> 63 == 0) {
+			if ((hi|lo) == 0)
+				return 0*x;
+			xhi = 2*hi + (lo>>63);
+			xlo = 2*lo;
+		} else {
+			xhi = 2*xhi + (xlo>>63);
+			xlo = 2*xlo;
+		}
 	}
-	ux.bits.manh = hx; /* The mantissa is truncated here if needed. */
-	ux.bits.manl = lx;
-	if (iy < LDBL_MIN_EXP) {
-		ux.bits.exp = iy + (BIAS + 512);
-		ux.e *= 0x1p-512;
-	} else {
-		ux.bits.exp = iy + BIAS;
+	hi = xhi - yhi;
+	lo = xlo - ylo;
+	if (xlo < ylo)
+		hi -= 1;
+	if (hi >> 63 == 0) {
+		if ((hi|lo) == 0)
+			return 0*x;
+		xhi = hi;
+		xlo = lo;
 	}
-	return ux.e;       /* exact output */
+	for (; xhi >> 48 == 0; xhi = 2*xhi + (xlo>>63), xlo = 2*xlo, ex--);
+	ux.i2.hi = xhi;
+	ux.i2.lo = xlo;
+#endif
+
+	/* scale result */
+	if (ex <= 0) {
+		ux.i.se = (ex+120)|sx;
+		ux.f *= 0x1p-120f;
+	} else
+		ux.i.se = ex|sx;
+	return ux.f;
 }
 #endif
diff --git a/src/math/modf.c b/src/math/modf.c
index 8f337ef0..1c8a1db9 100644
--- a/src/math/modf.c
+++ b/src/math/modf.c
@@ -2,36 +2,33 @@
 
 double modf(double x, double *iptr)
 {
-	union {double x; uint64_t n;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	uint64_t mask;
-	int e;
-
-	e = (int)(u.n>>52 & 0x7ff) - 0x3ff;
+	int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
 
 	/* no fractional part */
 	if (e >= 52) {
 		*iptr = x;
-		if (e == 0x400 && u.n<<12 != 0) /* nan */
+		if (e == 0x400 && u.i<<12 != 0) /* nan */
 			return x;
-		u.n &= (uint64_t)1<<63;
-		return u.x;
+		u.i &= 1ULL<<63;
+		return u.f;
 	}
 
 	/* no integral part*/
 	if (e < 0) {
-		u.n &= (uint64_t)1<<63;
-		*iptr = u.x;
+		u.i &= 1ULL<<63;
+		*iptr = u.f;
 		return x;
 	}
 
-	mask = (uint64_t)-1>>12 >> e;
-	if ((u.n & mask) == 0) {
+	mask = -1ULL>>12>>e;
+	if ((u.i & mask) == 0) {
 		*iptr = x;
-		u.n &= (uint64_t)1<<63;
-		return u.x;
+		u.i &= 1ULL<<63;
+		return u.f;
 	}
-	u.n &= ~mask;
-	*iptr = u.x;
-	STRICT_ASSIGN(double, x, x - *iptr);
-	return x;
+	u.i &= ~mask;
+	*iptr = u.f;
+	return x - u.f;
 }
diff --git a/src/math/modff.c b/src/math/modff.c
index 90caf316..639514ef 100644
--- a/src/math/modff.c
+++ b/src/math/modff.c
@@ -2,36 +2,33 @@
 
 float modff(float x, float *iptr)
 {
-	union {float x; uint32_t n;} u = {x};
+	union {float f; uint32_t i;} u = {x};
 	uint32_t mask;
-	int e;
-
-	e = (int)(u.n>>23 & 0xff) - 0x7f;
+	int e = (int)(u.i>>23 & 0xff) - 0x7f;
 
 	/* no fractional part */
 	if (e >= 23) {
 		*iptr = x;
-		if (e == 0x80 && u.n<<9 != 0) { /* nan */
+		if (e == 0x80 && u.i<<9 != 0) { /* nan */
 			return x;
 		}
-		u.n &= 0x80000000;
-		return u.x;
+		u.i &= 0x80000000;
+		return u.f;
 	}
 	/* no integral part */
 	if (e < 0) {
-		u.n &= 0x80000000;
-		*iptr = u.x;
+		u.i &= 0x80000000;
+		*iptr = u.f;
 		return x;
 	}
 
 	mask = 0x007fffff>>e;
-	if ((u.n & mask) == 0) {
+	if ((u.i & mask) == 0) {
 		*iptr = x;
-		u.n &= 0x80000000;
-		return u.x;
+		u.i &= 0x80000000;
+		return u.f;
 	}
-	u.n &= ~mask;
-	*iptr = u.x;
-	STRICT_ASSIGN(float, x, x - *iptr);
-	return x;
+	u.i &= ~mask;
+	*iptr = u.f;
+	return x - u.f;
 }
diff --git a/src/math/modfl.c b/src/math/modfl.c
index bbfcdb8a..fc85bb58 100644
--- a/src/math/modfl.c
+++ b/src/math/modfl.c
@@ -1,40 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_modfl.c */
-/*-
- * Copyright (c) 2007 David Schultz <das@FreeBSD.ORG>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Derived from s_modf.c, which has the following Copyright:
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -43,58 +6,46 @@ long double modfl(long double x, long double *iptr)
 	return modf(x, (double *)iptr);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-
-#if LDBL_MANL_SIZE > 32
-#define MASK    ((uint64_t)-1)
-#else
-#define MASK    ((uint32_t)-1)
+#if LDBL_MANT_DIG == 64
+#define TOINT 0x1p63
+#elif LDBL_MANT_DIG == 113
+#define TOINT 0x1p112
 #endif
-/* Return the last n bits of a word, representing the fractional part. */
-#define GETFRAC(bits, n)        ((bits) & ~(MASK << (n)))
-/* The number of fraction bits in manh, not counting the integer bit */
-#define HIBITS  (LDBL_MANT_DIG - LDBL_MANL_SIZE)
-
-static const long double zero[] = { 0.0, -0.0 };
-
 long double modfl(long double x, long double *iptr)
 {
-	union IEEEl2bits ux;
-	int e;
+	union ldshape u = {x};
+	uint64_t mask;
+	int e = (u.i.se & 0x7fff) - 0x3fff;
+	int s = u.i.se >> 15;
+	long double absx;
+	long double y;
 
-	ux.e = x;
-	e = ux.bits.exp - LDBL_MAX_EXP + 1;
-	if (e < HIBITS) {                       /* Integer part is in manh. */
-		if (e < 0) {                    /* |x|<1 */
-			*iptr = zero[ux.bits.sign];
-			return x;
-		}
-		if ((GETFRAC(ux.bits.manh, HIBITS - 1 - e)|ux.bits.manl) == 0) {
-			/* x is an integer. */
-			*iptr = x;
-			return zero[ux.bits.sign];
-		}
-		/* Clear all but the top e+1 bits. */
-		ux.bits.manh >>= HIBITS - 1 - e;
-		ux.bits.manh <<= HIBITS - 1 - e;
-		ux.bits.manl = 0;
-		*iptr = ux.e;
-		return x - ux.e;
-	} else if (e >= LDBL_MANT_DIG - 1) {    /* x has no fraction part. */
+	/* no fractional part */
+	if (e >= LDBL_MANT_DIG-1) {
 		*iptr = x;
-		if (e == LDBL_MAX_EXP && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)) /* nan */
+		if (isnan(x))
 			return x;
-		return zero[ux.bits.sign];
-	} else {                                /* Fraction part is in manl. */
-		if (GETFRAC(ux.bits.manl, LDBL_MANT_DIG - 1 - e) == 0) {
-			/* x is integral. */
-			*iptr = x;
-			return zero[ux.bits.sign];
-		}
-		/* Clear all but the top e+1 bits. */
-		ux.bits.manl >>= LDBL_MANT_DIG - 1 - e;
-		ux.bits.manl <<= LDBL_MANT_DIG - 1 - e;
-		*iptr = ux.e;
-		return x - ux.e;
+		return s ? -0.0 : 0.0;
+	}
+
+	/* no integral part*/
+	if (e < 0) {
+		*iptr = s ? -0.0 : 0.0;
+		return x;
+	}
+
+	/* raises spurious inexact */
+	absx = s ? -x : x;
+	y = absx + TOINT - TOINT - absx;
+	if (y == 0) {
+		*iptr = x;
+		return s ? -0.0 : 0.0;
 	}
+	if (y > 0)
+		y -= 1;
+	if (s)
+		y = -y;
+	*iptr = x + y;
+	return -y;
 }
 #endif
diff --git a/src/math/remainder.c b/src/math/remainder.c
index 2dede3a8..ed5c477e 100644
--- a/src/math/remainder.c
+++ b/src/math/remainder.c
@@ -1,66 +1,7 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_remainder.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* remainder(x,p)
- * Return :
- *      returns  x REM p  =  x - [x/p]*p as if in infinite
- *      precise arithmetic, where [x/p] is the (infinite bit)
- *      integer nearest x/p (in half way case choose the even one).
- * Method :
- *      Based on fmod() return x-[x/p]chopped*p exactlp.
- */
+#include <math.h>
 
-#include "libm.h"
-
-double remainder(double x, double p)
+double remainder(double x, double y)
 {
-	int32_t hx,hp;
-	uint32_t sx,lx,lp;
-	double p_half;
-
-	EXTRACT_WORDS(hx, lx, x);
-	EXTRACT_WORDS(hp, lp, p);
-	sx = hx & 0x80000000;
-	hp &= 0x7fffffff;
-	hx &= 0x7fffffff;
-
-	/* purge off exception values */
-	if ((hp|lp) == 0 ||                                  /* p = 0 */
-	    hx >= 0x7ff00000 ||                              /* x not finite */
-	    (hp >= 0x7ff00000 && (hp-0x7ff00000 | lp) != 0)) /* p is NaN */
-		return (x*p)/(x*p);
-
-	if (hp <= 0x7fdfffff)
-		x = fmod(x, p+p);  /* now x < 2p */
-	if (((hx-hp)|(lx-lp)) == 0)
-		return 0.0*x;
-	x = fabs(x);
-	p = fabs(p);
-	if (hp < 0x00200000) {
-		if (x + x > p) {
-			x -= p;
-			if (x + x >= p)
-				x -= p;
-		}
-	} else {
-		p_half = 0.5*p;
-		if (x > p_half) {
-			x -= p;
-			if (x >= p_half)
-				x -= p;
-		}
-	}
-	GET_HIGH_WORD(hx, x);
-	if ((hx&0x7fffffff) == 0)
-		hx = 0;
-	SET_HIGH_WORD(x, hx^sx);
-	return x;
+	int q;
+	return remquo(x, y, &q);
 }
diff --git a/src/math/remainderf.c b/src/math/remainderf.c
index 77671039..b418bbff 100644
--- a/src/math/remainderf.c
+++ b/src/math/remainderf.c
@@ -1,59 +1,7 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_remainderf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+#include <math.h>
 
-#include "libm.h"
-
-float remainderf(float x, float p)
+float remainderf(float x, float y)
 {
-	int32_t hx,hp;
-	uint32_t sx;
-	float p_half;
-
-	GET_FLOAT_WORD(hx, x);
-	GET_FLOAT_WORD(hp, p);
-	sx = hx & 0x80000000;
-	hp &= 0x7fffffff;
-	hx &= 0x7fffffff;
-
-	/* purge off exception values */
-	if (hp == 0 || hx >= 0x7f800000 || hp > 0x7f800000)  /* p = 0, x not finite, p is NaN */
-		return (x*p)/(x*p);
-
-	if (hp <= 0x7effffff)
-		x = fmodf(x, p + p);  /* now x < 2p */
-	if (hx - hp == 0)
-		return 0.0f*x;
-	x = fabsf(x);
-	p = fabsf(p);
-	if (hp < 0x01000000) {
-		if (x + x > p) {
-			x -= p;
-			if (x + x >= p)
-				x -= p;
-		}
-	} else {
-		p_half = 0.5f*p;
-		if (x > p_half) {
-			x -= p;
-			if (x >= p_half)
-				x -= p;
-		}
-	}
-	GET_FLOAT_WORD(hx, x);
-	if ((hx & 0x7fffffff) == 0)
-		hx = 0;
-	SET_FLOAT_WORD(x, hx ^ sx);
-	return x;
+	int q;
+	return remquof(x, y, &q);
 }
diff --git a/src/math/remquo.c b/src/math/remquo.c
index 085466e6..59d5ad57 100644
--- a/src/math/remquo.c
+++ b/src/math/remquo.c
@@ -1,171 +1,82 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_remquo.c */
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * Return the IEEE remainder and set *quo to the last n bits of the
- * quotient, rounded to the nearest integer.  We choose n=31 because
- * we wind up computing all the integer bits of the quotient anyway as
- * a side-effect of computing the remainder by the shift and subtract
- * method.  In practice, this is far more bits than are needed to use
- * remquo in reduction algorithms.
- */
-
-#include "libm.h"
-
-static const double Zero[] = {0.0, -0.0,};
+#include <math.h>
+#include <stdint.h>
 
 double remquo(double x, double y, int *quo)
 {
-	int32_t n,hx,hy,hz,ix,iy,sx,i;
-	uint32_t lx,ly,lz,q,sxy;
-
-	EXTRACT_WORDS(hx, lx, x);
-	EXTRACT_WORDS(hy, ly, y);
-	sxy = (hx ^ hy) & 0x80000000;
-	sx = hx & 0x80000000;   /* sign of x */
-	hx ^= sx;               /* |x| */
-	hy &= 0x7fffffff;       /* |y| */
+	union {double f; uint64_t i;} ux = {x}, uy = {y};
+	int ex = ux.i>>52 & 0x7ff;
+	int ey = uy.i>>52 & 0x7ff;
+	int sx = ux.i>>63;
+	int sy = uy.i>>63;
+	uint32_t q;
+	uint64_t i;
+	uint64_t uxi = ux.i;
 
-	/* purge off exception values */
-	if ((hy|ly) == 0 || hx >= 0x7ff00000 ||  /* y = 0, or x not finite */
-	    (hy|((ly|-ly)>>31)) > 0x7ff00000)    /* or y is NaN */
+	*quo = 0;
+	if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
 		return (x*y)/(x*y);
-	if (hx <= hy) {
-		if (hx < hy || lx < ly) {  /* |x| < |y| return x or x-y */
-			q = 0;
-			goto fixup;
-		}
-		if (lx == ly) {            /* |x| = |y| return x*0 */
-			*quo = sxy ? -1 : 1;
-			return Zero[(uint32_t)sx>>31];
-		}
-	}
+	if (ux.i<<1 == 0)
+		return x;
 
-	// FIXME: use ilogb?
-
-	/* determine ix = ilogb(x) */
-	if (hx < 0x00100000) {  /* subnormal x */
-		if (hx == 0) {
-			for (ix = -1043, i=lx; i>0; i<<=1) ix--;
-		} else {
-			for (ix = -1022, i=hx<<11; i>0; i<<=1) ix--;
-		}
-	} else
-		ix = (hx>>20) - 1023;
-
-	/* determine iy = ilogb(y) */
-	if (hy < 0x00100000) {  /* subnormal y */
-		if (hy == 0) {
-			for (iy = -1043, i=ly; i>0; i<<=1) iy--;
-		} else {
-			for (iy = -1022, i=hy<<11; i>0; i<<=1) iy--;
-		}
-	} else
-		iy = (hy>>20) - 1023;
-
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	if (ix >= -1022)
-		hx = 0x00100000|(0x000fffff&hx);
-	else {  /* subnormal x, shift x to normal */
-		n = -1022 - ix;
-		if (n <= 31) {
-			hx = (hx<<n)|(lx>>(32-n));
-			lx <<= n;
-		} else {
-			hx = lx<<(n-32);
-			lx = 0;
-		}
+	/* normalize x and y */
+	if (!ex) {
+		for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
+		uxi <<= -ex + 1;
+	} else {
+		uxi &= -1ULL >> 12;
+		uxi |= 1ULL << 52;
 	}
-	if (iy >= -1022)
-		hy = 0x00100000|(0x000fffff&hy);
-	else {  /* subnormal y, shift y to normal */
-		n = -1022 - iy;
-		if (n <= 31) {
-			hy = (hy<<n)|(ly>>(32-n));
-			ly <<= n;
-		} else {
-			hy = ly<<(n-32);
-			ly = 0;
-		}
+	if (!ey) {
+		for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
+		uy.i <<= -ey + 1;
+	} else {
+		uy.i &= -1ULL >> 12;
+		uy.i |= 1ULL << 52;
 	}
 
-	/* fix point fmod */
-	n = ix - iy;
 	q = 0;
-	while (n--) {
-		hz = hx - hy;
-		lz = lx - ly;
-		if (lx < ly)
-			hz--;
-		if (hz < 0) {
-			hx = hx + hx + (lx>>31);
-			lx = lx + lx;
-		} else {
-			hx = hz + hz + (lz>>31);
-			lx = lz + lz;
+	if (ex < ey) {
+		if (ex+1 == ey)
+			goto end;
+		return x;
+	}
+
+	/* x mod y */
+	for (; ex > ey; ex--) {
+		i = uxi - uy.i;
+		if (i >> 63 == 0) {
+			uxi = i;
 			q++;
 		}
+		uxi <<= 1;
 		q <<= 1;
 	}
-	hz = hx - hy;
-	lz = lx - ly;
-	if (lx < ly)
-		hz--;
-	if (hz >= 0) {
-		hx = hz;
-		lx = lz;
+	i = uxi - uy.i;
+	if (i >> 63 == 0) {
+		uxi = i;
 		q++;
 	}
-
-	/* convert back to floating value and restore the sign */
-	if ((hx|lx) == 0) {  /* return sign(x)*0 */
-		q &= 0x7fffffff;
-		*quo = sxy ? -q : q;
-		return Zero[(uint32_t)sx>>31];
-	}
-	while (hx < 0x00100000) {  /* normalize x */
-		hx = hx + hx + (lx>>31);
-		lx = lx + lx;
-		iy--;
+	if (uxi == 0)
+		ex = -60;
+	else
+		for (; uxi>>52 == 0; uxi <<= 1, ex--);
+end:
+	/* scale result and decide between |x| and |x|-|y| */
+	if (ex > 0) {
+		uxi -= 1ULL << 52;
+		uxi |= (uint64_t)ex << 52;
+	} else {
+		uxi >>= -ex + 1;
 	}
-	if (iy >= -1022) {         /* normalize output */
-		hx = (hx-0x00100000)|((iy+1023)<<20);
-	} else {                   /* subnormal output */
-		n = -1022 - iy;
-		if (n <= 20) {
-			lx = (lx>>n)|((uint32_t)hx<<(32-n));
-			hx >>= n;
-		} else if (n <= 31) {
-			lx = (hx<<(32-n))|(lx>>n);
-			hx = 0;
-		} else {
-			lx = hx>>(n-32);
-			hx = 0;
-		}
-	}
-fixup:
-	INSERT_WORDS(x, hx, lx);
-	y = fabs(y);
-	if (y < 0x1p-1021) {
-		if (x + x > y || (x + x == y && (q & 1))) {
-			q++;
-			x -= y;
-		}
-	} else if (x > 0.5*y || (x == 0.5*y && (q & 1))) {
-		q++;
+	ux.i = uxi;
+	x = ux.f;
+	if (sy)
+		y = -y;
+	if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
 		x -= y;
+		q++;
 	}
-	GET_HIGH_WORD(hx, x);
-	SET_HIGH_WORD(x, hx ^ sx);
 	q &= 0x7fffffff;
-	*quo = sxy ? -q : q;
-	return x;
+	*quo = sx^sy ? -(int)q : (int)q;
+	return sx ? -x : x;
 }
diff --git a/src/math/remquof.c b/src/math/remquof.c
index 536a050a..2f41ff70 100644
--- a/src/math/remquof.c
+++ b/src/math/remquof.c
@@ -1,126 +1,82 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_remquof.c */
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/*
- * Return the IEEE remainder and set *quo to the last n bits of the
- * quotient, rounded to the nearest integer.  We choose n=31 because
- * we wind up computing all the integer bits of the quotient anyway as
- * a side-effect of computing the remainder by the shift and subtract
- * method.  In practice, this is far more bits than are needed to use
- * remquo in reduction algorithms.
- */
-
-#include "libm.h"
-
-static const float Zero[] = {0.0, -0.0,};
+#include <math.h>
+#include <stdint.h>
 
 float remquof(float x, float y, int *quo)
 {
-	int32_t n,hx,hy,hz,ix,iy,sx,i;
-	uint32_t q,sxy;
+	union {float f; uint32_t i;} ux = {x}, uy = {y};
+	int ex = ux.i>>23 & 0xff;
+	int ey = uy.i>>23 & 0xff;
+	int sx = ux.i>>31;
+	int sy = uy.i>>31;
+	uint32_t q;
+	uint32_t i;
+	uint32_t uxi = ux.i;
 
-	GET_FLOAT_WORD(hx, x);
-	GET_FLOAT_WORD(hy, y);
-	sxy = (hx ^ hy) & 0x80000000;
-	sx = hx & 0x80000000;   /* sign of x */
-	hx ^= sx;               /* |x| */
-	hy &= 0x7fffffff;       /* |y| */
-
-	/* purge off exception values */
-	if (hy == 0 || hx >= 0x7f800000 || hy > 0x7f800000) /* y=0,NaN;or x not finite */
+	*quo = 0;
+	if (uy.i<<1 == 0 || isnan(y) || ex == 0xff)
 		return (x*y)/(x*y);
-	if (hx < hy) {       /* |x| < |y| return x or x-y */
-		q = 0;
-		goto fixup;
-	} else if(hx==hy) {  /* |x| = |y| return x*0*/
-		*quo = sxy ? -1 : 1;
-		return Zero[(uint32_t)sx>>31];
-	}
+	if (ux.i<<1 == 0)
+		return x;
 
-	/* determine ix = ilogb(x) */
-	if (hx < 0x00800000) {  /* subnormal x */
-		for (ix = -126, i=hx<<8; i>0; i<<=1) ix--;
-	} else
-		ix = (hx>>23) - 127;
-
-	/* determine iy = ilogb(y) */
-	if (hy < 0x00800000) {  /* subnormal y */
-		for (iy = -126, i=hy<<8; i>0; i<<=1) iy--;
-	} else
-		iy = (hy>>23) - 127;
-
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	if (ix >= -126)
-		hx = 0x00800000|(0x007fffff&hx);
-	else {  /* subnormal x, shift x to normal */
-		n = -126 - ix;
-		hx <<= n;
+	/* normalize x and y */
+	if (!ex) {
+		for (i = uxi<<9; i>>31 == 0; ex--, i <<= 1);
+		uxi <<= -ex + 1;
+	} else {
+		uxi &= -1U >> 9;
+		uxi |= 1U << 23;
 	}
-	if (iy >= -126)
-		hy = 0x00800000|(0x007fffff&hy);
-	else {  /* subnormal y, shift y to normal */
-		n = -126 - iy;
-		hy <<= n;
+	if (!ey) {
+		for (i = uy.i<<9; i>>31 == 0; ey--, i <<= 1);
+		uy.i <<= -ey + 1;
+	} else {
+		uy.i &= -1U >> 9;
+		uy.i |= 1U << 23;
 	}
 
-	/* fix point fmod */
-	n = ix - iy;
 	q = 0;
-	while (n--) {
-		hz = hx - hy;
-		if (hz < 0)
-			hx = hx << 1;
-		else {
-			hx = hz << 1;
+	if (ex < ey) {
+		if (ex+1 == ey)
+			goto end;
+		return x;
+	}
+
+	/* x mod y */
+	for (; ex > ey; ex--) {
+		i = uxi - uy.i;
+		if (i >> 31 == 0) {
+			uxi = i;
 			q++;
 		}
+		uxi <<= 1;
 		q <<= 1;
 	}
-	hz = hx - hy;
-	if (hz >= 0) {
-		hx = hz;
+	i = uxi - uy.i;
+	if (i >> 31 == 0) {
+		uxi = i;
 		q++;
 	}
-
-	/* convert back to floating value and restore the sign */
-	if (hx == 0) {                             /* return sign(x)*0 */
-		q &= 0x7fffffff;
-		*quo = sxy ? -q : q;
-		return Zero[(uint32_t)sx>>31];
-	}
-	while (hx < 0x00800000) {  /* normalize x */
-		hx <<= 1;
-		iy--;
+	if (uxi == 0)
+		ex = -30;
+	else
+		for (; uxi>>23 == 0; uxi <<= 1, ex--);
+end:
+	/* scale result and decide between |x| and |x|-|y| */
+	if (ex > 0) {
+		uxi -= 1U << 23;
+		uxi |= (uint32_t)ex << 23;
+	} else {
+		uxi >>= -ex + 1;
 	}
-	if (iy >= -126) {          /* normalize output */
-		hx = (hx-0x00800000)|((iy+127)<<23);
-	} else {                   /* subnormal output */
-		n = -126 - iy;
-		hx >>= n;
-	}
-fixup:
-	SET_FLOAT_WORD(x,hx);
-	y = fabsf(y);
-	if (y < 0x1p-125f) {
-		if (x + x > y || (x + x == y && (q & 1))) {
-			q++;
-			x -= y;
-		}
-	} else if (x > 0.5f*y || (x == 0.5f*y && (q & 1))) {
-		q++;
+	ux.i = uxi;
+	x = ux.f;
+	if (sy)
+		y = -y;
+	if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
 		x -= y;
+		q++;
 	}
-	GET_FLOAT_WORD(hx, x);
-	SET_FLOAT_WORD(x, hx ^ sx);
 	q &= 0x7fffffff;
-	*quo = sxy ? -q : q;
-	return x;
+	*quo = sx^sy ? -(int)q : (int)q;
+	return sx ? -x : x;
 }
diff --git a/src/math/remquol.c b/src/math/remquol.c
index a2e11728..9b065c00 100644
--- a/src/math/remquol.c
+++ b/src/math/remquol.c
@@ -1,15 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_remquol.c */
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -18,177 +6,119 @@ long double remquol(long double x, long double y, int *quo)
 	return remquo(x, y, quo);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-
-#define BIAS (LDBL_MAX_EXP - 1)
-
-#if LDBL_MANL_SIZE > 32
-typedef uint64_t manl_t;
-#else
-typedef uint32_t manl_t;
-#endif
-
-#if LDBL_MANH_SIZE > 32
-typedef uint64_t manh_t;
-#else
-typedef uint32_t manh_t;
-#endif
-
-/*
- * These macros add and remove an explicit integer bit in front of the
- * fractional mantissa, if the architecture doesn't have such a bit by
- * default already.
- */
-#ifdef LDBL_IMPLICIT_NBIT
-#define SET_NBIT(hx)    ((hx) | (1ULL << LDBL_MANH_SIZE))
-#define HFRAC_BITS      LDBL_MANH_SIZE
-#else
-#define SET_NBIT(hx)    (hx)
-#define HFRAC_BITS      (LDBL_MANH_SIZE - 1)
-#endif
-
-#define MANL_SHIFT      (LDBL_MANL_SIZE - 1)
-
-static const long double Zero[] = {0.0, -0.0};
-
-/*
- * Return the IEEE remainder and set *quo to the last n bits of the
- * quotient, rounded to the nearest integer.  We choose n=31 because
- * we wind up computing all the integer bits of the quotient anyway as
- * a side-effect of computing the remainder by the shift and subtract
- * method.  In practice, this is far more bits than are needed to use
- * remquo in reduction algorithms.
- *
- * Assumptions:
- * - The low part of the mantissa fits in a manl_t exactly.
- * - The high part of the mantissa fits in an int64_t with enough room
- *   for an explicit integer bit in front of the fractional bits.
- */
 long double remquol(long double x, long double y, int *quo)
 {
-	union IEEEl2bits ux, uy;
-	int64_t hx,hz;  /* We need a carry bit even if LDBL_MANH_SIZE is 32. */
-	manh_t hy;
-	manl_t lx,ly,lz;
-	int ix,iy,n,q,sx,sxy;
-
-	ux.e = x;
-	uy.e = y;
-	sx = ux.bits.sign;
-	sxy = sx ^ uy.bits.sign;
-	ux.bits.sign = 0;       /* |x| */
-	uy.bits.sign = 0;       /* |y| */
-	x = ux.e;
-
-	/* purge off exception values */
-	if ((uy.bits.exp|uy.bits.manh|uy.bits.manl)==0 || /* y=0 */
-	    (ux.bits.exp == BIAS + LDBL_MAX_EXP) ||       /* or x not finite */
-	    (uy.bits.exp == BIAS + LDBL_MAX_EXP &&
-		((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)!=0)) /* or y is NaN */
+	union ldshape ux = {x}, uy = {y};
+	int ex = ux.i.se & 0x7fff;
+	int ey = uy.i.se & 0x7fff;
+	int sx = ux.i.se >> 15;
+	int sy = uy.i.se >> 15;
+	uint32_t q;
+
+	*quo = 0;
+	if (y == 0 || isnan(y) || ex == 0x7fff)
 		return (x*y)/(x*y);
-	if (ux.bits.exp <= uy.bits.exp) {
-		if ((ux.bits.exp < uy.bits.exp) ||
-		    (ux.bits.manh <= uy.bits.manh &&
-		     (ux.bits.manh < uy.bits.manh ||
-		      ux.bits.manl < uy.bits.manl))) {
-			q = 0;
-			goto fixup;       /* |x|<|y| return x or x-y */
-		}
-		if (ux.bits.manh == uy.bits.manh && ux.bits.manl == uy.bits.manl) {
-			*quo = sxy ? -1 : 1;
-			return Zero[sx];  /* |x|=|y| return x*0*/
-		}
-	}
-
-	/* determine ix = ilogb(x) */
-	if (ux.bits.exp == 0) {  /* subnormal x */
-		ux.e *= 0x1.0p512;
-		ix = ux.bits.exp - (BIAS + 512);
-	} else {
-		ix = ux.bits.exp - BIAS;
+	if (x == 0)
+		return x;
+
+	/* normalize x and y */
+	if (!ex) {
+		ux.i.se = ex;
+		ux.f *= 0x1p120f;
+		ex = ux.i.se - 120;
 	}
-
-	/* determine iy = ilogb(y) */
-	if (uy.bits.exp == 0) {  /* subnormal y */
-		uy.e *= 0x1.0p512;
-		iy = uy.bits.exp - (BIAS + 512);
-	} else {
-		iy = uy.bits.exp - BIAS;
+	if (!ey) {
+		uy.i.se = ey;
+		uy.f *= 0x1p120f;
+		ey = uy.i.se - 120;
 	}
 
-	/* set up {hx,lx}, {hy,ly} and align y to x */
-	hx = SET_NBIT(ux.bits.manh);
-	hy = SET_NBIT(uy.bits.manh);
-	lx = ux.bits.manl;
-	ly = uy.bits.manl;
-
-	/* fix point fmod */
-	n = ix - iy;
 	q = 0;
-
-	while (n--) {
-		hz = hx - hy;
-		lz = lx - ly;
-		if (lx < ly)
-			hz -= 1;
-		if (hz < 0) {
-			hx = hx + hx + (lx>>MANL_SHIFT);
-			lx = lx + lx;
-		} else {
-			hx = hz + hz + (lz>>MANL_SHIFT);
-			lx = lz + lz;
+	if (ex >= ey) {
+		/* x mod y */
+#if LDBL_MANT_DIG == 64
+		uint64_t i, mx, my;
+		mx = ux.i.m;
+		my = uy.i.m;
+		for (; ex > ey; ex--) {
+			i = mx - my;
+			if (mx >= my) {
+				mx = 2*i;
+				q++;
+				q <<= 1;
+			} else if (2*mx < mx) {
+				mx = 2*mx - my;
+				q <<= 1;
+				q++;
+			} else {
+				mx = 2*mx;
+				q <<= 1;
+			}
+		}
+		i = mx - my;
+		if (mx >= my) {
+			mx = i;
 			q++;
 		}
-		q <<= 1;
-	}
-	hz = hx - hy;
-	lz = lx - ly;
-	if (lx < ly)
-		hz -= 1;
-	if (hz >= 0) {
-		hx = hz;
-		lx = lz;
-		q++;
-	}
-
-	/* convert back to floating value and restore the sign */
-	if ((hx|lx) == 0) {  /* return sign(x)*0 */
-		q &= 0x7fffffff;
-		*quo = sxy ? -q : q;
-		return Zero[sx];
-	}
-	while (hx < (1ULL<<HFRAC_BITS)) {  /* normalize x */
-		hx = hx + hx + (lx>>MANL_SHIFT);
-		lx = lx + lx;
-		iy -= 1;
-	}
-	ux.bits.manh = hx; /* The integer bit is truncated here if needed. */
-	ux.bits.manl = lx;
-	if (iy < LDBL_MIN_EXP) {
-		ux.bits.exp = iy + (BIAS + 512);
-		ux.e *= 0x1p-512;
-	} else {
-		ux.bits.exp = iy + BIAS;
-	}
-	ux.bits.sign = 0;
-	x = ux.e;
-fixup:
-	y = fabsl(y);
-	if (y < LDBL_MIN * 2) {
-		if (x + x > y || (x + x == y && (q & 1))) {
+		if (mx == 0)
+			ex = -120;
+		else
+			for (; mx >> 63 == 0; mx *= 2, ex--);
+		ux.i.m = mx;
+#elif LDBL_MANT_DIG == 113
+		uint64_t hi, lo, xhi, xlo, yhi, ylo;
+		xhi = (ux.i2.hi & -1ULL>>16) | 1ULL<<48;
+		yhi = (uy.i2.hi & -1ULL>>16) | 1ULL<<48;
+		xlo = ux.i2.lo;
+		ylo = ux.i2.lo;
+		for (; ex > ey; ex--) {
+			hi = xhi - yhi;
+			lo = xlo - ylo;
+			if (xlo < ylo)
+				hi -= 1;
+			if (hi >> 63 == 0) {
+				xhi = 2*hi + (lo>>63);
+				xlo = 2*lo;
+				q++;
+			} else {
+				xhi = 2*xhi + (xlo>>63);
+				xlo = 2*xlo;
+			}
+			q <<= 1;
+		}
+		hi = xhi - yhi;
+		lo = xlo - ylo;
+		if (xlo < ylo)
+			hi -= 1;
+		if (hi >> 63 == 0) {
+			xhi = hi;
+			xlo = lo;
 			q++;
-			x-=y;
 		}
-	} else if (x > 0.5*y || (x == 0.5*y && (q & 1))) {
-		q++;
-		x-=y;
+		if ((xhi|xlo) == 0)
+			ex = -120;
+		else
+			for (; xhi >> 48 == 0; xhi = 2*xhi + (xlo>>63), xlo = 2*xlo, ex--);
+		ux.i2.hi = xhi;
+		ux.i2.lo = xlo;
+#endif
 	}
 
-	ux.e = x;
-	ux.bits.sign ^= sx;
-	x = ux.e;
-
+	/* scale result and decide between |x| and |x|-|y| */
+	if (ex <= 0) {
+		ux.i.se = ex + 120;
+		ux.f *= 0x1p-120f;
+	} else
+		ux.i.se = ex;
+	x = ux.f;
+	if (sy)
+		y = -y;
+	if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
+		x -= y;
+		q++;
+	}
 	q &= 0x7fffffff;
-	*quo = sxy ? -q : q;
-	return x;
+	*quo = sx^sy ? -(int)q : (int)q;
+	return sx ? -x : x;
 }
 #endif
-- 
2.25.1