new floating point parser/converter

author Rich Felker <dalias@aerifal.cx>

Tue, 10 Apr 2012 15:52:55 +0000 (11:52 -0400)

committer Rich Felker <dalias@aerifal.cx>

Tue, 10 Apr 2012 15:52:55 +0000 (11:52 -0400)
author Rich Felker <dalias@aerifal.cx>
Tue, 10 Apr 2012 15:52:55 +0000 (11:52 -0400)
committer Rich Felker <dalias@aerifal.cx>
Tue, 10 Apr 2012 15:52:55 +0000 (11:52 -0400)
diff --git a/src/internal/floatscan.c b/src/internal/floatscan.c

new file mode 100644 (file)

index 0000000..15ad5e1
--- /dev/null
+++ b/src/internal/floatscan.c
@@ -0,0 +1,438 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+
+#include "floatscan.h"
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+
+#define LD_B1B_DIG 2
+#define LD_B1B_MAX 9007199, 254740991
+#define KMAX 128
+
+#else /* LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 */
+
+#define LD_B1B_DIG 3
+#define LD_B1B_MAX 18, 446744073, 709551615
+#define KMAX 2048
+
+#endif
+
+#define MASK (KMAX-1)
+
+
+#if 1
+#include "stdio_impl.h"
+#undef ungetc
+#define ungetc(c,f) ((f)->rpos--,(c))
+#undef getc
+#define getc getc_unlocked
+#endif
+
+
+static long long scanexp(FILE *f, off_t *pcnt)
+{
+       int c;
+       int x;
+       long long y;
+       int neg = 0;
+       
+       *pcnt += (c=getc(f))>=0;
+       if (c=='+' || c=='-') {
+               neg = (c=='-');
+               *pcnt += (c=getc(f))>=0;
+               if (c-'0'>=10U) {
+                       if (c>=0) {
+                               ungetc(c, f);
+                               --*pcnt;
+                       }
+                       return LLONG_MIN;
+               }
+       }
+       for (x=0; c-'0'<10U && x<INT_MAX/10; *pcnt += (c=getc(f))>=0)
+               x = 10*x + c-'0';
+       for (y=x; c-'0'<10U && x<LLONG_MAX/10; *pcnt += (c=getc(f))>=0)
+               y = 10*y + c-'0';
+       for (; c-'0'<10U; *pcnt += (c=getc(f))>=0);
+       if (c>=0) {
+               ungetc(c, f);
+               --*pcnt;
+       }
+       return neg ? -y : y;
+}
+
+
+static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
+{
+       uint32_t x[KMAX];
+       static const uint32_t th[] = { LD_B1B_MAX };
+       int i, j, k, a, z;
+       long long lrp=-1, dc=0;
+       int gotdig = 0;
+       int rp;
+       int e10=0;
+       int e2;
+       long double y;
+       long double frac=0;
+       long double bias=0;
+
+       j=0;
+       k=0;
+
+       if (c<0) *pcnt += (c=getc(f))>=0;
+
+       /* Don't let leading zeros consume buffer space */
+       for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig=1;
+
+       x[0] = 0;
+       for (; c-'0'<10U || c=='.'; *pcnt += (c=getc(f))>=0) {
+               if (c == '.') {
+                       if (lrp!=-1) break;
+                       lrp = dc;
+               } else if (k < KMAX) {
+                       dc++;
+                       if (j) x[k] = x[k]*10 + c-'0';
+                       else x[k] = c-'0';
+                       if (++j==9) {
+                               k++;
+                               j=0;
+                       }
+                       gotdig=1;
+               } else {
+                       dc++;
+                       x[KMAX-1] |= c-'0';
+               }
+       }
+       if (lrp==-1) lrp=dc;
+
+       if (gotdig && (c|32)=='e') {
+               e10 = scanexp(f, pcnt);
+               if (e10 == LLONG_MIN) {
+                       if (!pok) {
+                               *pcnt = 0;
+                               return 0;
+                       }
+                       e10 = 0;
+               }
+               lrp += e10;
+       } else if (c>=0) {
+               ungetc(c, f);
+               --*pcnt;
+       }
+       if (!gotdig) {
+               *pcnt = 0;
+               return 0;
+       }
+
+       if (!x[0])
+               return sign * 0.0;
+       if (lrp==dc && (!k || (k==1 && !j)) && (bits>30 || x[0]>>bits==0))
+               return sign * (long double)x[0];
+       if (lrp > -emin/2)
+               return sign * LDBL_MAX * LDBL_MAX;
+       if (lrp < emin-2*LDBL_MANT_DIG)
+               return sign * LDBL_MIN * LDBL_MIN;
+
+       if (k<KMAX && j) {
+               for (; j<9; j++) x[k]*=10;
+               k++;
+               j=0;
+       }
+
+       a = 0;
+       z = k;
+       e2 = 0;
+       rp = lrp;
+
+       while (rp < 18+9*LD_B1B_DIG) {
+               uint32_t carry = 0;
+               e2 -= 29;
+               for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
+                       uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
+                       if (tmp > 1000000000) {
+                               carry = tmp / 1000000000;
+                               x[k] = tmp % 1000000000;
+                       } else {
+                               carry = 0;
+                               x[k] = tmp;
+                       }
+                       if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
+                       if (k==a) break;
+               }
+               if (carry) {
+                       rp += 9;
+                       if (a == z) {
+                               z = (z-1 & MASK);
+                               x[z-1 & MASK] |= x[z];
+                       }
+                       a = (a-1 & MASK);
+                       x[a] = carry;
+               }
+       }
+
+       if (rp % 9) {
+               static const int p10s[] = {
+                       100000000, 10000000, 1000000, 100000,
+                       10000, 1000, 100, 10
+               };
+               int rpm9 = rp % 9;
+               int p10 = p10s[rpm9-1];
+               uint32_t carry = 0;
+               for (k=a; k!=z; k=(k+1 & MASK)) {
+                       uint32_t tmp = x[k] % p10;
+                       x[k] = x[k]/p10 + carry;
+                       carry = 1000000000/p10 * tmp;
+                       if (k==a && !x[k]) {
+                               a = (a+1 & MASK);
+                               rp -= 9;
+                       }
+               }
+               if (carry) {
+                       if ((z+1 & MASK) != a) {
+                               x[z] = carry;
+                               z = (z+1 & MASK);
+                       } else x[z-1 & MASK] |= 1;
+               }
+               rp += 9-rpm9;
+       }
+
+       for (;;) {
+               uint32_t carry = 0;
+               int sh = 1;
+               for (i=0; i<LD_B1B_DIG; i++) {
+                       k = (a+i & MASK);
+                       if (k == z || x[k] < th[i]) {
+                               i=LD_B1B_DIG;
+                               break;
+                       }
+                       if (x[a+i & MASK] > th[i]) break;
+               }
+               if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
+               /* FIXME: find a way to compute optimal sh */
+               if (rp > 9+9*LD_B1B_DIG) sh = 9;
+               e2 += sh;
+               for (k=a; k!=z; k=(k+1 & MASK)) {
+                       uint32_t tmp = x[k] & (1<<sh)-1;
+                       x[k] = (x[k]>>sh) + carry;
+                       carry = (1000000000>>sh) * tmp;
+                       if (k==a && !x[k]) {
+                               a = (a+1 & MASK);
+                               rp -= 9;
+                       }
+               }
+               if (carry) {
+                       if ((z+1 & MASK) != a) {
+                               x[z] = carry;
+                               z = (z+1 & MASK);
+                       } else x[z-1 & MASK] |= 1;
+               }
+       }
+
+       for (y=i=0; i<LD_B1B_DIG && (a+i & MASK)!=z; i++)
+               y = 1000000000.0L * y + x[a+i & MASK];
+
+       y *= sign;
+
+       if (bits > LDBL_MANT_DIG+e2-emin) {
+               bits = LDBL_MANT_DIG+e2-emin;
+               if (bits<0) bits=0;
+       }
+
+       if (bits < LDBL_MANT_DIG) {
+               bias = copysignl(scalbn(1, 2*LDBL_MANT_DIG-bits-1), y);
+               frac = fmodl(y, scalbn(1, LDBL_MANT_DIG-bits));
+               y -= frac;
+               y += bias;
+       }
+
+       if ((a+i & MASK) != z) {
+               uint32_t t = x[a+i & MASK];
+               if (t < 500000000 && (t || (a+i+1 & MASK) != z))
+                       frac += 0.25*sign;
+               else if (t > 500000000)
+                       frac += 0.75*sign;
+               else if (t == 500000000) {
+                       if ((a+i+1 & MASK) == z)
+                               frac += 0.5*sign;
+                       else
+                               frac += 0.75*sign;
+               }
+               if (LDBL_MANT_DIG-bits >= 2 && !fmodl(frac, 1))
+                       frac++;
+       }
+
+       y += frac;
+       y -= bias;
+
+       y = scalbnl(y, e2);
+
+       return y;
+}
+
+static long double hexfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
+{
+       uint32_t x = 0;
+       long double y = 0;
+       long double scale = 1;
+       long double bias = 0;
+       int gottail = 0, gotrad = 0, gotdig = 0;
+       long long rp = 0;
+       long long dc = 0;
+       long long e2 = 0;
+       int d;
+
+       if (c<0) *pcnt += (c=getc(f))>=0;
+
+       /* Skip leading zeros */
+       for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig = 1;
+
+       if (c=='.') {
+               gotrad = 1;
+               *pcnt += (c=getc(f))>=0;
+               /* Count zeros after the radix point before significand */
+               for (rp=0; c=='0'; *pcnt += (c=getc(f))>=0, rp--) gotdig = 1;
+       }
+
+       for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; *pcnt += (c=getc(f))>=0) {
+               if (c=='.') {
+                       if (gotrad) break;
+                       rp = dc;
+                       gotrad = 1;
+               } else {
+                       gotdig = 1;
+                       if (c > '9') d = (c|32)+10-'a';
+                       else d = c-'0';
+                       if (dc<8) {
+                               x = x*16 + d;
+                       } else if (dc < LDBL_MANT_DIG/4+1) {
+                               y += d*(scale/=16);
+                       } else if (d && !gottail) {
+                               y += 0.5*scale;
+                               gottail = 1;
+                       }
+                       dc++;
+               }
+       }
+       if (!gotdig) {
+               if (c>=0) {
+                       ungetc(c, f);
+                       --*pcnt;
+               }
+               if (pok) *pcnt -= 1+gotrad; /* uncount the rp, x of 0x */
+               else *pcnt = 0;
+               return 0;
+       }
+       if (!gotrad) rp = dc;
+       while (dc<8) x *= 16, dc++;
+       if ((c|32)=='p') {
+               e2 = scanexp(f, pcnt);
+               if (e2 == LLONG_MIN) {
+                       if (!pok) {
+                               *pcnt = 0;
+                               return 0;
+                       }
+                       e2 = 0;
+               }
+       }
+       e2 += 4*rp - 32;
+
+       if (!x) return sign * 0.0;
+       if (e2 > -emin) return sign * LDBL_MAX * LDBL_MAX;
+       if (e2 < emin-2*LDBL_MANT_DIG) return sign * LDBL_MIN * LDBL_MIN;
+
+       while (x < 0x80000000) {
+               if (y>=0.5) {
+                       x += x + 1;
+                       y += y - 1;
+               } else {
+                       x += x;
+                       y += y;
+               }
+               e2--;
+       }
+
+       if (bits > 32+e2-emin) {
+               bits = 32+e2-emin;
+               if (bits<0) bits=0;
+       }
+
+       if (bits < LDBL_MANT_DIG)
+               bias = copysignl(scalbn(1, 32+LDBL_MANT_DIG-bits-1), sign);
+
+       if (bits<32 && y && !(x&1)) x++, y=0;
+
+       y = bias + sign*(long double)x + sign*y;
+       y -= bias;
+
+       return scalbnl(y, e2);
+}
+
+long double __floatscan(FILE *f, int c, int prec, int pok, off_t *pcnt)
+{
+       int sign = 1;
+       int i;
+       int bits;
+       int emin;
+
+       *pcnt = 0;
+
+       switch (prec) {
+       case 0:
+               bits = 24;
+               emin = -149;
+               break;
+       case 1:
+               bits = 53;
+               emin = -1074;
+               break;
+       case 2:
+               bits = LDBL_MANT_DIG;
+               emin = -16445;
+               break;
+       default:
+               return 0;
+       }
+
+       if (c<0) *pcnt += (c=getc(f))>=0;
+
+       if (c=='+' || c=='-') {
+               sign -= 2*(c=='-');
+               *pcnt += (c=getc(f))>=0;
+       }
+
+       for (i=0; i<8 && (c|32)=="infinity"[i]; i++)
+               if (i<7) c = getc(f);
+       if (i==3 || i==8 || (i>3 && pok)) {
+               if (i==3 && c>=0) ungetc(c, f);
+               if (i==8) *pcnt += 7;
+               else *pcnt += 2;
+               return sign * INFINITY;
+       }
+       if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++)
+               if (i<3) c = getc(f);
+       if (i==3) {
+               *pcnt += 2;
+               return sign>0 ? NAN : -NAN;
+       }
+
+       if (i) {
+               if (c>=0) ungetc(c, f);
+               *pcnt = 0;
+               return 0;
+       }
+
+       if (c=='0') {
+               *pcnt += (c=getc(f))>=0;
+               if ((c|32) == 'x')
+                       return hexfloat(f, -1, bits, emin, sign, pok, pcnt);
+               if (c>=0) {
+                       ungetc(c, f);
+                       --*pcnt;
+               }
+               c = '0';
+       }
+
+       return decfloat(f, c, bits, emin, sign, pok, pcnt);
+}
diff --git a/src/internal/floatscan.h b/src/internal/floatscan.h

new file mode 100644 (file)

index 0000000..5ea74cc
--- /dev/null
+++ b/src/internal/floatscan.h
@@ -0,0 +1,8 @@
+#ifndef FLOATSCAN_H
+#define FLOATSCAN_H
+
+#include <stdio.h>
+
+long double __floatscan(FILE *, int, int, int, off_t *);
+
+#endif
diff --git a/src/stdlib/strtod.c b/src/stdlib/strtod.c

index 388058fe630d1fed46d4656f0e29fedf5b3dd2fb..98b992a1f77108e295405fae3b16495a8a6c8f8e 100644 (file)
--- a/src/stdlib/strtod.c
+++ b/src/stdlib/strtod.c
@@ -1,6 +1,15 @@
  #include <stdlib.h>
+#include "floatscan.h"
+#include "stdio_impl.h"
  
  double strtod(const char *s, char **p)
  {
-       return strtold(s, p);
+       FILE f = {
+               .buf = (void *)s, .rpos = (void *)s,
+               .rend = (void *)-1, .lock = -1
+       };
+       off_t cnt;
+       double y = __floatscan(&f, -1, 1, 1, &cnt);
+       if (p) *p = (char *)s + cnt;
+       return y;
  }
diff --git a/src/stdlib/strtof.c b/src/stdlib/strtof.c

index 07b32df443f4a77945b9fe6c9ba694f0ea243dd2..2dc349a926e5750f02e6a0569559f28fdb4e4e5f 100644 (file)
--- a/src/stdlib/strtof.c
+++ b/src/stdlib/strtof.c
@@ -1,6 +1,15 @@
  #include <stdlib.h>
+#include "floatscan.h"
+#include "stdio_impl.h"
  
  float strtof(const char *s, char **p)
  {
-       return strtold(s, p);
+       FILE f = {
+               .buf = (void *)s, .rpos = (void *)s,
+               .rend = (void *)-1, .lock = -1
+       };
+       off_t cnt;
+       float y = __floatscan(&f, -1, 0, 1, &cnt);
+       if (p) *p = (char *)s + cnt;
+       return y;
  }
diff --git a/src/stdlib/strtold.c b/src/stdlib/strtold.c

index ec464c15c9081b0e071b23d5293150c269dc1456..40ecc122abdcfb3389df3fac7d0255efb636e0fb 100644 (file)
--- a/src/stdlib/strtold.c
+++ b/src/stdlib/strtold.c
@@ -1,96 +1,15 @@
  #include <stdlib.h>
-#include <errno.h>
-#include <ctype.h>
+#include "floatscan.h"
+#include "stdio_impl.h"
  
-static int valid_exp(const unsigned char *s)
+long double strtold(const char *s, char **p)
  {
-       return isdigit(*s) || ((s[0]=='+'||s[0]=='-') && isdigit(s[1]));
-}
-
-long double strtold(const char *s1, char **p)
-{
-       const unsigned char *s = (void *)s1;
-       long double x = 0;
-       long double frac;
-       int sign = 0;
-       int nonzero = 0;
-       int radix = '.';
-       long e;
-       int saved_errno = errno;
-
-       if (!p) p = (char **)&s1;
-
-       /* Initial whitespace */
-       for (; isspace(*s); s++);
-
-       /* Optional sign */
-       if (*s == '-') sign = *s++;
-       else if (*s == '+') s++;
-
-       /* Handle infinities and NaNs. */
-       if ((s[0]|32)=='i' && (s[1]|32)=='n' && (s[2]|32)=='f') {
-               *p = (char *)s + 3;
-               return sign ? -1.0/0.0 : 1.0/0.0;
-       } else if ((s[0]|32)=='n' && (s[1]|32)=='a' && (s[2]|32)=='n') {
-               *p = (char *)s + 3;
-               return 0.0/0.0;
-       }
-
-       /* Possible hex float */
-       if (s[0]=='0' && (s[1]|32)=='x') {
-               /* Mantissa must be non-degenerate */
-               if (!isxdigit(s[2]) && (s[2]!=radix || !isxdigit(s[3]))) {
-                       /* Decimal float 0, 'x' extraneous */
-                       *p = (char *)++s;
-                       return 0;
-               }
-               /* We have a real hex float */
-               s += 2;
-               for (; isxdigit(*s); s++) {
-                       x = 16*x + (isdigit(*s)?*s-'0':(*s|32)-'a'+10);
-                       if (*s!='0') nonzero=1;
-               }
-               if (*s == radix) {
-                       frac = 1.0/16.0;
-                       for (s++; isxdigit(*s); s++) {
-                               x += frac * (isdigit(*s)?*s-'0':(*s|32)-'a'+10);
-                               frac *= 1.0/16.0;
-                               if (*s!='0') nonzero=1;
-                       }
-               }
-               if ((*s|32) == 'p' && valid_exp(s+1)) {
-                       e = strtol((void *)(s+1), (void *)&s, 10);
-                       for (; e>0; e--) x *= 2.0;
-                       for (; e<0; e++) x *= 0.5;
-               }
-               goto finish;
-       }
-
-       /* Mantissa must be non-degenerate */
-       if (!isdigit(s[0]) && (s[0]!=radix || !isdigit(s[1]))) {
-               *p = (char *)s1;
-               return 0;
-       }
-
-       for (; isdigit(*s); s++) {
-               x = 10*x + *s-'0';
-               if (*s!='0') nonzero=1;
-       }
-       if (*s == radix) {
-               frac = 10.0;
-               for (s++; isdigit(*s); s++) {
-                       x += (*s-'0') / frac;
-                       frac *= 10.0;
-                       if (*s!='0') nonzero=1;
-               }
-       }
-       if ((*s|32)=='e' && valid_exp(s+1)) {
-               e = strtol((void *)++s, (void *)&s, 10);
-               for (; e>0; e--) x *= 10.0;
-               for (; e<0; e++) x /= 10.0;
-       }
-finish:
-       errno = ((nonzero && !x) || !(1.0/x)) ? ERANGE : saved_errno;
-       *p = (char*)s;
-       return sign ? -x : x;
+       FILE f = {
+               .buf = (void *)s, .rpos = (void *)s,
+               .rend = (void *)-1, .lock = -1
+       };
+       off_t cnt;
+       long double y = __floatscan(&f, -1, 2, 1, &cnt);
+       if (p) *p = (char *)s + cnt;
+       return y;
  }
author	Rich Felker <dalias@aerifal.cx>
	Tue, 10 Apr 2012 15:52:55 +0000 (11:52 -0400)
committer	Rich Felker <dalias@aerifal.cx>
	Tue, 10 Apr 2012 15:52:55 +0000 (11:52 -0400)
src/internal/floatscan.c	[new file with mode: 0644]	patch \| blob
src/internal/floatscan.h	[new file with mode: 0644]	patch \| blob
src/stdlib/strtod.c		patch \| blob \| history
src/stdlib/strtof.c		patch \| blob \| history
src/stdlib/strtold.c		patch \| blob \| history