From 2162541f38d3f642f5a643010548d62220d55a4d Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 10 Apr 2012 21:47:37 -0400 Subject: [PATCH] add "scan helper getc" and rework strtod, etc. to use it the immediate benefit is a significant debloating of the float parsing code by moving the responsibility for keeping track of the number of characters read to a different module. by linking shgetc with the stdio buffer logic, counting logic is deferred to buffer refill time, keeping the calls to shgetc fast and light. in the future, shgetc will also be useful for integrating the new float code with scanf, which needs to not only count the characters consumed, but also limit the number of characters read based on field width specifiers. shgetc may also become a useful tool for simplifying the integer parsing code. --- src/internal/floatscan.c | 131 +++++++++++++++++--------------------- src/internal/floatscan.h | 2 +- src/internal/shgetc.c | 24 +++++++ src/internal/shgetc.h | 25 ++++++++ src/internal/stdio_impl.h | 2 + src/stdlib/strtod.c | 6 +- 6 files changed, 115 insertions(+), 75 deletions(-) create mode 100644 src/internal/shgetc.c create mode 100644 src/internal/shgetc.h diff --git a/src/internal/floatscan.c b/src/internal/floatscan.c index 15ad5e12..7d9a4524 100644 --- a/src/internal/floatscan.c +++ b/src/internal/floatscan.c @@ -4,6 +4,7 @@ #include #include +#include "shgetc.h" #include "floatscan.h" #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 @@ -23,71 +24,58 @@ #define MASK (KMAX-1) -#if 1 -#include "stdio_impl.h" -#undef ungetc -#define ungetc(c,f) ((f)->rpos--,(c)) -#undef getc -#define getc getc_unlocked -#endif - - -static long long scanexp(FILE *f, off_t *pcnt) +static long long scanexp(FILE *f, int pok) { int c; int x; long long y; int neg = 0; - *pcnt += (c=getc(f))>=0; + c = shgetc(f); if (c=='+' || c=='-') { neg = (c=='-'); - *pcnt += (c=getc(f))>=0; - if (c-'0'>=10U) { - if (c>=0) { - ungetc(c, f); - --*pcnt; - } - return LLONG_MIN; - } + c = 
shgetc(f); + if (c-'0'>=10U && pok) shunget(f); + } + if (c-'0'>=10U) { + shunget(f); + return LLONG_MIN; } - for (x=0; c-'0'<10U && x=0) + for (x=0; c-'0'<10U && x=0) + for (y=x; c-'0'<10U && x=0); - if (c>=0) { - ungetc(c, f); - --*pcnt; - } + for (; c-'0'<10U; c = shgetc(f)); + shunget(f); return neg ? -y : y; } -static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt) +static long double decfloat(FILE *f, int bits, int emin, int sign, int pok) { uint32_t x[KMAX]; static const uint32_t th[] = { LD_B1B_MAX }; int i, j, k, a, z; long long lrp=-1, dc=0; + long long e10=0; int gotdig = 0; int rp; - int e10=0; int e2; long double y; long double frac=0; long double bias=0; + int c; j=0; k=0; - if (c<0) *pcnt += (c=getc(f))>=0; + c = shgetc(f); /* Don't let leading zeros consume buffer space */ - for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig=1; + for (; c=='0'; c = shgetc(f)) gotdig=1; x[0] = 0; - for (; c-'0'<10U || c=='.'; *pcnt += (c=getc(f))>=0) { + for (; c-'0'<10U || c=='.'; c = shgetc(f)) { if (c == '.') { if (lrp!=-1) break; lrp = dc; @@ -108,21 +96,22 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po if (lrp==-1) lrp=dc; if (gotdig && (c|32)=='e') { - e10 = scanexp(f, pcnt); + e10 = scanexp(f, pok); if (e10 == LLONG_MIN) { - if (!pok) { - *pcnt = 0; + if (pok) { + shunget(f); + } else { + shlim(f, 0); return 0; } e10 = 0; } lrp += e10; } else if (c>=0) { - ungetc(c, f); - --*pcnt; + shunget(f); } if (!gotdig) { - *pcnt = 0; + shlim(f, 0); return 0; } @@ -271,7 +260,7 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po return y; } -static long double hexfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt) +static long double hexfloat(FILE *f, int bits, int emin, int sign, int pok) { uint32_t x = 0; long double y = 0; @@ -282,20 +271,21 @@ static long double hexfloat(FILE *f, int c, int bits, int emin, int sign, int po long long dc = 0; 
long long e2 = 0; int d; + int c; - if (c<0) *pcnt += (c=getc(f))>=0; + c = shgetc(f); /* Skip leading zeros */ - for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig = 1; + for (; c=='0'; c = shgetc(f)) gotdig = 1; if (c=='.') { gotrad = 1; - *pcnt += (c=getc(f))>=0; + c = shgetc(f); /* Count zeros after the radix point before significand */ - for (rp=0; c=='0'; *pcnt += (c=getc(f))>=0, rp--) gotdig = 1; + for (rp=0; c=='0'; c = shgetc(f), rp--) gotdig = 1; } - for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; *pcnt += (c=getc(f))>=0) { + for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; c = shgetc(f)) { if (c=='.') { if (gotrad) break; rp = dc; @@ -316,21 +306,24 @@ static long double hexfloat(FILE *f, int c, int bits, int emin, int sign, int po } } if (!gotdig) { - if (c>=0) { - ungetc(c, f); - --*pcnt; + shunget(f); + if (pok) { + shunget(f); + if (gotrad) shunget(f); + } else { + shlim(f, 0); } - if (pok) *pcnt -= 1+gotrad; /* uncount the rp, x of 0x */ - else *pcnt = 0; return 0; } if (!gotrad) rp = dc; while (dc<8) x *= 16, dc++; if ((c|32)=='p') { - e2 = scanexp(f, pcnt); + e2 = scanexp(f, pok); if (e2 == LLONG_MIN) { - if (!pok) { - *pcnt = 0; + if (pok) { + shunget(f); + } else { + shlim(f, 0); return 0; } e2 = 0; @@ -369,15 +362,13 @@ static long double hexfloat(FILE *f, int c, int bits, int emin, int sign, int po return scalbnl(y, e2); } -long double __floatscan(FILE *f, int c, int prec, int pok, off_t *pcnt) +long double __floatscan(FILE *f, int c, int prec, int pok) { int sign = 1; int i; int bits; int emin; - *pcnt = 0; - switch (prec) { case 0: bits = 24; @@ -395,44 +386,40 @@ long double __floatscan(FILE *f, int c, int prec, int pok, off_t *pcnt) return 0; } - if (c<0) *pcnt += (c=getc(f))>=0; + if (c<0) c = shgetc(f); if (c=='+' || c=='-') { sign -= 2*(c=='-'); - *pcnt += (c=getc(f))>=0; + c = shgetc(f); } for (i=0; i<8 && (c|32)=="infinity"[i]; i++) - if (i<7) c = getc(f); + if (i<7) c = shgetc(f); if (i==3 || i==8 || (i>3 && pok)) { - if (i==3 && c>=0) 
ungetc(c, f); - if (i==8) *pcnt += 7; - else *pcnt += 2; + if (i==3) shunget(f); + if (pok) for (; i>3; i--) shunget(f); + else shlim(f, 0); return sign * INFINITY; } if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++) - if (i<3) c = getc(f); + if (i<3) c = shgetc(f); if (i==3) { - *pcnt += 2; - return sign>0 ? NAN : -NAN; + return NAN; } if (i) { - if (c>=0) ungetc(c, f); - *pcnt = 0; + shunget(f); + shlim(f, 0); return 0; } if (c=='0') { - *pcnt += (c=getc(f))>=0; + c = shgetc(f); if ((c|32) == 'x') - return hexfloat(f, -1, bits, emin, sign, pok, pcnt); - if (c>=0) { - ungetc(c, f); - --*pcnt; - } + return hexfloat(f, bits, emin, sign, pok); c = '0'; } - return decfloat(f, c, bits, emin, sign, pok, pcnt); + shunget(f); + return decfloat(f, bits, emin, sign, pok); } diff --git a/src/internal/floatscan.h b/src/internal/floatscan.h index 5ea74cc9..5595b81e 100644 --- a/src/internal/floatscan.h +++ b/src/internal/floatscan.h @@ -3,6 +3,6 @@ #include -long double __floatscan(FILE *, int, int, int, off_t *); +long double __floatscan(FILE *, int, int, int); #endif diff --git a/src/internal/shgetc.c b/src/internal/shgetc.c new file mode 100644 index 00000000..7c4e58c1 --- /dev/null +++ b/src/internal/shgetc.c @@ -0,0 +1,24 @@ +#include "shgetc.h" + +void __shlim(FILE *f, off_t lim) +{ + f->shlim = lim; + f->shcnt = f->rend ? 
f->rend - f->buf : 0; + if (lim && f->rend - f->rpos > lim) + f->shend = f->rpos + lim; + else + f->shend = f->rend; +} + +int __shgetc(FILE *f) +{ + int c; + if (f->shcnt >= f->shlim) return EOF; + c = __uflow(f); + if (f->shlim && f->rend - f->rpos > f->shlim - f->shcnt - 1) + f->shend = f->rpos + (f->shlim - f->shcnt - 1); + else + f->shend = f->rend; + if (f->rend) f->shcnt += f->rend - f->buf; + return c; +} diff --git a/src/internal/shgetc.h b/src/internal/shgetc.h new file mode 100644 index 00000000..3434cdaa --- /dev/null +++ b/src/internal/shgetc.h @@ -0,0 +1,25 @@ +#include "stdio_impl.h" + +void __shlim(FILE *, off_t); +int __shgetc(FILE *); + +static inline off_t shcnt(FILE *f) +{ + return f->shcnt + (f->rpos - f->rend); +} + +static inline void shlim(FILE *f, off_t lim) +{ + __shlim(f, lim); +} + +static inline int shgetc(FILE *f) +{ + if (f->rpos < f->shend) return *f->rpos++; + return __shgetc(f); +} + +static inline void shunget(FILE *f) +{ + if (f->rend) f->rpos--; +} diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h index c5f45eb1..5ec296f3 100644 --- a/src/internal/stdio_impl.h +++ b/src/internal/stdio_impl.h @@ -59,6 +59,8 @@ struct __FILE_s { off_t off; int (*flush)(FILE *); void *mustbezero_2; + unsigned char *shend; + off_t shlim, shcnt; }; size_t __stdio_read(FILE *, unsigned char *, size_t); diff --git a/src/stdlib/strtod.c b/src/stdlib/strtod.c index b444f530..ecfabdf1 100644 --- a/src/stdlib/strtod.c +++ b/src/stdlib/strtod.c @@ -1,4 +1,5 @@ #include +#include "shgetc.h" #include "floatscan.h" #include "stdio_impl.h" @@ -10,8 +11,9 @@ static long double strtox(const char *s, char **p, int prec) .buf = (void *)t, .rpos = (void *)t, .rend = (void *)-1, .lock = -1 }; - off_t cnt; - long double y = __floatscan(&f, -1, prec, 1, &cnt); + shlim(&f, 0); + long double y = __floatscan(&f, -1, prec, 1); + off_t cnt = shcnt(&f); if (p) *p = cnt ? t + cnt : (char *)s; return y; } -- 2.25.1