From d6c855caa88ddb1ab6e24e23a14b1e7baf4ba9c7 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 15 Sep 2018 02:33:08 -0400 Subject: [PATCH] fix undefined behavior in strto* via FILE buffer pointer abuse in order to produce FILE objects to pass to the intscan/floatscan backends without any (prohibitively costly) extra buffering layer, the strto* functions set the FILE's rend (read end) buffer pointer to an invalid value at the end of the address space, or SIZE_MAX/2 past the beginning of the string. this led to undefined behavior comparing and subtracting the end pointer with the buffer position pointer (rpos). the comparison issue is easily eliminated by using != instead of <. however the subtractions require nontrivial changes: previously, f->shcnt stored the count that would have been read if consuming the whole buffer, which required an end pointer for the buffer. the purpose for this was that it allowed reading it and adding rpos-rend at any time to get the actual count so far, and required no adjustment at the time of __shgetc (actual function call) since the call would only happen when reaching the end of the buffer. to get rid of the dependency on rend, instead offset shcnt by buf-rpos (start of buffer) at the time of last __shlim/__shgetc call. this makes for slightly more work in __shgetc the function, but for the inline macro it's still just as easy to compute the current count. since the scan helper interfaces used here are a big hack, comments are added to document their contracts and what's going on with their implementations. --- src/internal/shgetc.c | 21 +++++++++++++++------ src/internal/shgetc.h | 27 +++++++++++++++++++++++++-- src/stdlib/strtod.c | 6 ++---- src/stdlib/strtol.c | 9 +-------- 4 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/internal/shgetc.c b/src/internal/shgetc.c index e878b00a..ebd5fae7 100644 --- a/src/internal/shgetc.c +++ b/src/internal/shgetc.c @@ -1,10 +1,16 @@ #include "shgetc.h" +/* The shcnt field stores the number of bytes read so far, offset by + * the value of buf-rpos at the last function call (__shlim or __shgetc), + * so that between calls the inline shcnt macro can add rpos-buf to get + * the actual count. */ + void __shlim(FILE *f, off_t lim) { f->shlim = lim; - f->shcnt = f->rend - f->rpos; - if (lim && f->shcnt > lim) + f->shcnt = f->buf - f->rpos; + /* If lim is nonzero, rend must be a valid pointer. */ + if (lim && f->rend - f->rpos > lim) f->shend = f->rpos + lim; else f->shend = f->rend; @@ -13,15 +19,18 @@ void __shlim(FILE *f, off_t lim) int __shgetc(FILE *f) { int c; - if (f->shlim && f->shcnt >= f->shlim || (c=__uflow(f)) < 0) { + off_t cnt = shcnt(f); + if (f->shlim && cnt >= f->shlim || (c=__uflow(f)) < 0) { + f->shcnt = f->buf - f->rpos + cnt; f->shend = 0; return EOF; } - if (f->shlim && f->rend - f->rpos > f->shlim - f->shcnt - 1) - f->shend = f->rpos + (f->shlim - f->shcnt - 1); + cnt++; + if (f->shlim && f->rend - f->rpos > f->shlim - cnt) + f->shend = f->rpos + (f->shlim - cnt); else f->shend = f->rend; - if (f->rend) f->shcnt += f->rend - f->rpos + 1; + f->shcnt = f->buf - f->rpos + cnt; if (f->rpos[-1] != c) f->rpos[-1] = c; return c; } diff --git a/src/internal/shgetc.h b/src/internal/shgetc.h index 210f6468..1c30f75f 100644 --- a/src/internal/shgetc.h +++ b/src/internal/shgetc.h @@ -1,9 +1,32 @@ #include "stdio_impl.h" +/* Scan helper "stdio" functions for use by scanf-family and strto*-family + * functions. These accept either a valid stdio FILE, or a minimal pseudo + * FILE whose buffer pointers point into a null-terminated string. In the + * latter case, the sh_fromstring macro should be used to setup the FILE; + * the rest of the structure can be left uninitialized. + * + * To begin using these functions, shlim must first be called on the FILE + * to set a field width limit, or 0 for no limit. For string pseudo-FILEs, + * a nonzero limit is not valid and produces undefined behavior. After that, + * shgetc, shunget, and shcnt are valid as long as no other stdio functions + * are called on the stream. + * + * When used with a real FILE object, shunget has only one byte of pushback + * available. Further shunget (up to a limit of the stdio UNGET buffer size) + * will adjust the position but will not restore the data to be read again. + * This functionality is needed for the wcsto*-family functions, where it's + * okay because the FILE will be discarded immediately anyway. When used + * with string pseudo-FILEs, shunget has unlimited pushback, back to the + * beginning of the string. */ + hidden void __shlim(FILE *, off_t); hidden int __shgetc(FILE *); -#define shcnt(f) ((f)->shcnt + ((f)->rpos - (f)->rend)) +#define shcnt(f) ((f)->shcnt + ((f)->rpos - (f)->buf)) #define shlim(f, lim) __shlim((f), (lim)) -#define shgetc(f) (((f)->rpos < (f)->shend) ? *(f)->rpos++ : __shgetc(f)) +#define shgetc(f) (((f)->rpos != (f)->shend) ? *(f)->rpos++ : __shgetc(f)) #define shunget(f) ((f)->shend ? (void)(f)->rpos-- : (void)0) + +#define sh_fromstring(f, s) \ + ((f)->buf = (f)->rpos = (void *)(s), (f)->rend = (void*)-1) diff --git a/src/stdlib/strtod.c b/src/stdlib/strtod.c index a898b1d4..a5d0118a 100644 --- a/src/stdlib/strtod.c +++ b/src/stdlib/strtod.c @@ -5,10 +5,8 @@ static long double strtox(const char *s, char **p, int prec) { - FILE f = { - .buf = (void *)s, .rpos = (void *)s, - .rend = (void *)-1, .lock = -1 - }; + FILE f; + sh_fromstring(&f, s); shlim(&f, 0); long double y = __floatscan(&f, prec, 1); off_t cnt = shcnt(&f); diff --git a/src/stdlib/strtol.c b/src/stdlib/strtol.c index d82ecf7f..bfefea69 100644 --- a/src/stdlib/strtol.c +++ b/src/stdlib/strtol.c @@ -7,15 +7,8 @@ static unsigned long long strtox(const char *s, char **p, int base, unsigned long long lim) { - /* FIXME: use a helper function or macro to setup the FILE */ FILE f; - f.flags = 0; - f.buf = f.rpos = (void *)s; - if ((size_t)s > (size_t)-1/2) - f.rend = (void *)-1; - else - f.rend = (unsigned char *)s+(size_t)-1/2; - f.lock = -1; + sh_fromstring(&f, s); shlim(&f, 0); unsigned long long y = __intscan(&f, base, 1, lim); if (p) { -- 2.25.1