regex: simplify the {,} repetition parsing logic
[oweals/musl.git] / src / stdio / vfscanf.c
1 #include <stdlib.h>
2 #include <stdarg.h>
3 #include <ctype.h>
4 #include <wchar.h>
5 #include <wctype.h>
6 #include <limits.h>
7 #include <string.h>
8 #include <stdint.h>
9
10 #include "stdio_impl.h"
11 #include "shgetc.h"
12 #include "intscan.h"
13 #include "floatscan.h"
14
/* Length-modifier codes recorded while parsing a conversion
 * specification. They pick the destination type in store_int() and
 * are also consulted by the string and floating-point conversions. */
#define SIZE_hh -2
#define SIZE_h  -1
#define SIZE_def 0
#define SIZE_l   1
#define SIZE_L   2
#define SIZE_ll  3

/*
 * Write the integer i through dest, using the object type selected by
 * the length-modifier code in size:
 *   SIZE_hh -> char, SIZE_h -> short, SIZE_def -> int,
 *   SIZE_l  -> long, SIZE_ll -> long long.
 * A null dest, or any other size code (such as SIZE_L), stores nothing.
 */
static void store_int(void *dest, int size, unsigned long long i)
{
	if (dest) {
		if (size == SIZE_hh)
			*(char *)dest = (char)i;
		else if (size == SIZE_h)
			*(short *)dest = (short)i;
		else if (size == SIZE_def)
			*(int *)dest = (int)i;
		else if (size == SIZE_l)
			*(long *)dest = (long)i;
		else if (size == SIZE_ll)
			*(long long *)dest = (long long)i;
	}
}
43
/*
 * Fetch the n-th pointer argument (counting from 1) from ap without
 * disturbing ap itself: the walk happens on a private copy.
 * Every argument skipped over is read as a void pointer.
 * For n of 0 or 1 the first argument is returned.
 */
static void *arg_n(va_list ap, unsigned int n)
{
	va_list cursor;
	void *result;

	va_copy(cursor, ap);
	/* Step over the n-1 arguments that precede the requested one. */
	while (n > 1) {
		va_arg(cursor, void *);
		n--;
	}
	result = va_arg(cursor, void *);
	va_end(cursor);

	return result;
}
55
56 int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap)
57 {
58         int width;
59         int size;
60         int alloc;
61         int base;
62         const unsigned char *p;
63         int c, t;
64         char *s;
65         wchar_t *wcs;
66         mbstate_t st;
67         void *dest=NULL;
68         int invert;
69         int matches=0;
70         unsigned long long x;
71         long double y;
72         off_t pos = 0;
73         unsigned char scanset[257];
74         size_t i, k;
75         wchar_t wc;
76
77         FLOCK(f);
78
79         for (p=(const unsigned char *)fmt; *p; p++) {
80
81                 alloc = 0;
82
83                 if (isspace(*p)) {
84                         while (isspace(p[1])) p++;
85                         shlim(f, 0);
86                         while (isspace(shgetc(f)));
87                         shunget(f);
88                         pos += shcnt(f);
89                         continue;
90                 }
91                 if (*p != '%' || p[1] == '%') {
92                         p += *p=='%';
93                         shlim(f, 0);
94                         c = shgetc(f);
95                         if (c!=*p) {
96                                 shunget(f);
97                                 if (c<0) goto input_fail;
98                                 goto match_fail;
99                         }
100                         pos++;
101                         continue;
102                 }
103
104                 p++;
105                 if (*p=='*') {
106                         dest = 0; p++;
107                 } else if (isdigit(*p) && p[1]=='$') {
108                         dest = arg_n(ap, *p-'0'); p+=2;
109                 } else {
110                         dest = va_arg(ap, void *);
111                 }
112
113                 for (width=0; isdigit(*p); p++) {
114                         width = 10*width + *p - '0';
115                 }
116
117                 if (*p=='m') {
118                         wcs = 0;
119                         s = 0;
120                         alloc = !!dest;
121                         p++;
122                 } else {
123                         alloc = 0;
124                 }
125
126                 size = SIZE_def;
127                 switch (*p++) {
128                 case 'h':
129                         if (*p == 'h') p++, size = SIZE_hh;
130                         else size = SIZE_h;
131                         break;
132                 case 'l':
133                         if (*p == 'l') p++, size = SIZE_ll;
134                         else size = SIZE_l;
135                         break;
136                 case 'j':
137                         size = SIZE_ll;
138                         break;
139                 case 'z':
140                 case 't':
141                         size = SIZE_l;
142                         break;
143                 case 'L':
144                         size = SIZE_L;
145                         break;
146                 case 'd': case 'i': case 'o': case 'u': case 'x':
147                 case 'a': case 'e': case 'f': case 'g':
148                 case 'A': case 'E': case 'F': case 'G': case 'X':
149                 case 's': case 'c': case '[':
150                 case 'S': case 'C':
151                 case 'p': case 'n':
152                         p--;
153                         break;
154                 default:
155                         goto fmt_fail;
156                 }
157
158                 t = *p;
159
160                 /* C or S */
161                 if ((t&0x2f) == 3) {
162                         t |= 32;
163                         size = SIZE_l;
164                 }
165
166                 switch (t) {
167                 case 'c':
168                         if (width < 1) width = 1;
169                 case '[':
170                         break;
171                 case 'n':
172                         store_int(dest, size, pos);
173                         /* do not increment match count, etc! */
174                         continue;
175                 default:
176                         shlim(f, 0);
177                         while (isspace(shgetc(f)));
178                         shunget(f);
179                         pos += shcnt(f);
180                 }
181
182                 shlim(f, width);
183                 if (shgetc(f) < 0) goto input_fail;
184                 shunget(f);
185
186                 switch (t) {
187                 case 's':
188                 case 'c':
189                 case '[':
190                         if (t == 'c' || t == 's') {
191                                 memset(scanset, -1, sizeof scanset);
192                                 scanset[0] = 0;
193                                 if (t == 's') {
194                                         scanset[1+'\t'] = 0;
195                                         scanset[1+'\n'] = 0;
196                                         scanset[1+'\v'] = 0;
197                                         scanset[1+'\f'] = 0;
198                                         scanset[1+'\r'] = 0;
199                                         scanset[1+' '] = 0;
200                                 }
201                         } else {
202                                 if (*++p == '^') p++, invert = 1;
203                                 else invert = 0;
204                                 memset(scanset, invert, sizeof scanset);
205                                 scanset[0] = 0;
206                                 if (*p == '-') p++, scanset[1+'-'] = 1-invert;
207                                 else if (*p == ']') p++, scanset[1+']'] = 1-invert;
208                                 for (; *p != ']'; p++) {
209                                         if (!*p) goto fmt_fail;
210                                         if (*p=='-' && p[1] && p[1] != ']')
211                                                 for (c=p++[-1]; c<*p; c++)
212                                                         scanset[1+c] = 1-invert;
213                                         scanset[1+*p] = 1-invert;
214                                 }
215                         }
216                         wcs = 0;
217                         s = 0;
218                         i = 0;
219                         k = t=='c' ? width+1U : 31;
220                         if (size == SIZE_l) {
221                                 if (alloc) {
222                                         wcs = malloc(k*sizeof(wchar_t));
223                                         if (!wcs) goto alloc_fail;
224                                 } else {
225                                         wcs = dest;
226                                 }
227                                 st = (mbstate_t){0};
228                                 while (scanset[(c=shgetc(f))+1]) {
229                                         switch (mbrtowc(&wc, &(char){c}, 1, &st)) {
230                                         case -1:
231                                                 goto input_fail;
232                                         case -2:
233                                                 continue;
234                                         }
235                                         if (wcs) wcs[i++] = wc;
236                                         if (alloc && i==k) {
237                                                 k+=k+1;
238                                                 wchar_t *tmp = realloc(wcs, k*sizeof(wchar_t));
239                                                 if (!tmp) goto alloc_fail;
240                                                 wcs = tmp;
241                                         }
242                                 }
243                                 if (!mbsinit(&st)) goto input_fail;
244                         } else if (alloc) {
245                                 s = malloc(k);
246                                 if (!s) goto alloc_fail;
247                                 while (scanset[(c=shgetc(f))+1]) {
248                                         s[i++] = c;
249                                         if (i==k) {
250                                                 k+=k+1;
251                                                 char *tmp = realloc(s, k);
252                                                 if (!tmp) goto alloc_fail;
253                                                 s = tmp;
254                                         }
255                                 }
256                         } else if ((s = dest)) {
257                                 while (scanset[(c=shgetc(f))+1])
258                                         s[i++] = c;
259                         } else {
260                                 while (scanset[(c=shgetc(f))+1]);
261                         }
262                         shunget(f);
263                         if (!shcnt(f)) goto match_fail;
264                         if (t == 'c' && shcnt(f) != width) goto match_fail;
265                         if (alloc) {
266                                 if (size == SIZE_l) *(wchar_t **)dest = wcs;
267                                 else *(char **)dest = s;
268                         }
269                         if (t != 'c') {
270                                 if (wcs) wcs[i] = 0;
271                                 if (s) s[i] = 0;
272                         }
273                         break;
274                 case 'p':
275                 case 'X':
276                 case 'x':
277                         base = 16;
278                         goto int_common;
279                 case 'o':
280                         base = 8;
281                         goto int_common;
282                 case 'd':
283                 case 'u':
284                         base = 10;
285                         goto int_common;
286                 case 'i':
287                         base = 0;
288                 int_common:
289                         x = __intscan(f, base, 0, ULLONG_MAX);
290                         if (!shcnt(f)) goto match_fail;
291                         if (t=='p' && dest) *(void **)dest = (void *)(uintptr_t)x;
292                         else store_int(dest, size, x);
293                         break;
294                 case 'a': case 'A':
295                 case 'e': case 'E':
296                 case 'f': case 'F':
297                 case 'g': case 'G':
298                         y = __floatscan(f, size, 0);
299                         if (!shcnt(f)) goto match_fail;
300                         if (dest) switch (size) {
301                         case SIZE_def:
302                                 *(float *)dest = y;
303                                 break;
304                         case SIZE_l:
305                                 *(double *)dest = y;
306                                 break;
307                         case SIZE_L:
308                                 *(long double *)dest = y;
309                                 break;
310                         }
311                         break;
312                 }
313
314                 pos += shcnt(f);
315                 if (dest) matches++;
316         }
317         if (0) {
318 fmt_fail:
319 alloc_fail:
320 input_fail:
321                 if (!matches) matches--;
322 match_fail:
323                 if (alloc) {
324                         free(s);
325                         free(wcs);
326                 }
327         }
328         FUNLOCK(f);
329         return matches;
330 }
331
332 weak_alias(vfscanf,__isoc99_vfscanf);