2 * An implementation of what I call the "Sea of Stars" algorithm for
3 * POSIX fnmatch(). The basic idea is that we factor the pattern into
4 * a head component (which we match first and can reject without ever
5 * measuring the length of the string), an optional tail component
6 * (which only exists if the pattern contains at least one star), and
7 * an optional "sea of stars", a set of star-separated components
8 * between the head and tail. After the head and tail matches have
9 * been removed from the input string, the components in the "sea of
10 * stars" are matched sequentially by searching for their first
11 * occurrence past the end of the previous match.
13 * - Rich Felker, April 2012
23 #define UNMATCHABLE -2
28 static int str_next(const char *str, size_t n, size_t *step)
36 int k = mbtowc(&wc, str, n);
48 static int pat_next(const char *pat, size_t m, size_t *step, int flags)
56 if (pat[0]=='\\' && !(flags & FNM_NOESCAPE)) {
64 if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
65 if (k<m) if (pat[k] == ']') k++;
66 for (; k<m && pat[k] && pat[k]!=']'; k++) {
67 if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
70 if (k<m && pat[k]) k++;
71 while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
72 if (k==m || !pat[k]) break;
75 if (k==m || !pat[k]) {
89 int k = mbtowc(&wc, pat, m);
100 static int match_bracket(const char *p, int k)
105 if (*p=='^' || *p=='!') {
110 if (k==']') return !inv;
112 } else if (*p=='-') {
113 if (k=='-') return !inv;
117 for (; *p != ']'; p++) {
118 if (p[0]=='-' && p[1]!=']') {
120 int l = mbtowc(&wc2, p+1, 4);
122 if (wc<=wc2 && (unsigned)k-wc <= wc2-wc) return !inv;
126 if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
127 const char *p0 = p+2;
130 while (p[-1]!=z || p[0]!=']') p++;
131 if (z == ':' && p-1-p0 < 16) {
133 memcpy(buf, p0, p-1-p0);
135 if (iswctype(k, wctype(buf))) return !inv;
140 wc = (unsigned char)*p;
142 int l = mbtowc(&wc, p, 4);
146 if (wc==k) return !inv;
151 static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
153 const char *p, *ptail, *endpat;
154 const char *s, *stail, *endstr;
155 size_t pinc, sinc, tailcnt=0;
158 if (flags & FNM_PERIOD) {
159 if (*str == '.' && *pat != '.')
163 switch ((c = pat_next(pat, m, &pinc, flags))) {
171 k = str_next(str, n, &sinc);
173 return (c==END) ? 0 : FNM_NOMATCH;
177 if (!match_bracket(pat, k))
179 } else if (c != QUESTION && k != c) {
189 /* Compute the real pat length if the caller passed it as unknown (-1) */
193 /* Find the last * in pat and count chars needed after it */
194 for (p=ptail=pat; p<endpat; p+=pinc) {
195 switch (pat_next(p, endpat-p, &pinc, flags)) {
208 /* Past this point we need not check for UNMATCHABLE in pat,
209 * because all of pat has already been parsed once. */
211 /* Compute the real str length if the caller passed it as unknown (-1) */
214 if (n < tailcnt) return FNM_NOMATCH;
216 /* Find the final tailcnt chars of str, accounting for UTF-8.
217 * On illegal sequences we may get it wrong, but in that case
218 * we necessarily have a matching failure anyway. */
219 for (s=endstr; s>str && tailcnt; tailcnt--) {
220 if (s[-1] < 128U) s--;
221 else while ((unsigned char)*--s-0x80U<0x40 && s>str);
223 if (tailcnt) return FNM_NOMATCH;
226 /* Check that the pat and str tails match */
229 c = pat_next(p, endpat-p, &pinc, flags);
231 if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
232 if (c != END) return FNM_NOMATCH;
237 if (!match_bracket(p-pinc, k))
239 } else if (c != QUESTION && k != c) {
244 /* We're all done with the tails now, so throw them out */
248 /* Match pattern components until there are none left */
253 c = pat_next(p, endpat-p, &pinc, flags);
255 /* Encountering * completes/commits a component */
261 k = str_next(s, endstr-s, &sinc);
265 if (!match_bracket(p-pinc, k))
267 } else if (c != QUESTION && k != c) {
272 if (c == STAR) continue;
273 /* If we failed, advance str: by one char if it starts with a
274 * valid char, otherwise past the whole run of invalid bytes. */
275 k = str_next(str, endstr-str, &sinc);
276 if (k > 0) str += sinc;
277 else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
283 int fnmatch(const char *pat, const char *str, int flags)
288 if (flags & FNM_PATHNAME) for (;;) {
289 for (s=str; *s && *s!='/'; s++);
290 for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
291 if (*s && *p!=*s) return FNM_NOMATCH;
292 if (fnmatch_internal(pat, p-pat, str, s-str, flags))
294 if (!*s && c==END) return 0;
298 return fnmatch_internal(pat, -1, str, -1, flags);