ash: use glob() from libc
author Denys Vlasenko <vda.linux@googlemail.com>
Wed, 21 Sep 2016 14:25:58 +0000 (16:25 +0200)
committer Denys Vlasenko <vda.linux@googlemail.com>
Wed, 21 Sep 2016 14:25:58 +0000 (16:25 +0200)
Adapted from dash.
The "homegrown" glob code is retained (ifdef'ed out).
This change was inspired by bug 9261, which detected out-of-bounds use of heap
for a 2098-byte-long name in the "homegrown" code. That bug is still not fixed...

function                                             old     new   delta
expandarg                                            960     982     +22
static.syntax_index_table                             26      25      -1
static.spec_symbls                                    27      26      -1
static.metachars                                       4       -      -4
addfname                                              42       -     -42
msort                                                126       -    -126
expmeta                                              528       -    -528
------------------------------------------------------------------------------
(add/remove: 0/4 grow/shrink: 1/2 up/down: 22/-702)          Total: -680 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
shell/ash.c

index d96e5685163e9e2d58c1ace10f439f807e48a991..2674937085178b8067f068bcaa855d5e6cfb0f06 100644 (file)
@@ -39,6 +39,7 @@
 
 #include <setjmp.h>
 #include <fnmatch.h>
+#include <glob.h>
 #include <sys/times.h>
 #include <sys/utsname.h> /* for setting $HOSTNAME */
 
@@ -2808,18 +2809,27 @@ enum {
 static int
 SIT(int c, int syntax)
 {
-       static const char spec_symbls[] ALIGN1 = "\t\n !\"$&'()*-/:;<=>?[\\]`|}~";
+       /* Used to also have '/' in this string: "\t\n !\"$&'()*-/:;<=>?[\\]`|}~" */
+       static const char spec_symbls[] ALIGN1 = "\t\n !\"$&'()*-:;<=>?[\\]`|}~";
+       /*
+        * This causes '/' to be prepended with CTLESC in dquoted string,
+        * making "./file"* treated incorrectly because we feed
+        * ".\/file*" string to glob(), confusing it (see expandmeta func).
+        * The "homegrown" glob implementation is okay with that,
+        * but glibc one isn't. With '/' always treated as CWORD,
+        * both work fine.
+        */
 # if ENABLE_ASH_ALIAS
        static const uint8_t syntax_index_table[] ALIGN1 = {
                1, 2, 1, 3, 4, 5, 1, 6,         /* "\t\n !\"$&'" */
-               7, 8, 3, 3, 3, 3, 1, 1,         /* "()*-/:;<" */
+               7, 8, 3, 3,/*3,*/3, 1, 1,       /* "()*-/:;<" */
                3, 1, 3, 3, 9, 3, 10, 1,        /* "=>?[\\]`|" */
                11, 3                           /* "}~" */
        };
 # else
        static const uint8_t syntax_index_table[] ALIGN1 = {
                0, 1, 0, 2, 3, 4, 0, 5,         /* "\t\n !\"$&'" */
-               6, 7, 2, 2, 2, 2, 0, 0,         /* "()*-/:;<" */
+               6, 7, 2, 2,/*2,*/2, 0, 0,       /* "()*-/:;<" */
                2, 0, 2, 2, 8, 2, 9, 0,         /* "=>?[\\]`|" */
                10, 2                           /* "}~" */
        };
@@ -2901,7 +2911,8 @@ static const uint8_t syntax_index_table[] ALIGN1 = {
        /*  44  "," */ CWORD_CWORD_CWORD_CWORD,
        /*  45  "-" */ CWORD_CCTL_CCTL_CWORD,
        /*  46  "." */ CWORD_CWORD_CWORD_CWORD,
-       /*  47  "/" */ CWORD_CCTL_CCTL_CWORD,
+/* "/" was CWORD_CCTL_CCTL_CWORD, see comment in SIT() function why this is changed: */
+       /*  47  "/" */ CWORD_CWORD_CWORD_CWORD,
        /*  48  "0" */ CWORD_CWORD_CWORD_CWORD,
        /*  49  "1" */ CWORD_CWORD_CWORD_CWORD,
        /*  50  "2" */ CWORD_CWORD_CWORD_CWORD,
@@ -5633,7 +5644,6 @@ rmescapes(char *str, int flag)
        while (*p) {
                if ((unsigned char)*p == CTLQUOTEMARK) {
 // Note: both inquotes and protect_against_glob only affect whether
-// CTLESC,<ch> gets converted to <ch> or to \<ch>
                        inquotes = ~inquotes;
                        p++;
                        protect_against_glob = globbing;
@@ -5697,11 +5707,14 @@ memtodest(const char *p, size_t len, int syntax, int quotes)
                unsigned char c = *p++;
                if (c) {
                        int n = SIT(c, syntax);
-                       if ((quotes & QUOTES_ESC) &&
-                                       ((n == CCTL) ||
-                                       (((quotes & EXP_FULL) || syntax != BASESYNTAX) &&
-                                       n == CBACK)))
+                       if ((quotes & QUOTES_ESC)
+                        && ((n == CCTL)
+                           ||  (((quotes & EXP_FULL) || syntax != BASESYNTAX)
+                               && n == CBACK)
+                               )
+                       ) {
                                USTPUTC(CTLESC, q);
+                       }
                } else if (!(quotes & QUOTES_KEEPNUL))
                        continue;
                USTPUTC(c, q);
@@ -6435,7 +6448,8 @@ subevalvar(char *p, char *varname, int strloc, int subtype,
                char *idx, *end;
 
                if (!repl) {
-                       if ((repl=strchr(str, CTLESC)))
+                       repl = strchr(str, CTLESC);
+                       if (repl)
                                *repl++ = '\0';
                        else
                                repl = nullstr;
@@ -6976,6 +6990,70 @@ addfname(const char *name)
        exparg.lastp = &sp->next;
 }
 
+/* If we want to use glob() from libc... */
+#if 1
+
+/* Add the result of glob() to the list: every entry of pglob->gl_pathv is handed to addfname() */
+static void
+addglob(const glob_t *pglob)
+{
+       char **p = pglob->gl_pathv;
+
+       do { /* first entry is used unchecked: gl_pathv must hold at least one name */
+               addfname(*p);
+       } while (*++p); /* advance, stop at the first NULL entry */
+}
+static void
+expandmeta(struct strlist *str /*, int flag*/)
+{
+       /* For each word in the str list: run libc glob() on it; matches go through addglob(), otherwise the unescaped word itself is linked in. TODO - EXP_REDIR */
+
+       while (str) {
+               char *p;
+               glob_t pglob;
+               int i;
+
+               if (fflag) /* fflag set: no glob() call, the word is kept and only unescaped */
+                       goto nometa;
+               INT_OFF; /* each path below that got here executes a matching INT_ON */
+               p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP);
+               /*
+                * GLOB_NOMAGIC (GNU): if no *?[ chars in pattern, return it even if no match
+                * TODO?: GLOB_NOCHECK: if no match, return unchanged pattern (sans \* escapes?)
+                */
+               i = glob(p, GLOB_NOMAGIC, NULL, &pglob);
+               if (p != str->text) /* preglob() handed back a separate buffer: release it */
+                       free(p);
+               switch (i) {
+               case 0:
+                       /* GLOB_MAGCHAR is set if *?[ chars were seen (GNU); without it the word is treated like a non-match */
+                       if (!(pglob.gl_flags & GLOB_MAGCHAR))
+                               goto nometa2;
+                       addglob(&pglob); /* append all matched names to the result list */
+                       globfree(&pglob);
+                       INT_ON;
+                       break;
+               case GLOB_NOMATCH:
+nometa2:
+                       globfree(&pglob);
+                       INT_ON;
+nometa:
+                       *exparg.lastp = str; /* link this node itself at the tail of exparg's list */
+                       rmescapes(str->text, 0); /* modifies str->text in place */
+                       exparg.lastp = &str->next;
+                       break;
+               default:        /* GLOB_NOSPACE; any other nonzero glob() result also lands here */
+                       globfree(&pglob);
+                       INT_ON;
+                       ash_msg_and_raise_error(bb_msg_memory_exhausted); /* no break follows: presumably does not return - confirm */
+               }
+               str = str->next;
+       }
+}
+
+#else
+/* Homegrown globbing code. (dash also has both, uses homegrown one.) */
+
 /*
  * Do metacharacter (i.e. *, ?, [...]) expansion.
  */
@@ -7179,7 +7257,8 @@ expandmeta(struct strlist *str /*, int flag*/)
                p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP);
                {
                        int i = strlen(str->text);
-                       expdir = ckmalloc(i < 2048 ? 2048 : i); /* XXX */
+//BUGGY estimation of how long expanded name can be
+                       expdir = ckmalloc(i < 2048 ? 2048 : i+1);
                }
                expmeta(expdir, expdir, p);
                free(expdir);
@@ -7204,6 +7283,7 @@ expandmeta(struct strlist *str /*, int flag*/)
                str = str->next;
        }
 }
+#endif /* our globbing code */
 
 /*
  * Perform variable substitution and command substitution on an argument,