hush: fix more obscure ${var%...} cases
[oweals/busybox.git] / shell / hush.c
index c713ce808896cc715483bb953f25b3f2f6b2d50d..32b90876fde26c265a53b61af9678d9ca8661173 100644 (file)
  * handle the recursion implicit in the various substitutions, especially
  * across continuation lines.
  *
- * POSIX syntax not implemented:
+ * TODOs:
+ *      grep for "TODO" and fix (some of them are easy)
+ *      special variables (done: PWD, PPID, RANDOM)
+ *      tilde expansion
  *      aliases
- *      Tilde Expansion
+ *      follow IFS rules more precisely, including update semantics
+ *      builtins mandated by standards we don't support:
+ *          [un]alias, command, fc, getopts, newgrp, readonly, times
+ *      make support for complex ${var%...} constructs optional
+ *      make here documents optional
  *
  * Bash compat TODO:
  *      redirection of stdout+stderr: &> and >&
  *      process substitution: <(list) and >(list)
  *      =~: regex operator
  *      let EXPR [EXPR...]
- *        Each EXPR is an arithmetic expression (ARITHMETIC EVALUATION)
- *        If the last arg evaluates to 0, let returns 1; 0 otherwise.
- *        NB: let `echo 'a=a + 1'` - error (IOW: multi-word expansion is used)
+ *          Each EXPR is an arithmetic expression (ARITHMETIC EVALUATION)
+ *          If the last arg evaluates to 0, let returns 1; 0 otherwise.
+ *          NB: let `echo 'a=a + 1'` - error (IOW: multi-word expansion is used)
  *      ((EXPR))
- *        The EXPR is evaluated according to ARITHMETIC EVALUATION.
- *        This is exactly equivalent to let "EXPR".
+ *          The EXPR is evaluated according to ARITHMETIC EVALUATION.
+ *          This is exactly equivalent to let "EXPR".
  *      $[EXPR]: synonym for $((EXPR))
- *
- * TODOs:
- *      grep for "TODO" and fix (some of them are easy)
- *      special variables (done: PWD, PPID, RANDOM)
- *      follow IFS rules more precisely, including update semantics
- *      builtins mandated by standards we don't support:
- *          [un]alias, command, fc, getopts, newgrp, readonly, times
  *      export builtin should be special, its arguments are assignments
  *          and therefore expansion of them should be "one-word" expansion:
  *              $ export i=`echo 'a  b'` # export has one arg: "i=a  b"
@@ -2405,6 +2405,23 @@ static char *expand_pseudo_dquoted(const char *str)
        return exp_str;
 }
 
+#if ENABLE_SH_MATH_SUPPORT
+static arith_t expand_and_evaluate_arith(const char *arg, int *errcode_p) /* expand arg via expand_pseudo_dquoted(), then evaluate it with arith() */
+{
+       arith_eval_hooks_t hooks; /* callbacks handed to arith() */
+       arith_t res;
+       char *exp_str; /* result of expand_pseudo_dquoted(); may be NULL */
+
+       hooks.lookupvar = get_local_var_value;
+       hooks.setvar = set_local_var_from_halves;
+       hooks.endofname = endofname;
+       exp_str = expand_pseudo_dquoted(arg);
+       res = arith(exp_str ? exp_str : arg, errcode_p, &hooks); /* NULL exp_str: evaluate arg itself; errcode_p is passed through to arith() */
+       free(exp_str); /* free(NULL) is a no-op, so no NULL check is needed */
+       return res;
+}
+#endif
+
 /* Expand all variable references in given string, adding words to list[]
  * at n, n+1,... positions. Return updated n (so that list[n] is next one
  * to be filled). This routine is extremely tricky: has to deal with
@@ -2429,7 +2446,7 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char
        while ((p = strchr(arg, SPECIAL_VAR_SYMBOL)) != NULL) {
                char first_ch;
                int i;
-               char *dyn_val = NULL;
+               char *to_be_freed = NULL;
                const char *val = NULL;
 #if ENABLE_HUSH_TICK
                o_string subst_result = NULL_O_STRING;
@@ -2530,21 +2547,13 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char
 #endif
 #if ENABLE_SH_MATH_SUPPORT
                case '+': { /* <SPECIAL_VAR_SYMBOL>+cmd<SPECIAL_VAR_SYMBOL> */
-                       arith_eval_hooks_t hooks;
                        arith_t res;
                        int errcode;
-                       char *exp_str;
 
                        arg++; /* skip '+' */
                        *p = '\0'; /* replace trailing <SPECIAL_VAR_SYMBOL> */
                        debug_printf_subst("ARITH '%s' first_ch %x\n", arg, first_ch);
-
-                       exp_str = expand_pseudo_dquoted(arg);
-                       hooks.lookupvar = get_local_var_value;
-                       hooks.setvar = set_local_var_from_halves;
-                       hooks.endofname = endofname;
-                       res = arith(exp_str ? exp_str : arg, &errcode, &hooks);
-                       free(exp_str);
+                       res = expand_and_evaluate_arith(arg, &errcode);
 
                        if (errcode < 0) {
                                const char *msg = "error in arithmetic";
@@ -2630,12 +2639,22 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char
                                                scan_t scan = pick_scan(exp_op, *exp_word, &match_at_left);
                                                if (exp_op == *exp_word)        /* ## or %% */
                                                        exp_word++;
-                                               val = dyn_val = xstrdup(val);
-                                               loc = scan(dyn_val, exp_word, match_at_left);
-                                               if (match_at_left) /* # or ## */
-                                                       val = loc;
-                                               else if (loc) /* % or %% and match was found */
-                                                       *loc = '\0';
+                                               val = to_be_freed = xstrdup(val);
+                                               {
+                                                       char *exp_exp_word = expand_pseudo_dquoted(exp_word);
+                                                       if (exp_exp_word)
+                                                               exp_word = exp_exp_word;
+                                                       loc = scan(to_be_freed, exp_word, match_at_left);
+                                                       //bb_error_msg("op:%c str:'%s' pat:'%s' res:'%s'",
+                                                       //              exp_op, to_be_freed, exp_word, loc);
+                                                       free(exp_exp_word);
+                                               }
+                                               if (loc) { /* match was found */
+                                                       if (match_at_left) /* # or ## */
+                                                               val = loc;
+                                                       else /* % or %% */
+                                                               *loc = '\0';
+                                               }
                                        }
                                } else if (!strchr("%#:-=+?"+3, exp_op)) {
 #if ENABLE_HUSH_BASH_COMPAT
@@ -2664,7 +2683,7 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char
                                                if (len == 0 || !val || beg >= strlen(val))
                                                        val = "";
                                                else
-                                                       val = dyn_val = xstrndup(val + beg, len);
+                                                       val = to_be_freed = xstrndup(val + beg, len);
                                                //bb_error_msg("val:'%s'", val);
                                        } else
 #endif
@@ -2701,13 +2720,16 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char
                                        debug_printf_expand("expand: op:%c (null:%s) test:%i\n", exp_op,
                                                (exp_save == ':') ? "true" : "false", use_word);
                                        if (use_word) {
+                                               to_be_freed = expand_pseudo_dquoted(exp_word);
+                                               if (to_be_freed)
+                                                       exp_word = to_be_freed;
                                                if (exp_op == '?') {
-//TODO: how interactive bash aborts expansion mid-command?
                                                        /* mimic bash message */
                                                        die_if_script("%s: %s",
                                                                var,
                                                                exp_word[0] ? exp_word : "parameter null or not set"
                                                        );
+//TODO: how does interactive bash abort expansion mid-command?
                                                } else {
                                                        val = exp_word;
                                                }
@@ -2752,7 +2774,7 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char
                if (val) {
                        o_addQstr(output, val, strlen(val));
                }
-               free(dyn_val);
+               free(to_be_freed);
                /* Do the check to avoid writing to a const string */
                if (*p != SPECIAL_VAR_SYMBOL)
                        *p = SPECIAL_VAR_SYMBOL;
@@ -5866,29 +5888,37 @@ static void add_till_backquote(o_string *dest, struct in_str *input)
  * echo $(echo '(TEST)' BEST)           (TEST) BEST
  * echo $(echo 'TEST)' BEST)            TEST) BEST
  * echo $(echo \(\(TEST\) BEST)         ((TEST) BEST
+ *
+ * Also adapted to eat ${var%...} constructs, since the "..." part
+ * can contain arbitrary constructs, just like $(cmd).
  */
-static void add_till_closing_paren(o_string *dest, struct in_str *input, bool dbl)
+#define DOUBLE_CLOSE_CHAR_FLAG 0x80
+static void add_till_closing_paren(o_string *dest, struct in_str *input, char end_ch)
 {
-       int count = 0;
+       char dbl = end_ch & DOUBLE_CLOSE_CHAR_FLAG;
+       end_ch &= (DOUBLE_CLOSE_CHAR_FLAG-1);
        while (1) {
                int ch = i_getch(input);
                if (ch == EOF) {
-                       syntax_error_unterm_ch(')');
+                       syntax_error_unterm_ch(end_ch);
                        /*xfunc_die(); - redundant */
                }
-               if (ch == '(')
-                       count++;
-               if (ch == ')') {
-                       if (--count < 0) {
-                               if (!dbl)
-                                       break;
-                               if (i_peek(input) == ')') {
-                                       i_getch(input);
-                                       break;
-                               }
+               if (ch == end_ch) {
+                       if (!dbl)
+                               break;
+                       /* we look for closing )) of $((EXPR)) */
+                       if (i_peek(input) == end_ch) {
+                               i_getch(input); /* eat second ')' */
+                               break;
                        }
                }
                o_addchr(dest, ch);
+               if (ch == '(' || ch == '{') {
+                       ch = (ch == '(' ? ')' : '}');
+                       add_till_closing_paren(dest, input, ch);
+                       o_addchr(dest, ch);
+                       continue;
+               }
                if (ch == '\'') {
                        add_till_single_quote(dest, input);
                        o_addchr(dest, ch);
@@ -5899,6 +5929,11 @@ static void add_till_closing_paren(o_string *dest, struct in_str *input, bool db
                        o_addchr(dest, ch);
                        continue;
                }
+               if (ch == '`') {
+                       add_till_backquote(dest, input);
+                       o_addchr(dest, ch);
+                       continue;
+               }
                if (ch == '\\') {
                        /* \x. Copy verbatim. Important for  \(, \) */
                        ch = i_getch(input);
@@ -5959,62 +5994,52 @@ static int handle_dollar(o_string *as_string,
        case '@': /* args */
                goto make_one_char_var;
        case '{': {
-               bool first_char, all_digits;
-               bool in_expansion_param;
+               o_addchr(dest, SPECIAL_VAR_SYMBOL);
 
-               ch = i_getch(input);
+               ch = i_getch(input); /* eat '{' */
                nommu_addchr(as_string, ch);
-               o_addchr(dest, SPECIAL_VAR_SYMBOL);
 
-// TODO: need to handle "a=ab}; echo ${a%\}}"
-// and "a=abc; c=c; echo ${a%${c}}"
-               in_expansion_param = false;
-               first_char = true;
-               all_digits = false;
+               ch = i_getch(input); /* first char after '{' */
+               nommu_addchr(as_string, ch);
+               /* It should be ${?}, or ${#var},
+                * or even ${?+subst} - operator acting on a special variable,
+                * or the beginning of variable name.
+                */
+               if (!strchr("$!?#*@_", ch) && !isalnum(ch)) { /* not one of those */
+ bad_dollar_syntax:
+                       syntax_error_unterm_str("${name}");
+                       debug_printf_parse("handle_dollar return 1: unterminated ${name}\n");
+                       return 1;
+               }
+               ch |= quote_mask;
+
+               /* It's possible to just call add_till_closing_paren() at this point.
+                * However, this regresses some of our testsuite cases
+                * which check invalid constructs like ${%}.
+                * Oh well... let's check that the var name part is fine... */
+
                while (1) {
+                       o_addchr(dest, ch);
+                       debug_printf_parse(": '%c'\n", ch);
+
                        ch = i_getch(input);
                        nommu_addchr(as_string, ch);
-                       if (ch == '}') {
+                       if (ch == '}')
                                break;
-                       }
-
-                       if (first_char) {
-                               if (ch == '#') {
-                                       /* ${#var}: length of var contents */
-                                       goto char_ok;
-                               }
-                               if (isdigit(ch)) {
-                                       all_digits = true;
-                                       goto char_ok;
-                               }
-                               /* They're being verbose and doing ${?} */
-                               if (i_peek(input) == '}' && strchr("$!?#*@_", ch))
-                                       goto char_ok;
-                       }
 
-                       if (!in_expansion_param
-                        && (  (all_digits && !isdigit(ch)) /* met non-digit: 123w */
-                           || (!all_digits && !isalnum(ch) && ch != '_') /* met non-name char: abc% */
-                           )
-                       ) {
+                       if (!isalnum(ch) && ch != '_') {
                                /* handle parameter expansions
                                 * http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02
                                 */
-                               if (first_char /* bad (empty var name): "${%..." */
-                                || !strchr("%#:-=+?", ch) /* bad: "${var<bad_char>..." */
-                               ) {
-                                       syntax_error_unterm_str("${name}");
-                                       debug_printf_parse("handle_dollar return 1: unterminated ${name}\n");
-                                       return 1;
-                               }
-                               in_expansion_param = true;
+                               if (!strchr("%#:-=+?", ch)) /* ${var<bad_char>... */
+                                       goto bad_dollar_syntax;
+                               /* Eat everything until closing '}' */
+                               o_addchr(dest, ch);
+//TODO: add nommu_addchr hack here
+                               add_till_closing_paren(dest, input, '}');
+                               break;
                        }
- char_ok:
-                       debug_printf_parse(": '%c'\n", ch);
-                       o_addchr(dest, ch | quote_mask);
-                       quote_mask = 0;
-                       first_char = false;
-               } /* while (1) */
+               }
                o_addchr(dest, SPECIAL_VAR_SYMBOL);
                break;
        }
@@ -6034,7 +6059,7 @@ static int handle_dollar(o_string *as_string,
 #  if !BB_MMU
                        pos = dest->length;
 #  endif
-                       add_till_closing_paren(dest, input, true);
+                       add_till_closing_paren(dest, input, ')' | DOUBLE_CLOSE_CHAR_FLAG);
 #  if !BB_MMU
                        if (as_string) {
                                o_addstr(as_string, dest->data + pos);
@@ -6052,7 +6077,7 @@ static int handle_dollar(o_string *as_string,
 #  if !BB_MMU
                pos = dest->length;
 #  endif
-               add_till_closing_paren(dest, input, false);
+               add_till_closing_paren(dest, input, ')');
 #  if !BB_MMU
                if (as_string) {
                        o_addstr(as_string, dest->data + pos);