awk: Fix overly permissive func arg list parsing
author Brian Foley <bpfoley@google.com>
Mon, 7 Jan 2019 02:32:59 +0000 (18:32 -0800)
committer Denys Vlasenko <vda.linux@googlemail.com>
Mon, 21 Jan 2019 11:59:19 +0000 (12:59 +0100)
The old parser allowed things like 'func f(a b)' and 'func f(a,)', which GNU awk forbids.

function                                             old     new   delta
parse_program                                        327     367     +40
chain_expr                                            40      67     +27
parse_expr                                           891     915     +24
EMSG_TOO_FEW_ARGS                                     30      18     -12
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/1 up/down: 91/-12)             Total: 79 bytes

Signed-off-by: Brian Foley <bpfoley@google.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
editors/awk.c
testsuite/awk.tests

index 90edec82cc998f7ee04154f6323a4cbe93de25fb..d25508e5d40802fea65a3ccb7302270a87b9b053 100644 (file)
@@ -1613,12 +1613,25 @@ static void parse_program(char *p)
                        f = newfunc(t_string);
                        f->body.first = NULL;
                        f->nargs = 0;
-                       while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
+                       /* Match func arg list: a comma sep list of >= 0 args, and a close paren */
+                       while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) {
+                               /* A close paren is accepted here only for an empty arg list;
+                                * after a comma from the prev iter, an arg must follow */
+                               if (f->nargs == 0 && t_tclass == TC_SEQTERM)
+                                       break;
+
+                               /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */
+                               if (t_tclass != TC_VARIABLE)
+                                       syntax_error(EMSG_UNEXP_TOKEN);
+
                                v = findvar(ahash, t_string);
                                v->x.aidx = f->nargs++;
 
+                               /* Arg followed either by end of arg list or 1 comma */
                                if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
                                        break;
+                               if (t_tclass != TC_COMMA)
+                                       syntax_error(EMSG_UNEXP_TOKEN);
                        }
                        seq = &f->body;
                        chain_group();
index 03fedf7715f5fbd7770a6759d918e16252c604ff..0db6a26e4c2c32c4cadfc219df8cd65f1853d345 100755 (executable)
@@ -280,6 +280,18 @@ testing "awk 'delete a[v--]' evaluates v-- once" \
 " \
        "" ""
 
+testing "awk func arg parsing 1" \
+       "awk 'func f(,) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
+
+testing "awk func arg parsing 2" \
+       "awk 'func f(a,,b) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
+
+testing "awk func arg parsing 3" \
+       "awk 'func f(a,) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
+
+testing "awk func arg parsing 4" \
+       "awk 'func f(a b) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
+
 testing "awk handles empty ()" \
        "awk 'BEGIN {print()}' 2>&1" "awk: cmd. line:1: Empty sequence\n" "" ""