awk: fix more "length" cases, closes 12486
authorDenys Vlasenko <vda.linux@googlemail.com>
Sun, 2 Feb 2020 22:28:55 +0000 (23:28 +0100)
committerDenys Vlasenko <vda.linux@googlemail.com>
Sun, 2 Feb 2020 22:28:55 +0000 (23:28 +0100)
function                                             old     new   delta
next_token                                           808     831     +23

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
editors/awk.c
testsuite/awk.tests

index f199909018e059e61c37491a7db72cb7e3ac5fc2..70df2fdb44f5ffadf09a6729f64f425ae86d6f3d 100644 (file)
@@ -272,7 +272,8 @@ typedef struct tsplitter_s {
 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
 /* operator is inserted between them */
 #define        TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
-                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
+                   | TC_STRING | TC_NUMBER | TC_UOPPOST \
+                   | TC_LENGTH)
 #define        TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
 
 #define        OF_RES1     0x010000
@@ -1070,8 +1071,10 @@ static uint32_t next_token(uint32_t expected)
        const uint32_t *ti;
 
        if (t_rollback) {
+               debug_printf_parse("%s: using rolled-back token\n", __func__);
                t_rollback = FALSE;
        } else if (concat_inserted) {
+               debug_printf_parse("%s: using concat-inserted token\n", __func__);
                concat_inserted = FALSE;
                t_tclass = save_tclass;
                t_info = save_info;
@@ -1200,7 +1203,11 @@ static uint32_t next_token(uint32_t expected)
                        goto readnext;
 
                /* insert concatenation operator when needed */
-               if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
+               debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__,
+                       (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP));
+               if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
+                && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
+               ) {
                        concat_inserted = TRUE;
                        save_tclass = tc;
                        save_info = t_info;
@@ -1208,6 +1215,7 @@ static uint32_t next_token(uint32_t expected)
                        t_info = OC_CONCAT | SS | P(35);
                }
 
+               debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass);
                t_tclass = tc;
        }
        ltclass = t_tclass;
@@ -1218,6 +1226,7 @@ static uint32_t next_token(uint32_t expected)
                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
        }
 
+       debug_printf_parse("%s: returning, ltclass:%x t_double:%f\n", __func__, ltclass, t_double);
        return ltclass;
 #undef concat_inserted
 #undef save_tclass
@@ -1282,7 +1291,7 @@ static node *parse_expr(uint32_t iexp)
                        glptr = NULL;
 
                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
-                       debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
+                       debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
                        /* for binary and postfix-unary operators, jump back over
                         * previous operators with higher priority */
                        vn = cn;
@@ -1387,7 +1396,12 @@ static node *parse_expr(uint32_t iexp)
 
                                case TC_LENGTH:
                                        debug_printf_parse("%s: TC_LENGTH\n", __func__);
-                                       next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM);
+                                       next_token(TC_SEQSTART /* length(...) */
+                                               | TC_OPTERM    /* length; (or newline)*/
+                                               | TC_GRPTERM   /* length } */
+                                               | TC_BINOPX    /* length <op> NUM */
+                                               | TC_COMMA     /* print length, 1 */
+                                       );
                                        rollback_token();
                                        if (t_tclass & TC_SEQSTART) {
                                                /* It was a "(" token. Handle just like TC_BUILTIN */
index a7a533ba05daebe61c51f706c71b07182e2fa480..b5008290fe568ce02320769c501e0beec4914c7e 100755 (executable)
@@ -85,7 +85,8 @@ testing "awk floating const with leading zeroes" \
        "" "\n"
 
 # long field seps requiring regex
-testing "awk long field sep" "awk -F-- '{ print NF, length(\$NF), \$NF }'" \
+testing "awk long field sep" \
+       "awk -F-- '{ print NF, length(\$NF), \$NF }'" \
        "2 0 \n3 0 \n4 0 \n5 0 \n" \
        "" \
        "a--\na--b--\na--b--c--\na--b--c--d--"
@@ -317,6 +318,26 @@ testing "awk length()" \
        "3\n3\n3\n3\n" \
        "" "qwe"
 
+testing "awk print length, 1" \
+       "awk '{ print length, 1 }'" \
+       "0 1\n" \
+       "" "\n"
+
+testing "awk print length 1" \
+       "awk '{ print length 1 }'" \
+       "01\n" \
+       "" "\n"
+
+testing "awk length == 0" \
+       "awk 'length == 0 { print \"foo\" }'" \
+       "foo\n" \
+       "" "\n"
+
+testing "awk if (length == 0)" \
+       "awk '{ if (length == 0) { print \"bar\" } }'" \
+       "bar\n" \
+       "" "\n"
+
 testing "awk -f and ARGC" \
        "awk -f - input" \
        "re\n2\n" \