Improve error reporting
authorJo-Philipp Wich <jow@openwrt.org>
Wed, 18 Jun 2014 15:59:51 +0000 (17:59 +0200)
committerJo-Philipp Wich <jow@openwrt.org>
Wed, 18 Jun 2014 16:50:51 +0000 (18:50 +0200)
Keep track of the exact location the error occured in and switch
the internal error description from a dynamic string buffer to an
integer which either holds negative values for lexer errors or
positives values for grammer violations.

In case of grammer violations the "error_code" member will hold
a bitfield describing the expected tokens.

Also rework the error messages emitted by the cli to be more
precise.

Examples:

$ jsonfilter -s '{}' -e '@.foo bar'
Syntax error: Expecting End of file
In expression @.foo bar
Near here ----------^

$ jsonfilter -s '{}' -e '@.foo\bar'
Syntax error: Unexpected character
In expression @.foo\bar
Near here ---------^

$ jsonfilter -s '{}' -e '@.foo..bar'
Syntax error: Expecting Label or '*'
In expression @.foo..bar
Near here ----------^

Signed-off-by: Jo-Philipp Wich <jow@openwrt.org>
ast.c
ast.h
lexer.c
main.c
parser.y

diff --git a/ast.c b/ast.c
index 5093e8b5167b61f19473e7536ab80d530ff36536..6db7a46df7f64a5bfa314b7266b65e599333aa17 100644 (file)
--- a/ast.c
+++ b/ast.c
@@ -73,9 +73,6 @@ jp_free(struct jp_state *s)
                op = tmp;
        }
 
-       if (s->error)
-               free(s->error);
-
        free(s);
 }
 
@@ -101,18 +98,11 @@ jp_parse(const char *expr)
 
        while (len > 0)
        {
-               s->off = (ptr - expr);
-
                op = jp_get_token(s, ptr, &mlen);
 
                if (mlen < 0)
                {
-                       s->erroff = s->off;
-                       s->error = strdup((mlen == -3) ? "String too long" :
-                                                               (mlen == -2) ? "Invalid escape sequence" :
-                                                                       (mlen == -1) ? "Unterminated string" :
-                                                                               "Unknown error");
-
+                       s->error_code = mlen;
                        goto out;
                }
 
@@ -121,6 +111,8 @@ jp_parse(const char *expr)
 
                len -= mlen;
                ptr += mlen;
+
+               s->off += mlen;
        }
 
        Parse(pParser, 0, NULL, s);
diff --git a/ast.h b/ast.h
index c2d6f2b68dace9c3e789f89c4e0029d86010dcb1..ec1f55ca89a870b10213dbbecc359019bf627d04 100644 (file)
--- a/ast.h
+++ b/ast.h
@@ -31,8 +31,8 @@ struct jp_opcode {
 struct jp_state {
        struct jp_opcode *pool;
        struct jp_opcode *path;
-       char *error;
-       int erroff;
+       int error_pos;
+       int error_code;
        int off;
 };
 
diff --git a/lexer.c b/lexer.c
index 3703d56d7bc4500ec07213a2d140442c21e5ff52..b1615ad2cf8b09fc9ef0d72d572d5699ccce7075 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -28,7 +28,7 @@ struct token {
        int type;
        const char *pat;
        int plen;
-       int (*parse)(const char *buf, struct jp_opcode *op);
+       int (*parse)(const char *buf, struct jp_opcode *op, struct jp_state *s);
 };
 
 #define dec(o) \
@@ -106,7 +106,7 @@ utf8enc(char **out, int *rem, int code)
  */
 
 static int
-parse_string(const char *buf, struct jp_opcode *op)
+parse_string(const char *buf, struct jp_opcode *op, struct jp_state *s)
 {
        char q = *(buf++);
        char str[128] = { 0 };
@@ -132,12 +132,16 @@ parse_string(const char *buf, struct jp_opcode *op)
                                                     hex(in[2]) * 16 * 16 +
                                                     hex(in[3]) * 16 +
                                                     hex(in[4])))
+                                       {
+                                               s->error_pos = s->off + (in - buf);
                                                return -3;
+                                       }
 
                                        in += 5;
                                }
                                else
                                {
+                                       s->error_pos = s->off + (in - buf);
                                        return -2;
                                }
                        }
@@ -148,12 +152,16 @@ parse_string(const char *buf, struct jp_opcode *op)
                                if (isxdigit(in[1]) && isxdigit(in[2]))
                                {
                                        if (!utf8enc(&out, &rem, hex(in[1]) * 16 + hex(in[2])))
+                                       {
+                                               s->error_pos = s->off + (in - buf);
                                                return -3;
+                                       }
 
                                        in += 3;
                                }
                                else
                                {
+                                       s->error_pos = s->off + (in - buf);
                                        return -2;
                                }
                        }
@@ -170,10 +178,16 @@ parse_string(const char *buf, struct jp_opcode *op)
                                               dec(in[2]);
 
                                        if (code > 255)
+                                       {
+                                               s->error_pos = s->off + (in - buf);
                                                return -2;
+                                       }
 
                                        if (!utf8enc(&out, &rem, code))
+                                       {
+                                               s->error_pos = s->off + (in - buf);
                                                return -3;
+                                       }
 
                                        in += 3;
                                }
@@ -182,7 +196,10 @@ parse_string(const char *buf, struct jp_opcode *op)
                                else if (in[1] >= '0' && in[1] <= '7')
                                {
                                        if (!utf8enc(&out, &rem, dec(in[0]) * 8 + dec(in[1])))
+                                       {
+                                               s->error_pos = s->off + (in - buf);
                                                return -3;
+                                       }
 
                                        in += 2;
                                }
@@ -191,7 +208,10 @@ parse_string(const char *buf, struct jp_opcode *op)
                                else
                                {
                                        if (!utf8enc(&out, &rem, dec(in[0])))
+                                       {
+                                               s->error_pos = s->off + (in - buf);
                                                return -3;
+                                       }
 
                                        in += 1;
                                }
@@ -201,7 +221,10 @@ parse_string(const char *buf, struct jp_opcode *op)
                        else
                        {
                                if (rem-- < 1)
+                               {
+                                       s->error_pos = s->off + (in - buf);
                                        return -3;
+                               }
 
                                switch (in[0])
                                {
@@ -241,7 +264,10 @@ parse_string(const char *buf, struct jp_opcode *op)
                else
                {
                        if (rem-- < 1)
+                       {
+                               s->error_pos = s->off + (in - buf);
                                return -3;
+                       }
 
                        *out++ = *in++;
                }
@@ -262,7 +288,7 @@ parse_string(const char *buf, struct jp_opcode *op)
  */
 
 static int
-parse_label(const char *buf, struct jp_opcode *op)
+parse_label(const char *buf, struct jp_opcode *op, struct jp_state *s)
 {
        char str[128] = { 0 };
        char *out = str;
@@ -272,7 +298,10 @@ parse_label(const char *buf, struct jp_opcode *op)
        while (*in == '_' || isalnum(*in))
        {
                if (rem-- < 1)
+               {
+                       s->error_pos = s->off + (in - buf);
                        return -3;
+               }
 
                *out++ = *in++;
        }
@@ -302,13 +331,16 @@ parse_label(const char *buf, struct jp_opcode *op)
  */
 
 static int
-parse_number(const char *buf, struct jp_opcode *op)
+parse_number(const char *buf, struct jp_opcode *op, struct jp_state *s)
 {
        char *e;
        int n = strtol(buf, &e, 10);
 
        if (e == buf)
+       {
+               s->error_pos = s->off;
                return -2;
+       }
 
        op->num = n;
 
@@ -374,7 +406,7 @@ const char *tokennames[23] = {
 
 
 static int
-match_token(const char *ptr, struct jp_opcode *op)
+match_token(const char *ptr, struct jp_opcode *op, struct jp_state *s)
 {
        int i;
        const struct token *tok;
@@ -389,13 +421,14 @@ match_token(const char *ptr, struct jp_opcode *op)
                        op->type = tok->type;
 
                        if (tok->parse)
-                               return tok->parse(ptr, op);
+                               return tok->parse(ptr, op, s);
 
                        return tok->plen;
                }
        }
 
-       return -1;
+       s->error_pos = s->off;
+       return -4;
 }
 
 struct jp_opcode *
@@ -403,10 +436,17 @@ jp_get_token(struct jp_state *s, const char *input, int *mlen)
 {
        struct jp_opcode op = { 0 };
 
-       *mlen = match_token(input, &op);
+       *mlen = match_token(input, &op, s);
 
-       if (*mlen < 0 || op.type == 0)
+       if (*mlen < 0)
+       {
+               s->error_code = *mlen;
                return NULL;
+       }
+       else if (op.type == 0)
+       {
+               return NULL;
+       }
 
        return jp_alloc_op(s, op.type, op.num, op.str, NULL);
 }
diff --git a/main.c b/main.c
index 3166746fd985e58623265d8fcafd6041c84b29b8..62e38e941177cc88743be5ade4cf9a7e4ce1fdb2 100644 (file)
--- a/main.c
+++ b/main.c
@@ -250,6 +250,57 @@ match_cb(struct json_object *res, void *priv)
        }
 }
 
+static void
+print_error(struct jp_state *state, char *expr)
+{
+       int i;
+       bool first = true;
+
+       fprintf(stderr, "Syntax error: ");
+
+       switch (state->error_code)
+       {
+       case -4:
+               fprintf(stderr, "Unexpected character\n");
+               break;
+       
+       case -3:
+               fprintf(stderr, "String or label literal too long\n");
+               break;
+
+       case -2:
+               fprintf(stderr, "Invalid escape sequence\n");
+               break;
+
+       case -1:
+               fprintf(stderr, "Unterminated string\n");
+               break;
+
+       default:
+               for (i = 0; i < sizeof(state->error_code) * 8; i++)
+               {
+                       if (state->error_code & (1 << i))
+                       {
+                               fprintf(stderr,
+                                       first ? "Expecting %s" : " or %s", tokennames[i]);
+
+                               first = false;
+                       }
+               }
+
+               fprintf(stderr, "\n");
+               break;
+       }
+
+       fprintf(stderr, "In expression %s\n", expr);
+       fprintf(stderr, "Near here ----");
+
+       for (i = 0; i < state->error_pos; i++)
+               fprintf(stderr, "-");
+
+       fprintf(stderr, "^\n");
+}
+
 static bool
 filter_json(int opt, struct json_object *jsobj, char *expr)
 {
@@ -261,12 +312,14 @@ filter_json(int opt, struct json_object *jsobj, char *expr)
 
        state = jp_parse(expr);
 
-       if (!state || state->error)
+       if (!state)
        {
-               fprintf(stderr, "Syntax error near {%s}: %s\n",
-                       state ? expr + state->erroff : expr,
-                       state ? state->error : "Out of memory");
-
+               fprintf(stderr, "Out of memory\n");
+               goto out;
+       }
+       else if (state->error_code)
+       {
+               print_error(state, expr);
                goto out;
        }
 
index 9c06c07c4a90a1eeed05a9a072727576db6e346d..f77779bc72ca8f7a9597c4fdc15f6c2956a6d069 100644 (file)
--- a/parser.y
+++ b/parser.y
 }
 
 %syntax_error {
-       int n = sizeof(tokennames) / sizeof(tokennames[0]);
-       int l = strlen("Expecting ");
-       int c = 0;
        int i;
 
-       for (i = 0; i < n; i++)
-       {
+       for (i = 0; i < sizeof(tokennames) / sizeof(tokennames[0]); i++)
                if (yy_find_shift_action(yypParser, (YYCODETYPE)i) < YYNSTATE + YYNRULE)
-                       l += strlen(tokennames[i]) + 4;
-       }
-
-       s->error = malloc(l);
-
-       if (s->error)
-       {
-               s->erroff = s->off;
-               strcpy(s->error, "Expecting ");
-
-               for (i = 0; i < n; i++)
-               {
-                       if (yy_find_shift_action(yypParser, (YYCODETYPE)i) < YYNSTATE + YYNRULE)
-                       {
-                               if (c++)
-                                       strcat(s->error, " or ");
-
-                               strcat(s->error, tokennames[i]);
-                       }
-               }
-       }
+                       s->error_code |= (1 << i);
+
+       s->error_pos = s->off;
 }