bc: fix handling of comment/string interactions while buffering input
authorDenys Vlasenko <vda.linux@googlemail.com>
Wed, 26 Dec 2018 11:23:05 +0000 (12:23 +0100)
committerDenys Vlasenko <vda.linux@googlemail.com>
Wed, 26 Dec 2018 11:23:05 +0000 (12:23 +0100)
function                                             old     new   delta
zbc_lex_next                                        1965    1982     +17
zbc_num_divmod                                       150     156      +6
bc_read_line                                         411     394     -17
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/1 up/down: 23/-17)              Total: 6 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
miscutils/bc.c

index 9d04ddea3d6b0402e36822987f9c1a7e3c26a2a5..f4e499f13438e6fb74f2fabd6975e950c1ae8e3d 100644 (file)
@@ -1147,23 +1147,6 @@ static void bc_vec_string(BcVec *v, size_t len, const char *str)
        bc_vec_pushZeroByte(v);
 }
 
-#if ENABLE_FEATURE_BC_SIGNALS && ENABLE_FEATURE_EDITING
-static void bc_vec_concat(BcVec *v, const char *str)
-{
-       size_t len, slen;
-
-       if (v->len == 0) bc_vec_pushZeroByte(v);
-
-       slen = strlen(str);
-       len = v->len + slen;
-
-       if (v->cap < len) bc_vec_grow(v, slen);
-       strcpy(v->v + v->len - 1, str);
-
-       v->len = len;
-}
-#endif
-
 static void *bc_vec_item(const BcVec *v, size_t idx)
 {
        return v->v + v->size * idx;
@@ -2491,6 +2474,21 @@ static FAST_FUNC void bc_result_free(void *result)
        }
 }
 
+#if ENABLE_FEATURE_BC_SIGNALS && ENABLE_FEATURE_EDITING
+static void bc_vec_concat(BcVec *v, const char *str)
+{
+       size_t len, slen;
+
+       slen = strlen(str);
+       len = v->len + slen + 1;
+
+       if (v->cap < len) bc_vec_grow(v, slen);
+       strcpy(v->v + v->len, str);
+
+       v->len = len;
+}
+#endif
+
 static int bad_input_byte(char c)
 {
        if ((c < ' ' && c != '\t' && c != '\r' && c != '\n') // also allow '\v' '\f'?
@@ -2887,7 +2885,7 @@ static void bc_lex_file(void)
 static bool bc_lex_more_input(void)
 {
        BcParse *p = &G.prs;
-       size_t str;
+       unsigned str; // bool for bc, string nest count for dc
        bool comment;
 
        bc_vec_pop_all(&G.input_buffer);
@@ -2896,8 +2894,23 @@ static bool bc_lex_more_input(void)
        // with a backslash to the parser. The reason for that is because the parser
        // treats a backslash+newline combo as whitespace, per the bc spec. In that
        // case, and for strings and comments, the parser will expect more stuff.
-       comment = false;
+       //
+       // bc cases to test interactively:
+       // 1 #comment\  - prints "1<newline>" at once (comment is not continued)
+       // 1 #comment/* - prints "1<newline>" at once
+       // 1 #comment"  - prints "1<newline>" at once
+       // 1\#comment   - error at once (\ is not a line continuation)
+       // 1 + /*"*/2   - prints "3<newline>" at once
+       // 1 + /*#*/2   - prints "3<newline>" at once
+       // "str\"       - prints "str\" at once
+       // "str#"       - prints "str#" at once
+       // "str/*"      - prints "str/*" at once
+       // "str#\       - waits for second line
+       // end"         - ...prints "str#\<newline>end"
+//This is way too complex, we duplicate comment/string logic of lexer
+//TODO: switch to char-by-char input like hush does it
        str = 0;
+       comment = false; // stays always false for dc
        for (;;) {
                size_t prevlen = G.input_buffer.len;
                char *string;
@@ -2909,39 +2922,61 @@ static bool bc_lex_more_input(void)
 
                string = G.input_buffer.v + prevlen;
                while (*string) {
-                       char c = *string;
-                       if (!comment) {
-                               if (string == G.input_buffer.v || string[-1] != '\\') {
-                                       if (IS_BC)
-                                               str ^= (c == '"');
-                                       else {
-                                               if (c == ']')
-                                                       str -= 1;
-                                               else if (c == '[')
-                                                       str += 1;
-                                       }
+                       char c = *string++;
+#if ENABLE_BC
+                       if (comment) {
+                               // We are in /**/ comment, exit only on "*/"
+                               if (c == '*' && *string == '/') {
+                                       comment = false;
+                                       string++;
                                }
+                               continue;
                        }
-                       string++;
-                       if (!str) {
+#endif
+                       // We are not in /**/ comment
+                       if (str) {
+                               // We are in "string" (bc) or [string] (dc)
+                               if (IS_BC) {
+                                       // bc strings have no escapes: \\ is not special,
+                                       // \n is not, \" is not, \<newline> is not.
+                                       str = (c != '"'); // clear flag when " is seen
+                               } else {
+                                       // dc strings have no escapes as well, can nest
+                                       if (c == ']')
+                                               str--;
+                                       if (c == '[')
+                                               str++;
+                               }
+                               continue;
+                       }
+                       // We are not in a string or /**/ comment
+
+                       // Is it a #comment? Return the string (can't have continuation)
+                       if (c == '#')
+                               goto return_string;
+#if ENABLE_BC
+                       if (IS_BC) {
+                               // bc: is it a start of /**/ comment or string?
                                if (c == '/' && *string == '*') {
                                        comment = true;
                                        string++;
                                        continue;
                                }
-                               if (c == '*' && *string == '/') {
-                                       comment = false;
-                                       string++;
-                               }
+                               str = (c == '"'); // set flag if " is seen
+                               continue;
                        }
-               }
+#endif
+                       // dc: is it a start of string?
+                       str = (c == '[');
+               } // end of "check all chars in string" loop
+
                if (str != 0 || comment) {
                        G.input_buffer.len--; // backstep over the trailing NUL byte
                        continue;
                }
 
                // Check for backslash+newline.
-               // we do not check that last char is '\n' -
+               // We do not check that last char is '\n' -
                // if it is not, then it's EOF, and looping back
                // to bc_read_line() will detect it:
                string -= 2;
@@ -2952,6 +2987,7 @@ static bool bc_lex_more_input(void)
 
                break;
        }
+ return_string:
 
        p->lex_inbuf = G.input_buffer.v;
 //     bb_error_msg("G.input_buffer.len:%d '%s'", G.input_buffer.len, G.input_buffer.v);