bc: fix handling of comment/string interactions while buffering input

function                                             old     new   delta
zbc_lex_next                                        1965    1982     +17
zbc_num_divmod                                       150     156      +6
bc_read_line                                         411     394     -17
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/1 up/down: 23/-17)              Total: 6 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
author: Denys Vlasenko <vda.linux@googlemail.com> 2018-12-26 12:23:05 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2018-12-26 12:23:05 +0100
commit: 63ad7993845fbf59264e22295ae9c06250832119 (patch)
tree: 8907aa8c1819109955edf4b449a5045fd1f6441e
parent: 94576d2b972b3bd136fbe8057c95690ae36ea8c9 (diff)
download: busybox-w32-63ad7993845fbf59264e22295ae9c06250832119.tar.gz
busybox-w32-63ad7993845fbf59264e22295ae9c06250832119.tar.bz2
busybox-w32-63ad7993845fbf59264e22295ae9c06250832119.zip
1 files changed, 74 insertions, 38 deletions
diff --git a/miscutils/bc.c b/miscutils/bc.c
index 9d04ddea3..f4e499f13 100644
--- a/miscutils/bc.c
+++ b/miscutils/bc.c
@@ -1147,23 +1147,6 @@ static void bc_vec_string(BcVec *v, size_t len, const char *str)
        bc_vec_pushZeroByte(v);
 }
-#if ENABLE_FEATURE_BC_SIGNALS && ENABLE_FEATURE_EDITING
-static void bc_vec_concat(BcVec *v, const char *str)
-{
-        size_t len, slen;
-        if (v->len == 0) bc_vec_pushZeroByte(v);
-        slen = strlen(str);
-        len = v->len + slen;
-        if (v->cap < len) bc_vec_grow(v, slen);
-        strcpy(v->v + v->len - 1, str);
-        v->len = len;
-}
-#endif
 static void *bc_vec_item(const BcVec *v, size_t idx)
 {
        return v->v + v->size * idx;
@@ -2491,6 +2474,21 @@ static FAST_FUNC void bc_result_free(void *result)
        }
 }
+#if ENABLE_FEATURE_BC_SIGNALS && ENABLE_FEATURE_EDITING
+static void bc_vec_concat(BcVec *v, const char *str)
+{
+        size_t len, slen;
+        slen = strlen(str);
+        len = v->len + slen + 1;
+        if (v->cap < len) bc_vec_grow(v, slen);
+        strcpy(v->v + v->len, str);
+        v->len = len;
+}
+#endif
 static int bad_input_byte(char c)
 {
        if ((c < ' ' && c != '\t' && c != '\r' && c != '\n') // also allow '\v' '\f'?
@@ -2887,7 +2885,7 @@ static void bc_lex_file(void)
 static bool bc_lex_more_input(void)
 {
        BcParse *p = &G.prs;
-        size_t str;
+        unsigned str; // bool for bc, string nest count for dc
        bool comment;
        bc_vec_pop_all(&G.input_buffer);
@@ -2896,8 +2894,23 @@ static bool bc_lex_more_input(void)
        // with a backslash to the parser. The reason for that is because the parser
        // treats a backslash+newline combo as whitespace, per the bc spec. In that
        // case, and for strings and comments, the parser will expect more stuff.
-        comment = false;
+        //
+        // bc cases to test interactively:
+        // 1 #comment\  - prints "1<newline>" at once (comment is not continued)
+        // 1 #comment/* - prints "1<newline>" at once
+        // 1 #comment"  - prints "1<newline>" at once
+        // 1\#comment   - error at once (\ is not a line continuation)
+        // 1 + /*"*/2   - prints "3<newline>" at once
+        // 1 + /*#*/2   - prints "3<newline>" at once
+        // "str\"       - prints "str\" at once
+        // "str#"       - prints "str#" at once
+        // "str/*"      - prints "str/*" at once
+        // "str#\       - waits for second line
+        // end"         - ...prints "str#\<newline>end"
+//This is way too complex, we duplicate comment/string logic of lexer
+//TODO: switch to char-by-char input like hush does it
        str = 0;
+        comment = false; // stays always false for dc
        for (;;) {
                size_t prevlen = G.input_buffer.len;
                char *string;
@@ -2909,39 +2922,61 @@ static bool bc_lex_more_input(void)
                string = G.input_buffer.v + prevlen;
                while (*string) {
-                        char c = *string;
+                        char c = *string++;
-                        if (!comment) {
+#if ENABLE_BC
-                                if (string == G.input_buffer.v || string[-1] != '\\') {
+                        if (comment) {
-                                        if (IS_BC)
+                                // We are in /**/ comment, exit only on "*/"
-                                                str ^= (c == '"');
+                                if (c == '*' && *string == '/') {
-                                        else {
+                                        comment = false;
-                                                if (c == ']')
+                                        string++;
-                                                        str -= 1;
-                                                else if (c == '[')
-                                                        str += 1;
-                                        }
                                }
+                                continue;
                        }
-                        string++;
+#endif
-                        if (!str) {
+                        // We are not in /**/ comment
+                        if (str) {
+                                // We are in "string" (bc) or [string] (dc)
+                                if (IS_BC) {
+                                        // bc strings have no escapes: \\ is not special,
+                                        // \n is not, \" is not, \<newline> is not.
+                                        str = (c != '"'); // clear flag when " is seen
+                                } else {
+                                        // dc strings have no escapes as well, can nest
+                                        if (c == ']')
+                                                str--;
+                                        if (c == '[')
+                                                str++;
+                                }
+                                continue;
+                        }
+                        // We are not in a string or /**/ comment
+                        // Is it a #comment? Return the string (can't have continuation)
+                        if (c == '#')
+                                goto return_string;
+#if ENABLE_BC
+                        if (IS_BC) {
+                                // bc: is it a start of /**/ comment or string?
                                if (c == '/' && *string == '*') {
                                        comment = true;
                                        string++;
                                        continue;
                                }
-                                if (c == '*' && *string == '/') {
+                                str = (c == '"'); // set flag if " is seen
-                                        comment = false;
+                                continue;
-                                        string++;
-                                }
                        }
-                }
+#endif
+                        // dc: is it a start of string?
+                        str = (c == '[');
+                } // end of "check all chars in string" loop
                if (str != 0 || comment) {
                        G.input_buffer.len--; // backstep over the trailing NUL byte
                        continue;
                }
                // Check for backslash+newline.
-                // we do not check that last char is '\n' -
+                // We do not check that last char is '\n' -
                // if it is not, then it's EOF, and looping back
                // to bc_read_line() will detect it:
                string -= 2;
@@ -2952,6 +2987,7 @@ static bool bc_lex_more_input(void)
                break;
        }
+ return_string:
        p->lex_inbuf = G.input_buffer.v;
 //      bb_error_msg("G.input_buffer.len:%d '%s'", G.input_buffer.len, G.input_buffer.v);
author	Denys Vlasenko <vda.linux@googlemail.com>	2018-12-26 12:23:05 +0100
committer	Denys Vlasenko <vda.linux@googlemail.com>	2018-12-26 12:23:05 +0100
commit	63ad7993845fbf59264e22295ae9c06250832119 (patch)
tree	8907aa8c1819109955edf4b449a5045fd1f6441e
parent	94576d2b972b3bd136fbe8057c95690ae36ea8c9 (diff)
download	busybox-w32-63ad7993845fbf59264e22295ae9c06250832119.tar.gz busybox-w32-63ad7993845fbf59264e22295ae9c06250832119.tar.bz2 busybox-w32-63ad7993845fbf59264e22295ae9c06250832119.zip