awk: tighten parsing - disallow extra semicolons

'; BEGIN {...}' and 'BEGIN {...} ;; {...}' are not accepted by gawk.

function                                             old     new   delta
parse_program                                        332     353     +21

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
author: Denys Vlasenko <vda.linux@googlemail.com> 2021-07-14 14:25:07 +0200
committer: Denys Vlasenko <vda.linux@googlemail.com> 2021-07-14 16:32:19 +0200
commit: d62627487a44d9175b05d49846aeef83fed97019 (patch)
tree: cca6e3b0ba26dfbf6dc652ff0d9770572260cf03
parent: e6f4145f2961bfd500214ef1fcf07543ffacb603 (diff)
download: busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.tar.gz
busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.tar.bz2
busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.zip
1 files changed, 24 insertions, 16 deletions
diff --git a/editors/awk.c b/editors/awk.c
index 7a282356d..2f8a18c8e 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1634,7 +1634,7 @@ static void chain_group(void)
                debug_printf_parse("%s: ST_FOR\n", __func__);
                next_token(TC_LPAREN);
                n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
-                if (t_tclass & TC_RPAREN) {     /* for-in */
+                if (t_tclass & TC_RPAREN) {     /* for (I in ARRAY) */
                        if (!n2 || n2->info != TI_IN)
                                syntax_error(EMSG_UNEXP_TOKEN);
                        n = chain_node(OC_WALKINIT | VV);
@@ -1700,20 +1700,15 @@ static void parse_program(char *p)
        for (;;) {
                uint32_t tclass;
-                tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
+                tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
-                        TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL);
+                        | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
+ got_tok:
                if (tclass == TC_EOF) {
                        debug_printf_parse("%s: TC_EOF\n", __func__);
                        break;
                }
-                if (tclass & (TC_SEMICOL | TC_NEWLINE)) {
+                if (tclass == TC_NEWLINE) {
-                        debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__);
+                        debug_printf_parse("%s: TC_NEWLINE\n", __func__);
-//NB: gawk allows many newlines, but does not allow more than one semicolon:
-//  BEGIN {...}<newline>;<newline>;
-//would complain "each rule must have a pattern or an action part".
-//Same message for
-//  ; BEGIN {...}
                        continue;
                }
                if (tclass == TC_BEGIN) {
@@ -1722,7 +1717,7 @@ static void parse_program(char *p)
                        /* ensure there is no newline between BEGIN and { */
                        next_token(TC_LBRACE);
                        chain_until_rbrace();
-                        continue;
+                        goto next_tok;
                }
                if (tclass == TC_END) {
                        debug_printf_parse("%s: TC_END\n", __func__);
@@ -1730,7 +1725,7 @@ static void parse_program(char *p)
                        /* ensure there is no newline between END and { */
                        next_token(TC_LBRACE);
                        chain_until_rbrace();
-                        continue;
+                        goto next_tok;
                }
                if (tclass == TC_FUNCDECL) {
                        func *f;
@@ -1765,7 +1760,7 @@ static void parse_program(char *p)
                                continue;
                        chain_until_rbrace();
                        hash_clear(ahash);
-                        continue;
+                        goto next_tok;
                }
                seq = &mainseq;
                if (tclass & TS_OPSEQ) {
@@ -1784,12 +1779,25 @@ static void parse_program(char *p)
                                chain_node(OC_PRINT);
                        }
                        cn->r.n = mainseq.last;
-                        continue;
+                        goto next_tok;
                }
                /* tclass == TC_LBRACE */
                debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
                chain_until_rbrace();
-        }
+ next_tok:
+                /* Same as next_token() at the top of the loop, + TC_SEMICOL */
+                tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
+                        | TC_EOF | TC_NEWLINE | TC_SEMICOL);
+                /* gawk allows many newlines, but does not allow more than one semicolon:
+                 *  BEGIN {...}<newline>;<newline>;
+                 * would complain "each rule must have a pattern or an action part".
+                 * Same message for
+                 *  ; BEGIN {...}
+                 */
+                if (tclass != TC_SEMICOL)
+                        goto got_tok; /* use this token */
+                /* else: loop back - ate the semicolon, get and use _next_ token */
+        } /* for (;;) */
 }
author	Denys Vlasenko <vda.linux@googlemail.com>	2021-07-14 14:25:07 +0200
committer	Denys Vlasenko <vda.linux@googlemail.com>	2021-07-14 16:32:19 +0200
commit	d62627487a44d9175b05d49846aeef83fed97019 (patch)
tree	cca6e3b0ba26dfbf6dc652ff0d9770572260cf03
parent	e6f4145f2961bfd500214ef1fcf07543ffacb603 (diff)
download	busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.tar.gz busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.tar.bz2 busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.zip

diff --git a/editors/awk.c b/editors/awk.c
index 7a282356d..2f8a18c8e 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1634,7 +1634,7 @@ static void chain_group(void)
1634	debug_printf_parse("%s: ST_FOR\n", __func__);	1634	debug_printf_parse("%s: ST_FOR\n", __func__);
1635	next_token(TC_LPAREN);	1635	next_token(TC_LPAREN);
1636	n2 = parse_expr(TC_SEMICOL \| TC_RPAREN);	1636	n2 = parse_expr(TC_SEMICOL \| TC_RPAREN);
1637	if (t_tclass & TC_RPAREN) { /* for-in */	1637	if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
1638	if (!n2 \|\| n2->info != TI_IN)	1638	if (!n2 \|\| n2->info != TI_IN)
1639	syntax_error(EMSG_UNEXP_TOKEN);	1639	syntax_error(EMSG_UNEXP_TOKEN);
1640	n = chain_node(OC_WALKINIT \| VV);	1640	n = chain_node(OC_WALKINIT \| VV);
@@ -1700,20 +1700,15 @@ static void parse_program(char *p)
1700	for (;;) {	1700	for (;;) {
1701	uint32_t tclass;	1701	uint32_t tclass;
1702		1702
1703	tclass = next_token(TC_EOF \| TS_OPSEQ \| TC_LBRACE \|	1703	tclass = next_token(TS_OPSEQ \| TC_LBRACE \| TC_BEGIN \| TC_END \| TC_FUNCDECL
1704	TC_SEMICOL \| TC_NEWLINE \| TC_BEGIN \| TC_END \| TC_FUNCDECL);	1704	\| TC_EOF \| TC_NEWLINE /* but not TC_SEMICOL */);
1705		1705	got_tok:
1706	if (tclass == TC_EOF) {	1706	if (tclass == TC_EOF) {
1707	debug_printf_parse("%s: TC_EOF\n", __func__);	1707	debug_printf_parse("%s: TC_EOF\n", __func__);
1708	break;	1708	break;
1709	}	1709	}
1710	if (tclass & (TC_SEMICOL \| TC_NEWLINE)) {	1710	if (tclass == TC_NEWLINE) {
1711	debug_printf_parse("%s: TC_SEMICOL \| TC_NEWLINE\n", __func__);	1711	debug_printf_parse("%s: TC_NEWLINE\n", __func__);
1712	//NB: gawk allows many newlines, but does not allow more than one semicolon:
1713	// BEGIN {...}<newline>;<newline>;
1714	//would complain "each rule must have a pattern or an action part".
1715	//Same message for
1716	// ; BEGIN {...}
1717	continue;	1712	continue;
1718	}	1713	}
1719	if (tclass == TC_BEGIN) {	1714	if (tclass == TC_BEGIN) {
@@ -1722,7 +1717,7 @@ static void parse_program(char *p)
1722	/* ensure there is no newline between BEGIN and { */	1717	/* ensure there is no newline between BEGIN and { */
1723	next_token(TC_LBRACE);	1718	next_token(TC_LBRACE);
1724	chain_until_rbrace();	1719	chain_until_rbrace();
1725	continue;	1720	goto next_tok;
1726	}	1721	}
1727	if (tclass == TC_END) {	1722	if (tclass == TC_END) {
1728	debug_printf_parse("%s: TC_END\n", __func__);	1723	debug_printf_parse("%s: TC_END\n", __func__);
@@ -1730,7 +1725,7 @@ static void parse_program(char *p)
1730	/* ensure there is no newline between END and { */	1725	/* ensure there is no newline between END and { */
1731	next_token(TC_LBRACE);	1726	next_token(TC_LBRACE);
1732	chain_until_rbrace();	1727	chain_until_rbrace();
1733	continue;	1728	goto next_tok;
1734	}	1729	}
1735	if (tclass == TC_FUNCDECL) {	1730	if (tclass == TC_FUNCDECL) {
1736	func *f;	1731	func *f;
@@ -1765,7 +1760,7 @@ static void parse_program(char *p)
1765	continue;	1760	continue;
1766	chain_until_rbrace();	1761	chain_until_rbrace();
1767	hash_clear(ahash);	1762	hash_clear(ahash);
1768	continue;	1763	goto next_tok;
1769	}	1764	}
1770	seq = &mainseq;	1765	seq = &mainseq;
1771	if (tclass & TS_OPSEQ) {	1766	if (tclass & TS_OPSEQ) {
@@ -1784,12 +1779,25 @@ static void parse_program(char *p)
1784	chain_node(OC_PRINT);	1779	chain_node(OC_PRINT);
1785	}	1780	}
1786	cn->r.n = mainseq.last;	1781	cn->r.n = mainseq.last;
1787	continue;	1782	goto next_tok;
1788	}	1783	}
1789	/* tclass == TC_LBRACE */	1784	/* tclass == TC_LBRACE */
1790	debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);	1785	debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1791	chain_until_rbrace();	1786	chain_until_rbrace();
1792	}	1787	next_tok:
		1788	/* Same as next_token() at the top of the loop, + TC_SEMICOL */
		1789	tclass = next_token(TS_OPSEQ \| TC_LBRACE \| TC_BEGIN \| TC_END \| TC_FUNCDECL
		1790	\| TC_EOF \| TC_NEWLINE \| TC_SEMICOL);
		1791	/* gawk allows many newlines, but does not allow more than one semicolon:
		1792	* BEGIN {...}<newline>;<newline>;
		1793	* would complain "each rule must have a pattern or an action part".
		1794	* Same message for
		1795	* ; BEGIN {...}
		1796	*/
		1797	if (tclass != TC_SEMICOL)
		1798	goto got_tok; /* use this token */
		1799	/* else: loop back - ate the semicolon, get and use _next_ token */
		1800	} /* for (;;) */
1793	}	1801	}
1794		1802
1795		1803