about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2021-07-14 14:25:07 +0200
committer: Denys Vlasenko <vda.linux@googlemail.com> 2021-07-14 16:32:19 +0200
commit: d62627487a44d9175b05d49846aeef83fed97019 (patch)
tree: cca6e3b0ba26dfbf6dc652ff0d9770572260cf03
parent: e6f4145f2961bfd500214ef1fcf07543ffacb603 (diff)
download: busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.tar.gz
download: busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.tar.bz2
download: busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.zip
awk: tighten parsing - disallow extra semicolons
'; BEGIN {...}' and 'BEGIN {...} ;; {...}' are not accepted by gawk

function                                             old     new   delta
parse_program                                        332     353     +21

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/awk.c | 40
1 file changed, 24 insertions, 16 deletions
diff --git a/editors/awk.c b/editors/awk.c
index 7a282356d..2f8a18c8e 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1634,7 +1634,7 @@ static void chain_group(void)
1634 debug_printf_parse("%s: ST_FOR\n", __func__); 1634 debug_printf_parse("%s: ST_FOR\n", __func__);
1635 next_token(TC_LPAREN); 1635 next_token(TC_LPAREN);
1636 n2 = parse_expr(TC_SEMICOL | TC_RPAREN); 1636 n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
1637 if (t_tclass & TC_RPAREN) { /* for-in */ 1637 if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
1638 if (!n2 || n2->info != TI_IN) 1638 if (!n2 || n2->info != TI_IN)
1639 syntax_error(EMSG_UNEXP_TOKEN); 1639 syntax_error(EMSG_UNEXP_TOKEN);
1640 n = chain_node(OC_WALKINIT | VV); 1640 n = chain_node(OC_WALKINIT | VV);
@@ -1700,20 +1700,15 @@ static void parse_program(char *p)
1700 for (;;) { 1700 for (;;) {
1701 uint32_t tclass; 1701 uint32_t tclass;
1702 1702
1703 tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | 1703 tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1704 TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL); 1704 | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
1705 1705 got_tok:
1706 if (tclass == TC_EOF) { 1706 if (tclass == TC_EOF) {
1707 debug_printf_parse("%s: TC_EOF\n", __func__); 1707 debug_printf_parse("%s: TC_EOF\n", __func__);
1708 break; 1708 break;
1709 } 1709 }
1710 if (tclass & (TC_SEMICOL | TC_NEWLINE)) { 1710 if (tclass == TC_NEWLINE) {
1711 debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__); 1711 debug_printf_parse("%s: TC_NEWLINE\n", __func__);
1712//NB: gawk allows many newlines, but does not allow more than one semicolon:
1713// BEGIN {...}<newline>;<newline>;
1714//would complain "each rule must have a pattern or an action part".
1715//Same message for
1716// ; BEGIN {...}
1717 continue; 1712 continue;
1718 } 1713 }
1719 if (tclass == TC_BEGIN) { 1714 if (tclass == TC_BEGIN) {
@@ -1722,7 +1717,7 @@ static void parse_program(char *p)
1722 /* ensure there is no newline between BEGIN and { */ 1717 /* ensure there is no newline between BEGIN and { */
1723 next_token(TC_LBRACE); 1718 next_token(TC_LBRACE);
1724 chain_until_rbrace(); 1719 chain_until_rbrace();
1725 continue; 1720 goto next_tok;
1726 } 1721 }
1727 if (tclass == TC_END) { 1722 if (tclass == TC_END) {
1728 debug_printf_parse("%s: TC_END\n", __func__); 1723 debug_printf_parse("%s: TC_END\n", __func__);
@@ -1730,7 +1725,7 @@ static void parse_program(char *p)
1730 /* ensure there is no newline between END and { */ 1725 /* ensure there is no newline between END and { */
1731 next_token(TC_LBRACE); 1726 next_token(TC_LBRACE);
1732 chain_until_rbrace(); 1727 chain_until_rbrace();
1733 continue; 1728 goto next_tok;
1734 } 1729 }
1735 if (tclass == TC_FUNCDECL) { 1730 if (tclass == TC_FUNCDECL) {
1736 func *f; 1731 func *f;
@@ -1765,7 +1760,7 @@ static void parse_program(char *p)
1765 continue; 1760 continue;
1766 chain_until_rbrace(); 1761 chain_until_rbrace();
1767 hash_clear(ahash); 1762 hash_clear(ahash);
1768 continue; 1763 goto next_tok;
1769 } 1764 }
1770 seq = &mainseq; 1765 seq = &mainseq;
1771 if (tclass & TS_OPSEQ) { 1766 if (tclass & TS_OPSEQ) {
@@ -1784,12 +1779,25 @@ static void parse_program(char *p)
1784 chain_node(OC_PRINT); 1779 chain_node(OC_PRINT);
1785 } 1780 }
1786 cn->r.n = mainseq.last; 1781 cn->r.n = mainseq.last;
1787 continue; 1782 goto next_tok;
1788 } 1783 }
1789 /* tclass == TC_LBRACE */ 1784 /* tclass == TC_LBRACE */
1790 debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); 1785 debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1791 chain_until_rbrace(); 1786 chain_until_rbrace();
1792 } 1787 next_tok:
1788 /* Same as next_token() at the top of the loop, + TC_SEMICOL */
1789 tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1790 | TC_EOF | TC_NEWLINE | TC_SEMICOL);
1791 /* gawk allows many newlines, but does not allow more than one semicolon:
1792 * BEGIN {...}<newline>;<newline>;
1793 * would complain "each rule must have a pattern or an action part".
1794 * Same message for
1795 * ; BEGIN {...}
1796 */
1797 if (tclass != TC_SEMICOL)
1798 goto got_tok; /* use this token */
1799 /* else: loop back - ate the semicolon, get and use _next_ token */
1800 } /* for (;;) */
1793} 1801}
1794 1802
1795 1803