diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-07-14 14:25:07 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-07-14 16:32:19 +0200 |
commit | d62627487a44d9175b05d49846aeef83fed97019 (patch) | |
tree | cca6e3b0ba26dfbf6dc652ff0d9770572260cf03 | |
parent | e6f4145f2961bfd500214ef1fcf07543ffacb603 (diff) | |
download | busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.tar.gz busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.tar.bz2 busybox-w32-d62627487a44d9175b05d49846aeef83fed97019.zip |
awk: tighten parsing - disallow extra semicolons
Programs such as '; BEGIN {...}' and 'BEGIN {...} ;; {...}' are not accepted by gawk, so reject such extra semicolons here as well.
function                                             old     new   delta
parse_program                                        332     353     +21
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 40 |
1 files changed, 24 insertions, 16 deletions
diff --git a/editors/awk.c b/editors/awk.c index 7a282356d..2f8a18c8e 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -1634,7 +1634,7 @@ static void chain_group(void) | |||
1634 | debug_printf_parse("%s: ST_FOR\n", __func__); | 1634 | debug_printf_parse("%s: ST_FOR\n", __func__); |
1635 | next_token(TC_LPAREN); | 1635 | next_token(TC_LPAREN); |
1636 | n2 = parse_expr(TC_SEMICOL | TC_RPAREN); | 1636 | n2 = parse_expr(TC_SEMICOL | TC_RPAREN); |
1637 | if (t_tclass & TC_RPAREN) { /* for-in */ | 1637 | if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */ |
1638 | if (!n2 || n2->info != TI_IN) | 1638 | if (!n2 || n2->info != TI_IN) |
1639 | syntax_error(EMSG_UNEXP_TOKEN); | 1639 | syntax_error(EMSG_UNEXP_TOKEN); |
1640 | n = chain_node(OC_WALKINIT | VV); | 1640 | n = chain_node(OC_WALKINIT | VV); |
@@ -1700,20 +1700,15 @@ static void parse_program(char *p) | |||
1700 | for (;;) { | 1700 | for (;;) { |
1701 | uint32_t tclass; | 1701 | uint32_t tclass; |
1702 | 1702 | ||
1703 | tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | | 1703 | tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL |
1704 | TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL); | 1704 | | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */); |
1705 | 1705 | got_tok: | |
1706 | if (tclass == TC_EOF) { | 1706 | if (tclass == TC_EOF) { |
1707 | debug_printf_parse("%s: TC_EOF\n", __func__); | 1707 | debug_printf_parse("%s: TC_EOF\n", __func__); |
1708 | break; | 1708 | break; |
1709 | } | 1709 | } |
1710 | if (tclass & (TC_SEMICOL | TC_NEWLINE)) { | 1710 | if (tclass == TC_NEWLINE) { |
1711 | debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__); | 1711 | debug_printf_parse("%s: TC_NEWLINE\n", __func__); |
1712 | //NB: gawk allows many newlines, but does not allow more than one semicolon: | ||
1713 | // BEGIN {...}<newline>;<newline>; | ||
1714 | //would complain "each rule must have a pattern or an action part". | ||
1715 | //Same message for | ||
1716 | // ; BEGIN {...} | ||
1717 | continue; | 1712 | continue; |
1718 | } | 1713 | } |
1719 | if (tclass == TC_BEGIN) { | 1714 | if (tclass == TC_BEGIN) { |
@@ -1722,7 +1717,7 @@ static void parse_program(char *p) | |||
1722 | /* ensure there is no newline between BEGIN and { */ | 1717 | /* ensure there is no newline between BEGIN and { */ |
1723 | next_token(TC_LBRACE); | 1718 | next_token(TC_LBRACE); |
1724 | chain_until_rbrace(); | 1719 | chain_until_rbrace(); |
1725 | continue; | 1720 | goto next_tok; |
1726 | } | 1721 | } |
1727 | if (tclass == TC_END) { | 1722 | if (tclass == TC_END) { |
1728 | debug_printf_parse("%s: TC_END\n", __func__); | 1723 | debug_printf_parse("%s: TC_END\n", __func__); |
@@ -1730,7 +1725,7 @@ static void parse_program(char *p) | |||
1730 | /* ensure there is no newline between END and { */ | 1725 | /* ensure there is no newline between END and { */ |
1731 | next_token(TC_LBRACE); | 1726 | next_token(TC_LBRACE); |
1732 | chain_until_rbrace(); | 1727 | chain_until_rbrace(); |
1733 | continue; | 1728 | goto next_tok; |
1734 | } | 1729 | } |
1735 | if (tclass == TC_FUNCDECL) { | 1730 | if (tclass == TC_FUNCDECL) { |
1736 | func *f; | 1731 | func *f; |
@@ -1765,7 +1760,7 @@ static void parse_program(char *p) | |||
1765 | continue; | 1760 | continue; |
1766 | chain_until_rbrace(); | 1761 | chain_until_rbrace(); |
1767 | hash_clear(ahash); | 1762 | hash_clear(ahash); |
1768 | continue; | 1763 | goto next_tok; |
1769 | } | 1764 | } |
1770 | seq = &mainseq; | 1765 | seq = &mainseq; |
1771 | if (tclass & TS_OPSEQ) { | 1766 | if (tclass & TS_OPSEQ) { |
@@ -1784,12 +1779,25 @@ static void parse_program(char *p) | |||
1784 | chain_node(OC_PRINT); | 1779 | chain_node(OC_PRINT); |
1785 | } | 1780 | } |
1786 | cn->r.n = mainseq.last; | 1781 | cn->r.n = mainseq.last; |
1787 | continue; | 1782 | goto next_tok; |
1788 | } | 1783 | } |
1789 | /* tclass == TC_LBRACE */ | 1784 | /* tclass == TC_LBRACE */ |
1790 | debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); | 1785 | debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); |
1791 | chain_until_rbrace(); | 1786 | chain_until_rbrace(); |
1792 | } | 1787 | next_tok: |
1788 | /* Same as next_token() at the top of the loop, + TC_SEMICOL */ | ||
1789 | tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL | ||
1790 | | TC_EOF | TC_NEWLINE | TC_SEMICOL); | ||
1791 | /* gawk allows many newlines, but does not allow more than one semicolon: | ||
1792 | * BEGIN {...}<newline>;<newline>; | ||
1793 | * would complain "each rule must have a pattern or an action part". | ||
1794 | * Same message for | ||
1795 | * ; BEGIN {...} | ||
1796 | */ | ||
1797 | if (tclass != TC_SEMICOL) | ||
1798 | goto got_tok; /* use this token */ | ||
1799 | /* else: loop back - ate the semicolon, get and use _next_ token */ | ||
1800 | } /* for (;;) */ | ||
1793 | } | 1801 | } |
1794 | 1802 | ||
1795 | 1803 | ||