diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-07-03 01:16:48 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-07-03 01:16:48 +0200 |
commit | 2b65e73db3254a7228802886546152c72217017d (patch) | |
tree | a1a8d3cd8e5daee15a15e4fce42a328df50e0c35 | |
parent | 717200eb43c9420773c0f8b751aadabba3052027 (diff) | |
download | busybox-w32-2b65e73db3254a7228802886546152c72217017d.tar.gz busybox-w32-2b65e73db3254a7228802886546152c72217017d.tar.bz2 busybox-w32-2b65e73db3254a7228802886546152c72217017d.zip |
awk: tighten rules in action parsing
Disallow:
BEGIN
{ action } - the opening brace must be on the same line as BEGIN (or END)
Disallow:
func f()
print "hello" - the function body must be enclosed in {...}
function old new delta
chain_until_rbrace - 41 +41
parse_program 307 336 +29
chain_group 649 616 -33
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 1/1 up/down: 70/-33) Total: 37 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 108 |
1 files changed, 66 insertions, 42 deletions
diff --git a/editors/awk.c b/editors/awk.c index a1a2afd1d..c68416873 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -1549,29 +1549,35 @@ static node *chain_loop(node *nn) | |||
1549 | return n; | 1549 | return n; |
1550 | } | 1550 | } |
1551 | 1551 | ||
1552 | static void chain_until_rbrace(void) | ||
1553 | { | ||
1554 | uint32_t tc; | ||
1555 | while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { | ||
1556 | debug_printf_parse("%s: !TC_RBRACE\n", __func__); | ||
1557 | if (tc == TC_NEWLINE) | ||
1558 | continue; | ||
1559 | rollback_token(); | ||
1560 | chain_group(); | ||
1561 | } | ||
1562 | debug_printf_parse("%s: TC_RBRACE\n", __func__); | ||
1563 | } | ||
1564 | |||
1552 | /* parse group and attach it to chain */ | 1565 | /* parse group and attach it to chain */ |
1553 | static void chain_group(void) | 1566 | static void chain_group(void) |
1554 | { | 1567 | { |
1555 | uint32_t c; | 1568 | uint32_t tc; |
1556 | node *n, *n2, *n3; | 1569 | node *n, *n2, *n3; |
1557 | 1570 | ||
1558 | do { | 1571 | do { |
1559 | c = next_token(TS_GRPSEQ); | 1572 | tc = next_token(TS_GRPSEQ); |
1560 | } while (c & TC_NEWLINE); | 1573 | } while (tc == TC_NEWLINE); |
1561 | 1574 | ||
1562 | if (c & TC_LBRACE) { | 1575 | if (tc == TC_LBRACE) { |
1563 | debug_printf_parse("%s: TC_LBRACE\n", __func__); | 1576 | debug_printf_parse("%s: TC_LBRACE\n", __func__); |
1564 | while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { | 1577 | chain_until_rbrace(); |
1565 | debug_printf_parse("%s: !TC_RBRACE\n", __func__); | ||
1566 | if (c & TC_NEWLINE) | ||
1567 | continue; | ||
1568 | rollback_token(); | ||
1569 | chain_group(); | ||
1570 | } | ||
1571 | debug_printf_parse("%s: TC_RBRACE\n", __func__); | ||
1572 | return; | 1578 | return; |
1573 | } | 1579 | } |
1574 | if (c & (TS_OPSEQ | TS_OPTERM)) { | 1580 | if (tc & (TS_OPSEQ | TS_OPTERM)) { |
1575 | debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); | 1581 | debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); |
1576 | rollback_token(); | 1582 | rollback_token(); |
1577 | chain_expr(OC_EXEC | Vx); | 1583 | chain_expr(OC_EXEC | Vx); |
@@ -1675,37 +1681,48 @@ static void chain_group(void) | |||
1675 | 1681 | ||
1676 | static void parse_program(char *p) | 1682 | static void parse_program(char *p) |
1677 | { | 1683 | { |
1678 | uint32_t tclass; | ||
1679 | node *cn; | ||
1680 | func *f; | ||
1681 | var *v; | ||
1682 | |||
1683 | debug_printf_parse("%s()\n", __func__); | 1684 | debug_printf_parse("%s()\n", __func__); |
1684 | 1685 | ||
1685 | g_pos = p; | 1686 | g_pos = p; |
1686 | t_lineno = 1; | 1687 | t_lineno = 1; |
1687 | while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | | 1688 | for (;;) { |
1688 | TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { | 1689 | uint32_t tclass; |
1689 | 1690 | ||
1690 | if (tclass & TS_OPTERM) { | 1691 | tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | |
1692 | TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL); | ||
1693 | |||
1694 | if (tclass == TC_EOF) { | ||
1695 | debug_printf_parse("%s: TC_EOF\n", __func__); | ||
1696 | break; | ||
1697 | } | ||
1698 | if (tclass & TS_OPTERM) { /* ; or <newline> */ | ||
1691 | debug_printf_parse("%s: TS_OPTERM\n", __func__); | 1699 | debug_printf_parse("%s: TS_OPTERM\n", __func__); |
1700 | //NB: gawk allows many newlines, but does not allow more than one semicolon: | ||
1701 | // BEGIN {...}<newline>;<newline>; | ||
1702 | //would complain "each rule must have a pattern or an action part". | ||
1703 | //Same message for | ||
1704 | // ; BEGIN {...} | ||
1692 | continue; | 1705 | continue; |
1693 | } | 1706 | } |
1694 | 1707 | if (tclass == TC_BEGIN) { | |
1695 | seq = &mainseq; | ||
1696 | if (tclass & TC_BEGIN) { | ||
1697 | debug_printf_parse("%s: TC_BEGIN\n", __func__); | 1708 | debug_printf_parse("%s: TC_BEGIN\n", __func__); |
1698 | seq = &beginseq; | 1709 | seq = &beginseq; |
1699 | //TODO: ensure there is no newline between BEGIN and { | 1710 | /* ensure there is no newline between BEGIN and { */ |
1700 | //next_token(TC_LBRACE); rollback_token(); | 1711 | next_token(TC_LBRACE); |
1701 | chain_group(); | 1712 | chain_until_rbrace(); |
1702 | } else if (tclass & TC_END) { | 1713 | continue; |
1714 | } | ||
1715 | if (tclass == TC_END) { | ||
1703 | debug_printf_parse("%s: TC_END\n", __func__); | 1716 | debug_printf_parse("%s: TC_END\n", __func__); |
1704 | seq = &endseq; | 1717 | seq = &endseq; |
1705 | //TODO: ensure there is no newline between END and { | 1718 | /* ensure there is no newline between END and { */ |
1706 | //next_token(TC_LBRACE); rollback_token(); | 1719 | next_token(TC_LBRACE); |
1707 | chain_group(); | 1720 | chain_until_rbrace(); |
1708 | } else if (tclass & TC_FUNCDECL) { | 1721 | continue; |
1722 | } | ||
1723 | if (tclass == TC_FUNCDECL) { | ||
1724 | func *f; | ||
1725 | |||
1709 | debug_printf_parse("%s: TC_FUNCDECL\n", __func__); | 1726 | debug_printf_parse("%s: TC_FUNCDECL\n", __func__); |
1710 | next_token(TC_FUNCTION); | 1727 | next_token(TC_FUNCTION); |
1711 | f = newfunc(t_string); | 1728 | f = newfunc(t_string); |
@@ -1716,6 +1733,7 @@ static void parse_program(char *p) | |||
1716 | //f->nargs = 0; - already is | 1733 | //f->nargs = 0; - already is |
1717 | /* func arg list: comma sep list of args, and a close paren */ | 1734 | /* func arg list: comma sep list of args, and a close paren */ |
1718 | for (;;) { | 1735 | for (;;) { |
1736 | var *v; | ||
1719 | if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { | 1737 | if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { |
1720 | if (f->nargs == 0) | 1738 | if (f->nargs == 0) |
1721 | break; /* func() is ok */ | 1739 | break; /* func() is ok */ |
@@ -1730,31 +1748,37 @@ static void parse_program(char *p) | |||
1730 | /* it was a comma, we ate it */ | 1748 | /* it was a comma, we ate it */ |
1731 | } | 1749 | } |
1732 | seq = &f->body; | 1750 | seq = &f->body; |
1733 | //TODO: ensure there is { after "func F(...)" - but newlines are allowed | 1751 | /* ensure there is { after "func F(...)" - but newlines are allowed */ |
1734 | //while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token(); | 1752 | while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) |
1735 | chain_group(); | 1753 | continue; |
1754 | chain_until_rbrace(); | ||
1736 | hash_clear(ahash); | 1755 | hash_clear(ahash); |
1737 | } else if (tclass & TS_OPSEQ) { | 1756 | continue; |
1757 | } | ||
1758 | seq = &mainseq; | ||
1759 | if (tclass & TS_OPSEQ) { | ||
1760 | node *cn; | ||
1761 | |||
1738 | debug_printf_parse("%s: TS_OPSEQ\n", __func__); | 1762 | debug_printf_parse("%s: TS_OPSEQ\n", __func__); |
1739 | rollback_token(); | 1763 | rollback_token(); |
1740 | cn = chain_node(OC_TEST); | 1764 | cn = chain_node(OC_TEST); |
1741 | cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); | 1765 | cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); |
1742 | if (t_tclass & TC_LBRACE) { | 1766 | if (t_tclass == TC_LBRACE) { |
1743 | debug_printf_parse("%s: TC_LBRACE\n", __func__); | 1767 | debug_printf_parse("%s: TC_LBRACE\n", __func__); |
1744 | rollback_token(); | 1768 | rollback_token(); |
1745 | chain_group(); | 1769 | chain_group(); |
1746 | } else { | 1770 | } else { |
1771 | /* no action, assume default "{ print }" */ | ||
1747 | debug_printf_parse("%s: !TC_LBRACE\n", __func__); | 1772 | debug_printf_parse("%s: !TC_LBRACE\n", __func__); |
1748 | chain_node(OC_PRINT); | 1773 | chain_node(OC_PRINT); |
1749 | } | 1774 | } |
1750 | cn->r.n = mainseq.last; | 1775 | cn->r.n = mainseq.last; |
1751 | } else /* if (tclass & TC_LBRACE) */ { | 1776 | continue; |
1752 | debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); | ||
1753 | rollback_token(); | ||
1754 | chain_group(); | ||
1755 | } | 1777 | } |
1778 | /* tclass == TC_LBRACE */ | ||
1779 | debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); | ||
1780 | chain_until_rbrace(); | ||
1756 | } | 1781 | } |
1757 | debug_printf_parse("%s: TC_EOF\n", __func__); | ||
1758 | } | 1782 | } |
1759 | 1783 | ||
1760 | 1784 | ||