aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2021-07-03 01:16:48 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2021-07-03 01:16:48 +0200
commit 2b65e73db3254a7228802886546152c72217017d (patch)
tree a1a8d3cd8e5daee15a15e4fce42a328df50e0c35
parent 717200eb43c9420773c0f8b751aadabba3052027 (diff)
download busybox-w32-2b65e73db3254a7228802886546152c72217017d.tar.gz
busybox-w32-2b65e73db3254a7228802886546152c72217017d.tar.bz2
busybox-w32-2b65e73db3254a7228802886546152c72217017d.zip
awk: tighten rules in action parsing
Disallow:
    BEGIN
    { action }
- the action must start on the same line as BEGIN.

Disallow:
    func f() print "hello"
- the function body must be in {...}.

function                 old     new   delta
chain_until_rbrace         -      41     +41
parse_program            307     336     +29
chain_group              649     616     -33
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 1/1 up/down: 70/-33)             Total: 37 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/awk.c 108
1 files changed, 66 insertions, 42 deletions
diff --git a/editors/awk.c b/editors/awk.c
index a1a2afd1d..c68416873 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1549,29 +1549,35 @@ static node *chain_loop(node *nn)
1549 return n; 1549 return n;
1550} 1550}
1551 1551
1552static void chain_until_rbrace(void)
1553{
1554 uint32_t tc;
1555 while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1556 debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1557 if (tc == TC_NEWLINE)
1558 continue;
1559 rollback_token();
1560 chain_group();
1561 }
1562 debug_printf_parse("%s: TC_RBRACE\n", __func__);
1563}
1564
1552/* parse group and attach it to chain */ 1565/* parse group and attach it to chain */
1553static void chain_group(void) 1566static void chain_group(void)
1554{ 1567{
1555 uint32_t c; 1568 uint32_t tc;
1556 node *n, *n2, *n3; 1569 node *n, *n2, *n3;
1557 1570
1558 do { 1571 do {
1559 c = next_token(TS_GRPSEQ); 1572 tc = next_token(TS_GRPSEQ);
1560 } while (c & TC_NEWLINE); 1573 } while (tc == TC_NEWLINE);
1561 1574
1562 if (c & TC_LBRACE) { 1575 if (tc == TC_LBRACE) {
1563 debug_printf_parse("%s: TC_LBRACE\n", __func__); 1576 debug_printf_parse("%s: TC_LBRACE\n", __func__);
1564 while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { 1577 chain_until_rbrace();
1565 debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1566 if (c & TC_NEWLINE)
1567 continue;
1568 rollback_token();
1569 chain_group();
1570 }
1571 debug_printf_parse("%s: TC_RBRACE\n", __func__);
1572 return; 1578 return;
1573 } 1579 }
1574 if (c & (TS_OPSEQ | TS_OPTERM)) { 1580 if (tc & (TS_OPSEQ | TS_OPTERM)) {
1575 debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); 1581 debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
1576 rollback_token(); 1582 rollback_token();
1577 chain_expr(OC_EXEC | Vx); 1583 chain_expr(OC_EXEC | Vx);
@@ -1675,37 +1681,48 @@ static void chain_group(void)
1675 1681
1676static void parse_program(char *p) 1682static void parse_program(char *p)
1677{ 1683{
1678 uint32_t tclass;
1679 node *cn;
1680 func *f;
1681 var *v;
1682
1683 debug_printf_parse("%s()\n", __func__); 1684 debug_printf_parse("%s()\n", __func__);
1684 1685
1685 g_pos = p; 1686 g_pos = p;
1686 t_lineno = 1; 1687 t_lineno = 1;
1687 while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | 1688 for (;;) {
1688 TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { 1689 uint32_t tclass;
1689 1690
1690 if (tclass & TS_OPTERM) { 1691 tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
1692 TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL);
1693
1694 if (tclass == TC_EOF) {
1695 debug_printf_parse("%s: TC_EOF\n", __func__);
1696 break;
1697 }
1698 if (tclass & TS_OPTERM) { /* ; or <newline> */
1691 debug_printf_parse("%s: TS_OPTERM\n", __func__); 1699 debug_printf_parse("%s: TS_OPTERM\n", __func__);
1700//NB: gawk allows many newlines, but does not allow more than one semicolon:
1701// BEGIN {...}<newline>;<newline>;
1702//would complain "each rule must have a pattern or an action part".
1703//Same message for
1704// ; BEGIN {...}
1692 continue; 1705 continue;
1693 } 1706 }
1694 1707 if (tclass == TC_BEGIN) {
1695 seq = &mainseq;
1696 if (tclass & TC_BEGIN) {
1697 debug_printf_parse("%s: TC_BEGIN\n", __func__); 1708 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1698 seq = &beginseq; 1709 seq = &beginseq;
1699//TODO: ensure there is no newline between BEGIN and { 1710 /* ensure there is no newline between BEGIN and { */
1700//next_token(TC_LBRACE); rollback_token(); 1711 next_token(TC_LBRACE);
1701 chain_group(); 1712 chain_until_rbrace();
1702 } else if (tclass & TC_END) { 1713 continue;
1714 }
1715 if (tclass == TC_END) {
1703 debug_printf_parse("%s: TC_END\n", __func__); 1716 debug_printf_parse("%s: TC_END\n", __func__);
1704 seq = &endseq; 1717 seq = &endseq;
1705//TODO: ensure there is no newline between END and { 1718 /* ensure there is no newline between END and { */
1706//next_token(TC_LBRACE); rollback_token(); 1719 next_token(TC_LBRACE);
1707 chain_group(); 1720 chain_until_rbrace();
1708 } else if (tclass & TC_FUNCDECL) { 1721 continue;
1722 }
1723 if (tclass == TC_FUNCDECL) {
1724 func *f;
1725
1709 debug_printf_parse("%s: TC_FUNCDECL\n", __func__); 1726 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1710 next_token(TC_FUNCTION); 1727 next_token(TC_FUNCTION);
1711 f = newfunc(t_string); 1728 f = newfunc(t_string);
@@ -1716,6 +1733,7 @@ static void parse_program(char *p)
1716 //f->nargs = 0; - already is 1733 //f->nargs = 0; - already is
1717 /* func arg list: comma sep list of args, and a close paren */ 1734 /* func arg list: comma sep list of args, and a close paren */
1718 for (;;) { 1735 for (;;) {
1736 var *v;
1719 if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { 1737 if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
1720 if (f->nargs == 0) 1738 if (f->nargs == 0)
1721 break; /* func() is ok */ 1739 break; /* func() is ok */
@@ -1730,31 +1748,37 @@ static void parse_program(char *p)
1730 /* it was a comma, we ate it */ 1748 /* it was a comma, we ate it */
1731 } 1749 }
1732 seq = &f->body; 1750 seq = &f->body;
1733//TODO: ensure there is { after "func F(...)" - but newlines are allowed 1751 /* ensure there is { after "func F(...)" - but newlines are allowed */
1734//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token(); 1752 while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
1735 chain_group(); 1753 continue;
1754 chain_until_rbrace();
1736 hash_clear(ahash); 1755 hash_clear(ahash);
1737 } else if (tclass & TS_OPSEQ) { 1756 continue;
1757 }
1758 seq = &mainseq;
1759 if (tclass & TS_OPSEQ) {
1760 node *cn;
1761
1738 debug_printf_parse("%s: TS_OPSEQ\n", __func__); 1762 debug_printf_parse("%s: TS_OPSEQ\n", __func__);
1739 rollback_token(); 1763 rollback_token();
1740 cn = chain_node(OC_TEST); 1764 cn = chain_node(OC_TEST);
1741 cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); 1765 cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
1742 if (t_tclass & TC_LBRACE) { 1766 if (t_tclass == TC_LBRACE) {
1743 debug_printf_parse("%s: TC_LBRACE\n", __func__); 1767 debug_printf_parse("%s: TC_LBRACE\n", __func__);
1744 rollback_token(); 1768 rollback_token();
1745 chain_group(); 1769 chain_group();
1746 } else { 1770 } else {
1771 /* no action, assume default "{ print }" */
1747 debug_printf_parse("%s: !TC_LBRACE\n", __func__); 1772 debug_printf_parse("%s: !TC_LBRACE\n", __func__);
1748 chain_node(OC_PRINT); 1773 chain_node(OC_PRINT);
1749 } 1774 }
1750 cn->r.n = mainseq.last; 1775 cn->r.n = mainseq.last;
1751 } else /* if (tclass & TC_LBRACE) */ { 1776 continue;
1752 debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1753 rollback_token();
1754 chain_group();
1755 } 1777 }
1778 /* tclass == TC_LBRACE */
1779 debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1780 chain_until_rbrace();
1756 } 1781 }
1757 debug_printf_parse("%s: TC_EOF\n", __func__);
1758} 1782}
1759 1783
1760 1784