From af0172369eb024fff3c8c2cd2c8765a7fde5a9f5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 01:03:42 +0200 Subject: awk: remove redundant check function old new delta next_token 785 784 -1 parse_program 337 328 -9 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-10) Total: -10 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 86076d7b6..9826a57c6 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1093,8 +1093,9 @@ static void nvfree(var *v) /* ------- awk program text parsing ------- */ -/* Parse next token pointed by global pos, place results into global ttt. - * If token isn't expected, give away. Return token class +/* Parse next token pointed by global pos, place results into global t_XYZ variables. + * If token isn't expected, print error message and die. + * Return token class (also store it in t_tclass). */ static uint32_t next_token(uint32_t expected) { @@ -1248,33 +1249,35 @@ static uint32_t next_token(uint32_t expected) goto readnext; /* insert concatenation operator when needed */ - debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__, - (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP)); + debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, + (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP), + !(ltclass == TC_LENGTH && tc == TC_SEQSTART)); if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." 
*/ ) { concat_inserted = TRUE; save_tclass = tc; save_info = t_info; - tc = TC_BINOP; + tc = TC_BINOPX; t_info = OC_CONCAT | SS | P(35); } - debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass); t_tclass = tc; + debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc); } - ltclass = t_tclass; - /* Are we ready for this? */ - if (!(ltclass & expected)) { + if (!(t_tclass & expected)) { syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); } - debug_printf_parse("%s: returning, t_double:%f ltclass:", __func__, t_double); - debug_parse_print_tc(ltclass); + debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double); + debug_parse_print_tc(t_tclass); debug_printf_parse("\n"); - return ltclass; + + ltclass = t_tclass; + + return t_tclass; #undef concat_inserted #undef save_tclass #undef save_info @@ -1700,8 +1703,9 @@ static void parse_program(char *p) /* Arg followed either by end of arg list or 1 comma */ if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM) break; - if (t_tclass != TC_COMMA) - syntax_error(EMSG_UNEXP_TOKEN); +//Impossible: next_token() above would error out and die +// if (t_tclass != TC_COMMA) +// syntax_error(EMSG_UNEXP_TOKEN); } seq = &f->body; chain_group(); -- cgit v1.2.3-55-g6feb From 832cb4fcb98d2845bd3f9d244593fc1b5f362ca0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 01:09:08 +0200 Subject: awk: make ltclass ("last token class") local to next_token() function old new delta next_token 784 790 +6 next_input_file 219 216 -3 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 6/-3) Total: 3 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 9826a57c6..418bda160 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -556,7 +556,6 @@ struct globals2 { uint32_t 
next_token__save_tclass; uint32_t next_token__save_info; - uint32_t next_token__ltclass; smallint next_token__concat_inserted; smallint next_input_file__files_happen; @@ -615,7 +614,7 @@ struct globals2 { #define rsplitter (G.rsplitter ) #define INIT_G() do { \ SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ - G.next_token__ltclass = TC_OPTERM; \ + t_tclass = TC_OPTERM; \ G.evaluate__seed = 1; \ } while (0) @@ -1102,13 +1101,13 @@ static uint32_t next_token(uint32_t expected) #define concat_inserted (G.next_token__concat_inserted) #define save_tclass (G.next_token__save_tclass) #define save_info (G.next_token__save_info) -/* Initialized to TC_OPTERM: */ -#define ltclass (G.next_token__ltclass) char *p, *s; const char *tl; - uint32_t tc; const uint32_t *ti; + uint32_t tc, last_token_class; + + last_token_class = t_tclass; /* t_tclass is initialized to TC_OPTERM */ debug_printf_parse("%s() expected(%x):", __func__, expected); debug_parse_print_tc(expected); @@ -1245,15 +1244,15 @@ static uint32_t next_token(uint32_t expected) g_pos = p; /* skipping newlines in some cases */ - if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE)) + if ((last_token_class & TC_NOTERM) && (tc & TC_NEWLINE)) goto readnext; /* insert concatenation operator when needed */ debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, - (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP), - !(ltclass == TC_LENGTH && tc == TC_SEQSTART)); - if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) - && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */ + (last_token_class & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP), + !(last_token_class == TC_LENGTH && tc == TC_SEQSTART)); + if ((last_token_class & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) + && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." 
*/ ) { concat_inserted = TRUE; save_tclass = tc; @@ -1267,7 +1266,7 @@ static uint32_t next_token(uint32_t expected) } /* Are we ready for this? */ if (!(t_tclass & expected)) { - syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? + syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ? EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); } @@ -1275,13 +1274,10 @@ static uint32_t next_token(uint32_t expected) debug_parse_print_tc(t_tclass); debug_printf_parse("\n"); - ltclass = t_tclass; - return t_tclass; #undef concat_inserted #undef save_tclass #undef save_info -#undef ltclass } static void rollback_token(void) -- cgit v1.2.3-55-g6feb From adcd9a6f349f3f2715a586b45fb27350b37cf1e5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 01:23:37 +0200 Subject: awk: use TS_foo for combined token classes. No code changes Confusion with "simple" classes was the cause of a bug fixed by previous commit Signed-off-by: Denys Vlasenko --- editors/awk.c | 128 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 418bda160..764a3dd49 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -281,39 +281,39 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ } while (0) #endif -/* combined token classes */ -#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) +/* combined token classes ("token [class] sets") */ +#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) -#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) -//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) -#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ - | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ - | TC_SEQSTART | TC_STRING | TC_NUMBER) -#define TC_LVALUE (TC_VARIABLE | TC_ARRAY) +#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) +//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST) +#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ + | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ + | TC_SEQSTART | TC_STRING | 
TC_NUMBER) -#define TC_STATEMNT (TC_STATX | TC_WHILE) -#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) +#define TS_LVALUE (TC_VARIABLE | TC_ARRAY) +#define TS_STATEMNT (TC_STATX | TC_WHILE) +#define TS_OPTERM (TC_SEMICOL | TC_NEWLINE) /* word tokens, cannot mean something else if not expected */ -#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \ - | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ - | TC_FUNCDECL | TC_BEGIN | TC_END) +#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \ + | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ + | TC_FUNCDECL | TC_BEGIN | TC_END) /* discard newlines after these */ -#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ - | TC_BINOP | TC_OPTERM) +#define TS_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ + | TS_BINOP | TS_OPTERM) /* what can expression begin with */ -#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP) +#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP) /* what can group begin with */ -#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART) +#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_GRPSTART) -/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */ +/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ /* operator is inserted between them */ -#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \ +#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \ | TC_STRING | TC_NUMBER | TC_UOPPOST \ | TC_LENGTH) -#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE) +#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE) #define OF_RES1 0x010000 #define OF_RES2 0x020000 @@ -614,7 +614,7 @@ struct globals2 { #define rsplitter (G.rsplitter ) #define INIT_G() do { \ SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ - t_tclass = TC_OPTERM; \ + t_tclass = TS_OPTERM; \ G.evaluate__seed = 1; \ } while (0) @@ -1107,7 +1107,7 @@ static uint32_t next_token(uint32_t expected) const uint32_t *ti; uint32_t tc, last_token_class; - last_token_class = t_tclass; /* 
t_tclass is initialized to TC_OPTERM */ + last_token_class = t_tclass; /* t_tclass is initialized to TS_OPTERM */ debug_printf_parse("%s() expected(%x):", __func__, expected); debug_parse_print_tc(expected); @@ -1198,9 +1198,9 @@ static uint32_t next_token(uint32_t expected) * token matches, * and it's not a longer word, */ - if ((tc & (expected | TC_WORD | TC_NEWLINE)) + if ((tc & (expected | TS_WORD | TC_NEWLINE)) && strncmp(p, tl, l) == 0 - && !((tc & TC_WORD) && isalnum_(p[l])) + && !((tc & TS_WORD) && isalnum_(p[l])) ) { /* then this is what we are looking for */ t_info = *ti; @@ -1244,14 +1244,14 @@ static uint32_t next_token(uint32_t expected) g_pos = p; /* skipping newlines in some cases */ - if ((last_token_class & TC_NOTERM) && (tc & TC_NEWLINE)) + if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE)) goto readnext; /* insert concatenation operator when needed */ debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, - (last_token_class & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP), + (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP), !(last_token_class == TC_LENGTH && tc == TC_SEQSTART)); - if ((last_token_class & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) + if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP) && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." 
*/ ) { concat_inserted = TRUE; @@ -1317,7 +1317,7 @@ static node *parse_expr(uint32_t term_tc) node sn; node *cn = &sn; node *vn, *glptr; - uint32_t tc, xtc; + uint32_t tc, expected_tc; var *v; debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); @@ -1326,20 +1326,20 @@ static node *parse_expr(uint32_t term_tc) sn.info = PRIMASK; sn.r.n = sn.a.n = glptr = NULL; - xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | term_tc; + expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc; - while (!((tc = next_token(xtc)) & term_tc)) { + while (!((tc = next_token(expected_tc)) & term_tc)) { if (glptr && (t_info == TI_LESS)) { /* input redirection (<) attached to glptr node */ debug_printf_parse("%s: input redir\n", __func__); cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); cn->a.n = glptr; - xtc = TC_OPERAND | TC_UOPPRE; + expected_tc = TS_OPERAND | TS_UOPPRE; glptr = NULL; - } else if (tc & (TC_BINOP | TC_UOPPOST)) { - debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); + } else if (tc & (TS_BINOP | TC_UOPPOST)) { + debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); /* for binary and postfix-unary operators, jump back over * previous operators with higher priority */ vn = cn; @@ -1353,19 +1353,19 @@ static node *parse_expr(uint32_t term_tc) t_info += P(6); cn = vn->a.n->r.n = new_node(t_info); cn->a.n = vn->a.n; - if (tc & TC_BINOP) { + if (tc & TS_BINOP) { cn->l.n = vn; - xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; + expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; if ((t_info & OPCLSMASK) == OC_PGETLINE) { /* it's a pipe */ next_token(TC_GETLINE); /* give maximum priority to this pipe */ cn->info &= ~PRIMASK; - xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; + expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; } } else { cn->r.n = vn; - xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; + expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; } vn->a.n = cn; @@ -1377,14 +1377,14 @@ static node 
*parse_expr(uint32_t term_tc) cn = vn->r.n = new_node(t_info); cn->a.n = vn; - xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; + expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; if (t_info == TI_PREINC || t_info == TI_PREDEC) - xtc = TC_LVALUE | TC_UOPPRE1; - if (tc & (TC_OPERAND | TC_REGEXP)) { - debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__); - xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | term_tc; + expected_tc = TS_LVALUE | TC_UOPPRE1; + if (tc & (TS_OPERAND | TC_REGEXP)) { + debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__); + expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc; /* one should be very careful with switch on tclass - - * only simple tclasses should be used! */ + * only simple tclasses should be used (TC_xyz, not TS_xyz) */ switch (tc) { case TC_VARIABLE: case TC_ARRAY: @@ -1412,7 +1412,7 @@ static node *parse_expr(uint32_t term_tc) setvar_i(v, t_double); else { setvar_s(v, t_string); - xtc &= ~TC_UOPPOST; /* "str"++ is not allowed */ + expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ } break; @@ -1439,7 +1439,7 @@ static node *parse_expr(uint32_t term_tc) case TC_GETLINE: debug_printf_parse("%s: TC_GETLINE\n", __func__); glptr = cn; - xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; + expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; break; case TC_BUILTIN: @@ -1450,7 +1450,7 @@ static node *parse_expr(uint32_t term_tc) case TC_LENGTH: debug_printf_parse("%s: TC_LENGTH\n", __func__); next_token(TC_SEQSTART /* length(...) 
*/ - | TC_OPTERM /* length; (or newline)*/ + | TS_OPTERM /* length; (or newline)*/ | TC_GRPTERM /* length } */ | TC_BINOPX /* length NUM */ | TC_COMMA /* print length, 1 */ @@ -1464,7 +1464,7 @@ static node *parse_expr(uint32_t term_tc) } } } - } + } /* while() */ debug_printf_parse("%s() returns %p\n", __func__, sn.r.n); return sn.r.n; @@ -1497,7 +1497,7 @@ static void chain_expr(uint32_t info) n = chain_node(info); - n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM); + n->l.n = parse_expr(TS_OPTERM | TC_GRPTERM); if ((info & OF_REQUIRED) && !n->l.n) syntax_error(EMSG_TOO_FEW_ARGS); @@ -1535,12 +1535,12 @@ static void chain_group(void) node *n, *n2, *n3; do { - c = next_token(TC_GRPSEQ); + c = next_token(TS_GRPSEQ); } while (c & TC_NEWLINE); if (c & TC_GRPSTART) { debug_printf_parse("%s: TC_GRPSTART\n", __func__); - while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { + while (next_token(TS_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { debug_printf_parse("%s: !TC_GRPTERM\n", __func__); if (t_tclass & TC_NEWLINE) continue; @@ -1548,13 +1548,13 @@ static void chain_group(void) chain_group(); } debug_printf_parse("%s: TC_GRPTERM\n", __func__); - } else if (c & (TC_OPSEQ | TC_OPTERM)) { - debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__); + } else if (c & (TS_OPSEQ | TS_OPTERM)) { + debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); rollback_token(); chain_expr(OC_EXEC | Vx); } else { - /* TC_STATEMNT */ - debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__); + /* TS_STATEMNT */ + debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); switch (t_info & OPCLSMASK) { case ST_IF: debug_printf_parse("%s: ST_IF\n", __func__); @@ -1563,7 +1563,7 @@ static void chain_group(void) chain_group(); n2 = chain_node(OC_EXEC); n->r.n = seq->last; - if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { + if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { chain_group(); n2->a.n = seq->last; } else { @@ -1616,10 +1616,10 @@ static void chain_group(void) 
case OC_PRINTF: debug_printf_parse("%s: OC_PRINT[F]\n", __func__); n = chain_node(t_info); - n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); + n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM); if (t_tclass & TC_OUTRDR) { n->info |= t_info; - n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM); + n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM); } if (t_tclass & TC_GRPTERM) rollback_token(); @@ -1658,11 +1658,11 @@ static void parse_program(char *p) g_pos = p; t_lineno = 1; - while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | - TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { + while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_GRPSTART | + TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { - if (tclass & TC_OPTERM) { - debug_printf_parse("%s: TC_OPTERM\n", __func__); + if (tclass & TS_OPTERM) { + debug_printf_parse("%s: TS_OPTERM\n", __func__); continue; } @@ -1706,11 +1706,11 @@ static void parse_program(char *p) seq = &f->body; chain_group(); clear_array(ahash); - } else if (tclass & TC_OPSEQ) { - debug_printf_parse("%s: TC_OPSEQ\n", __func__); + } else if (tclass & TS_OPSEQ) { + debug_printf_parse("%s: TS_OPSEQ\n", __func__); rollback_token(); cn = chain_node(OC_TEST); - cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); + cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_GRPSTART); if (t_tclass & TC_GRPSTART) { debug_printf_parse("%s: TC_GRPSTART\n", __func__); rollback_token(); -- cgit v1.2.3-55-g6feb From a493441ca52adca7df3976c668f2e7c48d1b67a1 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 01:30:49 +0200 Subject: awk: deindent code block, no code changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 177 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 90 insertions(+), 87 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 764a3dd49..9a3b63df6 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1337,8 +1337,9 @@ static node *parse_expr(uint32_t term_tc) cn->a.n = 
glptr; expected_tc = TS_OPERAND | TS_UOPPRE; glptr = NULL; - - } else if (tc & (TS_BINOP | TC_UOPPOST)) { + continue; + } + if (tc & (TS_BINOP | TC_UOPPOST)) { debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); /* for binary and postfix-unary operators, jump back over * previous operators with higher priority */ @@ -1368,101 +1369,103 @@ static node *parse_expr(uint32_t term_tc) expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; } vn->a.n = cn; + continue; + } - } else { - debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); - /* for operands and prefix-unary operators, attach them - * to last node */ - vn = cn; - cn = vn->r.n = new_node(t_info); - cn->a.n = vn; + debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); + /* for operands and prefix-unary operators, attach them + * to last node */ + vn = cn; + cn = vn->r.n = new_node(t_info); + cn->a.n = vn; - expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; - if (t_info == TI_PREINC || t_info == TI_PREDEC) - expected_tc = TS_LVALUE | TC_UOPPRE1; - if (tc & (TS_OPERAND | TC_REGEXP)) { - debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__); - expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc; - /* one should be very careful with switch on tclass - - * only simple tclasses should be used (TC_xyz, not TS_xyz) */ - switch (tc) { - case TC_VARIABLE: - case TC_ARRAY: - debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); - cn->info = OC_VAR; - v = hash_search(ahash, t_string); - if (v != NULL) { - cn->info = OC_FNARG; - cn->l.aidx = v->x.aidx; - } else { - cn->l.v = newvar(t_string); - } - if (tc & TC_ARRAY) { - cn->info |= xS; - cn->r.n = parse_expr(TC_ARRTERM); - } - break; + expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; + if (t_info == TI_PREINC || t_info == TI_PREDEC) + expected_tc = TS_LVALUE | TC_UOPPRE1; - case TC_NUMBER: - case TC_STRING: - debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); - cn->info = OC_VAR; - v = 
cn->l.v = xzalloc(sizeof(var)); - if (tc & TC_NUMBER) - setvar_i(v, t_double); - else { - setvar_s(v, t_string); - expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ - } - break; + if (!(tc & (TS_OPERAND | TC_REGEXP))) + continue; - case TC_REGEXP: - debug_printf_parse("%s: TC_REGEXP\n", __func__); - mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); - break; + debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__); + expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc; + /* one should be very careful with switch on tclass - + * only simple tclasses should be used (TC_xyz, not TS_xyz) */ + switch (tc) { + case TC_VARIABLE: + case TC_ARRAY: + debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); + cn->info = OC_VAR; + v = hash_search(ahash, t_string); + if (v != NULL) { + cn->info = OC_FNARG; + cn->l.aidx = v->x.aidx; + } else { + cn->l.v = newvar(t_string); + } + if (tc & TC_ARRAY) { + cn->info |= xS; + cn->r.n = parse_expr(TC_ARRTERM); + } + break; - case TC_FUNCTION: - debug_printf_parse("%s: TC_FUNCTION\n", __func__); - cn->info = OC_FUNC; - cn->r.f = newfunc(t_string); - cn->l.n = condition(); - break; + case TC_NUMBER: + case TC_STRING: + debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); + cn->info = OC_VAR; + v = cn->l.v = xzalloc(sizeof(var)); + if (tc & TC_NUMBER) + setvar_i(v, t_double); + else { + setvar_s(v, t_string); + expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ + } + break; - case TC_SEQSTART: - debug_printf_parse("%s: TC_SEQSTART\n", __func__); - cn = vn->r.n = parse_expr(TC_SEQTERM); - if (!cn) - syntax_error("Empty sequence"); - cn->a.n = vn; - break; + case TC_REGEXP: + debug_printf_parse("%s: TC_REGEXP\n", __func__); + mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); + break; - case TC_GETLINE: - debug_printf_parse("%s: TC_GETLINE\n", __func__); - glptr = cn; - expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; - break; + case TC_FUNCTION: + debug_printf_parse("%s: 
TC_FUNCTION\n", __func__); + cn->info = OC_FUNC; + cn->r.f = newfunc(t_string); + cn->l.n = condition(); + break; - case TC_BUILTIN: - debug_printf_parse("%s: TC_BUILTIN\n", __func__); - cn->l.n = condition(); - break; + case TC_SEQSTART: + debug_printf_parse("%s: TC_SEQSTART\n", __func__); + cn = vn->r.n = parse_expr(TC_SEQTERM); + if (!cn) + syntax_error("Empty sequence"); + cn->a.n = vn; + break; - case TC_LENGTH: - debug_printf_parse("%s: TC_LENGTH\n", __func__); - next_token(TC_SEQSTART /* length(...) */ - | TS_OPTERM /* length; (or newline)*/ - | TC_GRPTERM /* length } */ - | TC_BINOPX /* length NUM */ - | TC_COMMA /* print length, 1 */ - ); - rollback_token(); - if (t_tclass & TC_SEQSTART) { - /* It was a "(" token. Handle just like TC_BUILTIN */ - cn->l.n = condition(); - } - break; - } + case TC_GETLINE: + debug_printf_parse("%s: TC_GETLINE\n", __func__); + glptr = cn; + expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; + break; + + case TC_BUILTIN: + debug_printf_parse("%s: TC_BUILTIN\n", __func__); + cn->l.n = condition(); + break; + + case TC_LENGTH: + debug_printf_parse("%s: TC_LENGTH\n", __func__); + next_token(TC_SEQSTART /* length(...) */ + | TS_OPTERM /* length; (or newline)*/ + | TC_GRPTERM /* length } */ + | TC_BINOPX /* length NUM */ + | TC_COMMA /* print length, 1 */ + ); + rollback_token(); + if (t_tclass & TC_SEQSTART) { + /* It was a "(" token. 
Handle just like TC_BUILTIN */ + cn->l.n = condition(); } + break; } } /* while() */ -- cgit v1.2.3-55-g6feb From 9782cb7774f00a3e777e3d764ccce15055a29977 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 01:50:47 +0200 Subject: awk: rename TC_SEQSTART/END to L/RPAREN, no code changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 94 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 9a3b63df6..d31b97d86 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -207,48 +207,48 @@ typedef struct tsplitter_s { } tsplitter; /* simple token classes */ -/* Order and hex values are very important!!! See next_token() */ -#define TC_SEQSTART (1 << 0) /* ( */ -#define TC_SEQTERM (1 << 1) /* ) */ +/* order and hex values are very important!!! See next_token() */ +#define TC_LPAREN (1 << 0) /* ( */ +#define TC_RPAREN (1 << 1) /* ) */ #define TC_REGEXP (1 << 2) /* /.../ */ #define TC_OUTRDR (1 << 3) /* | > >> */ #define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */ #define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */ #define TC_BINOPX (1 << 6) /* two-opnd operator */ -#define TC_IN (1 << 7) -#define TC_COMMA (1 << 8) -#define TC_PIPE (1 << 9) /* input redirection pipe */ +#define TC_IN (1 << 7) /* 'in' */ +#define TC_COMMA (1 << 8) /* , */ +#define TC_PIPE (1 << 9) /* input redirection pipe | */ #define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ #define TC_ARRTERM (1 << 11) /* ] */ #define TC_GRPSTART (1 << 12) /* { */ #define TC_GRPTERM (1 << 13) /* } */ -#define TC_SEMICOL (1 << 14) +#define TC_SEMICOL (1 << 14) /* ; */ #define TC_NEWLINE (1 << 15) #define TC_STATX (1 << 16) /* ctl statement (for, next...) */ -#define TC_WHILE (1 << 17) -#define TC_ELSE (1 << 18) +#define TC_WHILE (1 << 17) /* 'while' */ +#define TC_ELSE (1 << 18) /* 'else' */ #define TC_BUILTIN (1 << 19) /* This costs ~50 bytes of code. 
* A separate class to support deprecated "length" form. If we don't need that * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH * can be merged with TC_BUILTIN: */ -#define TC_LENGTH (1 << 20) -#define TC_GETLINE (1 << 21) +#define TC_LENGTH (1 << 20) /* 'length' */ +#define TC_GETLINE (1 << 21) /* 'getline' */ #define TC_FUNCDECL (1 << 22) /* 'function' 'func' */ -#define TC_BEGIN (1 << 23) -#define TC_END (1 << 24) +#define TC_BEGIN (1 << 23) /* 'BEGIN' */ +#define TC_END (1 << 24) /* 'END' */ #define TC_EOF (1 << 25) -#define TC_VARIABLE (1 << 26) -#define TC_ARRAY (1 << 27) -#define TC_FUNCTION (1 << 28) -#define TC_STRING (1 << 29) +#define TC_VARIABLE (1 << 26) /* name */ +#define TC_ARRAY (1 << 27) /* name[ */ +#define TC_FUNCTION (1 << 28) /* name( - but unlike TC_ARRAY, parser does not consume '(' */ +#define TC_STRING (1 << 29) /* "..." */ #define TC_NUMBER (1 << 30) #ifndef debug_parse_print_tc #define debug_parse_print_tc(n) do { \ -if ((n) & TC_SEQSTART) debug_printf_parse(" SEQSTART"); \ -if ((n) & TC_SEQTERM ) debug_printf_parse(" SEQTERM" ); \ +if ((n) & TC_LPAREN ) debug_printf_parse(" LPAREN" ); \ +if ((n) & TC_RPAREN ) debug_printf_parse(" RPAREN" ); \ if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \ if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \ if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \ @@ -288,7 +288,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ //#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST) #define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ - | TC_SEQSTART | TC_STRING | TC_NUMBER) + | TC_LPAREN | TC_STRING | TC_NUMBER) #define TS_LVALUE (TC_VARIABLE | TC_ARRAY) #define TS_STATEMNT (TC_STATX | TC_WHILE) @@ -310,7 +310,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ /* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ /* operator is inserted between them */ -#define TS_CONCAT_L (TC_VARIABLE 
| TC_ARRTERM | TC_SEQTERM \ +#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \ | TC_STRING | TC_NUMBER | TC_UOPPOST \ | TC_LENGTH) #define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE) @@ -394,8 +394,8 @@ enum { #define NTCC '\377' static const char tokenlist[] ALIGN1 = - "\1(" NTC /* TC_SEQSTART */ - "\1)" NTC /* TC_SEQTERM */ + "\1(" NTC /* TC_LPAREN */ + "\1)" NTC /* TC_RPAREN */ "\1/" NTC /* TC_REGEXP */ "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */ "\2++" "\2--" NTC /* TC_UOPPOST */ @@ -1250,9 +1250,9 @@ static uint32_t next_token(uint32_t expected) /* insert concatenation operator when needed */ debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP), - !(last_token_class == TC_LENGTH && tc == TC_SEQSTART)); + !(last_token_class == TC_LENGTH && tc == TC_LPAREN)); if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP) - && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */ + && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." 
*/ ) { concat_inserted = TRUE; save_tclass = tc; @@ -1304,10 +1304,10 @@ static void mk_re_node(const char *s, node *n, regex_t *re) xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); } -static node *condition(void) +static node *parse_lrparen_list(void) { - next_token(TC_SEQSTART); - return parse_expr(TC_SEQTERM); + next_token(TC_LPAREN); + return parse_expr(TC_RPAREN); } /* parse expression terminated by given argument, return ptr @@ -1430,12 +1430,12 @@ static node *parse_expr(uint32_t term_tc) debug_printf_parse("%s: TC_FUNCTION\n", __func__); cn->info = OC_FUNC; cn->r.f = newfunc(t_string); - cn->l.n = condition(); + cn->l.n = parse_lrparen_list(); break; - case TC_SEQSTART: - debug_printf_parse("%s: TC_SEQSTART\n", __func__); - cn = vn->r.n = parse_expr(TC_SEQTERM); + case TC_LPAREN: + debug_printf_parse("%s: TC_LPAREN\n", __func__); + cn = vn->r.n = parse_expr(TC_RPAREN); if (!cn) syntax_error("Empty sequence"); cn->a.n = vn; @@ -1449,21 +1449,21 @@ static node *parse_expr(uint32_t term_tc) case TC_BUILTIN: debug_printf_parse("%s: TC_BUILTIN\n", __func__); - cn->l.n = condition(); + cn->l.n = parse_lrparen_list(); break; case TC_LENGTH: debug_printf_parse("%s: TC_LENGTH\n", __func__); - next_token(TC_SEQSTART /* length(...) */ + next_token(TC_LPAREN /* length(...) */ | TS_OPTERM /* length; (or newline)*/ | TC_GRPTERM /* length } */ | TC_BINOPX /* length NUM */ | TC_COMMA /* print length, 1 */ ); rollback_token(); - if (t_tclass & TC_SEQSTART) { + if (t_tclass & TC_LPAREN) { /* It was a "(" token. 
Handle just like TC_BUILTIN */ - cn->l.n = condition(); + cn->l.n = parse_lrparen_list(); } break; } @@ -1562,7 +1562,7 @@ static void chain_group(void) case ST_IF: debug_printf_parse("%s: ST_IF\n", __func__); n = chain_node(OC_BR | Vx); - n->l.n = condition(); + n->l.n = parse_lrparen_list(); chain_group(); n2 = chain_node(OC_EXEC); n->r.n = seq->last; @@ -1576,7 +1576,7 @@ static void chain_group(void) case ST_WHILE: debug_printf_parse("%s: ST_WHILE\n", __func__); - n2 = condition(); + n2 = parse_lrparen_list(); n = chain_loop(NULL); n->l.n = n2; break; @@ -1587,14 +1587,14 @@ static void chain_group(void) n = chain_loop(NULL); n2->a.n = n->a.n; next_token(TC_WHILE); - n->l.n = condition(); + n->l.n = parse_lrparen_list(); break; case ST_FOR: debug_printf_parse("%s: ST_FOR\n", __func__); - next_token(TC_SEQSTART); - n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); - if (t_tclass & TC_SEQTERM) { /* for-in */ + next_token(TC_LPAREN); + n2 = parse_expr(TC_SEMICOL | TC_RPAREN); + if (t_tclass & TC_RPAREN) { /* for-in */ if (!n2 || (n2->info & OPCLSMASK) != OC_IN) syntax_error(EMSG_UNEXP_TOKEN); n = chain_node(OC_WALKINIT | VV); @@ -1607,7 +1607,7 @@ static void chain_group(void) n = chain_node(OC_EXEC | Vx); n->l.n = n2; n2 = parse_expr(TC_SEMICOL); - n3 = parse_expr(TC_SEQTERM); + n3 = parse_expr(TC_RPAREN); n = chain_loop(n3); n->l.n = n2; if (!n2) @@ -1686,13 +1686,13 @@ static void parse_program(char *p) f->body.first = NULL; f->nargs = 0; /* Match func arg list: a comma sep list of >= 0 args, and a close paren */ - while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) { + while (next_token(TC_VARIABLE | TC_RPAREN | TC_COMMA)) { /* Either an empty arg list, or trailing comma from prev iter * must be followed by an arg */ - if (f->nargs == 0 && t_tclass == TC_SEQTERM) + if (f->nargs == 0 && t_tclass == TC_RPAREN) break; - /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */ + /* TC_LPAREN/TC_COMMA must be followed by TC_VARIABLE */ if (t_tclass != TC_VARIABLE) 
syntax_error(EMSG_UNEXP_TOKEN); @@ -1700,7 +1700,7 @@ static void parse_program(char *p) v->x.aidx = f->nargs++; /* Arg followed either by end of arg list or 1 comma */ - if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM) + if (next_token(TC_COMMA | TC_RPAREN) & TC_RPAREN) break; //Impossible: next_token() above would error out and die // if (t_tclass != TC_COMMA) -- cgit v1.2.3-55-g6feb From 926420795b4191e045d4a316bfed19f84275a185 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 02:32:32 +0200 Subject: awk: simplify parsing of function declaration function old new delta parse_program 328 313 -15 Signed-off-by: Denys Vlasenko --- editors/awk.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index d31b97d86..08ff02adb 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -769,7 +769,7 @@ static void hash_remove(xhash *hash, const char *name) static char *skip_spaces(char *p) { - while (1) { + for (;;) { if (*p == '\\' && p[1] == '\n') { p++; t_lineno++; @@ -1685,26 +1685,20 @@ static void parse_program(char *p) f = newfunc(t_string); f->body.first = NULL; f->nargs = 0; - /* Match func arg list: a comma sep list of >= 0 args, and a close paren */ - while (next_token(TC_VARIABLE | TC_RPAREN | TC_COMMA)) { - /* Either an empty arg list, or trailing comma from prev iter - * must be followed by an arg */ - if (f->nargs == 0 && t_tclass == TC_RPAREN) - break; - - /* TC_LPAREN/TC_COMMA must be followed by TC_VARIABLE */ - if (t_tclass != TC_VARIABLE) + /* func arg list: comma sep list of args, and a close paren */ + for (;;) { + if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { + if (f->nargs == 0) + break; /* func() is ok */ + /* func(a,) is not ok */ syntax_error(EMSG_UNEXP_TOKEN); - + } v = findvar(ahash, t_string); v->x.aidx = f->nargs++; - /* Arg followed either by end of arg list or 1 comma */ - if (next_token(TC_COMMA | TC_RPAREN) & TC_RPAREN) + if 
(next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN) break; -//Impossible: next_token() above would error out and die -// if (t_tclass != TC_COMMA) -// syntax_error(EMSG_UNEXP_TOKEN); + /* it was a comma, we ate it */ } seq = &f->body; chain_group(); -- cgit v1.2.3-55-g6feb From cb6061a4e9860bf3d529109b34103ce3bde6d735 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 02:43:02 +0200 Subject: awk: g_buf[] does not need a separate allocation function old new delta exec_builtin 1400 1414 +14 evaluate 3132 3141 +9 getvar_s 121 125 +4 awk_main 902 886 -16 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/1 up/down: 27/-16) Total: 11 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 08ff02adb..7e4f0d142 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -535,7 +535,6 @@ struct globals { var *Fields; nvblock *g_cb; char *g_pos; - char *g_buf; smallint icase; smallint exiting; smallint nextrec; @@ -571,6 +570,8 @@ struct globals2 { /* biggest and least used members go last */ tsplitter fsplitter, rsplitter; + + char g_buf[MAXVARFMT + 1]; }; #define G1 (ptr_to_globals[-1]) #define G (*(struct globals2 *)ptr_to_globals) @@ -598,7 +599,6 @@ struct globals2 { #define Fields (G1.Fields ) #define g_cb (G1.g_cb ) #define g_pos (G1.g_pos ) -#define g_buf (G1.g_buf ) #define icase (G1.icase ) #define exiting (G1.exiting ) #define nextrec (G1.nextrec ) @@ -612,6 +612,7 @@ struct globals2 { #define intvar (G.intvar ) #define fsplitter (G.fsplitter ) #define rsplitter (G.rsplitter ) +#define g_buf (G.g_buf ) #define INIT_G() do { \ SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ t_tclass = TS_OPTERM; \ @@ -3353,9 +3354,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) if (ENABLE_LOCALE_SUPPORT) setlocale(LC_NUMERIC, "C"); - /* allocate global buffer */ - g_buf = 
xmalloc(MAXVARFMT + 1); - vhash = hash_init(); ahash = hash_init(); fdhash = hash_init(); -- cgit v1.2.3-55-g6feb From f414fb4411e65662b44f038ed3175789172edc20 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 03:02:21 +0200 Subject: awk: when parsing TC_FUNCTION token, eat its opening '(' ...like we do for array references. function old new delta parse_expr 938 948 +10 next_token 788 791 +3 parse_program 313 310 -3 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/1 up/down: 13/-3) Total: 10 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 7e4f0d142..1a4468a53 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -241,7 +241,7 @@ typedef struct tsplitter_s { #define TC_EOF (1 << 25) #define TC_VARIABLE (1 << 26) /* name */ #define TC_ARRAY (1 << 27) /* name[ */ -#define TC_FUNCTION (1 << 28) /* name( - but unlike TC_ARRAY, parser does not consume '(' */ +#define TC_FUNCTION (1 << 28) /* name( */ #define TC_STRING (1 << 29) /* "..." */ #define TC_NUMBER (1 << 30) @@ -959,6 +959,7 @@ static double getvar_i(var *v) v->number = my_strtod(&s); debug_printf_eval("%f (s:'%s')\n", v->number, s); if (v->type & VF_USER) { +//TODO: skip_spaces() also skips backslash+newline, is it intended here? 
s = skip_spaces(s); if (*s != '\0') v->type &= ~VF_USER; @@ -1103,7 +1104,7 @@ static uint32_t next_token(uint32_t expected) #define save_tclass (G.next_token__save_tclass) #define save_info (G.next_token__save_info) - char *p, *s; + char *p; const char *tl; const uint32_t *ti; uint32_t tc, last_token_class; @@ -1131,15 +1132,12 @@ static uint32_t next_token(uint32_t expected) while (*p != '\n' && *p != '\0') p++; - if (*p == '\n') - t_lineno++; - if (*p == '\0') { tc = TC_EOF; debug_printf_parse("%s: token found: TC_EOF\n", __func__); } else if (*p == '\"') { /* it's a string */ - t_string = s = ++p; + char *s = t_string = ++p; while (*p != '\"') { char *pp; if (*p == '\0' || *p == '\n') @@ -1154,7 +1152,7 @@ static uint32_t next_token(uint32_t expected) debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string); } else if ((expected & TC_REGEXP) && *p == '/') { /* it's regexp */ - t_string = s = ++p; + char *s = t_string = ++p; while (*p != '/') { if (*p == '\0' || *p == '\n') syntax_error(EMSG_UNEXP_EOS); @@ -1185,6 +1183,9 @@ static uint32_t next_token(uint32_t expected) tc = TC_NUMBER; debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); } else { + if (*p == '\n') + t_lineno++; + /* search for something known */ tl = tokenlist; tc = 0x00000001; @@ -1230,15 +1231,15 @@ static uint32_t next_token(uint32_t expected) if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) p = skip_spaces(p); if (*p == '(') { + p++; tc = TC_FUNCTION; debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string); + } else if (*p == '[') { + p++; + tc = TC_ARRAY; + debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); } else { - if (*p == '[') { - p++; - tc = TC_ARRAY; - debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); - } else - debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); + debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); } 
} token_found: @@ -1431,7 +1432,7 @@ static node *parse_expr(uint32_t term_tc) debug_printf_parse("%s: TC_FUNCTION\n", __func__); cn->info = OC_FUNC; cn->r.f = newfunc(t_string); - cn->l.n = parse_lrparen_list(); + cn->l.n = parse_expr(TC_RPAREN); break; case TC_LPAREN: @@ -1682,7 +1683,6 @@ static void parse_program(char *p) } else if (tclass & TC_FUNCDECL) { debug_printf_parse("%s: TC_FUNCDECL\n", __func__); next_token(TC_FUNCTION); - g_pos++; f = newfunc(t_string); f->body.first = NULL; f->nargs = 0; -- cgit v1.2.3-55-g6feb From 4f27503a1ecab8dfe373a349df3d8fe3c22e2160 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 03:27:07 +0200 Subject: awk: get rid of "move name one char back" trick in next_token() function old new delta next_token 791 812 +21 awk_main 886 831 -55 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 21/-55) Total: -34 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 1a4468a53..fb1e5d59b 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -535,6 +535,7 @@ struct globals { var *Fields; nvblock *g_cb; char *g_pos; + char g_saved_ch; smallint icase; smallint exiting; smallint nextrec; @@ -599,6 +600,7 @@ struct globals2 { #define Fields (G1.Fields ) #define g_cb (G1.g_cb ) #define g_pos (G1.g_pos ) +#define g_saved_ch (G1.g_saved_ch ) #define icase (G1.icase ) #define exiting (G1.exiting ) #define nextrec (G1.nextrec ) @@ -1125,6 +1127,10 @@ static uint32_t next_token(uint32_t expected) t_info = save_info; } else { p = g_pos; + if (g_saved_ch != '\0') { + *p = g_saved_ch; + g_saved_ch = '\0'; + } readnext: p = skip_spaces(p); g_lineno = t_lineno; @@ -1183,6 +1189,8 @@ static uint32_t next_token(uint32_t expected) tc = TC_NUMBER; debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, 
t_double); } else { + char *end_of_name; + if (*p == '\n') t_lineno++; @@ -1219,16 +1227,14 @@ static uint32_t next_token(uint32_t expected) if (!isalnum_(*p)) syntax_error(EMSG_UNEXP_TOKEN); /* no */ /* yes */ -/* "move name one char back" trick: we need a byte for NUL terminator */ -/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */ - t_string = --p; - while (isalnum_(*++p)) { - p[-1] = *p; - } - p[-1] = '\0'; + t_string = p; + while (isalnum_(*p)) + p++; + end_of_name = p; tc = TC_VARIABLE; /* also consume whitespace between functionname and bracket */ if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) +//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters? p = skip_spaces(p); if (*p == '(') { p++; @@ -1240,7 +1246,19 @@ static uint32_t next_token(uint32_t expected) debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); } else { debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); + if (end_of_name == p) { + /* there is no space for trailing NUL in t_string! + * We need to save the char we are going to NUL. + * (we'll use it in future call to next_token()) + */ + g_saved_ch = *end_of_name; +// especially pathological example is V="abc"; V.2 - it's V concatenated to .2 +// (it evaluates to "abc0.2"). Because of this case, we can't simply cache +// '.' and analyze it later: we also have to *store it back* in next +// next_token(), in order to give my_strtod() the undamaged ".2" string. 
+ } } + *end_of_name = '\0'; /* terminate t_string */ } token_found: g_pos = p; @@ -3420,38 +3438,20 @@ int awk_main(int argc UNUSED_PARAM, char **argv) g_progname = llist_pop(&list_f); fd = xopen_stdin(g_progname); - /* 1st byte is reserved for "move name one char back" trick in next_token */ - i = 1; - s = NULL; - for (;;) { - int sz; - s = xrealloc(s, i + 1000); - sz = safe_read(fd, s + i, 1000); - if (sz <= 0) - break; - i += sz; - } - s = xrealloc(s, i + 1); /* trim unused 999 bytes */ - s[i] = '\0'; + s = xmalloc_read(fd, NULL); /* it's NUL-terminated */ close(fd); - parse_program(s + 1); + parse_program(s); free(s); } g_progname = "cmd. line"; #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS while (list_e) { - /* NB: "move name one char back" trick in next_token - * can use argv[i][-1] here. - */ parse_program(llist_pop(&list_e)); } #endif if (!(opt & (OPT_f | OPT_e))) { if (!*argv) bb_show_usage(); - /* NB: "move name one char back" trick in next_token - * can use argv[i][-1] here. - */ parse_program(*argv++); } -- cgit v1.2.3-55-g6feb From 216d3d8ad9b7d0346cf439ccaca18d0a263e7608 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 03:44:56 +0200 Subject: awk: code shrink function old new delta parse_expr 948 945 -3 chain_expr 65 62 -3 chain_group 655 649 -6 parse_program 310 303 -7 rollback_token 10 - -10 ------------------------------------------------------------------------------ (add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-29) Total: -29 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index fb1e5d59b..3d1c04a32 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1300,7 +1300,7 @@ static uint32_t next_token(uint32_t expected) #undef save_info } -static void rollback_token(void) +static ALWAYS_INLINE void rollback_token(void) { t_rollback = TRUE; } @@ -1474,14 +1474,14 @@ static node *parse_expr(uint32_t term_tc) case TC_LENGTH: 
debug_printf_parse("%s: TC_LENGTH\n", __func__); - next_token(TC_LPAREN /* length(...) */ + tc = next_token(TC_LPAREN /* length(...) */ | TS_OPTERM /* length; (or newline)*/ | TC_GRPTERM /* length } */ | TC_BINOPX /* length NUM */ | TC_COMMA /* print length, 1 */ ); rollback_token(); - if (t_tclass & TC_LPAREN) { + if (tc & TC_LPAREN) { /* It was a "(" token. Handle just like TC_BUILTIN */ cn->l.n = parse_lrparen_list(); } @@ -1563,19 +1563,23 @@ static void chain_group(void) if (c & TC_GRPSTART) { debug_printf_parse("%s: TC_GRPSTART\n", __func__); - while (next_token(TS_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { + while ((c = next_token(TS_GRPSEQ | TC_GRPTERM)) != TC_GRPTERM) { debug_printf_parse("%s: !TC_GRPTERM\n", __func__); - if (t_tclass & TC_NEWLINE) + if (c & TC_NEWLINE) continue; rollback_token(); chain_group(); } debug_printf_parse("%s: TC_GRPTERM\n", __func__); - } else if (c & (TS_OPSEQ | TS_OPTERM)) { + return; + } + if (c & (TS_OPSEQ | TS_OPTERM)) { debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); rollback_token(); chain_expr(OC_EXEC | Vx); - } else { + return; + } + { /* TS_STATEMNT */ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); switch (t_info & OPCLSMASK) { -- cgit v1.2.3-55-g6feb From 686287b5da98508dd03fb295745c82d00440131e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 03:47:46 +0200 Subject: awk: deindent a block, no code changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 167 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 83 insertions(+), 84 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 3d1c04a32..34bcc1798 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1579,98 +1579,97 @@ static void chain_group(void) chain_expr(OC_EXEC | Vx); return; } - { - /* TS_STATEMNT */ - debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); - switch (t_info & OPCLSMASK) { - case ST_IF: - debug_printf_parse("%s: ST_IF\n", __func__); - n = chain_node(OC_BR | Vx); - n->l.n = 
parse_lrparen_list(); + + /* TS_STATEMNT */ + debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); + switch (t_info & OPCLSMASK) { + case ST_IF: + debug_printf_parse("%s: ST_IF\n", __func__); + n = chain_node(OC_BR | Vx); + n->l.n = parse_lrparen_list(); + chain_group(); + n2 = chain_node(OC_EXEC); + n->r.n = seq->last; + if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { chain_group(); - n2 = chain_node(OC_EXEC); - n->r.n = seq->last; - if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { - chain_group(); - n2->a.n = seq->last; - } else { - rollback_token(); - } - break; + n2->a.n = seq->last; + } else { + rollback_token(); + } + break; - case ST_WHILE: - debug_printf_parse("%s: ST_WHILE\n", __func__); - n2 = parse_lrparen_list(); - n = chain_loop(NULL); - n->l.n = n2; - break; + case ST_WHILE: + debug_printf_parse("%s: ST_WHILE\n", __func__); + n2 = parse_lrparen_list(); + n = chain_loop(NULL); + n->l.n = n2; + break; - case ST_DO: - debug_printf_parse("%s: ST_DO\n", __func__); - n2 = chain_node(OC_EXEC); - n = chain_loop(NULL); - n2->a.n = n->a.n; - next_token(TC_WHILE); - n->l.n = parse_lrparen_list(); - break; + case ST_DO: + debug_printf_parse("%s: ST_DO\n", __func__); + n2 = chain_node(OC_EXEC); + n = chain_loop(NULL); + n2->a.n = n->a.n; + next_token(TC_WHILE); + n->l.n = parse_lrparen_list(); + break; - case ST_FOR: - debug_printf_parse("%s: ST_FOR\n", __func__); - next_token(TC_LPAREN); - n2 = parse_expr(TC_SEMICOL | TC_RPAREN); - if (t_tclass & TC_RPAREN) { /* for-in */ - if (!n2 || (n2->info & OPCLSMASK) != OC_IN) - syntax_error(EMSG_UNEXP_TOKEN); - n = chain_node(OC_WALKINIT | VV); - n->l.n = n2->l.n; - n->r.n = n2->r.n; - n = chain_loop(NULL); - n->info = OC_WALKNEXT | Vx; - n->l.n = n2->l.n; - } else { /* for (;;) */ - n = chain_node(OC_EXEC | Vx); - n->l.n = n2; - n2 = parse_expr(TC_SEMICOL); - n3 = parse_expr(TC_RPAREN); - n = chain_loop(n3); - n->l.n = n2; - if (!n2) - n->info = OC_EXEC; - } - break; + case ST_FOR: + 
debug_printf_parse("%s: ST_FOR\n", __func__); + next_token(TC_LPAREN); + n2 = parse_expr(TC_SEMICOL | TC_RPAREN); + if (t_tclass & TC_RPAREN) { /* for-in */ + if (!n2 || (n2->info & OPCLSMASK) != OC_IN) + syntax_error(EMSG_UNEXP_TOKEN); + n = chain_node(OC_WALKINIT | VV); + n->l.n = n2->l.n; + n->r.n = n2->r.n; + n = chain_loop(NULL); + n->info = OC_WALKNEXT | Vx; + n->l.n = n2->l.n; + } else { /* for (;;) */ + n = chain_node(OC_EXEC | Vx); + n->l.n = n2; + n2 = parse_expr(TC_SEMICOL); + n3 = parse_expr(TC_RPAREN); + n = chain_loop(n3); + n->l.n = n2; + if (!n2) + n->info = OC_EXEC; + } + break; - case OC_PRINT: - case OC_PRINTF: - debug_printf_parse("%s: OC_PRINT[F]\n", __func__); - n = chain_node(t_info); - n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM); - if (t_tclass & TC_OUTRDR) { - n->info |= t_info; - n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM); - } - if (t_tclass & TC_GRPTERM) - rollback_token(); - break; + case OC_PRINT: + case OC_PRINTF: + debug_printf_parse("%s: OC_PRINT[F]\n", __func__); + n = chain_node(t_info); + n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM); + if (t_tclass & TC_OUTRDR) { + n->info |= t_info; + n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM); + } + if (t_tclass & TC_GRPTERM) + rollback_token(); + break; - case OC_BREAK: - debug_printf_parse("%s: OC_BREAK\n", __func__); - n = chain_node(OC_EXEC); - n->a.n = break_ptr; - chain_expr(t_info); - break; + case OC_BREAK: + debug_printf_parse("%s: OC_BREAK\n", __func__); + n = chain_node(OC_EXEC); + n->a.n = break_ptr; + chain_expr(t_info); + break; - case OC_CONTINUE: - debug_printf_parse("%s: OC_CONTINUE\n", __func__); - n = chain_node(OC_EXEC); - n->a.n = continue_ptr; - chain_expr(t_info); - break; + case OC_CONTINUE: + debug_printf_parse("%s: OC_CONTINUE\n", __func__); + n = chain_node(OC_EXEC); + n->a.n = continue_ptr; + chain_expr(t_info); + break; - /* delete, next, nextfile, return, exit */ - default: - debug_printf_parse("%s: default\n", __func__); - 
chain_expr(t_info); - } + /* delete, next, nextfile, return, exit */ + default: + debug_printf_parse("%s: default\n", __func__); + chain_expr(t_info); } } -- cgit v1.2.3-55-g6feb From 6872c193a935df47facf717c15a32f93b43c6bcf Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 12:16:36 +0200 Subject: awk: fix parsing of expressions such as "v (a)" function old new delta next_token 812 825 +13 Signed-off-by: Denys Vlasenko --- editors/awk.c | 22 ++++++++++++++++++---- testsuite/awk.tests | 11 +++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 34bcc1798..ce860dc04 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1231,11 +1231,24 @@ static uint32_t next_token(uint32_t expected) while (isalnum_(*p)) p++; end_of_name = p; - tc = TC_VARIABLE; - /* also consume whitespace between functionname and bracket */ - if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) -//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters? + + if (last_token_class == TC_FUNCDECL) + /* eat space in "function FUNC (...) {...}" declaration */ p = skip_spaces(p); + else if (expected & TC_ARRAY) { + /* eat space between array name and [ */ + char *s = skip_spaces(p); + if (*s == '[') /* array ref, not just a name? */ + p = s; + } + /* else: do NOT consume whitespace after variable name! + * gawk allows definition "function FUNC (p) {...}" - note space, + * but disallows the call "FUNC (p)" because it isn't one - + * expression "v (a)" should NOT be parsed as TC_FUNCTION: + * it is a valid concatenation if "v" is a variable, + * not a function name (and type of name is not known at parse time). 
+ */ + if (*p == '(') { p++; tc = TC_FUNCTION; @@ -1245,6 +1258,7 @@ static uint32_t next_token(uint32_t expected) tc = TC_ARRAY; debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); } else { + tc = TC_VARIABLE; debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); if (end_of_name == p) { /* there is no space for trailing NUL in t_string! diff --git a/testsuite/awk.tests b/testsuite/awk.tests index cf9b722dc..6e35d33dd 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -71,6 +71,17 @@ testing "awk properly handles undefined function" \ "L1\n\nawk: cmd. line:5: Call to undefined function\n" \ "" "" +prg=' +BEGIN { + v=1 + a=2 + print v (a) +}' +testing "'v (a)' is not a function call, it is a concatenation" \ + "awk '$prg' 2>&1" \ + "12\n" \ + "" "" + optional DESKTOP testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n" -- cgit v1.2.3-55-g6feb From 21fbee2e87ddf7b47bb501b6529b63ac2b3af0bd Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 14:33:04 +0200 Subject: awk: document which hashes are used at what state (parse/execute) We can free them after they are no longer needed. (Currently, being a NOEXEC applet is much larger waste of memory for the case of long-running awk script). 
function old new delta awk_main 831 827 -4 Signed-off-by: Denys Vlasenko --- editors/awk.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index ce860dc04..6142144bb 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -527,7 +527,10 @@ struct globals { chain *seq; node *break_ptr, *continue_ptr; rstream *iF; - xhash *vhash, *ahash, *fdhash, *fnhash; + xhash *ahash; /* argument names, used only while parsing function bodies */ + xhash *fnhash; /* function names, used only in parsing stage */ + xhash *vhash; /* variables and arrays */ + xhash *fdhash; /* file objects, used only in execution stage */ const char *g_progname; int g_lineno; int nfields; @@ -1719,6 +1722,7 @@ static void parse_program(char *p) debug_printf_parse("%s: TC_FUNCDECL\n", __func__); next_token(TC_FUNCTION); f = newfunc(t_string); +//FIXME: dup check: functions can't be redefined, this is not ok: awk 'func f(){}; func f(){}' f->body.first = NULL; f->nargs = 0; /* func arg list: comma sep list of args, and a close paren */ @@ -3389,12 +3393,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv) if (ENABLE_LOCALE_SUPPORT) setlocale(LC_NUMERIC, "C"); - vhash = hash_init(); - ahash = hash_init(); - fdhash = hash_init(); - fnhash = hash_init(); - /* initialize variables */ + vhash = hash_init(); { char *vnames = (char *)vNames; /* cheat */ char *vvalues = (char *)vValues; @@ -3416,10 +3416,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) handle_special(intvar[FS]); handle_special(intvar[RS]); - newfile("/dev/stdin")->F = stdin; - newfile("/dev/stdout")->F = stdout; - newfile("/dev/stderr")->F = stderr; - /* Huh, people report that sometimes environ is NULL. Oh well. 
*/ if (environ) { char **envp; @@ -3449,6 +3445,10 @@ int awk_main(int argc UNUSED_PARAM, char **argv) if (!is_assignment(llist_pop(&list_v))) bb_show_usage(); } + + /* Parse all supplied programs */ + fnhash = hash_init(); + ahash = hash_init(); while (list_f) { int fd; char *s; @@ -3471,6 +3471,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv) bb_show_usage(); parse_program(*argv++); } + //free_hash(ahash) // ~250 bytes, arg names, used only during parse of function bodies + //ahash = NULL; // debug + //free_hash(fnhash) // ~250 bytes, used only for function names + //fnhash = NULL; // debug + /* parsing done, on to executing */ /* fill in ARGV array */ setari_u(intvar[ARGV], 0, "awk"); @@ -3479,6 +3484,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv) setari_u(intvar[ARGV], ++i, *argv++); setvar_i(intvar[ARGC], i + 1); + fdhash = hash_init(); + newfile("/dev/stdin")->F = stdin; + newfile("/dev/stdout")->F = stdout; + newfile("/dev/stderr")->F = stderr; + zero_out_var(&tv); evaluate(beginseq.first, &tv); if (!mainseq.first && !endseq.first) -- cgit v1.2.3-55-g6feb From b3c91a127f8baecee0265ba92898ae1e718bdb31 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 18:33:25 +0200 Subject: awk: free unused parsing structures after parse is done function old new delta hash_clear - 90 +90 awk_main 827 849 +22 clear_array 90 - -90 ------------------------------------------------------------------------------ (add/remove: 1/1 grow/shrink: 1/0 up/down: 112/-90) Total: 22 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 74 +++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 6142144bb..4e29b28cf 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -530,7 +530,8 @@ struct globals { xhash *ahash; /* argument names, used only while parsing function bodies */ xhash *fnhash; /* function names, used only in parsing stage */ xhash *vhash; /* 
variables and arrays */ - xhash *fdhash; /* file objects, used only in execution stage */ + //xhash *fdhash; /* file objects, used only in execution stage */ + //we are reusing ahash as fdhash, via define (see later) const char *g_progname; int g_lineno; int nfields; @@ -592,10 +593,13 @@ struct globals2 { #define break_ptr (G1.break_ptr ) #define continue_ptr (G1.continue_ptr) #define iF (G1.iF ) -#define vhash (G1.vhash ) #define ahash (G1.ahash ) -#define fdhash (G1.fdhash ) #define fnhash (G1.fnhash ) +#define vhash (G1.vhash ) +#define fdhash ahash +//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing, +// and ends up empty after parsing phase. Thus, we can simply reuse it +// for fdhash in execution stage. #define g_progname (G1.g_progname ) #define g_lineno (G1.g_lineno ) #define nfields (G1.nfields ) @@ -682,6 +686,33 @@ static xhash *hash_init(void) return newhash; } +static void hash_clear(xhash *hash) +{ + unsigned i; + hash_item *hi, *thi; + + for (i = 0; i < hash->csize; i++) { + hi = hash->items[i]; + while (hi) { + thi = hi; + hi = hi->next; + free(thi->data.v.string); + free(thi); + } + hash->items[i] = NULL; + } + hash->glen = hash->nel = 0; +} + +#if 0 //UNUSED +static void hash_free(xhash *hash) +{ + hash_clear(hash); + free(hash->items); + free(hash); +} +#endif + /* find item in hash, return ptr to data, NULL if not found */ static void *hash_search(xhash *hash, const char *name) { @@ -869,23 +900,7 @@ static xhash *iamarray(var *v) return a->x.array; } -static void clear_array(xhash *array) -{ - unsigned i; - hash_item *hi, *thi; - - for (i = 0; i < array->csize; i++) { - hi = array->items[i]; - while (hi) { - thi = hi; - hi = hi->next; - free(thi->data.v.string); - free(thi); - } - array->items[i] = NULL; - } - array->glen = array->nel = 0; -} +#define clear_array(array) hash_clear(array) /* clear a variable */ static var *clrvar(var *v) @@ -1742,7 +1757,7 @@ static void parse_program(char *p) } seq = &f->body; chain_group(); - 
clear_array(ahash); + hash_clear(ahash); } else if (tclass & TS_OPSEQ) { debug_printf_parse("%s: TS_OPSEQ\n", __func__); rollback_token(); @@ -3471,11 +3486,16 @@ int awk_main(int argc UNUSED_PARAM, char **argv) bb_show_usage(); parse_program(*argv++); } - //free_hash(ahash) // ~250 bytes, arg names, used only during parse of function bodies - //ahash = NULL; // debug - //free_hash(fnhash) // ~250 bytes, used only for function names - //fnhash = NULL; // debug - /* parsing done, on to executing */ + /* Free unused parse structures */ + //hash_free(fnhash); // ~250 bytes when empty, used only for function names + //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs + // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not). + free(fnhash->items); + free(fnhash); + fnhash = NULL; // debug + //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing + + /* Parsing done, on to executing */ /* fill in ARGV array */ setari_u(intvar[ARGV], 0, "awk"); @@ -3484,7 +3504,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) setari_u(intvar[ARGV], ++i, *argv++); setvar_i(intvar[ARGC], i + 1); - fdhash = hash_init(); + //fdhash = ahash - done via define newfile("/dev/stdin")->F = stdin; newfile("/dev/stdout")->F = stdout; newfile("/dev/stderr")->F = stderr; -- cgit v1.2.3-55-g6feb From 3aff3b9cb81c1f574aaafaf3981e755c6639e2bc Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 29 Jun 2021 19:07:36 +0200 Subject: awk: assorted optimizations hash_find(): do not caclculate hash twice. Do not divide - can use cheap multiply-by-8 shift. nextword(): do not repeatedly increment in-memory value, do it in register, then store final result. hashwalk_init(): do not strlen() twice. 
function old new delta hash_search3 - 49 +49 hash_find 259 281 +22 nextword 19 16 -3 evaluate 3141 3137 -4 hash_search 54 28 -26 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 1/3 up/down: 71/-33) Total: 38 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 4e29b28cf..a4cd3cf93 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -696,6 +696,7 @@ static void hash_clear(xhash *hash) while (hi) { thi = hi; hi = hi->next; +//FIXME: this assumes that it's a hash of *variables*: free(thi->data.v.string); free(thi); } @@ -714,11 +715,11 @@ static void hash_free(xhash *hash) #endif /* find item in hash, return ptr to data, NULL if not found */ -static void *hash_search(xhash *hash, const char *name) +static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx) { hash_item *hi; - hi = hash->items[hashidx(name) % hash->csize]; + hi = hash->items[idx % hash->csize]; while (hi) { if (strcmp(hi->name, name) == 0) return &hi->data; @@ -727,6 +728,11 @@ static void *hash_search(xhash *hash, const char *name) return NULL; } +static void *hash_search(xhash *hash, const char *name) +{ + return hash_search3(hash, name, hashidx(name)); +} + /* grow hash if it becomes too big */ static void hash_rebuild(xhash *hash) { @@ -762,16 +768,17 @@ static void *hash_find(xhash *hash, const char *name) unsigned idx; int l; - hi = hash_search(hash, name); + idx = hashidx(name); + hi = hash_search3(hash, name, idx); if (!hi) { - if (++hash->nel / hash->csize > 10) + if (++hash->nel > hash->csize * 8) hash_rebuild(hash); l = strlen(name) + 1; hi = xzalloc(sizeof(*hi) + l); strcpy(hi->name, name); - idx = hashidx(name) % hash->csize; + idx = idx % hash->csize; hi->next = hash->items[idx]; hash->items[idx] = hi; hash->glen += l; @@ -822,8 +829,10 @@ static char *skip_spaces(char *p) static 
char *nextword(char **s) { char *p = *s; - while (*(*s)++ != '\0') + char *q = p; + while (*q++ != '\0') continue; + *s = q; return p; } @@ -2116,8 +2125,7 @@ static void hashwalk_init(var *v, xhash *array) for (i = 0; i < array->csize; i++) { hi = array->items[i]; while (hi) { - strcpy(w->end, hi->name); - nextword(&w->end); + w->end = stpcpy(w->end, hi->name) + 1; hi = hi->next; } } @@ -3504,7 +3512,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) setari_u(intvar[ARGV], ++i, *argv++); setvar_i(intvar[ARGC], i + 1); - //fdhash = ahash - done via define + //fdhash = ahash; // done via define newfile("/dev/stdin")->F = stdin; newfile("/dev/stdout")->F = stdout; newfile("/dev/stderr")->F = stderr; -- cgit v1.2.3-55-g6feb From f99800758e24ff159808ca0b44064f548ed77a26 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 30 Jun 2021 00:49:24 +0200 Subject: modprobe: when reading modules.builtin, use basenames of files from it function old new delta modprobe_main 798 803 +5 Signed-off-by: Denys Vlasenko --- modutils/modprobe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modutils/modprobe.c b/modutils/modprobe.c index c334186b8..235706fd5 100644 --- a/modutils/modprobe.c +++ b/modutils/modprobe.c @@ -629,8 +629,9 @@ int modprobe_main(int argc UNUSED_PARAM, char **argv) config_close(parser); parser = config_open2("modules.builtin", fopen_for_read); + /* this file contains lines like "kernel/fs/binfmt_script.ko" */ while (config_read(parser, &s, 1, 1, "# \t", PARSE_NORMAL)) - get_or_add_modentry(s)->flags |= MODULE_FLAG_BUILTIN; + get_or_add_modentry(bb_basename(s))->flags |= MODULE_FLAG_BUILTIN; config_close(parser); } -- cgit v1.2.3-55-g6feb From 6cf6f1eaee1f6be2b936c2ff0e5852c00740edb4 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 30 Jun 2021 02:12:27 +0200 Subject: awk: remove custom pool allocator for temporary awk variables It seems to be designed to reduce overhead of malloc's auxiliary data, by allocating at least 64 
variables as a block. With "struct var" being about 20-32 bytes long (32/64 bits), malloc overhead for one temporary indeed is high, ~33% more memory used than needed. function old new delta evaluate 3137 3145 +8 modprobe_main 798 803 +5 exec_builtin 1414 1419 +5 awk_printf 476 481 +5 as_regex 132 137 +5 EMSG_INTERNAL_ERROR 15 - -15 nvfree 169 116 -53 nvalloc 145 - -145 ------------------------------------------------------------------------------ (add/remove: 0/2 grow/shrink: 5/1 up/down: 28/-213) Total: -185 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 164 ++++++++++++++++++++++------------------------------------ 1 file changed, 61 insertions(+), 103 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index a4cd3cf93..35c11ec58 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -93,7 +93,6 @@ enum { }; #define MAXVARFMT 240 -#define MINNVBLOCK 64 /* variable flags */ #define VF_NUMBER 0x0001 /* 1 = primary type is number */ @@ -120,8 +119,8 @@ typedef struct walker_list { /* Variable */ typedef struct var_s { unsigned type; /* flags */ - double number; char *string; + double number; union { int aidx; /* func arg idx (for compilation stage) */ struct xhash_s *array; /* array ptr */ @@ -192,15 +191,6 @@ typedef struct node_s { } a; } node; -/* Block of temporary variables */ -typedef struct nvblock_s { - int size; - var *pos; - struct nvblock_s *prev; - struct nvblock_s *next; - var nv[]; -} nvblock; - typedef struct tsplitter_s { node n; regex_t re[2]; @@ -537,7 +527,6 @@ struct globals { int nfields; int maxfields; /* used in fsrealloc() only */ var *Fields; - nvblock *g_cb; char *g_pos; char g_saved_ch; smallint icase; @@ -605,7 +594,6 @@ struct globals2 { #define nfields (G1.nfields ) #define maxfields (G1.maxfields ) #define Fields (G1.Fields ) -#define g_cb (G1.g_cb ) #define g_pos (G1.g_pos ) #define g_saved_ch (G1.g_saved_ch ) #define icase (G1.icase ) @@ -640,7 +628,6 @@ static int awk_exit(int) NORETURN; /* ---- error handling ---- */ 
-static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error"; static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; @@ -1050,77 +1037,6 @@ static int istrue(var *v) return (v->string && v->string[0]); } -/* temporary variables allocator. Last allocated should be first freed */ -static var *nvalloc(int n) -{ - nvblock *pb = NULL; - var *v, *r; - int size; - - while (g_cb) { - pb = g_cb; - if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) - break; - g_cb = g_cb->next; - } - - if (!g_cb) { - size = (n <= MINNVBLOCK) ? MINNVBLOCK : n; - g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var)); - g_cb->size = size; - g_cb->pos = g_cb->nv; - g_cb->prev = pb; - /*g_cb->next = NULL; - xzalloc did it */ - if (pb) - pb->next = g_cb; - } - - v = r = g_cb->pos; - g_cb->pos += n; - - while (v < g_cb->pos) { - v->type = 0; - v->string = NULL; - v++; - } - - return r; -} - -static void nvfree(var *v) -{ - var *p; - - if (v < g_cb->nv || v >= g_cb->pos) - syntax_error(EMSG_INTERNAL_ERROR); - - for (p = v; p < g_cb->pos; p++) { - if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { - clear_array(iamarray(p)); - free(p->x.array->items); - free(p->x.array); - } - if (p->type & VF_WALK) { - walker_list *n; - walker_list *w = p->x.walker; - debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); - p->x.walker = NULL; - while (w) { - n = w->prev; - debug_printf_walker(" free(%p)\n", w); - free(w); - w = n; - } - } - clrvar(p); - } - - g_cb->pos = v; - while (g_cb->prev && g_cb->pos == g_cb->nv) { - g_cb = g_cb->prev; - } -} - /* ------- awk program text parsing ------- */ /* Parse next token pointed by global pos, place results into global t_XYZ variables. 
@@ -1793,6 +1709,41 @@ static void parse_program(char *p) /* -------- program execution part -------- */ +/* temporary variables allocator */ +static var *nvalloc(int sz) +{ + return xzalloc(sz * sizeof(var)); +} + +static void nvfree(var *v, int sz) +{ + var *p = v; + + while (--sz >= 0) { + if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { + clear_array(iamarray(p)); + free(p->x.array->items); + free(p->x.array); + } + if (p->type & VF_WALK) { + walker_list *n; + walker_list *w = p->x.walker; + debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); + p->x.walker = NULL; + while (w) { + n = w->prev; + debug_printf_walker(" free(%p)\n", w); + free(w); + w = n; + } + } + clrvar(p); + p++; + } + + free(v); +} + static node *mk_splitter(const char *s, tsplitter *spl) { regex_t *re, *ire; @@ -1814,9 +1765,9 @@ static node *mk_splitter(const char *s, tsplitter *spl) return n; } -/* use node as a regular expression. Supplied with node ptr and regex_t +/* Use node as a regular expression. Supplied with node ptr and regex_t * storage space. Return ptr to regex (if result points to preg, it should - * be later regfree'd manually + * be later regfree'd manually). */ static regex_t *as_regex(node *op, regex_t *preg) { @@ -1840,7 +1791,7 @@ static regex_t *as_regex(node *op, regex_t *preg) cflags &= ~REG_EXTENDED; xregcomp(preg, s, cflags); } - nvfree(v); + nvfree(v, 1); return preg; } @@ -2292,6 +2243,8 @@ static char *awk_printf(node *n, int *len) var *v, *arg; v = nvalloc(1); +//TODO: above, to avoid allocating a single temporary var, take a pointer +//to a temporary that our caller (evaluate()) already has? 
fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v))); i = 0; @@ -2333,7 +2286,7 @@ static char *awk_printf(node *n, int *len) } free(fmt); - nvfree(v); + nvfree(v, 1); b = xrealloc(b, i + 1); b[i] = '\0'; #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS @@ -2661,14 +2614,14 @@ static NOINLINE var *exec_builtin(node *op, var *res) break; } - nvfree(tv); + nvfree(tv, 4); return res; #undef tspl } /* * Evaluate node - the heart of the program. Supplied with subtree - * and place where to store result. returns ptr to result. + * and place where to store result. Returns ptr to result. */ #define XC(n) ((n) >> 8) @@ -2953,33 +2906,38 @@ static var *evaluate(node *op, var *res) break; case XC( OC_FUNC ): { - var *vbeg, *v; + var *tv, *sv_fnargs; const char *sv_progname; + int nargs1, i; + debug_printf_eval("FUNC\n"); - /* The body might be empty, still has to eval the args */ if (!op->r.n->info && !op->r.f->body.first) syntax_error(EMSG_UNDEF_FUNC); - vbeg = v = nvalloc(op->r.f->nargs + 1); + /* The body might be empty, still has to eval the args */ + nargs1 = op->r.f->nargs + 1; + tv = nvalloc(nargs1); + i = 0; while (op1) { +//TODO: explain why one iteration is done even for the case p->r.f->nargs == 0 var *arg = evaluate(nextarg(&op1), v1); - copyvar(v, arg); - v->type |= VF_CHILD; - v->x.parent = arg; - if (++v - vbeg >= op->r.f->nargs) + copyvar(&tv[i], arg); + tv[i].type |= VF_CHILD; + tv[i].x.parent = arg; + if (++i >= op->r.f->nargs) break; } - v = fnargs; - fnargs = vbeg; + sv_fnargs = fnargs; sv_progname = g_progname; + fnargs = tv; res = evaluate(op->r.f->body.first, res); + nvfree(fnargs, nargs1); g_progname = sv_progname; - nvfree(fnargs); - fnargs = v; + fnargs = sv_fnargs; break; } @@ -3301,7 +3259,7 @@ static var *evaluate(node *op, var *res) break; } /* while (op) */ - nvfree(v1); + nvfree(v1, 2); debug_printf_eval("returning from %s(): %p\n", __func__, res); return res; #undef fnargs -- cgit v1.2.3-55-g6feb From 86fc2872b33224cfa5442700c2a8abd020cbf900 Mon Sep 17 
00:00:00 2001 From: Denys Vlasenko Date: Wed, 30 Jun 2021 12:12:20 +0200 Subject: awk: replace incorrect use of union in undefined function check (no code changes) ...which reveals that it's buggy: it thinks "func f(){}" is an undefined function! Signed-off-by: Denys Vlasenko --- editors/awk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/editors/awk.c b/editors/awk.c index 35c11ec58..1115085da 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2912,7 +2912,7 @@ static var *evaluate(node *op, var *res) debug_printf_eval("FUNC\n"); - if (!op->r.n->info && !op->r.f->body.first) + if (op->r.f->nargs == 0 && !op->r.f->body.first) syntax_error(EMSG_UNDEF_FUNC); /* The body might be empty, still has to eval the args */ -- cgit v1.2.3-55-g6feb From d1507101695f6bad35a61c4770b7d3913597ac16 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 30 Jun 2021 12:23:51 +0200 Subject: awk: allow empty functions with no arguments, disallow function redefinitions function old new delta .rodata 103681 103700 +19 parse_program 303 307 +4 evaluate 3145 3141 -4 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/1 up/down: 23/-4) Total: 19 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 11 +++++++---- testsuite/awk.tests | 10 ++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 1115085da..c05d5d651 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -139,6 +139,7 @@ typedef struct chain_s { /* Function */ typedef struct func_s { unsigned nargs; + smallint defined; struct chain_s body; } func; @@ -1662,9 +1663,11 @@ static void parse_program(char *p) debug_printf_parse("%s: TC_FUNCDECL\n", __func__); next_token(TC_FUNCTION); f = newfunc(t_string); -//FIXME: dup check: functions can't be redefined, this is not ok: awk 'func f(){}; func f(){}' - f->body.first = NULL; - f->nargs = 0; + if (f->defined) + syntax_error("Duplicate function"); + 
f->defined = 1; + //f->body.first = NULL; - already is + //f->nargs = 0; - already is /* func arg list: comma sep list of args, and a close paren */ for (;;) { if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { @@ -2912,7 +2915,7 @@ static var *evaluate(node *op, var *res) debug_printf_eval("FUNC\n"); - if (op->r.f->nargs == 0 && !op->r.f->body.first) + if (!op->r.f->defined) syntax_error(EMSG_UNDEF_FUNC); /* The body might be empty, still has to eval the args */ diff --git a/testsuite/awk.tests b/testsuite/awk.tests index 6e35d33dd..873cc3680 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -44,6 +44,16 @@ testing "awk handles empty function f(arg){}" \ "L1\n\nL2\n\n" \ "" "" +prg=' +function empty_fun(){} +END {empty_fun() + print "Ok" +}' +testing "awk handles empty function f(){}" \ + "awk '$prg'" \ + "Ok\n" \ + "" "" + prg=' function outer_fun() { return 1 -- cgit v1.2.3-55-g6feb From ca9278ee5855a91a5521960d3743809f47ed27b8 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 30 Jun 2021 12:42:39 +0200 Subject: awk: rewrite "print" logic a bit to make it clearer Signed-off-by: Denys Vlasenko --- editors/awk.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index c05d5d651..0fbca0433 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2792,7 +2792,7 @@ static var *evaluate(node *op, var *res) if (!op1) { fputs(getvar_s(intvar[F0]), F); } else { - while (op1) { + for (;;) { var *v = evaluate(nextarg(&op1), v1); if (v->type & VF_NUMBER) { fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), @@ -2801,13 +2801,12 @@ static var *evaluate(node *op, var *res) } else { fputs(getvar_s(v), F); } - - if (op1) - fputs(getvar_s(intvar[OFS]), F); + if (!op1) + break; + fputs(getvar_s(intvar[OFS]), F); } } fputs(getvar_s(intvar[ORS]), F); - } else { /* OC_PRINTF */ char *s = awk_printf(op1, &len); #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS -- cgit v1.2.3-55-g6feb From 
d7354df169603807fe2ac4f8a0f9f72c9703184f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 30 Jun 2021 12:52:51 +0200 Subject: awk: evaluate all, even superfluous function args function old new delta evaluate 3128 3135 +7 Signed-off-by: Denys Vlasenko --- editors/awk.c | 19 ++++++++++++------- testsuite/awk.tests | 8 +++++++- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 0fbca0433..47bbc10a6 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2910,7 +2910,7 @@ static var *evaluate(node *op, var *res) case XC( OC_FUNC ): { var *tv, *sv_fnargs; const char *sv_progname; - int nargs1, i; + int nargs, i; debug_printf_eval("FUNC\n"); @@ -2918,17 +2918,22 @@ static var *evaluate(node *op, var *res) syntax_error(EMSG_UNDEF_FUNC); /* The body might be empty, still has to eval the args */ - nargs1 = op->r.f->nargs + 1; - tv = nvalloc(nargs1); + nargs = op->r.f->nargs; + tv = nvalloc(nargs); i = 0; while (op1) { -//TODO: explain why one iteration is done even for the case p->r.f->nargs == 0 var *arg = evaluate(nextarg(&op1), v1); + if (i == nargs) { + /* call with more arguments than function takes. + * (gawk warns: "warning: function 'f' called with more arguments than declared"). 
+ * They are still evaluated, but discarded: */ + clrvar(arg); + continue; + } copyvar(&tv[i], arg); tv[i].type |= VF_CHILD; tv[i].x.parent = arg; - if (++i >= op->r.f->nargs) - break; + i++; } sv_fnargs = fnargs; @@ -2936,7 +2941,7 @@ static var *evaluate(node *op, var *res) fnargs = tv; res = evaluate(op->r.f->body.first, res); - nvfree(fnargs, nargs1); + nvfree(fnargs, nargs); g_progname = sv_progname; fnargs = sv_fnargs; diff --git a/testsuite/awk.tests b/testsuite/awk.tests index 873cc3680..3c230393f 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -87,11 +87,17 @@ BEGIN { a=2 print v (a) }' -testing "'v (a)' is not a function call, it is a concatenation" \ +testing "awk 'v (a)' is not a function call, it is a concatenation" \ "awk '$prg' 2>&1" \ "12\n" \ "" "" +prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}' +testing "awk unused function args are evaluated" \ + "awk '$prg' 2>&1" \ + "G\nG\nF\n" \ + "" "" + optional DESKTOP testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n" -- cgit v1.2.3-55-g6feb From 1573487e2100892d06e3628828690692313a48d5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 1 Jul 2021 16:02:16 +0200 Subject: awk: rename temp variables, no code changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 76 ++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 47bbc10a6..2c2cb74d7 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1775,14 +1775,14 @@ static node *mk_splitter(const char *s, tsplitter *spl) static regex_t *as_regex(node *op, regex_t *preg) { int cflags; - var *v; + var *tmpvar; const char *s; if ((op->info & OPCLSMASK) == OC_REGEXP) { return icase ? op->r.ire : op->l.re; } - v = nvalloc(1); - s = getvar_s(evaluate(op, v)); + tmpvar = nvalloc(1); + s = getvar_s(evaluate(op, tmpvar)); cflags = icase ? 
REG_EXTENDED | REG_ICASE : REG_EXTENDED; /* Testcase where REG_EXTENDED fails (unpaired '{'): @@ -1794,7 +1794,7 @@ static regex_t *as_regex(node *op, regex_t *preg) cflags &= ~REG_EXTENDED; xregcomp(preg, s, cflags); } - nvfree(v, 1); + nvfree(tmpvar, 1); return preg; } @@ -2243,12 +2243,12 @@ static char *awk_printf(node *n, int *len) const char *s1; int i, j, incr, bsize; char c, c1; - var *v, *arg; + var *tmpvar, *arg; - v = nvalloc(1); + tmpvar = nvalloc(1); //TODO: above, to avoid allocating a single temporary var, take a pointer //to a temporary that our caller (evaluate()) already has? - fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v))); + fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), tmpvar))); i = 0; while (*f) { @@ -2268,7 +2268,7 @@ static char *awk_printf(node *n, int *len) f++; c1 = *f; *f = '\0'; - arg = evaluate(nextarg(&n), v); + arg = evaluate(nextarg(&n), tmpvar); j = i; if (c == 'c' || !c) { @@ -2289,7 +2289,7 @@ static char *awk_printf(node *n, int *len) } free(fmt); - nvfree(v, 1); + nvfree(tmpvar, 1); b = xrealloc(b, i + 1); b[i] = '\0'; #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS @@ -2429,7 +2429,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) { #define tspl (G.exec_builtin__tspl) - var *tv; + var *tmpvars; node *an[4]; var *av[4]; const char *as[4]; @@ -2441,7 +2441,12 @@ static NOINLINE var *exec_builtin(node *op, var *res) time_t tt; int i, l, ll, n; - tv = nvalloc(4); + tmpvars = nvalloc(4); +#define TMPVAR0 (tmpvars) +#define TMPVAR1 (tmpvars + 1) +#define TMPVAR2 (tmpvars + 2) +#define TMPVAR3 (tmpvars + 3) +#define TMPVAR(i) (tmpvars + (i)) isr = info = op->info; op = op->l.n; @@ -2449,7 +2454,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) for (i = 0; i < 4 && op; i++) { an[i] = nextarg(&op); if (isr & 0x09000000) - av[i] = evaluate(an[i], &tv[i]); + av[i] = evaluate(an[i], TMPVAR(i)); if (isr & 0x08000000) as[i] = getvar_s(av[i]); isr >>= 1; @@ -2474,7 +2479,7 @@ static NOINLINE var *exec_builtin(node *op, var 
*res) if (nargs > 2) { spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? - an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl); + an[2] : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl); } else { spl = &fsplitter.n; } @@ -2617,7 +2622,13 @@ static NOINLINE var *exec_builtin(node *op, var *res) break; } - nvfree(tv, 4); + nvfree(tmpvars, 4); +#undef TMPVAR0 +#undef TMPVAR1 +#undef TMPVAR2 +#undef TMPVAR3 +#undef TMPVAR + return res; #undef tspl } @@ -2636,14 +2647,16 @@ static var *evaluate(node *op, var *res) #define seed (G.evaluate__seed) #define sreg (G.evaluate__sreg) - var *v1; + var *tmpvars; +#define TMPVAR0 (tmpvars) +#define TMPVAR1 (tmpvars + 1) if (!op) return setvar_s(res, NULL); debug_printf_eval("entered %s()\n", __func__); - v1 = nvalloc(2); + tmpvars = nvalloc(2); while (op) { struct { @@ -2683,7 +2696,7 @@ static var *evaluate(node *op, var *res) } if (op1->r.n) { /* array ref? */ const char *s; - s = getvar_s(evaluate(op1->r.n, v1)); + s = getvar_s(evaluate(op1->r.n, TMPVAR0)); hash_remove(iamarray(v), s); } else { clear_array(iamarray(v)); @@ -2693,7 +2706,7 @@ static var *evaluate(node *op, var *res) /* execute inevitable things */ if (opinfo & OF_RES1) - L.v = evaluate(op1, v1); + L.v = evaluate(op1, TMPVAR0); if (opinfo & OF_STR1) { L.s = getvar_s(L.v); debug_printf_eval("L.s:'%s'\n", L.s); @@ -2710,7 +2723,7 @@ static var *evaluate(node *op, var *res) * (Seen trying to evaluate "$444 $44444") */ if (opinfo & OF_RES2) { - R.v = evaluate(op->r.n, v1+1); + R.v = evaluate(op->r.n, TMPVAR1); //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? 
//L.v = NULL; } @@ -2793,7 +2806,7 @@ static var *evaluate(node *op, var *res) fputs(getvar_s(intvar[F0]), F); } else { for (;;) { - var *v = evaluate(nextarg(&op1), v1); + var *v = evaluate(nextarg(&op1), TMPVAR0); if (v->type & VF_NUMBER) { fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), getvar_i(v), TRUE); @@ -2892,7 +2905,7 @@ static var *evaluate(node *op, var *res) /* if source is a temporary string, jusk relink it to dest */ //Disabled: if R.v is numeric but happens to have cached R.v->string, //then L.v ends up being a string, which is wrong -// if (R.v == v1+1 && R.v->string) { +// if (R.v == TMPVAR1 && R.v->string) { // res = setvar_p(L.v, R.v->string); // R.v->string = NULL; // } else { @@ -2908,7 +2921,7 @@ static var *evaluate(node *op, var *res) break; case XC( OC_FUNC ): { - var *tv, *sv_fnargs; + var *argvars, *sv_fnargs; const char *sv_progname; int nargs, i; @@ -2919,10 +2932,10 @@ static var *evaluate(node *op, var *res) /* The body might be empty, still has to eval the args */ nargs = op->r.f->nargs; - tv = nvalloc(nargs); + argvars = nvalloc(nargs); i = 0; while (op1) { - var *arg = evaluate(nextarg(&op1), v1); + var *arg = evaluate(nextarg(&op1), TMPVAR0); if (i == nargs) { /* call with more arguments than function takes. * (gawk warns: "warning: function 'f' called with more arguments than declared"). 
@@ -2930,18 +2943,18 @@ static var *evaluate(node *op, var *res) clrvar(arg); continue; } - copyvar(&tv[i], arg); - tv[i].type |= VF_CHILD; - tv[i].x.parent = arg; + copyvar(&argvars[i], arg); + argvars[i].type |= VF_CHILD; + argvars[i].x.parent = arg; i++; } sv_fnargs = fnargs; sv_progname = g_progname; - fnargs = tv; + fnargs = argvars; res = evaluate(op->r.f->body.first, res); - nvfree(fnargs, nargs); + nvfree(argvars, nargs); g_progname = sv_progname; fnargs = sv_fnargs; @@ -3266,7 +3279,10 @@ static var *evaluate(node *op, var *res) break; } /* while (op) */ - nvfree(v1, 2); + nvfree(tmpvars, 2); +#undef TMPVAR0 +#undef TMPVAR1 + debug_printf_eval("returning from %s(): %p\n", __func__, res); return res; #undef fnargs -- cgit v1.2.3-55-g6feb From 8b4c429025c233640bd5c5838552f34683a06fc0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 1 Jul 2021 17:50:26 +0200 Subject: awk: use static tmpvars instead of nvalloc(1)ed ones ptest() was using this idea already. As far as I can see, this is safe. Testsuite passes. One downside is that a temporary from e.g. printf invocation won't be freed until the next printf call. 
function old new delta awk_printf 481 468 -13 as_regex 137 111 -26 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-39) Total: -39 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 2c2cb74d7..0be044eef 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -559,7 +559,9 @@ struct globals2 { unsigned evaluate__seed; regex_t evaluate__sreg; - var ptest__v; + var ptest__tmpvar; + var awk_printf__tmpvar; + var as_regex__tmpvar; tsplitter exec_builtin__tspl; @@ -1775,14 +1777,19 @@ static node *mk_splitter(const char *s, tsplitter *spl) static regex_t *as_regex(node *op, regex_t *preg) { int cflags; - var *tmpvar; const char *s; if ((op->info & OPCLSMASK) == OC_REGEXP) { return icase ? op->r.ire : op->l.re; } - tmpvar = nvalloc(1); - s = getvar_s(evaluate(op, tmpvar)); + +#define TMPVAR (&G.as_regex__tmpvar) + //tmpvar = nvalloc(1); + // We use a single "static" tmpvar (instead of on-stack or malloced one) + // to decrease memory consumption in deeply-recursive awk programs. + // The rule to work safely is to never call evaluate() while our static + // TMPVAR's value is still needed. + s = getvar_s(evaluate(op, TMPVAR)); cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED; /* Testcase where REG_EXTENDED fails (unpaired '{'): @@ -1794,7 +1801,8 @@ static regex_t *as_regex(node *op, regex_t *preg) cflags &= ~REG_EXTENDED; xregcomp(preg, s, cflags); } - nvfree(tmpvar, 1); + //nvfree(tmpvar, 1); +#undef TMPVAR return preg; } @@ -2105,8 +2113,11 @@ static int hashwalk_next(var *v) /* evaluate node, return 1 when result is true, 0 otherwise */ static int ptest(node *pattern) { - /* ptest__v is "static": to save stack space? 
*/ - return istrue(evaluate(pattern, &G.ptest__v)); + // We use a single "static" tmpvar (instead of on-stack or malloced one) + // to decrease memory consumption in deeply-recursive awk programs. + // The rule to work safely is to never call evaluate() while our static + // TMPVAR's value is still needed. + return istrue(evaluate(pattern, &G.ptest__tmpvar)); } /* read next record from stream rsm into a variable v */ @@ -2243,12 +2254,18 @@ static char *awk_printf(node *n, int *len) const char *s1; int i, j, incr, bsize; char c, c1; - var *tmpvar, *arg; - - tmpvar = nvalloc(1); -//TODO: above, to avoid allocating a single temporary var, take a pointer -//to a temporary that our caller (evaluate()) already has? - fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), tmpvar))); + var *arg; + + //tmpvar = nvalloc(1); +#define TMPVAR (&G.awk_printf__tmpvar) + // We use a single "static" tmpvar (instead of on-stack or malloced one) + // to decrease memory consumption in deeply-recursive awk programs. + // The rule to work safely is to never call evaluate() while our static + // TMPVAR's value is still needed. + fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR))); + // ^^^^^^^^^ here we immediately strdup() the value, so the later call + // to evaluate() potentially recursing into another awk_printf() can't + // mangle the value. 
i = 0; while (*f) { @@ -2268,7 +2285,7 @@ static char *awk_printf(node *n, int *len) f++; c1 = *f; *f = '\0'; - arg = evaluate(nextarg(&n), tmpvar); + arg = evaluate(nextarg(&n), TMPVAR); j = i; if (c == 'c' || !c) { @@ -2289,7 +2306,9 @@ static char *awk_printf(node *n, int *len) } free(fmt); - nvfree(tmpvar, 1); +// nvfree(tmpvar, 1); +#undef TMPVAR + b = xrealloc(b, i + 1); b[i] = '\0'; #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS -- cgit v1.2.3-55-g6feb From 40573556f2a67b11319785e0479b7087d02c060e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 14:27:40 +0200 Subject: awk: shuffle functions to reduce forward declarations, no code changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 192 ++++++++++++++++++++++++++++------------------------------ 1 file changed, 94 insertions(+), 98 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 0be044eef..6833c2f0d 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -619,18 +619,6 @@ struct globals2 { G.evaluate__seed = 1; \ } while (0) - -/* function prototypes */ -static void handle_special(var *); -static node *parse_expr(uint32_t); -static void chain_group(void); -static var *evaluate(node *, var *); -static rstream *next_input_file(void); -static int fmt_num(char *, int, const char *, double, int); -static int awk_exit(int) NORETURN; - -/* ---- error handling ---- */ - static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; @@ -642,10 +630,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field"; -static void zero_out_var(var *vp) -{ - memset(vp, 0, sizeof(*vp)); -} +static int awk_exit(int) NORETURN; static void syntax_error(const char *message) NORETURN; static void 
syntax_error(const char *message) @@ -653,6 +638,11 @@ static void syntax_error(const char *message) bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message); } +static void zero_out_var(var *vp) +{ + memset(vp, 0, sizeof(*vp)); +} + /* ---- hash stuff ---- */ static unsigned hashidx(const char *name) @@ -885,10 +875,29 @@ static double my_strtod(char **pp) /* -------- working with variables (set/get/copy/etc) -------- */ -static xhash *iamarray(var *v) +static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) { - var *a = v; + int r = 0; + char c; + const char *s = format; + + if (int_as_int && n == (long long)n) { + r = snprintf(b, size, "%lld", (long long)n); + } else { + do { c = *s; } while (c && *++s); + if (strchr("diouxX", c)) { + r = snprintf(b, size, format, (int)n); + } else if (strchr("eEfgG", c)) { + r = snprintf(b, size, format, n); + } else { + syntax_error(EMSG_INV_FMT); + } + } + return r; +} +static xhash *iamarray(var *a) +{ while (a->type & VF_CHILD) a = a->x.parent; @@ -913,6 +922,8 @@ static var *clrvar(var *v) return v; } +static void handle_special(var *); + /* assign string value to variable */ static var *setvar_p(var *v, char *value) { @@ -1284,6 +1295,8 @@ static void mk_re_node(const char *s, node *n, regex_t *re) xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); } +static node *parse_expr(uint32_t); + static node *parse_lrparen_list(void) { next_token(TC_LPAREN); @@ -1488,6 +1501,8 @@ static void chain_expr(uint32_t info) rollback_token(); } +static void chain_group(void); + static node *chain_loop(node *nn) { node *n, *n2, *save_brk, *save_cont; @@ -1770,6 +1785,8 @@ static node *mk_splitter(const char *s, tsplitter *spl) return n; } +static var *evaluate(node *, var *); + /* Use node as a regular expression. Supplied with node ptr and regex_t * storage space. Return ptr to regex (if result points to preg, it should * be later regfree'd manually). 
@@ -2222,27 +2239,6 @@ static int awk_getline(rstream *rsm, var *v) return r; } -static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) -{ - int r = 0; - char c; - const char *s = format; - - if (int_as_int && n == (long long)n) { - r = snprintf(b, size, "%lld", (long long)n); - } else { - do { c = *s; } while (c && *++s); - if (strchr("diouxX", c)) { - r = snprintf(b, size, format, (int)n); - } else if (strchr("eEfgG", c)) { - r = snprintf(b, size, format, n); - } else { - syntax_error(EMSG_INV_FMT); - } - } - return r; -} - /* formatted output into an allocated buffer, return ptr to buffer */ #if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS # define awk_printf(a, b) awk_printf(a) @@ -2306,7 +2302,7 @@ static char *awk_printf(node *n, int *len) } free(fmt); -// nvfree(tmpvar, 1); + //nvfree(tmpvar, 1); #undef TMPVAR b = xrealloc(b, i + 1); @@ -2652,6 +2648,64 @@ static NOINLINE var *exec_builtin(node *op, var *res) #undef tspl } +/* if expr looks like "var=value", perform assignment and return 1, + * otherwise return 0 */ +static int is_assignment(const char *expr) +{ + char *exprc, *val; + + if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { + return FALSE; + } + + exprc = xstrdup(expr); + val = exprc + (val - expr); + *val++ = '\0'; + + unescape_string_in_place(val); + setvar_u(newvar(exprc), val); + free(exprc); + return TRUE; +} + +/* switch to next input file */ +static rstream *next_input_file(void) +{ +#define rsm (G.next_input_file__rsm) +#define files_happen (G.next_input_file__files_happen) + + FILE *F; + const char *fname, *ind; + + if (rsm.F) + fclose(rsm.F); + rsm.F = NULL; + rsm.pos = rsm.adv = 0; + + for (;;) { + if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { + if (files_happen) + return NULL; + fname = "-"; + F = stdin; + break; + } + ind = getvar_s(incvar(intvar[ARGIND])); + fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); + if (fname && *fname && !is_assignment(fname)) { + F = xfopen_stdin(fname); 
+ break; + } + } + + files_happen = TRUE; + setvar_s(intvar[FILENAME], fname); + rsm.F = F; + return &rsm; +#undef rsm +#undef files_happen +} + /* * Evaluate node - the heart of the program. Supplied with subtree * and place where to store result. Returns ptr to result. @@ -3338,64 +3392,6 @@ static int awk_exit(int r) exit(r); } -/* if expr looks like "var=value", perform assignment and return 1, - * otherwise return 0 */ -static int is_assignment(const char *expr) -{ - char *exprc, *val; - - if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { - return FALSE; - } - - exprc = xstrdup(expr); - val = exprc + (val - expr); - *val++ = '\0'; - - unescape_string_in_place(val); - setvar_u(newvar(exprc), val); - free(exprc); - return TRUE; -} - -/* switch to next input file */ -static rstream *next_input_file(void) -{ -#define rsm (G.next_input_file__rsm) -#define files_happen (G.next_input_file__files_happen) - - FILE *F; - const char *fname, *ind; - - if (rsm.F) - fclose(rsm.F); - rsm.F = NULL; - rsm.pos = rsm.adv = 0; - - for (;;) { - if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { - if (files_happen) - return NULL; - fname = "-"; - F = stdin; - break; - } - ind = getvar_s(incvar(intvar[ARGIND])); - fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); - if (fname && *fname && !is_assignment(fname)) { - F = xfopen_stdin(fname); - break; - } - } - - files_happen = TRUE; - setvar_s(intvar[FILENAME], fname); - rsm.F = F; - return &rsm; -#undef rsm -#undef files_happen -} - int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int awk_main(int argc UNUSED_PARAM, char **argv) { -- cgit v1.2.3-55-g6feb From 1193c68fa718ff16c47aba23f8532bf1568f294e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 14:29:01 +0200 Subject: awk: when parsing length(), simplify eating of LPAREN function old new delta parse_expr 945 948 +3 Signed-off-by: Denys Vlasenko --- editors/awk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff 
--git a/editors/awk.c b/editors/awk.c index 6833c2f0d..f65449a09 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1453,10 +1453,11 @@ static node *parse_expr(uint32_t term_tc) | TC_BINOPX /* length NUM */ | TC_COMMA /* print length, 1 */ ); - rollback_token(); - if (tc & TC_LPAREN) { + if (tc != TC_LPAREN) + rollback_token(); + else { /* It was a "(" token. Handle just like TC_BUILTIN */ - cn->l.n = parse_lrparen_list(); + cn->l.n = parse_expr(TC_RPAREN); } break; } -- cgit v1.2.3-55-g6feb From 966cafcc77d8cda5d1a95bc73080e9a9b9010a45 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 14:33:13 +0200 Subject: awk: use "static" tmpvars in main and exit function old new delta awk_exit 103 93 -10 awk_main 850 832 -18 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-28) Total: -28 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index f65449a09..9f5a94037 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -562,6 +562,8 @@ struct globals2 { var ptest__tmpvar; var awk_printf__tmpvar; var as_regex__tmpvar; + var exit__tmpvar; + var main__tmpvar; tsplitter exec_builtin__tspl; @@ -638,11 +640,6 @@ static void syntax_error(const char *message) bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message); } -static void zero_out_var(var *vp) -{ - memset(vp, 0, sizeof(*vp)); -} - /* ---- hash stuff ---- */ static unsigned hashidx(const char *name) @@ -3372,11 +3369,9 @@ static int awk_exit(int r) unsigned i; if (!exiting) { - var tv; exiting = TRUE; nextrec = FALSE; - zero_out_var(&tv); - evaluate(endseq.first, &tv); + evaluate(endseq.first, &G.exit__tmpvar); } /* waiting for children */ @@ -3404,7 +3399,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) llist_t *list_e = NULL; #endif int i; - var tv; INIT_G(); @@ -3514,8 +3508,7 @@ int awk_main(int argc 
UNUSED_PARAM, char **argv) newfile("/dev/stdout")->F = stdout; newfile("/dev/stderr")->F = stderr; - zero_out_var(&tv); - evaluate(beginseq.first, &tv); + evaluate(beginseq.first, &G.main__tmpvar); if (!mainseq.first && !endseq.first) awk_exit(EXIT_SUCCESS); @@ -3532,7 +3525,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) nextrec = FALSE; incvar(intvar[NR]); incvar(intvar[FNR]); - evaluate(mainseq.first, &tv); + evaluate(mainseq.first, &G.main__tmpvar); if (nextfile) break; -- cgit v1.2.3-55-g6feb From ef5463cf16f88c0992b2073a30ab6081c86fdf23 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 14:53:52 +0200 Subject: awk: shuffle globals for smaller offsets function old new delta awk_main 832 829 -3 evaluate 3229 3223 -6 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-9) Total: -9 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 9f5a94037..068ed687b 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -536,6 +536,11 @@ struct globals { smallint nextfile; smallint is_f0_split; smallint t_rollback; + + /* former statics from various functions */ + smallint next_token__concat_inserted; + uint32_t next_token__save_tclass; + uint32_t next_token__save_info; }; struct globals2 { uint32_t t_info; /* often used */ @@ -548,15 +553,11 @@ struct globals2 { /* former statics from various functions */ char *split_f0__fstrings; - uint32_t next_token__save_tclass; - uint32_t next_token__save_info; - smallint next_token__concat_inserted; - - smallint next_input_file__files_happen; rstream next_input_file__rsm; + smallint next_input_file__files_happen; - var *evaluate__fnargs; unsigned evaluate__seed; + var *evaluate__fnargs; regex_t evaluate__sreg; var ptest__tmpvar; @@ -575,10 +576,10 @@ struct globals2 { #define G1 (ptr_to_globals[-1]) #define G 
(*(struct globals2 *)ptr_to_globals) /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */ -/*char G1size[sizeof(G1)]; - 0x74 */ -/*char Gsize[sizeof(G)]; - 0x1c4 */ +//char G1size[sizeof(G1)]; // 0x70 +//char Gsize[sizeof(G)]; // 0x2f8 /* Trying to keep most of members accessible with short offsets: */ -/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */ +//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c #define t_double (G1.t_double ) #define beginseq (G1.beginseq ) #define mainseq (G1.mainseq ) @@ -1056,9 +1057,9 @@ static int istrue(var *v) */ static uint32_t next_token(uint32_t expected) { -#define concat_inserted (G.next_token__concat_inserted) -#define save_tclass (G.next_token__save_tclass) -#define save_info (G.next_token__save_info) +#define concat_inserted (G1.next_token__concat_inserted) +#define save_tclass (G1.next_token__save_tclass) +#define save_info (G1.next_token__save_info) char *p; const char *tl; -- cgit v1.2.3-55-g6feb From 640212ae0ea8a1b47cd73a080d77b25b9f3ccd40 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 15:19:14 +0200 Subject: awk: do not special-case "delete" Rework of the previous fix: Can use operation attributes to disable arg evaluation instead of special-casing. 
function old new delta .rodata 104032 104036 +4 evaluate 3223 3215 -8 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 4/-8) Total: -4 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 56 +++++++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 068ed687b..a3dda6959 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -319,7 +319,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ #define xV OF_RES2 #define xS (OF_RES2 | OF_STR2) #define Vx OF_RES1 -#define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED) +#define Rx OF_REQUIRED #define VV (OF_RES1 | OF_RES2) #define Nx (OF_RES1 | OF_NUM1) #define NV (OF_RES1 | OF_NUM1 | OF_RES2) @@ -2750,32 +2750,6 @@ static var *evaluate(node *op, var *res) op1 = op->l.n; debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); - /* "delete" is special: - * "delete array[var--]" must evaluate index expr only once, - * must not evaluate it in "execute inevitable things" part. - */ - if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) { - uint32_t info = op1->info & OPCLSMASK; - var *v; - - debug_printf_eval("DELETE\n"); - if (info == OC_VAR) { - v = op1->l.v; - } else if (info == OC_FNARG) { - v = &fnargs[op1->l.aidx]; - } else { - syntax_error(EMSG_NOT_ARRAY); - } - if (op1->r.n) { /* array ref? */ - const char *s; - s = getvar_s(evaluate(op1->r.n, TMPVAR0)); - hash_remove(iamarray(v), s); - } else { - clear_array(iamarray(v)); - } - goto next; - } - /* execute inevitable things */ if (opinfo & OF_RES1) L.v = evaluate(op1, TMPVAR0); @@ -2905,7 +2879,31 @@ static var *evaluate(node *op, var *res) break; } - /* case XC( OC_DELETE ): - moved to happen before arg evaluation */ + case XC( OC_DELETE ): + debug_printf_eval("DELETE\n"); + { + /* "delete" is special: + * "delete array[var--]" must evaluate index expr only once. 
+ */ + uint32_t info = op1->info & OPCLSMASK; + var *v; + + if (info == OC_VAR) { + v = op1->l.v; + } else if (info == OC_FNARG) { + v = &fnargs[op1->l.aidx]; + } else { + syntax_error(EMSG_NOT_ARRAY); + } + if (op1->r.n) { /* array ref? */ + const char *s; + s = getvar_s(evaluate(op1->r.n, TMPVAR0)); + hash_remove(iamarray(v), s); + } else { + clear_array(iamarray(v)); + } + break; + } case XC( OC_NEWSOURCE ): debug_printf_eval("NEWSOURCE\n"); @@ -3342,7 +3340,7 @@ static var *evaluate(node *op, var *res) default: syntax_error(EMSG_POSSIBLE_ERROR); } /* switch */ - next: + if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS) op = op->a.n; if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS) -- cgit v1.2.3-55-g6feb From 786ca197ad1305607efaccb067c19931d9e765b1 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 17:32:08 +0200 Subject: awk: make builtin definitions more understandable, no code changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 71 +++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index a3dda6959..fb841687e 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -331,8 +331,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ #define OPNMASK 0x007F /* operator priority is a highest byte (even: r->l, odd: l->r grouping) - * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1, - * n - min. 
number of args, vN - resolve Nth arg to var, sN - resolve to string + * (for builtins it has different meaning) */ #undef P #undef PRIMASK @@ -430,8 +429,6 @@ static const char tokenlist[] ALIGN1 = /* compiler adds trailing "\0" */ ; -#define OC_B OC_BUILTIN - static const uint32_t tokeninfo[] ALIGN4 = { 0, 0, @@ -464,20 +461,43 @@ static const uint32_t tokeninfo[] ALIGN4 = { OC_RETURN|Vx, OC_EXIT|Nx, ST_WHILE, 0, /* else */ - OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), - OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), - OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), - OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, - OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, - OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */ - OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), - OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), - OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), - OC_FBLTIN|Sx|F_le, /* TC_LENGTH */ - OC_GETLINE|SV|P(0), - 0, 0, - 0, - 0 /* TC_END */ +// OC_B's are builtins with enforced minimum number of arguments (two upper bits). +// Highest byte bit pattern: nn s3s2s1 v3v2v1 +// nn - min. 
number of args, sN - resolve Nth arg to string, vN - resolve to var +// OC_FBLTIN's are builtins with one optional argument, +// TODO: enforce exactly one arg for: system, close, cos, sin, exp, int, log, sqrt +// zero args for: rand systime +// Do have one optional arg: fflush, srand, length +#define OC_B OC_BUILTIN +#define A1 P(0x40) /*one arg*/ +#define A2 P(0x80) /*two args*/ +#define A3 P(0xc0) /*three args*/ +#define __v P(1) +#define _vv P(3) +#define __s__v P(9) +#define __s_vv P(0x0b) +#define __svvv P(0x0f) +#define _ss_vv P(0x1b) +#define _s_vv_ P(0x16) +#define ss_vv_ P(0x36) + OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or + OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor + OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2 + OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, // cos exp int log + OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand + OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ + OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub + OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime + OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper + OC_FBLTIN|Sx|F_le, // length + OC_GETLINE|SV, // getline + 0, 0, // func function + 0, // BEGIN + 0 // END +#undef A1 +#undef A2 +#undef A3 +#undef OC_B }; /* internal variable names and their initial values */ @@ -1630,6 +1650,7 @@ static void chain_group(void) debug_printf_parse("%s: OC_BREAK\n", __func__); n = chain_node(OC_EXEC); n->a.n = break_ptr; +//TODO: if break_ptr is NULL, syntax error (not in the loop)? 
chain_expr(t_info); break; @@ -1637,6 +1658,7 @@ static void chain_group(void) debug_printf_parse("%s: OC_CONTINUE\n", __func__); n = chain_node(OC_EXEC); n->a.n = continue_ptr; +//TODO: if continue_ptr is NULL, syntax error (not in the loop)? chain_expr(t_info); break; @@ -1799,8 +1821,8 @@ static regex_t *as_regex(node *op, regex_t *preg) return icase ? op->r.ire : op->l.re; } -#define TMPVAR (&G.as_regex__tmpvar) //tmpvar = nvalloc(1); +#define TMPVAR (&G.as_regex__tmpvar) // We use a single "static" tmpvar (instead of on-stack or malloced one) // to decrease memory consumption in deeply-recursive awk programs. // The rule to work safely is to never call evaluate() while our static @@ -2720,8 +2742,6 @@ static var *evaluate(node *op, var *res) #define sreg (G.evaluate__sreg) var *tmpvars; -#define TMPVAR0 (tmpvars) -#define TMPVAR1 (tmpvars + 1) if (!op) return setvar_s(res, NULL); @@ -2729,6 +2749,8 @@ static var *evaluate(node *op, var *res) debug_printf_eval("entered %s()\n", __func__); tmpvars = nvalloc(2); +#define TMPVAR0 (tmpvars) +#define TMPVAR1 (tmpvars + 1) while (op) { struct { @@ -3166,7 +3188,7 @@ static var *evaluate(node *op, var *res) rstream *rsm; int err = 0; rsm = (rstream *)hash_search(fdhash, L.s); - debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm); + debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm); if (rsm) { debug_printf_eval("OC_FBLTIN F_cl " "rsm->is_pipe:%d, ->F:%p\n", @@ -3177,6 +3199,11 @@ static var *evaluate(node *op, var *res) */ if (rsm->F) err = rsm->is_pipe ? 
pclose(rsm->F) : fclose(rsm->F); +//TODO: fix this case: +// $ awk 'BEGIN { print close(""); print ERRNO }' +// -1 +// close of redirection that was never opened +// (we print 0, 0) free(rsm->buffer); hash_remove(fdhash, L.s); } -- cgit v1.2.3-55-g6feb From 47d9133896f0de6b17393309193051e4bd52015e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 18:28:12 +0200 Subject: awk: enforce simple builtins' argument number function old new delta evaluate 3215 3303 +88 .rodata 104036 104107 +71 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/0 up/down: 159/0) Total: 159 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index fb841687e..1925e0771 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -464,11 +464,11 @@ static const uint32_t tokeninfo[] ALIGN4 = { // OC_B's are builtins with enforced minimum number of arguments (two upper bits). // Highest byte bit pattern: nn s3s2s1 v3v2v1 // nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var -// OC_FBLTIN's are builtins with one optional argument, -// TODO: enforce exactly one arg for: system, close, cos, sin, exp, int, log, sqrt -// zero args for: rand systime -// Do have one optional arg: fflush, srand, length -#define OC_B OC_BUILTIN +// OC_FBLTIN's are builtins with zero or one argument. +// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt. +// Check for no args is present in builtins' code (not in this table): rand, systime. 
+// Have one _optional_ arg: fflush, srand, length +#define OC_B OC_BUILTIN #define A1 P(0x40) /*one arg*/ #define A2 P(0x80) /*two args*/ #define A3 P(0xc0) /*three args*/ @@ -480,15 +480,15 @@ static const uint32_t tokeninfo[] ALIGN4 = { #define _ss_vv P(0x1b) #define _s_vv_ P(0x16) #define ss_vv_ P(0x36) - OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or - OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor - OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2 - OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, // cos exp int log - OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand - OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ - OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub - OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime - OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper + OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or + OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor + OC_FBLTIN|Sx|Rx|F_cl,OC_FBLTIN|Sx|Rx|F_sy,OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2 + OC_FBLTIN|Nx|Rx|F_co,OC_FBLTIN|Nx|Rx|F_ex,OC_FBLTIN|Nx|Rx|F_in,OC_FBLTIN|Nx|Rx|F_lg,// cos exp int log + OC_FBLTIN|F_rn, OC_FBLTIN|Nx|Rx|F_si,OC_FBLTIN|Nx|Rx|F_sq,OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand + OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ + OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub + OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime + OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper 
OC_FBLTIN|Sx|F_le, // length OC_GETLINE|SV, // getline 0, 0, // func function @@ -2773,8 +2773,11 @@ static var *evaluate(node *op, var *res) debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); /* execute inevitable things */ - if (opinfo & OF_RES1) + if (opinfo & OF_RES1) { + if ((opinfo & OF_REQUIRED) && !op1) + syntax_error(EMSG_TOO_FEW_ARGS); L.v = evaluate(op1, TMPVAR0); + } if (opinfo & OF_STR1) { L.s = getvar_s(L.v); debug_printf_eval("L.s:'%s'\n", L.s); @@ -3101,12 +3104,18 @@ static var *evaluate(node *op, var *res) double R_d = R_d; /* for compiler */ debug_printf_eval("FBLTIN\n"); + if (op1 && (op1->info & OPCLSMASK) == OC_COMMA) + /* Simple builtins take one arg maximum */ + syntax_error("Too many arguments"); + switch (opn) { case F_in: R_d = (long long)L_d; break; - case F_rn: + case F_rn: /*rand*/ + if (op1) + syntax_error("Too many arguments"); R_d = (double)rand() / (double)RAND_MAX; break; @@ -3149,7 +3158,9 @@ static var *evaluate(node *op, var *res) srand(seed); break; - case F_ti: + case F_ti: /*systime*/ + if (op1) + syntax_error("Too many arguments"); R_d = time(NULL); break; -- cgit v1.2.3-55-g6feb From 37ae8cdc6e428e68ad76f6b446881ecff305ebd3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 18:55:00 +0200 Subject: awk: beautify builtins table, no code changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 1925e0771..8d7777ca6 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -464,11 +464,12 @@ static const uint32_t tokeninfo[] ALIGN4 = { // OC_B's are builtins with enforced minimum number of arguments (two upper bits). // Highest byte bit pattern: nn s3s2s1 v3v2v1 // nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var -// OC_FBLTIN's are builtins with zero or one argument. +// OC_F's are builtins with zero or one argument. 
// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt. // Check for no args is present in builtins' code (not in this table): rand, systime. // Have one _optional_ arg: fflush, srand, length #define OC_B OC_BUILTIN +#define OC_F OC_FBLTIN #define A1 P(0x40) /*one arg*/ #define A2 P(0x80) /*two args*/ #define A3 P(0xc0) /*three args*/ @@ -480,17 +481,17 @@ static const uint32_t tokeninfo[] ALIGN4 = { #define _ss_vv P(0x1b) #define _s_vv_ P(0x16) #define ss_vv_ P(0x36) - OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or - OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor - OC_FBLTIN|Sx|Rx|F_cl,OC_FBLTIN|Sx|Rx|F_sy,OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2 - OC_FBLTIN|Nx|Rx|F_co,OC_FBLTIN|Nx|Rx|F_ex,OC_FBLTIN|Nx|Rx|F_in,OC_FBLTIN|Nx|Rx|F_lg,// cos exp int log - OC_FBLTIN|F_rn, OC_FBLTIN|Nx|Rx|F_si,OC_FBLTIN|Nx|Rx|F_sq,OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand - OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ - OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub - OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime - OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper - OC_FBLTIN|Sx|F_le, // length - OC_GETLINE|SV, // getline + OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or + OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor + OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2 + OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log + OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand + OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ + OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, 
OC_B|B_su|ss_vv_|A2,// match split sprintf sub + OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime + OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper + OC_F|F_le|Sx, // length + OC_GETLINE|SV, // getline 0, 0, // func function 0, // BEGIN 0 // END @@ -498,6 +499,7 @@ static const uint32_t tokeninfo[] ALIGN4 = { #undef A2 #undef A3 #undef OC_B +#undef OC_F }; /* internal variable names and their initial values */ -- cgit v1.2.3-55-g6feb From 8bb03da906e1f8f750123214b15a19d7d4e166c1 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 19:38:03 +0200 Subject: awk: rand() could return 1.0, fix this - should be in [0,1) While at it, make it finer-grained (63 bits of randomness) function old new delta evaluate 3303 3336 +33 .rodata 104107 104111 +4 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/0 up/down: 37/0) Total: 37 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 8d7777ca6..64fe81be4 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -3118,9 +3118,20 @@ static var *evaluate(node *op, var *res) case F_rn: /*rand*/ if (op1) syntax_error("Too many arguments"); - R_d = (double)rand() / (double)RAND_MAX; + { +#if RAND_MAX >= 0x7fffffff + uint32_t u = ((uint32_t)rand() << 16) ^ rand(); + uint64_t v = ((uint64_t)rand() << 32) | u; + /* the above shift+or is optimized out on 32-bit arches */ +# if RAND_MAX > 0x7fffffff + v &= 0x7fffffffffffffffUL; +# endif + R_d = (double)v / 0x8000000000000000UL; +#else +# error Not implemented for this value of RAND_MAX +#endif break; - + } case F_co: if (ENABLE_FEATURE_AWK_LIBM) { R_d = cos(L_d); -- cgit v1.2.3-55-g6feb From 4d902ea9def573cd15271177abbfa50fbf30c84f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 22:28:51 +0200 Subject: awk: fix behavior of
"exit" without parameter function old new delta evaluate 3336 3339 +3 awk_exit 93 94 +1 awk_main 829 827 -2 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/1 up/down: 4/-2) Total: 2 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 40 ++++++++++++++++++++++------------------ testsuite/awk.tests | 5 +++++ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 64fe81be4..86cb7a95f 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -578,6 +578,8 @@ struct globals2 { rstream next_input_file__rsm; smallint next_input_file__files_happen; + smalluint exitcode; + unsigned evaluate__seed; var *evaluate__fnargs; regex_t evaluate__sreg; @@ -655,7 +657,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field"; -static int awk_exit(int) NORETURN; +static int awk_exit(void) NORETURN; static void syntax_error(const char *message) NORETURN; static void syntax_error(const char *message) @@ -2779,14 +2781,14 @@ static var *evaluate(node *op, var *res) if ((opinfo & OF_REQUIRED) && !op1) syntax_error(EMSG_TOO_FEW_ARGS); L.v = evaluate(op1, TMPVAR0); - } - if (opinfo & OF_STR1) { - L.s = getvar_s(L.v); - debug_printf_eval("L.s:'%s'\n", L.s); - } - if (opinfo & OF_NUM1) { - L_d = getvar_i(L.v); - debug_printf_eval("L_d:%f\n", L_d); + if (opinfo & OF_STR1) { + L.s = getvar_s(L.v); + debug_printf_eval("L.s:'%s'\n", L.s); + } + if (opinfo & OF_NUM1) { + L_d = getvar_i(L.v); + debug_printf_eval("L_d:%f\n", L_d); + } } /* NB: Must get string/numeric values of L (done above) * _before_ evaluate()'ing R.v: if both L and R are $NNNs, @@ -2799,10 +2801,10 @@ static var *evaluate(node *op, var *res) R.v = evaluate(op->r.n, TMPVAR1); //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? 
//L.v = NULL; - } - if (opinfo & OF_STR2) { - R.s = getvar_s(R.v); - debug_printf_eval("R.s:'%s'\n", R.s); + if (opinfo & OF_STR2) { + R.s = getvar_s(R.v); + debug_printf_eval("R.s:'%s'\n", R.s); + } } debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); @@ -2955,7 +2957,9 @@ static var *evaluate(node *op, var *res) case XC( OC_EXIT ): debug_printf_eval("EXIT\n"); - awk_exit(L_d); + if (op1) + G.exitcode = (int)L_d; + awk_exit(); /* -- recursive node type -- */ @@ -3414,7 +3418,7 @@ static var *evaluate(node *op, var *res) /* -------- main & co. -------- */ -static int awk_exit(int r) +static int awk_exit(void) { unsigned i; @@ -3435,7 +3439,7 @@ static int awk_exit(int r) } } - exit(r); + exit(G.exitcode); } int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; @@ -3560,7 +3564,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) evaluate(beginseq.first, &G.main__tmpvar); if (!mainseq.first && !endseq.first) - awk_exit(EXIT_SUCCESS); + awk_exit(); /* input file could already be opened in BEGIN block */ if (!iF) @@ -3587,6 +3591,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) iF = next_input_file(); } - awk_exit(EXIT_SUCCESS); + awk_exit(); /*return 0;*/ } diff --git a/testsuite/awk.tests b/testsuite/awk.tests index 3c230393f..770d8ffce 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -445,4 +445,9 @@ testing 'awk $NF is empty' \ '' \ 'a=====123=' +testing "awk exit N propagates through END's exit" \ + "awk 'BEGIN { exit 42 } END { exit }'; echo \$?" 
\ + "42\n" \ + '' '' + exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From a5d7b0f4f4e9728c3eb7a06d38227d9f3351e677 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 23:07:21 +0200 Subject: awk: fix detection of VAR=VAL arguments 1NAME=VAL is not it, neither is VA.R=VAL function old new delta next_input_file 216 214 -2 is_assignment 115 91 -24 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-26) Total: -26 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 86cb7a95f..9f14f0f9a 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2679,7 +2679,8 @@ static int is_assignment(const char *expr) { char *exprc, *val; - if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { + val = (char*)endofname(expr); + if (val == (char*)expr || *val != '=') { return FALSE; } @@ -2699,7 +2700,6 @@ static rstream *next_input_file(void) #define rsm (G.next_input_file__rsm) #define files_happen (G.next_input_file__files_happen) - FILE *F; const char *fname, *ind; if (rsm.F) @@ -2712,20 +2712,19 @@ static rstream *next_input_file(void) if (files_happen) return NULL; fname = "-"; - F = stdin; + rsm.F = stdin; break; } ind = getvar_s(incvar(intvar[ARGIND])); fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); if (fname && *fname && !is_assignment(fname)) { - F = xfopen_stdin(fname); + rsm.F = xfopen_stdin(fname); break; } } files_happen = TRUE; setvar_s(intvar[FILENAME], fname); - rsm.F = F; return &rsm; #undef rsm #undef files_happen -- cgit v1.2.3-55-g6feb From 646429e05e2f62250da80aa8d98111f3a9818e9a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 23:24:52 +0200 Subject: awk: use smaller regmatch_t arrays, they had 2 elements for no apparent reason function old new delta exec_builtin 1479 1434 -45 Signed-off-by: Denys Vlasenko --- editors/awk.c | 8 +++++--- 1 file 
changed, 5 insertions(+), 3 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 9f14f0f9a..c06dd2304 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1937,7 +1937,7 @@ static int awk_split(const char *s, node *spl, char **slist) n++; /* at least one field will be there */ do { int l; - regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... + regmatch_t pmatch[1]; l = strcspn(s, c+2); /* len till next NUL or \n */ if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0 @@ -2166,7 +2166,7 @@ static int ptest(node *pattern) static int awk_getline(rstream *rsm, var *v) { char *b; - regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... + regmatch_t pmatch[1]; int size, a, p, pp = 0; int fd, so, eo, r, rp; char c, *m, *s; @@ -2473,7 +2473,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) node *an[4]; var *av[4]; const char *as[4]; - regmatch_t pmatch[2]; + regmatch_t pmatch[1]; regex_t sreg, *re; node *spl; uint32_t isr, info; @@ -3533,6 +3533,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv) parse_program(llist_pop(&list_e)); } #endif +//FIXME: preserve order of -e and -f +//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish if (!(opt & (OPT_f | OPT_e))) { if (!*argv) bb_show_usage(); -- cgit v1.2.3-55-g6feb From b705bf55395bf338f9b9888d87e418f67d4f1a29 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 2 Jul 2021 23:38:50 +0200 Subject: awk: move match() code out-of-line function old new delta exec_builtin_match - 202 +202 exec_builtin 1434 1157 -277 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/1 up/down: 202/-277) Total: -75 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index c06dd2304..96e06db25 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2465,6 +2465,30 @@ static NOINLINE int 
do_mktime(const char *ds) return mktime(&then); } +/* Reduce stack usage in exec_builtin() by keeping match() code separate */ +static NOINLINE void exec_builtin_match(node *an1, const char *as0, var *res) +{ + regmatch_t pmatch[1]; + regex_t sreg, *re; + int n; + + re = as_regex(an1, &sreg); + n = regexec(re, as0, 1, pmatch, 0); + if (n == 0) { + pmatch[0].rm_so++; + pmatch[0].rm_eo++; + } else { + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = -1; + } + if (re == &sreg) + regfree(re); + setvar_i(newvar("RSTART"), pmatch[0].rm_so); + setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); + setvar_i(res, pmatch[0].rm_so); +} + +/* Reduce stack usage in evaluate() by keeping builtins' code separate */ static NOINLINE var *exec_builtin(node *op, var *res) { #define tspl (G.exec_builtin__tspl) @@ -2473,8 +2497,6 @@ static NOINLINE var *exec_builtin(node *op, var *res) node *an[4]; var *av[4]; const char *as[4]; - regmatch_t pmatch[1]; - regex_t sreg, *re; node *spl; uint32_t isr, info; int nargs; @@ -2633,20 +2655,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) break; case B_ma: - re = as_regex(an[1], &sreg); - n = regexec(re, as[0], 1, pmatch, 0); - if (n == 0) { - pmatch[0].rm_so++; - pmatch[0].rm_eo++; - } else { - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = -1; - } - setvar_i(newvar("RSTART"), pmatch[0].rm_so); - setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); - setvar_i(res, pmatch[0].rm_so); - if (re == &sreg) - regfree(re); + exec_builtin_match(an[1], as[0], res); break; case B_ge: @@ -2732,7 +2741,9 @@ static rstream *next_input_file(void) /* * Evaluate node - the heart of the program. Supplied with subtree - * and place where to store result. Returns ptr to result. + * and "res" variable to assign the result to if we evaluate an expression. + * If node refers to e.g. a variable or a field, no assignment happens. + * Return ptr to the result (which may or may not be the "res" variable!) 
*/ #define XC(n) ((n) >> 8) -- cgit v1.2.3-55-g6feb From 717200eb43c9420773c0f8b751aadabba3052027 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jul 2021 00:39:55 +0200 Subject: awk: rename GRPSTART/END to L/RBRACE, no code changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 60 ++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 96e06db25..a1a2afd1d 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -211,8 +211,8 @@ typedef struct tsplitter_s { #define TC_PIPE (1 << 9) /* input redirection pipe | */ #define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ #define TC_ARRTERM (1 << 11) /* ] */ -#define TC_GRPSTART (1 << 12) /* { */ -#define TC_GRPTERM (1 << 13) /* } */ +#define TC_LBRACE (1 << 12) /* { */ +#define TC_RBRACE (1 << 13) /* } */ #define TC_SEMICOL (1 << 14) /* ; */ #define TC_NEWLINE (1 << 15) #define TC_STATX (1 << 16) /* ctl statement (for, next...) 
*/ @@ -250,8 +250,8 @@ if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \ if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \ if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \ if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \ -if ((n) & TC_GRPSTART) debug_printf_parse(" GRPSTART"); \ -if ((n) & TC_GRPTERM ) debug_printf_parse(" GRPTERM" ); \ +if ((n) & TC_LBRACE ) debug_printf_parse(" LBRACE" ); \ +if ((n) & TC_RBRACE ) debug_printf_parse(" RBRACE" ); \ if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \ if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \ if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \ @@ -291,13 +291,13 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ | TC_FUNCDECL | TC_BEGIN | TC_END) /* discard newlines after these */ -#define TS_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ +#define TS_NOTERM (TC_COMMA | TC_LBRACE | TC_RBRACE \ | TS_BINOP | TS_OPTERM) /* what can expression begin with */ #define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP) /* what can group begin with */ -#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_GRPSTART) +#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_LBRACE) /* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ /* operator is inserted between them */ @@ -402,8 +402,8 @@ static const char tokenlist[] ALIGN1 = "\1|" NTC /* TC_PIPE */ "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */ "\1]" NTC /* TC_ARRTERM */ - "\1{" NTC /* TC_GRPSTART */ - "\1}" NTC /* TC_GRPTERM */ + "\1{" NTC /* TC_LBRACE */ + "\1}" NTC /* TC_RBRACE */ "\1;" NTC /* TC_SEMICOL */ "\1\n" NTC /* TC_NEWLINE */ "\2if" "\2do" "\3for" "\5break" /* TC_STATX */ @@ -1471,7 +1471,7 @@ static node *parse_expr(uint32_t term_tc) debug_printf_parse("%s: TC_LENGTH\n", __func__); tc = next_token(TC_LPAREN /* length(...) 
*/ | TS_OPTERM /* length; (or newline)*/ - | TC_GRPTERM /* length } */ + | TC_RBRACE /* length } */ | TC_BINOPX /* length NUM */ | TC_COMMA /* print length, 1 */ ); @@ -1516,11 +1516,11 @@ static void chain_expr(uint32_t info) n = chain_node(info); - n->l.n = parse_expr(TS_OPTERM | TC_GRPTERM); + n->l.n = parse_expr(TS_OPTERM | TC_RBRACE); if ((info & OF_REQUIRED) && !n->l.n) syntax_error(EMSG_TOO_FEW_ARGS); - if (t_tclass & TC_GRPTERM) + if (t_tclass & TC_RBRACE) rollback_token(); } @@ -1559,16 +1559,16 @@ static void chain_group(void) c = next_token(TS_GRPSEQ); } while (c & TC_NEWLINE); - if (c & TC_GRPSTART) { - debug_printf_parse("%s: TC_GRPSTART\n", __func__); - while ((c = next_token(TS_GRPSEQ | TC_GRPTERM)) != TC_GRPTERM) { - debug_printf_parse("%s: !TC_GRPTERM\n", __func__); + if (c & TC_LBRACE) { + debug_printf_parse("%s: TC_LBRACE\n", __func__); + while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { + debug_printf_parse("%s: !TC_RBRACE\n", __func__); if (c & TC_NEWLINE) continue; rollback_token(); chain_group(); } - debug_printf_parse("%s: TC_GRPTERM\n", __func__); + debug_printf_parse("%s: TC_RBRACE\n", __func__); return; } if (c & (TS_OPSEQ | TS_OPTERM)) { @@ -1588,7 +1588,7 @@ static void chain_group(void) chain_group(); n2 = chain_node(OC_EXEC); n->r.n = seq->last; - if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { + if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) { chain_group(); n2->a.n = seq->last; } else { @@ -1641,12 +1641,12 @@ static void chain_group(void) case OC_PRINTF: debug_printf_parse("%s: OC_PRINT[F]\n", __func__); n = chain_node(t_info); - n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM); + n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_RBRACE); if (t_tclass & TC_OUTRDR) { n->info |= t_info; - n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM); + n->r.n = parse_expr(TS_OPTERM | TC_RBRACE); } - if (t_tclass & TC_GRPTERM) + if (t_tclass & TC_RBRACE) rollback_token(); break; @@ -1684,7 +1684,7 @@ static 
void parse_program(char *p) g_pos = p; t_lineno = 1; - while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_GRPSTART | + while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { if (tclass & TS_OPTERM) { @@ -1696,10 +1696,14 @@ static void parse_program(char *p) if (tclass & TC_BEGIN) { debug_printf_parse("%s: TC_BEGIN\n", __func__); seq = &beginseq; +//TODO: ensure there is no newline between BEGIN and { +//next_token(TC_LBRACE); rollback_token(); chain_group(); } else if (tclass & TC_END) { debug_printf_parse("%s: TC_END\n", __func__); seq = &endseq; +//TODO: ensure there is no newline between END and { +//next_token(TC_LBRACE); rollback_token(); chain_group(); } else if (tclass & TC_FUNCDECL) { debug_printf_parse("%s: TC_FUNCDECL\n", __func__); @@ -1726,24 +1730,26 @@ static void parse_program(char *p) /* it was a comma, we ate it */ } seq = &f->body; +//TODO: ensure there is { after "func F(...)" - but newlines are allowed +//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token(); chain_group(); hash_clear(ahash); } else if (tclass & TS_OPSEQ) { debug_printf_parse("%s: TS_OPSEQ\n", __func__); rollback_token(); cn = chain_node(OC_TEST); - cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_GRPSTART); - if (t_tclass & TC_GRPSTART) { - debug_printf_parse("%s: TC_GRPSTART\n", __func__); + cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); + if (t_tclass & TC_LBRACE) { + debug_printf_parse("%s: TC_LBRACE\n", __func__); rollback_token(); chain_group(); } else { - debug_printf_parse("%s: !TC_GRPSTART\n", __func__); + debug_printf_parse("%s: !TC_LBRACE\n", __func__); chain_node(OC_PRINT); } cn->r.n = mainseq.last; - } else /* if (tclass & TC_GRPSTART) */ { - debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__); + } else /* if (tclass & TC_LBRACE) */ { + debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); rollback_token(); chain_group(); } -- cgit v1.2.3-55-g6feb From 
2b65e73db3254a7228802886546152c72217017d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jul 2021 01:16:48 +0200 Subject: awk: tighten rules in action parsing Disallow: BEGIN { action } - must start on the same line Disallow: func f() print "hello" - must be in {...} function old new delta chain_until_rbrace - 41 +41 parse_program 307 336 +29 chain_group 649 616 -33 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 1/1 up/down: 70/-33) Total: 37 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 108 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 42 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index a1a2afd1d..c68416873 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1549,29 +1549,35 @@ static node *chain_loop(node *nn) return n; } +static void chain_until_rbrace(void) +{ + uint32_t tc; + while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { + debug_printf_parse("%s: !TC_RBRACE\n", __func__); + if (tc == TC_NEWLINE) + continue; + rollback_token(); + chain_group(); + } + debug_printf_parse("%s: TC_RBRACE\n", __func__); +} + /* parse group and attach it to chain */ static void chain_group(void) { - uint32_t c; + uint32_t tc; node *n, *n2, *n3; do { - c = next_token(TS_GRPSEQ); - } while (c & TC_NEWLINE); + tc = next_token(TS_GRPSEQ); + } while (tc == TC_NEWLINE); - if (c & TC_LBRACE) { + if (tc == TC_LBRACE) { debug_printf_parse("%s: TC_LBRACE\n", __func__); - while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { - debug_printf_parse("%s: !TC_RBRACE\n", __func__); - if (c & TC_NEWLINE) - continue; - rollback_token(); - chain_group(); - } - debug_printf_parse("%s: TC_RBRACE\n", __func__); + chain_until_rbrace(); return; } - if (c & (TS_OPSEQ | TS_OPTERM)) { + if (tc & (TS_OPSEQ | TS_OPTERM)) { debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); rollback_token(); chain_expr(OC_EXEC | Vx); @@ -1675,37 +1681,48 
@@ static void chain_group(void) static void parse_program(char *p) { - uint32_t tclass; - node *cn; - func *f; - var *v; - debug_printf_parse("%s()\n", __func__); g_pos = p; t_lineno = 1; - while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | - TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { + for (;;) { + uint32_t tclass; - if (tclass & TS_OPTERM) { + tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | + TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL); + + if (tclass == TC_EOF) { + debug_printf_parse("%s: TC_EOF\n", __func__); + break; + } + if (tclass & TS_OPTERM) { /* ; or */ debug_printf_parse("%s: TS_OPTERM\n", __func__); +//NB: gawk allows many newlines, but does not allow more than one semicolon: +// BEGIN {...};; +//would complain "each rule must have a pattern or an action part". +//Same message for +// ; BEGIN {...} continue; } - - seq = &mainseq; - if (tclass & TC_BEGIN) { + if (tclass == TC_BEGIN) { debug_printf_parse("%s: TC_BEGIN\n", __func__); seq = &beginseq; -//TODO: ensure there is no newline between BEGIN and { -//next_token(TC_LBRACE); rollback_token(); - chain_group(); - } else if (tclass & TC_END) { + /* ensure there is no newline between BEGIN and { */ + next_token(TC_LBRACE); + chain_until_rbrace(); + continue; + } + if (tclass == TC_END) { debug_printf_parse("%s: TC_END\n", __func__); seq = &endseq; -//TODO: ensure there is no newline between END and { -//next_token(TC_LBRACE); rollback_token(); - chain_group(); - } else if (tclass & TC_FUNCDECL) { + /* ensure there is no newline between END and { */ + next_token(TC_LBRACE); + chain_until_rbrace(); + continue; + } + if (tclass == TC_FUNCDECL) { + func *f; + debug_printf_parse("%s: TC_FUNCDECL\n", __func__); next_token(TC_FUNCTION); f = newfunc(t_string); @@ -1716,6 +1733,7 @@ static void parse_program(char *p) //f->nargs = 0; - already is /* func arg list: comma sep list of args, and a close paren */ for (;;) { + var *v; if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) 
{ if (f->nargs == 0) break; /* func() is ok */ @@ -1730,31 +1748,37 @@ static void parse_program(char *p) /* it was a comma, we ate it */ } seq = &f->body; -//TODO: ensure there is { after "func F(...)" - but newlines are allowed -//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token(); - chain_group(); + /* ensure there is { after "func F(...)" - but newlines are allowed */ + while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) + continue; + chain_until_rbrace(); hash_clear(ahash); - } else if (tclass & TS_OPSEQ) { + continue; + } + seq = &mainseq; + if (tclass & TS_OPSEQ) { + node *cn; + debug_printf_parse("%s: TS_OPSEQ\n", __func__); rollback_token(); cn = chain_node(OC_TEST); cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); - if (t_tclass & TC_LBRACE) { + if (t_tclass == TC_LBRACE) { debug_printf_parse("%s: TC_LBRACE\n", __func__); rollback_token(); chain_group(); } else { + /* no action, assume default "{ print }" */ debug_printf_parse("%s: !TC_LBRACE\n", __func__); chain_node(OC_PRINT); } cn->r.n = mainseq.last; - } else /* if (tclass & TC_LBRACE) */ { - debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); - rollback_token(); - chain_group(); + continue; } + /* tclass == TC_LBRACE */ + debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); + chain_until_rbrace(); } - debug_printf_parse("%s: TC_EOF\n", __func__); } -- cgit v1.2.3-55-g6feb From 1f765709ed9c9595647853ac2cd7905f218c3044 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jul 2021 01:32:03 +0200 Subject: awk: open-code TS_OPTERM, no logic changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index c68416873..8c471d693 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -283,7 +283,6 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ #define TS_LVALUE (TC_VARIABLE | TC_ARRAY) #define TS_STATEMNT (TC_STATX | TC_WHILE) 
-#define TS_OPTERM (TC_SEMICOL | TC_NEWLINE) /* word tokens, cannot mean something else if not expected */ #define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \ @@ -291,13 +290,14 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ | TC_FUNCDECL | TC_BEGIN | TC_END) /* discard newlines after these */ -#define TS_NOTERM (TC_COMMA | TC_LBRACE | TC_RBRACE \ - | TS_BINOP | TS_OPTERM) +#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \ + | TC_SEMICOL | TC_NEWLINE) /* what can expression begin with */ #define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP) /* what can group begin with */ -#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_LBRACE) +#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \ + | TC_SEMICOL | TC_NEWLINE | TC_LBRACE) /* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ /* operator is inserted between them */ @@ -642,7 +642,7 @@ struct globals2 { #define g_buf (G.g_buf ) #define INIT_G() do { \ SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ - t_tclass = TS_OPTERM; \ + t_tclass = TC_NEWLINE; \ G.evaluate__seed = 1; \ } while (0) @@ -1090,7 +1090,7 @@ static uint32_t next_token(uint32_t expected) const uint32_t *ti; uint32_t tc, last_token_class; - last_token_class = t_tclass; /* t_tclass is initialized to TS_OPTERM */ + last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */ debug_printf_parse("%s() expected(%x):", __func__, expected); debug_parse_print_tc(expected); @@ -1470,7 +1470,8 @@ static node *parse_expr(uint32_t term_tc) case TC_LENGTH: debug_printf_parse("%s: TC_LENGTH\n", __func__); tc = next_token(TC_LPAREN /* length(...) 
*/ - | TS_OPTERM /* length; (or newline)*/ + | TC_SEMICOL /* length; */ + | TC_NEWLINE /* length */ | TC_RBRACE /* length } */ | TC_BINOPX /* length NUM */ | TC_COMMA /* print length, 1 */ @@ -1516,7 +1517,7 @@ static void chain_expr(uint32_t info) n = chain_node(info); - n->l.n = parse_expr(TS_OPTERM | TC_RBRACE); + n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE); if ((info & OF_REQUIRED) && !n->l.n) syntax_error(EMSG_TOO_FEW_ARGS); @@ -1577,8 +1578,8 @@ static void chain_group(void) chain_until_rbrace(); return; } - if (tc & (TS_OPSEQ | TS_OPTERM)) { - debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); + if (tc & (TS_OPSEQ | TC_SEMICOL | TC_NEWLINE)) { + debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL | TC_NEWLINE\n", __func__); rollback_token(); chain_expr(OC_EXEC | Vx); return; @@ -1647,10 +1648,10 @@ static void chain_group(void) case OC_PRINTF: debug_printf_parse("%s: OC_PRINT[F]\n", __func__); n = chain_node(t_info); - n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_RBRACE); + n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE); if (t_tclass & TC_OUTRDR) { n->info |= t_info; - n->r.n = parse_expr(TS_OPTERM | TC_RBRACE); + n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE); } if (t_tclass & TC_RBRACE) rollback_token(); @@ -1689,14 +1690,14 @@ static void parse_program(char *p) uint32_t tclass; tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | - TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL); + TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL); if (tclass == TC_EOF) { debug_printf_parse("%s: TC_EOF\n", __func__); break; } - if (tclass & TS_OPTERM) { /* ; or */ - debug_printf_parse("%s: TS_OPTERM\n", __func__); + if (tclass & (TC_SEMICOL | TC_NEWLINE)) { + debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__); //NB: gawk allows many newlines, but does not allow more than one semicolon: // BEGIN {...};; //would complain "each rule must have a pattern or an action part". 
@@ -1762,7 +1763,7 @@ static void parse_program(char *p) debug_printf_parse("%s: TS_OPSEQ\n", __func__); rollback_token(); cn = chain_node(OC_TEST); - cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); + cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE); if (t_tclass == TC_LBRACE) { debug_printf_parse("%s: TC_LBRACE\n", __func__); rollback_token(); -- cgit v1.2.3-55-g6feb From e1e7ad6b6005b2265667040fc9d7f69b73b0d5b0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jul 2021 01:59:36 +0200 Subject: awk: support %F %a %A in printf function old new delta .rodata 104111 104120 +9 Signed-off-by: Denys Vlasenko --- editors/awk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/editors/awk.c b/editors/awk.c index 8c471d693..2c3b49bc8 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -909,7 +909,7 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i do { c = *s; } while (c && *++s); if (strchr("diouxX", c)) { r = snprintf(b, size, format, (int)n); - } else if (strchr("eEfgG", c)) { + } else if (strchr("eEfFgGaA", c)) { r = snprintf(b, size, format, n); } else { syntax_error(EMSG_INV_FMT); -- cgit v1.2.3-55-g6feb From 2211fa70ccad29fc7bccd34c13141850ebb199da Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jul 2021 11:54:01 +0200 Subject: awk: do not use a copy of g_progname for node->l.new_progname We never destroy g_progname's, the strings still exist, no need to copy function old new delta chain_node 104 97 -7 Signed-off-by: Denys Vlasenko --- editors/awk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 2c3b49bc8..4119253ec 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -179,7 +179,7 @@ typedef struct node_s { struct node_s *n; var *v; int aidx; - char *new_progname; + const char *new_progname; regex_t *re; } l; union { @@ -1501,7 +1501,7 @@ static node *chain_node(uint32_t info) if (seq->programname != g_progname) { 
seq->programname = g_progname; n = chain_node(OC_NEWSOURCE); - n->l.new_progname = xstrdup(g_progname); + n->l.new_progname = g_progname; } n = seq->last; -- cgit v1.2.3-55-g6feb From 0e3ef4efb061366bfa4b9609fe3a03f3a1e40f0e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jul 2021 11:57:59 +0200 Subject: awk: rand(): 64-bit constants should be ULL Signed-off-by: Denys Vlasenko --- editors/awk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 4119253ec..e4dd6684c 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -3169,9 +3169,9 @@ static var *evaluate(node *op, var *res) uint64_t v = ((uint64_t)rand() << 32) | u; /* the above shift+or is optimized out on 32-bit arches */ # if RAND_MAX > 0x7fffffff - v &= 0x7fffffffffffffffUL; + v &= 0x7fffffffffffffffULL; # endif - R_d = (double)v / 0x8000000000000000UL; + R_d = (double)v / 0x8000000000000000ULL; #else # error Not implemented for this value of RAND_MAX #endif -- cgit v1.2.3-55-g6feb From 90404ed2f62a872ffd9a555660b7ce17fae372d8 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jul 2021 12:20:36 +0200 Subject: awk: match(): code shrink function old new delta do_match - 165 +165 exec_builtin_match 202 - -202 ------------------------------------------------------------------------------ (add/remove: 1/1 grow/shrink: 0/0 up/down: 165/-202) Total: -37 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index e4dd6684c..649198d15 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2497,26 +2497,24 @@ static NOINLINE int do_mktime(const char *ds) } /* Reduce stack usage in exec_builtin() by keeping match() code separate */ -static NOINLINE void exec_builtin_match(node *an1, const char *as0, var *res) +static NOINLINE var *do_match(node *an1, const char *as0) { regmatch_t pmatch[1]; regex_t sreg, *re; - int n; + int n, start, 
len; re = as_regex(an1, &sreg); n = regexec(re, as0, 1, pmatch, 0); - if (n == 0) { - pmatch[0].rm_so++; - pmatch[0].rm_eo++; - } else { - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = -1; - } if (re == &sreg) regfree(re); - setvar_i(newvar("RSTART"), pmatch[0].rm_so); - setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); - setvar_i(res, pmatch[0].rm_so); + start = 0; + len = -1; + if (n == 0) { + start = pmatch[0].rm_so + 1; + len = pmatch[0].rm_eo - pmatch[0].rm_so; + } + setvar_i(newvar("RLENGTH"), len); + return setvar_i(newvar("RSTART"), start); } /* Reduce stack usage in evaluate() by keeping builtins' code separate */ @@ -2686,7 +2684,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) break; case B_ma: - exec_builtin_match(an[1], as[0], res); + res = do_match(an[1], as[0]); break; case B_ge: -- cgit v1.2.3-55-g6feb From cb042b05828c4c89320bc9c7454c04c2761bbb9a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jul 2021 13:29:32 +0200 Subject: awk: restore strdup elision optimization in assignment function old new delta evaluate 3339 3387 +48 Signed-off-by: Denys Vlasenko --- editors/awk.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 649198d15..20672db9a 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -102,7 +102,7 @@ enum { #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */ #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */ #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */ -#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */ +#define VF_FSTR 0x1000 /* 1 = don't free() var::string (not malloced, or is owned by something else) */ #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */ #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */ @@ -1371,6 +1371,12 @@ static node *parse_expr(uint32_t term_tc) cn->a.n = vn->a.n; if (tc & TS_BINOP) { cn->l.n = vn; 
+//FIXME: this is the place to detect and reject assignments to non-lvalues. +//Currently we allow "assignments" to consts and temporaries, nonsense like this: +// awk 'BEGIN { "qwe" = 1 }' +// awk 'BEGIN { 7 *= 7 }' +// awk 'BEGIN { length("qwe") = 1 }' +// awk 'BEGIN { (1+1) += 3 }' expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; if ((t_info & OPCLSMASK) == OC_PGETLINE) { /* it's a pipe */ @@ -3043,14 +3049,17 @@ static var *evaluate(node *op, var *res) case XC( OC_MOVE ): debug_printf_eval("MOVE\n"); /* if source is a temporary string, jusk relink it to dest */ -//Disabled: if R.v is numeric but happens to have cached R.v->string, -//then L.v ends up being a string, which is wrong -// if (R.v == TMPVAR1 && R.v->string) { -// res = setvar_p(L.v, R.v->string); -// R.v->string = NULL; -// } else { + if (R.v == TMPVAR1 + && !(R.v->type & VF_NUMBER) + /* Why check !NUMBER? if R.v is a number but has cached R.v->string, + * L.v ends up a string, which is wrong */ + /*&& R.v->string - always not NULL (right?) */ + ) { + res = setvar_p(L.v, R.v->string); /* avoids strdup */ + R.v->string = NULL; + } else { res = copyvar(L.v, R.v); -// } + } break; case XC( OC_TERNARY ): -- cgit v1.2.3-55-g6feb From 08ca313d7edb99687068b93b5d2435b59f3db23a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 3 Jul 2021 13:57:47 +0200 Subject: awk: simplify tests for operation class Usually, an operation class has only one possible value of "info" word. In this case, just compare the entire info word, do not bother to mask OPCLSMASK bits. 
(Example where this is not the case: OC_REPLACE for "=") function old new delta mk_splitter 106 100 -6 chain_group 616 610 -6 nextarg 40 32 -8 exec_builtin 1157 1149 -8 as_regex 111 103 -8 awk_split 553 543 -10 parse_expr 948 936 -12 awk_getline 656 642 -14 evaluate 3387 3343 -44 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/9 up/down: 0/-116) Total: -116 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 64 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 20672db9a..cd135ef64 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -432,7 +432,8 @@ static const char tokenlist[] ALIGN1 = static const uint32_t tokeninfo[] ALIGN4 = { 0, 0, - OC_REGEXP, +#define TI_REGEXP OC_REGEXP + TI_REGEXP, xS|'a', xS|'w', xS|'|', OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', #define TI_PREINC (OC_UNARY|xV|P(9)|'P') @@ -443,12 +444,17 @@ static const uint32_t tokeninfo[] ALIGN4 = { OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, -#define TI_LESS (OC_COMPARE|VV|P(39)|2) +#define TI_LESS (OC_COMPARE|VV|P(39)|2) TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), - OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', - OC_IN|SV|P(49), /* TC_IN */ - OC_COMMA|SS|P(80), - OC_PGETLINE|SV|P(37), +#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?') +#define TI_COLON (OC_COLON|xx|P(67)|':') + OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON, +#define TI_IN (OC_IN|SV|P(49)) + TI_IN, +#define TI_COMMA (OC_COMMA|SS|P(80)) + TI_COMMA, +#define TI_PGETLINE (OC_PGETLINE|SV|P(37)) + TI_PGETLINE, OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', 0, /* ] */ 0, @@ 
-456,7 +462,8 @@ static const uint32_t tokeninfo[] ALIGN4 = { 0, 0, /* \n */ ST_IF, ST_DO, ST_FOR, OC_BREAK, - OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, +#define TI_PRINT OC_PRINT + OC_CONTINUE, OC_DELETE|Rx, TI_PRINT, OC_PRINTF, OC_NEXT, OC_NEXTFILE, OC_RETURN|Vx, OC_EXIT|Nx, ST_WHILE, @@ -465,8 +472,8 @@ static const uint32_t tokeninfo[] ALIGN4 = { // Highest byte bit pattern: nn s3s2s1 v3v2v1 // nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var // OC_F's are builtins with zero or one argument. -// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt. -// Check for no args is present in builtins' code (not in this table): rand, systime. +// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt +// Check for no args is present in builtins' code (not in this table): rand, systime // Have one _optional_ arg: fflush, srand, length #define OC_B OC_BUILTIN #define OC_F OC_FBLTIN @@ -1310,7 +1317,7 @@ static node *new_node(uint32_t info) static void mk_re_node(const char *s, node *n, regex_t *re) { - n->info = OC_REGEXP; + n->info = TI_REGEXP; n->l.re = re; n->r.ire = re + 1; xregcomp(re, s, REG_EXTENDED); @@ -1360,12 +1367,13 @@ static node *parse_expr(uint32_t term_tc) * previous operators with higher priority */ vn = cn; while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) - || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) + || ((t_info == vn->info) && t_info == TI_COLON) ) { vn = vn->a.n; if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); } - if ((t_info & OPCLSMASK) == OC_TERNARY) + if (t_info == TI_TERNARY) +//TODO: why? 
t_info += P(6); cn = vn->a.n->r.n = new_node(t_info); cn->a.n = vn->a.n; @@ -1378,7 +1386,7 @@ static node *parse_expr(uint32_t term_tc) // awk 'BEGIN { length("qwe") = 1 }' // awk 'BEGIN { (1+1) += 3 }' expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; - if ((t_info & OPCLSMASK) == OC_PGETLINE) { + if (t_info == TI_PGETLINE) { /* it's a pipe */ next_token(TC_GETLINE); /* give maximum priority to this pipe */ @@ -1630,7 +1638,7 @@ static void chain_group(void) next_token(TC_LPAREN); n2 = parse_expr(TC_SEMICOL | TC_RPAREN); if (t_tclass & TC_RPAREN) { /* for-in */ - if (!n2 || (n2->info & OPCLSMASK) != OC_IN) + if (!n2 || n2->info != TI_IN) syntax_error(EMSG_UNEXP_TOKEN); n = chain_node(OC_WALKINIT | VV); n->l.n = n2->l.n; @@ -1834,7 +1842,7 @@ static node *mk_splitter(const char *s, tsplitter *spl) re = &spl->re[0]; ire = &spl->re[1]; n = &spl->n; - if ((n->info & OPCLSMASK) == OC_REGEXP) { + if (n->info == TI_REGEXP) { regfree(re); regfree(ire); // TODO: nuke ire, use re+1? } @@ -1858,7 +1866,7 @@ static regex_t *as_regex(node *op, regex_t *preg) int cflags; const char *s; - if ((op->info & OPCLSMASK) == OC_REGEXP) { + if (op->info == TI_REGEXP) { return icase ? op->r.ire : op->l.re; } @@ -1968,7 +1976,7 @@ static int awk_split(const char *s, node *spl, char **slist) c[2] = '\n'; n = 0; - if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ + if (spl->info == TI_REGEXP) { /* regex split */ if (!*s) return n; /* "": zero fields */ n++; /* at least one field will be there */ @@ -2135,7 +2143,7 @@ static node *nextarg(node **pn) node *n; n = *pn; - if (n && (n->info & OPCLSMASK) == OC_COMMA) { + if (n && n->info == TI_COMMA) { *pn = n->r.n; n = n->l.n; } else { @@ -2229,7 +2237,7 @@ static int awk_getline(rstream *rsm, var *v) so = eo = p; r = 1; if (p > 0) { - if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) { + if (rsplitter.n.info == TI_REGEXP) { if (regexec(icase ? 
rsplitter.n.r.ire : rsplitter.n.l.re, b, 1, pmatch, 0) == 0) { so = pmatch[0].rm_so; @@ -2575,8 +2583,8 @@ static NOINLINE var *exec_builtin(node *op, var *res) char *s, *s1; if (nargs > 2) { - spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? - an[2] : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl); + spl = (an[2]->info == TI_REGEXP) ? an[2] + : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl); } else { spl = &fsplitter.n; } @@ -2860,7 +2868,7 @@ static var *evaluate(node *op, var *res) /* test pattern */ case XC( OC_TEST ): debug_printf_eval("TEST\n"); - if ((op1->info & OPCLSMASK) == OC_COMMA) { + if (op1->info == TI_COMMA) { /* it's range pattern */ if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { op->info |= OF_CHECKED; @@ -2921,7 +2929,7 @@ static var *evaluate(node *op, var *res) F = rsm->F; } - if ((opinfo & OPCLSMASK) == OC_PRINT) { + if (opinfo == TI_PRINT) { if (!op1) { fputs(getvar_s(intvar[F0]), F); } else { @@ -2940,7 +2948,7 @@ static var *evaluate(node *op, var *res) } } fputs(getvar_s(intvar[ORS]), F); - } else { /* OC_PRINTF */ + } else { /* PRINTF */ char *s = awk_printf(op1, &len); #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS fwrite(s, len, 1, F); @@ -3064,7 +3072,7 @@ static var *evaluate(node *op, var *res) case XC( OC_TERNARY ): debug_printf_eval("TERNARY\n"); - if ((op->r.n->info & OPCLSMASK) != OC_COLON) + if (op->r.n->info != TI_COLON) syntax_error(EMSG_POSSIBLE_ERROR); res = evaluate(istrue(L.v) ? 
op->r.n->l.n : op->r.n->r.n, res); break; @@ -3122,7 +3130,7 @@ static var *evaluate(node *op, var *res) if (op1) { rsm = newfile(L.s); if (!rsm->F) { - if ((opinfo & OPCLSMASK) == OC_PGETLINE) { + if (opinfo == TI_PGETLINE) { rsm->F = popen(L.s, "r"); rsm->is_pipe = TRUE; } else { @@ -3158,7 +3166,7 @@ static var *evaluate(node *op, var *res) double R_d = R_d; /* for compiler */ debug_printf_eval("FBLTIN\n"); - if (op1 && (op1->info & OPCLSMASK) == OC_COMMA) + if (op1 && op1->info == TI_COMMA) /* Simple builtins take one arg maximum */ syntax_error("Too many arguments"); @@ -3358,7 +3366,7 @@ static var *evaluate(node *op, var *res) case XC( OC_COMMA ): { const char *sep = ""; debug_printf_eval("COMMA\n"); - if ((opinfo & OPCLSMASK) == OC_COMMA) + if (opinfo == TI_COMMA) sep = getvar_s(intvar[SUBSEP]); setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s)); break; -- cgit v1.2.3-55-g6feb