From 9f262698cc7e6a8b5621f018645f3c1fa06b226f Mon Sep 17 00:00:00 2001 From: Sertonix Date: Sat, 8 Jun 2024 18:05:44 +0000 Subject: wget: ignore header casing HTTP headers are case insensitive and therefore the check if a default header has been overwritten needs to be case insensitive. Without this patch `--header 'user-agent: test'` results in `User-Agent: Wget` and `user-agent: test` being send. function old new delta ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0) Total: 0 bytes text data bss dec hex filename 1040876 16443 1840 1059159 102957 busybox_old 1040876 16443 1840 1059159 102957 busybox_unstripped Signed-off-by: Sertonix Signed-off-by: Denys Vlasenko --- networking/wget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/networking/wget.c b/networking/wget.c index 199ddd4da..512bebfc2 100644 --- a/networking/wget.c +++ b/networking/wget.c @@ -1602,7 +1602,7 @@ IF_DESKTOP( "no-parent\0" No_argument "\xf0") bit = 1; words = wget_user_headers; while (*words) { - if (strstr(hdr, words) == hdr) { + if (strcasestr(hdr, words) == hdr) { G.user_headers |= bit; break; } -- cgit v1.2.3-55-g6feb From eba9b33b4595fbb5f2a64148b1ff3daeab9b3813 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 8 Jul 2024 17:53:32 +0200 Subject: chown: stop accepting deprecated USER.GROUP syntax, only : separator is allowed function old new delta parse_chown_usergroup_or_die 115 94 -21 Signed-off-by: Denys Vlasenko --- libpwdgrp/uidgid_get.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/libpwdgrp/uidgid_get.c b/libpwdgrp/uidgid_get.c index 283ac78fc..d76eb8298 100644 --- a/libpwdgrp/uidgid_get.c +++ b/libpwdgrp/uidgid_get.c @@ -93,11 +93,7 @@ void FAST_FUNC parse_chown_usergroup_or_die(struct bb_uidgid_t *u, char *user_gr u->uid = u->gid = (gid_t)-1L; /* Check if there is a group name */ - group = strchr(user_group, '.'); /* deprecated? 
*/ - if (!group) - group = strchr(user_group, ':'); - else - *group = ':'; /* replace '.' with ':' */ + group = strchr(user_group, ':'); /* Parse "user[:[group]]" */ if (!group) { /* "user" */ -- cgit v1.2.3-55-g6feb From fb08d43d44d1fea1f741fafb9aa7e1958a5f69aa Mon Sep 17 00:00:00 2001 From: Natanael Copa Date: Mon, 20 May 2024 17:55:28 +0200 Subject: awk: fix use after free (CVE-2023-42363) function old new delta evaluate 3377 3385 +8 Fixes https://bugs.busybox.net/show_bug.cgi?id=15865 Signed-off-by: Natanael Copa Signed-off-by: Denys Vlasenko --- editors/awk.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 0981c6735..ff6d6350b 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -2981,19 +2981,14 @@ static var *evaluate(node *op, var *res) /* yes, remember where Fields[] is */ old_Fields_ptr = Fields; } - if (opinfo & OF_STR1) { - L.s = getvar_s(L.v); - debug_printf_eval("L.s:'%s'\n", L.s); - } if (opinfo & OF_NUM1) { L_d = getvar_i(L.v); debug_printf_eval("L_d:%f\n", L_d); } } - /* NB: Must get string/numeric values of L (done above) - * _before_ evaluate()'ing R.v: if both L and R are $NNNs, - * and right one is large, then L.v points to Fields[NNN1], - * second evaluate() reallocates and moves (!) Fields[], + /* NB: if both L and R are $NNNs, and right one is large, + * then at this point L.v points to Fields[NNN1], second + * evaluate() below reallocates and moves (!) Fields[], * R.v points to Fields[NNN2] but L.v now points to freed mem! * (Seen trying to evaluate "$444 $44444") */ @@ -3013,6 +3008,16 @@ static var *evaluate(node *op, var *res) debug_printf_eval("R.s:'%s'\n", R.s); } } + /* Get L.s _after_ R.v is evaluated: it may have realloc'd L.v + * so we must get the string after "old_Fields_ptr" correction + * above. 
Testcase: x = (v = "abc", gsub("b", "X", v)); + */ + if (opinfo & OF_RES1) { + if (opinfo & OF_STR1) { + L.s = getvar_s(L.v); + debug_printf_eval("L.s:'%s'\n", L.s); + } + } debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); switch (XC(opinfo & OPCLSMASK)) { -- cgit v1.2.3-55-g6feb From 49340d93edc778b193cb40b59cf94dbe38650013 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 9 Jul 2024 03:04:26 +0200 Subject: awk: do not infinitely recurse getvar_s() if CONVFMT is set to a numeric value function old new delta fmt_num 247 257 +10 evaluate 3385 3379 -6 getvar_s 111 102 -9 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/2 up/down: 10/-15) Total: -5 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index ff6d6350b..8bc214b69 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -532,6 +532,7 @@ static const char vValues[] ALIGN1 = "%.6g\0" "%.6g\0" " \0" " \0" "\n\0" "\n\0" "\0" "\0" "\034\0" "\0" "\377"; +#define str_percent_dot_6g vValues /* hash size may grow to these values */ #define FIRST_PRIME 61 @@ -922,7 +923,7 @@ static double my_strtod_or_hexoct(char **pp) /* -------- working with variables (set/get/copy/etc) -------- */ -static void fmt_num(const char *format, double n) +static const char *fmt_num(const char *format, double n) { if (n == (long long)n) { snprintf(g_buf, MAXVARFMT, "%lld", (long long)n); @@ -939,6 +940,7 @@ static void fmt_num(const char *format, double n) syntax_error(EMSG_INV_FMT); } } + return g_buf; } static xhash *iamarray(var *a) @@ -1025,8 +1027,15 @@ static const char *getvar_s(var *v) { /* if v is numeric and has no cached string, convert it to string */ if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) { - fmt_num(getvar_s(intvar[CONVFMT]), v->number); - v->string = xstrdup(g_buf); + const char *convfmt = str_percent_dot_6g; /* 
"%.6g" */ + /* Get CONVFMT, unless we already recursed on it: + * someone might try to cause stack overflow by setting + * CONVFMT=9 (a numeric, not string, value) + */ + if (v != intvar[CONVFMT]) + convfmt = getvar_s(intvar[CONVFMT]); + /* Convert the value */ + v->string = xstrdup(fmt_num(convfmt, v->number)); v->type |= VF_CACHED; } return (v->string == NULL) ? "" : v->string; @@ -3097,9 +3106,8 @@ static var *evaluate(node *op, var *res) for (;;) { var *v = evaluate(nextarg(&op1), TMPVAR0); if (v->type & VF_NUMBER) { - fmt_num(getvar_s(intvar[OFMT]), - getvar_i(v)); - fputs(g_buf, F); + fputs(fmt_num(getvar_s(intvar[OFMT]), getvar_i(v)), + F); } else { fputs(getvar_s(v), F); } -- cgit v1.2.3-55-g6feb From 38335df9e9f45378c3407defd38b5b610578bdda Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 9 Jul 2024 15:30:46 +0200 Subject: awk: restore assignment precedence to be lower than ternary ?: Something is fishy with constrcts like "3==v=3" in gawk, they should not work, but do. Ignore those for now. 
Signed-off-by: Denys Vlasenko --- editors/awk.c | 65 ++++++++++++++++++++++++++++++++++++++++++++--------- testsuite/awk.tests | 31 ++++++++++++++++--------- 2 files changed, 74 insertions(+), 22 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 8bc214b69..697a44c8c 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -433,36 +433,47 @@ static const char tokenlist[] ALIGN1 = ; static const uint32_t tokeninfo[] ALIGN4 = { - 0, - 0, + 0, /* ( */ + 0, /* ) */ #define TI_REGEXP OC_REGEXP - TI_REGEXP, + TI_REGEXP, /* / */ + /* >> > | */ xS|'a', xS|'w', xS|'|', + /* ++ -- */ OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', #define TI_PREINC (OC_UNARY|xV|P(9)|'P') #define TI_PREDEC (OC_UNARY|xV|P(9)|'M') + /* ++ -- $ */ TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), - OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-', - OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&', - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&', + /* == = += -= */ + OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', + /* *= /= %= ^= (^ is exponentiation, NOT xor) */ + OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', + /* + - **= ** */ + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', + /* / % ^ * */ OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', + /* != >= <= > */ OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, #define TI_LESS (OC_COMPARE|VV|P(39)|2) + /* < !~ ~ && */ TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), #define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?') #define TI_COLON (OC_COLON|xx|P(67)|':') + /* || ? 
: */ OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON, #define TI_IN (OC_IN|SV|P(49)) TI_IN, #define TI_COMMA (OC_COMMA|SS|P(80)) TI_COMMA, #define TI_PGETLINE (OC_PGETLINE|SV|P(37)) - TI_PGETLINE, + TI_PGETLINE, /* | */ + /* + - ! */ OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', 0, /* ] */ - 0, - 0, - 0, + 0, /* { */ + 0, /* } */ + 0, /* ; */ 0, /* \n */ ST_IF, ST_DO, ST_FOR, OC_BREAK, OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, @@ -511,6 +522,38 @@ static const uint32_t tokeninfo[] ALIGN4 = { #undef OC_F }; +/* gawk 5.1.1 manpage says the precedence of comparisons and assignments are as follows: + * ...... + * < > <= >= == != + * ~ !~ + * in + * && + * || + * ?: + * = += -= *= /= %= ^= + * But there are some abnormalities: + * awk 'BEGIN { print v=3==3,v }' - ok: + * 1 1 + * awk 'BEGIN { print 3==v=3,v }' - wrong, (3==v)=3 is not a valid assignment: + * 1 3 + * This also unexpectedly works: echo "foo" | awk '$1==$1="foo" {print $1}' + * More than one comparison op fails to parse: + * awk 'BEGIN { print 3==3==3 }' - syntax error (wrong, should work) + * awk 'BEGIN { print 3==3!=3 }' - syntax error (wrong, should work) + * + * The ternary a?b:c works as follows in gawk: "a" can't be assignment + * ("= has lower precedence than ?") but inside "b" or "c", assignment + * is higher precedence: + * awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w }' + * 5 + * 5 1 5 + * This differs from C and shell's "test" rules for ?: which have implicit () + * around "b" in ?:, but not around "c" - they would barf on "w=5" above. 
+ * gawk allows nesting of ?: - this works: + * u=0?v=4?5:6:w=7?8:9 means u=0?(v=4?5:6):(w=7?8:9) + * bbox is buggy here, requires parens: "u=0?(v=4):(w=5)" + */ + /* internal variable names and their initial values */ /* asterisk marks SPECIAL vars; $ is just no-named Field0 */ enum { @@ -1409,7 +1452,7 @@ static node *parse_expr(uint32_t term_tc) vn = vn->a.n; if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); } - if (t_info == TI_TERNARY) + if (t_info == TI_TERNARY) /* "?" operator */ //TODO: why? t_info += PRECEDENCE(6); cn = vn->a.n->r.n = new_node(t_info); diff --git a/testsuite/awk.tests b/testsuite/awk.tests index 063084a1c..be25f6696 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -5,6 +5,7 @@ . ./testing.sh +sq="'" # testing "description" "command" "result" "infile" "stdin" testing "awk -F case 0" "awk -F '[#]' '{ print NF }'" "" "" "" @@ -479,12 +480,6 @@ testing 'awk backslash+newline eaten with no trace' \ "Hello world\n" \ '' '' -testing 'awk assign while test' \ - "awk '\$1==\$1=\"foo\" {print \$1}'" \ - "foo\n" \ - "" \ - "foo" - # User-supplied bug (SEGV) example, was causing use-after-realloc testing 'awk assign while assign' \ "awk '\$5=\$\$5=\$0'; echo \$?" \ @@ -543,16 +538,30 @@ testing 'awk assign while assign' \ # If field separator FS=' ' (default), fields are split only on # space or tab or linefeed, NOT other whitespace. testing 'awk does not split on CR (char 13)' \ - "awk '{ \$1=\$0; print }'" \ + 'awk '$sq'{ $1=$0; print }'$sq \ 'word1 word2 word3\r word2 word3\r\n' \ '' 'word1 word2 word3\r' -testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ - "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ - '0\n1\n2\n1\n3\n' \ +# No, it seems a bug in gawk parser. 
+#testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ +# "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ +# '0\n1\n2\n1\n3\n' \ +# '' '' +# +#testing 'awk assign while test' \ +# 'awk '$sq'$1==$1="foo" {print $1}'$sq \ +# "foo\n" \ +# "" \ +# "foo" + +testing "awk = and ?: precedence" \ + 'awk '$sq'BEGIN { a=0?"bug":"ok"; print a}'$sq \ + 'ok\n' \ '' '' -sq="'" +# TODO: gawk can do this: awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w}' +# and even this: u=0?v=4?5:6:w=7?8:9 + testing 'awk gensub backslashes \' \ 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ 's=\\ -- cgit v1.2.3-55-g6feb From 45d471d435a335b172724c53fff41957adb22885 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 9 Jul 2024 17:50:58 +0200 Subject: awk: code shrink function old new delta mk_splitter 100 96 -4 as_regex 103 99 -4 parse_expr 991 986 -5 awk_split 544 538 -6 awk_getline 559 552 -7 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-26) Total: -26 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 697a44c8c..cf5173938 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -180,11 +180,11 @@ typedef struct node_s { var *v; int aidx; const char *new_progname; + /* if TI_REGEXP node, points to regex_t[2] array (case sensitive and insensitive) */ regex_t *re; } l; union { struct node_s *n; - regex_t *ire; func *f; } r; union { @@ -1399,7 +1399,6 @@ static void mk_re_node(const char *s, node *n, regex_t *re) { n->info = TI_REGEXP; n->l.re = re; - n->r.ire = re + 1; xregcomp(re, s, REG_EXTENDED); xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); } @@ -1412,13 +1411,13 @@ static node *parse_lrparen_list(void) return parse_expr(TC_RPAREN); } -/* parse expression terminated by 
given argument, return ptr +/* Parse expression terminated by given token, return ptr * to built subtree. Terminator is eaten by parse_expr */ static node *parse_expr(uint32_t term_tc) { node sn; node *cn = &sn; - node *glptr; + node *getline_node; uint32_t tc, expected_tc; debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); @@ -1426,19 +1425,19 @@ static node *parse_expr(uint32_t term_tc) debug_printf_parse("\n"); sn.info = PRIMASK; - sn.r.n = sn.a.n = glptr = NULL; + sn.r.n = sn.a.n = getline_node = NULL; expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc; while (!((tc = next_token(expected_tc)) & term_tc)) { node *vn; - if (glptr && (t_info == TI_LESS)) { - /* input redirection (<) attached to glptr node */ + if (getline_node && (t_info == TI_LESS)) { + /* Attach input redirection (<) to getline node */ debug_printf_parse("%s: input redir\n", __func__); - cn = glptr->l.n = new_node(OC_CONCAT | SS | PRECEDENCE(37)); - cn->a.n = glptr; + cn = getline_node->l.n = new_node(OC_CONCAT | SS | PRECEDENCE(37)); + cn->a.n = getline_node; expected_tc = TS_OPERAND | TS_UOPPRE; - glptr = NULL; + getline_node = NULL; continue; } if (tc & (TS_BINOP | TC_UOPPOST)) { @@ -1485,19 +1484,21 @@ static node *parse_expr(uint32_t term_tc) expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; if (t_info == TI_PGETLINE) { - /* it's a pipe */ + /* it's a pipe token "|" */ next_token(TC_GETLINE); /* give maximum priority to this pipe */ cn->info &= ~PRIMASK; expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; } } else { + /* It was an unary postfix operator */ cn->r.n = vn; expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; } vn->a.n = cn; continue; } + /* It wasn't a binary or unary_postfix operator */ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); /* for operands and prefix-unary operators, attach them @@ -1572,7 +1573,7 @@ static node *parse_expr(uint32_t term_tc) case TC_GETLINE: debug_printf_parse("%s: TC_GETLINE\n", __func__); - glptr = cn; 
+ getline_node = cn; expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; break; @@ -1944,15 +1945,14 @@ static void nvfree(var *v, int sz) static node *mk_splitter(const char *s, tsplitter *spl) { - regex_t *re, *ire; + regex_t *re; node *n; - re = &spl->re[0]; - ire = &spl->re[1]; + re = spl->re; n = &spl->n; if (n->info == TI_REGEXP) { regfree(re); - regfree(ire); // TODO: nuke ire, use re+1? + regfree(re + 1); } if (s[0] && s[1]) { /* strlen(s) > 1 */ mk_re_node(s, n, re); @@ -1975,7 +1975,7 @@ static regex_t *as_regex(node *op, regex_t *preg) const char *s; if (op->info == TI_REGEXP) { - return icase ? op->r.ire : op->l.re; + return &op->l.re[icase]; } //tmpvar = nvalloc(1); @@ -2093,7 +2093,7 @@ static int awk_split(const char *s, node *spl, char **slist) regmatch_t pmatch[1]; l = strcspn(s, c+2); /* len till next NUL or \n */ - if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0 + if (regexec1_nonempty(&spl->l.re[icase], s, pmatch) == 0 && pmatch[0].rm_so <= l ) { /* if (pmatch[0].rm_eo == 0) ... - impossible */ @@ -2348,7 +2348,7 @@ static int awk_getline(rstream *rsm, var *v) if (p > 0) { char c = (char) rsplitter.n.info; if (rsplitter.n.info == TI_REGEXP) { - if (regexec(icase ? 
rsplitter.n.r.ire : rsplitter.n.l.re, + if (regexec(&rsplitter.n.l.re[icase], b, 1, pmatch, 0) == 0 ) { so = pmatch[0].rm_so; -- cgit v1.2.3-55-g6feb From 2eea3494f160da7640813bef1e276f806452148f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 10 Jul 2024 06:58:51 +0200 Subject: awk: improve comments and constants, no code changes Signed-off-by: Denys Vlasenko --- editors/awk.c | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index cf5173938..d4491d3e7 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -307,13 +307,13 @@ static void debug_parse_print_tc(uint32_t n) | TC_LENGTH) #define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE) -#define OF_RES1 0x010000 -#define OF_RES2 0x020000 -#define OF_STR1 0x040000 -#define OF_STR2 0x080000 -#define OF_NUM1 0x100000 -#define OF_CHECKED 0x200000 -#define OF_REQUIRED 0x400000 +#define OF_RES1 0x010000 /* evaluate(left_node) */ +#define OF_RES2 0x020000 /* evaluate(right_node) */ +#define OF_STR1 0x040000 /* ...and use its string value */ +#define OF_STR2 0x080000 /* ...and use its string value */ +#define OF_NUM1 0x100000 /* ...and use its numeric value */ +#define OF_REQUIRED 0x200000 /* left_node must not be NULL */ +#define OF_CHECKED 0x400000 /* range pattern flip-flop bit */ /* combined operator flags */ #define xx 0 @@ -331,17 +331,18 @@ static void debug_parse_print_tc(uint32_t n) #define OPCLSMASK 0xFF00 #define OPNMASK 0x007F -/* operator priority is a highest byte (even: r->l, odd: l->r grouping) - * (for builtins it has different meaning) +/* operator precedence is the highest byte (even: r->l, odd: l->r grouping) + * (for builtins the byte has a different meaning) */ #undef P #undef PRIMASK #undef PRIMASK2 +#define PRIMASK 0x7F000000 +#define PRIMASK2 0x7E000000 /* Smaller 'x' means _higher_ operator precedence */ #define PRECEDENCE(x) (x << 24) #define P(x) PRECEDENCE(x) -#define PRIMASK 0x7F000000 -#define PRIMASK2 
0x7E000000 +#define LOWEST_PRECEDENCE PRIMASK /* Operation classes */ #define SHIFT_TIL_THIS 0x0600 @@ -1424,7 +1425,7 @@ static node *parse_expr(uint32_t term_tc) debug_parse_print_tc(term_tc); debug_printf_parse("\n"); - sn.info = PRIMASK; + sn.info = LOWEST_PRECEDENCE; sn.r.n = sn.a.n = getline_node = NULL; expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc; @@ -1443,7 +1444,7 @@ static node *parse_expr(uint32_t term_tc) if (tc & (TS_BINOP | TC_UOPPOST)) { debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); /* for binary and postfix-unary operators, jump back over - * previous operators with higher priority */ + * previous operators with higher precedence */ vn = cn; while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) || (t_info == vn->info && t_info == TI_COLON) @@ -1451,7 +1452,7 @@ static node *parse_expr(uint32_t term_tc) vn = vn->a.n; if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); } - if (t_info == TI_TERNARY) /* "?" operator */ + if (t_info == TI_TERNARY) /* "?" token */ //TODO: why? 
t_info += PRECEDENCE(6); cn = vn->a.n->r.n = new_node(t_info); @@ -1483,11 +1484,10 @@ static node *parse_expr(uint32_t term_tc) } expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; - if (t_info == TI_PGETLINE) { - /* it's a pipe token "|" */ - next_token(TC_GETLINE); - /* give maximum priority to this pipe */ - cn->info &= ~PRIMASK; + if (t_info == TI_PGETLINE) { /* "|" token */ + next_token(TC_GETLINE); /* must be followed by "getline" */ + /* give maximum precedence to this pipe */ + cn->info &= ~PRIMASK; /* sets PRECEDENCE(0) */ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; } } else { @@ -1498,7 +1498,7 @@ static node *parse_expr(uint32_t term_tc) vn->a.n = cn; continue; } - /* It wasn't a binary or unary_postfix operator */ + /* It wasn't a binary or postfix-unary operator */ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); /* for operands and prefix-unary operators, attach them @@ -1572,6 +1572,13 @@ static node *parse_expr(uint32_t term_tc) break; case TC_GETLINE: + /* "getline" is a function, not a statement. + * Works in gawk: + * r = ["SHELL CMD" | ] getline [VAR] [<"FILE"] + * if (getline <"FILE" < 0) print "Can't read FILE" + * while ("SHELL CMD" | getline > 0) ... + * Returns: 1 successful read, 0 EOF, -1 error (sets ERRNO) + */ debug_printf_parse("%s: TC_GETLINE\n", __func__); getline_node = cn; expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; -- cgit v1.2.3-55-g6feb From 0a88a7ae3bb350d72712628078cf79517f627e99 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 10 Jul 2024 07:04:28 +0200 Subject: awk: mktime() with no arguments is not allowed It was SEGVing. 
Signed-off-by: Denys Vlasenko --- editors/awk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index d4491d3e7..64e752f4b 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -509,7 +509,7 @@ static const uint32_t tokeninfo[] ALIGN4 = { OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub - OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime + OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv|A1,// substr systime strftime mktime OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper OC_F|F_le|Sx, // length OC_GETLINE|SV, // getline @@ -2870,7 +2870,6 @@ static NOINLINE var *exec_builtin(node *op, var *res) tt = getvar_i(av[1]); else time(&tt); - //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"; i = strftime(g_buf, MAXVARFMT, ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"), localtime(&tt)); -- cgit v1.2.3-55-g6feb From ba0f94458bf992bb1f34c75e36960811d77b5f55 Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Thu, 2 May 2024 13:08:14 +0100 Subject: ash: fix parsing of alias expansion + bash features An alias expansion immediately followed by '<' and a newline is parsed incorrectly: ~ $ alias x='echo yo' ~ $ x< yo ~ $ sh: syntax error: unexpected newline The echo is executed and an error is printed on the next command submission. In dash the echo isn't executed and the error is reported immediately: $ alias x='echo yo' $ x< dash: 3: Syntax error: newline unexpected $ The difference between BusyBox and dash is that BusyBox supports bash-style process substitution and output redirection. These require checking for '<(', '>(' and '&>' in readtoken1(). 
In the case above, when the end of the alias is found, the '<' and the following newline are both read to check for '<('. Since there's no match both characters are pushed back. The next input is obtained by reading the expansion of the alias. Once this string is exhausted the next call to __pgetc() calls preadbuffer() which pops the string, reverts to the previous input and recursively calls __pgetc(). This request is satisfied from the pungetc buffer. But the first __pgetc() doesn't know this: it sees the character has come from preadbuffer() so it (incorrectly) updates the pungetc buffer. Resolve the issue by moving the code to pop the string and fetch the next character up from preadbuffer() into __pgetc(). function old new delta pgetc 28 589 +561 __pgetc 607 - -607 ------------------------------------------------------------------------------ (add/remove: 0/1 grow/shrink: 1/0 up/down: 561/-607) Total: -46 bytes Signed-off-by: Ron Yorston Signed-off-by: Denys Vlasenko --- shell/ash.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/shell/ash.c b/shell/ash.c index 4ca4c6c56..5df0ba625 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -10934,11 +10934,6 @@ preadbuffer(void) char *q; int more; - if (unlikely(g_parsefile->strpush)) { - popstring(); - return __pgetc(); - } - if (g_parsefile->buf == NULL) { pgetc_debug("preadbuffer PEOF1"); return PEOF; @@ -11053,8 +11048,13 @@ static int __pgetc(void) if (--g_parsefile->left_in_line >= 0) c = (unsigned char)*g_parsefile->next_to_pgetc++; - else + else { + if (unlikely(g_parsefile->strpush)) { + popstring(); + return __pgetc(); + } c = preadbuffer(); + } g_parsefile->lastc[1] = g_parsefile->lastc[0]; g_parsefile->lastc[0] = c; -- cgit v1.2.3-55-g6feb From 87c31609f40c98203bbc9c30117a15a9342b2d4c Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Tue, 2 Jan 2024 09:01:32 +0000 Subject: Makefile.flags: suppress clang warnings when cross-compiling Extend the changes introduced by commit 
b4ef2e3467 (Makefile.flags: suppress some clang-9 warnings) so they also cover the case where clang is used as a cross-compiler. Signed-off-by: Ron Yorston Signed-off-by: Denys Vlasenko --- Makefile.flags | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.flags b/Makefile.flags index e4cd658fd..97cb4dca2 100644 --- a/Makefile.flags +++ b/Makefile.flags @@ -48,7 +48,7 @@ endif # gcc 3.x emits bogus "old style proto" warning on find.c:alloc_action() CFLAGS += $(call cc-ifversion, -ge, 0400, -Wold-style-definition) -ifneq ($(CC),clang) +ifneq ($(lastword $(subst -, ,$(CC))),clang) # "clang-9: warning: optimization flag '-finline-limit=0' is not supported CFLAGS += $(call cc-option,-finline-limit=0,) endif @@ -66,7 +66,7 @@ CFLAGS += $(call cc-option,-static-libgcc,) endif CFLAGS += $(call cc-option,-falign-functions=1,) -ifneq ($(CC),clang) +ifneq ($(lastword $(subst -, ,$(CC))),clang) # "clang-9: warning: optimization flag '-falign-jumps=1' is not supported" (and same for other two) CFLAGS += $(call cc-option,-falign-jumps=1 -falign-labels=1 -falign-loops=1,) endif @@ -79,7 +79,7 @@ CFLAGS += $(call cc-option,-fno-asynchronous-unwind-tables,) CFLAGS += $(call cc-option,-fno-builtin-printf,) # clang-9 does not like "str" + N and "if (CONFIG_ITEM && cond)" constructs -ifeq ($(CC),clang) +ifeq ($(lastword $(subst -, ,$(CC))),clang) CFLAGS += $(call cc-option,-Wno-string-plus-int -Wno-constant-logical-operand) endif -- cgit v1.2.3-55-g6feb From 0af28b84e58307422f807ddbdafc67a68f71eb64 Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Wed, 10 Jul 2024 07:35:26 +0200 Subject: ash: remove defunct control character to save a few bytes Commit 549deab5a (ash: move parse-time quote flag detection to run-time) did away with the need to distinguish between backquotes inside and outside quotes. This left a gap among the control characters used in argument strings. Removing this gap saves a few bytes. 
function old new delta .rodata 167346 167338 -8 cmdputs 399 388 -11 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-19) Total: -19 bytes Signed-off-by: Ron Yorston Signed-off-by: Denys Vlasenko --- shell/ash.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/shell/ash.c b/shell/ash.c index 5df0ba625..094a87390 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -835,13 +835,13 @@ out2str(const char *p) #define CTLVAR ((unsigned char)'\202') /* variable defn */ #define CTLENDVAR ((unsigned char)'\203') #define CTLBACKQ ((unsigned char)'\204') -#define CTLARI ((unsigned char)'\206') /* arithmetic expression */ -#define CTLENDARI ((unsigned char)'\207') -#define CTLQUOTEMARK ((unsigned char)'\210') +#define CTLARI ((unsigned char)'\205') /* arithmetic expression */ +#define CTLENDARI ((unsigned char)'\206') +#define CTLQUOTEMARK ((unsigned char)'\207') #define CTL_LAST CTLQUOTEMARK #if BASH_PROCESS_SUBST -# define CTLTOPROC ((unsigned char)'\211') -# define CTLFROMPROC ((unsigned char)'\212') +# define CTLTOPROC ((unsigned char)'\210') +# define CTLFROMPROC ((unsigned char)'\211') # undef CTL_LAST # define CTL_LAST CTLFROMPROC #endif @@ -3246,17 +3246,17 @@ static const uint8_t syntax_index_table[] ALIGN1 = { /* 130 CTLVAR */ CCTL_CCTL_CCTL_CCTL, /* 131 CTLENDVAR */ CCTL_CCTL_CCTL_CCTL, /* 132 CTLBACKQ */ CCTL_CCTL_CCTL_CCTL, - /* 133 CTLQUOTE */ CCTL_CCTL_CCTL_CCTL, - /* 134 CTLARI */ CCTL_CCTL_CCTL_CCTL, - /* 135 CTLENDARI */ CCTL_CCTL_CCTL_CCTL, - /* 136 CTLQUOTEMARK */ CCTL_CCTL_CCTL_CCTL, + /* 133 CTLARI */ CCTL_CCTL_CCTL_CCTL, + /* 134 CTLENDARI */ CCTL_CCTL_CCTL_CCTL, + /* 135 CTLQUOTEMARK */ CCTL_CCTL_CCTL_CCTL, #if BASH_PROCESS_SUBST - /* 137 CTLTOPROC */ CCTL_CCTL_CCTL_CCTL, - /* 138 CTLFROMPROC */ CCTL_CCTL_CCTL_CCTL, + /* 136 CTLTOPROC */ CCTL_CCTL_CCTL_CCTL, + /* 137 CTLFROMPROC */ CCTL_CCTL_CCTL_CCTL, #else + /* 136 */ 
CWORD_CWORD_CWORD_CWORD, /* 137 */ CWORD_CWORD_CWORD_CWORD, - /* 138 */ CWORD_CWORD_CWORD_CWORD, #endif + /* 138 */ CWORD_CWORD_CWORD_CWORD, /* 139 */ CWORD_CWORD_CWORD_CWORD, /* 140 */ CWORD_CWORD_CWORD_CWORD, /* 141 */ CWORD_CWORD_CWORD_CWORD, -- cgit v1.2.3-55-g6feb