aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2024-07-09 15:30:46 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2024-07-09 15:30:46 +0200
commit 38335df9e9f45378c3407defd38b5b610578bdda (patch)
tree e543513248930c41a99b55cbdd6cdbb3532e1d66
parent 49340d93edc778b193cb40b59cf94dbe38650013 (diff)
download busybox-w32-38335df9e9f45378c3407defd38b5b610578bdda.tar.gz
busybox-w32-38335df9e9f45378c3407defd38b5b610578bdda.tar.bz2
busybox-w32-38335df9e9f45378c3407defd38b5b610578bdda.zip
awk: restore assignment precedence to be lower than ternary ?:
Something is fishy with constructs like "3==v=3" in gawk, they should not work, but do. Ignore those for now. Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/awk.c 65
-rwxr-xr-x testsuite/awk.tests 31
2 files changed, 74 insertions, 22 deletions
diff --git a/editors/awk.c b/editors/awk.c
index 8bc214b69..697a44c8c 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -433,36 +433,47 @@ static const char tokenlist[] ALIGN1 =
433 ; 433 ;
434 434
435static const uint32_t tokeninfo[] ALIGN4 = { 435static const uint32_t tokeninfo[] ALIGN4 = {
436 0, 436 0, /* ( */
437 0, 437 0, /* ) */
438#define TI_REGEXP OC_REGEXP 438#define TI_REGEXP OC_REGEXP
439 TI_REGEXP, 439 TI_REGEXP, /* / */
440 /* >> > | */
440 xS|'a', xS|'w', xS|'|', 441 xS|'a', xS|'w', xS|'|',
442 /* ++ -- */
441 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', 443 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
442#define TI_PREINC (OC_UNARY|xV|P(9)|'P') 444#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
443#define TI_PREDEC (OC_UNARY|xV|P(9)|'M') 445#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
446 /* ++ -- $ */
444 TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), 447 TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
445 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-', 448 /* == = += -= */
446 OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&', 449 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
447 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&', 450 /* *= /= %= ^= (^ is exponentiation, NOT xor) */
451 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
452 /* + - **= ** */
453 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
454 /* / % ^ * */
448 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', 455 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
456 /* != >= <= > */
449 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, 457 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
450#define TI_LESS (OC_COMPARE|VV|P(39)|2) 458#define TI_LESS (OC_COMPARE|VV|P(39)|2)
459 /* < !~ ~ && */
451 TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), 460 TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
452#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?') 461#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
453#define TI_COLON (OC_COLON|xx|P(67)|':') 462#define TI_COLON (OC_COLON|xx|P(67)|':')
463 /* || ? : */
454 OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON, 464 OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
455#define TI_IN (OC_IN|SV|P(49)) 465#define TI_IN (OC_IN|SV|P(49))
456 TI_IN, 466 TI_IN,
457#define TI_COMMA (OC_COMMA|SS|P(80)) 467#define TI_COMMA (OC_COMMA|SS|P(80))
458 TI_COMMA, 468 TI_COMMA,
459#define TI_PGETLINE (OC_PGETLINE|SV|P(37)) 469#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
460 TI_PGETLINE, 470 TI_PGETLINE, /* | */
471 /* + - ! */
461 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', 472 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
462 0, /* ] */ 473 0, /* ] */
463 0, 474 0, /* { */
464 0, 475 0, /* } */
465 0, 476 0, /* ; */
466 0, /* \n */ 477 0, /* \n */
467 ST_IF, ST_DO, ST_FOR, OC_BREAK, 478 ST_IF, ST_DO, ST_FOR, OC_BREAK,
468 OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, 479 OC_CONTINUE, OC_DELETE|Rx, OC_PRINT,
@@ -511,6 +522,38 @@ static const uint32_t tokeninfo[] ALIGN4 = {
511#undef OC_F 522#undef OC_F
512}; 523};
513 524
525/* gawk 5.1.1 manpage says the precedence of comparisons and assignments is as follows:
526 * ......
527 * < > <= >= == !=
528 * ~ !~
529 * in
530 * &&
531 * ||
532 * ?:
533 * = += -= *= /= %= ^=
534 * But there are some abnormalities:
535 * awk 'BEGIN { print v=3==3,v }' - ok:
536 * 1 1
537 * awk 'BEGIN { print 3==v=3,v }' - wrong, (3==v)=3 is not a valid assignment:
538 * 1 3
539 * This also unexpectedly works: echo "foo" | awk '$1==$1="foo" {print $1}'
540 * More than one comparison op fails to parse:
541 * awk 'BEGIN { print 3==3==3 }' - syntax error (wrong, should work)
542 * awk 'BEGIN { print 3==3!=3 }' - syntax error (wrong, should work)
543 *
544 * The ternary a?b:c works as follows in gawk: "a" can't be assignment
545 * ("= has lower precedence than ?") but inside "b" or "c", assignment
546 * is higher precedence:
547 * awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w }'
548 * 5
549 * 5 1 5
550 * This differs from C and shell's "test" rules for ?: which have implicit ()
551 * around "b" in ?:, but not around "c" - they would barf on "w=5" above.
552 * gawk allows nesting of ?: - this works:
553 * u=0?v=4?5:6:w=7?8:9 means u=0?(v=4?5:6):(w=7?8:9)
554 * bbox is buggy here, requires parens: "u=0?(v=4):(w=5)"
555 */
556
514/* internal variable names and their initial values */ 557/* internal variable names and their initial values */
515/* asterisk marks SPECIAL vars; $ is just no-named Field0 */ 558/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
516enum { 559enum {
@@ -1409,7 +1452,7 @@ static node *parse_expr(uint32_t term_tc)
1409 vn = vn->a.n; 1452 vn = vn->a.n;
1410 if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); 1453 if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
1411 } 1454 }
1412 if (t_info == TI_TERNARY) 1455 if (t_info == TI_TERNARY) /* "?" operator */
1413//TODO: why? 1456//TODO: why?
1414 t_info += PRECEDENCE(6); 1457 t_info += PRECEDENCE(6);
1415 cn = vn->a.n->r.n = new_node(t_info); 1458 cn = vn->a.n->r.n = new_node(t_info);
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 063084a1c..be25f6696 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -5,6 +5,7 @@
5 5
6. ./testing.sh 6. ./testing.sh
7 7
8sq="'"
8# testing "description" "command" "result" "infile" "stdin" 9# testing "description" "command" "result" "infile" "stdin"
9 10
10testing "awk -F case 0" "awk -F '[#]' '{ print NF }'" "" "" "" 11testing "awk -F case 0" "awk -F '[#]' '{ print NF }'" "" "" ""
@@ -479,12 +480,6 @@ testing 'awk backslash+newline eaten with no trace' \
479 "Hello world\n" \ 480 "Hello world\n" \
480 '' '' 481 '' ''
481 482
482testing 'awk assign while test' \
483 "awk '\$1==\$1=\"foo\" {print \$1}'" \
484 "foo\n" \
485 "" \
486 "foo"
487
488# User-supplied bug (SEGV) example, was causing use-after-realloc 483# User-supplied bug (SEGV) example, was causing use-after-realloc
489testing 'awk assign while assign' \ 484testing 'awk assign while assign' \
490 "awk '\$5=\$\$5=\$0'; echo \$?" \ 485 "awk '\$5=\$\$5=\$0'; echo \$?" \
@@ -543,16 +538,30 @@ testing 'awk assign while assign' \
543# If field separator FS=' ' (default), fields are split only on 538# If field separator FS=' ' (default), fields are split only on
544# space or tab or linefeed, NOT other whitespace. 539# space or tab or linefeed, NOT other whitespace.
545testing 'awk does not split on CR (char 13)' \ 540testing 'awk does not split on CR (char 13)' \
546 "awk '{ \$1=\$0; print }'" \ 541 'awk '$sq'{ $1=$0; print }'$sq \
547 'word1 word2 word3\r word2 word3\r\n' \ 542 'word1 word2 word3\r word2 word3\r\n' \
548 '' 'word1 word2 word3\r' 543 '' 'word1 word2 word3\r'
549 544
550testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ 545# No, it seems a bug in gawk parser.
551 "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ 546#testing "awk = has higher precedence than == (despite what gawk manpage claims)" \
552 '0\n1\n2\n1\n3\n' \ 547# "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \
548# '0\n1\n2\n1\n3\n' \
549# '' ''
550#
551#testing 'awk assign while test' \
552# 'awk '$sq'$1==$1="foo" {print $1}'$sq \
553# "foo\n" \
554# "" \
555# "foo"
556
557testing "awk = and ?: precedence" \
558 'awk '$sq'BEGIN { a=0?"bug":"ok"; print a}'$sq \
559 'ok\n' \
553 '' '' 560 '' ''
554 561
555sq="'" 562# TODO: gawk can do this: awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w}'
563# and even this: u=0?v=4?5:6:w=7?8:9
564
556testing 'awk gensub backslashes \' \ 565testing 'awk gensub backslashes \' \
557 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ 566 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
558 's=\\ 567 's=\\