diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2024-07-09 15:30:46 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2024-07-09 15:30:46 +0200 |
commit | 38335df9e9f45378c3407defd38b5b610578bdda (patch) | |
tree | e543513248930c41a99b55cbdd6cdbb3532e1d66 | |
parent | 49340d93edc778b193cb40b59cf94dbe38650013 (diff) | |
download | busybox-w32-38335df9e9f45378c3407defd38b5b610578bdda.tar.gz busybox-w32-38335df9e9f45378c3407defd38b5b610578bdda.tar.bz2 busybox-w32-38335df9e9f45378c3407defd38b5b610578bdda.zip |
awk: restore assignment precedence to be lower than ternary ?:
Something is fishy with constructs like "3==v=3" in gawk,
they should not work, but do. Ignore those for now.
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 65 | ||||
-rwxr-xr-x | testsuite/awk.tests | 31 |
2 files changed, 74 insertions, 22 deletions
diff --git a/editors/awk.c b/editors/awk.c index 8bc214b69..697a44c8c 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -433,36 +433,47 @@ static const char tokenlist[] ALIGN1 = | |||
433 | ; | 433 | ; |
434 | 434 | ||
435 | static const uint32_t tokeninfo[] ALIGN4 = { | 435 | static const uint32_t tokeninfo[] ALIGN4 = { |
436 | 0, | 436 | 0, /* ( */ |
437 | 0, | 437 | 0, /* ) */ |
438 | #define TI_REGEXP OC_REGEXP | 438 | #define TI_REGEXP OC_REGEXP |
439 | TI_REGEXP, | 439 | TI_REGEXP, /* / */ |
440 | /* >> > | */ | ||
440 | xS|'a', xS|'w', xS|'|', | 441 | xS|'a', xS|'w', xS|'|', |
442 | /* ++ -- */ | ||
441 | OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', | 443 | OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', |
442 | #define TI_PREINC (OC_UNARY|xV|P(9)|'P') | 444 | #define TI_PREINC (OC_UNARY|xV|P(9)|'P') |
443 | #define TI_PREDEC (OC_UNARY|xV|P(9)|'M') | 445 | #define TI_PREDEC (OC_UNARY|xV|P(9)|'M') |
446 | /* ++ -- $ */ | ||
444 | TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), | 447 | TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), |
445 | OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-', | 448 | /* == = += -= */ |
446 | OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&', | 449 | OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', |
447 | OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&', | 450 | /* *= /= %= ^= (^ is exponentiation, NOT xor) */ |
451 | OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', | ||
452 | /* + - **= ** */ | ||
453 | OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', | ||
454 | /* / % ^ * */ | ||
448 | OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', | 455 | OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', |
456 | /* != >= <= > */ | ||
449 | OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, | 457 | OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, |
450 | #define TI_LESS (OC_COMPARE|VV|P(39)|2) | 458 | #define TI_LESS (OC_COMPARE|VV|P(39)|2) |
459 | /* < !~ ~ && */ | ||
451 | TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), | 460 | TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), |
452 | #define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?') | 461 | #define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?') |
453 | #define TI_COLON (OC_COLON|xx|P(67)|':') | 462 | #define TI_COLON (OC_COLON|xx|P(67)|':') |
463 | /* || ? : */ | ||
454 | OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON, | 464 | OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON, |
455 | #define TI_IN (OC_IN|SV|P(49)) | 465 | #define TI_IN (OC_IN|SV|P(49)) |
456 | TI_IN, | 466 | TI_IN, |
457 | #define TI_COMMA (OC_COMMA|SS|P(80)) | 467 | #define TI_COMMA (OC_COMMA|SS|P(80)) |
458 | TI_COMMA, | 468 | TI_COMMA, |
459 | #define TI_PGETLINE (OC_PGETLINE|SV|P(37)) | 469 | #define TI_PGETLINE (OC_PGETLINE|SV|P(37)) |
460 | TI_PGETLINE, | 470 | TI_PGETLINE, /* | */ |
471 | /* + - ! */ | ||
461 | OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', | 472 | OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', |
462 | 0, /* ] */ | 473 | 0, /* ] */ |
463 | 0, | 474 | 0, /* { */ |
464 | 0, | 475 | 0, /* } */ |
465 | 0, | 476 | 0, /* ; */ |
466 | 0, /* \n */ | 477 | 0, /* \n */ |
467 | ST_IF, ST_DO, ST_FOR, OC_BREAK, | 478 | ST_IF, ST_DO, ST_FOR, OC_BREAK, |
468 | OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, | 479 | OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, |
@@ -511,6 +522,38 @@ static const uint32_t tokeninfo[] ALIGN4 = { | |||
511 | #undef OC_F | 522 | #undef OC_F |
512 | }; | 523 | }; |
513 | 524 | ||
525 | /* gawk 5.1.1 manpage says the precedence of comparisons and assignments is as follows: | ||
526 | * ...... | ||
527 | * < > <= >= == != | ||
528 | * ~ !~ | ||
529 | * in | ||
530 | * && | ||
531 | * || | ||
532 | * ?: | ||
533 | * = += -= *= /= %= ^= | ||
534 | * But there are some abnormalities: | ||
535 | * awk 'BEGIN { print v=3==3,v }' - ok: | ||
536 | * 1 1 | ||
537 | * awk 'BEGIN { print 3==v=3,v }' - wrong, (3==v)=3 is not a valid assignment: | ||
538 | * 1 3 | ||
539 | * This also unexpectedly works: echo "foo" | awk '$1==$1="foo" {print $1}' | ||
540 | * More than one comparison op fails to parse: | ||
541 | * awk 'BEGIN { print 3==3==3 }' - syntax error (wrong, should work) | ||
542 | * awk 'BEGIN { print 3==3!=3 }' - syntax error (wrong, should work) | ||
543 | * | ||
544 | * The ternary a?b:c works as follows in gawk: "a" can't be assignment | ||
545 | * ("= has lower precedence than ?") but inside "b" or "c", assignment | ||
546 | * is higher precedence: | ||
547 | * awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w }' | ||
548 | * 5 | ||
549 | * 5 1 5 | ||
550 | * This differs from C and shell's "test" rules for ?: which have implicit () | ||
551 | * around "b" in ?:, but not around "c" - they would barf on "w=5" above. | ||
552 | * gawk allows nesting of ?: - this works: | ||
553 | * u=0?v=4?5:6:w=7?8:9 means u=0?(v=4?5:6):(w=7?8:9) | ||
554 | * bbox is buggy here, requires parens: "u=0?(v=4):(w=5)" | ||
555 | */ | ||
556 | |||
514 | /* internal variable names and their initial values */ | 557 | /* internal variable names and their initial values */ |
515 | /* asterisk marks SPECIAL vars; $ is just no-named Field0 */ | 558 | /* asterisk marks SPECIAL vars; $ is just no-named Field0 */ |
516 | enum { | 559 | enum { |
@@ -1409,7 +1452,7 @@ static node *parse_expr(uint32_t term_tc) | |||
1409 | vn = vn->a.n; | 1452 | vn = vn->a.n; |
1410 | if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); | 1453 | if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); |
1411 | } | 1454 | } |
1412 | if (t_info == TI_TERNARY) | 1455 | if (t_info == TI_TERNARY) /* "?" operator */ |
1413 | //TODO: why? | 1456 | //TODO: why? |
1414 | t_info += PRECEDENCE(6); | 1457 | t_info += PRECEDENCE(6); |
1415 | cn = vn->a.n->r.n = new_node(t_info); | 1458 | cn = vn->a.n->r.n = new_node(t_info); |
diff --git a/testsuite/awk.tests b/testsuite/awk.tests index 063084a1c..be25f6696 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests | |||
@@ -5,6 +5,7 @@ | |||
5 | 5 | ||
6 | . ./testing.sh | 6 | . ./testing.sh |
7 | 7 | ||
8 | sq="'" | ||
8 | # testing "description" "command" "result" "infile" "stdin" | 9 | # testing "description" "command" "result" "infile" "stdin" |
9 | 10 | ||
10 | testing "awk -F case 0" "awk -F '[#]' '{ print NF }'" "" "" "" | 11 | testing "awk -F case 0" "awk -F '[#]' '{ print NF }'" "" "" "" |
@@ -479,12 +480,6 @@ testing 'awk backslash+newline eaten with no trace' \ | |||
479 | "Hello world\n" \ | 480 | "Hello world\n" \ |
480 | '' '' | 481 | '' '' |
481 | 482 | ||
482 | testing 'awk assign while test' \ | ||
483 | "awk '\$1==\$1=\"foo\" {print \$1}'" \ | ||
484 | "foo\n" \ | ||
485 | "" \ | ||
486 | "foo" | ||
487 | |||
488 | # User-supplied bug (SEGV) example, was causing use-after-realloc | 483 | # User-supplied bug (SEGV) example, was causing use-after-realloc |
489 | testing 'awk assign while assign' \ | 484 | testing 'awk assign while assign' \ |
490 | "awk '\$5=\$\$5=\$0'; echo \$?" \ | 485 | "awk '\$5=\$\$5=\$0'; echo \$?" \ |
@@ -543,16 +538,30 @@ testing 'awk assign while assign' \ | |||
543 | # If field separator FS=' ' (default), fields are split only on | 538 | # If field separator FS=' ' (default), fields are split only on |
544 | # space or tab or linefeed, NOT other whitespace. | 539 | # space or tab or linefeed, NOT other whitespace. |
545 | testing 'awk does not split on CR (char 13)' \ | 540 | testing 'awk does not split on CR (char 13)' \ |
546 | "awk '{ \$1=\$0; print }'" \ | 541 | 'awk '$sq'{ $1=$0; print }'$sq \ |
547 | 'word1 word2 word3\r word2 word3\r\n' \ | 542 | 'word1 word2 word3\r word2 word3\r\n' \ |
548 | '' 'word1 word2 word3\r' | 543 | '' 'word1 word2 word3\r' |
549 | 544 | ||
550 | testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ | 545 | # No, it seems to be a bug in the gawk parser. |
551 | "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ | 546 | #testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ |
552 | '0\n1\n2\n1\n3\n' \ | 547 | # "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ |
548 | # '0\n1\n2\n1\n3\n' \ | ||
549 | # '' '' | ||
550 | # | ||
551 | #testing 'awk assign while test' \ | ||
552 | # 'awk '$sq'$1==$1="foo" {print $1}'$sq \ | ||
553 | # "foo\n" \ | ||
554 | # "" \ | ||
555 | # "foo" | ||
556 | |||
557 | testing "awk = and ?: precedence" \ | ||
558 | 'awk '$sq'BEGIN { a=0?"bug":"ok"; print a}'$sq \ | ||
559 | 'ok\n' \ | ||
553 | '' '' | 560 | '' '' |
554 | 561 | ||
555 | sq="'" | 562 | # TODO: gawk can do this: awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w}' |
563 | # and even this: u=0?v=4?5:6:w=7?8:9 | ||
564 | |||
556 | testing 'awk gensub backslashes \' \ | 565 | testing 'awk gensub backslashes \' \ |
557 | 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ | 566 | 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ |
558 | 's=\\ | 567 | 's=\\ |