diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2023-06-03 00:39:33 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2023-06-03 00:42:10 +0200 |
commit | 5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6 (patch) | |
tree | f3c3aa3267164310b55192a1a3b174523aa49dbe | |
parent | 0256e00a9d077588bd3a39f5a1ef7e2eaa2911e4 (diff) | |
download | busybox-w32-5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6.tar.gz busybox-w32-5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6.tar.bz2 busybox-w32-5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6.zip |
awk: fix backslash handling in sub() builtins
function old new delta
awk_sub 559 544 -15
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 41 | ||||
-rwxr-xr-x | testsuite/awk.tests | 47 |
2 files changed, 66 insertions, 22 deletions
diff --git a/editors/awk.c b/editors/awk.c index 0f062dcdb..f77573806 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -2492,7 +2492,7 @@ static char *awk_printf(node *n, size_t *len) | |||
2492 | * store result into (dest), return number of substitutions. | 2492 | * store result into (dest), return number of substitutions. |
2493 | * If nm = 0, replace all matches. | 2493 | * If nm = 0, replace all matches. |
2494 | * If src or dst is NULL, use $0. | 2494 | * If src or dst is NULL, use $0. |
2495 | * If subexp != 0, enable subexpression matching (\1-\9). | 2495 | * If subexp != 0, enable subexpression matching (\0-\9). |
2496 | */ | 2496 | */ |
2497 | static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) | 2497 | static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) |
2498 | { | 2498 | { |
@@ -2520,35 +2520,32 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int | |||
2520 | residx += eo; | 2520 | residx += eo; |
2521 | if (++match_no >= nm) { | 2521 | if (++match_no >= nm) { |
2522 | const char *s; | 2522 | const char *s; |
2523 | int nbs; | 2523 | int bslash; |
2524 | 2524 | ||
2525 | /* replace */ | 2525 | /* replace */ |
2526 | residx -= (eo - so); | 2526 | residx -= (eo - so); |
2527 | nbs = 0; | 2527 | bslash = 0; |
2528 | for (s = repl; *s; s++) { | 2528 | for (s = repl; *s; s++) { |
2529 | char c = resbuf[residx++] = *s; | 2529 | char c = *s; |
2530 | if (c == '\\') { | 2530 | if (c == '\\' && s[1]) { |
2531 | nbs++; | 2531 | bslash ^= 1; |
2532 | continue; | 2532 | if (bslash) |
2533 | continue; | ||
2533 | } | 2534 | } |
2534 | if (c == '&' || (subexp && c >= '0' && c <= '9')) { | 2535 | if ((!bslash && c == '&') |
2535 | int j; | 2536 | || (subexp && bslash && c >= '0' && c <= '9') |
2536 | residx -= ((nbs + 3) >> 1); | 2537 | ) { |
2537 | j = 0; | 2538 | int n, j = 0; |
2538 | if (c != '&') { | 2539 | if (c != '&') { |
2539 | j = c - '0'; | 2540 | j = c - '0'; |
2540 | nbs++; | ||
2541 | } | 2541 | } |
2542 | if (nbs % 2) { | 2542 | n = pmatch[j].rm_eo - pmatch[j].rm_so; |
2543 | resbuf[residx++] = c; | 2543 | resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); |
2544 | } else { | 2544 | memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); |
2545 | int n = pmatch[j].rm_eo - pmatch[j].rm_so; | 2545 | residx += n; |
2546 | resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); | 2546 | } else |
2547 | memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); | 2547 | resbuf[residx++] = c; |
2548 | residx += n; | 2548 | bslash = 0; |
2549 | } | ||
2550 | } | ||
2551 | nbs = 0; | ||
2552 | } | 2549 | } |
2553 | } | 2550 | } |
2554 | 2551 | ||
diff --git a/testsuite/awk.tests b/testsuite/awk.tests index cdab93d21..c61d32947 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests | |||
@@ -552,4 +552,51 @@ testing "awk = has higher precedence than == (despite what gawk manpage claims)" | |||
552 | '0\n1\n2\n1\n3\n' \ | 552 | '0\n1\n2\n1\n3\n' \ |
553 | '' '' | 553 | '' '' |
554 | 554 | ||
555 | sq="'" | ||
556 | testing 'awk gensub backslashes \' \ | ||
557 | 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ | ||
558 | 's=\\ | ||
559 | \\|\\ | ||
560 | ' \ | ||
561 | '' '' | ||
562 | testing 'awk gensub backslashes \\' \ | ||
563 | 'awk '$sq'BEGIN { s="\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ | ||
564 | 's=\\\\ | ||
565 | \\|\\ | ||
566 | ' \ | ||
567 | '' '' | ||
568 | # gawk 5.1.1 handles trailing unpaired \ inconsistently. | ||
569 | # If the replacement string is a single \, it is used verbatim, | ||
570 | # but if it is \\\ (three backslashes), gawk uses "\<NUL>" (!!!), not "\\" as you would expect. | ||
571 | testing 'awk gensub backslashes \\\' \ | ||
572 | 'awk '$sq'BEGIN { s="\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ | ||
573 | 's=\\\\\\ | ||
574 | \\\\|\\\\ | ||
575 | ' \ | ||
576 | '' '' | ||
577 | testing 'awk gensub backslashes \\\\' \ | ||
578 | 'awk '$sq'BEGIN { s="\\\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ | ||
579 | 's=\\\\\\\\ | ||
580 | \\\\|\\\\ | ||
581 | ' \ | ||
582 | '' '' | ||
583 | testing 'awk gensub backslashes \&' \ | ||
584 | 'awk '$sq'BEGIN { s="\\&"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ | ||
585 | 's=\\& | ||
586 | &|& | ||
587 | ' \ | ||
588 | '' '' | ||
589 | testing 'awk gensub backslashes \0' \ | ||
590 | 'awk '$sq'BEGIN { s="\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ | ||
591 | 's=\\0 | ||
592 | a|a | ||
593 | ' \ | ||
594 | '' '' | ||
595 | testing 'awk gensub backslashes \\0' \ | ||
596 | 'awk '$sq'BEGIN { s="\\\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \ | ||
597 | 's=\\\\0 | ||
598 | \\0|\\0 | ||
599 | ' \ | ||
600 | '' '' | ||
601 | |||
555 | exit $FAILCOUNT | 602 | exit $FAILCOUNT |