aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2023-06-03 00:39:33 +0200
committer: Denys Vlasenko <vda.linux@googlemail.com> 2023-06-03 00:42:10 +0200
commit: 5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6 (patch)
tree: f3c3aa3267164310b55192a1a3b174523aa49dbe
parent: 0256e00a9d077588bd3a39f5a1ef7e2eaa2911e4 (diff)
download: busybox-w32-5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6.tar.gz
          busybox-w32-5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6.tar.bz2
          busybox-w32-5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6.zip
awk: fix backslash handling in sub() builtins
function                                             old     new   delta
awk_sub                                              559     544     -15

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--  editors/awk.c        41
-rwxr-xr-x  testsuite/awk.tests  47
2 files changed, 66 insertions, 22 deletions
diff --git a/editors/awk.c b/editors/awk.c
index 0f062dcdb..f77573806 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2492,7 +2492,7 @@ static char *awk_printf(node *n, size_t *len)
2492 * store result into (dest), return number of substitutions. 2492 * store result into (dest), return number of substitutions.
2493 * If nm = 0, replace all matches. 2493 * If nm = 0, replace all matches.
2494 * If src or dst is NULL, use $0. 2494 * If src or dst is NULL, use $0.
2495 * If subexp != 0, enable subexpression matching (\1-\9). 2495 * If subexp != 0, enable subexpression matching (\0-\9).
2496 */ 2496 */
2497static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) 2497static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2498{ 2498{
@@ -2520,35 +2520,32 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
2520 residx += eo; 2520 residx += eo;
2521 if (++match_no >= nm) { 2521 if (++match_no >= nm) {
2522 const char *s; 2522 const char *s;
2523 int nbs; 2523 int bslash;
2524 2524
2525 /* replace */ 2525 /* replace */
2526 residx -= (eo - so); 2526 residx -= (eo - so);
2527 nbs = 0; 2527 bslash = 0;
2528 for (s = repl; *s; s++) { 2528 for (s = repl; *s; s++) {
2529 char c = resbuf[residx++] = *s; 2529 char c = *s;
2530 if (c == '\\') { 2530 if (c == '\\' && s[1]) {
2531 nbs++; 2531 bslash ^= 1;
2532 continue; 2532 if (bslash)
2533 continue;
2533 } 2534 }
2534 if (c == '&' || (subexp && c >= '0' && c <= '9')) { 2535 if ((!bslash && c == '&')
2535 int j; 2536 || (subexp && bslash && c >= '0' && c <= '9')
2536 residx -= ((nbs + 3) >> 1); 2537 ) {
2537 j = 0; 2538 int n, j = 0;
2538 if (c != '&') { 2539 if (c != '&') {
2539 j = c - '0'; 2540 j = c - '0';
2540 nbs++;
2541 } 2541 }
2542 if (nbs % 2) { 2542 n = pmatch[j].rm_eo - pmatch[j].rm_so;
2543 resbuf[residx++] = c; 2543 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2544 } else { 2544 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2545 int n = pmatch[j].rm_eo - pmatch[j].rm_so; 2545 residx += n;
2546 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); 2546 } else
2547 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); 2547 resbuf[residx++] = c;
2548 residx += n; 2548 bslash = 0;
2549 }
2550 }
2551 nbs = 0;
2552 } 2549 }
2553 } 2550 }
2554 2551
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index cdab93d21..c61d32947 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -552,4 +552,51 @@ testing "awk = has higher precedence than == (despite what gawk manpage claims)"
552 '0\n1\n2\n1\n3\n' \ 552 '0\n1\n2\n1\n3\n' \
553 '' '' 553 '' ''
554 554
555sq="'"
556testing 'awk gensub backslashes \' \
557 'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
558 's=\\
559\\|\\
560' \
561 '' ''
562testing 'awk gensub backslashes \\' \
563 'awk '$sq'BEGIN { s="\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
564 's=\\\\
565\\|\\
566' \
567 '' ''
568# gawk 5.1.1 handles trailing unpaired \ inconsistently.
569# If replace string is single \, it is used verbatim,
570# but if it is \\\ (three slashes), gawk uses "\<NUL>" (!!!), not "\\" as you would expect.
571testing 'awk gensub backslashes \\\' \
572 'awk '$sq'BEGIN { s="\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
573 's=\\\\\\
574\\\\|\\\\
575' \
576 '' ''
577testing 'awk gensub backslashes \\\\' \
578 'awk '$sq'BEGIN { s="\\\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
579 's=\\\\\\\\
580\\\\|\\\\
581' \
582 '' ''
583testing 'awk gensub backslashes \&' \
584 'awk '$sq'BEGIN { s="\\&"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
585 's=\\&
586&|&
587' \
588 '' ''
589testing 'awk gensub backslashes \0' \
590 'awk '$sq'BEGIN { s="\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
591 's=\\0
592a|a
593' \
594 '' ''
595testing 'awk gensub backslashes \\0' \
596 'awk '$sq'BEGIN { s="\\\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
597 's=\\\\0
598\\0|\\0
599' \
600 '' ''
601
555exit $FAILCOUNT 602exit $FAILCOUNT