about summary refs log tree commit diff
path: root/editors
diff options
context:
space:
mode:
Diffstat (limited to 'editors')
-rw-r--r-- editors/awk.c 49
1 files changed, 36 insertions, 13 deletions
diff --git a/editors/awk.c b/editors/awk.c
index df9b7fdc9..171f0a7ea 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2504,17 +2504,46 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
2504 regex_t sreg, *regex; 2504 regex_t sreg, *regex;
2505 /* True only if called to implement gensub(): */ 2505 /* True only if called to implement gensub(): */
2506 int subexp = (src != dest); 2506 int subexp = (src != dest);
2507 2507#if defined(REG_STARTEND)
2508 const char *src_string;
2509 size_t src_strlen;
2510 regexec_flags = REG_STARTEND;
2511#else
2512 regexec_flags = 0;
2513#endif
2508 resbuf = NULL; 2514 resbuf = NULL;
2509 residx = 0; 2515 residx = 0;
2510 match_no = 0; 2516 match_no = 0;
2511 regexec_flags = 0;
2512 regex = as_regex(rn, &sreg); 2517 regex = as_regex(rn, &sreg);
2513 sp = getvar_s(src ? src : intvar[F0]); 2518 sp = getvar_s(src ? src : intvar[F0]);
2519#if defined(REG_STARTEND)
2520 src_string = sp;
2521 src_strlen = strlen(src_string);
2522#endif
2514 replen = strlen(repl); 2523 replen = strlen(repl);
2515 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) { 2524 for (;;) {
2516 int so = pmatch[0].rm_so; 2525 int so, eo;
2517 int eo = pmatch[0].rm_eo; 2526
2527#if defined(REG_STARTEND)
2528// REG_STARTEND: "This flag is a BSD extension, not present in POSIX"
2529 size_t start_ofs = sp - src_string;
2530 pmatch[0].rm_so = start_ofs;
2531 pmatch[0].rm_eo = src_strlen;
2532 if (regexec(regex, src_string, 10, pmatch, regexec_flags) != 0)
2533 break;
2534 eo = pmatch[0].rm_eo - start_ofs;
2535 so = pmatch[0].rm_so - start_ofs;
2536#else
2537// BUG:
2538// gsub(/\<b*/,"") on "abc" matches empty string at "a...",
2539// advances sp one char (see "Empty match" comment later) to "bc"
2540// ... and erroneously matches "b" even though it is NOT at the word start.
2541 enum { start_ofs = 0 };
2542 if (regexec(regex, sp, 10, pmatch, regexec_flags) != 0)
2543 break;
2544 so = pmatch[0].rm_so;
2545 eo = pmatch[0].rm_eo;
2546#endif
2518 2547
2519 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp); 2548 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2520 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize); 2549 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
@@ -2543,7 +2572,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
2543 } 2572 }
2544 n = pmatch[j].rm_eo - pmatch[j].rm_so; 2573 n = pmatch[j].rm_eo - pmatch[j].rm_so;
2545 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); 2574 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2546 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); 2575 memcpy(resbuf + residx, sp + pmatch[j].rm_so - start_ofs, n);
2547 residx += n; 2576 residx += n;
2548 } else 2577 } else
2549 resbuf[residx++] = c; 2578 resbuf[residx++] = c;
@@ -2557,12 +2586,6 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
2557 if (eo == so) { 2586 if (eo == so) {
2558 /* Empty match (e.g. "b*" will match anywhere). 2587 /* Empty match (e.g. "b*" will match anywhere).
2559 * Advance by one char. */ 2588 * Advance by one char. */
2560//BUG (bug 1333):
2561//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2562//... and will erroneously match "b" even though it is NOT at the word start.
2563//we need REG_NOTBOW but it does not exist...
2564//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2565//it should be able to do it correctly.
2566 /* Subtle: this is safe only because 2589 /* Subtle: this is safe only because
2567 * qrealloc allocated at least one extra byte */ 2590 * qrealloc allocated at least one extra byte */
2568 resbuf[residx] = *sp; 2591 resbuf[residx] = *sp;
@@ -2571,7 +2594,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
2571 sp++; 2594 sp++;
2572 residx++; 2595 residx++;
2573 } 2596 }
2574 regexec_flags = REG_NOTBOL; 2597 regexec_flags |= REG_NOTBOL;
2575 } 2598 }
2576 2599
2577 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize); 2600 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);