diff options
Diffstat (limited to 'editors')
-rw-r--r-- | editors/awk.c | 49 |
1 files changed, 36 insertions, 13 deletions
diff --git a/editors/awk.c b/editors/awk.c index df9b7fdc9..171f0a7ea 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -2504,17 +2504,46 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in | |||
2504 | regex_t sreg, *regex; | 2504 | regex_t sreg, *regex; |
2505 | /* True only if called to implement gensub(): */ | 2505 | /* True only if called to implement gensub(): */ |
2506 | int subexp = (src != dest); | 2506 | int subexp = (src != dest); |
2507 | 2507 | #if defined(REG_STARTEND) | |
2508 | const char *src_string; | ||
2509 | size_t src_strlen; | ||
2510 | regexec_flags = REG_STARTEND; | ||
2511 | #else | ||
2512 | regexec_flags = 0; | ||
2513 | #endif | ||
2508 | resbuf = NULL; | 2514 | resbuf = NULL; |
2509 | residx = 0; | 2515 | residx = 0; |
2510 | match_no = 0; | 2516 | match_no = 0; |
2511 | regexec_flags = 0; | ||
2512 | regex = as_regex(rn, &sreg); | 2517 | regex = as_regex(rn, &sreg); |
2513 | sp = getvar_s(src ? src : intvar[F0]); | 2518 | sp = getvar_s(src ? src : intvar[F0]); |
2519 | #if defined(REG_STARTEND) | ||
2520 | src_string = sp; | ||
2521 | src_strlen = strlen(src_string); | ||
2522 | #endif | ||
2514 | replen = strlen(repl); | 2523 | replen = strlen(repl); |
2515 | while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) { | 2524 | for (;;) { |
2516 | int so = pmatch[0].rm_so; | 2525 | int so, eo; |
2517 | int eo = pmatch[0].rm_eo; | 2526 | |
2527 | #if defined(REG_STARTEND) | ||
2528 | // REG_STARTEND: "This flag is a BSD extension, not present in POSIX" | ||
2529 | size_t start_ofs = sp - src_string; | ||
2530 | pmatch[0].rm_so = start_ofs; | ||
2531 | pmatch[0].rm_eo = src_strlen; | ||
2532 | if (regexec(regex, src_string, 10, pmatch, regexec_flags) != 0) | ||
2533 | break; | ||
2534 | eo = pmatch[0].rm_eo - start_ofs; | ||
2535 | so = pmatch[0].rm_so - start_ofs; | ||
2536 | #else | ||
2537 | // BUG: | ||
2538 | // gsub(/\<b*/,"") on "abc" matches empty string at "a...", | ||
2539 | // advances sp one char (see "Empty match" comment later) to "bc" | ||
2540 | // ... and erroneously matches "b" even though it is NOT at the word start. | ||
2541 | enum { start_ofs = 0 }; | ||
2542 | if (regexec(regex, sp, 10, pmatch, regexec_flags) != 0) | ||
2543 | break; | ||
2544 | so = pmatch[0].rm_so; | ||
2545 | eo = pmatch[0].rm_eo; | ||
2546 | #endif | ||
2518 | 2547 | ||
2519 | //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp); | 2548 | //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp); |
2520 | resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize); | 2549 | resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize); |
@@ -2543,7 +2572,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in | |||
2543 | } | 2572 | } |
2544 | n = pmatch[j].rm_eo - pmatch[j].rm_so; | 2573 | n = pmatch[j].rm_eo - pmatch[j].rm_so; |
2545 | resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); | 2574 | resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); |
2546 | memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); | 2575 | memcpy(resbuf + residx, sp + pmatch[j].rm_so - start_ofs, n); |
2547 | residx += n; | 2576 | residx += n; |
2548 | } else | 2577 | } else |
2549 | resbuf[residx++] = c; | 2578 | resbuf[residx++] = c; |
@@ -2557,12 +2586,6 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in | |||
2557 | if (eo == so) { | 2586 | if (eo == so) { |
2558 | /* Empty match (e.g. "b*" will match anywhere). | 2587 | /* Empty match (e.g. "b*" will match anywhere). |
2559 | * Advance by one char. */ | 2588 | * Advance by one char. */ |
2560 | //BUG (bug 1333): | ||
2561 | //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc" | ||
2562 | //... and will erroneously match "b" even though it is NOT at the word start. | ||
2563 | //we need REG_NOTBOW but it does not exist... | ||
2564 | //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search, | ||
2565 | //it should be able to do it correctly. | ||
2566 | /* Subtle: this is safe only because | 2589 | /* Subtle: this is safe only because |
2567 | * qrealloc allocated at least one extra byte */ | 2590 | * qrealloc allocated at least one extra byte */ |
2568 | resbuf[residx] = *sp; | 2591 | resbuf[residx] = *sp; |
@@ -2571,7 +2594,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in | |||
2571 | sp++; | 2594 | sp++; |
2572 | residx++; | 2595 | residx++; |
2573 | } | 2596 | } |
2574 | regexec_flags = REG_NOTBOL; | 2597 | regexec_flags |= REG_NOTBOL; |
2575 | } | 2598 | } |
2576 | 2599 | ||
2577 | resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize); | 2600 | resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize); |