diff options
-rw-r--r-- | editors/sed.c | 64 | ||||
-rwxr-xr-x | testsuite/sed.tests | 10 |
2 files changed, 50 insertions, 24 deletions
diff --git a/editors/sed.c b/editors/sed.c index a2df93165..87fc755eb 100644 --- a/editors/sed.c +++ b/editors/sed.c | |||
@@ -673,7 +673,7 @@ static void do_subst_w_backrefs(char *line, char *replace) | |||
673 | 673 | ||
674 | /* go through the replacement string */ | 674 | /* go through the replacement string */ |
675 | for (i = 0; replace[i]; i++) { | 675 | for (i = 0; replace[i]; i++) { |
676 | /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */ | 676 | /* if we find a backreference (\1, \2, etc.) print the backref'ed text */ |
677 | if (replace[i] == '\\') { | 677 | if (replace[i] == '\\') { |
678 | unsigned backref = replace[++i] - '0'; | 678 | unsigned backref = replace[++i] - '0'; |
679 | if (backref <= 9) { | 679 | if (backref <= 9) { |
@@ -707,8 +707,10 @@ static void do_subst_w_backrefs(char *line, char *replace) | |||
707 | static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) | 707 | static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) |
708 | { | 708 | { |
709 | char *line = *line_p; | 709 | char *line = *line_p; |
710 | int altered = 0; | ||
711 | unsigned match_count = 0; | 710 | unsigned match_count = 0; |
711 | bool altered = 0; | ||
712 | bool prev_match_empty = 1; | ||
713 | bool tried_at_eol = 0; | ||
712 | regex_t *current_regex; | 714 | regex_t *current_regex; |
713 | 715 | ||
714 | current_regex = sed_cmd->sub_match; | 716 | current_regex = sed_cmd->sub_match; |
@@ -737,46 +739,64 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) | |||
737 | do { | 739 | do { |
738 | int i; | 740 | int i; |
739 | 741 | ||
740 | /* Work around bug in glibc regexec, demonstrated by: | ||
741 | * echo " a.b" | busybox sed 's [^ .]* x g' | ||
742 | * The match_count check is so not to break | ||
743 | * echo "hi" | busybox sed 's/^/!/g' | ||
744 | */ | ||
745 | if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) { | ||
746 | pipe_putc(*line++); | ||
747 | goto next; | ||
748 | } | ||
749 | |||
750 | match_count++; | 742 | match_count++; |
751 | 743 | ||
752 | /* If we aren't interested in this match, output old line to | 744 | /* If we aren't interested in this match, output old line to |
753 | end of match and continue */ | 745 | * end of match and continue */ |
754 | if (sed_cmd->which_match | 746 | if (sed_cmd->which_match |
755 | && (sed_cmd->which_match != match_count) | 747 | && (sed_cmd->which_match != match_count) |
756 | ) { | 748 | ) { |
757 | for (i = 0; i < G.regmatch[0].rm_eo; i++) | 749 | for (i = 0; i < G.regmatch[0].rm_eo; i++) |
758 | pipe_putc(*line++); | 750 | pipe_putc(*line++); |
751 | /* Null match? Print one more char */ | ||
752 | if (G.regmatch[0].rm_so == i && *line) | ||
753 | pipe_putc(*line++); | ||
759 | goto next; | 754 | goto next; |
760 | } | 755 | } |
761 | 756 | ||
762 | /* print everything before the match */ | 757 | /* Print everything before the match */ |
763 | for (i = 0; i < G.regmatch[0].rm_so; i++) | 758 | for (i = 0; i < G.regmatch[0].rm_so; i++) |
764 | pipe_putc(line[i]); | 759 | pipe_putc(line[i]); |
765 | 760 | ||
766 | /* then print the substitution string */ | 761 | /* Then print the substitution string, |
767 | do_subst_w_backrefs(line, sed_cmd->string); | 762 | * unless we just matched empty string after non-empty one. |
763 | * Example: string "cccd", pattern "c*", repl "R": | ||
764 | * result is "RdR", not "RRdR": first match "ccc", | ||
765 | * second is "" before "d", third is "" after "d". | ||
766 | * Second match is NOT replaced! | ||
767 | */ | ||
768 | if (prev_match_empty || i != 0) { | ||
769 | dbg("inserting replacement at %d in '%s'", i, line); | ||
770 | do_subst_w_backrefs(line, sed_cmd->string); | ||
771 | } else { | ||
772 | dbg("NOT inserting replacement at %d in '%s'", i, line); | ||
773 | } | ||
774 | |||
775 | /* If matched string is empty (f.e. "c*" pattern), | ||
776 | * copy verbatim one char after it before attempting more matches | ||
777 | */ | ||
778 | prev_match_empty = (G.regmatch[0].rm_eo == i); | ||
779 | if (prev_match_empty && line[i]) { | ||
780 | pipe_putc(line[i]); | ||
781 | G.regmatch[0].rm_eo++; | ||
782 | } | ||
768 | 783 | ||
769 | /* advance past the match */ | 784 | /* Advance past the match */ |
785 | dbg("line += %d", G.regmatch[0].rm_eo); | ||
770 | line += G.regmatch[0].rm_eo; | 786 | line += G.regmatch[0].rm_eo; |
771 | /* flag that something has changed */ | 787 | /* Flag that something has changed */ |
772 | altered++; | 788 | altered = 1; |
773 | 789 | ||
774 | /* if we're not doing this globally, get out now */ | 790 | /* if we're not doing this globally, get out now */ |
775 | if (sed_cmd->which_match != 0) | 791 | if (sed_cmd->which_match != 0) |
776 | break; | 792 | break; |
777 | next: | 793 | next: |
778 | if (*line == '\0') | 794 | /* Exit if we are at EOL and already tried matching at it */ |
779 | break; | 795 | if (*line == '\0') { |
796 | if (tried_at_eol) | ||
797 | break; | ||
798 | tried_at_eol = 1; | ||
799 | } | ||
780 | 800 | ||
781 | //maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL? | 801 | //maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL? |
782 | } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH); | 802 | } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH); |
@@ -1127,7 +1147,7 @@ static void process_files(void) | |||
1127 | case 's': | 1147 | case 's': |
1128 | if (!do_subst_command(sed_cmd, &pattern_space)) | 1148 | if (!do_subst_command(sed_cmd, &pattern_space)) |
1129 | break; | 1149 | break; |
1130 | dbg("do_subst_command succeeeded:'%s'", pattern_space); | 1150 | dbg("do_subst_command succeeded:'%s'", pattern_space); |
1131 | substituted |= 1; | 1151 | substituted |= 1; |
1132 | 1152 | ||
1133 | /* handle p option */ | 1153 | /* handle p option */ |
diff --git a/testsuite/sed.tests b/testsuite/sed.tests index 9fa8e190c..375beb518 100755 --- a/testsuite/sed.tests +++ b/testsuite/sed.tests | |||
@@ -52,10 +52,8 @@ testing "sed with empty match" "sed 's/z*//g'" "string\n" "" "string\n" | |||
52 | testing "sed s//p" "sed -e s/foo/bar/p -e s/bar/baz/p" "bar\nbaz\nbaz\n" \ | 52 | testing "sed s//p" "sed -e s/foo/bar/p -e s/bar/baz/p" "bar\nbaz\nbaz\n" \ |
53 | "" "foo\n" | 53 | "" "foo\n" |
54 | testing "sed -n s//p" "sed -ne s/abc/def/p" "def\n" "" "abc\n" | 54 | testing "sed -n s//p" "sed -ne s/abc/def/p" "def\n" "" "abc\n" |
55 | test x"$SKIP_KNOWN_BUGS" = x"" && { | ||
56 | testing "sed s//g (exhaustive)" "sed -e 's/[[:space:]]*/,/g'" ",1,2,3,4,5,\n" \ | 55 | testing "sed s//g (exhaustive)" "sed -e 's/[[:space:]]*/,/g'" ",1,2,3,4,5,\n" \ |
57 | "" "12345\n" | 56 | "" "12345\n" |
58 | } | ||
59 | testing "sed s arbitrary delimiter" "sed -e 's woo boing '" "boing\n" "" "woo\n" | 57 | testing "sed s arbitrary delimiter" "sed -e 's woo boing '" "boing\n" "" "woo\n" |
60 | testing "sed s chains" "sed -e s/foo/bar/ -e s/bar/baz/" "baz\n" "" "foo\n" | 58 | testing "sed s chains" "sed -e s/foo/bar/ -e s/bar/baz/" "baz\n" "" "foo\n" |
61 | testing "sed s chains2" "sed -e s/foo/bar/ -e s/baz/nee/" "bar\n" "" "foo\n" | 59 | testing "sed s chains2" "sed -e s/foo/bar/ -e s/baz/nee/" "bar\n" "" "foo\n" |
@@ -296,6 +294,14 @@ testing "sed -i finishes ranges correctly" \ | |||
296 | "sed '1,2d' -i input; echo \$?; cat input" \ | 294 | "sed '1,2d' -i input; echo \$?; cat input" \ |
297 | "0\n3\n4\n" "1\n2\n3\n4\n" "" | 295 | "0\n3\n4\n" "1\n2\n3\n4\n" "" |
298 | 296 | ||
297 | testing "sed zero chars match/replace advances correctly 1" \ | ||
298 | "sed 's/l*/@/g'" \ | ||
299 | "@h@e@o@\n" "" "helllo\n" | ||
300 | |||
301 | testing "sed zero chars match/replace advances correctly 2" \ | ||
302 | "sed 's [^ .]* x g'" \ | ||
303 | "x x.x\n" "" " a.b\n" | ||
304 | |||
299 | # testing "description" "commands" "result" "infile" "stdin" | 305 | # testing "description" "commands" "result" "infile" "stdin" |
300 | 306 | ||
301 | exit $FAILCOUNT | 307 | exit $FAILCOUNT |