about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2012-06-04 14:44:47 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2012-06-04 14:44:47 +0200
commit 21f6fbf545e7fa58f0eaa444001a9d25bc37c4eb (patch)
tree 17be754928b225ce5412faf1cbe613189fee14cd
parent 21f620f6e5f72c4cbecfecaf63a901c33911c00c (diff)
download busybox-w32-21f6fbf545e7fa58f0eaa444001a9d25bc37c4eb.tar.gz
busybox-w32-21f6fbf545e7fa58f0eaa444001a9d25bc37c4eb.tar.bz2
busybox-w32-21f6fbf545e7fa58f0eaa444001a9d25bc37c4eb.zip
sed: fix zero chars match/replace
function                                             old     new   delta
process_files                                       2099    2181     +82

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/sed.c 64
-rwxr-xr-x testsuite/sed.tests 10
2 files changed, 50 insertions, 24 deletions
diff --git a/editors/sed.c b/editors/sed.c
index a2df93165..87fc755eb 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -673,7 +673,7 @@ static void do_subst_w_backrefs(char *line, char *replace)
673 673
674 /* go through the replacement string */ 674 /* go through the replacement string */
675 for (i = 0; replace[i]; i++) { 675 for (i = 0; replace[i]; i++) {
676 /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */ 676 /* if we find a backreference (\1, \2, etc.) print the backref'ed text */
677 if (replace[i] == '\\') { 677 if (replace[i] == '\\') {
678 unsigned backref = replace[++i] - '0'; 678 unsigned backref = replace[++i] - '0';
679 if (backref <= 9) { 679 if (backref <= 9) {
@@ -707,8 +707,10 @@ static void do_subst_w_backrefs(char *line, char *replace)
707static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p) 707static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
708{ 708{
709 char *line = *line_p; 709 char *line = *line_p;
710 int altered = 0;
711 unsigned match_count = 0; 710 unsigned match_count = 0;
711 bool altered = 0;
712 bool prev_match_empty = 1;
713 bool tried_at_eol = 0;
712 regex_t *current_regex; 714 regex_t *current_regex;
713 715
714 current_regex = sed_cmd->sub_match; 716 current_regex = sed_cmd->sub_match;
@@ -737,46 +739,64 @@ static int do_subst_command(sed_cmd_t *sed_cmd, char **line_p)
737 do { 739 do {
738 int i; 740 int i;
739 741
740 /* Work around bug in glibc regexec, demonstrated by:
741 * echo " a.b" | busybox sed 's [^ .]* x g'
742 * The match_count check is so not to break
743 * echo "hi" | busybox sed 's/^/!/g'
744 */
745 if (!G.regmatch[0].rm_so && !G.regmatch[0].rm_eo && match_count) {
746 pipe_putc(*line++);
747 goto next;
748 }
749
750 match_count++; 742 match_count++;
751 743
752 /* If we aren't interested in this match, output old line to 744 /* If we aren't interested in this match, output old line to
753 end of match and continue */ 745 * end of match and continue */
754 if (sed_cmd->which_match 746 if (sed_cmd->which_match
755 && (sed_cmd->which_match != match_count) 747 && (sed_cmd->which_match != match_count)
756 ) { 748 ) {
757 for (i = 0; i < G.regmatch[0].rm_eo; i++) 749 for (i = 0; i < G.regmatch[0].rm_eo; i++)
758 pipe_putc(*line++); 750 pipe_putc(*line++);
751 /* Null match? Print one more char */
752 if (G.regmatch[0].rm_so == i && *line)
753 pipe_putc(*line++);
759 goto next; 754 goto next;
760 } 755 }
761 756
762 /* print everything before the match */ 757 /* Print everything before the match */
763 for (i = 0; i < G.regmatch[0].rm_so; i++) 758 for (i = 0; i < G.regmatch[0].rm_so; i++)
764 pipe_putc(line[i]); 759 pipe_putc(line[i]);
765 760
766 /* then print the substitution string */ 761 /* Then print the substitution string,
767 do_subst_w_backrefs(line, sed_cmd->string); 762 * unless we just matched empty string after non-empty one.
763 * Example: string "cccd", pattern "c*", repl "R":
764 * result is "RdR", not "RRdR": first match "ccc",
765 * second is "" before "d", third is "" after "d".
766 * Second match is NOT replaced!
767 */
768 if (prev_match_empty || i != 0) {
769 dbg("inserting replacement at %d in '%s'", i, line);
770 do_subst_w_backrefs(line, sed_cmd->string);
771 } else {
772 dbg("NOT inserting replacement at %d in '%s'", i, line);
773 }
774
775 /* If matched string is empty (f.e. "c*" pattern),
776 * copy verbatim one char after it before attempting more matches
777 */
778 prev_match_empty = (G.regmatch[0].rm_eo == i);
779 if (prev_match_empty && line[i]) {
780 pipe_putc(line[i]);
781 G.regmatch[0].rm_eo++;
782 }
768 783
769 /* advance past the match */ 784 /* Advance past the match */
785 dbg("line += %d", G.regmatch[0].rm_eo);
770 line += G.regmatch[0].rm_eo; 786 line += G.regmatch[0].rm_eo;
771 /* flag that something has changed */ 787 /* Flag that something has changed */
772 altered++; 788 altered = 1;
773 789
774 /* if we're not doing this globally, get out now */ 790 /* if we're not doing this globally, get out now */
775 if (sed_cmd->which_match != 0) 791 if (sed_cmd->which_match != 0)
776 break; 792 break;
777 next: 793 next:
778 if (*line == '\0') 794 /* Exit if we are at EOL and already tried matching at it */
779 break; 795 if (*line == '\0') {
796 if (tried_at_eol)
797 break;
798 tried_at_eol = 1;
799 }
780 800
781//maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL? 801//maybe (G.regmatch[0].rm_eo ? REG_NOTBOL : 0) instead of unconditional REG_NOTBOL?
782 } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH); 802 } while (regexec(current_regex, line, 10, G.regmatch, REG_NOTBOL) != REG_NOMATCH);
@@ -1127,7 +1147,7 @@ static void process_files(void)
1127 case 's': 1147 case 's':
1128 if (!do_subst_command(sed_cmd, &pattern_space)) 1148 if (!do_subst_command(sed_cmd, &pattern_space))
1129 break; 1149 break;
1130 dbg("do_subst_command succeeeded:'%s'", pattern_space); 1150 dbg("do_subst_command succeeded:'%s'", pattern_space);
1131 substituted |= 1; 1151 substituted |= 1;
1132 1152
1133 /* handle p option */ 1153 /* handle p option */
diff --git a/testsuite/sed.tests b/testsuite/sed.tests
index 9fa8e190c..375beb518 100755
--- a/testsuite/sed.tests
+++ b/testsuite/sed.tests
@@ -52,10 +52,8 @@ testing "sed with empty match" "sed 's/z*//g'" "string\n" "" "string\n"
52testing "sed s//p" "sed -e s/foo/bar/p -e s/bar/baz/p" "bar\nbaz\nbaz\n" \ 52testing "sed s//p" "sed -e s/foo/bar/p -e s/bar/baz/p" "bar\nbaz\nbaz\n" \
53 "" "foo\n" 53 "" "foo\n"
54testing "sed -n s//p" "sed -ne s/abc/def/p" "def\n" "" "abc\n" 54testing "sed -n s//p" "sed -ne s/abc/def/p" "def\n" "" "abc\n"
55test x"$SKIP_KNOWN_BUGS" = x"" && {
56testing "sed s//g (exhaustive)" "sed -e 's/[[:space:]]*/,/g'" ",1,2,3,4,5,\n" \ 55testing "sed s//g (exhaustive)" "sed -e 's/[[:space:]]*/,/g'" ",1,2,3,4,5,\n" \
57 "" "12345\n" 56 "" "12345\n"
58}
59testing "sed s arbitrary delimiter" "sed -e 's woo boing '" "boing\n" "" "woo\n" 57testing "sed s arbitrary delimiter" "sed -e 's woo boing '" "boing\n" "" "woo\n"
60testing "sed s chains" "sed -e s/foo/bar/ -e s/bar/baz/" "baz\n" "" "foo\n" 58testing "sed s chains" "sed -e s/foo/bar/ -e s/bar/baz/" "baz\n" "" "foo\n"
61testing "sed s chains2" "sed -e s/foo/bar/ -e s/baz/nee/" "bar\n" "" "foo\n" 59testing "sed s chains2" "sed -e s/foo/bar/ -e s/baz/nee/" "bar\n" "" "foo\n"
@@ -296,6 +294,14 @@ testing "sed -i finishes ranges correctly" \
296 "sed '1,2d' -i input; echo \$?; cat input" \ 294 "sed '1,2d' -i input; echo \$?; cat input" \
297 "0\n3\n4\n" "1\n2\n3\n4\n" "" 295 "0\n3\n4\n" "1\n2\n3\n4\n" ""
298 296
297testing "sed zero chars match/replace advances correctly 1" \
298 "sed 's/l*/@/g'" \
299 "@h@e@o@\n" "" "helllo\n"
300
301testing "sed zero chars match/replace advances correctly 2" \
302 "sed 's [^ .]* x g'" \
303 "x x.x\n" "" " a.b\n"
304
299# testing "description" "commands" "result" "infile" "stdin" 305# testing "description" "commands" "result" "infile" "stdin"
300 306
301exit $FAILCOUNT 307exit $FAILCOUNT