diff options
author | vda <vda@69ca8d6d-28ef-0310-b511-8ec308f3f277> | 2007-01-29 14:31:47 +0000 |
---|---|---|
committer | vda <vda@69ca8d6d-28ef-0310-b511-8ec308f3f277> | 2007-01-29 14:31:47 +0000 |
commit | 23620f1f7d2e58f9b44c1c9d0e57bd9d86937e6d (patch) | |
tree | 66e4a7f08b6696a8ac0b8839438e44a0314f4090 | |
parent | 938e84dc19a2ad67c3ee7260ed28ff0cefb2994b (diff) | |
download | busybox-w32-23620f1f7d2e58f9b44c1c9d0e57bd9d86937e6d.tar.gz busybox-w32-23620f1f7d2e58f9b44c1c9d0e57bd9d86937e6d.tar.bz2 busybox-w32-23620f1f7d2e58f9b44c1c9d0e57bd9d86937e6d.zip |
sed: fix 2 bugs (one testsuite entry + one newly found)
but more importantly make code more understandable
git-svn-id: svn://busybox.net/trunk/busybox@17632 69ca8d6d-28ef-0310-b511-8ec308f3f277
-rw-r--r-- | editors/sed.c | 118 | ||||
-rwxr-xr-x | testsuite/sed.tests | 4 |
2 files changed, 63 insertions, 59 deletions
diff --git a/editors/sed.c b/editors/sed.c index 28ee698e4..70be2e824 100644 --- a/editors/sed.c +++ b/editors/sed.c | |||
@@ -72,7 +72,7 @@ typedef struct sed_cmd_s { | |||
72 | int beg_line; /* 'sed 1p' 0 == apply commands to all lines */ | 72 | int beg_line; /* 'sed 1p' 0 == apply commands to all lines */ |
73 | int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */ | 73 | int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */ |
74 | 74 | ||
75 | FILE *file; /* File (sw) command writes to, -1 for none. */ | 75 | FILE *sw_file; /* File (sw) command writes to, -1 for none. */ |
76 | char *string; /* Data string for (saicytb) commands. */ | 76 | char *string; /* Data string for (saicytb) commands. */ |
77 | 77 | ||
78 | unsigned short which_match; /* (s) Which match to replace (0 for all) */ | 78 | unsigned short which_match; /* (s) Which match to replace (0 for all) */ |
@@ -82,7 +82,7 @@ typedef struct sed_cmd_s { | |||
82 | unsigned int in_match:1; /* Next line also included in match? */ | 82 | unsigned int in_match:1; /* Next line also included in match? */ |
83 | unsigned int sub_p:1; /* (s) print option */ | 83 | unsigned int sub_p:1; /* (s) print option */ |
84 | 84 | ||
85 | // int sw_last_char; /* Last line written by (sw) had no '\n' */ | 85 | char sw_last_char; /* Last line written by (sw) had no '\n' */ |
86 | 86 | ||
87 | /* GENERAL FIELDS */ | 87 | /* GENERAL FIELDS */ |
88 | char cmd; /* The command char: abcdDgGhHilnNpPqrstwxy:={} */ | 88 | char cmd; /* The command char: abcdDgGhHilnNpPqrstwxy:={} */ |
@@ -130,8 +130,8 @@ static void sed_free_and_close_stuff(void) | |||
130 | while (sed_cmd) { | 130 | while (sed_cmd) { |
131 | sed_cmd_t *sed_cmd_next = sed_cmd->next; | 131 | sed_cmd_t *sed_cmd_next = sed_cmd->next; |
132 | 132 | ||
133 | if (sed_cmd->file) | 133 | if (sed_cmd->sw_file) |
134 | xprint_and_close_file(sed_cmd->file); | 134 | xprint_and_close_file(sed_cmd->sw_file); |
135 | 135 | ||
136 | if (sed_cmd->beg_match) { | 136 | if (sed_cmd->beg_match) { |
137 | regfree(sed_cmd->beg_match); | 137 | regfree(sed_cmd->beg_match); |
@@ -423,7 +423,7 @@ static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr) | |||
423 | bb_error_msg_and_die("command only uses one address"); | 423 | bb_error_msg_and_die("command only uses one address"); |
424 | cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string); | 424 | cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string); |
425 | if (sed_cmd->cmd == 'w') | 425 | if (sed_cmd->cmd == 'w') |
426 | sed_cmd->file = xfopen(sed_cmd->string, "w"); | 426 | sed_cmd->sw_file = xfopen(sed_cmd->string, "w"); |
427 | /* handle branch commands */ | 427 | /* handle branch commands */ |
428 | } else if (strchr(":btT", sed_cmd->cmd)) { | 428 | } else if (strchr(":btT", sed_cmd->cmd)) { |
429 | int length; | 429 | int length; |
@@ -717,21 +717,22 @@ static void add_input_file(FILE *file) | |||
717 | bbg.input_file_list[bbg.input_file_count++] = file; | 717 | bbg.input_file_list[bbg.input_file_count++] = file; |
718 | } | 718 | } |
719 | 719 | ||
720 | enum { | ||
721 | MASK_NO_EOL_CHAR = 0x100, | ||
722 | //MASK_FIRST_LINE = 0x200, | ||
723 | }; | ||
724 | |||
725 | /* Get next line of input from bbg.input_file_list, flushing append buffer and | 720 | /* Get next line of input from bbg.input_file_list, flushing append buffer and |
726 | * noting if we ran out of files without a newline on the last line we read. | 721 | * noting if we ran out of files without a newline on the last line we read. |
727 | */ | 722 | */ |
728 | static char *get_next_line(int *last_char) | 723 | enum { |
724 | NO_EOL_CHAR = 1, | ||
725 | }; | ||
726 | static char *get_next_line(char *gets_char) | ||
729 | { | 727 | { |
730 | char *temp = NULL; | 728 | char *temp = NULL; |
731 | int len, lc; | 729 | int len, gc; |
732 | 730 | ||
733 | lc = 0; | ||
734 | flush_append(); | 731 | flush_append(); |
732 | |||
733 | /* will be returned if last line in the file | ||
734 | * doesn't end with either '\n' or '\0' */ | ||
735 | gc = NO_EOL_CHAR; | ||
735 | while (bbg.current_input_file < bbg.input_file_count) { | 736 | while (bbg.current_input_file < bbg.input_file_count) { |
736 | /* Read line up to a newline or NUL byte, inclusive, | 737 | /* Read line up to a newline or NUL byte, inclusive, |
737 | * return malloc'ed char[]. length of the chunk read | 738 | * return malloc'ed char[]. length of the chunk read |
@@ -743,26 +744,29 @@ static char *get_next_line(int *last_char) | |||
743 | char c = temp[len-1]; | 744 | char c = temp[len-1]; |
744 | if (c == '\n' || c == '\0') { | 745 | if (c == '\n' || c == '\0') { |
745 | temp[len-1] = '\0'; | 746 | temp[len-1] = '\0'; |
746 | lc |= (unsigned char)c; | 747 | gc = (unsigned char)c; |
747 | break; | 748 | break; |
748 | } | 749 | } |
749 | /* will be returned if last line in the file | 750 | /* NB: I had the idea of peeking next file(s) and returning |
750 | * doesn't end with either '\n' or '\0' */ | 751 | * NO_EOL_CHAR only if it is the *last* non-empty |
751 | lc |= MASK_NO_EOL_CHAR; | 752 | * input file. But there is a case where this won't work: |
753 | * file1: "a woo\nb woo" | ||
754 | * file2: "c no\nd no" | ||
755 | * sed -ne 's/woo/bang/p' input1 input2 => "a bang\nb bang" | ||
756 | * (note: *no* newline after "b bang"!) */ | ||
757 | |||
752 | break; | 758 | break; |
753 | } | 759 | } |
754 | /* Close this file and advance to next one */ | 760 | /* Close this file and advance to next one */ |
755 | fclose(bbg.input_file_list[bbg.current_input_file++]); | 761 | fclose(bbg.input_file_list[bbg.current_input_file++]); |
756 | /* "this is the first line from new input file" */ | ||
757 | //lc |= MASK_FIRST_LINE; | ||
758 | } | 762 | } |
759 | *last_char = lc; | 763 | *gets_char = gc; |
760 | return temp; | 764 | return temp; |
761 | } | 765 | } |
762 | 766 | ||
763 | /* Output line of text. */ | 767 | /* Output line of text. */ |
764 | /* Note: | 768 | /* Note: |
765 | * The tricks with MASK_FIRST_LINE and last_puts_char are there to emulate gnu sed. | 769 | * The tricks with NO_EOL_CHAR and last_puts_char are there to emulate gnu sed. |
766 | * Without them, we had this: | 770 | * Without them, we had this: |
767 | * echo -n thingy >z1 | 771 | * echo -n thingy >z1 |
768 | * echo -n again >z2 | 772 | * echo -n again >z2 |
@@ -774,37 +778,32 @@ static char *get_next_line(int *last_char) | |||
774 | * bbox: | 778 | * bbox: |
775 | * 00000000 74 68 7a 6e 67 79 61 67 61 7a 6e |thzngyagazn| | 779 | * 00000000 74 68 7a 6e 67 79 61 67 61 7a 6e |thzngyagazn| |
776 | */ | 780 | */ |
777 | static void puts_maybe_newline(char *s, FILE *file, int last_char) | 781 | static void puts_maybe_newline(char *s, FILE *file, char *last_puts_char, char last_gets_char) |
778 | { | 782 | { |
779 | static char last_puts_char = '\n'; | 783 | char lpc = *last_puts_char; |
780 | 784 | ||
781 | /* Is this a first line from new file | 785 | /* Is this a first line from new file |
782 | * and old file didn't end with '\n' or '\0'? */ | 786 | * and old file didn't end with '\n' or '\0'? */ |
783 | // if ((last_char & MASK_FIRST_LINE) && last_puts_char != '\n') { | 787 | if (lpc != '\n' && lpc != '\0') { |
784 | if (last_puts_char != '\n' && last_puts_char != '\0') { | ||
785 | fputc('\n', file); | 788 | fputc('\n', file); |
786 | last_puts_char = '\n'; | 789 | lpc = '\n'; |
787 | } | 790 | } |
788 | fputs(s, file); | 791 | fputs(s, file); |
789 | /* why 'x'? - just something which is not '\n' */ | 792 | /* 'x' - just something which is not '\n', '\0' or NO_EOL_CHAR */ |
790 | if (s[0]) | 793 | if (s[0]) |
791 | last_puts_char = 'x'; | 794 | lpc = 'x'; |
792 | if (!(last_char & MASK_NO_EOL_CHAR)) { /* had trailing '\n' or '\0'? */ | 795 | if (last_gets_char != NO_EOL_CHAR) { /* had trailing '\n' or '\0'? */ |
793 | last_char &= 0xff; | 796 | fputc(last_gets_char, file); |
794 | fputc(last_char, file); | 797 | lpc = last_gets_char; |
795 | last_puts_char = last_char; | ||
796 | } | 798 | } |
797 | |||
798 | if (ferror(file)) { | 799 | if (ferror(file)) { |
799 | xfunc_error_retval = 4; /* It's what gnu sed exits with... */ | 800 | xfunc_error_retval = 4; /* It's what gnu sed exits with... */ |
800 | bb_error_msg_and_die(bb_msg_write_error); | 801 | bb_error_msg_and_die(bb_msg_write_error); |
801 | } | 802 | } |
802 | 803 | *last_puts_char = lpc; | |
803 | /* Seems to be unused */ | ||
804 | /*return last_char;*/ | ||
805 | } | 804 | } |
806 | 805 | ||
807 | #define sed_puts(s, n) (puts_maybe_newline(s, bbg.nonstdout, n)) | 806 | #define sed_puts(s, n) (puts_maybe_newline(s, bbg.nonstdout, &last_puts_char, n)) |
808 | 807 | ||
809 | /* Process all the lines in all the files */ | 808 | /* Process all the lines in all the files */ |
810 | 809 | ||
@@ -812,12 +811,13 @@ static void process_files(void) | |||
812 | { | 811 | { |
813 | char *pattern_space, *next_line; | 812 | char *pattern_space, *next_line; |
814 | int linenum = 0; | 813 | int linenum = 0; |
815 | int last_char, next_last_char = 0; | 814 | char last_puts_char = '\n'; |
815 | char last_gets_char, next_gets_char; | ||
816 | sed_cmd_t *sed_cmd; | 816 | sed_cmd_t *sed_cmd; |
817 | int substituted; | 817 | int substituted; |
818 | 818 | ||
819 | /* Prime the pump */ | 819 | /* Prime the pump */ |
820 | next_line = get_next_line(&next_last_char); | 820 | next_line = get_next_line(&next_gets_char); |
821 | 821 | ||
822 | /* go through every line in each file */ | 822 | /* go through every line in each file */ |
823 | again: | 823 | again: |
@@ -826,11 +826,11 @@ again: | |||
826 | /* Advance to next line. Stop if out of lines. */ | 826 | /* Advance to next line. Stop if out of lines. */ |
827 | pattern_space = next_line; | 827 | pattern_space = next_line; |
828 | if (!pattern_space) return; | 828 | if (!pattern_space) return; |
829 | last_char = next_last_char; | 829 | last_gets_char = next_gets_char; |
830 | 830 | ||
831 | /* Read one line in advance so we can act on the last line, | 831 | /* Read one line in advance so we can act on the last line, |
832 | * the '$' address */ | 832 | * the '$' address */ |
833 | next_line = get_next_line(&next_last_char); | 833 | next_line = get_next_line(&next_gets_char); |
834 | linenum++; | 834 | linenum++; |
835 | restart: | 835 | restart: |
836 | /* for every line, go through all the commands */ | 836 | /* for every line, go through all the commands */ |
@@ -924,7 +924,7 @@ restart: | |||
924 | * (of current file) is printed. Even if | 924 | * (of current file) is printed. Even if |
925 | * that line is nonterminated, we print | 925 | * that line is nonterminated, we print |
926 | * '\n' here (gnu sed does the same) */ | 926 | * '\n' here (gnu sed does the same) */ |
927 | sed_puts(pattern_space, (last_char & 0x200) | '\n'); | 927 | sed_puts(pattern_space, '\n'); |
928 | break; | 928 | break; |
929 | /* Delete up through first newline */ | 929 | /* Delete up through first newline */ |
930 | case 'D': | 930 | case 'D': |
@@ -950,12 +950,12 @@ restart: | |||
950 | 950 | ||
951 | /* handle p option */ | 951 | /* handle p option */ |
952 | if (sed_cmd->sub_p) | 952 | if (sed_cmd->sub_p) |
953 | sed_puts(pattern_space, last_char); | 953 | sed_puts(pattern_space, last_gets_char); |
954 | /* handle w option */ | 954 | /* handle w option */ |
955 | if (sed_cmd->file) | 955 | if (sed_cmd->sw_file) |
956 | /*sed_cmd->sw_last_char =*/ puts_maybe_newline( | 956 | puts_maybe_newline( |
957 | pattern_space, sed_cmd->file, | 957 | pattern_space, sed_cmd->sw_file, |
958 | last_char); | 958 | &sed_cmd->sw_last_char, last_gets_char); |
959 | break; | 959 | break; |
960 | 960 | ||
961 | /* Append line to linked list to be printed later */ | 961 | /* Append line to linked list to be printed later */ |
@@ -972,7 +972,7 @@ restart: | |||
972 | case 'c': | 972 | case 'c': |
973 | /* Only triggers on last line of a matching range. */ | 973 | /* Only triggers on last line of a matching range. */ |
974 | if (!sed_cmd->in_match) | 974 | if (!sed_cmd->in_match) |
975 | sed_puts(sed_cmd->string, MASK_NO_EOL_CHAR); | 975 | sed_puts(sed_cmd->string, NO_EOL_CHAR); |
976 | goto discard_line; | 976 | goto discard_line; |
977 | 977 | ||
978 | /* Read file, append contents to output */ | 978 | /* Read file, append contents to output */ |
@@ -995,20 +995,20 @@ restart: | |||
995 | 995 | ||
996 | /* Write pattern space to file. */ | 996 | /* Write pattern space to file. */ |
997 | case 'w': | 997 | case 'w': |
998 | /*sed_cmd->sw_last_char =*/ puts_maybe_newline( | 998 | puts_maybe_newline( |
999 | pattern_space, sed_cmd->file, | 999 | pattern_space, sed_cmd->sw_file, |
1000 | last_char); | 1000 | &sed_cmd->sw_last_char, last_gets_char); |
1001 | break; | 1001 | break; |
1002 | 1002 | ||
1003 | /* Read next line from input */ | 1003 | /* Read next line from input */ |
1004 | case 'n': | 1004 | case 'n': |
1005 | if (!bbg.be_quiet) | 1005 | if (!bbg.be_quiet) |
1006 | sed_puts(pattern_space, last_char); | 1006 | sed_puts(pattern_space, last_gets_char); |
1007 | if (next_line) { | 1007 | if (next_line) { |
1008 | free(pattern_space); | 1008 | free(pattern_space); |
1009 | pattern_space = next_line; | 1009 | pattern_space = next_line; |
1010 | last_char = next_last_char; | 1010 | last_gets_char = next_gets_char; |
1011 | next_line = get_next_line(&next_last_char); | 1011 | next_line = get_next_line(&next_gets_char); |
1012 | linenum++; | 1012 | linenum++; |
1013 | break; | 1013 | break; |
1014 | } | 1014 | } |
@@ -1037,8 +1037,8 @@ restart: | |||
1037 | pattern_space = realloc(pattern_space, len + strlen(next_line) + 2); | 1037 | pattern_space = realloc(pattern_space, len + strlen(next_line) + 2); |
1038 | pattern_space[len] = '\n'; | 1038 | pattern_space[len] = '\n'; |
1039 | strcpy(pattern_space + len+1, next_line); | 1039 | strcpy(pattern_space + len+1, next_line); |
1040 | last_char = next_last_char; | 1040 | last_gets_char = next_gets_char; |
1041 | next_line = get_next_line(&next_last_char); | 1041 | next_line = get_next_line(&next_gets_char); |
1042 | linenum++; | 1042 | linenum++; |
1043 | break; | 1043 | break; |
1044 | } | 1044 | } |
@@ -1093,7 +1093,7 @@ restart: | |||
1093 | strcat(pattern_space, "\n"); | 1093 | strcat(pattern_space, "\n"); |
1094 | if (bbg.hold_space) | 1094 | if (bbg.hold_space) |
1095 | strcat(pattern_space, bbg.hold_space); | 1095 | strcat(pattern_space, bbg.hold_space); |
1096 | last_char = '\n'; | 1096 | last_gets_char = '\n'; |
1097 | 1097 | ||
1098 | break; | 1098 | break; |
1099 | } | 1099 | } |
@@ -1125,7 +1125,7 @@ restart: | |||
1125 | { | 1125 | { |
1126 | char *tmp = pattern_space; | 1126 | char *tmp = pattern_space; |
1127 | pattern_space = bbg.hold_space ? : xzalloc(1); | 1127 | pattern_space = bbg.hold_space ? : xzalloc(1); |
1128 | last_char = '\n'; | 1128 | last_gets_char = '\n'; |
1129 | bbg.hold_space = tmp; | 1129 | bbg.hold_space = tmp; |
1130 | break; | 1130 | break; |
1131 | } | 1131 | } |
@@ -1140,7 +1140,7 @@ restart: | |||
1140 | /* we will print the line unless we were told to be quiet ('-n') | 1140 | /* we will print the line unless we were told to be quiet ('-n') |
1141 | or if the line was suppressed (ala 'd'elete) */ | 1141 | or if the line was suppressed (ala 'd'elete) */ |
1142 | if (!bbg.be_quiet) | 1142 | if (!bbg.be_quiet) |
1143 | sed_puts(pattern_space, last_char); | 1143 | sed_puts(pattern_space, last_gets_char); |
1144 | 1144 | ||
1145 | /* Delete and such jump here. */ | 1145 | /* Delete and such jump here. */ |
1146 | discard_line: | 1146 | discard_line: |
diff --git a/testsuite/sed.tests b/testsuite/sed.tests index a386f1723..cc200703d 100755 --- a/testsuite/sed.tests +++ b/testsuite/sed.tests | |||
@@ -139,6 +139,10 @@ testing "sed selective matches noinsert newline" \ | |||
139 | testing "sed clusternewline" \ | 139 | testing "sed clusternewline" \ |
140 | "sed -e '/one/a 111' -e '/two/i 222' -e p input -" \ | 140 | "sed -e '/one/a 111' -e '/two/i 222' -e p input -" \ |
141 | "one\none\n111\n222\ntwo\ntwo" "one" "two" | 141 | "one\none\n111\n222\ntwo\ntwo" "one" "two" |
142 | testing "sed subst+write" \ | ||
143 | "sed -e 's/i/z/' -e 'woutputw' input -; echo -n X; cat outputw" \ | ||
144 | "thzngy\nagaznXthzngy\nagazn" "thingy" "again" | ||
145 | rm outputw | ||
142 | 146 | ||
143 | # Test end-of-file matching behavior | 147 | # Test end-of-file matching behavior |
144 | 148 | ||