diff options
author | Ron Yorston <rmy@pobox.com> | 2023-08-06 12:40:17 +0100 |
---|---|---|
committer | Ron Yorston <rmy@pobox.com> | 2023-08-06 12:40:17 +0100 |
commit | 0342f258f74bde329cc98733a3b22196ca1b1bdb (patch) | |
tree | 73a58db17072aef8d3ef26b32d50e45c58dd5167 | |
parent | 16a2532c519bac11d456f7c71f06d64385f66ed3 (diff) | |
download | busybox-w32-0342f258f74bde329cc98733a3b22196ca1b1bdb.tar.gz busybox-w32-0342f258f74bde329cc98733a3b22196ca1b1bdb.tar.bz2 busybox-w32-0342f258f74bde329cc98733a3b22196ca1b1bdb.zip |
diff: more changes to --binary
The changes introduced to support the --binary option gave incorrect
results when comparing files with CRLF line endings *without* the
--binary option present.
The code needs to keep track of the position within the file and is
confused by text mode, where each CRLF pair is translated to a single LF.
As an alternative solution, always use binary mode but skip the CR
of a CRLF pair when the --binary option isn't used. This gives
results matching GNU diff when comparing files with matching line
endings, with or without --binary. When the line endings of the two
files differ, the results don't always match those of GNU diff.
Costs 32 bytes in the 32-bit build, saves 16 bytes in the 64-bit build.
(GitHub issue #348)
-rw-r--r-- | editors/diff.c | 25 | ||||
-rwxr-xr-x | testsuite/diff.tests | 63 |
2 files changed, 77 insertions, 11 deletions
diff --git a/editors/diff.c b/editors/diff.c index b6716455f..b324feaa5 100644 --- a/editors/diff.c +++ b/editors/diff.c | |||
@@ -157,7 +157,7 @@ enum { /* Commandline flags */ | |||
157 | FLAG_p, /* not implemented */ | 157 | FLAG_p, /* not implemented */ |
158 | FLAG_B, | 158 | FLAG_B, |
159 | FLAG_E, /* not implemented */ | 159 | FLAG_E, /* not implemented */ |
160 | #if ENABLE_PLATFORM_MINGW32 | 160 | #if ENABLE_PLATFORM_MINGW32 && ENABLE_FEATURE_DIFF_LONG_OPTIONS |
161 | FLAG_binary, | 161 | FLAG_binary, |
162 | #endif | 162 | #endif |
163 | }; | 163 | }; |
@@ -223,6 +223,9 @@ static int read_token(FILE_and_pos_t *ft, token_t tok) | |||
223 | int t; | 223 | int t; |
224 | 224 | ||
225 | t = fgetc(ft->ft_fp); | 225 | t = fgetc(ft->ft_fp); |
226 | #if ENABLE_PLATFORM_MINGW32 && ENABLE_FEATURE_DIFF_LONG_OPTIONS | ||
227 | newline: | ||
228 | #endif | ||
226 | if (t != EOF) | 229 | if (t != EOF) |
227 | ft->ft_pos++; | 230 | ft->ft_pos++; |
228 | is_space = (t == EOF || isspace(t)); | 231 | is_space = (t == EOF || isspace(t)); |
@@ -238,6 +241,16 @@ static int read_token(FILE_and_pos_t *ft, token_t tok) | |||
238 | 241 | ||
239 | if ((option_mask32 & FLAG(w)) && is_space) | 242 | if ((option_mask32 & FLAG(w)) && is_space) |
240 | continue; | 243 | continue; |
244 | #if ENABLE_PLATFORM_MINGW32 && ENABLE_FEATURE_DIFF_LONG_OPTIONS | ||
245 | if (!(option_mask32 & FLAG(binary)) && t == '\r') { | ||
246 | int t2 = fgetc(ft->ft_fp); | ||
247 | if (t2 == '\n') { | ||
248 | t = t2; | ||
249 | goto newline; | ||
250 | } | ||
251 | ungetc(t2, ft->ft_fp); | ||
252 | } | ||
253 | #endif | ||
241 | 254 | ||
242 | /* Trim char value to low 9 bits */ | 255 | /* Trim char value to low 9 bits */ |
243 | t &= CHAR_MASK; | 256 | t &= CHAR_MASK; |
@@ -718,7 +731,6 @@ static int diffreg(char *file[2]) | |||
718 | #if ENABLE_PLATFORM_MINGW32 | 731 | #if ENABLE_PLATFORM_MINGW32 |
719 | char *tmpfile[2] = { NULL, NULL }; | 732 | char *tmpfile[2] = { NULL, NULL }; |
720 | char *tmpdir; | 733 | char *tmpdir; |
721 | const char *mode; | ||
722 | #endif | 734 | #endif |
723 | 735 | ||
724 | fp[0] = stdin; | 736 | fp[0] = stdin; |
@@ -761,16 +773,7 @@ static int diffreg(char *file[2]) | |||
761 | fd = fd_tmp; | 773 | fd = fd_tmp; |
762 | xlseek(fd, 0, SEEK_SET); | 774 | xlseek(fd, 0, SEEK_SET); |
763 | } | 775 | } |
764 | #if ENABLE_PLATFORM_MINGW32 | ||
765 | mode = "r"; | ||
766 | if (!(option_mask32 & FLAG(binary))) { | ||
767 | _setmode(fd, _O_TEXT); | ||
768 | mode = "rt"; | ||
769 | } | ||
770 | fp[i] = fdopen(fd, mode); | ||
771 | #else | ||
772 | fp[i] = fdopen(fd, "r"); | 776 | fp[i] = fdopen(fd, "r"); |
773 | #endif | ||
774 | } | 777 | } |
775 | 778 | ||
776 | setup_common_bufsiz(); | 779 | setup_common_bufsiz(); |
diff --git a/testsuite/diff.tests b/testsuite/diff.tests index 0ced0f248..ee0567a80 100755 --- a/testsuite/diff.tests +++ b/testsuite/diff.tests | |||
@@ -123,6 +123,69 @@ testing "diff always takes context from old file" \ | |||
123 | "abc\na c\ndef\n" \ | 123 | "abc\na c\ndef\n" \ |
124 | "a c\n" | 124 | "a c\n" |
125 | 125 | ||
126 | optional PLATFORM_MINGW32 LONG_OPTS | ||
127 | testing "diff LF line endings" \ | ||
128 | 'diff -u - input' \ | ||
129 | "\ | ||
130 | --- - | ||
131 | +++ input | ||
132 | @@ -1,4 +1,4 @@ | ||
133 | a | ||
134 | b | ||
135 | +c | ||
136 | d | ||
137 | -e | ||
138 | " \ | ||
139 | "a\nb\nc\nd\n" \ | ||
140 | "a\nb\nd\ne\n" | ||
141 | |||
142 | testing "diff --binary LF line endings" \ | ||
143 | 'diff --binary -u - input' \ | ||
144 | "\ | ||
145 | --- - | ||
146 | +++ input | ||
147 | @@ -1,4 +1,4 @@ | ||
148 | a | ||
149 | b | ||
150 | +c | ||
151 | d | ||
152 | -e | ||
153 | " \ | ||
154 | "a\nb\nc\nd\n" \ | ||
155 | "a\nb\nd\ne\n" | ||
156 | |||
157 | testing "diff CRLF line endings" \ | ||
158 | 'diff -u - input' \ | ||
159 | "\ | ||
160 | --- - | ||
161 | +++ input | ||
162 | @@ -1,4 +1,4 @@ | ||
163 | a | ||
164 | b | ||
165 | +c | ||
166 | d | ||
167 | -e | ||
168 | " \ | ||
169 | "a\r\nb\r\nc\r\nd\r\n" \ | ||
170 | "a\r\nb\r\nd\r\ne\r\n" | ||
171 | |||
172 | testing "diff --binary CRLF line endings" \ | ||
173 | 'diff --binary -u - input' \ | ||
174 | "\ | ||
175 | --- - | ||
176 | +++ input | ||
177 | @@ -1,4 +1,4 @@ | ||
178 | a | ||
179 | b | ||
180 | +c | ||
181 | d | ||
182 | -e | ||
183 | " \ | ||
184 | "a\r\nb\r\nc\r\nd\r\n" \ | ||
185 | "a\r\nb\r\nd\r\ne\r\n" | ||
186 | |||
187 | SKIP= | ||
188 | |||
126 | # testing "test name" "commands" "expected result" "file input" "stdin" | 189 | # testing "test name" "commands" "expected result" "file input" "stdin" |
127 | 190 | ||
128 | # clean up | 191 | # clean up |