From 0342f258f74bde329cc98733a3b22196ca1b1bdb Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Sun, 6 Aug 2023 12:40:17 +0100 Subject: diff: more changes to --binary The changes introduced to support the --binary option gave incorrect results when comparing files with CRLF line endings *without* the --binary option present. The code needs to keep track of the position within the file and is confused by text mode. As an alternative solution, always use binary mode but skip the CR of a CRLF pair when the --binary option isn't used. This gives results matching GNU diff when comparing files with matching line endings, with or without --binary. When line endings differ the results aren't always the same. Costs 32 bytes in the 32-bit build, saves 16 in 64-bit. (GitHub issue #348) --- editors/diff.c | 25 ++++++++++++--------- testsuite/diff.tests | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 11 deletions(-) diff --git a/editors/diff.c b/editors/diff.c index b6716455f..b324feaa5 100644 --- a/editors/diff.c +++ b/editors/diff.c @@ -157,7 +157,7 @@ enum { /* Commandline flags */ FLAG_p, /* not implemented */ FLAG_B, FLAG_E, /* not implemented */ -#if ENABLE_PLATFORM_MINGW32 +#if ENABLE_PLATFORM_MINGW32 && ENABLE_FEATURE_DIFF_LONG_OPTIONS FLAG_binary, #endif }; @@ -223,6 +223,9 @@ static int read_token(FILE_and_pos_t *ft, token_t tok) int t; t = fgetc(ft->ft_fp); +#if ENABLE_PLATFORM_MINGW32 && ENABLE_FEATURE_DIFF_LONG_OPTIONS + newline: +#endif if (t != EOF) ft->ft_pos++; is_space = (t == EOF || isspace(t)); @@ -238,6 +241,16 @@ static int read_token(FILE_and_pos_t *ft, token_t tok) if ((option_mask32 & FLAG(w)) && is_space) continue; +#if ENABLE_PLATFORM_MINGW32 && ENABLE_FEATURE_DIFF_LONG_OPTIONS + if (!(option_mask32 & FLAG(binary)) && t == '\r') { + int t2 = fgetc(ft->ft_fp); + if (t2 == '\n') { + t = t2; + goto newline; + } + ungetc(t2, ft->ft_fp); + } +#endif /* Trim char value to low 9 bits */ t &= CHAR_MASK; @@ -718,7 +731,6 @@ static int diffreg(char *file[2]) #if ENABLE_PLATFORM_MINGW32 char *tmpfile[2] = { NULL, NULL }; char *tmpdir; - const char *mode; #endif fp[0] = stdin; @@ -761,16 +773,7 @@ static int diffreg(char *file[2]) fd = fd_tmp; xlseek(fd, 0, SEEK_SET); } -#if ENABLE_PLATFORM_MINGW32 - mode = "r"; - if (!(option_mask32 & FLAG(binary))) { - _setmode(fd, _O_TEXT); - mode = "rt"; - } - fp[i] = fdopen(fd, mode); -#else fp[i] = fdopen(fd, "r"); -#endif } setup_common_bufsiz(); diff --git a/testsuite/diff.tests b/testsuite/diff.tests index 0ced0f248..ee0567a80 100755 --- a/testsuite/diff.tests +++ b/testsuite/diff.tests @@ -123,6 +123,69 @@ testing "diff always takes context from old file" \ "abc\na c\ndef\n" \ "a c\n" +optional PLATFORM_MINGW32 LONG_OPTS +testing "diff LF line endings" \ + 'diff -u - input' \ +"\ +--- - ++++ input +@@ -1,4 +1,4 @@ + a + b ++c + d +-e +" \ + "a\nb\nc\nd\n" \ + "a\nb\nd\ne\n" + +testing "diff --binary LF line endings" \ + 'diff --binary -u - input' \ +"\ +--- - ++++ input +@@ -1,4 +1,4 @@ + a + b ++c + d +-e +" \ + "a\nb\nc\nd\n" \ + "a\nb\nd\ne\n" + +testing "diff CRLF line endings" \ + 'diff -u - input' \ +"\ +--- - ++++ input +@@ -1,4 +1,4 @@ + a + b ++c + d +-e +" \ + "a\r\nb\r\nc\r\nd\r\n" \ + "a\r\nb\r\nd\r\ne\r\n" + +testing "diff --binary CRLF line endings" \ + 'diff --binary -u - input' \ +"\ +--- - ++++ input +@@ -1,4 +1,4 @@ + a + b ++c + d +-e +" \ + "a\r\nb\r\nc\r\nd\r\n" \ + "a\r\nb\r\nd\r\ne\r\n" + +SKIP= + # testing "test name" "commands" "expected result" "file input" "stdin" # clean up -- cgit v1.2.3-55-g6feb