From b8fff6b345d4b7e3f16227f65eecca1a0c88ab41 Mon Sep 17 00:00:00 2001
From: "Avi Halachmi (:avih)" <avihpit@yahoo.com>
Date: Thu, 3 Aug 2023 19:19:47 +0300
Subject: win32: disable console output conversion with LC_ALL=C

Previously, when writing to the console, the non-unicode build always
assumed the source data is in the ANSI codepage, and used charToCon
to convert it unconditionally to the console CP.

Similarly, the unicode build made the same assumption (where ANSI CP
is UTF8), and always tried to convert it so that it's printed
correctly (at least when FEATURE_UTF8_OUTPUT is enabled - which it is
by default in the unicode build).

However, there could be cases where this assumption is incorrect, for
instance if the data comes from a file encoded for some codepage X,
and the user, after also changing the console CP to X, runs
'cat file.X'.

This commit allows disabling this conversion, using the same env vars
which can be used to disable the locale/unicode elsewhere (LANG,
LC_CTYPE, LC_ALL set to "C"), e.g. 'LC_ALL=C cat file.X' now doesn't
convert, and the console renders it according to its own codepage.
---
 win32/winansi.c | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/win32/winansi.c b/win32/winansi.c
index aaaa2fa50..c88c096d2 100644
--- a/win32/winansi.c
+++ b/win32/winansi.c
@@ -1534,6 +1534,25 @@ void console_write(const char *str, int len)
 	free(buf);
 }
 
+// Returns 0 if the first non-NULL of LC_ALL, LC_CTYPE, LANG is exactly "C"
+// (output is then left unconverted, for the console to interpret), else 1.
+static int conout_conv_enabled(void)
+{
+	static int enabled, tested;  /* = 0; result is cached once tested is set */
+
+	if (!tested) {
+		// keep in sync with [re]init_unicode at libbb/unicode.c
+		char *s = getenv("LC_ALL");
+		if (!s) s = getenv("LC_CTYPE");
+		if (!s) s = getenv("LANG");
+
+		enabled = !(s && s[0] == 'C' && s[1] == 0);
+		tested = 1;
+	}
+
+	return enabled;
+}
+
 // TODO: improvements:
 //
 // 1. currently conv_[f]writeCon modify buf inplace, which means the caller
@@ -1554,12 +1573,14 @@ void console_write(const char *str, int len)
 // returns EOF on error, 0 on success
 static int conv_fwriteCon(FILE *stream, char *buf, size_t siz)
 {
+	if (conout_conv_enabled()) {
 #if ENABLE_FEATURE_UTF8_OUTPUT
-	if (GetConsoleOutputCP() != CP_UTF8)
-		return writeCon_utf8(fileno(stream), buf, siz) ? EOF : 0;
+		if (GetConsoleOutputCP() != CP_UTF8)
+			return writeCon_utf8(fileno(stream), buf, siz) ? EOF : 0;
 #else
-	charToConBuffA(buf, siz);
+		charToConBuffA(buf, siz);
 #endif
+	}
 	return fwrite(buf, 1, siz, stream) < siz ? EOF : 0;
 }
 
@@ -1567,11 +1588,13 @@ static int conv_fwriteCon(FILE *stream, char *buf, size_t siz)
 // returns -1 on error, actually-written bytes on suceess
 static int conv_writeCon(int fd, char *buf, size_t siz)
 {
+	if (conout_conv_enabled()) {
 #if ENABLE_FEATURE_UTF8_OUTPUT
-	if (GetConsoleOutputCP() != CP_UTF8)
-		return writeCon_utf8(fd, buf, siz) ? -1 : siz;
+		if (GetConsoleOutputCP() != CP_UTF8)
+			return writeCon_utf8(fd, buf, siz) ? -1 : siz;
 #else
-	charToConBuffA(buf, siz);
+		charToConBuffA(buf, siz);
 #endif
+	}
 	return write(fd, buf, siz);
 }
-- 
cgit v1.2.3-55-g6feb