aboutsummaryrefslogtreecommitdiff
path: root/win32
diff options
context:
space:
mode:
Diffstat (limited to 'win32')
-rw-r--r--win32/winansi.c180
1 files changed, 163 insertions, 17 deletions
diff --git a/win32/winansi.c b/win32/winansi.c
index f280177e6..c88c096d2 100644
--- a/win32/winansi.c
+++ b/win32/winansi.c
@@ -10,6 +10,9 @@
10static BOOL charToConBuffA(LPSTR s, DWORD len); 10static BOOL charToConBuffA(LPSTR s, DWORD len);
11static BOOL charToConA(LPSTR s); 11static BOOL charToConA(LPSTR s);
12 12
13static int conv_fwriteCon(FILE *stream, char *buf, size_t siz);
14static int conv_writeCon(int fd, char *buf, size_t siz);
15
13/* 16/*
14 Functions to be wrapped: 17 Functions to be wrapped:
15*/ 18*/
@@ -814,9 +817,7 @@ static int ansi_emulate(const char *s, FILE *stream)
814 size_t len = pos - str; 817 size_t len = pos - str;
815 818
816 if (len) { 819 if (len) {
817 *pos = '\0'; /* NB, '\033' has been overwritten */ 820 if (conv_fwriteCon(stream, str, len) == EOF)
818 charToConA(str);
819 if (fputs(str, stream) == EOF)
820 return EOF; 821 return EOF;
821 rv += len; 822 rv += len;
822 } 823 }
@@ -837,9 +838,9 @@ static int ansi_emulate(const char *s, FILE *stream)
837 return EOF; 838 return EOF;
838 839
839 } else { 840 } else {
840 rv += strlen(str); 841 size_t len = strlen(str);
841 charToConA(str); 842 rv += len;
842 return fputs(str, stream) == EOF ? EOF : rv; 843 return conv_fwriteCon(stream, str, len) == EOF ? EOF : rv;
843 } 844 }
844 } 845 }
845 return rv; 846 return rv;
@@ -853,8 +854,7 @@ int winansi_putchar(int c)
853 if (!is_console(STDOUT_FILENO)) 854 if (!is_console(STDOUT_FILENO))
854 return putchar(c); 855 return putchar(c);
855 856
856 charToConBuffA(s, 1); 857 return conv_fwriteCon(stdout, s, 1) == EOF ? EOF : (unsigned char)c;
857 return putchar(t) == EOF ? EOF : (unsigned char)c;
858} 858}
859 859
860int winansi_puts(const char *s) 860int winansi_puts(const char *s)
@@ -952,8 +952,7 @@ int winansi_fputc(int c, FILE *stream)
952 return ret; 952 return ret;
953 } 953 }
954 954
955 charToConBuffA(s, 1); 955 return conv_fwriteCon(stream, s, 1) == EOF ? EOF : (unsigned char )c;
956 return fputc(t, stream) == EOF ? EOF : (unsigned char )c;
957} 956}
958 957
959#if !defined(__USE_MINGW_ANSI_STDIO) || !__USE_MINGW_ANSI_STDIO 958#if !defined(__USE_MINGW_ANSI_STDIO) || !__USE_MINGW_ANSI_STDIO
@@ -1083,8 +1082,7 @@ static int ansi_emulate_write(int fd, const void *buf, size_t count)
1083 len = pos - str; 1082 len = pos - str;
1084 1083
1085 if (len) { 1084 if (len) {
1086 charToConBuffA(str, len); 1085 out_len = conv_writeCon(fd, str, len);
1087 out_len = write(fd, str, len);
1088 if (out_len == -1) 1086 if (out_len == -1)
1089 return -1; 1087 return -1;
1090 rv += out_len; 1088 rv += out_len;
@@ -1100,8 +1098,7 @@ static int ansi_emulate_write(int fd, const void *buf, size_t count)
1100 pos = str; 1098 pos = str;
1101 } else { 1099 } else {
1102 len = strlen(str); 1100 len = strlen(str);
1103 charToConA(str); 1101 out_len = conv_writeCon(fd, str, len);
1104 out_len = write(fd, str, len);
1105 return (out_len == -1) ? -1 : rv+out_len; 1102 return (out_len == -1) ? -1 : rv+out_len;
1106 } 1103 }
1107 } 1104 }
@@ -1442,13 +1439,162 @@ BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got)
1442} 1439}
1443#endif 1440#endif
1444 1441
1442#if ENABLE_FEATURE_UTF8_OUTPUT
1443// Write u8buf as if the console output CP is UTF8 - regardless of the CP.
1444// fd should be associated with a console output.
1445// Return: 0 on successful write[s], else -1 (e.g. if fd is not a console).
1446//
1447// Up to 3 bytes of an incomplete codepoint may be buffered from prior call[s].
1448// All the completed codepoints in one call are written using WriteConsoleW.
1449// Bad sequence of any length (till ASCII7 or UTF8 lead) prints 1 subst wchar.
1450//
1451// note: one console is assumed, and the (3 bytes) buffer is shared regardless
1452// of the original output stream (stdout/err), or even if the handle is
1453// of a different console. This can result in invalid codepoints output
1454// if streams are multiplexed mid-codepoint (same as elsewhere?)
1455static int writeCon_utf8(int fd, const char *u8buf, size_t u8siz)
1456{
1457 static int state = 0; // -1: bad, 0-3: remaining cp bytes (0: done/new)
1458 static uint32_t codepoint = 0; // accumulated from up to 4 UTF8 bytes
1459
1460 HANDLE h = (HANDLE)_get_osfhandle(fd);
1461 wchar_t wbuf[256];
1462 int wlen = 0;
1463
1464 // ASCII7 uses least logic, then UTF8 continuations, UTF8 lead, errors
1465 while (u8siz--) {
1466 unsigned char c = *u8buf++;
1467 int topbits = 0;
1468
1469 while (c & (0x80 >> topbits))
1470 ++topbits;
1471
1472 process_byte:
1473 if (state == 0 && topbits == 0) {
1474 // valid ASCII7, state remains 0
1475 codepoint = c;
1476
1477 } else if (state > 0 && topbits == 1) {
1478 // valid continuation byte
1479 codepoint = (codepoint << 6) | (c & 0x3f);
1480 if (--state)
1481 continue;
1482
1483 } else if (state == 0 && topbits >= 2 && topbits <= 4) {
1484 // valid UTF8 lead of 2/3/4 bytes codepoint
1485 codepoint = c & (0x7f >> topbits);
1486 state = topbits - 1; // remaining bytes after lead
1487 continue;
1488
1489 } else if (state >= 0) {
1490 // invalid byte at state 0/1/2/3, add placeholder once
1491 codepoint = CONFIG_SUBST_WCHAR;
1492 state = -1;
1493
1494 } else {
1495 // inside bad sequence (placeholder char already added)
1496 if (topbits == 1 || topbits > 4)
1497 continue; // still bad
1498 // c is valid for state 0, process it with clean slate
1499 state = 0;
1500 goto process_byte;
1501 }
1502
1503 // codepoint is complete
1504 // we don't reject surrogate halves, reserved, etc
1505 if (codepoint < 0x10000) {
1506 wbuf[wlen++] = codepoint;
1507 } else {
1508 // generate a surrogates pair (wbuf has room for 2+)
1509 codepoint -= 0x10000;
1510 wbuf[wlen++] = 0xd800 | (codepoint >> 10);
1511 wbuf[wlen++] = 0xdc00 | (codepoint & 0x3ff);
1512 }
1513
1514 // flush if we have less than two empty spaces
1515 if (wlen > ARRAY_SIZE(wbuf) - 2) {
1516 if (!WriteConsoleW(h, wbuf, wlen, 0, 0))
1517 return -1;
1518 wlen = 0;
1519 }
1520 }
1521
1522 if (wlen && !WriteConsoleW(h, wbuf, wlen, 0, 0))
1523 return -1;
1524 return 0;
1525}
1526#endif
1527
1445void console_write(const char *str, int len) 1528void console_write(const char *str, int len)
1446{ 1529{
1447 char *buf = xmemdup(str, len); 1530 char *buf = xmemdup(str, len);
1448 int fd = _open("CONOUT$", _O_WRONLY); 1531 int fd = _open("CONOUT$", _O_WRONLY);
1449 HANDLE fh = (HANDLE)_get_osfhandle(fd); 1532 conv_writeCon(fd, buf, len);
1450 charToConBuffA(buf, len);
1451 WriteConsole(fh, buf, len, NULL, NULL);
1452 close(fd); 1533 close(fd);
1453 free(buf); 1534 free(buf);
1454} 1535}
1536
// Tell whether console output conversion is enabled.
// A locale of exactly "C" - taken from the first of LC_ALL, LC_CTYPE, LANG
// which is set - disables the conversion, so that the source data is
// interpreted only by the console according to its output CP.
// The environment is examined on the first call only; the answer is cached.
// Return: 1 if conversion is enabled, 0 if disabled.
static int conout_conv_enabled(void)
{
	static int checked, conv_on;  /* both start as 0 */
	const char *locale;

	if (checked)
		return conv_on;

	// keep in sync with [re]init_unicode at libbb/unicode.c
	locale = getenv("LC_ALL");
	if (locale == NULL)
		locale = getenv("LC_CTYPE");
	if (locale == NULL)
		locale = getenv("LANG");

	// enabled unless the locale string is exactly "C"
	conv_on = (locale == NULL || locale[0] != 'C' || locale[1] != '\0');
	checked = 1;

	return conv_on;
}
1555
1556// TODO: improvements:
1557//
1558// 1. currently conv_[f]writeCon modify buf inplace, which means the caller
1559// typically has to make a writable copy first just for this.
1560// Sometimes it allocates a big copy once, and calls us with substrings.
1561// Instead, we could make a writable copy here - it's not used later anyway.
1562// To avoid the performance hit of many small allocations, we could use
1563// a local buffer for short strings, and allocate only if it doesn't fit
1564// (or maybe just reuse the local buffer with substring iterations).
1565//
1566// 2. Instead of converting from ACP to the console out CP - which guarantees
1567// potential data-loss if they differ, we could convert it to wchar_t and
1568// write it using WriteConsoleW. This should prevent all output data-loss.
1569// care should be taken with DBCS codepages (e.g. 936) or other multi-byte
1570// because then converting on arbitrary substring boundaries can fail.
1571
// Write siz bytes of buf to stream, which is expected to be a console,
// applying the console output conversion unless it's disabled (LC_ALL=C):
// - with FEATURE_UTF8_OUTPUT and a console output CP other than UTF8, buf is
//   taken as UTF8 and written directly to the console by writeCon_utf8.
// - without FEATURE_UTF8_OUTPUT, buf is converted inplace from ACP to the
//   console out CP (hence buf must be writable) and written using fwrite.
// Otherwise buf is written as is using fwrite.
// returns EOF on error, 0 on success
static int conv_fwriteCon(FILE *stream, char *buf, size_t siz)
{
	if (conout_conv_enabled()) {
#if ENABLE_FEATURE_UTF8_OUTPUT
		if (GetConsoleOutputCP() != CP_UTF8) {
			// writeCon_utf8 is unbuffered and bypasses stdio, so
			// anything still pending in stream must go out first,
			// else the output would appear out of order.
			if (fflush(stream) == EOF)
				return EOF;
			return writeCon_utf8(fileno(stream), buf, siz) ? EOF : 0;
		}
#else
		charToConBuffA(buf, siz);
#endif
	}
	return fwrite(buf, 1, siz, stream) < siz ? EOF : 0;
}
1586
// Same as conv_fwriteCon, but for a file descriptor, using lower level write:
// the console output conversion is applied to buf (which must be writable)
// unless it's disabled, and the result is written to fd.
// returns -1 on error, actually-written bytes on success.
// When the data is written by writeCon_utf8, success always reports siz.
static int conv_writeCon(int fd, char *buf, size_t siz)
{
	if (conout_conv_enabled()) {
#if ENABLE_FEATURE_UTF8_OUTPUT
		if (GetConsoleOutputCP() != CP_UTF8) {
			// keep the error value a plain int: "cond ? -1 : siz"
			// would be evaluated as size_t and only then narrowed
			if (writeCon_utf8(fd, buf, siz) != 0)
				return -1;
			return (int)siz;
		}
#else
		charToConBuffA(buf, siz);
#endif
	}
	return write(fd, buf, siz);
}