aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAvi Halachmi (:avih) <avihpit@yahoo.com>2023-08-24 13:22:24 +0300
committerAvi Halachmi (:avih) <avihpit@yahoo.com>2023-08-24 13:52:54 +0300
commit5862b6920d519974c2453529bdfd6832dd06f807 (patch)
tree7d70f298f1a1199f5ed330fc1d24be180fffdf01
parentf4f0515429d8ace3c3314ee0e823205d8044f2ac (diff)
downloadbusybox-w32-5862b6920d519974c2453529bdfd6832dd06f807.tar.gz
busybox-w32-5862b6920d519974c2453529bdfd6832dd06f807.tar.bz2
busybox-w32-5862b6920d519974c2453529bdfd6832dd06f807.zip
win32: UTF8_OUTPUT: speedup for big outputs
With the native Windows console, writeCon_utf8, which converts a stream of UTF8 into console output, is about 1.4x slower for big unicode writes than the native fwrite (e.g. when the console codepage is UTF8), which is not too bad. However, newer versions of conhost are quicker, e.g. OpenConsole.exe (which is conhost), which ships with the Windows Terminal, is about 4x faster than the native conhost in processing (unicode?) input. And when conhost can process inputs much quicker, it turned out that fwrite throughput was nearly 3x better than writeCon_utf8. Luckily, this turned out to be mainly due to the internal 256 wide chars buffer which writeCon_utf8 uses, and with a 4096 wide chars buffer it becomes only ~10% slower than fwrite, which is much better. However, making the console window very small, such that it needs to spend very little time on rendering, makes it apparent that there's still a difference - writeCon_utf8 is about 30% slower than fwrite, but that's still not bad, and that's also an uncommon use case. So this commit increases the buffer, and also allocates it dynamically (once) to avoid abusing the stack with an additional 8K in one call.
-rw-r--r--win32/winansi.c10
1 files changed, 8 insertions, 2 deletions
diff --git a/win32/winansi.c b/win32/winansi.c
index c88c096d2..591154378 100644
--- a/win32/winansi.c
+++ b/win32/winansi.c
@@ -1457,10 +1457,16 @@ static int writeCon_utf8(int fd, const char *u8buf, size_t u8siz)
1457 static int state = 0; // -1: bad, 0-3: remaining cp bytes (0: done/new) 1457 static int state = 0; // -1: bad, 0-3: remaining cp bytes (0: done/new)
1458 static uint32_t codepoint = 0; // accumulated from up to 4 UTF8 bytes 1458 static uint32_t codepoint = 0; // accumulated from up to 4 UTF8 bytes
1459 1459
1460 // not a state, only avoids re-alloc on every call
1461 static const int wbufwsiz = 4096;
1462 static wchar_t *wbuf = 0;
1463
1460 HANDLE h = (HANDLE)_get_osfhandle(fd); 1464 HANDLE h = (HANDLE)_get_osfhandle(fd);
1461 wchar_t wbuf[256];
1462 int wlen = 0; 1465 int wlen = 0;
1463 1466
1467 if (!wbuf)
1468 wbuf = xmalloc(wbufwsiz * sizeof(wchar_t));
1469
1464 // ASCII7 uses least logic, then UTF8 continuations, UTF8 lead, errors 1470 // ASCII7 uses least logic, then UTF8 continuations, UTF8 lead, errors
1465 while (u8siz--) { 1471 while (u8siz--) {
1466 unsigned char c = *u8buf++; 1472 unsigned char c = *u8buf++;
@@ -1512,7 +1518,7 @@ static int writeCon_utf8(int fd, const char *u8buf, size_t u8siz)
1512 } 1518 }
1513 1519
1514 // flush if we have less than two empty spaces 1520 // flush if we have less than two empty spaces
1515 if (wlen > ARRAY_SIZE(wbuf) - 2) { 1521 if (wlen > wbufwsiz - 2) {
1516 if (!WriteConsoleW(h, wbuf, wlen, 0, 0)) 1522 if (!WriteConsoleW(h, wbuf, wlen, 0, 0))
1517 return -1; 1523 return -1;
1518 wlen = 0; 1524 wlen = 0;