diff options
Diffstat (limited to 'win32')
-rw-r--r-- | win32/termios.c | 5 | ||||
-rw-r--r-- | win32/winansi.c | 160 |
2 files changed, 163 insertions, 2 deletions
diff --git a/win32/termios.c b/win32/termios.c index 11c24c8b3..d70c1e685 100644 --- a/win32/termios.c +++ b/win32/termios.c | |||
@@ -57,7 +57,10 @@ int64_t FAST_FUNC windows_read_key(int fd, char *buf UNUSED_PARAM, int timeout) | |||
57 | #if ENABLE_FEATURE_EURO_INPUT | 57 | #if ENABLE_FEATURE_EURO_INPUT |
58 | if (!ReadConsoleInputW(cin, &record, 1, &nevent_out)) | 58 | if (!ReadConsoleInputW(cin, &record, 1, &nevent_out)) |
59 | #else | 59 | #else |
60 | if (!ReadConsoleInput(cin, &record, 1, &nevent_out)) | 60 | // if ACP is UTF8 then we read UTF8 regardless of console (in) CP |
61 | if (GetConsoleCP() == CP_UTF8 || GetACP() == CP_UTF8 | ||
62 | ? !readConsoleInput_utf8(cin, &record, 1, &nevent_out) | ||
63 | : !ReadConsoleInput(cin, &record, 1, &nevent_out)) | ||
61 | #endif | 64 | #endif |
62 | goto done; | 65 | goto done; |
63 | 66 | ||
diff --git a/win32/winansi.c b/win32/winansi.c index 552ccc6ca..f823d14be 100644 --- a/win32/winansi.c +++ b/win32/winansi.c | |||
@@ -758,7 +758,8 @@ BOOL conToCharBuffA(LPSTR s, DWORD len) | |||
758 | CPINFO acp_info, con_info; | 758 | CPINFO acp_info, con_info; |
759 | WCHAR *buf; | 759 | WCHAR *buf; |
760 | 760 | ||
761 | if (acp == conicp) | 761 | // if acp is UTF8 then we got UTF8 via readConsoleInput_utf8 |
762 | if (acp == conicp || acp == CP_UTF8) | ||
762 | return TRUE; | 763 | return TRUE; |
763 | 764 | ||
764 | if (!s || !GetCPInfo(acp, &acp_info) || !GetCPInfo(conicp, &con_info) || | 765 | if (!s || !GetCPInfo(acp, &acp_info) || !GetCPInfo(conicp, &con_info) || |
@@ -1189,3 +1190,160 @@ int mingw_isatty(int fd) | |||
1189 | 1190 | ||
1190 | return result; | 1191 | return result; |
1191 | } | 1192 | } |
1193 | |||
// Encode the value cp as UTF8 into buf, returning the number of bytes
// written (1 to 4). buf must have room for 4 bytes.
// Surrogate halves (0xD800-0xDFFF) are deliberately not rejected: they
// are encoded as 3 bytes like any other value up to 0xffff.
// Values above 0x10ffff are stored as a single 0 byte (length 1). That
// works in our context because the byte is delivered as a key event,
// where 0 values are typically ignored by the caller.
static int toutf8(DWORD cp, unsigned char *buf)
{
	// lead-byte marker, indexed by the sequence length
	static const unsigned char lead[] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0 };
	int len, i;

	if (cp > 0x10ffff) {
		buf[0] = 0;
		return 1;
	}

	len = cp <= 0x7f ? 1 : cp <= 0x7ff ? 2 : cp <= 0xffff ? 3 : 4;

	// fill the continuation bytes from the last one backwards, taking
	// 6 bits each; what remains of cp then belongs in the lead byte
	for (i = len - 1; i > 0; --i) {
		buf[i] = (unsigned char)(0x80 | (cp & 0x3f));
		cp >>= 6;
	}
	buf[0] = (unsigned char)(lead[len] | cp);

	return len;
}
1223 | |||
1224 | // peek into the console input queue and try to find a key-up event of | ||
1225 | // a surrugate-2nd-half, at which case eat the console events up to this | ||
1226 | // one, and combine the pair values into *ph1 | ||
1227 | static void maybeEatUpto2ndHalfUp(HANDLE h, DWORD *ph1) | ||
1228 | { | ||
1229 | // Peek into the queue arbitrary 16 records deep | ||
1230 | INPUT_RECORD r[16]; | ||
1231 | DWORD got; | ||
1232 | int i; | ||
1233 | |||
1234 | if (!PeekConsoleInputW(h, r, 16, &got)) | ||
1235 | return; | ||
1236 | |||
1237 | // we're conservative, and abort the search on anything which | ||
1238 | // seems out of place, like non-key event, non-2nd-half, etc. | ||
1239 | for (i = 0; i < got; ++i) { | ||
1240 | DWORD h2; | ||
1241 | int is2nd, isdown; | ||
1242 | |||
1243 | if (r[i].EventType != KEY_EVENT) | ||
1244 | return; | ||
1245 | |||
1246 | isdown = r[i].Event.KeyEvent.bKeyDown; | ||
1247 | h2 = r[i].Event.KeyEvent.uChar.UnicodeChar; | ||
1248 | is2nd = h2 >= 0xDC00 && h2 <= 0xDFFF; | ||
1249 | |||
1250 | // skip 0 values, keyup of 1st half, and keydown of a 2nd half, if any | ||
1251 | if (!h2 || (h2 == *ph1 && !isdown) || (is2nd && isdown)) | ||
1252 | continue; | ||
1253 | |||
1254 | if (!is2nd) | ||
1255 | return; | ||
1256 | |||
1257 | // got 2nd-half-up. eat the events up to this, combine the values | ||
1258 | ReadConsoleInputW(h, r, i + 1, &got); | ||
1259 | *ph1 = 0x10000 | ((*ph1 & ~0xD800) << 10) | (h2 & ~0xDC00); | ||
1260 | return; | ||
1261 | } | ||
1262 | } | ||
1263 | |||
1264 | |||
1265 | /* | ||
1266 | * readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when | ||
1267 | * the console (input) CP is UTF8, but addressed two issues: | ||
1268 | * - It depend on the console CP, while we use ReadConsoleInputW internally. | ||
1269 | * - ReadConsoleInputA with Console CP of UTF8 (65001) is buggy: | ||
1270 | * - Doesn't work on Windows 7 (reads 0 or '?' for non-ASCII codepoints). | ||
1271 | * - When used at the cmd.exe console - but not Windows Terminal: | ||
1272 | * sometimes only key-up events arrive without the expected prior key-down. | ||
1273 | * Seems to depend both on the console CP and the entered/pasted codepoint. | ||
1274 | * - If reading one record at a time (which is how we use it), then input | ||
1275 | * codepoints of U+0800 or higher crash the console/terminal window. | ||
1276 | * (tested on Windows 10.0.19045.3086: console and Windows Terminal 1.17) | ||
1277 | * Example: U+0C80 (UTF8: 0xE0 0xB2 0x80): "ಀ" | ||
1278 | * Example: U+1F600 (UTF8: 0xF0 0x9F 0x98 0x80): "😀" | ||
1279 | * - If reading more than one record at a time: | ||
1280 | * - Unknown whether it can still crash in some cases (was not observed). | ||
1281 | * - Codepoints above U+FFFF are broken, and arrive as | ||
1282 | * U+FFFD REPLACEMENT CHARACTER "�" | ||
1283 | * - Few more codepoints to test the issues above (and below): | ||
1284 | * - U+0500 (UTF8: 0xD4, 0x80): "Ԁ" (OK in UTF8 CP, else maybe no key-down) | ||
1285 | * - U+07C0 (UTF8: 0xDF, 0x80): "߀" (might exhibit missing key-down) | ||
1286 | * | ||
1287 | * So this function uses ReadConsoleInputW and then delivers it as UTF8: | ||
1288 | * - Works with any console CP, in Windows terminal and Windows 7/10 console. | ||
1289 | * - Surrogate pairs are combined and delivered as a single UTF8 codepoint. | ||
1290 | * - Ignore occasional intermediate control events between the halfs. | ||
1291 | * - If we can't find the 2nd half, or if for some reason we get a 2nd half | ||
1292 | * wiithout the 1st, deliver the half we got as UTF8 (a-la WTF8). | ||
1293 | * - The "sometimes key-down is missing" issue at the cmd.exe console happens | ||
1294 | * also when using ReadConsoleInputW (for U+0080 or higher), so handle it. | ||
1295 | * This can also happen with surrogate pairs. | ||
1296 | * - Up to 4-bytes state is maintained for a single UTF8 codepoint buffer. | ||
1297 | * | ||
1298 | * Gotchas (could be solved, but currently there's no need): | ||
1299 | * - We support reading one record at a time, else fail - to make it obvious. | ||
1300 | * - We have a state which is hidden from PeekConsoleInput - so not in sync. | ||
1301 | * - We don't deliver key-up events in some cases: when working around | ||
1302 | * the "missing key-down" issue, and with combined surrogate halfs value. | ||
1303 | */ | ||
1304 | BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got) | ||
1305 | { | ||
1306 | static unsigned char u8buf[4]; // any single codepoint in UTF8 | ||
1307 | static int u8pos = 0, u8len = 0; | ||
1308 | static INPUT_RECORD srec; | ||
1309 | |||
1310 | if (len != 1) | ||
1311 | return FALSE; | ||
1312 | |||
1313 | if (u8pos == u8len) { | ||
1314 | DWORD codepoint; | ||
1315 | |||
1316 | if (!ReadConsoleInputW(h, r, 1, got)) | ||
1317 | return FALSE; | ||
1318 | if (*got == 0 || r->EventType != KEY_EVENT) | ||
1319 | return TRUE; | ||
1320 | |||
1321 | srec = *r; | ||
1322 | codepoint = srec.Event.KeyEvent.uChar.UnicodeChar; | ||
1323 | |||
1324 | // At the cmd.exe console (but not windows terminal) we sometimes | ||
1325 | // get key-up without the prior expected key-down event, sometimes | ||
1326 | // with UnicodeChar of 0 instead the key-down event. work around it. | ||
1327 | if (codepoint) { | ||
1328 | static wchar_t last_down = 0; | ||
1329 | |||
1330 | if (srec.Event.KeyEvent.bKeyDown) | ||
1331 | last_down = codepoint; | ||
1332 | else if (codepoint > 127 && codepoint != last_down) | ||
1333 | srec.Event.KeyEvent.bKeyDown = TRUE; | ||
1334 | } | ||
1335 | |||
1336 | // if it's a 1st (high) surrogate pair half, try to eat upto and | ||
1337 | // including the 2nd (low) half, and combine them into codepoint. | ||
1338 | if (codepoint >= 0xD800 && codepoint <= 0xDBFF) | ||
1339 | maybeEatUpto2ndHalfUp(h, &codepoint); | ||
1340 | |||
1341 | u8len = toutf8(codepoint, u8buf); | ||
1342 | u8pos = 0; | ||
1343 | } | ||
1344 | |||
1345 | *r = srec; | ||
1346 | r->Event.KeyEvent.uChar.AsciiChar = (char)u8buf[u8pos++]; | ||
1347 | *got = 1; | ||
1348 | return TRUE; | ||
1349 | } | ||