diff options
author | Ron Yorston <rmy@pobox.com> | 2023-07-23 12:20:42 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-23 12:20:42 +0000 |
commit | a5b78ff089a28651282d765349ede783b1a80fa9 (patch) | |
tree | 2cd4cc741e2ee38c5100a6ddea63451ccc278182 | |
parent | 72b97c86c6c1a1902d6dcda3da7c38db13585cdc (diff) | |
parent | 878b3cd27fe83f2b0ff476b884c34d165be0072c (diff) | |
download | busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.tar.gz busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.tar.bz2 busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.zip |
Merge pull request #340 from avih/win32-unicode-editing
Win32: support unicode editing
-rw-r--r-- | include/mingw.h | 12 | ||||
-rw-r--r-- | include/unicode.h | 15 | ||||
-rw-r--r-- | libbb/lineedit.c | 24 | ||||
-rw-r--r-- | libbb/unicode.c | 14 | ||||
-rwxr-xr-x | scripts/mk_mingw64u_defconfig | 35 | ||||
-rw-r--r-- | win32/mingw.c | 14 | ||||
-rw-r--r-- | win32/winansi.c | 58 |
7 files changed, 160 insertions, 12 deletions
diff --git a/include/mingw.h b/include/mingw.h index 232ffadd7..97db2f6a9 100644 --- a/include/mingw.h +++ b/include/mingw.h | |||
@@ -586,6 +586,18 @@ char *alloc_ext_space(const char *path); | |||
586 | int add_win32_extension(char *p); | 586 | int add_win32_extension(char *p); |
587 | char *file_is_win32_exe(const char *name); | 587 | char *file_is_win32_exe(const char *name); |
588 | 588 | ||
589 | #if ENABLE_UNICODE_SUPPORT | ||
590 | /* | ||
591 | * Windows wchar_t is 16 bit, while Linux wchar_t (which busybox expects) is 32. | ||
592 | * So when (busybox) unicode.h is included, wchar_t is redefined as 32 bit. | ||
593 | * Without unicode.h, MINGW_BB_WCHAR_T is the busybox wide char (32), | ||
594 | * and wchar_t is the Windows wide char (16). | ||
595 | */ | ||
596 | #define MINGW_BB_WCHAR_T uint32_t /* keep in sync with unicode.h */ | ||
597 | |||
598 | MINGW_BB_WCHAR_T *bs_to_slash_u(MINGW_BB_WCHAR_T *p) FAST_FUNC; | ||
599 | #endif | ||
600 | |||
589 | char *bs_to_slash(char *p) FAST_FUNC; | 601 | char *bs_to_slash(char *p) FAST_FUNC; |
590 | void slash_to_bs(char *p) FAST_FUNC; | 602 | void slash_to_bs(char *p) FAST_FUNC; |
591 | size_t remove_cr(char *p, size_t len) FAST_FUNC; | 603 | size_t remove_cr(char *p, size_t len) FAST_FUNC; |
diff --git a/include/unicode.h b/include/unicode.h index 0317a2151..e894f7148 100644 --- a/include/unicode.h +++ b/include/unicode.h | |||
@@ -87,6 +87,21 @@ void reinit_unicode(const char *LANG) FAST_FUNC; | |||
87 | # undef MB_CUR_MAX | 87 | # undef MB_CUR_MAX |
88 | # define MB_CUR_MAX 6 | 88 | # define MB_CUR_MAX 6 |
89 | 89 | ||
90 | #if ENABLE_PLATFORM_MINGW32 | ||
91 | #undef wint_t | ||
92 | #undef mbstate_t | ||
93 | #undef mbstowcs | ||
94 | #undef wcstombs | ||
95 | #undef wcrtomb | ||
96 | #undef iswspace | ||
97 | #undef iswalnum | ||
98 | #undef iswpunct | ||
99 | #undef wcwidth | ||
100 | |||
101 | #undef wchar_t | ||
102 | #define wchar_t uint32_t /* keep in sync with MINGW_BB_WCHAR_T */ | ||
103 | #endif | ||
104 | |||
90 | /* Prevent name collisions */ | 105 | /* Prevent name collisions */ |
91 | # define wint_t bb_wint_t | 106 | # define wint_t bb_wint_t |
92 | # define mbstate_t bb_mbstate_t | 107 | # define mbstate_t bb_mbstate_t |
diff --git a/libbb/lineedit.c b/libbb/lineedit.c index a6884c7e0..54f0edef0 100644 --- a/libbb/lineedit.c +++ b/libbb/lineedit.c | |||
@@ -726,8 +726,15 @@ static void input_forward(void) | |||
726 | #if !ENABLE_PLATFORM_MINGW32 | 726 | #if !ENABLE_PLATFORM_MINGW32 |
727 | put_cur_glyph_and_inc_cursor(); | 727 | put_cur_glyph_and_inc_cursor(); |
728 | #else | 728 | #else |
729 | /* | ||
730 | * inc_cursor improves forward cursor movement appearance on | ||
731 | * win 7/8 console, but it's broken with unicode wide-glyphs, | ||
732 | * e.g. paste and move forward over: echo 开开心心过每一天 | ||
733 | * so disable inc_cursor when unicode is active (which is only | ||
734 | * windows 10+, where inc_cursor is not needed anyway). | ||
735 | */ | ||
729 | { | 736 | { |
730 | if (terminal_mode(FALSE) & VT_INPUT) | 737 | if (unicode_status == UNICODE_ON) |
731 | put_cur_glyph_and_inc_cursor(); | 738 | put_cur_glyph_and_inc_cursor(); |
732 | else | 739 | else |
733 | inc_cursor(); | 740 | inc_cursor(); |
@@ -770,6 +777,11 @@ static void add_match(char *matched, int sensitive) | |||
770 | || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f) | 777 | || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f) |
771 | || (ENABLE_UNICODE_SUPPORT && *p == 0x7f) | 778 | || (ENABLE_UNICODE_SUPPORT && *p == 0x7f) |
772 | # else | 779 | # else |
780 | /* | ||
781 | * on Windows, *p > 0x7f is never control: | ||
782 | * without unicode active: these are normal codepage chars. | ||
784 | * with unicode active: these are UTF8 lead or continuation bytes. | ||
784 | */ | ||
773 | || *p == 0x7f | 785 | || *p == 0x7f |
774 | # endif | 786 | # endif |
775 | ) { | 787 | ) { |
@@ -1318,6 +1330,12 @@ static NOINLINE void input_tab(smallint *lastWasTab) | |||
1318 | # if ENABLE_PLATFORM_MINGW32 | 1330 | # if ENABLE_PLATFORM_MINGW32 |
1319 | int chosen_index = 0; | 1331 | int chosen_index = 0; |
1320 | int chosen_sens = FALSE; | 1332 | int chosen_sens = FALSE; |
1333 | /* | ||
1334 | * FIXME: the next three vars are unused with ENABLE_UNICODE_SUPPORT | ||
1335 | * because the mingw code which uses them to update a tab-completion | ||
1336 | * prefix to the correct case (e.g. ~/desk<tab> to ~/Desktop/) is | ||
1337 | * not compiled, and so e.g. ~/desk<tab> completes to ~/desktop/ . | ||
1338 | */ | ||
1321 | unsigned orig_pfx_len; | 1339 | unsigned orig_pfx_len; |
1322 | char *target; | 1340 | char *target; |
1323 | const char *source; | 1341 | const char *source; |
@@ -2803,7 +2821,11 @@ int FAST_FUNC read_line_input(line_input_t *st, const char *prompt, char *comman | |||
2803 | #if ENABLE_PLATFORM_MINGW32 | 2821 | #if ENABLE_PLATFORM_MINGW32 |
2804 | case CTRL('Z'): | 2822 | case CTRL('Z'): |
2805 | command_ps[command_len] = '\0'; | 2823 | command_ps[command_len] = '\0'; |
2824 | #if ENABLE_UNICODE_SUPPORT | ||
2825 | bs_to_slash_u(command_ps); | ||
2826 | #else | ||
2806 | bs_to_slash(command_ps); | 2827 | bs_to_slash(command_ps); |
2828 | #endif | ||
2807 | redraw(cmdedit_y, 0); | 2829 | redraw(cmdedit_y, 0); |
2808 | break; | 2830 | break; |
2809 | #endif | 2831 | #endif |
diff --git a/libbb/unicode.c b/libbb/unicode.c index e98cbbf35..206ec0dcb 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c | |||
@@ -69,8 +69,14 @@ void FAST_FUNC init_unicode(void) | |||
69 | void FAST_FUNC reinit_unicode(const char *LANG) | 69 | void FAST_FUNC reinit_unicode(const char *LANG) |
70 | { | 70 | { |
71 | unicode_status = UNICODE_OFF; | 71 | unicode_status = UNICODE_OFF; |
72 | #if ENABLE_PLATFORM_MINGW32 | ||
73 | /* enable unicode only when the ANSI code page (ACP) is UTF-8 and the LANG argument is not exactly "C" (a NULL LANG does not disable it) */ | ||
74 | if (GetACP() != CP_UTF8 || (LANG && LANG[0] == 'C' && LANG[1] == 0)) | ||
75 | return; | ||
76 | #else | ||
72 | if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF"))) | 77 | if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF"))) |
73 | return; | 78 | return; |
79 | #endif | ||
74 | unicode_status = UNICODE_ON; | 80 | unicode_status = UNICODE_ON; |
75 | } | 81 | } |
76 | 82 | ||
@@ -653,6 +659,9 @@ int FAST_FUNC wcwidth(unsigned ucs) | |||
653 | { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 }, | 659 | { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 }, |
654 | { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD }, | 660 | { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD }, |
655 | { 0xD242, 0xD244 } | 661 | { 0xD242, 0xD244 } |
662 | #if ENABLE_PLATFORM_MINGW32 | ||
663 | , { 0xF3FB, 0xF3FF } | ||
664 | #endif | ||
656 | }; | 665 | }; |
657 | /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */ | 666 | /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */ |
658 | if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1)) | 667 | if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1)) |
@@ -689,6 +698,11 @@ int FAST_FUNC wcwidth(unsigned ucs) | |||
689 | || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */ | 698 | || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */ |
690 | || (ucs >= 0xffe0 && ucs <= 0xffe6) | 699 | || (ucs >= 0xffe0 && ucs <= 0xffe6) |
691 | # endif | 700 | # endif |
701 | #if ENABLE_PLATFORM_MINGW32 | ||
702 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000 | ||
703 | || (ucs >= 0x1f600 && ucs <= 0x1f64f) /* Emoticons */ | ||
704 | # endif | ||
705 | #endif | ||
692 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x20000 | 706 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x20000 |
693 | || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */ | 707 | || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */ |
694 | # endif | 708 | # endif |
diff --git a/scripts/mk_mingw64u_defconfig b/scripts/mk_mingw64u_defconfig new file mode 100755 index 000000000..3cca78e5b --- /dev/null +++ b/scripts/mk_mingw64u_defconfig | |||
@@ -0,0 +1,35 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | configs=$(dirname -- "$0")/../configs | ||
4 | |||
5 | # turn each FOO=bar argument into -e 's/.*FOO.*/FOO=bar/' (replaces every whole line containing FOO), then run sed "$@" on stdin | ||
6 | set_build_opts() { | ||
7 | for v; do | ||
8 | set -- "$@" -e "s/.*${v%%=*}.*/$v/" | ||
9 | shift | ||
10 | done | ||
11 | sed "$@" | ||
12 | } | ||
13 | |||
14 | |||
15 | # Create unicode configs/mingw64u_defconfig from configs/mingw64_defconfig | ||
16 | # by flipping some build options to enable: | ||
17 | # - UTF8 manifest to support unicode on win 10 (filenames, etc). | ||
18 | # - UTF8 terminal input (shell prompt, read). | ||
19 | # - UTF8 editing - codepoint awareness (prompt, read): | ||
20 | # - Builtin libc unicode functions (mbstowcs etc - no UNICODE_USING_LOCALE). | ||
21 | # - Dynamic unicode based on ANSI codepage and ENV (CHECK_UNICODE_IN_ENV). | ||
22 | # - Screen-width awareness (COMBINING_WCHARS, WIDE_WCHARS) | ||
23 | # - Full unicode range (U+10FFFF - LAST_SUPPORTED_WCHAR=1114111) | ||
24 | |||
25 | set_build_opts \ | ||
26 | CONFIG_FEATURE_UTF8_MANIFEST=y \ | ||
27 | CONFIG_FEATURE_UTF8_INPUT=y \ | ||
28 | CONFIG_UNICODE_SUPPORT=y \ | ||
29 | CONFIG_FEATURE_CHECK_UNICODE_IN_ENV=y \ | ||
30 | CONFIG_SUBST_WCHAR=63 \ | ||
31 | CONFIG_LAST_SUPPORTED_WCHAR=1114111 \ | ||
32 | CONFIG_UNICODE_COMBINING_WCHARS=y \ | ||
33 | CONFIG_UNICODE_WIDE_WCHARS=y \ | ||
34 | < "$configs"/mingw64_defconfig \ | ||
35 | > "$configs"/mingw64u_defconfig | ||
diff --git a/win32/mingw.c b/win32/mingw.c index 5e9c71226..dabb2a2e7 100644 --- a/win32/mingw.c +++ b/win32/mingw.c | |||
@@ -2119,6 +2119,20 @@ char * FAST_FUNC bs_to_slash(char *str) | |||
2119 | return str; | 2119 | return str; |
2120 | } | 2120 | } |
2121 | 2121 | ||
2122 | #if ENABLE_UNICODE_SUPPORT | ||
2123 | MINGW_BB_WCHAR_T * FAST_FUNC bs_to_slash_u(MINGW_BB_WCHAR_T *str) | ||
2124 | { /* in place: every '\\' in the 0-terminated str becomes '/'; returns str */ | ||
2125 | MINGW_BB_WCHAR_T *p; | ||
2126 | |||
2127 | for (p=str; *p; ++p) { | ||
2128 | if ( *p == '\\' ) { | ||
2129 | *p = '/'; | ||
2130 | } | ||
2131 | } | ||
2132 | return str; | ||
2133 | } | ||
2134 | #endif | ||
2135 | |||
2122 | void FAST_FUNC slash_to_bs(char *p) | 2136 | void FAST_FUNC slash_to_bs(char *p) |
2123 | { | 2137 | { |
2124 | for (; *p; ++p) { | 2138 | for (; *p; ++p) { |
diff --git a/win32/winansi.c b/win32/winansi.c index bc3e69163..f280177e6 100644 --- a/win32/winansi.c +++ b/win32/winansi.c | |||
@@ -1284,6 +1284,44 @@ static void maybeEatUpto2ndHalfUp(HANDLE h, DWORD *ph1) | |||
1284 | } | 1284 | } |
1285 | } | 1285 | } |
1286 | 1286 | ||
1287 | // If the event is a key-down, remember its codepoint. Else, if it's | ||
1288 | // a key-up with a matching remembered down - forget that down, | ||
1289 | // else (up without a matching prior key-down) - change it to down. | ||
1290 | // We remember the last DOWN_BUF_SIZ key-downs (ring buffer) so that a | ||
1291 | // sequence like X-down Y-down X-up Y-up won't trigger this hack for Y-up. | ||
1292 | // When up is changed into down there won't be a further key-up event, | ||
1293 | // but that's OK because the caller ignores key-up events anyway. | ||
1294 | static void maybe_change_up_to_down(wchar_t key, BOOL *isdown) | ||
1295 | { | ||
1296 | #define DOWN_BUF_SIZ 8 | ||
1297 | static wchar_t downbuf[DOWN_BUF_SIZ] = {0}; | ||
1298 | static int pos = 0; | ||
1299 | |||
1300 | if (*isdown) { | ||
1301 | downbuf[pos++] = key; | ||
1302 | pos = pos % DOWN_BUF_SIZ; | ||
1303 | return; | ||
1304 | } | ||
1305 | |||
1306 | // the missing-key-down issue was only observed with non-ASCII values, | ||
1307 | // so limit this hack to values above 127 (outside 7-bit ASCII). | ||
1308 | // also, a new shell/read process launched from the CLI receives | ||
1309 | // an ENTER-up event without a prior down in this new process; without | ||
1310 | // this limit it would be changed to down - a spurious ENTER keypress. | ||
1311 | if (key <= 127) | ||
1312 | return; | ||
1313 | |||
1314 | // key up, try to match a prior down | ||
1315 | for (int i = 0; i < DOWN_BUF_SIZ; ++i) { | ||
1316 | if (downbuf[i] == key) { | ||
1317 | downbuf[i] = 0; // "forget" this down | ||
1318 | return; | ||
1319 | } | ||
1320 | } | ||
1321 | |||
1322 | // no prior key-down - replace the up with down | ||
1323 | *isdown = TRUE; | ||
1324 | } | ||
1287 | 1325 | ||
1288 | /* | 1326 | /* |
1289 | * readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when | 1327 | * readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when |
@@ -1355,20 +1393,18 @@ BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got) | |||
1355 | srec = *r; | 1393 | srec = *r; |
1356 | codepoint = srec.Event.KeyEvent.uChar.UnicodeChar; | 1394 | codepoint = srec.Event.KeyEvent.uChar.UnicodeChar; |
1357 | 1395 | ||
1358 | // At the cmd.exe console (but not windows terminal) we sometimes | 1396 | // Observed when pasting unicode at cmd.exe console (but not |
1359 | // get key-up without the prior expected key-down event, sometimes | 1397 | // windows terminal), we sometimes get key-up event without |
1360 | // with UnicodeChar of 0 instead the key-down event. work around it. | 1398 | // a prior matching key-down (or with key-down codepoint 0), |
1361 | if (codepoint) { | 1399 | // so this call would change the up into down in such case. |
1362 | static wchar_t last_down = 0; | 1400 | // E.g. pastes fixed by this hack: U+1F600 "😀", or U+0C80 "ಀ" |
1363 | 1401 | if (codepoint) | |
1364 | if (srec.Event.KeyEvent.bKeyDown) | 1402 | maybe_change_up_to_down(codepoint, &srec.Event.KeyEvent.bKeyDown); |
1365 | last_down = codepoint; | ||
1366 | else if (codepoint > 127 && codepoint != last_down) | ||
1367 | srec.Event.KeyEvent.bKeyDown = TRUE; | ||
1368 | } | ||
1369 | 1403 | ||
1370 | // if it's a 1st (high) surrogate pair half, try to eat upto and | 1404 | // if it's a 1st (high) surrogate pair half, try to eat upto and |
1371 | // excluding the 2nd (low) half, and combine them into codepoint. | 1405 | // excluding the 2nd (low) half, and combine them into codepoint. |
1406 | // this does not interfere with the missing-key-down workaround | ||
1407 | // (no issue if the down-buffer has 1st-half-down without up). | ||
1372 | if (codepoint >= 0xD800 && codepoint <= 0xDBFF) | 1408 | if (codepoint >= 0xD800 && codepoint <= 0xDBFF) |
1373 | maybeEatUpto2ndHalfUp(h, &codepoint); | 1409 | maybeEatUpto2ndHalfUp(h, &codepoint); |
1374 | 1410 | ||