aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRon Yorston <rmy@pobox.com>2023-07-23 12:20:42 +0000
committerGitHub <noreply@github.com>2023-07-23 12:20:42 +0000
commita5b78ff089a28651282d765349ede783b1a80fa9 (patch)
tree2cd4cc741e2ee38c5100a6ddea63451ccc278182
parent72b97c86c6c1a1902d6dcda3da7c38db13585cdc (diff)
parent878b3cd27fe83f2b0ff476b884c34d165be0072c (diff)
downloadbusybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.tar.gz
busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.tar.bz2
busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.zip
Merge pull request #340 from avih/win32-unicode-editing
Win32: support unicode editing
-rw-r--r--include/mingw.h12
-rw-r--r--include/unicode.h15
-rw-r--r--libbb/lineedit.c24
-rw-r--r--libbb/unicode.c14
-rwxr-xr-xscripts/mk_mingw64u_defconfig35
-rw-r--r--win32/mingw.c14
-rw-r--r--win32/winansi.c58
7 files changed, 160 insertions, 12 deletions
diff --git a/include/mingw.h b/include/mingw.h
index 232ffadd7..97db2f6a9 100644
--- a/include/mingw.h
+++ b/include/mingw.h
@@ -586,6 +586,18 @@ char *alloc_ext_space(const char *path);
586int add_win32_extension(char *p); 586int add_win32_extension(char *p);
587char *file_is_win32_exe(const char *name); 587char *file_is_win32_exe(const char *name);
588 588
589#if ENABLE_UNICODE_SUPPORT
590/*
591 * windows wchar_t is 16 bit, while linux (and busybox expectation) is 32.
592 * so when (busybox) unicode.h is included, wchar_t is 32 bit.
593 * Without unicode.h, MINGW_BB_WCHAR_T is busybox wide char (32),
594 * and wchar_t is Windows wide char (16).
595 */
596#define MINGW_BB_WCHAR_T uint32_t /* keep in sync with unicode.h */
597
598MINGW_BB_WCHAR_T *bs_to_slash_u(MINGW_BB_WCHAR_T *p) FAST_FUNC;
599#endif
600
589char *bs_to_slash(char *p) FAST_FUNC; 601char *bs_to_slash(char *p) FAST_FUNC;
590void slash_to_bs(char *p) FAST_FUNC; 602void slash_to_bs(char *p) FAST_FUNC;
591size_t remove_cr(char *p, size_t len) FAST_FUNC; 603size_t remove_cr(char *p, size_t len) FAST_FUNC;
diff --git a/include/unicode.h b/include/unicode.h
index 0317a2151..e894f7148 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -87,6 +87,21 @@ void reinit_unicode(const char *LANG) FAST_FUNC;
87# undef MB_CUR_MAX 87# undef MB_CUR_MAX
88# define MB_CUR_MAX 6 88# define MB_CUR_MAX 6
89 89
90#if ENABLE_PLATFORM_MINGW32
91 #undef wint_t
92 #undef mbstate_t
93 #undef mbstowcs
94 #undef wcstombs
95 #undef wcrtomb
96 #undef iswspace
97 #undef iswalnum
98 #undef iswpunct
99 #undef wcwidth
100
101 #undef wchar_t
102 #define wchar_t uint32_t /* keep in sync with MINGW_BB_WCHAR_T */
103#endif
104
90/* Prevent name collisions */ 105/* Prevent name collisions */
91# define wint_t bb_wint_t 106# define wint_t bb_wint_t
92# define mbstate_t bb_mbstate_t 107# define mbstate_t bb_mbstate_t
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index a6884c7e0..54f0edef0 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -726,8 +726,15 @@ static void input_forward(void)
726#if !ENABLE_PLATFORM_MINGW32 726#if !ENABLE_PLATFORM_MINGW32
727 put_cur_glyph_and_inc_cursor(); 727 put_cur_glyph_and_inc_cursor();
728#else 728#else
729 /*
730 * inc_cursor improves forward cursor movement appearance on
731 * win 7/8 console, but it's broken with unicode wide-glyphs,
732 * e.g. paste and move forward over: echo 开开心心过每一天
733 * so disable inc_cursor when unicode is active (which is only
734 * windows 10+, where inc_cursor is not needed anyway).
735 */
729 { 736 {
730 if (terminal_mode(FALSE) & VT_INPUT) 737 if (unicode_status == UNICODE_ON)
731 put_cur_glyph_and_inc_cursor(); 738 put_cur_glyph_and_inc_cursor();
732 else 739 else
733 inc_cursor(); 740 inc_cursor();
@@ -770,6 +777,11 @@ static void add_match(char *matched, int sensitive)
770 || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f) 777 || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f)
771 || (ENABLE_UNICODE_SUPPORT && *p == 0x7f) 778 || (ENABLE_UNICODE_SUPPORT && *p == 0x7f)
772# else 779# else
780 /*
781 * on Windows, *p > 0x7f is never control:
782 * without unicode active: these are normal codepage chars.
783 * with unicode active: these are UTF8 continuation bytes.
784 */
773 || *p == 0x7f 785 || *p == 0x7f
774# endif 786# endif
775 ) { 787 ) {
@@ -1318,6 +1330,12 @@ static NOINLINE void input_tab(smallint *lastWasTab)
1318# if ENABLE_PLATFORM_MINGW32 1330# if ENABLE_PLATFORM_MINGW32
1319 int chosen_index = 0; 1331 int chosen_index = 0;
1320 int chosen_sens = FALSE; 1332 int chosen_sens = FALSE;
1333 /*
1334 * FIXME: the next three vars are unused with ENABLE_UNICODE_SUPPORT
1335 * because the mingw code which uses them to update a tab-completion
1336 * prefix to the correct case (e.g. ~/desk<tab> to ~/Desktop/) is
1337 * not compiled, and so e.g. ~/desk<tab> completes to ~/desktop/ .
1338 */
1321 unsigned orig_pfx_len; 1339 unsigned orig_pfx_len;
1322 char *target; 1340 char *target;
1323 const char *source; 1341 const char *source;
@@ -2803,7 +2821,11 @@ int FAST_FUNC read_line_input(line_input_t *st, const char *prompt, char *comman
2803#if ENABLE_PLATFORM_MINGW32 2821#if ENABLE_PLATFORM_MINGW32
2804 case CTRL('Z'): 2822 case CTRL('Z'):
2805 command_ps[command_len] = '\0'; 2823 command_ps[command_len] = '\0';
2824 #if ENABLE_UNICODE_SUPPORT
2825 bs_to_slash_u(command_ps);
2826 #else
2806 bs_to_slash(command_ps); 2827 bs_to_slash(command_ps);
2828 #endif
2807 redraw(cmdedit_y, 0); 2829 redraw(cmdedit_y, 0);
2808 break; 2830 break;
2809#endif 2831#endif
diff --git a/libbb/unicode.c b/libbb/unicode.c
index e98cbbf35..206ec0dcb 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -69,8 +69,14 @@ void FAST_FUNC init_unicode(void)
69void FAST_FUNC reinit_unicode(const char *LANG) 69void FAST_FUNC reinit_unicode(const char *LANG)
70{ 70{
71 unicode_status = UNICODE_OFF; 71 unicode_status = UNICODE_OFF;
72#if ENABLE_PLATFORM_MINGW32
73 /* enable unicode only when ACP is UTF8 and the env var is not 'C' */
74 if (GetACP() != CP_UTF8 || (LANG && LANG[0] == 'C' && LANG[1] == 0))
75 return;
76#else
72 if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF"))) 77 if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))
73 return; 78 return;
79#endif
74 unicode_status = UNICODE_ON; 80 unicode_status = UNICODE_ON;
75} 81}
76 82
@@ -653,6 +659,9 @@ int FAST_FUNC wcwidth(unsigned ucs)
653 { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 }, 659 { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
654 { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD }, 660 { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
655 { 0xD242, 0xD244 } 661 { 0xD242, 0xD244 }
662#if ENABLE_PLATFORM_MINGW32
663 , { 0xF3FB, 0xF3FF }
664#endif
656 }; 665 };
657 /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */ 666 /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
658 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1)) 667 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
@@ -689,6 +698,11 @@ int FAST_FUNC wcwidth(unsigned ucs)
689 || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */ 698 || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */
690 || (ucs >= 0xffe0 && ucs <= 0xffe6) 699 || (ucs >= 0xffe0 && ucs <= 0xffe6)
691# endif 700# endif
701#if ENABLE_PLATFORM_MINGW32
702# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000
703 || (ucs >= 0x1f600 && ucs <= 0x1f64f) /* Emoticons */
704# endif
705#endif
692# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x20000 706# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x20000
693 || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */ 707 || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */
694# endif 708# endif
diff --git a/scripts/mk_mingw64u_defconfig b/scripts/mk_mingw64u_defconfig
new file mode 100755
index 000000000..3cca78e5b
--- /dev/null
+++ b/scripts/mk_mingw64u_defconfig
@@ -0,0 +1,35 @@
1#!/bin/sh
2
3configs=$(dirname -- "$0")/../configs
4
# Filter a Kconfig-style config from stdin to stdout, forcing build options.
#
# Each argument has the form NAME=value. It is turned into a sed expression
# which replaces the line that sets NAME - either "NAME=<anything>" or
# "# NAME is not set" - with "NAME=value"; then sed runs with all of them.
#
# The pattern is anchored at the start of the line and requires '=' or ' '
# right after NAME, so that an option whose name merely contains NAME
# (e.g. NAME_EXTRA=y) is left untouched.
#
# Note: NAME and value are pasted into the sed script verbatim, so they
# must not contain '/', '&' or other sed-special characters.
set_build_opts() {
	# "for v" iterates over a snapshot of the original arguments; on each
	# pass one "-e script" pair is appended and the processed NAME=value is
	# shifted out, so after the loop "$@" holds only the sed options.
	for v; do
		set -- "$@" -e "s/^\(# \)\{0,1\}${v%%=*}[= ].*/$v/"
		shift
	done
	sed "$@"
}
13
14
15# Create unicode configs/mingw64u_defconfig from configs/mingw64_defconfig
16# by flipping some build options to enable:
17# - UTF8 manifest to support unicode on win 10 (filenames, etc).
18# - UTF8 terminal input (shell prompt, read).
19# - UTF8 editing - codepoint awareness (prompt, read):
20# - Builtin libc unicode functions (mbstowcs etc - no UNICODE_USING_LOCALE).
21# - Dynamic unicode based on ANSI codepage and ENV (CHECK_UNICODE_IN_ENV).
22# - Screen-width awareness (COMBINING_WCHARS, WIDE_WCHARS)
23# - Full unicode range (U+10FFFF - LAST_SUPPORTED_WCHAR=1114111)
24
25set_build_opts \
26 CONFIG_FEATURE_UTF8_MANIFEST=y \
27 CONFIG_FEATURE_UTF8_INPUT=y \
28 CONFIG_UNICODE_SUPPORT=y \
29 CONFIG_FEATURE_CHECK_UNICODE_IN_ENV=y \
30 CONFIG_SUBST_WCHAR=63 \
31 CONFIG_LAST_SUPPORTED_WCHAR=1114111 \
32 CONFIG_UNICODE_COMBINING_WCHARS=y \
33 CONFIG_UNICODE_WIDE_WCHARS=y \
34 < "$configs"/mingw64_defconfig \
35 > "$configs"/mingw64u_defconfig
diff --git a/win32/mingw.c b/win32/mingw.c
index 5e9c71226..dabb2a2e7 100644
--- a/win32/mingw.c
+++ b/win32/mingw.c
@@ -2119,6 +2119,20 @@ char * FAST_FUNC bs_to_slash(char *str)
2119 return str; 2119 return str;
2120} 2120}
2121 2121
2122#if ENABLE_UNICODE_SUPPORT
2123MINGW_BB_WCHAR_T * FAST_FUNC bs_to_slash_u(MINGW_BB_WCHAR_T *str)
2124{
2125 MINGW_BB_WCHAR_T *p;
2126
2127 for (p=str; *p; ++p) {
2128 if ( *p == '\\' ) {
2129 *p = '/';
2130 }
2131 }
2132 return str;
2133}
2134#endif
2135
2122void FAST_FUNC slash_to_bs(char *p) 2136void FAST_FUNC slash_to_bs(char *p)
2123{ 2137{
2124 for (; *p; ++p) { 2138 for (; *p; ++p) {
diff --git a/win32/winansi.c b/win32/winansi.c
index bc3e69163..f280177e6 100644
--- a/win32/winansi.c
+++ b/win32/winansi.c
@@ -1284,6 +1284,44 @@ static void maybeEatUpto2ndHalfUp(HANDLE h, DWORD *ph1)
1284 } 1284 }
1285} 1285}
1286 1286
1287// if the codepoint is a key-down event, remember it, else if
1288// it's a key-up event with matching prior down - forget the down,
1289// else (up without matching prior key-down) - change it to down.
1290// We remember few prior key-down events so that a sequence
1291// like X-down Y-down X-up Y-up won't trigger this hack for Y-up.
1292// When up is changed into down there won't be further key-up event,
1293// but that's OK because the caller ignores key-up events anyway.
1294static void maybe_change_up_to_down(wchar_t key, BOOL *isdown)
1295{
1296 #define DOWN_BUF_SIZ 8
1297 static wchar_t downbuf[DOWN_BUF_SIZ] = {0};
1298 static int pos = 0;
1299
1300 if (*isdown) {
1301 downbuf[pos++] = key;
1302 pos = pos % DOWN_BUF_SIZ;
1303 return;
1304 }
1305
1306 // the missing-key-down issue was only observed with unicode values,
1307 // so limit this hack to non-ASCII-7 values.
1308 // also, launching a new shell/read process from CLI captures
1309 // an ENTER-up event without prior down at this new process, which
1310 // would otherwise change it to down - creating a wrong ENTER keypress.
1311 if (key <= 127)
1312 return;
1313
1314 // key up, try to match a prior down
1315 for (int i = 0; i < DOWN_BUF_SIZ; ++i) {
1316 if (downbuf[i] == key) {
1317 downbuf[i] = 0; // "forget" this down
1318 return;
1319 }
1320 }
1321
1322 // no prior key-down - replace the up with down
1323 *isdown = TRUE;
1324}
1287 1325
1288/* 1326/*
1289 * readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when 1327 * readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when
@@ -1355,20 +1393,18 @@ BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got)
1355 srec = *r; 1393 srec = *r;
1356 codepoint = srec.Event.KeyEvent.uChar.UnicodeChar; 1394 codepoint = srec.Event.KeyEvent.uChar.UnicodeChar;
1357 1395
1358 // At the cmd.exe console (but not windows terminal) we sometimes 1396 // Observed when pasting unicode at cmd.exe console (but not
1359 // get key-up without the prior expected key-down event, sometimes 1397 // windows terminal), we sometimes get key-up event without
1360 // with UnicodeChar of 0 instead the key-down event. work around it. 1398 // a prior matching key-down (or with key-down codepoint 0),
1361 if (codepoint) { 1399 // so this call would change the up into down in such case.
1362 static wchar_t last_down = 0; 1400 // E.g. pastes fixed by this hack: U+1F600 "😀", or U+0C80 "ಀ"
1363 1401 if (codepoint)
1364 if (srec.Event.KeyEvent.bKeyDown) 1402 maybe_change_up_to_down(codepoint, &srec.Event.KeyEvent.bKeyDown);
1365 last_down = codepoint;
1366 else if (codepoint > 127 && codepoint != last_down)
1367 srec.Event.KeyEvent.bKeyDown = TRUE;
1368 }
1369 1403
1370 // if it's a 1st (high) surrogate pair half, try to eat upto and 1404 // if it's a 1st (high) surrogate pair half, try to eat upto and
1371 // excluding the 2nd (low) half, and combine them into codepoint. 1405 // excluding the 2nd (low) half, and combine them into codepoint.
1406 // this does not interfere with the missing-key-down workaround
1407 // (no issue if the down-buffer has 1st-half-down without up).
1372 if (codepoint >= 0xD800 && codepoint <= 0xDBFF) 1408 if (codepoint >= 0xD800 && codepoint <= 0xDBFF)
1373 maybeEatUpto2ndHalfUp(h, &codepoint); 1409 maybeEatUpto2ndHalfUp(h, &codepoint);
1374 1410