Merge pull request #340 from avih/win32-unicode-editing

Win32: support unicode editing
author: Ron Yorston <rmy@pobox.com> 2023-07-23 12:20:42 +0000
committer: GitHub <noreply@github.com> 2023-07-23 12:20:42 +0000
commit: a5b78ff089a28651282d765349ede783b1a80fa9 (patch)
tree: 2cd4cc741e2ee38c5100a6ddea63451ccc278182
parent: 72b97c86c6c1a1902d6dcda3da7c38db13585cdc (diff)
parent: 878b3cd27fe83f2b0ff476b884c34d165be0072c (diff)
download: busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.tar.gz
busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.tar.bz2
busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.zip
7 files changed, 160 insertions, 12 deletions
diff --git a/include/mingw.h b/include/mingw.h
index 232ffadd7..97db2f6a9 100644
--- a/include/mingw.h
+++ b/include/mingw.h
@@ -586,6 +586,18 @@ char *alloc_ext_space(const char *path);
 int add_win32_extension(char *p);
 char *file_is_win32_exe(const char *name);
+#if ENABLE_UNICODE_SUPPORT
+/*
+ * windows wchar_t is 16 bit, while linux (and busybox expectation) is 32.
+ * so when (busybox) unicode.h is included, wchar_t is 32 bit.
+ * Without unicode.h, MINGW_BB_WCHAR_T is busybox wide char (32),
+ * and wchar_t is Windows wide char (16).
+ */
+#define MINGW_BB_WCHAR_T uint32_t  /* keep in sync with unicode.h */
+MINGW_BB_WCHAR_T *bs_to_slash_u(MINGW_BB_WCHAR_T *p) FAST_FUNC;
+#endif
 char *bs_to_slash(char *p) FAST_FUNC;
 void slash_to_bs(char *p) FAST_FUNC;
 size_t remove_cr(char *p, size_t len) FAST_FUNC;
diff --git a/include/unicode.h b/include/unicode.h
index 0317a2151..e894f7148 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -87,6 +87,21 @@ void reinit_unicode(const char *LANG) FAST_FUNC;
 #  undef MB_CUR_MAX
 #  define MB_CUR_MAX 6
+#if ENABLE_PLATFORM_MINGW32
+  #undef wint_t
+  #undef mbstate_t
+  #undef mbstowcs
+  #undef wcstombs
+  #undef wcrtomb
+  #undef iswspace
+  #undef iswalnum
+  #undef iswpunct
+  #undef wcwidth
+  #undef wchar_t
+  #define wchar_t uint32_t  /* keep in sync with MINGW_BB_WCHAR_T */
+#endif
 /* Prevent name collisions */
 #  define wint_t    bb_wint_t
 #  define mbstate_t bb_mbstate_t
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index a6884c7e0..54f0edef0 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -726,8 +726,15 @@ static void input_forward(void)
 #if !ENABLE_PLATFORM_MINGW32
                put_cur_glyph_and_inc_cursor();
 #else
+        /*
+         * inc_cursor improves forward cursor movement appearance on
+         * win 7/8 console, but it's broken with unicode wide-glyphs,
+         * e.g. paste and move forward over: echo 开开心心过每一天
+         * so disable inc_cursor when unicode is active (which is only
+         * windows 10+, where inc_cursor is not needed anyway).
+         */
        {
-                if (terminal_mode(FALSE) & VT_INPUT)
+                if (unicode_status == UNICODE_ON)
                        put_cur_glyph_and_inc_cursor();
                else
                        inc_cursor();
@@ -770,6 +777,11 @@ static void add_match(char *matched, int sensitive)
                 || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f)
                 || (ENABLE_UNICODE_SUPPORT && *p == 0x7f)
 # else
+                /*
+                 * on Windows, *p > 0x7f is never control:
+                 * without unicode active: these are normal codepage chars.
+                 * with unicode active: these are UTF8 continuation bytes.
+                 */
                 || *p == 0x7f
 # endif
                ) {
@@ -1318,6 +1330,12 @@ static NOINLINE void input_tab(smallint *lastWasTab)
 # if ENABLE_PLATFORM_MINGW32
        int chosen_index = 0;
        int chosen_sens = FALSE;
+        /*
+         * FIXME: the next three vars are unused with ENABLE_UNICODE_SUPPORT
+         * because the mingw code which uses them to update a tab-completion
+         * prefix to the correct case (e.g. ~/desk<tab> to ~/Desktop/) is
+         * not compiled, and so e.g. ~/desk<tab> completes to ~/desktop/ .
+         */
        unsigned orig_pfx_len;
        char *target;
        const char *source;
@@ -2803,7 +2821,11 @@ int FAST_FUNC read_line_input(line_input_t *st, const char *prompt, char *comman
 #if ENABLE_PLATFORM_MINGW32
                case CTRL('Z'):
                        command_ps[command_len] = '\0';
+                #if ENABLE_UNICODE_SUPPORT
+                        bs_to_slash_u(command_ps);
+                #else
                        bs_to_slash(command_ps);
+                #endif
                        redraw(cmdedit_y, 0);
                        break;
 #endif
diff --git a/libbb/unicode.c b/libbb/unicode.c
index e98cbbf35..206ec0dcb 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -69,8 +69,14 @@ void FAST_FUNC init_unicode(void)
 void FAST_FUNC reinit_unicode(const char *LANG)
 {
        unicode_status = UNICODE_OFF;
+#if ENABLE_PLATFORM_MINGW32
+        /* enable unicode only when ACP is UTF8 and the env var is not 'C' */
+        if (GetACP() != CP_UTF8 || (LANG && LANG[0] == 'C' && LANG[1] == 0))
+                return;
+#else
        if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))
                return;
+#endif
        unicode_status = UNICODE_ON;
 }
@@ -653,6 +659,9 @@ int FAST_FUNC wcwidth(unsigned ucs)
                        { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
                        { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
                        { 0xD242, 0xD244 }
+#if ENABLE_PLATFORM_MINGW32
+                        , { 0xF3FB, 0xF3FF }
+#endif
                };
                /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
                if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
@@ -689,6 +698,11 @@ int FAST_FUNC wcwidth(unsigned ucs)
                || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */
                || (ucs >= 0xffe0 && ucs <= 0xffe6)
 #   endif
+#if ENABLE_PLATFORM_MINGW32
+#   if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000
+                || (ucs >= 0x1f600 && ucs <= 0x1f64f) /* Emoticons */
+#   endif
+#endif
 #   if CONFIG_LAST_SUPPORTED_WCHAR >= 0x20000
                || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */
 #   endif
diff --git a/scripts/mk_mingw64u_defconfig b/scripts/mk_mingw64u_defconfig
new file mode 100755
index 000000000..3cca78e5b
--- /dev/null
+++ b/scripts/mk_mingw64u_defconfig
@@ -0,0 +1,35 @@
+#!/bin/sh
+configs=$(dirname -- "$0")/../configs
+# replace each FOO=bar argument with -e 's/.*FOO.*/FOO=bar/', then sed "$@"
+set_build_opts() {
+    for v; do
+        set -- "$@" -e "s/.*${v%%=*}.*/$v/"
+        shift
+    done
+    sed "$@"
+}
+# Create unicode configs/mingw64u_defconfig from configs/mingw64_defconfig
+# by flipping some build options to enable:
+# - UTF8 manifest to support unicode on win 10 (filenames, etc).
+# - UTF8 terminal input (shell prompt, read).
+# - UTF8 editing - codepoint awareness (prompt, read):
+#   - Builtin libc unicode functions (mbstowcs etc - no UNICODE_USING_LOCALE).
+#   - Dynamic unicode based on ANSI codepage and ENV (CHECK_UNICODE_IN_ENV).
+#   - Screen-width awareness (COMBINING_WCHARS, WIDE_WCHARS)
+#   - Full unicode range (U+10FFFF - LAST_SUPPORTED_WCHAR=1114111)
+set_build_opts \
+    CONFIG_FEATURE_UTF8_MANIFEST=y \
+    CONFIG_FEATURE_UTF8_INPUT=y \
+    CONFIG_UNICODE_SUPPORT=y \
+    CONFIG_FEATURE_CHECK_UNICODE_IN_ENV=y \
+    CONFIG_SUBST_WCHAR=63 \
+    CONFIG_LAST_SUPPORTED_WCHAR=1114111 \
+    CONFIG_UNICODE_COMBINING_WCHARS=y \
+    CONFIG_UNICODE_WIDE_WCHARS=y \
+    < "$configs"/mingw64_defconfig \
+    > "$configs"/mingw64u_defconfig
diff --git a/win32/mingw.c b/win32/mingw.c
index 5e9c71226..dabb2a2e7 100644
--- a/win32/mingw.c
+++ b/win32/mingw.c
@@ -2119,6 +2119,20 @@ char * FAST_FUNC bs_to_slash(char *str)
        return str;
 }
+#if ENABLE_UNICODE_SUPPORT
+MINGW_BB_WCHAR_T * FAST_FUNC bs_to_slash_u(MINGW_BB_WCHAR_T *str)
+{
+        MINGW_BB_WCHAR_T *p;
+        for (p=str; *p; ++p) {
+                if ( *p == '\\' ) {
+                        *p = '/';
+                }
+        }
+        return str;
+}
+#endif
 void FAST_FUNC slash_to_bs(char *p)
 {
        for (; *p; ++p) {
diff --git a/win32/winansi.c b/win32/winansi.c
index bc3e69163..f280177e6 100644
--- a/win32/winansi.c
+++ b/win32/winansi.c
@@ -1284,6 +1284,44 @@ static void maybeEatUpto2ndHalfUp(HANDLE h, DWORD *ph1)
        }
 }
+// if the codepoint is a key-down event, remember it, else if
+// it's a key-up event with matching prior down - forget the down,
+// else (up without matching prior key-down) - change it to down.
+// We remember few prior key-down events so that a sequence
+// like X-down Y-down X-up Y-up won't trigger this hack for Y-up.
+// When up is changed into down there won't be further key-up event,
+// but that's OK because the caller ignores key-up events anyway.
+static void maybe_change_up_to_down(wchar_t key, BOOL *isdown)
+{
+        #define DOWN_BUF_SIZ 8
+        static wchar_t downbuf[DOWN_BUF_SIZ] = {0};
+        static int pos = 0;
+        if (*isdown) {
+                downbuf[pos++] = key;
+                pos = pos % DOWN_BUF_SIZ;
+                return;
+        }
+        // the missing-key-down issue was only observed with unicode values,
+        // so limit this hack to non-ASCII-7 values.
+        // also, launching a new shell/read process from CLI captures
+        // an ENTER-up event without prior down at this new process, which
+        // would otherwise change it to down - creating a wrong ENTER keypress.
+        if (key <= 127)
+                return;
+        // key up, try to match a prior down
+        for (int i = 0; i < DOWN_BUF_SIZ; ++i) {
+                if (downbuf[i] == key) {
+                        downbuf[i] = 0;  // "forget" this down
+                        return;
+                }
+        }
+        // no prior key-down - replace the up with down
+        *isdown = TRUE;
+}
 /*
 * readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when
@@ -1355,20 +1393,18 @@ BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got)
                srec = *r;
                codepoint = srec.Event.KeyEvent.uChar.UnicodeChar;
-                // At the cmd.exe console (but not windows terminal) we sometimes
+                // Observed when pasting unicode at cmd.exe console (but not
-                // get key-up without the prior expected key-down event, sometimes
+                // windows terminal), we sometimes get key-up event without
-                // with UnicodeChar of 0 instead the key-down event. work around it.
+                // a prior matching key-down (or with key-down codepoint 0),
-                if (codepoint) {
+                // so this call would change the up into down in such case.
-                        static wchar_t last_down = 0;
+                // E.g. pastes fixed by this hack: U+1F600 "😀", or U+0C80 "ಀ"
+                if (codepoint)
-                        if (srec.Event.KeyEvent.bKeyDown)
+                        maybe_change_up_to_down(codepoint, &srec.Event.KeyEvent.bKeyDown);
-                                last_down = codepoint;
-                        else if (codepoint > 127 && codepoint != last_down)
-                                srec.Event.KeyEvent.bKeyDown = TRUE;
-                }
                // if it's a 1st (high) surrogate pair half, try to eat upto and
                // excluding the 2nd (low) half, and combine them into codepoint.
+                // this does not interfere with the missing-key-down workaround
+                // (no issue if the down-buffer has 1st-half-down without up).
                if (codepoint >= 0xD800 && codepoint <= 0xDBFF)
                        maybeEatUpto2ndHalfUp(h, &codepoint);
author	Ron Yorston <rmy@pobox.com>	2023-07-23 12:20:42 +0000
committer	GitHub <noreply@github.com>	2023-07-23 12:20:42 +0000
commit	a5b78ff089a28651282d765349ede783b1a80fa9 (patch)
tree	2cd4cc741e2ee38c5100a6ddea63451ccc278182
parent	72b97c86c6c1a1902d6dcda3da7c38db13585cdc (diff)
parent	878b3cd27fe83f2b0ff476b884c34d165be0072c (diff)
download	busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.tar.gz busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.tar.bz2 busybox-w32-a5b78ff089a28651282d765349ede783b1a80fa9.zip

diff --git a/include/mingw.h b/include/mingw.h index 232ffadd7..97db2f6a9 100644 --- a/include/mingw.h +++ b/include/mingw.h
@@ -586,6 +586,18 @@ char *alloc_ext_space(const char *path);
586	int add_win32_extension(char *p);	586	int add_win32_extension(char *p);
587	char *file_is_win32_exe(const char *name);	587	char *file_is_win32_exe(const char *name);
588		588
		589	#if ENABLE_UNICODE_SUPPORT
		590	/*
		591	* windows wchar_t is 16 bit, while linux (and busybox expectation) is 32.
		592	* so when (busybox) unicode.h is included, wchar_t is 32 bit.
		593	* Without unicode.h, MINGW_BB_WCHAR_T is busybox wide char (32),
		594	* and wchar_t is Windows wide char (16).
		595	*/
		596	#define MINGW_BB_WCHAR_T uint32_t /* keep in sync with unicode.h */
		597
		598	MINGW_BB_WCHAR_T *bs_to_slash_u(MINGW_BB_WCHAR_T *p) FAST_FUNC;
		599	#endif
		600
589	char *bs_to_slash(char *p) FAST_FUNC;	601	char *bs_to_slash(char *p) FAST_FUNC;
590	void slash_to_bs(char *p) FAST_FUNC;	602	void slash_to_bs(char *p) FAST_FUNC;
591	size_t remove_cr(char *p, size_t len) FAST_FUNC;	603	size_t remove_cr(char *p, size_t len) FAST_FUNC;


diff --git a/include/unicode.h b/include/unicode.h index 0317a2151..e894f7148 100644 --- a/include/unicode.h +++ b/include/unicode.h
@@ -87,6 +87,21 @@ void reinit_unicode(const char *LANG) FAST_FUNC;
87	# undef MB_CUR_MAX	87	# undef MB_CUR_MAX
88	# define MB_CUR_MAX 6	88	# define MB_CUR_MAX 6
89		89
		90	#if ENABLE_PLATFORM_MINGW32
		91	#undef wint_t
		92	#undef mbstate_t
		93	#undef mbstowcs
		94	#undef wcstombs
		95	#undef wcrtomb
		96	#undef iswspace
		97	#undef iswalnum
		98	#undef iswpunct
		99	#undef wcwidth
		100
		101	#undef wchar_t
		102	#define wchar_t uint32_t /* keep in sync with MINGW_BB_WCHAR_T */
		103	#endif
		104
90	/* Prevent name collisions */	105	/* Prevent name collisions */
91	# define wint_t bb_wint_t	106	# define wint_t bb_wint_t
92	# define mbstate_t bb_mbstate_t	107	# define mbstate_t bb_mbstate_t


diff --git a/libbb/lineedit.c b/libbb/lineedit.c index a6884c7e0..54f0edef0 100644 --- a/libbb/lineedit.c +++ b/libbb/lineedit.c
@@ -726,8 +726,15 @@ static void input_forward(void)
726	#if !ENABLE_PLATFORM_MINGW32	726	#if !ENABLE_PLATFORM_MINGW32
727	put_cur_glyph_and_inc_cursor();	727	put_cur_glyph_and_inc_cursor();
728	#else	728	#else
		729	/*
		730	* inc_cursor improves forward cursor movement appearance on
		731	* win 7/8 console, but it's broken with unicode wide-glyphs,
		732	* e.g. paste and move forward over: echo 开开心心过每一天
		733	* so disable inc_cursor when unicode is active (which is only
		734	* windows 10+, where inc_cursor is not needed anyway).
		735	*/
729	{	736	{
730	if (terminal_mode(FALSE) & VT_INPUT)	737	if (unicode_status == UNICODE_ON)
731	put_cur_glyph_and_inc_cursor();	738	put_cur_glyph_and_inc_cursor();
732	else	739	else
733	inc_cursor();	740	inc_cursor();
@@ -770,6 +777,11 @@ static void add_match(char *matched, int sensitive)
770	|| (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f)	777	|| (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f)
771	|| (ENABLE_UNICODE_SUPPORT && *p == 0x7f)	778	|| (ENABLE_UNICODE_SUPPORT && *p == 0x7f)
772	# else	779	# else
		780	/*
		781	* on Windows, *p > 0x7f is never control:
		782	* without unicode active: these are normal codepage chars.
		783	* with unicode active: these are UTF8 continuation bytes.
		784	*/
773	|| *p == 0x7f	785	|| *p == 0x7f
774	# endif	786	# endif
775	) {	787	) {
@@ -1318,6 +1330,12 @@ static NOINLINE void input_tab(smallint *lastWasTab)
1318	# if ENABLE_PLATFORM_MINGW32	1330	# if ENABLE_PLATFORM_MINGW32
1319	int chosen_index = 0;	1331	int chosen_index = 0;
1320	int chosen_sens = FALSE;	1332	int chosen_sens = FALSE;
		1333	/*
		1334	* FIXME: the next three vars are unused with ENABLE_UNICODE_SUPPORT
		1335	* because the mingw code which uses them to update a tab-completion
		1336	* prefix to the correct case (e.g. ~/desk<tab> to ~/Desktop/) is
		1337	* not compiled, and so e.g. ~/desk<tab> completes to ~/desktop/ .
		1338	*/
1321	unsigned orig_pfx_len;	1339	unsigned orig_pfx_len;
1322	char *target;	1340	char *target;
1323	const char *source;	1341	const char *source;
@@ -2803,7 +2821,11 @@ int FAST_FUNC read_line_input(line_input_t *st, const char *prompt, char *comman
2803	#if ENABLE_PLATFORM_MINGW32	2821	#if ENABLE_PLATFORM_MINGW32
2804	case CTRL('Z'):	2822	case CTRL('Z'):
2805	command_ps[command_len] = '\0';	2823	command_ps[command_len] = '\0';
		2824	#if ENABLE_UNICODE_SUPPORT
		2825	bs_to_slash_u(command_ps);
		2826	#else
2806	bs_to_slash(command_ps);	2827	bs_to_slash(command_ps);
		2828	#endif
2807	redraw(cmdedit_y, 0);	2829	redraw(cmdedit_y, 0);
2808	break;	2830	break;
2809	#endif	2831	#endif


diff --git a/libbb/unicode.c b/libbb/unicode.c index e98cbbf35..206ec0dcb 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c
@@ -69,8 +69,14 @@ void FAST_FUNC init_unicode(void)
69	void FAST_FUNC reinit_unicode(const char *LANG)	69	void FAST_FUNC reinit_unicode(const char *LANG)
70	{	70	{
71	unicode_status = UNICODE_OFF;	71	unicode_status = UNICODE_OFF;
		72	#if ENABLE_PLATFORM_MINGW32
		73	/* enable unicode only when ACP is UTF8 and the env var is not 'C' */
		74	if (GetACP() != CP_UTF8 || (LANG && LANG[0] == 'C' && LANG[1] == 0))
		75	return;
		76	#else
72	if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))	77	if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))
73	return;	78	return;
		79	#endif
74	unicode_status = UNICODE_ON;	80	unicode_status = UNICODE_ON;
75	}	81	}
76		82
@@ -653,6 +659,9 @@ int FAST_FUNC wcwidth(unsigned ucs)
653	{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },	659	{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
654	{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },	660	{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
655	{ 0xD242, 0xD244 }	661	{ 0xD242, 0xD244 }
		662	#if ENABLE_PLATFORM_MINGW32
		663	, { 0xF3FB, 0xF3FF }
		664	#endif
656	};	665	};
657	/* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */	666	/* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
658	if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))	667	if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
@@ -689,6 +698,11 @@ int FAST_FUNC wcwidth(unsigned ucs)
689	|| (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */	698	|| (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */
690	|| (ucs >= 0xffe0 && ucs <= 0xffe6)	699	|| (ucs >= 0xffe0 && ucs <= 0xffe6)
691	# endif	700	# endif
		701	#if ENABLE_PLATFORM_MINGW32
		702	# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x10000
		703	|| (ucs >= 0x1f600 && ucs <= 0x1f64f) /* Emoticons */
		704	# endif
		705	#endif
692	# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x20000	706	# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x20000
693	|| ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */	707	|| ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */
694	# endif	708	# endif


diff --git a/scripts/mk_mingw64u_defconfig b/scripts/mk_mingw64u_defconfig new file mode 100755 index 000000000..3cca78e5b --- /dev/null +++ b/scripts/mk_mingw64u_defconfig
@@ -0,0 +1,35 @@
		1	#!/bin/sh
		2
		3	configs=$(dirname -- "$0")/../configs
		4
		5	# replace each FOO=bar argument with -e 's/.*FOO.*/FOO=bar/', then sed "$@"
		6	set_build_opts() {
		7	for v; do
		8	set -- "$@" -e "s/.*${v%%=*}.*/$v/"
		9	shift
		10	done
		11	sed "$@"
		12	}
		13
		14
		15	# Create unicode configs/mingw64u_defconfig from configs/mingw64_defconfig
		16	# by flipping some build options to enable:
		17	# - UTF8 manifest to support unicode on win 10 (filenames, etc).
		18	# - UTF8 terminal input (shell prompt, read).
		19	# - UTF8 editing - codepoint awareness (prompt, read):
		20	# - Builtin libc unicode functions (mbstowcs etc - no UNICODE_USING_LOCALE).
		21	# - Dynamic unicode based on ANSI codepage and ENV (CHECK_UNICODE_IN_ENV).
		22	# - Screen-width awareness (COMBINING_WCHARS, WIDE_WCHARS)
		23	# - Full unicode range (U+10FFFF - LAST_SUPPORTED_WCHAR=1114111)
		24
		25	set_build_opts \
		26	CONFIG_FEATURE_UTF8_MANIFEST=y \
		27	CONFIG_FEATURE_UTF8_INPUT=y \
		28	CONFIG_UNICODE_SUPPORT=y \
		29	CONFIG_FEATURE_CHECK_UNICODE_IN_ENV=y \
		30	CONFIG_SUBST_WCHAR=63 \
		31	CONFIG_LAST_SUPPORTED_WCHAR=1114111 \
		32	CONFIG_UNICODE_COMBINING_WCHARS=y \
		33	CONFIG_UNICODE_WIDE_WCHARS=y \
		34	< "$configs"/mingw64_defconfig \
		35	> "$configs"/mingw64u_defconfig


diff --git a/win32/mingw.c b/win32/mingw.c index 5e9c71226..dabb2a2e7 100644 --- a/win32/mingw.c +++ b/win32/mingw.c
@@ -2119,6 +2119,20 @@ char * FAST_FUNC bs_to_slash(char *str)
2119	return str;	2119	return str;
2120	}	2120	}
2121		2121
		2122	#if ENABLE_UNICODE_SUPPORT
		2123	MINGW_BB_WCHAR_T * FAST_FUNC bs_to_slash_u(MINGW_BB_WCHAR_T *str)
		2124	{
		2125	MINGW_BB_WCHAR_T *p;
		2126
		2127	for (p=str; *p; ++p) {
		2128	if ( *p == '\\' ) {
		2129	*p = '/';
		2130	}
		2131	}
		2132	return str;
		2133	}
		2134	#endif
		2135
2122	void FAST_FUNC slash_to_bs(char *p)	2136	void FAST_FUNC slash_to_bs(char *p)
2123	{	2137	{
2124	for (; *p; ++p) {	2138	for (; *p; ++p) {


diff --git a/win32/winansi.c b/win32/winansi.c index bc3e69163..f280177e6 100644 --- a/win32/winansi.c +++ b/win32/winansi.c
@@ -1284,6 +1284,44 @@ static void maybeEatUpto2ndHalfUp(HANDLE h, DWORD *ph1)
1284	}	1284	}
1285	}	1285	}
1286		1286
		1287	// if the codepoint is a key-down event, remember it, else if
		1288	// it's a key-up event with matching prior down - forget the down,
		1289	// else (up without matching prior key-down) - change it to down.
		1290	// We remember few prior key-down events so that a sequence
		1291	// like X-down Y-down X-up Y-up won't trigger this hack for Y-up.
		1292	// When up is changed into down there won't be further key-up event,
		1293	// but that's OK because the caller ignores key-up events anyway.
		1294	static void maybe_change_up_to_down(wchar_t key, BOOL *isdown)
		1295	{
		1296	#define DOWN_BUF_SIZ 8
		1297	static wchar_t downbuf[DOWN_BUF_SIZ] = {0};
		1298	static int pos = 0;
		1299
		1300	if (*isdown) {
		1301	downbuf[pos++] = key;
		1302	pos = pos % DOWN_BUF_SIZ;
		1303	return;
		1304	}
		1305
		1306	// the missing-key-down issue was only observed with unicode values,
		1307	// so limit this hack to non-ASCII-7 values.
		1308	// also, launching a new shell/read process from CLI captures
		1309	// an ENTER-up event without prior down at this new process, which
		1310	// would otherwise change it to down - creating a wrong ENTER keypress.
		1311	if (key <= 127)
		1312	return;
		1313
		1314	// key up, try to match a prior down
		1315	for (int i = 0; i < DOWN_BUF_SIZ; ++i) {
		1316	if (downbuf[i] == key) {
		1317	downbuf[i] = 0; // "forget" this down
		1318	return;
		1319	}
		1320	}
		1321
		1322	// no prior key-down - replace the up with down
		1323	*isdown = TRUE;
		1324	}
1287		1325
1288	/*	1326	/*
1289	* readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when	1327	* readConsoleInput_utf8 behaves similar enough to ReadConsoleInputA when
@@ -1355,20 +1393,18 @@ BOOL readConsoleInput_utf8(HANDLE h, INPUT_RECORD *r, DWORD len, DWORD *got)
1355	srec = *r;	1393	srec = *r;
1356	codepoint = srec.Event.KeyEvent.uChar.UnicodeChar;	1394	codepoint = srec.Event.KeyEvent.uChar.UnicodeChar;
1357		1395
1358	// At the cmd.exe console (but not windows terminal) we sometimes	1396	// Observed when pasting unicode at cmd.exe console (but not
1359	// get key-up without the prior expected key-down event, sometimes	1397	// windows terminal), we sometimes get key-up event without
1360	// with UnicodeChar of 0 instead the key-down event. work around it.	1398	// a prior matching key-down (or with key-down codepoint 0),
1361	if (codepoint) {	1399	// so this call would change the up into down in such case.
1362	static wchar_t last_down = 0;	1400	// E.g. pastes fixed by this hack: U+1F600 "😀", or U+0C80 "ಀ"
1363		1401	if (codepoint)
1364	if (srec.Event.KeyEvent.bKeyDown)	1402	maybe_change_up_to_down(codepoint, &srec.Event.KeyEvent.bKeyDown);
1365	last_down = codepoint;
1366	else if (codepoint > 127 && codepoint != last_down)
1367	srec.Event.KeyEvent.bKeyDown = TRUE;
1368	}
1369		1403
1370	// if it's a 1st (high) surrogate pair half, try to eat upto and	1404	// if it's a 1st (high) surrogate pair half, try to eat upto and
1371	// excluding the 2nd (low) half, and combine them into codepoint.	1405	// excluding the 2nd (low) half, and combine them into codepoint.
		1406	// this does not interfere with the missing-key-down workaround
		1407	// (no issue if the down-buffer has 1st-half-down without up).
1372	if (codepoint >= 0xD800 && codepoint <= 0xDBFF)	1408	if (codepoint >= 0xD800 && codepoint <= 0xDBFF)
1373	maybeEatUpto2ndHalfUp(h, &codepoint);	1409	maybeEatUpto2ndHalfUp(h, &codepoint);
1374		1410