aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Avi Halachmi (:avih) <avihpit@yahoo.com> 2023-06-27 14:41:47 +0300
committer: Avi Halachmi (:avih) <avihpit@yahoo.com> 2023-07-22 09:40:16 +0300
commit: 0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0 (patch)
tree: c21fd8650e8c197abb317831c1f84e6fcf021f8e
parent: 4fe7e7cdd0441e9455cc93c17b40a7a96704e61f (diff)
download: busybox-w32-0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0.tar.gz
busybox-w32-0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0.tar.bz2
busybox-w32-0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0.zip
win32: support build with FEATURE_UNICODE_SUPPORT
FEATURE_UTF8_MANIFEST enables Unicode args and filenames on Win 10+. FEATURE_UTF8_INPUT allows the shell prompt to correctly digest Unicode strings (as UTF8) which are typed or pasted.

This commit adds support for building with FEATURE_UNICODE_SUPPORT (mostly by supporting the 32-bit wchar_t which busybox expects):
- Unicode-aware line-edit - for the most part cursor movement/del being (UTF8) codepoint-aware rather than assuming that one-byte equals one-char-on-screen.
- Codepoint-aware operations in some other utils, like rev or wc -c.
- When UNICODE_COMBINING_WCHARS and UNICODE_WIDE_WCHARS are enabled, some screen-width-aware operations, like with fold, ls, expand, etc.

The busybox Unicode support is incomplete, and even less so with the builtin libc replacement functions, like wcwidth, which are active when UNICODE_USING_LOCALE is unset (mingw lacks those functions).

FEATURE_CHECK_UNICODE_IN_ENV should be set so that Unicode is not hardcoded but rather depends on the ANSI codepage and some env vars: LC_ALL=C disables Unicode support, else it's enabled if ACP is UTF8.

There's at least one known issue where the tab-completion-prefix-case is not updated correctly, e.g. ~/desk<tab> completes to ~/desktop/ instead of ~/Desktop/, because the code which handles it exists only in the non-Unicode code paths, but that's not very critical. That seems to be the only case where mingw-specific code is disabled when Unicode is enabled, but there could be other unknown issues.

None of the Unicode options is enabled by default, and the next commit will make it easier to create a build which supports Unicode.
-rw-r--r-- include/mingw.h 12
-rw-r--r-- include/unicode.h 15
-rw-r--r-- libbb/lineedit.c 28
-rw-r--r-- libbb/unicode.c 6
-rw-r--r-- win32/mingw.c 14
5 files changed, 74 insertions, 1 deletions
diff --git a/include/mingw.h b/include/mingw.h
index 232ffadd7..97db2f6a9 100644
--- a/include/mingw.h
+++ b/include/mingw.h
@@ -586,6 +586,18 @@ char *alloc_ext_space(const char *path);
586int add_win32_extension(char *p); 586int add_win32_extension(char *p);
587char *file_is_win32_exe(const char *name); 587char *file_is_win32_exe(const char *name);
588 588
589#if ENABLE_UNICODE_SUPPORT
590/*
591 * windows wchar_t is 16 bit, while linux (and busybox expectation) is 32.
592 * so when (busybox) unicode.h is included, wchar_t is 32 bit.
593 * Without unicode.h, MINGW_BB_WCHAR_T is busybox wide char (32),
594 * and wchar_t is Windows wide char (16).
595 */
596#define MINGW_BB_WCHAR_T uint32_t /* keep in sync with unicode.h */
597
598MINGW_BB_WCHAR_T *bs_to_slash_u(MINGW_BB_WCHAR_T *p) FAST_FUNC;
599#endif
600
589char *bs_to_slash(char *p) FAST_FUNC; 601char *bs_to_slash(char *p) FAST_FUNC;
590void slash_to_bs(char *p) FAST_FUNC; 602void slash_to_bs(char *p) FAST_FUNC;
591size_t remove_cr(char *p, size_t len) FAST_FUNC; 603size_t remove_cr(char *p, size_t len) FAST_FUNC;
diff --git a/include/unicode.h b/include/unicode.h
index 0317a2151..e894f7148 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -87,6 +87,21 @@ void reinit_unicode(const char *LANG) FAST_FUNC;
87# undef MB_CUR_MAX 87# undef MB_CUR_MAX
88# define MB_CUR_MAX 6 88# define MB_CUR_MAX 6
89 89
90#if ENABLE_PLATFORM_MINGW32
91 #undef wint_t
92 #undef mbstate_t
93 #undef mbstowcs
94 #undef wcstombs
95 #undef wcrtomb
96 #undef iswspace
97 #undef iswalnum
98 #undef iswpunct
99 #undef wcwidth
100
101 #undef wchar_t
102 #define wchar_t uint32_t /* keep in sync with MINGW_BB_WCHAR_T */
103#endif
104
90/* Prevent name collisions */ 105/* Prevent name collisions */
91# define wint_t bb_wint_t 106# define wint_t bb_wint_t
92# define mbstate_t bb_mbstate_t 107# define mbstate_t bb_mbstate_t
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index a6884c7e0..1fb8919bb 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -726,8 +726,19 @@ static void input_forward(void)
726#if !ENABLE_PLATFORM_MINGW32 726#if !ENABLE_PLATFORM_MINGW32
727 put_cur_glyph_and_inc_cursor(); 727 put_cur_glyph_and_inc_cursor();
728#else 728#else
729 /*
730 * inc_cursor improves forward cursor movement appearance on
731 * win 7/8 console, but it's broken with unicode wide-glyphs,
732 * e.g. paste and move forward over: echo 开开心心过每一天
733 * so disable inc_cursor when unicode is active (which is only
734 * windows 10+, where inc_cursor is not needed anyway).
735 *
736 * FIXME: the VT_INPUT condition is not required, because other
737 * than the wide-glyphs issue, inc_cursor works correctly
738 * regardless of the VT mode.
739 */
729 { 740 {
730 if (terminal_mode(FALSE) & VT_INPUT) 741 if (terminal_mode(FALSE) & VT_INPUT || unicode_status == UNICODE_ON)
731 put_cur_glyph_and_inc_cursor(); 742 put_cur_glyph_and_inc_cursor();
732 else 743 else
733 inc_cursor(); 744 inc_cursor();
@@ -770,6 +781,11 @@ static void add_match(char *matched, int sensitive)
770 || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f) 781 || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f)
771 || (ENABLE_UNICODE_SUPPORT && *p == 0x7f) 782 || (ENABLE_UNICODE_SUPPORT && *p == 0x7f)
772# else 783# else
784 /*
785 * on Windows, *p > 0x7f is never control:
786 * without unicode active: these are normal codepage chars.
787 * with unicode active: these are UTF8 continuation bytes.
788 */
773 || *p == 0x7f 789 || *p == 0x7f
774# endif 790# endif
775 ) { 791 ) {
@@ -1318,6 +1334,12 @@ static NOINLINE void input_tab(smallint *lastWasTab)
1318# if ENABLE_PLATFORM_MINGW32 1334# if ENABLE_PLATFORM_MINGW32
1319 int chosen_index = 0; 1335 int chosen_index = 0;
1320 int chosen_sens = FALSE; 1336 int chosen_sens = FALSE;
1337 /*
1338 * FIXME: the next three vars are unused with ENABLE_UNICODE_SUPPORT
1339 * because the mingw code which uses them to update a tab-completion
1340 * prefix to the correct case (e.g. ~/desk<tab> to ~/Desktop/) is
1341 * not compiled, and so e.g. ~/desk<tab> completes to ~/desktop/ .
1342 */
1321 unsigned orig_pfx_len; 1343 unsigned orig_pfx_len;
1322 char *target; 1344 char *target;
1323 const char *source; 1345 const char *source;
@@ -2803,7 +2825,11 @@ int FAST_FUNC read_line_input(line_input_t *st, const char *prompt, char *comman
2803#if ENABLE_PLATFORM_MINGW32 2825#if ENABLE_PLATFORM_MINGW32
2804 case CTRL('Z'): 2826 case CTRL('Z'):
2805 command_ps[command_len] = '\0'; 2827 command_ps[command_len] = '\0';
2828 #if ENABLE_UNICODE_SUPPORT
2829 bs_to_slash_u(command_ps);
2830 #else
2806 bs_to_slash(command_ps); 2831 bs_to_slash(command_ps);
2832 #endif
2807 redraw(cmdedit_y, 0); 2833 redraw(cmdedit_y, 0);
2808 break; 2834 break;
2809#endif 2835#endif
diff --git a/libbb/unicode.c b/libbb/unicode.c
index e98cbbf35..638c3b7c3 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -69,8 +69,14 @@ void FAST_FUNC init_unicode(void)
69void FAST_FUNC reinit_unicode(const char *LANG) 69void FAST_FUNC reinit_unicode(const char *LANG)
70{ 70{
71 unicode_status = UNICODE_OFF; 71 unicode_status = UNICODE_OFF;
72#if ENABLE_PLATFORM_MINGW32
73 /* enable unicode only when ACP is UTF8 and the env var is not 'C' */
74 if (GetACP() != CP_UTF8 || (LANG && LANG[0] == 'C' && LANG[1] == 0))
75 return;
76#else
72 if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF"))) 77 if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))
73 return; 78 return;
79#endif
74 unicode_status = UNICODE_ON; 80 unicode_status = UNICODE_ON;
75} 81}
76 82
diff --git a/win32/mingw.c b/win32/mingw.c
index 5e9c71226..dabb2a2e7 100644
--- a/win32/mingw.c
+++ b/win32/mingw.c
@@ -2119,6 +2119,20 @@ char * FAST_FUNC bs_to_slash(char *str)
2119 return str; 2119 return str;
2120} 2120}
2121 2121
2122#if ENABLE_UNICODE_SUPPORT
2123MINGW_BB_WCHAR_T * FAST_FUNC bs_to_slash_u(MINGW_BB_WCHAR_T *str)
2124{
2125 MINGW_BB_WCHAR_T *p;
2126
2127 for (p=str; *p; ++p) {
2128 if ( *p == '\\' ) {
2129 *p = '/';
2130 }
2131 }
2132 return str;
2133}
2134#endif
2135
2122void FAST_FUNC slash_to_bs(char *p) 2136void FAST_FUNC slash_to_bs(char *p)
2123{ 2137{
2124 for (; *p; ++p) { 2138 for (; *p; ++p) {