diff options
author | Avi Halachmi (:avih) <avihpit@yahoo.com> | 2023-06-27 14:41:47 +0300 |
---|---|---|
committer | Avi Halachmi (:avih) <avihpit@yahoo.com> | 2023-07-22 09:40:16 +0300 |
commit | 0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0 (patch) | |
tree | c21fd8650e8c197abb317831c1f84e6fcf021f8e | |
parent | 4fe7e7cdd0441e9455cc93c17b40a7a96704e61f (diff) | |
download | busybox-w32-0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0.tar.gz busybox-w32-0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0.tar.bz2 busybox-w32-0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0.zip |
win32: support build with FEATURE_UNICODE_SUPPORT
FEATURE_UTF8_MANIFEST enables Unicode args and filenames on Win 10+.
FEATURE_UTF8_INPUT allows the shell prompt to correctly digest
Unicode strings (as UTF8) which are typed or pasted.
This commit adds support for building with FEATURE_UNICODE_SUPPORT
(mostly by supporting 32 bit wchar_t which busybox expects):
- Unicode-aware line-edit - for the most part cursor movement/del
being (UTF8) codepoint-aware rather than assuming that one-byte
equals one-char-on-screen.
- Codepoint-aware operations in some other utils, like rev or wc -c.
- When UNICODE_COMBINING_WCHARS and UNICODE_WIDE_WCHARS are enabled,
some screen-width-aware operations, like with fold, ls, expand, etc.
The busybox Unicode support is incomplete, and even more so with the
builtin libc replacement functions, like wcwidth, which are active
when UNICODE_USING_LOCALE is unset (mingw lacks those functions).
FEATURE_CHECK_UNICODE_IN_ENV should be set so that Unicode is not
hardcoded but rather depends on the ANSI codepage and some env vars:
LC_ALL=C disables Unicode support, else it's enabled if ACP is UTF8.
There's at least one known issue where the tab-completion-prefix-case
is not updated correctly, e.g. ~/desk<tab> completes to ~/desktop/
instead of ~/Desktop/, because the code which handles it exists
only at the non-unicode code paths, but that's not very critical.
That seems to be the only case where mingw-specific code is disabled
when Unicode is enabled, but there could be other unknown issues.
None of the Unicode options is enabled by default, and the next
commit will make it easier to create a build which supports Unicode.
-rw-r--r-- | include/mingw.h | 12 | ||||
-rw-r--r-- | include/unicode.h | 15 | ||||
-rw-r--r-- | libbb/lineedit.c | 28 | ||||
-rw-r--r-- | libbb/unicode.c | 6 | ||||
-rw-r--r-- | win32/mingw.c | 14 |
5 files changed, 74 insertions, 1 deletions
diff --git a/include/mingw.h b/include/mingw.h index 232ffadd7..97db2f6a9 100644 --- a/include/mingw.h +++ b/include/mingw.h | |||
@@ -586,6 +586,18 @@ char *alloc_ext_space(const char *path); | |||
586 | int add_win32_extension(char *p); | 586 | int add_win32_extension(char *p); |
587 | char *file_is_win32_exe(const char *name); | 587 | char *file_is_win32_exe(const char *name); |
588 | 588 | ||
589 | #if ENABLE_UNICODE_SUPPORT | ||
590 | /* | ||
591 | * windows wchar_t is 16 bit, while linux (and busybox expectation) is 32. | ||
592 | * so when (busybox) unicode.h is included, wchar_t is 32 bit. | ||
593 | * Without unicode.h, MINGW_BB_WCHAR_T is busybox wide char (32), | ||
594 | * and wchar_t is Windows wide char (16). | ||
595 | */ | ||
596 | #define MINGW_BB_WCHAR_T uint32_t /* keep in sync with unicode.h */ | ||
597 | |||
598 | MINGW_BB_WCHAR_T *bs_to_slash_u(MINGW_BB_WCHAR_T *p) FAST_FUNC; | ||
599 | #endif | ||
600 | |||
589 | char *bs_to_slash(char *p) FAST_FUNC; | 601 | char *bs_to_slash(char *p) FAST_FUNC; |
590 | void slash_to_bs(char *p) FAST_FUNC; | 602 | void slash_to_bs(char *p) FAST_FUNC; |
591 | size_t remove_cr(char *p, size_t len) FAST_FUNC; | 603 | size_t remove_cr(char *p, size_t len) FAST_FUNC; |
diff --git a/include/unicode.h b/include/unicode.h index 0317a2151..e894f7148 100644 --- a/include/unicode.h +++ b/include/unicode.h | |||
@@ -87,6 +87,21 @@ void reinit_unicode(const char *LANG) FAST_FUNC; | |||
87 | # undef MB_CUR_MAX | 87 | # undef MB_CUR_MAX |
88 | # define MB_CUR_MAX 6 | 88 | # define MB_CUR_MAX 6 |
89 | 89 | ||
90 | #if ENABLE_PLATFORM_MINGW32 | ||
91 | #undef wint_t | ||
92 | #undef mbstate_t | ||
93 | #undef mbstowcs | ||
94 | #undef wcstombs | ||
95 | #undef wcrtomb | ||
96 | #undef iswspace | ||
97 | #undef iswalnum | ||
98 | #undef iswpunct | ||
99 | #undef wcwidth | ||
100 | |||
101 | #undef wchar_t | ||
102 | #define wchar_t uint32_t /* keep in sync with MINGW_BB_WCHAR_T */ | ||
103 | #endif | ||
104 | |||
90 | /* Prevent name collisions */ | 105 | /* Prevent name collisions */ |
91 | # define wint_t bb_wint_t | 106 | # define wint_t bb_wint_t |
92 | # define mbstate_t bb_mbstate_t | 107 | # define mbstate_t bb_mbstate_t |
diff --git a/libbb/lineedit.c b/libbb/lineedit.c index a6884c7e0..1fb8919bb 100644 --- a/libbb/lineedit.c +++ b/libbb/lineedit.c | |||
@@ -726,8 +726,19 @@ static void input_forward(void) | |||
726 | #if !ENABLE_PLATFORM_MINGW32 | 726 | #if !ENABLE_PLATFORM_MINGW32 |
727 | put_cur_glyph_and_inc_cursor(); | 727 | put_cur_glyph_and_inc_cursor(); |
728 | #else | 728 | #else |
729 | /* | ||
730 | * inc_cursor improves forward cursor movement appearance on | ||
731 | * win 7/8 console, but it's broken with unicode wide-glyphs, | ||
732 | * e.g. paste and move forward over: echo 开开心心过每一天 | ||
733 | * so disable inc_cursor when unicode is active (which is only | ||
734 | * windows 10+, where inc_cursor is not needed anyway). | ||
735 | * | ||
736 | * FIXME: the VT_INPUT condition is not required, because other | ||
737 | * than the wide-glyphs issue, inc_cursor works correctly | ||
738 | * regardless of the VT mode. | ||
739 | */ | ||
729 | { | 740 | { |
730 | if (terminal_mode(FALSE) & VT_INPUT) | 741 | if (terminal_mode(FALSE) & VT_INPUT || unicode_status == UNICODE_ON) |
731 | put_cur_glyph_and_inc_cursor(); | 742 | put_cur_glyph_and_inc_cursor(); |
732 | else | 743 | else |
733 | inc_cursor(); | 744 | inc_cursor(); |
@@ -770,6 +781,11 @@ static void add_match(char *matched, int sensitive) | |||
770 | || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f) | 781 | || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f) |
771 | || (ENABLE_UNICODE_SUPPORT && *p == 0x7f) | 782 | || (ENABLE_UNICODE_SUPPORT && *p == 0x7f) |
772 | # else | 783 | # else |
784 | /* | ||
785 | * on Windows, *p > 0x7f is never control: | ||
786 | * without unicode active: these are normal codepage chars. | ||
787 | * with unicode active: these are UTF8 continuation bytes. | ||
788 | */ | ||
773 | || *p == 0x7f | 789 | || *p == 0x7f |
774 | # endif | 790 | # endif |
775 | ) { | 791 | ) { |
@@ -1318,6 +1334,12 @@ static NOINLINE void input_tab(smallint *lastWasTab) | |||
1318 | # if ENABLE_PLATFORM_MINGW32 | 1334 | # if ENABLE_PLATFORM_MINGW32 |
1319 | int chosen_index = 0; | 1335 | int chosen_index = 0; |
1320 | int chosen_sens = FALSE; | 1336 | int chosen_sens = FALSE; |
1337 | /* | ||
1338 | * FIXME: the next three vars are unused with ENABLE_UNICODE_SUPPORT | ||
1339 | * because the mingw code which uses them to update a tab-completion | ||
1340 | * prefix to the correct case (e.g. ~/desk<tab> to ~/Desktop/) is | ||
1341 | * not compiled, and so e.g. ~/desk<tab> completes to ~/desktop/ . | ||
1342 | */ | ||
1321 | unsigned orig_pfx_len; | 1343 | unsigned orig_pfx_len; |
1322 | char *target; | 1344 | char *target; |
1323 | const char *source; | 1345 | const char *source; |
@@ -2803,7 +2825,11 @@ int FAST_FUNC read_line_input(line_input_t *st, const char *prompt, char *comman | |||
2803 | #if ENABLE_PLATFORM_MINGW32 | 2825 | #if ENABLE_PLATFORM_MINGW32 |
2804 | case CTRL('Z'): | 2826 | case CTRL('Z'): |
2805 | command_ps[command_len] = '\0'; | 2827 | command_ps[command_len] = '\0'; |
2828 | #if ENABLE_UNICODE_SUPPORT | ||
2829 | bs_to_slash_u(command_ps); | ||
2830 | #else | ||
2806 | bs_to_slash(command_ps); | 2831 | bs_to_slash(command_ps); |
2832 | #endif | ||
2807 | redraw(cmdedit_y, 0); | 2833 | redraw(cmdedit_y, 0); |
2808 | break; | 2834 | break; |
2809 | #endif | 2835 | #endif |
diff --git a/libbb/unicode.c b/libbb/unicode.c index e98cbbf35..638c3b7c3 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c | |||
@@ -69,8 +69,14 @@ void FAST_FUNC init_unicode(void) | |||
69 | void FAST_FUNC reinit_unicode(const char *LANG) | 69 | void FAST_FUNC reinit_unicode(const char *LANG) |
70 | { | 70 | { |
71 | unicode_status = UNICODE_OFF; | 71 | unicode_status = UNICODE_OFF; |
72 | #if ENABLE_PLATFORM_MINGW32 | ||
73 | /* enable unicode only when ACP is UTF8 and the env var is not 'C' */ | ||
74 | if (GetACP() != CP_UTF8 || (LANG && LANG[0] == 'C' && LANG[1] == 0)) | ||
75 | return; | ||
76 | #else | ||
72 | if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF"))) | 77 | if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF"))) |
73 | return; | 78 | return; |
79 | #endif | ||
74 | unicode_status = UNICODE_ON; | 80 | unicode_status = UNICODE_ON; |
75 | } | 81 | } |
76 | 82 | ||
diff --git a/win32/mingw.c b/win32/mingw.c index 5e9c71226..dabb2a2e7 100644 --- a/win32/mingw.c +++ b/win32/mingw.c | |||
@@ -2119,6 +2119,20 @@ char * FAST_FUNC bs_to_slash(char *str) | |||
2119 | return str; | 2119 | return str; |
2120 | } | 2120 | } |
2121 | 2121 | ||
2122 | #if ENABLE_UNICODE_SUPPORT | ||
2123 | MINGW_BB_WCHAR_T * FAST_FUNC bs_to_slash_u(MINGW_BB_WCHAR_T *str) | ||
2124 | { | ||
2125 | MINGW_BB_WCHAR_T *p; | ||
2126 | |||
2127 | for (p=str; *p; ++p) { | ||
2128 | if ( *p == '\\' ) { | ||
2129 | *p = '/'; | ||
2130 | } | ||
2131 | } | ||
2132 | return str; | ||
2133 | } | ||
2134 | #endif | ||
2135 | |||
2122 | void FAST_FUNC slash_to_bs(char *p) | 2136 | void FAST_FUNC slash_to_bs(char *p) |
2123 | { | 2137 | { |
2124 | for (; *p; ++p) { | 2138 | for (; *p; ++p) { |