aboutsummaryrefslogtreecommitdiff
path: root/libbb
diff options
context:
space:
mode:
author: Avi Halachmi (:avih) <avihpit@yahoo.com> 2023-06-27 14:41:47 +0300
committer: Avi Halachmi (:avih) <avihpit@yahoo.com> 2023-07-22 09:40:16 +0300
commit: 0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0 (patch)
tree: c21fd8650e8c197abb317831c1f84e6fcf021f8e /libbb
parent: 4fe7e7cdd0441e9455cc93c17b40a7a96704e61f (diff)
download: busybox-w32-0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0.tar.gz
busybox-w32-0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0.tar.bz2
busybox-w32-0efc74740ebc0d98af79ba4a5dfa73bfb5db3df0.zip
win32: support build with FEATURE_UNICODE_SUPPORT
FEATURE_UTF8_MANIFEST enables Unicode args and filenames on Win 10+.

FEATURE_UTF8_INPUT allows the shell prompt to correctly digest Unicode strings (as UTF8) which are typed or pasted.

This commit adds support for building with FEATURE_UNICODE_SUPPORT (mostly by supporting 32 bit wchar_t which busybox expects):
- Unicode-aware line-edit - for the most part cursor movement/del being (UTF8) codepoint-aware rather than assuming that one-byte equals one-char-on-screen.
- Codepoint-aware operations in some other utils, like rev or wc -c.
- When UNICODE_COMBINING_WCHARS and UNICODE_WIDE_WCHARS are enabled, some screen-width-aware operations, like with fold, ls, expand, etc.

The busybox Unicode support is incomplete, and even less so with the builtin libc replacement functions, like wcwidth, which are active when UNICODE_USING_LOCALE is unset (mingw lacks those functions).

FEATURE_CHECK_UNICODE_IN_ENV should be set so that Unicode is not hardcoded but rather depends on the ANSI codepage and some env vars: LC_ALL=C disables Unicode support, else it's enabled if ACP is UTF8.

There's at least one known issue where the tab-completion-prefix-case is not updated correctly, e.g. ~/desk<tab> completes to ~/desktop/ instead of ~/Desktop/, because the code which handles it exists only at the non-unicode code paths, but that's not very critical. That seems to be the only case where mingw-specific code is disabled when Unicode is enabled, but there could be other unknown issues.

None of the Unicode options is enabled by default, and the next commit will make it easier to create a build which supports Unicode.
Diffstat (limited to 'libbb')
-rw-r--r-- libbb/lineedit.c 28
-rw-r--r-- libbb/unicode.c 6
2 files changed, 33 insertions, 1 deletion
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index a6884c7e0..1fb8919bb 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -726,8 +726,19 @@ static void input_forward(void)
726#if !ENABLE_PLATFORM_MINGW32 726#if !ENABLE_PLATFORM_MINGW32
727 put_cur_glyph_and_inc_cursor(); 727 put_cur_glyph_and_inc_cursor();
728#else 728#else
729 /*
730 * inc_cursor improves forward cursor movement appearance on
731 * win 7/8 console, but it's broken with unicode wide-glyphs,
732 * e.g. paste and move forward over: echo 开开心心过每一天
733 * so disable inc_cursor when unicode is active (which is only
734 * windows 10+, where inc_cursor is not needed anyway).
735 *
736 * FIXME: the VT_INPUT condition is not required, because other
737 * than the wide-glyphs issue, inc_cursor works correctly
738 * regardless of the VT mode.
739 */
729 { 740 {
730 if (terminal_mode(FALSE) & VT_INPUT) 741 if (terminal_mode(FALSE) & VT_INPUT || unicode_status == UNICODE_ON)
731 put_cur_glyph_and_inc_cursor(); 742 put_cur_glyph_and_inc_cursor();
732 else 743 else
733 inc_cursor(); 744 inc_cursor();
@@ -770,6 +781,11 @@ static void add_match(char *matched, int sensitive)
770 || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f) 781 || (!ENABLE_UNICODE_SUPPORT && *p >= 0x7f)
771 || (ENABLE_UNICODE_SUPPORT && *p == 0x7f) 782 || (ENABLE_UNICODE_SUPPORT && *p == 0x7f)
772# else 783# else
784 /*
785 * on Windows, *p > 0x7f is never control:
786 * without unicode active: these are normal codepage chars.
787 * with unicode active: these are UTF8 continuation bytes.
788 */
773 || *p == 0x7f 789 || *p == 0x7f
774# endif 790# endif
775 ) { 791 ) {
@@ -1318,6 +1334,12 @@ static NOINLINE void input_tab(smallint *lastWasTab)
1318# if ENABLE_PLATFORM_MINGW32 1334# if ENABLE_PLATFORM_MINGW32
1319 int chosen_index = 0; 1335 int chosen_index = 0;
1320 int chosen_sens = FALSE; 1336 int chosen_sens = FALSE;
1337 /*
1338 * FIXME: the next three vars are unused with ENABLE_UNICODE_SUPPORT
1339 * because the mingw code which uses them to update a tab-completion
1340 * prefix to the correct case (e.g. ~/desk<tab> to ~/Desktop/) is
1341 * not compiled, and so e.g. ~/desk<tab> completes to ~/desktop/ .
1342 */
1321 unsigned orig_pfx_len; 1343 unsigned orig_pfx_len;
1322 char *target; 1344 char *target;
1323 const char *source; 1345 const char *source;
@@ -2803,7 +2825,11 @@ int FAST_FUNC read_line_input(line_input_t *st, const char *prompt, char *comman
2803#if ENABLE_PLATFORM_MINGW32 2825#if ENABLE_PLATFORM_MINGW32
2804 case CTRL('Z'): 2826 case CTRL('Z'):
2805 command_ps[command_len] = '\0'; 2827 command_ps[command_len] = '\0';
2828 #if ENABLE_UNICODE_SUPPORT
2829 bs_to_slash_u(command_ps);
2830 #else
2806 bs_to_slash(command_ps); 2831 bs_to_slash(command_ps);
2832 #endif
2807 redraw(cmdedit_y, 0); 2833 redraw(cmdedit_y, 0);
2808 break; 2834 break;
2809#endif 2835#endif
diff --git a/libbb/unicode.c b/libbb/unicode.c
index e98cbbf35..638c3b7c3 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -69,8 +69,14 @@ void FAST_FUNC init_unicode(void)
69void FAST_FUNC reinit_unicode(const char *LANG) 69void FAST_FUNC reinit_unicode(const char *LANG)
70{ 70{
71 unicode_status = UNICODE_OFF; 71 unicode_status = UNICODE_OFF;
72#if ENABLE_PLATFORM_MINGW32
73 /* enable unicode only when ACP is UTF8 and the env var is not 'C' */
74 if (GetACP() != CP_UTF8 || (LANG && LANG[0] == 'C' && LANG[1] == 0))
75 return;
76#else
72 if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF"))) 77 if (!LANG || !(strstr(LANG, ".utf") || strstr(LANG, ".UTF")))
73 return; 78 return;
79#endif
74 unicode_status = UNICODE_ON; 80 unicode_status = UNICODE_ON;
75} 81}
76 82