aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2010-03-26 14:06:56 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2010-03-26 14:06:56 +0100
commit: 19158a837df5093a2d655536424412bac2b07467 (patch)
tree: 3f3ce9c808e05dbf8dd38292f4c2db52cb73b429
parent: aa167556cd2954bb9a9fb0a005178462087a4600 (diff)
download: busybox-w32-19158a837df5093a2d655536424412bac2b07467.tar.gz
busybox-w32-19158a837df5093a2d655536424412bac2b07467.tar.bz2
busybox-w32-19158a837df5093a2d655536424412bac2b07467.zip
unicode: s/FEATURE_ASSUME_UNICODE/UNICODE_SUPPORT, add UNICODE_USING_LOCALE
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- Config.in 22
-rw-r--r-- TODO 4
-rw-r--r-- TODO_config_nommu 2
-rw-r--r-- coreutils/cal.c 8
-rw-r--r-- coreutils/df.c 2
-rw-r--r-- coreutils/expand.c 4
-rw-r--r-- include/unicode.h 10
-rw-r--r-- libbb/Kbuild 2
-rw-r--r-- libbb/lineedit.c 26
-rw-r--r-- libbb/printable_string.c 2
-rw-r--r-- libbb/progress.c 2
-rw-r--r-- libbb/unicode.c 434
-rw-r--r-- libbb/unicode_wcwidth.c 543
-rw-r--r-- modutils/lsmod.c 4
-rw-r--r-- networking/udhcp/dumpleases.c 2
-rw-r--r-- scripts/defconfig 2
-rwxr-xr-x scripts/randomtest 2
-rwxr-xr-x testsuite/cal.tests 2
-rwxr-xr-x testsuite/ls.tests 4
19 files changed, 481 insertions, 596 deletions
diff --git a/Config.in b/Config.in
index 4439ce4f9..bb7dd6d5d 100644
--- a/Config.in
+++ b/Config.in
@@ -119,7 +119,7 @@ config LOCALE_SUPPORT
119 Enable this if your system has locale support and you would like 119 Enable this if your system has locale support and you would like
120 busybox to support locale settings. 120 busybox to support locale settings.
121 121
122config FEATURE_ASSUME_UNICODE 122config UNICODE_SUPPORT
123 bool "Support Unicode" 123 bool "Support Unicode"
124 default n 124 default n
125 help 125 help
@@ -131,10 +131,18 @@ config FEATURE_ASSUME_UNICODE
131 Probably by the time when busybox will be fully Unicode-clean, 131 Probably by the time when busybox will be fully Unicode-clean,
132 other encodings will be mainly of historic interest. 132 other encodings will be mainly of historic interest.
133 133
134config UNICODE_USING_LOCALE
135 bool "Use libc routines for Unicode (else uses internal ones)"
136 default n
137 depends on UNICODE_SUPPORT && LOCALE_SUPPORT
138 help
139 With this option on, Unicode support is implemented using libc
140 routines. Otherwise, internal implementation is used.
141
134config FEATURE_CHECK_UNICODE_IN_ENV 142config FEATURE_CHECK_UNICODE_IN_ENV
135 bool "Check $LANG environment variable" 143 bool "Check $LANG environment variable"
136 default y 144 default y
137 depends on FEATURE_ASSUME_UNICODE && !LOCALE_SUPPORT 145 depends on UNICODE_SUPPORT && !UNICODE_USING_LOCALE
138 help 146 help
139 With this option on, Unicode support is activated 147 With this option on, Unicode support is activated
140 only if LANG variable has the value of the form "xxxx.utf8" 148 only if LANG variable has the value of the form "xxxx.utf8"
@@ -143,7 +151,7 @@ config FEATURE_CHECK_UNICODE_IN_ENV
143 151
144config SUBST_WCHAR 152config SUBST_WCHAR
145 int "Character code to substitute unprintable characters with" 153 int "Character code to substitute unprintable characters with"
146 depends on FEATURE_ASSUME_UNICODE 154 depends on UNICODE_SUPPORT
147 default 63 155 default 63
148 help 156 help
149 Typical values are 63 for '?' (works with any output device), 157 Typical values are 63 for '?' (works with any output device),
@@ -152,7 +160,7 @@ config SUBST_WCHAR
152 160
153config LAST_SUPPORTED_WCHAR 161config LAST_SUPPORTED_WCHAR
154 int "Range of supported Unicode characters" 162 int "Range of supported Unicode characters"
155 depends on FEATURE_ASSUME_UNICODE 163 depends on UNICODE_SUPPORT
156 default 767 164 default 767
157 help 165 help
158 Any character with Unicode value bigger than this is assumed 166 Any character with Unicode value bigger than this is assumed
@@ -183,7 +191,7 @@ config LAST_SUPPORTED_WCHAR
183config UNICODE_COMBINING_WCHARS 191config UNICODE_COMBINING_WCHARS
184 bool "Allow zero-width Unicode characters on output" 192 bool "Allow zero-width Unicode characters on output"
185 default n 193 default n
186 depends on FEATURE_ASSUME_UNICODE 194 depends on UNICODE_SUPPORT
187 help 195 help
188 With this option off, any Unicode char with width of 0 196 With this option off, any Unicode char with width of 0
189 is substituted on output. 197 is substituted on output.
@@ -191,7 +199,7 @@ config UNICODE_COMBINING_WCHARS
191config UNICODE_WIDE_WCHARS 199config UNICODE_WIDE_WCHARS
192 bool "Allow wide Unicode characters on output" 200 bool "Allow wide Unicode characters on output"
193 default n 201 default n
194 depends on FEATURE_ASSUME_UNICODE 202 depends on UNICODE_SUPPORT
195 help 203 help
196 With this option off, any Unicode char with width > 1 204 With this option off, any Unicode char with width > 1
197 is substituted on output. 205 is substituted on output.
@@ -199,7 +207,7 @@ config UNICODE_WIDE_WCHARS
199config UNICODE_BIDI_SUPPORT 207config UNICODE_BIDI_SUPPORT
200 bool "Bidirectional character-aware line input" 208 bool "Bidirectional character-aware line input"
201 default n 209 default n
202 depends on FEATURE_ASSUME_UNICODE && !LOCALE_SUPPORT 210 depends on UNICODE_SUPPORT && !UNICODE_USING_LOCALE
203 help 211 help
204 With this option on, right-to-left Unicode characters 212 With this option on, right-to-left Unicode characters
205 are treated differently on input (e.g. cursor movement). 213 are treated differently on input (e.g. cursor movement).
diff --git a/TODO b/TODO
index 31aae41fd..af4c467c2 100644
--- a/TODO
+++ b/TODO
@@ -324,8 +324,8 @@ This is useful if you build against uclibc with locale support disabled.
324Unicode-dependent applets must call check_unicode_in_env() when they 324Unicode-dependent applets must call check_unicode_in_env() when they
325begin executing. 325begin executing.
326 326
327Applet code may conditionalize on FEATURE_ASSUME_UNICODE 327Applet code may conditionalize on UNICODE_SUPPORT in order to use
328in order to use more efficient code if unicode support is not requested. 328more efficient code if unicode support is not requested.
329 329
330Available functions (if you need more, implement them in libbb/unicode.c 330Available functions (if you need more, implement them in libbb/unicode.c
331so that they work without LOCALE_SUPPORT too): 331so that they work without LOCALE_SUPPORT too):
diff --git a/TODO_config_nommu b/TODO_config_nommu
index 2c8210cfe..911f02f6b 100644
--- a/TODO_config_nommu
+++ b/TODO_config_nommu
@@ -24,7 +24,7 @@ CONFIG_FEATURE_VERBOSE_USAGE=y
24CONFIG_FEATURE_COMPRESS_USAGE=y 24CONFIG_FEATURE_COMPRESS_USAGE=y
25CONFIG_FEATURE_INSTALLER=y 25CONFIG_FEATURE_INSTALLER=y
26# CONFIG_LOCALE_SUPPORT is not set 26# CONFIG_LOCALE_SUPPORT is not set
27# CONFIG_FEATURE_ASSUME_UNICODE is not set 27# CONFIG_UNICODE_SUPPORT is not set
28# CONFIG_FEATURE_CHECK_UNICODE_IN_ENV is not set 28# CONFIG_FEATURE_CHECK_UNICODE_IN_ENV is not set
29CONFIG_LONG_OPTS=y 29CONFIG_LONG_OPTS=y
30CONFIG_FEATURE_DEVPTS=y 30CONFIG_FEATURE_DEVPTS=y
diff --git a/coreutils/cal.c b/coreutils/cal.c
index 79fe074f8..c98229cb0 100644
--- a/coreutils/cal.c
+++ b/coreutils/cal.c
@@ -87,8 +87,8 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
87 /* "Su Mo Tu We Th Fr Sa" */ 87 /* "Su Mo Tu We Th Fr Sa" */
88 /* -j heading: */ 88 /* -j heading: */
89 /* " Su Mo Tu We Th Fr Sa" */ 89 /* " Su Mo Tu We Th Fr Sa" */
90 char day_headings[ENABLE_FEATURE_ASSUME_UNICODE ? 28 * 6 : 28]; 90 char day_headings[ENABLE_UNICODE_SUPPORT ? 28 * 6 : 28];
91 IF_FEATURE_ASSUME_UNICODE(char *hp = day_headings;) 91 IF_UNICODE_SUPPORT(char *hp = day_headings;)
92 char buf[40]; 92 char buf[40];
93 93
94 init_unicode(); 94 init_unicode();
@@ -134,7 +134,7 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
134 zero_tm.tm_wday = i; 134 zero_tm.tm_wday = i;
135 /* abbreviated weekday name according to locale */ 135 /* abbreviated weekday name according to locale */
136 strftime(buf, sizeof(buf), "%a", &zero_tm); 136 strftime(buf, sizeof(buf), "%a", &zero_tm);
137#if ENABLE_FEATURE_ASSUME_UNICODE 137#if ENABLE_UNICODE_SUPPORT
138 if (julian) 138 if (julian)
139 *hp++ = ' '; 139 *hp++ = ' ';
140 { 140 {
@@ -149,7 +149,7 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
149#endif 149#endif
150 } 150 }
151 } while (++i < 12); 151 } while (++i < 12);
152 IF_FEATURE_ASSUME_UNICODE(hp[-1] = '\0';) 152 IF_UNICODE_SUPPORT(hp[-1] = '\0';)
153 153
154 if (month) { 154 if (month) {
155 unsigned row, len, days[MAXDAYS]; 155 unsigned row, len, days[MAXDAYS];
diff --git a/coreutils/df.c b/coreutils/df.c
index 4b23faa7a..5eeb5b476 100644
--- a/coreutils/df.c
+++ b/coreutils/df.c
@@ -174,7 +174,7 @@ int df_main(int argc UNUSED_PARAM, char **argv)
174 } 174 }
175#endif 175#endif
176 176
177#if ENABLE_FEATURE_ASSUME_UNICODE 177#if ENABLE_UNICODE_SUPPORT
178 { 178 {
179 uni_stat_t uni_stat; 179 uni_stat_t uni_stat;
180 char *uni_dev = unicode_conv_to_printable(&uni_stat, device); 180 char *uni_dev = unicode_conv_to_printable(&uni_stat, device);
diff --git a/coreutils/expand.c b/coreutils/expand.c
index cfb1e25d9..b874b6ad4 100644
--- a/coreutils/expand.c
+++ b/coreutils/expand.c
@@ -48,7 +48,7 @@ static void expand(FILE *file, unsigned tab_size, unsigned opt)
48 if (c == '\t') { 48 if (c == '\t') {
49 unsigned len; 49 unsigned len;
50 *ptr = '\0'; 50 *ptr = '\0';
51# if ENABLE_FEATURE_ASSUME_UNICODE 51# if ENABLE_UNICODE_SUPPORT
52 { 52 {
53 uni_stat_t uni_stat; 53 uni_stat_t uni_stat;
54 printable_string(&uni_stat, ptr_strbeg); 54 printable_string(&uni_stat, ptr_strbeg);
@@ -107,7 +107,7 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
107 } 107 }
108 n = strcspn(ptr, "\t "); 108 n = strcspn(ptr, "\t ");
109 printf("%*s%.*s", len, "", n, ptr); 109 printf("%*s%.*s", len, "", n, ptr);
110# if ENABLE_FEATURE_ASSUME_UNICODE 110# if ENABLE_UNICODE_SUPPORT
111 { 111 {
112 char c; 112 char c;
113 uni_stat_t uni_stat; 113 uni_stat_t uni_stat;
diff --git a/include/unicode.h b/include/unicode.h
index deb4022c3..4e2927297 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -5,7 +5,7 @@
5#ifndef UNICODE_H 5#ifndef UNICODE_H
6#define UNICODE_H 1 6#define UNICODE_H 1
7 7
8#if ENABLE_LOCALE_SUPPORT 8#if ENABLE_UNICODE_USING_LOCALE
9# include <wchar.h> 9# include <wchar.h>
10# include <wctype.h> 10# include <wctype.h>
11#endif 11#endif
@@ -21,7 +21,7 @@ enum {
21#define unicode_bidi_isrtl(wc) 0 21#define unicode_bidi_isrtl(wc) 0
22#define unicode_bidi_is_neutral_wchar(wc) (wc <= 126 && !isalpha(wc)) 22#define unicode_bidi_is_neutral_wchar(wc) (wc <= 126 && !isalpha(wc))
23 23
24#if !ENABLE_FEATURE_ASSUME_UNICODE 24#if !ENABLE_UNICODE_SUPPORT
25 25
26# define unicode_strlen(string) strlen(string) 26# define unicode_strlen(string) strlen(string)
27# define unicode_status UNICODE_OFF 27# define unicode_status UNICODE_OFF
@@ -50,7 +50,7 @@ char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src);
50char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth); 50char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth);
51char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width); 51char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width);
52 52
53# if ENABLE_LOCALE_SUPPORT 53# if ENABLE_UNICODE_USING_LOCALE
54 54
55extern uint8_t unicode_status; 55extern uint8_t unicode_status;
56void init_unicode(void) FAST_FUNC; 56void init_unicode(void) FAST_FUNC;
@@ -102,9 +102,9 @@ int unicode_bidi_is_neutral_wchar(wint_t wc) FAST_FUNC;
102# endif 102# endif
103 103
104 104
105# endif /* !LOCALE_SUPPORT */ 105# endif /* !UNICODE_USING_LOCALE */
106 106
107#endif /* FEATURE_ASSUME_UNICODE */ 107#endif /* UNICODE_SUPPORT */
108 108
109POP_SAVED_FUNCTION_VISIBILITY 109POP_SAVED_FUNCTION_VISIBILITY
110 110
diff --git a/libbb/Kbuild b/libbb/Kbuild
index 49cf4b8ad..4606d5aa7 100644
--- a/libbb/Kbuild
+++ b/libbb/Kbuild
@@ -124,7 +124,7 @@ lib-y += xrealloc_vector.o
124# and objects which may fail to build (SELinux on selinux-less system) 124# and objects which may fail to build (SELinux on selinux-less system)
125lib-$(CONFIG_SELINUX) += selinux_common.o 125lib-$(CONFIG_SELINUX) += selinux_common.o
126lib-$(CONFIG_FEATURE_MTAB_SUPPORT) += mtab.o 126lib-$(CONFIG_FEATURE_MTAB_SUPPORT) += mtab.o
127lib-$(CONFIG_FEATURE_ASSUME_UNICODE) += unicode.o 127lib-$(CONFIG_UNICODE_SUPPORT) += unicode.o
128lib-$(CONFIG_FEATURE_CHECK_NAMES) += die_if_bad_username.o 128lib-$(CONFIG_FEATURE_CHECK_NAMES) += die_if_bad_username.o
129 129
130lib-$(CONFIG_LOSETUP) += loop.o 130lib-$(CONFIG_LOSETUP) += loop.o
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 38a09cb26..dc90846f9 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -67,7 +67,7 @@
67 67
68 68
69#undef CHAR_T 69#undef CHAR_T
70#if ENABLE_FEATURE_ASSUME_UNICODE 70#if ENABLE_UNICODE_SUPPORT
71# define BB_NUL L'\0' 71# define BB_NUL L'\0'
72# define CHAR_T wchar_t 72# define CHAR_T wchar_t
73static bool BB_isspace(CHAR_T c) { return ((unsigned)c < 256 && isspace(c)); } 73static bool BB_isspace(CHAR_T c) { return ((unsigned)c < 256 && isspace(c)); }
@@ -202,7 +202,7 @@ static void deinit_S(void)
202#define DEINIT_S() deinit_S() 202#define DEINIT_S() deinit_S()
203 203
204 204
205#if ENABLE_FEATURE_ASSUME_UNICODE 205#if ENABLE_UNICODE_SUPPORT
206static size_t load_string(const char *src, int maxsize) 206static size_t load_string(const char *src, int maxsize)
207{ 207{
208 ssize_t len = mbstowcs(command_ps, src, maxsize - 1); 208 ssize_t len = mbstowcs(command_ps, src, maxsize - 1);
@@ -932,7 +932,7 @@ static void input_tab(smallint *lastWasTab)
932#define matchBuf (S.input_tab__matchBuf) 932#define matchBuf (S.input_tab__matchBuf)
933 int find_type; 933 int find_type;
934 int recalc_pos; 934 int recalc_pos;
935#if ENABLE_FEATURE_ASSUME_UNICODE 935#if ENABLE_UNICODE_SUPPORT
936 /* cursor pos in command converted to multibyte form */ 936 /* cursor pos in command converted to multibyte form */
937 int cursor_mb; 937 int cursor_mb;
938#endif 938#endif
@@ -942,7 +942,7 @@ static void input_tab(smallint *lastWasTab)
942 /* Make a local copy of the string -- 942 /* Make a local copy of the string --
943 * up to the position of the cursor */ 943 * up to the position of the cursor */
944 save_string(matchBuf, cursor + 1); 944 save_string(matchBuf, cursor + 1);
945#if ENABLE_FEATURE_ASSUME_UNICODE 945#if ENABLE_UNICODE_SUPPORT
946 cursor_mb = strlen(matchBuf); 946 cursor_mb = strlen(matchBuf);
947#endif 947#endif
948 tmp = matchBuf; 948 tmp = matchBuf;
@@ -1015,7 +1015,7 @@ static void input_tab(smallint *lastWasTab)
1015 } 1015 }
1016 1016
1017 len_found = strlen(tmp); 1017 len_found = strlen(tmp);
1018#if !ENABLE_FEATURE_ASSUME_UNICODE 1018#if !ENABLE_UNICODE_SUPPORT
1019 /* have space to place the match? */ 1019 /* have space to place the match? */
1020 /* The result consists of three parts with these lengths: */ 1020 /* The result consists of three parts with these lengths: */
1021 /* (cursor - recalc_pos) + len_found + (command_len - cursor) */ 1021 /* (cursor - recalc_pos) + len_found + (command_len - cursor) */
@@ -1088,7 +1088,7 @@ static void save_command_ps_at_cur_history(void)
1088 int cur = state->cur_history; 1088 int cur = state->cur_history;
1089 free(state->history[cur]); 1089 free(state->history[cur]);
1090 1090
1091# if ENABLE_FEATURE_ASSUME_UNICODE 1091# if ENABLE_UNICODE_SUPPORT
1092 { 1092 {
1093 char tbuf[MAX_LINELEN]; 1093 char tbuf[MAX_LINELEN];
1094 save_string(tbuf, sizeof(tbuf)); 1094 save_string(tbuf, sizeof(tbuf));
@@ -1659,7 +1659,7 @@ static int lineedit_read_key(char *read_key_buffer)
1659{ 1659{
1660 int64_t ic; 1660 int64_t ic;
1661 int timeout = -1; 1661 int timeout = -1;
1662#if ENABLE_FEATURE_ASSUME_UNICODE 1662#if ENABLE_UNICODE_SUPPORT
1663 char unicode_buf[MB_CUR_MAX + 1]; 1663 char unicode_buf[MB_CUR_MAX + 1];
1664 int unicode_idx = 0; 1664 int unicode_idx = 0;
1665#endif 1665#endif
@@ -1674,7 +1674,7 @@ static int lineedit_read_key(char *read_key_buffer)
1674 */ 1674 */
1675 ic = read_key(STDIN_FILENO, read_key_buffer, timeout); 1675 ic = read_key(STDIN_FILENO, read_key_buffer, timeout);
1676 if (errno) { 1676 if (errno) {
1677#if ENABLE_FEATURE_ASSUME_UNICODE 1677#if ENABLE_UNICODE_SUPPORT
1678 if (errno == EAGAIN && unicode_idx != 0) 1678 if (errno == EAGAIN && unicode_idx != 0)
1679 goto pushback; 1679 goto pushback;
1680#endif 1680#endif
@@ -1700,7 +1700,7 @@ static int lineedit_read_key(char *read_key_buffer)
1700 } 1700 }
1701#endif 1701#endif
1702 1702
1703#if ENABLE_FEATURE_ASSUME_UNICODE 1703#if ENABLE_UNICODE_SUPPORT
1704 if (unicode_status == UNICODE_ON) { 1704 if (unicode_status == UNICODE_ON) {
1705 wchar_t wc; 1705 wchar_t wc;
1706 1706
@@ -1817,7 +1817,7 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
1817 /* prepare before init handlers */ 1817 /* prepare before init handlers */
1818 cmdedit_y = 0; /* quasireal y, not true if line > xt*yt */ 1818 cmdedit_y = 0; /* quasireal y, not true if line > xt*yt */
1819 command_len = 0; 1819 command_len = 0;
1820#if ENABLE_FEATURE_ASSUME_UNICODE 1820#if ENABLE_UNICODE_SUPPORT
1821 command_ps = xzalloc(maxsize * sizeof(command_ps[0])); 1821 command_ps = xzalloc(maxsize * sizeof(command_ps[0]));
1822#else 1822#else
1823 command_ps = command; 1823 command_ps = command;
@@ -2199,8 +2199,8 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
2199// } 2199// }
2200// } 2200// }
2201 if (ic < ' ' 2201 if (ic < ' '
2202 || (!ENABLE_FEATURE_ASSUME_UNICODE && ic >= 256) 2202 || (!ENABLE_UNICODE_SUPPORT && ic >= 256)
2203 || (ENABLE_FEATURE_ASSUME_UNICODE && ic >= VI_CMDMODE_BIT) 2203 || (ENABLE_UNICODE_SUPPORT && ic >= VI_CMDMODE_BIT)
2204 ) { 2204 ) {
2205 /* If VI_CMDMODE_BIT is set, ic is >= 256 2205 /* If VI_CMDMODE_BIT is set, ic is >= 256
2206 * and vi mode ignores unexpected chars. 2206 * and vi mode ignores unexpected chars.
@@ -2268,7 +2268,7 @@ int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, li
2268/* Stop bug catching using "command_must_not_be_used" trick */ 2268/* Stop bug catching using "command_must_not_be_used" trick */
2269#undef command 2269#undef command
2270 2270
2271#if ENABLE_FEATURE_ASSUME_UNICODE 2271#if ENABLE_UNICODE_SUPPORT
2272 command[0] = '\0'; 2272 command[0] = '\0';
2273 if (command_len > 0) 2273 if (command_len > 0)
2274 command_len = save_string(command, maxsize - 1); 2274 command_len = save_string(command, maxsize - 1);
diff --git a/libbb/printable_string.c b/libbb/printable_string.c
index 47565de0d..83a482196 100644
--- a/libbb/printable_string.c
+++ b/libbb/printable_string.c
@@ -36,7 +36,7 @@ const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
36 s++; 36 s++;
37 } 37 }
38 38
39#if ENABLE_FEATURE_ASSUME_UNICODE 39#if ENABLE_UNICODE_SUPPORT
40 dst = unicode_conv_to_printable(stats, str); 40 dst = unicode_conv_to_printable(stats, str);
41#else 41#else
42 { 42 {
diff --git a/libbb/progress.c b/libbb/progress.c
index 0e484da6c..e96039042 100644
--- a/libbb/progress.c
+++ b/libbb/progress.c
@@ -78,7 +78,7 @@ void FAST_FUNC bb_progress_update(bb_progress_t *p,
78 if (ratio > 100) ratio = 100; 78 if (ratio > 100) ratio = 100;
79 } 79 }
80 80
81#if ENABLE_FEATURE_ASSUME_UNICODE 81#if ENABLE_UNICODE_SUPPORT
82 init_unicode(); 82 init_unicode();
83 /* libbb candidate? */ 83 /* libbb candidate? */
84 { 84 {
diff --git a/libbb/unicode.c b/libbb/unicode.c
index bc9714562..83e70b412 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -14,12 +14,12 @@
14uint8_t unicode_status; 14uint8_t unicode_status;
15#endif 15#endif
16 16
17/* This file is compiled only if FEATURE_ASSUME_UNICODE is on. 17/* This file is compiled only if UNICODE_SUPPORT is on.
18 * We check other options and decide whether to use libc support 18 * We check other options and decide whether to use libc support
19 * via locale, or use our own logic: 19 * via locale, or use our own logic:
20 */ 20 */
21 21
22#if ENABLE_LOCALE_SUPPORT 22#if ENABLE_UNICODE_USING_LOCALE
23 23
24/* Unicode support using libc locale support. */ 24/* Unicode support using libc locale support. */
25 25
@@ -139,7 +139,7 @@ size_t FAST_FUNC wcstombs(char *dest, const wchar_t *src, size_t n)
139 return org_n - n; 139 return org_n - n;
140} 140}
141 141
142#define ERROR_WCHAR (~(wchar_t)0) 142# define ERROR_WCHAR (~(wchar_t)0)
143 143
144static const char *mbstowc_internal(wchar_t *res, const char *src) 144static const char *mbstowc_internal(wchar_t *res, const char *src)
145{ 145{
@@ -239,7 +239,427 @@ int FAST_FUNC iswpunct(wint_t wc)
239 return (unsigned)wc <= 0x7f && ispunct(wc); 239 return (unsigned)wc <= 0x7f && ispunct(wc);
240} 240}
241 241
242#include "unicode_wcwidth.c" 242
243# if LAST_SUPPORTED_WCHAR >= 0x300
/* One inclusive range [first, last] of 16-bit code points. */
struct interval {
	uint16_t first;
	uint16_t last;
};

/*
 * Binary search for ucs in a table of inclusive intervals.
 *
 * table - array of intervals; the search assumes it is sorted in
 *         ascending order with non-overlapping entries
 * max   - index of the LAST element of table (not the element count)
 *
 * Returns 1 if ucs lies inside one of the intervals, 0 otherwise.
 */
static int in_interval_table(unsigned ucs, const struct interval *table, unsigned max)
{
	unsigned lo;
	unsigned hi;

	/* Reject anything outside the span covered by the whole table.
	 * This also guarantees ucs >= table[0].first, so "probe - 1"
	 * below is never evaluated with probe == 0. */
	if (ucs < table[0].first || ucs > table[max].last)
		return 0;

	lo = 0;
	hi = max;
	while (hi >= lo) {
		unsigned probe = (lo + hi) / 2;

		if (ucs > table[probe].last) {
			/* target is above this interval */
			lo = probe + 1;
			continue;
		}
		if (ucs < table[probe].first) {
			/* target is below this interval */
			hi = probe - 1;
			continue;
		}
		return 1;
	}
	return 0;
}
270
/*
 * Binary search for ucs in a table of packed small intervals.
 *
 * Each uint16_t entry encodes one inclusive interval:
 *   bits 15..2 - first code point of the interval
 *   bits  1..0 - (last - first), i.e. the interval spans 1..4 code points
 *
 * table - array of packed entries; the search assumes it is sorted in
 *         ascending order with non-overlapping entries
 * max   - index of the LAST element of table (not the element count)
 *
 * Returns 1 if ucs lies inside one of the intervals, 0 otherwise.
 */
static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
{
	unsigned min;
	unsigned mid;
	unsigned first, last;

	/* Quick reject: below the first interval's start... */
	first = table[0] >> 2;
	if (ucs < first)
		return 0;
	/* ...or above the LAST interval's end. The upper bound must come
	 * from table[max]; taking it from table[0] would reject every
	 * code point beyond the first entry before the search runs. */
	last = (table[max] >> 2) + (table[max] & 3);
	if (ucs > last)
		return 0;

	/* ucs >= start of table[0] here, so "mid - 1" below is never
	 * evaluated with mid == 0 (no unsigned wrap-around). */
	min = 0;
	while (max >= min) {
		mid = (min + max) / 2;
		first = table[mid] >> 2;
		last = first + (table[mid] & 3);
		if (ucs > last)
			min = mid + 1;
		else if (ucs < first)
			max = mid - 1;
		else
			return 1;
	}
	return 0;
}
296# endif
297
298
299/*
300 * This is an implementation of wcwidth() and wcswidth() (defined in
301 * IEEE Std 1003.1-2001) for Unicode.
302 *
303 * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
304 * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
305 *
306 * In fixed-width output devices, Latin characters all occupy a single
307 * "cell" position of equal width, whereas ideographic CJK characters
308 * occupy two such cells. Interoperability between terminal-line
309 * applications and (teletype-style) character terminals using the
310 * UTF-8 encoding requires agreement on which character should advance
311 * the cursor by how many cell positions. No established formal
312 * standards exist at present on which Unicode character shall occupy
313 * how many cell positions on character terminals. These routines are
314 * a first attempt at defining such behavior based on simple rules
315 * applied to data provided by the Unicode Consortium.
316 *
317 * For some graphical characters, the Unicode standard explicitly
318 * defines a character-cell width via the definition of the East Asian
319 * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
320 * In all these cases, there is no ambiguity about which width a
321 * terminal shall use. For characters in the East Asian Ambiguous (A)
322 * class, the width choice depends purely on a preference of backward
323 * compatibility with either historic CJK or Western practice.
324 * Choosing single-width for these characters is easy to justify as
325 * the appropriate long-term solution, as the CJK practice of
326 * displaying these characters as double-width comes from historic
327 * implementation simplicity (8-bit encoded characters were displayed
328 * single-width and 16-bit ones double-width, even for Greek,
329 * Cyrillic, etc.) and not any typographic considerations.
330 *
331 * Much less clear is the choice of width for the Not East Asian
332 * (Neutral) class. Existing practice does not dictate a width for any
333 * of these characters. It would nevertheless make sense
334 * typographically to allocate two character cells to characters such
335 * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
336 * represented adequately with a single-width glyph. The following
337 * routines at present merely assign a single-cell width to all
338 * neutral characters, in the interest of simplicity. This is not
339 * entirely satisfactory and should be reconsidered before
340 * establishing a formal standard in this area. At the moment, the
341 * decision which Not East Asian (Neutral) characters should be
342 * represented by double-width glyphs cannot yet be answered by
343 * applying a simple rule from the Unicode database content. Setting
344 * up a proper standard for the behavior of UTF-8 character terminals
345 * will require a careful analysis not only of each Unicode character,
346 * but also of each presentation form, something the author of these
347 * routines has so far avoided doing.
348 *
349 * http://www.unicode.org/unicode/reports/tr11/
350 *
351 * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
352 *
353 * Permission to use, copy, modify, and distribute this software
354 * for any purpose and without fee is hereby granted. The author
355 * disclaims all warranties with regard to this software.
356 *
357 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
358 */
359
360/* Assigned Unicode character ranges:
361 * Plane Range
362 * 0 0000–FFFF Basic Multilingual Plane
363 * 1 10000–1FFFF Supplementary Multilingual Plane
364 * 2 20000–2FFFF Supplementary Ideographic Plane
365 * 3 30000-3FFFF Tertiary Ideographic Plane (no chars assigned yet)
366 * 4-13 40000–DFFFF currently unassigned
367 * 14 E0000–EFFFF Supplementary Special-purpose Plane
368 * 15 F0000–FFFFF Supplementary Private Use Area-A
369 * 16 100000–10FFFF Supplementary Private Use Area-B
370 *
371 * "Supplementary Special-purpose Plane currently contains non-graphical
372 * characters in two blocks of 128 and 240 characters. The first block
373 * is for language tag characters for use when language cannot be indicated
374 * through other protocols (such as the xml:lang attribute in XML).
375 * The other block contains glyph variation selectors to indicate
376 * an alternate glyph for a character that cannot be determined by context."
377 *
378 * In simpler terms: it is a tool to fix the "Han unification" mess
379 * created by Unicode committee, to select Chinese/Japanese/Korean/Taiwan
380 * version of a character. (They forgot that the whole purpose of the Unicode
381 * was to be able to write all chars in one charset without such tricks).
382 * Until East Asian users say it is actually necessary to support these
383 * code points in console applications like busybox
384 * (i.e. do these chars ever appear in filenames, hostnames, text files
385 * and such?), we are treating these code points as invalid.
386 *
387 * Tertiary Ideographic Plane is also ignored for now,
388 * until Unicode committee assigns something there.
389 */
390/* The following two functions define the column width of an ISO 10646
391 * character as follows:
392 *
393 * - The null character (U+0000) has a column width of 0.
394 *
395 * - Other C0/C1 control characters and DEL will lead to a return
396 * value of -1.
397 *
398 * - Non-spacing and enclosing combining characters (general
399 * category code Mn or Me in the Unicode database) have a
400 * column width of 0.
401 *
402 * - SOFT HYPHEN (U+00AD) has a column width of 1.
403 *
404 * - Other format characters (general category code Cf in the Unicode
405 * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
406 *
407 * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
408 * have a column width of 0.
409 *
410 * - Spacing characters in the East Asian Wide (W) or East Asian
411 * Full-width (F) category as defined in Unicode Technical
412 * Report #11 have a column width of 2.
413 *
414 * - All remaining characters (including all printable
415 * ISO 8859-1 and WGL4 characters, Unicode control characters,
416 * etc.) have a column width of 1.
417 *
418 * This implementation assumes that wchar_t characters are encoded
419 * in ISO 10646.
420 */
421static int wcwidth(unsigned ucs)
422{
423# if LAST_SUPPORTED_WCHAR >= 0x300
424 /* sorted list of non-overlapping intervals of non-spacing characters */
425 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
426 static const struct interval combining[] = {
427# define BIG_(a,b) { a, b },
428# define PAIR(a,b)
429# define ARRAY /* PAIR if < 0x4000 and no more than 4 chars big */ \
430 BIG_(0x0300, 0x036F) \
431 PAIR(0x0483, 0x0486) \
432 PAIR(0x0488, 0x0489) \
433 BIG_(0x0591, 0x05BD) \
434 PAIR(0x05BF, 0x05BF) \
435 PAIR(0x05C1, 0x05C2) \
436 PAIR(0x05C4, 0x05C5) \
437 PAIR(0x05C7, 0x05C7) \
438 PAIR(0x0600, 0x0603) \
439 BIG_(0x0610, 0x0615) \
440 BIG_(0x064B, 0x065E) \
441 PAIR(0x0670, 0x0670) \
442 BIG_(0x06D6, 0x06E4) \
443 PAIR(0x06E7, 0x06E8) \
444 PAIR(0x06EA, 0x06ED) \
445 PAIR(0x070F, 0x070F) \
446 PAIR(0x0711, 0x0711) \
447 BIG_(0x0730, 0x074A) \
448 BIG_(0x07A6, 0x07B0) \
449 BIG_(0x07EB, 0x07F3) \
450 PAIR(0x0901, 0x0902) \
451 PAIR(0x093C, 0x093C) \
452 BIG_(0x0941, 0x0948) \
453 PAIR(0x094D, 0x094D) \
454 PAIR(0x0951, 0x0954) \
455 PAIR(0x0962, 0x0963) \
456 PAIR(0x0981, 0x0981) \
457 PAIR(0x09BC, 0x09BC) \
458 PAIR(0x09C1, 0x09C4) \
459 PAIR(0x09CD, 0x09CD) \
460 PAIR(0x09E2, 0x09E3) \
461 PAIR(0x0A01, 0x0A02) \
462 PAIR(0x0A3C, 0x0A3C) \
463 PAIR(0x0A41, 0x0A42) \
464 PAIR(0x0A47, 0x0A48) \
465 PAIR(0x0A4B, 0x0A4D) \
466 PAIR(0x0A70, 0x0A71) \
467 PAIR(0x0A81, 0x0A82) \
468 PAIR(0x0ABC, 0x0ABC) \
469 BIG_(0x0AC1, 0x0AC5) \
470 PAIR(0x0AC7, 0x0AC8) \
471 PAIR(0x0ACD, 0x0ACD) \
472 PAIR(0x0AE2, 0x0AE3) \
473 PAIR(0x0B01, 0x0B01) \
474 PAIR(0x0B3C, 0x0B3C) \
475 PAIR(0x0B3F, 0x0B3F) \
476 PAIR(0x0B41, 0x0B43) \
477 PAIR(0x0B4D, 0x0B4D) \
478 PAIR(0x0B56, 0x0B56) \
479 PAIR(0x0B82, 0x0B82) \
480 PAIR(0x0BC0, 0x0BC0) \
481 PAIR(0x0BCD, 0x0BCD) \
482 PAIR(0x0C3E, 0x0C40) \
483 PAIR(0x0C46, 0x0C48) \
484 PAIR(0x0C4A, 0x0C4D) \
485 PAIR(0x0C55, 0x0C56) \
486 PAIR(0x0CBC, 0x0CBC) \
487 PAIR(0x0CBF, 0x0CBF) \
488 PAIR(0x0CC6, 0x0CC6) \
489 PAIR(0x0CCC, 0x0CCD) \
490 PAIR(0x0CE2, 0x0CE3) \
491 PAIR(0x0D41, 0x0D43) \
492 PAIR(0x0D4D, 0x0D4D) \
493 PAIR(0x0DCA, 0x0DCA) \
494 PAIR(0x0DD2, 0x0DD4) \
495 PAIR(0x0DD6, 0x0DD6) \
496 PAIR(0x0E31, 0x0E31) \
497 BIG_(0x0E34, 0x0E3A) \
498 BIG_(0x0E47, 0x0E4E) \
499 PAIR(0x0EB1, 0x0EB1) \
500 BIG_(0x0EB4, 0x0EB9) \
501 PAIR(0x0EBB, 0x0EBC) \
502 BIG_(0x0EC8, 0x0ECD) \
503 PAIR(0x0F18, 0x0F19) \
504 PAIR(0x0F35, 0x0F35) \
505 PAIR(0x0F37, 0x0F37) \
506 PAIR(0x0F39, 0x0F39) \
507 BIG_(0x0F71, 0x0F7E) \
508 BIG_(0x0F80, 0x0F84) \
509 PAIR(0x0F86, 0x0F87) \
510 PAIR(0x0FC6, 0x0FC6) \
511 BIG_(0x0F90, 0x0F97) \
512 BIG_(0x0F99, 0x0FBC) \
513 PAIR(0x102D, 0x1030) \
514 PAIR(0x1032, 0x1032) \
515 PAIR(0x1036, 0x1037) \
516 PAIR(0x1039, 0x1039) \
517 PAIR(0x1058, 0x1059) \
518 BIG_(0x1160, 0x11FF) \
519 PAIR(0x135F, 0x135F) \
520 PAIR(0x1712, 0x1714) \
521 PAIR(0x1732, 0x1734) \
522 PAIR(0x1752, 0x1753) \
523 PAIR(0x1772, 0x1773) \
524 PAIR(0x17B4, 0x17B5) \
525 BIG_(0x17B7, 0x17BD) \
526 PAIR(0x17C6, 0x17C6) \
527 BIG_(0x17C9, 0x17D3) \
528 PAIR(0x17DD, 0x17DD) \
529 PAIR(0x180B, 0x180D) \
530 PAIR(0x18A9, 0x18A9) \
531 PAIR(0x1920, 0x1922) \
532 PAIR(0x1927, 0x1928) \
533 PAIR(0x1932, 0x1932) \
534 PAIR(0x1939, 0x193B) \
535 PAIR(0x1A17, 0x1A18) \
536 PAIR(0x1B00, 0x1B03) \
537 PAIR(0x1B34, 0x1B34) \
538 BIG_(0x1B36, 0x1B3A) \
539 PAIR(0x1B3C, 0x1B3C) \
540 PAIR(0x1B42, 0x1B42) \
541 BIG_(0x1B6B, 0x1B73) \
542 BIG_(0x1DC0, 0x1DCA) \
543 PAIR(0x1DFE, 0x1DFF) \
544 BIG_(0x200B, 0x200F) \
545 BIG_(0x202A, 0x202E) \
546 PAIR(0x2060, 0x2063) \
547 BIG_(0x206A, 0x206F) \
548 BIG_(0x20D0, 0x20EF) \
549 BIG_(0x302A, 0x302F) \
550 PAIR(0x3099, 0x309A) \
551 /* Too big to be packed in PAIRs: */ \
552 BIG_(0xA806, 0xA806) \
553 BIG_(0xA80B, 0xA80B) \
554 BIG_(0xA825, 0xA826) \
555 BIG_(0xFB1E, 0xFB1E) \
556 BIG_(0xFE00, 0xFE0F) \
557 BIG_(0xFE20, 0xFE23) \
558 BIG_(0xFEFF, 0xFEFF) \
559 BIG_(0xFFF9, 0xFFFB)
560 ARRAY
561# undef BIG_
562# undef PAIR
563 };
564# define BIG_(a,b)
565# define PAIR(a,b) (a << 2) | (b-a),
566 static const uint16_t combining1[] = { ARRAY };
567# undef BIG_
568# undef PAIR
569# define BIG_(a,b) char big_##a[b < 0x4000 && b-a <= 3 ? -1 : 1];
570# define PAIR(a,b) char pair##a[b >= 0x4000 || b-a > 3 ? -1 : 1];
571 struct CHECK { ARRAY };
572# undef BIG_
573# undef PAIR
574# undef ARRAY
575# endif
576
577 if (ucs == 0)
578 return 0;
579
580 /* Test for 8-bit control characters (00-1f, 80-9f, 7f) */
581 if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
582 return -1;
583 /* Quick abort if it is an obviously invalid char */
584 if (ucs > LAST_SUPPORTED_WCHAR)
585 return -1;
586
587 /* Optimization: no combining chars below 0x300 */
588 if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
589 return 1;
590
591# if LAST_SUPPORTED_WCHAR >= 0x300
592 /* Binary search in table of non-spacing characters */
593 if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
594 return 0;
595 if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
596 return 0;
597
598 /* Optimization: all chars below 0x1100 are not double-width */
599 if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
600 return 1;
601
602# if LAST_SUPPORTED_WCHAR >= 0x1100
603 /* Invalid code points: */
604 /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
605 /* Private Use Area (e000..f8ff) */
606 /* Noncharacters fdd0..fdef */
607 if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
608 || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
609 ) {
610 return -1;
611 }
612 /* 0xfffe and 0xffff in every plane are invalid */
613 if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
614 return -1;
615 }
616
617# if LAST_SUPPORTED_WCHAR >= 0x10000
618 if (ucs >= 0x10000) {
619 /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
620 static const struct interval combining0x10000[] = {
621 { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
622 { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
623 { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
624 { 0xD242, 0xD244 }
625 };
626 /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
627 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
628 return 0;
629 /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
630 if (LAST_SUPPORTED_WCHAR >= 0xE0001
631 && ( ucs == 0xE0001
632 || (ucs >= 0xE0020 && ucs <= 0xE007F)
633 || (ucs >= 0xE0100 && ucs <= 0xE01EF)
634 )
635 ) {
636 return 0;
637 }
638 }
639# endif
640
641 /* If we arrive here, ucs is not a combining or C0/C1 control character.
642 * Check whether it's 1 char or 2-char wide.
643 */
644 return 1 +
645 ( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
646 || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
647 || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
648 || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
649# if LAST_SUPPORTED_WCHAR >= 0xac00
650 || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
651 || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
652 || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
653 || (ucs >= 0xfe30 && ucs <= 0xfe6f) /* CJK Compatibility Forms */
654 || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */
655 || (ucs >= 0xffe0 && ucs <= 0xffe6)
656 || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */
657# endif
658 );
659# endif /* >= 0x1100 */
660# endif /* >= 0x300 */
661}
662
243 663
244# if ENABLE_UNICODE_BIDI_SUPPORT 664# if ENABLE_UNICODE_BIDI_SUPPORT
245int FAST_FUNC unicode_bidi_isrtl(wint_t wc) 665int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
@@ -592,7 +1012,7 @@ static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char
592 int w; 1012 int w;
593 wchar_t wc; 1013 wchar_t wc;
594 1014
595#if ENABLE_LOCALE_SUPPORT 1015#if ENABLE_UNICODE_USING_LOCALE
596 { 1016 {
597 mbstate_t mbst = { 0 }; 1017 mbstate_t mbst = { 0 };
598 ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst); 1018 ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
@@ -647,7 +1067,7 @@ static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char
647 uni_count++; 1067 uni_count++;
648 uni_width += w; 1068 uni_width += w;
649 dst = xrealloc(dst, dst_len + MB_CUR_MAX); 1069 dst = xrealloc(dst, dst_len + MB_CUR_MAX);
650#if ENABLE_LOCALE_SUPPORT 1070#if ENABLE_UNICODE_USING_LOCALE
651 { 1071 {
652 mbstate_t mbst = { 0 }; 1072 mbstate_t mbst = { 0 };
653 dst_len += wcrtomb(&dst[dst_len], wc, &mbst); 1073 dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
@@ -699,7 +1119,7 @@ unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
699 int w; 1119 int w;
700 wchar_t wc; 1120 wchar_t wc;
701 1121
702#if ENABLE_LOCALE_SUPPORT 1122#if ENABLE_UNICODE_USING_LOCALE
703 { 1123 {
704 mbstate_t mbst = { 0 }; 1124 mbstate_t mbst = { 0 };
705 ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst); 1125 ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
diff --git a/libbb/unicode_wcwidth.c b/libbb/unicode_wcwidth.c
deleted file mode 100644
index 0bb622705..000000000
--- a/libbb/unicode_wcwidth.c
+++ /dev/null
@@ -1,543 +0,0 @@
1/*
2 * This is an implementation of wcwidth() and wcswidth() (defined in
3 * IEEE Std 1003.1-2001) for Unicode.
4 *
5 * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
6 * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
7 *
8 * In fixed-width output devices, Latin characters all occupy a single
9 * "cell" position of equal width, whereas ideographic CJK characters
10 * occupy two such cells. Interoperability between terminal-line
11 * applications and (teletype-style) character terminals using the
12 * UTF-8 encoding requires agreement on which character should advance
13 * the cursor by how many cell positions. No established formal
14 * standards exist at present on which Unicode character shall occupy
15 * how many cell positions on character terminals. These routines are
16 * a first attempt of defining such behavior based on simple rules
17 * applied to data provided by the Unicode Consortium.
18 *
19 * For some graphical characters, the Unicode standard explicitly
20 * defines a character-cell width via the definition of the East Asian
21 * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
22 * In all these cases, there is no ambiguity about which width a
23 * terminal shall use. For characters in the East Asian Ambiguous (A)
24 * class, the width choice depends purely on a preference of backward
25 * compatibility with either historic CJK or Western practice.
26 * Choosing single-width for these characters is easy to justify as
27 * the appropriate long-term solution, as the CJK practice of
28 * displaying these characters as double-width comes from historic
29 * implementation simplicity (8-bit encoded characters were displayed
30 * single-width and 16-bit ones double-width, even for Greek,
31 * Cyrillic, etc.) and not any typographic considerations.
32 *
33 * Much less clear is the choice of width for the Not East Asian
34 * (Neutral) class. Existing practice does not dictate a width for any
35 * of these characters. It would nevertheless make sense
36 * typographically to allocate two character cells to characters such
37 * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
38 * represented adequately with a single-width glyph. The following
39 * routines at present merely assign a single-cell width to all
40 * neutral characters, in the interest of simplicity. This is not
41 * entirely satisfactory and should be reconsidered before
42 * establishing a formal standard in this area. At the moment, the
43 * decision which Not East Asian (Neutral) characters should be
44 * represented by double-width glyphs cannot yet be answered by
45 * applying a simple rule from the Unicode database content. Setting
46 * up a proper standard for the behavior of UTF-8 character terminals
47 * will require a careful analysis not only of each Unicode character,
48 * but also of each presentation form, something the author of these
49 * routines has so far avoided doing.
50 *
51 * http://www.unicode.org/unicode/reports/tr11/
52 *
53 * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
54 *
55 * Permission to use, copy, modify, and distribute this software
56 * for any purpose and without fee is hereby granted. The author
57 * disclaims all warranties with regard to this software.
58 *
59 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
60 */
61
62/* Assigned Unicode character ranges:
63 * Plane Range
64 * 0 0000–FFFF Basic Multilingual Plane
65 * 1 10000–1FFFF Supplementary Multilingual Plane
66 * 2 20000–2FFFF Supplementary Ideographic Plane
67 * 3 30000-3FFFF Tertiary Ideographic Plane (no chars assigned yet)
68 * 4-13 40000–DFFFF currently unassigned
69 * 14 E0000–EFFFF Supplementary Special-purpose Plane
70 * 15 F0000–FFFFF Supplementary Private Use Area-A
71 * 16 100000–10FFFF Supplementary Private Use Area-B
72 *
73 * "Supplementary Special-purpose Plane currently contains non-graphical
74 * characters in two blocks of 128 and 240 characters. The first block
75 * is for language tag characters for use when language cannot be indicated
76 * through other protocols (such as the xml:lang attribute in XML).
77 * The other block contains glyph variation selectors to indicate
78 * an alternate glyph for a character that cannot be determined by context."
79 *
80 * In simpler terms: it is a tool to fix the "Han unification" mess
81 * created by Unicode committee, to select Chinese/Japanese/Korean/Taiwan
82 * version of a character. (They forgot that the whole purpose of the Unicode
83 * was to be able to write all chars in one charset without such tricks).
84 * Until East Asian users say it is actually necessary to support these
85 * code points in console applications like busybox
86 * (i.e. do these chars ever appear in filenames, hostnames, text files
87 * and such?), we are treating these code points as invalid.
88 *
89 * Tertiary Ideographic Plane is also ignored for now,
90 * until Unicode committee assigns something there.
91 */
92
93#if LAST_SUPPORTED_WCHAR >= 0x300
/* Inclusive range first..last; both bounds are stored as 16-bit values. */
94struct interval {
95 uint16_t first; /* lowest value belonging to the range */
96 uint16_t last; /* highest value belonging to the range */
97};
98
99/* auxiliary function for binary search in interval table */
100static int in_interval_table(unsigned ucs, const struct interval *table, unsigned max)
101{
102 unsigned min;
103 unsigned mid;
104
105 if (ucs < table[0].first || ucs > table[max].last)
106 return 0;
107
108 min = 0;
109 while (max >= min) {
110 mid = (min + max) / 2;
111 if (ucs > table[mid].last)
112 min = mid + 1;
113 else if (ucs < table[mid].first)
114 max = mid - 1;
115 else
116 return 1;
117 }
118 return 0;
119}
120
/*
 * Binary search in a sorted table of packed short ranges.
 *
 * Each uint16_t entry encodes one range as (first << 2) | (last - first):
 * the upper 14 bits hold the first value, the low 2 bits hold the span
 * (0..3), so a range covers at most 4 consecutive values.
 *
 * table: packed ranges sorted in ascending order, non-overlapping
 * max:   index of the LAST table entry (not the entry count)
 *
 * Returns 1 if ucs falls inside one of the ranges, 0 otherwise.
 */
static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
{
	unsigned min;
	unsigned mid;
	unsigned first, last;

	/* Quick reject: below the start of the first range... */
	first = table[0] >> 2;
	if (ucs < first)
		return 0;
	/* ...or above the end of the LAST range. Using table[0] for the
	 * upper bound would reject every value beyond the first entry
	 * before the search even starts.
	 */
	last = (table[max] >> 2) + (table[max] & 3);
	if (ucs > last)
		return 0;

	min = 0;
	while (max >= min) {
		mid = (min + max) / 2;
		first = table[mid] >> 2;
		last = first + (table[mid] & 3);
		if (ucs > last)
			min = mid + 1;
		else if (ucs < first)
			/* mid is never 0 here: ucs >= start of table[0] was checked above */
			max = mid - 1;
		else
			return 1;
	}
	return 0;
}
146#endif
147
148
149/* The following two functions define the column width of an ISO 10646
150 * character as follows:
151 *
152 * - The null character (U+0000) has a column width of 0.
153 *
154 * - Other C0/C1 control characters and DEL will lead to a return
155 * value of -1.
156 *
157 * - Non-spacing and enclosing combining characters (general
158 * category code Mn or Me in the Unicode database) have a
159 * column width of 0.
160 *
161 * - SOFT HYPHEN (U+00AD) has a column width of 1.
162 *
163 * - Other format characters (general category code Cf in the Unicode
164 * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
165 *
166 * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
167 * have a column width of 0.
168 *
169 * - Spacing characters in the East Asian Wide (W) or East Asian
170 * Full-width (F) category as defined in Unicode Technical
171 * Report #11 have a column width of 2.
172 *
173 * - All remaining characters (including all printable
174 * ISO 8859-1 and WGL4 characters, Unicode control characters,
175 * etc.) have a column width of 1.
176 *
177 * This implementation assumes that wchar_t characters are encoded
178 * in ISO 10646.
179 */
180static int wcwidth(unsigned ucs)
181{
182#if LAST_SUPPORTED_WCHAR >= 0x300
183 /* sorted list of non-overlapping intervals of non-spacing characters */
184 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
185 static const struct interval combining[] = {
186#define BIG_(a,b) { a, b },
187#define PAIR(a,b)
188 /* PAIR if < 0x4000 and no more than 4 chars big */
189 BIG_(0x0300, 0x036F)
190 PAIR(0x0483, 0x0486)
191 PAIR(0x0488, 0x0489)
192 BIG_(0x0591, 0x05BD)
193 PAIR(0x05BF, 0x05BF)
194 PAIR(0x05C1, 0x05C2)
195 PAIR(0x05C4, 0x05C5)
196 PAIR(0x05C7, 0x05C7)
197 PAIR(0x0600, 0x0603)
198 BIG_(0x0610, 0x0615)
199 BIG_(0x064B, 0x065E)
200 PAIR(0x0670, 0x0670)
201 BIG_(0x06D6, 0x06E4)
202 PAIR(0x06E7, 0x06E8)
203 PAIR(0x06EA, 0x06ED)
204 PAIR(0x070F, 0x070F)
205 PAIR(0x0711, 0x0711)
206 BIG_(0x0730, 0x074A)
207 BIG_(0x07A6, 0x07B0)
208 BIG_(0x07EB, 0x07F3)
209 PAIR(0x0901, 0x0902)
210 PAIR(0x093C, 0x093C)
211 BIG_(0x0941, 0x0948)
212 PAIR(0x094D, 0x094D)
213 PAIR(0x0951, 0x0954)
214 PAIR(0x0962, 0x0963)
215 PAIR(0x0981, 0x0981)
216 PAIR(0x09BC, 0x09BC)
217 PAIR(0x09C1, 0x09C4)
218 PAIR(0x09CD, 0x09CD)
219 PAIR(0x09E2, 0x09E3)
220 PAIR(0x0A01, 0x0A02)
221 PAIR(0x0A3C, 0x0A3C)
222 PAIR(0x0A41, 0x0A42)
223 PAIR(0x0A47, 0x0A48)
224 PAIR(0x0A4B, 0x0A4D)
225 PAIR(0x0A70, 0x0A71)
226 PAIR(0x0A81, 0x0A82)
227 PAIR(0x0ABC, 0x0ABC)
228 BIG_(0x0AC1, 0x0AC5)
229 PAIR(0x0AC7, 0x0AC8)
230 PAIR(0x0ACD, 0x0ACD)
231 PAIR(0x0AE2, 0x0AE3)
232 PAIR(0x0B01, 0x0B01)
233 PAIR(0x0B3C, 0x0B3C)
234 PAIR(0x0B3F, 0x0B3F)
235 PAIR(0x0B41, 0x0B43)
236 PAIR(0x0B4D, 0x0B4D)
237 PAIR(0x0B56, 0x0B56)
238 PAIR(0x0B82, 0x0B82)
239 PAIR(0x0BC0, 0x0BC0)
240 PAIR(0x0BCD, 0x0BCD)
241 PAIR(0x0C3E, 0x0C40)
242 PAIR(0x0C46, 0x0C48)
243 PAIR(0x0C4A, 0x0C4D)
244 PAIR(0x0C55, 0x0C56)
245 PAIR(0x0CBC, 0x0CBC)
246 PAIR(0x0CBF, 0x0CBF)
247 PAIR(0x0CC6, 0x0CC6)
248 PAIR(0x0CCC, 0x0CCD)
249 PAIR(0x0CE2, 0x0CE3)
250 PAIR(0x0D41, 0x0D43)
251 PAIR(0x0D4D, 0x0D4D)
252 PAIR(0x0DCA, 0x0DCA)
253 PAIR(0x0DD2, 0x0DD4)
254 PAIR(0x0DD6, 0x0DD6)
255 PAIR(0x0E31, 0x0E31)
256 BIG_(0x0E34, 0x0E3A)
257 BIG_(0x0E47, 0x0E4E)
258 PAIR(0x0EB1, 0x0EB1)
259 BIG_(0x0EB4, 0x0EB9)
260 PAIR(0x0EBB, 0x0EBC)
261 BIG_(0x0EC8, 0x0ECD)
262 PAIR(0x0F18, 0x0F19)
263 PAIR(0x0F35, 0x0F35)
264 PAIR(0x0F37, 0x0F37)
265 PAIR(0x0F39, 0x0F39)
266 BIG_(0x0F71, 0x0F7E)
267 BIG_(0x0F80, 0x0F84)
268 PAIR(0x0F86, 0x0F87)
269 PAIR(0x0FC6, 0x0FC6)
270 BIG_(0x0F90, 0x0F97)
271 BIG_(0x0F99, 0x0FBC)
272 PAIR(0x102D, 0x1030)
273 PAIR(0x1032, 0x1032)
274 PAIR(0x1036, 0x1037)
275 PAIR(0x1039, 0x1039)
276 PAIR(0x1058, 0x1059)
277 BIG_(0x1160, 0x11FF)
278 PAIR(0x135F, 0x135F)
279 PAIR(0x1712, 0x1714)
280 PAIR(0x1732, 0x1734)
281 PAIR(0x1752, 0x1753)
282 PAIR(0x1772, 0x1773)
283 PAIR(0x17B4, 0x17B5)
284 BIG_(0x17B7, 0x17BD)
285 PAIR(0x17C6, 0x17C6)
286 BIG_(0x17C9, 0x17D3)
287 PAIR(0x17DD, 0x17DD)
288 PAIR(0x180B, 0x180D)
289 PAIR(0x18A9, 0x18A9)
290 PAIR(0x1920, 0x1922)
291 PAIR(0x1927, 0x1928)
292 PAIR(0x1932, 0x1932)
293 PAIR(0x1939, 0x193B)
294 PAIR(0x1A17, 0x1A18)
295 PAIR(0x1B00, 0x1B03)
296 PAIR(0x1B34, 0x1B34)
297 BIG_(0x1B36, 0x1B3A)
298 PAIR(0x1B3C, 0x1B3C)
299 PAIR(0x1B42, 0x1B42)
300 BIG_(0x1B6B, 0x1B73)
301 BIG_(0x1DC0, 0x1DCA)
302 PAIR(0x1DFE, 0x1DFF)
303 BIG_(0x200B, 0x200F)
304 BIG_(0x202A, 0x202E)
305 PAIR(0x2060, 0x2063)
306 BIG_(0x206A, 0x206F)
307 BIG_(0x20D0, 0x20EF)
308 BIG_(0x302A, 0x302F)
309 PAIR(0x3099, 0x309A)
310 /* Too big to be packed in PAIRs: */
311 { 0xA806, 0xA806 },
312 { 0xA80B, 0xA80B },
313 { 0xA825, 0xA826 },
314 { 0xFB1E, 0xFB1E },
315 { 0xFE00, 0xFE0F },
316 { 0xFE20, 0xFE23 },
317 { 0xFEFF, 0xFEFF },
318 { 0xFFF9, 0xFFFB }
319#undef BIG_
320#undef PAIR
321 };
322 static const uint16_t combining1[] = {
323#define BIG_(a,b)
324#define PAIR(a,b) (a << 2) | (b-a),
325 /* Exact copy-n-paste of the above: */
326 BIG_(0x0300, 0x036F)
327 PAIR(0x0483, 0x0486)
328 PAIR(0x0488, 0x0489)
329 BIG_(0x0591, 0x05BD)
330 PAIR(0x05BF, 0x05BF)
331 PAIR(0x05C1, 0x05C2)
332 PAIR(0x05C4, 0x05C5)
333 PAIR(0x05C7, 0x05C7)
334 PAIR(0x0600, 0x0603)
335 BIG_(0x0610, 0x0615)
336 BIG_(0x064B, 0x065E)
337 PAIR(0x0670, 0x0670)
338 BIG_(0x06D6, 0x06E4)
339 PAIR(0x06E7, 0x06E8)
340 PAIR(0x06EA, 0x06ED)
341 PAIR(0x070F, 0x070F)
342 PAIR(0x0711, 0x0711)
343 BIG_(0x0730, 0x074A)
344 BIG_(0x07A6, 0x07B0)
345 BIG_(0x07EB, 0x07F3)
346 PAIR(0x0901, 0x0902)
347 PAIR(0x093C, 0x093C)
348 BIG_(0x0941, 0x0948)
349 PAIR(0x094D, 0x094D)
350 PAIR(0x0951, 0x0954)
351 PAIR(0x0962, 0x0963)
352 PAIR(0x0981, 0x0981)
353 PAIR(0x09BC, 0x09BC)
354 PAIR(0x09C1, 0x09C4)
355 PAIR(0x09CD, 0x09CD)
356 PAIR(0x09E2, 0x09E3)
357 PAIR(0x0A01, 0x0A02)
358 PAIR(0x0A3C, 0x0A3C)
359 PAIR(0x0A41, 0x0A42)
360 PAIR(0x0A47, 0x0A48)
361 PAIR(0x0A4B, 0x0A4D)
362 PAIR(0x0A70, 0x0A71)
363 PAIR(0x0A81, 0x0A82)
364 PAIR(0x0ABC, 0x0ABC)
365 BIG_(0x0AC1, 0x0AC5)
366 PAIR(0x0AC7, 0x0AC8)
367 PAIR(0x0ACD, 0x0ACD)
368 PAIR(0x0AE2, 0x0AE3)
369 PAIR(0x0B01, 0x0B01)
370 PAIR(0x0B3C, 0x0B3C)
371 PAIR(0x0B3F, 0x0B3F)
372 PAIR(0x0B41, 0x0B43)
373 PAIR(0x0B4D, 0x0B4D)
374 PAIR(0x0B56, 0x0B56)
375 PAIR(0x0B82, 0x0B82)
376 PAIR(0x0BC0, 0x0BC0)
377 PAIR(0x0BCD, 0x0BCD)
378 PAIR(0x0C3E, 0x0C40)
379 PAIR(0x0C46, 0x0C48)
380 PAIR(0x0C4A, 0x0C4D)
381 PAIR(0x0C55, 0x0C56)
382 PAIR(0x0CBC, 0x0CBC)
383 PAIR(0x0CBF, 0x0CBF)
384 PAIR(0x0CC6, 0x0CC6)
385 PAIR(0x0CCC, 0x0CCD)
386 PAIR(0x0CE2, 0x0CE3)
387 PAIR(0x0D41, 0x0D43)
388 PAIR(0x0D4D, 0x0D4D)
389 PAIR(0x0DCA, 0x0DCA)
390 PAIR(0x0DD2, 0x0DD4)
391 PAIR(0x0DD6, 0x0DD6)
392 PAIR(0x0E31, 0x0E31)
393 BIG_(0x0E34, 0x0E3A)
394 BIG_(0x0E47, 0x0E4E)
395 PAIR(0x0EB1, 0x0EB1)
396 BIG_(0x0EB4, 0x0EB9)
397 PAIR(0x0EBB, 0x0EBC)
398 BIG_(0x0EC8, 0x0ECD)
399 PAIR(0x0F18, 0x0F19)
400 PAIR(0x0F35, 0x0F35)
401 PAIR(0x0F37, 0x0F37)
402 PAIR(0x0F39, 0x0F39)
403 BIG_(0x0F71, 0x0F7E)
404 BIG_(0x0F80, 0x0F84)
405 PAIR(0x0F86, 0x0F87)
406 PAIR(0x0FC6, 0x0FC6)
407 BIG_(0x0F90, 0x0F97)
408 BIG_(0x0F99, 0x0FBC)
409 PAIR(0x102D, 0x1030)
410 PAIR(0x1032, 0x1032)
411 PAIR(0x1036, 0x1037)
412 PAIR(0x1039, 0x1039)
413 PAIR(0x1058, 0x1059)
414 BIG_(0x1160, 0x11FF)
415 PAIR(0x135F, 0x135F)
416 PAIR(0x1712, 0x1714)
417 PAIR(0x1732, 0x1734)
418 PAIR(0x1752, 0x1753)
419 PAIR(0x1772, 0x1773)
420 PAIR(0x17B4, 0x17B5)
421 BIG_(0x17B7, 0x17BD)
422 PAIR(0x17C6, 0x17C6)
423 BIG_(0x17C9, 0x17D3)
424 PAIR(0x17DD, 0x17DD)
425 PAIR(0x180B, 0x180D)
426 PAIR(0x18A9, 0x18A9)
427 PAIR(0x1920, 0x1922)
428 PAIR(0x1927, 0x1928)
429 PAIR(0x1932, 0x1932)
430 PAIR(0x1939, 0x193B)
431 PAIR(0x1A17, 0x1A18)
432 PAIR(0x1B00, 0x1B03)
433 PAIR(0x1B34, 0x1B34)
434 BIG_(0x1B36, 0x1B3A)
435 PAIR(0x1B3C, 0x1B3C)
436 PAIR(0x1B42, 0x1B42)
437 BIG_(0x1B6B, 0x1B73)
438 BIG_(0x1DC0, 0x1DCA)
439 PAIR(0x1DFE, 0x1DFF)
440 BIG_(0x200B, 0x200F)
441 BIG_(0x202A, 0x202E)
442 PAIR(0x2060, 0x2063)
443 BIG_(0x206A, 0x206F)
444 BIG_(0x20D0, 0x20EF)
445 BIG_(0x302A, 0x302F)
446 PAIR(0x3099, 0x309A)
447#undef BIG_
448#undef PAIR
449 };
450 struct CHECK {
451#define BIG_(a,b) char big##a[b-a <= 3 ? -1 : 1];
452#define PAIR(a,b) char pair##a[b-a > 3 ? -1 : 1];
453 /* Copy-n-paste it here again to verify correctness */
454#undef BIG_
455#undef PAIR
456 };
457#endif
458
459 if (ucs == 0)
460 return 0;
461
462 /* Test for 8-bit control characters (00-1f, 80-9f, 7f) */
463 if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
464 return -1;
465 /* Quick abort if it is an obviously invalid char */
466 if (ucs > LAST_SUPPORTED_WCHAR)
467 return -1;
468
469 /* Optimization: no combining chars below 0x300 */
470 if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
471 return 1;
472
473#if LAST_SUPPORTED_WCHAR >= 0x300
474 /* Binary search in table of non-spacing characters */
475 if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
476 return 0;
477 if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
478 return 0;
479
480 /* Optimization: all chars below 0x1100 are not double-width */
481 if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
482 return 1;
483
484# if LAST_SUPPORTED_WCHAR >= 0x1100
485 /* Invalid code points: */
486 /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
487 /* Private Use Area (e000..f8ff) */
488 /* Noncharacters fdd0..fdef */
489 if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
490 || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
491 ) {
492 return -1;
493 }
494 /* 0xfffe and 0xffff in every plane are invalid */
495 if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
496 return -1;
497 }
498
499# if LAST_SUPPORTED_WCHAR >= 0x10000
500 if (ucs >= 0x10000) {
501 /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
502 static const struct interval combining0x10000[] = {
503 { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
504 { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
505 { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
506 { 0xD242, 0xD244 }
507 };
508 /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
509 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
510 return 0;
511 /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
512 if (LAST_SUPPORTED_WCHAR >= 0xE0001
513 && ( ucs == 0xE0001
514 || (ucs >= 0xE0020 && ucs <= 0xE007F)
515 || (ucs >= 0xE0100 && ucs <= 0xE01EF)
516 )
517 ) {
518 return 0;
519 }
520 }
521# endif
522
523 /* If we arrive here, ucs is not a combining or C0/C1 control character.
524 * Check whether it's 1 char or 2-shar wide.
525 */
526 return 1 +
527 ( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
528 || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
529 || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
530 || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
531# if LAST_SUPPORTED_WCHAR >= 0xac00
532 || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
533 || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
534 || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
535 || (ucs >= 0xfe30 && ucs <= 0xfe6f) /* CJK Compatibility Forms */
536 || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */
537 || (ucs >= 0xffe0 && ucs <= 0xffe6)
538 || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */
539# endif
540 );
541# endif /* >= 0x1100 */
542#endif /* >= 0x300 */
543}
diff --git a/modutils/lsmod.c b/modutils/lsmod.c
index 50621c245..97954c71f 100644
--- a/modutils/lsmod.c
+++ b/modutils/lsmod.c
@@ -60,7 +60,7 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
60 token[3][strlen(token[3])-1] = '\0'; 60 token[3][strlen(token[3])-1] = '\0';
61 } else 61 } else
62 token[3] = (char *) ""; 62 token[3] = (char *) "";
63# if ENABLE_FEATURE_ASSUME_UNICODE 63# if ENABLE_UNICODE_SUPPORT
64 { 64 {
65 uni_stat_t uni_stat; 65 uni_stat_t uni_stat;
66 char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]); 66 char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]);
@@ -78,7 +78,7 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
78 // or comma-separated list ended by comma 78 // or comma-separated list ended by comma
79 // so trimming the trailing char is just what we need! 79 // so trimming the trailing char is just what we need!
80 token[3][strlen(token[3])-1] = '\0'; 80 token[3][strlen(token[3])-1] = '\0';
81# if ENABLE_FEATURE_ASSUME_UNICODE 81# if ENABLE_UNICODE_SUPPORT
82 { 82 {
83 uni_stat_t uni_stat; 83 uni_stat_t uni_stat;
84 char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]); 84 char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]);
diff --git a/networking/udhcp/dumpleases.c b/networking/udhcp/dumpleases.c
index 6ebda94b6..fb6219fba 100644
--- a/networking/udhcp/dumpleases.c
+++ b/networking/udhcp/dumpleases.c
@@ -66,7 +66,7 @@ int dumpleases_main(int argc UNUSED_PARAM, char **argv)
66 fmt = ":%02x"; 66 fmt = ":%02x";
67 } 67 }
68 addr.s_addr = lease.lease_nip; 68 addr.s_addr = lease.lease_nip;
69#if ENABLE_FEATURE_ASSUME_UNICODE 69#if ENABLE_UNICODE_SUPPORT
70 { 70 {
71 char *uni_name = unicode_conv_to_printable_fixedwidth(NULL, lease.hostname, 19); 71 char *uni_name = unicode_conv_to_printable_fixedwidth(NULL, lease.hostname, 19);
72 printf(" %-16s%s ", inet_ntoa(addr), uni_name); 72 printf(" %-16s%s ", inet_ntoa(addr), uni_name);
diff --git a/scripts/defconfig b/scripts/defconfig
index 49158ceca..d13f5b1b4 100644
--- a/scripts/defconfig
+++ b/scripts/defconfig
@@ -24,7 +24,7 @@ CONFIG_FEATURE_VERBOSE_USAGE=y
24CONFIG_FEATURE_COMPRESS_USAGE=y 24CONFIG_FEATURE_COMPRESS_USAGE=y
25CONFIG_FEATURE_INSTALLER=y 25CONFIG_FEATURE_INSTALLER=y
26CONFIG_LOCALE_SUPPORT=y 26CONFIG_LOCALE_SUPPORT=y
27CONFIG_FEATURE_ASSUME_UNICODE=y 27CONFIG_UNICODE_SUPPORT=y
28# CONFIG_FEATURE_CHECK_UNICODE_IN_ENV is not set 28# CONFIG_FEATURE_CHECK_UNICODE_IN_ENV is not set
29CONFIG_LONG_OPTS=y 29CONFIG_LONG_OPTS=y
30CONFIG_FEATURE_DEVPTS=y 30CONFIG_FEATURE_DEVPTS=y
diff --git a/scripts/randomtest b/scripts/randomtest
index 2a30cb638..6b7db9239 100755
--- a/scripts/randomtest
+++ b/scripts/randomtest
@@ -50,7 +50,7 @@ cat .config \
50| grep -v ^CONFIG_BUILD_LIBBUSYBOX= \ 50| grep -v ^CONFIG_BUILD_LIBBUSYBOX= \
51| grep -v ^CONFIG_PAM= \ 51| grep -v ^CONFIG_PAM= \
52| grep -v ^CONFIG_TASKSET= \ 52| grep -v ^CONFIG_TASKSET= \
53| grep -v ^CONFIG_FEATURE_ASSUME_UNICODE= \ 53| grep -v ^CONFIG_UNICODE_SUPPORT= \
54| grep -v ^CONFIG_PIE= \ 54| grep -v ^CONFIG_PIE= \
55| grep -v CONFIG_STATIC \ 55| grep -v CONFIG_STATIC \
56| grep -v CONFIG_CROSS_COMPILER_PREFIX \ 56| grep -v CONFIG_CROSS_COMPILER_PREFIX \
diff --git a/testsuite/cal.tests b/testsuite/cal.tests
index 36be2b4b5..30985688b 100755
--- a/testsuite/cal.tests
+++ b/testsuite/cal.tests
@@ -20,7 +20,7 @@ Su Mo Tu We Th Fr Sa
20" "" "" 20" "" ""
21 21
22test x"$CONFIG_LOCALE_SUPPORT" = x"y" \ 22test x"$CONFIG_LOCALE_SUPPORT" = x"y" \
23&& test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ 23&& test x"$CONFIG_UNICODE_SUPPORT" = x"y" \
24&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"0" \ 24&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"0" \
25&& test x"$CONFIG_UNICODE_WIDE_WCHARS" = x"y" \ 25&& test x"$CONFIG_UNICODE_WIDE_WCHARS" = x"y" \
26&& test x"$CONFIG_STATIC" != x"y" \ 26&& test x"$CONFIG_STATIC" != x"y" \
diff --git a/testsuite/ls.tests b/testsuite/ls.tests
index 169313a63..0680762fc 100755
--- a/testsuite/ls.tests
+++ b/testsuite/ls.tests
@@ -14,7 +14,7 @@ mkdir ls.testdir || exit 1
14# With Unicode provided by libc locale, I'm not sure this test can pass. 14# With Unicode provided by libc locale, I'm not sure this test can pass.
15# I suspect we might fail to skip exactly correct number of bytes 15# I suspect we might fail to skip exactly correct number of bytes
16# over broked unicode sequences. 16# over broked unicode sequences.
17test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ 17test x"$CONFIG_UNICODE_SUPPORT" = x"y" \
18&& test x"$CONFIG_LOCALE_SUPPORT" != x"y" \ 18&& test x"$CONFIG_LOCALE_SUPPORT" != x"y" \
19&& test x"$CONFIG_SUBST_WCHAR" = x"63" \ 19&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
20&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \ 20&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \
@@ -133,7 +133,7 @@ test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
133' "" "" 133' "" ""
134 134
135# Currently fails on "0080_4.2.2__U-000007FF_=_e0_9f_bf" line 135# Currently fails on "0080_4.2.2__U-000007FF_=_e0_9f_bf" line
136test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \ 136test x"$CONFIG_UNICODE_SUPPORT" = x"y" \
137&& test x"$CONFIG_LOCALE_SUPPORT" != x"y" \ 137&& test x"$CONFIG_LOCALE_SUPPORT" != x"y" \
138&& test x"$CONFIG_SUBST_WCHAR" = x"63" \ 138&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
139&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"0" \ 139&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"0" \