more fine-grained Unicode support

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
author: Denys Vlasenko <vda.linux@googlemail.com> 2010-01-29 09:11:47 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2010-01-29 09:11:47 +0100
commit: 2edba21f4c59d071f2241c2f47021c7034ec7cb8 (patch)
tree: 6cf3de29bfbdafa26fddbc1cd3dc467a2d8263f6
parent: 083e172641b64c564b7ec5478197dccbde43b421 (diff)
download: busybox-w32-2edba21f4c59d071f2241c2f47021c7034ec7cb8.tar.gz
busybox-w32-2edba21f4c59d071f2241c2f47021c7034ec7cb8.tar.bz2
busybox-w32-2edba21f4c59d071f2241c2f47021c7034ec7cb8.zip
3 files changed, 143 insertions, 18 deletions
diff --git a/Config.in b/Config.in
index 8e751530c..68444839d 100644
--- a/Config.in
+++ b/Config.in
@@ -141,6 +141,57 @@ config FEATURE_CHECK_UNICODE_IN_ENV
          Otherwise, Unicode support will be always enabled and active.
+config SUBST_WCHAR
+        int "Character code to substitute unprintable characters with"
+        range 1 4294967295
+        depends on FEATURE_ASSUME_UNICODE
+        default 63
+        help
+          Typical values are 63 for '?' (works with any output device),
+          30 for ASCII substitute control code,
+          65533 (0xfffd) for Unicode replacement character.
+config LAST_SUPPORTED_WCHAR
+        int "Range of supported Unicode characters"
+        range 0 4294967295
+        depends on FEATURE_ASSUME_UNICODE
+        default 767
+        help
+          Any character with Unicode value bigger than this is assumed
+          to be non-printable on output device. Many applets replace
+          such chars with substitution character.
+          The idea is that many valid printable Unicode chars are
+          nevertheless not displayed correctly. Think about
+          combining characters, double-wide hieroglyphs and such.
+          Many terminals, xterms and such will fail to handle them
+          correctly.
+          Typical values are:
+          126 - ASCII only
+          767 (0x2ff) - there are no combining chars in [0..767] range
+                        (the range includes Latin 1, Latin Ext. A and B),
+                        code is ~700 bytes smaller for this case.
+          4351 (0x10ff) - there are no double-wide chars in [0..4351] range,
+                        code is ~300 bytes smaller for this case.
+          0 - off, any valid printable Unicode character will be printed.
+config UNICODE_COMBINING_WCHARS
+        bool "Allow zero-width Unicode characters on output"
+        default n
+        depends on FEATURE_ASSUME_UNICODE
+        help
+          With this option off, any Unicode char with width of 0
+          is substituted on output.
+config UNICODE_WIDE_WCHARS
+        bool "Allow wide Unicode characters on output"
+        default n
+        depends on FEATURE_ASSUME_UNICODE
+        help
+          With this option off, any Unicode char with width > 1
+          is substituted on output.
 config LONG_OPTS
        bool "Support for --long-options"
        default y
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 39b173e9c..878af84bc 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -216,8 +216,6 @@ size_t FAST_FUNC mbstowcs(wchar_t *dest, const char *src, size_t n)
        return org_n - n;
 }
-#include "unicode_wcwidth.c"
 int FAST_FUNC iswspace(wint_t wc)
 {
        return (unsigned)wc <= 0x7f && isspace(wc);
@@ -233,6 +231,8 @@ int FAST_FUNC iswpunct(wint_t wc)
        return (unsigned)wc <= 0x7f && ispunct(wc);
 }
+#include "unicode_wcwidth.c"
 #endif /* Homegrown Unicode support */
@@ -251,8 +251,22 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
        char *dst;
        unsigned dst_len;
-        if (unicode_status != UNICODE_ON)
+        if (unicode_status != UNICODE_ON) {
-                return xasprintf("%-*.*s", width, width, src);
+                char *d = dst = xmalloc(width + 1);
+                while ((int)--width >= 0) {
+                        unsigned char c = *src;
+                        if (c == '\0') {
+                                do
+                                        *d++ = ' ';
+                                while ((int)--width >= 0);
+                                break;
+                        }
+                        *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
+                        src++;
+                }
+                *d = '\0';
+                return dst;
+        }
        dst = NULL;
        dst_len = 0;
@@ -260,31 +274,64 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
                int w;
                wchar_t wc;
-                dst = xrealloc(dst, dst_len + 2 * MB_CUR_MAX);
 #if ENABLE_LOCALE_SUPPORT
                {
                        mbstate_t mbst = { 0 };
                        ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
-                        if (rc <= 0) /* error, or end-of-string */
+                        /* If invalid sequence is seen: -1 is returned,
+                         * src points to the invalid sequence, errno = EILSEQ.
+                         * Else number of wchars (excluding terminating L'\0')
+                         * written to dest is returned.
+                         * If len (here: 1) non-L'\0' wchars stored at dest,
+                         * src points to the next char to be converted.
+                         * If string is completely converted: src = NULL.
+                         */
+                        if (rc == 0) /* end-of-string */
                                break;
+                        if (rc < 0) { /* error */
+                                src++;
+                                goto subst;
+                        }
+                        if (!iswprint(wc))
+                                goto subst;
                }
 #else
-                src = mbstowc_internal(&wc, src);
+                {
-                if (!src || wc == 0) /* error, or end-of-string */
+                        const char *src1 = mbstowc_internal(&wc, src);
-                        break;
+                        /* src = NULL: invalid sequence is seen,
+                         * else: wc is set, src is advanced to next mb char
+                         */
+                        if (src1) {/* no error */
+                                if (wc == 0) /* end-of-string */
+                                        break;
+                                src = src1;
+                        } else { /* error */
+                                src++;
+                                goto subst;
+                        }
+                }
 #endif
+                if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
+                        goto subst;
                w = wcwidth(wc);
-                if (w < 0) /* non-printable wchar */
+                if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
-                        break;
+                 || (!ENABLE_UNICODE_COMBINING_WCHARS && wc <= 0)
+                 || (!ENABLE_UNICODE_WIDE_WCHARS && wc > 1)
+                ) {
+ subst:
+                        wc = CONFIG_SUBST_WCHAR;
+                        w = 1;
+                }
                width -= w;
-                if ((int)width < 0) { /* string is longer than width */
+                /* Note: if width == 0, we still may add more chars,
+                 * they may be zero-width or combining ones */
+                if ((int)width < 0) {
+                        /* can't add this wc, string would become longer than width */
                        width += w;
-                        while (width) {
-                                dst[dst_len++] = ' ';
-                                width--;
-                        }
                        break;
                }
+                dst = xrealloc(dst, dst_len + MB_CUR_MAX);
 #if ENABLE_LOCALE_SUPPORT
                {
                        mbstate_t mbst = { 0 };
@@ -294,7 +341,14 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
                dst_len += wcrtomb_internal(&dst[dst_len], wc);
 #endif
        }
+        /* Pad to remaining width */
+        dst = xrealloc(dst, dst_len + width + 1);
+        while ((int)--width >= 0) {
+                dst[dst_len++] = ' ';
+        }
        dst[dst_len] = '\0';
        return dst;
 }
diff --git a/libbb/unicode_wcwidth.c b/libbb/unicode_wcwidth.c
index 8d301f7c3..ab62b18f6 100644
--- a/libbb/unicode_wcwidth.c
+++ b/libbb/unicode_wcwidth.c
@@ -59,6 +59,13 @@
 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
 */
+#if CONFIG_LAST_SUPPORTED_WCHAR == 0
+# define LAST_SUPPORTED_WCHAR ((1 << 31) - 1)
+#else
+# define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
+#endif
+#if LAST_SUPPORTED_WCHAR >= 0x0300
 struct interval {
        uint16_t first;
        uint16_t last;
@@ -111,6 +118,7 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
        }
        return 0;
 }
+#endif
 /* The following two functions define the column width of an ISO 10646
@@ -146,6 +154,7 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
 */
 static int wcwidth(unsigned ucs)
 {
+#if LAST_SUPPORTED_WCHAR >= 0x0300
        /* sorted list of non-overlapping intervals of non-spacing characters */
        /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
        static const struct interval combining[] = {
@@ -420,12 +429,15 @@ static int wcwidth(unsigned ucs)
 #undef BIG_
 #undef PAIR
        };
+# if LAST_SUPPORTED_WCHAR >= 0x1100
        static const struct interval combining0x10000[] = {
                { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
                { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
                { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
                { 0xD242, 0xD244 }
        };
+# endif
+#endif
        if (ucs == 0)
                return 0;
@@ -435,6 +447,9 @@ static int wcwidth(unsigned ucs)
        if (ucs < 0x0300) /* optimization */
                return 1;
+#if LAST_SUPPORTED_WCHAR < 0x0300
+        return -1;
+#else
        /* binary search in table of non-spacing characters */
        if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
                return 0;
@@ -444,6 +459,9 @@ static int wcwidth(unsigned ucs)
        if (ucs < 0x1100) /* optimization */
                return 1;
+# if LAST_SUPPORTED_WCHAR < 0x1100
+        return -1;
+# else
        /* binary search in table of non-spacing characters, cont. */
        if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
                return 0;
@@ -458,8 +476,8 @@ static int wcwidth(unsigned ucs)
        return 1 +
                (  (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
-                || ucs == 0x2329
+                || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
-                || ucs == 0x232a
+                || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
                || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
                || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
                || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
@@ -470,4 +488,6 @@ static int wcwidth(unsigned ucs)
                || (ucs >= 0x20000 && ucs <= 0x2fffd)
                || (ucs >= 0x30000 && ucs <= 0x3fffd)
                );
+# endif
+#endif
 }
author	Denys Vlasenko <vda.linux@googlemail.com>	2010-01-29 09:11:47 +0100
committer	Denys Vlasenko <vda.linux@googlemail.com>	2010-01-29 09:11:47 +0100
commit	2edba21f4c59d071f2241c2f47021c7034ec7cb8 (patch)
tree	6cf3de29bfbdafa26fddbc1cd3dc467a2d8263f6
parent	083e172641b64c564b7ec5478197dccbde43b421 (diff)
download	busybox-w32-2edba21f4c59d071f2241c2f47021c7034ec7cb8.tar.gz busybox-w32-2edba21f4c59d071f2241c2f47021c7034ec7cb8.tar.bz2 busybox-w32-2edba21f4c59d071f2241c2f47021c7034ec7cb8.zip

diff --git a/Config.in b/Config.in index 8e751530c..68444839d 100644 --- a/Config.in +++ b/Config.in
@@ -141,6 +141,57 @@ config FEATURE_CHECK_UNICODE_IN_ENV
141		141
142	Otherwise, Unicode support will be always enabled and active.	142	Otherwise, Unicode support will be always enabled and active.
143		143
		144	config SUBST_WCHAR
		145	int "Character code to substitute unprintable characters with"
		146	range 1 4294967295
		147	depends on FEATURE_ASSUME_UNICODE
		148	default 63
		149	help
		150	Typical values are 63 for '?' (works with any output device),
		151	30 for ASCII substitute control code,
		152	65533 (0xfffd) for Unicode replacement character.
		153
		154	config LAST_SUPPORTED_WCHAR
		155	int "Range of supported Unicode characters"
		156	range 0 4294967295
		157	depends on FEATURE_ASSUME_UNICODE
		158	default 767
		159	help
		160	Any character with Unicode value bigger than this is assumed
		161	to be non-printable on output device. Many applets replace
		162	such chars with substitution character.
		163
		164	The idea is that many valid printable Unicode chars are
		165	nevertheless not displayed correctly. Think about
		166	combining characters, double-wide hieroglyphs and such.
		167	Many terminals, xterms and such will fail to handle them
		168	correctly.
		169
		170	Typical values are:
		171	126 - ASCII only
		172	767 (0x2ff) - there are no combining chars in [0..767] range
		173	(the range includes Latin 1, Latin Ext. A and B),
		174	code is ~700 bytes smaller for this case.
		175	4351 (0x10ff) - there are no double-wide chars in [0..4351] range,
		176	code is ~300 bytes smaller for this case.
		177	0 - off, any valid printable Unicode character will be printed.
		178
		179	config UNICODE_COMBINING_WCHARS
		180	bool "Allow zero-width Unicode characters on output"
		181	default n
		182	depends on FEATURE_ASSUME_UNICODE
		183	help
		184	With this option off, any Unicode char with width of 0
		185	is substituted on output.
		186
		187	config UNICODE_WIDE_WCHARS
		188	bool "Allow wide Unicode characters on output"
		189	default n
		190	depends on FEATURE_ASSUME_UNICODE
		191	help
		192	With this option off, any Unicode char with width > 1
		193	is substituted on output.
		194
144	config LONG_OPTS	195	config LONG_OPTS
145	bool "Support for --long-options"	196	bool "Support for --long-options"
146	default y	197	default y


diff --git a/libbb/unicode.c b/libbb/unicode.c index 39b173e9c..878af84bc 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c
@@ -216,8 +216,6 @@ size_t FAST_FUNC mbstowcs(wchar_t *dest, const char *src, size_t n)
216	return org_n - n;	216	return org_n - n;
217	}	217	}
218		218
219	#include "unicode_wcwidth.c"
220
221	int FAST_FUNC iswspace(wint_t wc)	219	int FAST_FUNC iswspace(wint_t wc)
222	{	220	{
223	return (unsigned)wc <= 0x7f && isspace(wc);	221	return (unsigned)wc <= 0x7f && isspace(wc);
@@ -233,6 +231,8 @@ int FAST_FUNC iswpunct(wint_t wc)
233	return (unsigned)wc <= 0x7f && ispunct(wc);	231	return (unsigned)wc <= 0x7f && ispunct(wc);
234	}	232	}
235		233
		234	#include "unicode_wcwidth.c"
		235
236	#endif /* Homegrown Unicode support */	236	#endif /* Homegrown Unicode support */
237		237
238		238
@@ -251,8 +251,22 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
251	char *dst;	251	char *dst;
252	unsigned dst_len;	252	unsigned dst_len;
253		253
254	if (unicode_status != UNICODE_ON)	254	if (unicode_status != UNICODE_ON) {
255	return xasprintf("%-*.*s", width, width, src);	255	char *d = dst = xmalloc(width + 1);
		256	while ((int)--width >= 0) {
		257	unsigned char c = *src;
		258	if (c == '\0') {
		259	do
		260	*d++ = ' ';
		261	while ((int)--width >= 0);
		262	break;
		263	}
		264	*d++ = (c >= ' ' && c < 0x7f) ? c : '?';
		265	src++;
		266	}
		267	*d = '\0';
		268	return dst;
		269	}
256		270
257	dst = NULL;	271	dst = NULL;
258	dst_len = 0;	272	dst_len = 0;
@@ -260,31 +274,64 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
260	int w;	274	int w;
261	wchar_t wc;	275	wchar_t wc;
262		276
263	dst = xrealloc(dst, dst_len + 2 * MB_CUR_MAX);
264	#if ENABLE_LOCALE_SUPPORT	277	#if ENABLE_LOCALE_SUPPORT
265	{	278	{
266	mbstate_t mbst = { 0 };	279	mbstate_t mbst = { 0 };
267	ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);	280	ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
268	if (rc <= 0) /* error, or end-of-string */	281	/* If invalid sequence is seen: -1 is returned,
		282	* src points to the invalid sequence, errno = EILSEQ.
		283	* Else number of wchars (excluding terminating L'\0')
		284	* written to dest is returned.
		285	* If len (here: 1) non-L'\0' wchars stored at dest,
		286	* src points to the next char to be converted.
		287	* If string is completely converted: src = NULL.
		288	*/
		289	if (rc == 0) /* end-of-string */
269	break;	290	break;
		291	if (rc < 0) { /* error */
		292	src++;
		293	goto subst;
		294	}
		295	if (!iswprint(wc))
		296	goto subst;
270	}	297	}
271	#else	298	#else
272	src = mbstowc_internal(&wc, src);	299	{
273	if (!src || wc == 0) /* error, or end-of-string */	300	const char *src1 = mbstowc_internal(&wc, src);
274	break;	301	/* src = NULL: invalid sequence is seen,
		302	* else: wc is set, src is advanced to next mb char
		303	*/
		304	if (src1) {/* no error */
		305	if (wc == 0) /* end-of-string */
		306	break;
		307	src = src1;
		308	} else { /* error */
		309	src++;
		310	goto subst;
		311	}
		312	}
275	#endif	313	#endif
		314	if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
		315	goto subst;
276	w = wcwidth(wc);	316	w = wcwidth(wc);
277	if (w < 0) /* non-printable wchar */	317	if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
278	break;	318	|| (!ENABLE_UNICODE_COMBINING_WCHARS && wc <= 0)
		319	|| (!ENABLE_UNICODE_WIDE_WCHARS && wc > 1)
		320	) {
		321	subst:
		322	wc = CONFIG_SUBST_WCHAR;
		323	w = 1;
		324	}
279	width -= w;	325	width -= w;
280	if ((int)width < 0) { /* string is longer than width */	326	/* Note: if width == 0, we still may add more chars,
		327	* they may be zero-width or combining ones */
		328	if ((int)width < 0) {
		329	/* can't add this wc, string would become longer than width */
281	width += w;	330	width += w;
282	while (width) {
283	dst[dst_len++] = ' ';
284	width--;
285	}
286	break;	331	break;
287	}	332	}
		333
		334	dst = xrealloc(dst, dst_len + MB_CUR_MAX);
288	#if ENABLE_LOCALE_SUPPORT	335	#if ENABLE_LOCALE_SUPPORT
289	{	336	{
290	mbstate_t mbst = { 0 };	337	mbstate_t mbst = { 0 };
@@ -294,7 +341,14 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
294	dst_len += wcrtomb_internal(&dst[dst_len], wc);	341	dst_len += wcrtomb_internal(&dst[dst_len], wc);
295	#endif	342	#endif
296	}	343	}
		344
		345	/* Pad to remaining width */
		346	dst = xrealloc(dst, dst_len + width + 1);
		347	while ((int)--width >= 0) {
		348	dst[dst_len++] = ' ';
		349	}
297	dst[dst_len] = '\0';	350	dst[dst_len] = '\0';
		351
298	return dst;	352	return dst;
299	}	353	}
300		354


diff --git a/libbb/unicode_wcwidth.c b/libbb/unicode_wcwidth.c index 8d301f7c3..ab62b18f6 100644 --- a/libbb/unicode_wcwidth.c +++ b/libbb/unicode_wcwidth.c
@@ -59,6 +59,13 @@
59	* Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c	59	* Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
60	*/	60	*/
61		61
		62	#if CONFIG_LAST_SUPPORTED_WCHAR == 0
		63	# define LAST_SUPPORTED_WCHAR ((1 << 31) - 1)
		64	#else
		65	# define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
		66	#endif
		67
		68	#if LAST_SUPPORTED_WCHAR >= 0x0300
62	struct interval {	69	struct interval {
63	uint16_t first;	70	uint16_t first;
64	uint16_t last;	71	uint16_t last;
@@ -111,6 +118,7 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
111	}	118	}
112	return 0;	119	return 0;
113	}	120	}
		121	#endif
114		122
115		123
116	/* The following two functions define the column width of an ISO 10646	124	/* The following two functions define the column width of an ISO 10646
@@ -146,6 +154,7 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
146	*/	154	*/
147	static int wcwidth(unsigned ucs)	155	static int wcwidth(unsigned ucs)
148	{	156	{
		157	#if LAST_SUPPORTED_WCHAR >= 0x0300
149	/* sorted list of non-overlapping intervals of non-spacing characters */	158	/* sorted list of non-overlapping intervals of non-spacing characters */
150	/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */	159	/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
151	static const struct interval combining[] = {	160	static const struct interval combining[] = {
@@ -420,12 +429,15 @@ static int wcwidth(unsigned ucs)
420	#undef BIG_	429	#undef BIG_
421	#undef PAIR	430	#undef PAIR
422	};	431	};
		432	# if LAST_SUPPORTED_WCHAR >= 0x1100
423	static const struct interval combining0x10000[] = {	433	static const struct interval combining0x10000[] = {
424	{ 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },	434	{ 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
425	{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },	435	{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
426	{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },	436	{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
427	{ 0xD242, 0xD244 }	437	{ 0xD242, 0xD244 }
428	};	438	};
		439	# endif
		440	#endif
429		441
430	if (ucs == 0)	442	if (ucs == 0)
431	return 0;	443	return 0;
@@ -435,6 +447,9 @@ static int wcwidth(unsigned ucs)
435	if (ucs < 0x0300) /* optimization */	447	if (ucs < 0x0300) /* optimization */
436	return 1;	448	return 1;
437		449
		450	#if LAST_SUPPORTED_WCHAR < 0x0300
		451	return -1;
		452	#else
438	/* binary search in table of non-spacing characters */	453	/* binary search in table of non-spacing characters */
439	if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))	454	if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
440	return 0;	455	return 0;
@@ -444,6 +459,9 @@ static int wcwidth(unsigned ucs)
444	if (ucs < 0x1100) /* optimization */	459	if (ucs < 0x1100) /* optimization */
445	return 1;	460	return 1;
446		461
		462	# if LAST_SUPPORTED_WCHAR < 0x1100
		463	return -1;
		464	# else
447	/* binary search in table of non-spacing characters, cont. */	465	/* binary search in table of non-spacing characters, cont. */
448	if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))	466	if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
449	return 0;	467	return 0;
@@ -458,8 +476,8 @@ static int wcwidth(unsigned ucs)
458		476
459	return 1 +	477	return 1 +
460	( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */	478	( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
461	|| ucs == 0x2329	479	|| ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
462	|| ucs == 0x232a	480	|| ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
463	|| (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */	481	|| (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
464	|| (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */	482	|| (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
465	|| (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */	483	|| (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
@@ -470,4 +488,6 @@ static int wcwidth(unsigned ucs)
470	|| (ucs >= 0x20000 && ucs <= 0x2fffd)	488	|| (ucs >= 0x20000 && ucs <= 0x2fffd)
471	|| (ucs >= 0x30000 && ucs <= 0x3fffd)	489	|| (ucs >= 0x30000 && ucs <= 0x3fffd)
472	);	490	);
		491	# endif
		492	#endif
473	}	493	}