summaryrefslogtreecommitdiff
path: root/libbb/unicode_wcwidth.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbb/unicode_wcwidth.c')
-rw-r--r--libbb/unicode_wcwidth.c543
1 files changed, 0 insertions, 543 deletions
diff --git a/libbb/unicode_wcwidth.c b/libbb/unicode_wcwidth.c
deleted file mode 100644
index 0bb622705..000000000
--- a/libbb/unicode_wcwidth.c
+++ /dev/null
@@ -1,543 +0,0 @@
1/*
2 * This is an implementation of wcwidth() and wcswidth() (defined in
3 * IEEE Std 1003.1-2001) for Unicode.
4 *
5 * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
6 * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
7 *
8 * In fixed-width output devices, Latin characters all occupy a single
9 * "cell" position of equal width, whereas ideographic CJK characters
10 * occupy two such cells. Interoperability between terminal-line
11 * applications and (teletype-style) character terminals using the
12 * UTF-8 encoding requires agreement on which character should advance
13 * the cursor by how many cell positions. No established formal
14 * standards exist at present on which Unicode character shall occupy
15 * how many cell positions on character terminals. These routines are
16 * a first attempt of defining such behavior based on simple rules
17 * applied to data provided by the Unicode Consortium.
18 *
19 * For some graphical characters, the Unicode standard explicitly
20 * defines a character-cell width via the definition of the East Asian
21 * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
22 * In all these cases, there is no ambiguity about which width a
23 * terminal shall use. For characters in the East Asian Ambiguous (A)
24 * class, the width choice depends purely on a preference of backward
25 * compatibility with either historic CJK or Western practice.
26 * Choosing single-width for these characters is easy to justify as
27 * the appropriate long-term solution, as the CJK practice of
28 * displaying these characters as double-width comes from historic
29 * implementation simplicity (8-bit encoded characters were displayed
30 * single-width and 16-bit ones double-width, even for Greek,
31 * Cyrillic, etc.) and not any typographic considerations.
32 *
33 * Much less clear is the choice of width for the Not East Asian
34 * (Neutral) class. Existing practice does not dictate a width for any
35 * of these characters. It would nevertheless make sense
36 * typographically to allocate two character cells to characters such
37 * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
38 * represented adequately with a single-width glyph. The following
39 * routines at present merely assign a single-cell width to all
40 * neutral characters, in the interest of simplicity. This is not
41 * entirely satisfactory and should be reconsidered before
42 * establishing a formal standard in this area. At the moment, the
43 * decision which Not East Asian (Neutral) characters should be
44 * represented by double-width glyphs cannot yet be answered by
45 * applying a simple rule from the Unicode database content. Setting
46 * up a proper standard for the behavior of UTF-8 character terminals
47 * will require a careful analysis not only of each Unicode character,
48 * but also of each presentation form, something the author of these
49 * routines has so far avoided doing.
50 *
51 * http://www.unicode.org/unicode/reports/tr11/
52 *
53 * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
54 *
55 * Permission to use, copy, modify, and distribute this software
56 * for any purpose and without fee is hereby granted. The author
57 * disclaims all warranties with regard to this software.
58 *
59 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
60 */
61
62/* Assigned Unicode character ranges:
63 * Plane Range
64 * 0 0000–FFFF Basic Multilingual Plane
65 * 1 10000–1FFFF Supplementary Multilingual Plane
66 * 2 20000–2FFFF Supplementary Ideographic Plane
67 * 3 30000-3FFFF Tertiary Ideographic Plane (no chars assigned yet)
68 * 4-13 40000–DFFFF currently unassigned
69 * 14 E0000–EFFFF Supplementary Special-purpose Plane
70 * 15 F0000–FFFFF Supplementary Private Use Area-A
71 * 16 100000–10FFFF Supplementary Private Use Area-B
72 *
73 * "Supplementary Special-purpose Plane currently contains non-graphical
74 * characters in two blocks of 128 and 240 characters. The first block
75 * is for language tag characters for use when language cannot be indicated
76 * through other protocols (such as the xml:lang attribute in XML).
77 * The other block contains glyph variation selectors to indicate
78 * an alternate glyph for a character that cannot be determined by context."
79 *
80 * In simpler terms: it is a tool to fix the "Han unification" mess
81 * created by Unicode committee, to select Chinese/Japanese/Korean/Taiwan
82 * version of a character. (They forgot that the whole purpose of the Unicode
83 * was to be able to write all chars in one charset without such tricks).
84 * Until East Asian users say it is actually necessary to support these
85 * code points in console applications like busybox
86 * (i.e. do these chars ever appear in filenames, hostnames, text files
87 * and such?), we are treating these code points as invalid.
88 *
89 * Tertiary Ideographic Plane is also ignored for now,
90 * until Unicode committee assigns something there.
91 */
92
93#if LAST_SUPPORTED_WCHAR >= 0x300
/* Inclusive range [first, last] of 16-bit code points. */
struct interval {
	uint16_t first;
	uint16_t last;
};

/*
 * Binary search for ucs in a table of intervals.
 *
 * table must be sorted in ascending order and its intervals must not
 * overlap. max is the index of the LAST element (element count - 1),
 * not the element count.
 *
 * Returns 1 if ucs lies inside one of the intervals, 0 otherwise.
 */
static int in_interval_table(unsigned ucs, const struct interval *table, unsigned max)
{
	unsigned lo;
	unsigned hi;

	/* Quick reject: outside the span covered by the whole table */
	if (ucs < table[0].first || ucs > table[max].last)
		return 0;

	/*
	 * Search the half-open index window [lo, hi). The upper bound is
	 * exclusive so that it is never decremented below zero, which keeps
	 * the unsigned arithmetic safe independently of the check above.
	 */
	lo = 0;
	hi = max + 1;
	while (lo < hi) {
		unsigned mid = lo + (hi - lo) / 2;

		if (ucs > table[mid].last)
			lo = mid + 1;
		else if (ucs < table[mid].first)
			hi = mid;
		else
			return 1;
	}
	return 0;
}
120
/*
 * Binary search for ucs in a table of packed ranges.
 *
 * Each uint16_t entry encodes a short inclusive range:
 *   first = entry >> 2
 *   last  = first + (entry & 3)
 * so a range covers between 1 and 4 code points.
 *
 * table must be sorted in ascending order with non-overlapping ranges.
 * max is the index of the LAST element (element count - 1), not the
 * element count.
 *
 * Returns 1 if ucs lies inside one of the ranges, 0 otherwise.
 */
static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
{
	unsigned lo;
	unsigned hi;
	unsigned first, last;

	/*
	 * Quick reject: outside the span covered by the whole table.
	 * The lower bound comes from the first entry, the upper bound
	 * from the LAST entry (using the first entry for both would
	 * reject everything beyond the first range).
	 */
	first = table[0] >> 2;
	last = (table[max] >> 2) + (table[max] & 3);
	if (ucs < first || ucs > last)
		return 0;

	/* Search the half-open index window [lo, hi) */
	lo = 0;
	hi = max + 1;
	while (lo < hi) {
		unsigned mid = lo + (hi - lo) / 2;

		first = table[mid] >> 2;
		last = first + (table[mid] & 3);
		if (ucs > last)
			lo = mid + 1;
		else if (ucs < first)
			hi = mid;
		else
			return 1;
	}
	return 0;
}
146#endif
147
148
149/* The following two functions define the column width of an ISO 10646
150 * character as follows:
151 *
152 * - The null character (U+0000) has a column width of 0.
153 *
154 * - Other C0/C1 control characters and DEL will lead to a return
155 * value of -1.
156 *
157 * - Non-spacing and enclosing combining characters (general
158 * category code Mn or Me in the Unicode database) have a
159 * column width of 0.
160 *
161 * - SOFT HYPHEN (U+00AD) has a column width of 1.
162 *
163 * - Other format characters (general category code Cf in the Unicode
164 * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
165 *
166 * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
167 * have a column width of 0.
168 *
169 * - Spacing characters in the East Asian Wide (W) or East Asian
170 * Full-width (F) category as defined in Unicode Technical
171 * Report #11 have a column width of 2.
172 *
173 * - All remaining characters (including all printable
174 * ISO 8859-1 and WGL4 characters, Unicode control characters,
175 * etc.) have a column width of 1.
176 *
177 * This implementation assumes that wchar_t characters are encoded
178 * in ISO 10646.
179 */
180static int wcwidth(unsigned ucs)
181{
182#if LAST_SUPPORTED_WCHAR >= 0x300
183 /* sorted list of non-overlapping intervals of non-spacing characters */
184 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
185 static const struct interval combining[] = {
186#define BIG_(a,b) { a, b },
187#define PAIR(a,b)
188 /* PAIR if < 0x4000 and no more than 4 chars big */
189 BIG_(0x0300, 0x036F)
190 PAIR(0x0483, 0x0486)
191 PAIR(0x0488, 0x0489)
192 BIG_(0x0591, 0x05BD)
193 PAIR(0x05BF, 0x05BF)
194 PAIR(0x05C1, 0x05C2)
195 PAIR(0x05C4, 0x05C5)
196 PAIR(0x05C7, 0x05C7)
197 PAIR(0x0600, 0x0603)
198 BIG_(0x0610, 0x0615)
199 BIG_(0x064B, 0x065E)
200 PAIR(0x0670, 0x0670)
201 BIG_(0x06D6, 0x06E4)
202 PAIR(0x06E7, 0x06E8)
203 PAIR(0x06EA, 0x06ED)
204 PAIR(0x070F, 0x070F)
205 PAIR(0x0711, 0x0711)
206 BIG_(0x0730, 0x074A)
207 BIG_(0x07A6, 0x07B0)
208 BIG_(0x07EB, 0x07F3)
209 PAIR(0x0901, 0x0902)
210 PAIR(0x093C, 0x093C)
211 BIG_(0x0941, 0x0948)
212 PAIR(0x094D, 0x094D)
213 PAIR(0x0951, 0x0954)
214 PAIR(0x0962, 0x0963)
215 PAIR(0x0981, 0x0981)
216 PAIR(0x09BC, 0x09BC)
217 PAIR(0x09C1, 0x09C4)
218 PAIR(0x09CD, 0x09CD)
219 PAIR(0x09E2, 0x09E3)
220 PAIR(0x0A01, 0x0A02)
221 PAIR(0x0A3C, 0x0A3C)
222 PAIR(0x0A41, 0x0A42)
223 PAIR(0x0A47, 0x0A48)
224 PAIR(0x0A4B, 0x0A4D)
225 PAIR(0x0A70, 0x0A71)
226 PAIR(0x0A81, 0x0A82)
227 PAIR(0x0ABC, 0x0ABC)
228 BIG_(0x0AC1, 0x0AC5)
229 PAIR(0x0AC7, 0x0AC8)
230 PAIR(0x0ACD, 0x0ACD)
231 PAIR(0x0AE2, 0x0AE3)
232 PAIR(0x0B01, 0x0B01)
233 PAIR(0x0B3C, 0x0B3C)
234 PAIR(0x0B3F, 0x0B3F)
235 PAIR(0x0B41, 0x0B43)
236 PAIR(0x0B4D, 0x0B4D)
237 PAIR(0x0B56, 0x0B56)
238 PAIR(0x0B82, 0x0B82)
239 PAIR(0x0BC0, 0x0BC0)
240 PAIR(0x0BCD, 0x0BCD)
241 PAIR(0x0C3E, 0x0C40)
242 PAIR(0x0C46, 0x0C48)
243 PAIR(0x0C4A, 0x0C4D)
244 PAIR(0x0C55, 0x0C56)
245 PAIR(0x0CBC, 0x0CBC)
246 PAIR(0x0CBF, 0x0CBF)
247 PAIR(0x0CC6, 0x0CC6)
248 PAIR(0x0CCC, 0x0CCD)
249 PAIR(0x0CE2, 0x0CE3)
250 PAIR(0x0D41, 0x0D43)
251 PAIR(0x0D4D, 0x0D4D)
252 PAIR(0x0DCA, 0x0DCA)
253 PAIR(0x0DD2, 0x0DD4)
254 PAIR(0x0DD6, 0x0DD6)
255 PAIR(0x0E31, 0x0E31)
256 BIG_(0x0E34, 0x0E3A)
257 BIG_(0x0E47, 0x0E4E)
258 PAIR(0x0EB1, 0x0EB1)
259 BIG_(0x0EB4, 0x0EB9)
260 PAIR(0x0EBB, 0x0EBC)
261 BIG_(0x0EC8, 0x0ECD)
262 PAIR(0x0F18, 0x0F19)
263 PAIR(0x0F35, 0x0F35)
264 PAIR(0x0F37, 0x0F37)
265 PAIR(0x0F39, 0x0F39)
266 BIG_(0x0F71, 0x0F7E)
267 BIG_(0x0F80, 0x0F84)
268 PAIR(0x0F86, 0x0F87)
269 PAIR(0x0FC6, 0x0FC6)
270 BIG_(0x0F90, 0x0F97)
271 BIG_(0x0F99, 0x0FBC)
272 PAIR(0x102D, 0x1030)
273 PAIR(0x1032, 0x1032)
274 PAIR(0x1036, 0x1037)
275 PAIR(0x1039, 0x1039)
276 PAIR(0x1058, 0x1059)
277 BIG_(0x1160, 0x11FF)
278 PAIR(0x135F, 0x135F)
279 PAIR(0x1712, 0x1714)
280 PAIR(0x1732, 0x1734)
281 PAIR(0x1752, 0x1753)
282 PAIR(0x1772, 0x1773)
283 PAIR(0x17B4, 0x17B5)
284 BIG_(0x17B7, 0x17BD)
285 PAIR(0x17C6, 0x17C6)
286 BIG_(0x17C9, 0x17D3)
287 PAIR(0x17DD, 0x17DD)
288 PAIR(0x180B, 0x180D)
289 PAIR(0x18A9, 0x18A9)
290 PAIR(0x1920, 0x1922)
291 PAIR(0x1927, 0x1928)
292 PAIR(0x1932, 0x1932)
293 PAIR(0x1939, 0x193B)
294 PAIR(0x1A17, 0x1A18)
295 PAIR(0x1B00, 0x1B03)
296 PAIR(0x1B34, 0x1B34)
297 BIG_(0x1B36, 0x1B3A)
298 PAIR(0x1B3C, 0x1B3C)
299 PAIR(0x1B42, 0x1B42)
300 BIG_(0x1B6B, 0x1B73)
301 BIG_(0x1DC0, 0x1DCA)
302 PAIR(0x1DFE, 0x1DFF)
303 BIG_(0x200B, 0x200F)
304 BIG_(0x202A, 0x202E)
305 PAIR(0x2060, 0x2063)
306 BIG_(0x206A, 0x206F)
307 BIG_(0x20D0, 0x20EF)
308 BIG_(0x302A, 0x302F)
309 PAIR(0x3099, 0x309A)
310 /* Too big to be packed in PAIRs: */
311 { 0xA806, 0xA806 },
312 { 0xA80B, 0xA80B },
313 { 0xA825, 0xA826 },
314 { 0xFB1E, 0xFB1E },
315 { 0xFE00, 0xFE0F },
316 { 0xFE20, 0xFE23 },
317 { 0xFEFF, 0xFEFF },
318 { 0xFFF9, 0xFFFB }
319#undef BIG_
320#undef PAIR
321 };
322 static const uint16_t combining1[] = {
323#define BIG_(a,b)
324#define PAIR(a,b) (a << 2) | (b-a),
325 /* Exact copy-n-paste of the above: */
326 BIG_(0x0300, 0x036F)
327 PAIR(0x0483, 0x0486)
328 PAIR(0x0488, 0x0489)
329 BIG_(0x0591, 0x05BD)
330 PAIR(0x05BF, 0x05BF)
331 PAIR(0x05C1, 0x05C2)
332 PAIR(0x05C4, 0x05C5)
333 PAIR(0x05C7, 0x05C7)
334 PAIR(0x0600, 0x0603)
335 BIG_(0x0610, 0x0615)
336 BIG_(0x064B, 0x065E)
337 PAIR(0x0670, 0x0670)
338 BIG_(0x06D6, 0x06E4)
339 PAIR(0x06E7, 0x06E8)
340 PAIR(0x06EA, 0x06ED)
341 PAIR(0x070F, 0x070F)
342 PAIR(0x0711, 0x0711)
343 BIG_(0x0730, 0x074A)
344 BIG_(0x07A6, 0x07B0)
345 BIG_(0x07EB, 0x07F3)
346 PAIR(0x0901, 0x0902)
347 PAIR(0x093C, 0x093C)
348 BIG_(0x0941, 0x0948)
349 PAIR(0x094D, 0x094D)
350 PAIR(0x0951, 0x0954)
351 PAIR(0x0962, 0x0963)
352 PAIR(0x0981, 0x0981)
353 PAIR(0x09BC, 0x09BC)
354 PAIR(0x09C1, 0x09C4)
355 PAIR(0x09CD, 0x09CD)
356 PAIR(0x09E2, 0x09E3)
357 PAIR(0x0A01, 0x0A02)
358 PAIR(0x0A3C, 0x0A3C)
359 PAIR(0x0A41, 0x0A42)
360 PAIR(0x0A47, 0x0A48)
361 PAIR(0x0A4B, 0x0A4D)
362 PAIR(0x0A70, 0x0A71)
363 PAIR(0x0A81, 0x0A82)
364 PAIR(0x0ABC, 0x0ABC)
365 BIG_(0x0AC1, 0x0AC5)
366 PAIR(0x0AC7, 0x0AC8)
367 PAIR(0x0ACD, 0x0ACD)
368 PAIR(0x0AE2, 0x0AE3)
369 PAIR(0x0B01, 0x0B01)
370 PAIR(0x0B3C, 0x0B3C)
371 PAIR(0x0B3F, 0x0B3F)
372 PAIR(0x0B41, 0x0B43)
373 PAIR(0x0B4D, 0x0B4D)
374 PAIR(0x0B56, 0x0B56)
375 PAIR(0x0B82, 0x0B82)
376 PAIR(0x0BC0, 0x0BC0)
377 PAIR(0x0BCD, 0x0BCD)
378 PAIR(0x0C3E, 0x0C40)
379 PAIR(0x0C46, 0x0C48)
380 PAIR(0x0C4A, 0x0C4D)
381 PAIR(0x0C55, 0x0C56)
382 PAIR(0x0CBC, 0x0CBC)
383 PAIR(0x0CBF, 0x0CBF)
384 PAIR(0x0CC6, 0x0CC6)
385 PAIR(0x0CCC, 0x0CCD)
386 PAIR(0x0CE2, 0x0CE3)
387 PAIR(0x0D41, 0x0D43)
388 PAIR(0x0D4D, 0x0D4D)
389 PAIR(0x0DCA, 0x0DCA)
390 PAIR(0x0DD2, 0x0DD4)
391 PAIR(0x0DD6, 0x0DD6)
392 PAIR(0x0E31, 0x0E31)
393 BIG_(0x0E34, 0x0E3A)
394 BIG_(0x0E47, 0x0E4E)
395 PAIR(0x0EB1, 0x0EB1)
396 BIG_(0x0EB4, 0x0EB9)
397 PAIR(0x0EBB, 0x0EBC)
398 BIG_(0x0EC8, 0x0ECD)
399 PAIR(0x0F18, 0x0F19)
400 PAIR(0x0F35, 0x0F35)
401 PAIR(0x0F37, 0x0F37)
402 PAIR(0x0F39, 0x0F39)
403 BIG_(0x0F71, 0x0F7E)
404 BIG_(0x0F80, 0x0F84)
405 PAIR(0x0F86, 0x0F87)
406 PAIR(0x0FC6, 0x0FC6)
407 BIG_(0x0F90, 0x0F97)
408 BIG_(0x0F99, 0x0FBC)
409 PAIR(0x102D, 0x1030)
410 PAIR(0x1032, 0x1032)
411 PAIR(0x1036, 0x1037)
412 PAIR(0x1039, 0x1039)
413 PAIR(0x1058, 0x1059)
414 BIG_(0x1160, 0x11FF)
415 PAIR(0x135F, 0x135F)
416 PAIR(0x1712, 0x1714)
417 PAIR(0x1732, 0x1734)
418 PAIR(0x1752, 0x1753)
419 PAIR(0x1772, 0x1773)
420 PAIR(0x17B4, 0x17B5)
421 BIG_(0x17B7, 0x17BD)
422 PAIR(0x17C6, 0x17C6)
423 BIG_(0x17C9, 0x17D3)
424 PAIR(0x17DD, 0x17DD)
425 PAIR(0x180B, 0x180D)
426 PAIR(0x18A9, 0x18A9)
427 PAIR(0x1920, 0x1922)
428 PAIR(0x1927, 0x1928)
429 PAIR(0x1932, 0x1932)
430 PAIR(0x1939, 0x193B)
431 PAIR(0x1A17, 0x1A18)
432 PAIR(0x1B00, 0x1B03)
433 PAIR(0x1B34, 0x1B34)
434 BIG_(0x1B36, 0x1B3A)
435 PAIR(0x1B3C, 0x1B3C)
436 PAIR(0x1B42, 0x1B42)
437 BIG_(0x1B6B, 0x1B73)
438 BIG_(0x1DC0, 0x1DCA)
439 PAIR(0x1DFE, 0x1DFF)
440 BIG_(0x200B, 0x200F)
441 BIG_(0x202A, 0x202E)
442 PAIR(0x2060, 0x2063)
443 BIG_(0x206A, 0x206F)
444 BIG_(0x20D0, 0x20EF)
445 BIG_(0x302A, 0x302F)
446 PAIR(0x3099, 0x309A)
447#undef BIG_
448#undef PAIR
449 };
450 struct CHECK {
451#define BIG_(a,b) char big##a[b-a <= 3 ? -1 : 1];
452#define PAIR(a,b) char pair##a[b-a > 3 ? -1 : 1];
453 /* Copy-n-paste it here again to verify correctness */
454#undef BIG_
455#undef PAIR
456 };
457#endif
458
459 if (ucs == 0)
460 return 0;
461
462 /* Test for 8-bit control characters (00-1f, 80-9f, 7f) */
463 if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
464 return -1;
465 /* Quick abort if it is an obviously invalid char */
466 if (ucs > LAST_SUPPORTED_WCHAR)
467 return -1;
468
469 /* Optimization: no combining chars below 0x300 */
470 if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
471 return 1;
472
473#if LAST_SUPPORTED_WCHAR >= 0x300
474 /* Binary search in table of non-spacing characters */
475 if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
476 return 0;
477 if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
478 return 0;
479
480 /* Optimization: all chars below 0x1100 are not double-width */
481 if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
482 return 1;
483
484# if LAST_SUPPORTED_WCHAR >= 0x1100
485 /* Invalid code points: */
486 /* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
487 /* Private Use Area (e000..f8ff) */
488 /* Noncharacters fdd0..fdef */
489 if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
490 || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
491 ) {
492 return -1;
493 }
494 /* 0xfffe and 0xffff in every plane are invalid */
495 if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
496 return -1;
497 }
498
499# if LAST_SUPPORTED_WCHAR >= 0x10000
500 if (ucs >= 0x10000) {
501 /* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
502 static const struct interval combining0x10000[] = {
503 { 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
504 { 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
505 { 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
506 { 0xD242, 0xD244 }
507 };
508 /* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
509 if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
510 return 0;
511 /* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
512 if (LAST_SUPPORTED_WCHAR >= 0xE0001
513 && ( ucs == 0xE0001
514 || (ucs >= 0xE0020 && ucs <= 0xE007F)
515 || (ucs >= 0xE0100 && ucs <= 0xE01EF)
516 )
517 ) {
518 return 0;
519 }
520 }
521# endif
522
523 /* If we arrive here, ucs is not a combining or C0/C1 control character.
524 * Check whether it's 1 char or 2-shar wide.
525 */
526 return 1 +
527 ( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
528 || ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
529 || ucs == 0x232a /* right-pointing angle bracket; also CJK punct. char */
530 || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) /* CJK ... Yi */
531# if LAST_SUPPORTED_WCHAR >= 0xac00
532 || (ucs >= 0xac00 && ucs <= 0xd7a3) /* Hangul Syllables */
533 || (ucs >= 0xf900 && ucs <= 0xfaff) /* CJK Compatibility Ideographs */
534 || (ucs >= 0xfe10 && ucs <= 0xfe19) /* Vertical forms */
535 || (ucs >= 0xfe30 && ucs <= 0xfe6f) /* CJK Compatibility Forms */
536 || (ucs >= 0xff00 && ucs <= 0xff60) /* Fullwidth Forms */
537 || (ucs >= 0xffe0 && ucs <= 0xffe6)
538 || ((ucs >> 17) == (2 >> 1)) /* 20000..3ffff: Supplementary and Tertiary Ideographic Planes */
539# endif
540 );
541# endif /* >= 0x1100 */
542#endif /* >= 0x300 */
543}