about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/unicode.h 4
-rw-r--r-- libbb/unicode.c 4
-rw-r--r-- libbb/wcwidth_alt.c 506
-rwxr-xr-x scripts/mkwcwidth 169
4 files changed, 683 insertions, 0 deletions
diff --git a/include/unicode.h b/include/unicode.h
index e894f7148..cdf35acb7 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -33,7 +33,11 @@ enum {
33 33
34# if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000 34# if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000
35# undef CONFIG_LAST_SUPPORTED_WCHAR 35# undef CONFIG_LAST_SUPPORTED_WCHAR
36# if ENABLE_PLATFORM_MINGW32
37# define CONFIG_LAST_SUPPORTED_WCHAR 0x10ffff /* full unicode range */
38# else
36# define CONFIG_LAST_SUPPORTED_WCHAR 0x2ffff 39# define CONFIG_LAST_SUPPORTED_WCHAR 0x2ffff
40# endif
37# endif 41# endif
38 42
39# if CONFIG_LAST_SUPPORTED_WCHAR < 0x300 43# if CONFIG_LAST_SUPPORTED_WCHAR < 0x300
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 206ec0dcb..a0b2db625 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -276,6 +276,7 @@ int FAST_FUNC iswpunct(wint_t wc)
276 return (unsigned)wc <= 0x7f && ispunct(wc); 276 return (unsigned)wc <= 0x7f && ispunct(wc);
277} 277}
278 278
279# if !ENABLE_PLATFORM_MINGW32 || CONFIG_LAST_SUPPORTED_WCHAR < 0x30000
279 280
280# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300 281# if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300
281struct interval { 282struct interval {
@@ -711,6 +712,9 @@ int FAST_FUNC wcwidth(unsigned ucs)
711# endif /* >= 0x300 */ 712# endif /* >= 0x300 */
712} 713}
713 714
715# else /* ENABLE_PLATFORM_MINGW32 && CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000 */
716# include "wcwidth_alt.c" /* simpler and more up-to-date implementation */
717# endif
714 718
715# if ENABLE_UNICODE_BIDI_SUPPORT 719# if ENABLE_UNICODE_BIDI_SUPPORT
716int FAST_FUNC unicode_bidi_isrtl(wint_t wc) 720int FAST_FUNC unicode_bidi_isrtl(wint_t wc)
diff --git a/libbb/wcwidth_alt.c b/libbb/wcwidth_alt.c
new file mode 100644
index 000000000..9a45ab0e9
--- /dev/null
+++ b/libbb/wcwidth_alt.c
@@ -0,0 +1,506 @@
1/* wcwidth - Unicode 15.1.0, generated by scripts/mkwcwidth.
2 * Copyright (C) 2024 Avi Halachmi <avihpit at yahoo.com>
3 * License: MIT
4 *
5 * Data imported on 2024-03-29 from https://github.com/jquast/wcwidth
6 * commit 0.2.13-3-g056ee4b (2024-02-14 15:05:06 -0500)
7 */
8int FAST_FUNC wcwidth(uint32_t ucs)
9{
10 /* sorted ranges, "first" is clipped to 16 bit, and its high bits
11 * (plane) are deduced from the "planes" array below.
12 * (imported from table_zero.py and table_wide.py)
13 */
14 static const struct range {
15 uint16_t first;
16 uint16_t iswide: 1; /* bitfield order empirically faster */
17 uint16_t difflast: 15;
18 } ranges[] = {
19 #define R(first, last, width) {first & 0xffff, width/2, last-first}
20 R(0x000000, 0x000000, 0), /* nil */
21 R(0x0000ad, 0x0000ad, 0), /* Soft Hyphen */
22 R(0x000300, 0x00036f, 0), /* Combining Grave Accent ..Combining Latin Small Le */
23 R(0x000483, 0x000489, 0), /* Combining Cyrillic Titlo..Combining Cyrillic Milli */
24 R(0x000591, 0x0005bd, 0), /* Hebrew Accent Etnahta ..Hebrew Point Meteg */
25 R(0x0005bf, 0x0005bf, 0), /* Hebrew Point Rafe */
26 R(0x0005c1, 0x0005c2, 0), /* Hebrew Point Shin Dot ..Hebrew Point Sin Dot */
27 R(0x0005c4, 0x0005c5, 0), /* Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot */
28 R(0x0005c7, 0x0005c7, 0), /* Hebrew Point Qamats Qatan */
29 R(0x000600, 0x000605, 0), /* Arabic Number Sign ..Arabic Number Mark Above */
30 R(0x000610, 0x00061a, 0), /* Arabic Sign Sallallahou ..Arabic Small Kasra */
31 R(0x00061c, 0x00061c, 0), /* Arabic Letter Mark */
32 R(0x00064b, 0x00065f, 0), /* Arabic Fathatan ..Arabic Wavy Hamza Below */
33 R(0x000670, 0x000670, 0), /* Arabic Letter Superscript Alef */
34 R(0x0006d6, 0x0006dd, 0), /* Arabic Small High Ligatu..Arabic End Of Ayah */
35 R(0x0006df, 0x0006e4, 0), /* Arabic Small High Rounde..Arabic Small High Madda */
36 R(0x0006e7, 0x0006e8, 0), /* Arabic Small High Yeh ..Arabic Small High Noon */
37 R(0x0006ea, 0x0006ed, 0), /* Arabic Empty Centre Low ..Arabic Small Low Meem */
38 R(0x00070f, 0x00070f, 0), /* Syriac Abbreviation Mark */
39 R(0x000711, 0x000711, 0), /* Syriac Letter Superscript Alaph */
40 R(0x000730, 0x00074a, 0), /* Syriac Pthaha Above ..Syriac Barrekh */
41 R(0x0007a6, 0x0007b0, 0), /* Thaana Abafili ..Thaana Sukun */
42 R(0x0007eb, 0x0007f3, 0), /* Nko Combining Short High..Nko Combining Double Dot */
43 R(0x0007fd, 0x0007fd, 0), /* Nko Dantayalan */
44 R(0x000816, 0x000819, 0), /* Samaritan Mark In ..Samaritan Mark Dagesh */
45 R(0x00081b, 0x000823, 0), /* Samaritan Mark Epentheti..Samaritan Vowel Sign A */
46 R(0x000825, 0x000827, 0), /* Samaritan Vowel Sign Sho..Samaritan Vowel Sign U */
47 R(0x000829, 0x00082d, 0), /* Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa */
48 R(0x000859, 0x00085b, 0), /* Mandaic Affrication Mark..Mandaic Gemination Mark */
49 R(0x000890, 0x000891, 0), /* Arabic Pound Mark Above ..Arabic Piastre Mark Abov */
50 R(0x000898, 0x00089f, 0), /* Arabic Small High Word A..Arabic Half Madda Over M */
51 R(0x0008ca, 0x000903, 0), /* Arabic Small High Farsi ..Devanagari Sign Visarga */
52 R(0x00093a, 0x00093c, 0), /* Devanagari Vowel Sign Oe..Devanagari Sign Nukta */
53 R(0x00093e, 0x00094f, 0), /* Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw */
54 R(0x000951, 0x000957, 0), /* Devanagari Stress Sign U..Devanagari Vowel Sign Uu */
55 R(0x000962, 0x000963, 0), /* Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo */
56 R(0x000981, 0x000983, 0), /* Bengali Sign Candrabindu..Bengali Sign Visarga */
57 R(0x0009bc, 0x0009bc, 0), /* Bengali Sign Nukta */
58 R(0x0009be, 0x0009c4, 0), /* Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal */
59 R(0x0009c7, 0x0009c8, 0), /* Bengali Vowel Sign E ..Bengali Vowel Sign Ai */
60 R(0x0009cb, 0x0009cd, 0), /* Bengali Vowel Sign O ..Bengali Sign Virama */
61 R(0x0009d7, 0x0009d7, 0), /* Bengali Au Length Mark */
62 R(0x0009e2, 0x0009e3, 0), /* Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal */
63 R(0x0009fe, 0x0009fe, 0), /* Bengali Sandhi Mark */
64 R(0x000a01, 0x000a03, 0), /* Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga */
65 R(0x000a3c, 0x000a3c, 0), /* Gurmukhi Sign Nukta */
66 R(0x000a3e, 0x000a42, 0), /* Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu */
67 R(0x000a47, 0x000a48, 0), /* Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai */
68 R(0x000a4b, 0x000a4d, 0), /* Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama */
69 R(0x000a51, 0x000a51, 0), /* Gurmukhi Sign Udaat */
70 R(0x000a70, 0x000a71, 0), /* Gurmukhi Tippi ..Gurmukhi Addak */
71 R(0x000a75, 0x000a75, 0), /* Gurmukhi Sign Yakash */
72 R(0x000a81, 0x000a83, 0), /* Gujarati Sign Candrabind..Gujarati Sign Visarga */
73 R(0x000abc, 0x000abc, 0), /* Gujarati Sign Nukta */
74 R(0x000abe, 0x000ac5, 0), /* Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand */
75 R(0x000ac7, 0x000ac9, 0), /* Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand */
76 R(0x000acb, 0x000acd, 0), /* Gujarati Vowel Sign O ..Gujarati Sign Virama */
77 R(0x000ae2, 0x000ae3, 0), /* Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca */
78 R(0x000afa, 0x000aff, 0), /* Gujarati Sign Sukun ..Gujarati Sign Two-circle */
79 R(0x000b01, 0x000b03, 0), /* Oriya Sign Candrabindu ..Oriya Sign Visarga */
80 R(0x000b3c, 0x000b3c, 0), /* Oriya Sign Nukta */
81 R(0x000b3e, 0x000b44, 0), /* Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic */
82 R(0x000b47, 0x000b48, 0), /* Oriya Vowel Sign E ..Oriya Vowel Sign Ai */
83 R(0x000b4b, 0x000b4d, 0), /* Oriya Vowel Sign O ..Oriya Sign Virama */
84 R(0x000b55, 0x000b57, 0), /* Oriya Sign Overline ..Oriya Au Length Mark */
85 R(0x000b62, 0x000b63, 0), /* Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic */
86 R(0x000b82, 0x000b82, 0), /* Tamil Sign Anusvara */
87 R(0x000bbe, 0x000bc2, 0), /* Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu */
88 R(0x000bc6, 0x000bc8, 0), /* Tamil Vowel Sign E ..Tamil Vowel Sign Ai */
89 R(0x000bca, 0x000bcd, 0), /* Tamil Vowel Sign O ..Tamil Sign Virama */
90 R(0x000bd7, 0x000bd7, 0), /* Tamil Au Length Mark */
91 R(0x000c00, 0x000c04, 0), /* Telugu Sign Combining Ca..Telugu Sign Combining An */
92 R(0x000c3c, 0x000c3c, 0), /* Telugu Sign Nukta */
93 R(0x000c3e, 0x000c44, 0), /* Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali */
94 R(0x000c46, 0x000c48, 0), /* Telugu Vowel Sign E ..Telugu Vowel Sign Ai */
95 R(0x000c4a, 0x000c4d, 0), /* Telugu Vowel Sign O ..Telugu Sign Virama */
96 R(0x000c55, 0x000c56, 0), /* Telugu Length Mark ..Telugu Ai Length Mark */
97 R(0x000c62, 0x000c63, 0), /* Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali */
98 R(0x000c81, 0x000c83, 0), /* Kannada Sign Candrabindu..Kannada Sign Visarga */
99 R(0x000cbc, 0x000cbc, 0), /* Kannada Sign Nukta */
100 R(0x000cbe, 0x000cc4, 0), /* Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal */
101 R(0x000cc6, 0x000cc8, 0), /* Kannada Vowel Sign E ..Kannada Vowel Sign Ai */
102 R(0x000cca, 0x000ccd, 0), /* Kannada Vowel Sign O ..Kannada Sign Virama */
103 R(0x000cd5, 0x000cd6, 0), /* Kannada Length Mark ..Kannada Ai Length Mark */
104 R(0x000ce2, 0x000ce3, 0), /* Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal */
105 R(0x000cf3, 0x000cf3, 0), /* Kannada Sign Combining Anusvara Above Right */
106 R(0x000d00, 0x000d03, 0), /* Malayalam Sign Combining..Malayalam Sign Visarga */
107 R(0x000d3b, 0x000d3c, 0), /* Malayalam Sign Vertical ..Malayalam Sign Circular */
108 R(0x000d3e, 0x000d44, 0), /* Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc */
109 R(0x000d46, 0x000d48, 0), /* Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai */
110 R(0x000d4a, 0x000d4d, 0), /* Malayalam Vowel Sign O ..Malayalam Sign Virama */
111 R(0x000d57, 0x000d57, 0), /* Malayalam Au Length Mark */
112 R(0x000d62, 0x000d63, 0), /* Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc */
113 R(0x000d81, 0x000d83, 0), /* Sinhala Sign Candrabindu..Sinhala Sign Visargaya */
114 R(0x000dca, 0x000dca, 0), /* Sinhala Sign Al-lakuna */
115 R(0x000dcf, 0x000dd4, 0), /* Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti */
116 R(0x000dd6, 0x000dd6, 0), /* Sinhala Vowel Sign Diga Paa-pilla */
117 R(0x000dd8, 0x000ddf, 0), /* Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan */
118 R(0x000df2, 0x000df3, 0), /* Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga */
119 R(0x000e31, 0x000e31, 0), /* Thai Character Mai Han-akat */
120 R(0x000e34, 0x000e3a, 0), /* Thai Character Sara I ..Thai Character Phinthu */
121 R(0x000e47, 0x000e4e, 0), /* Thai Character Maitaikhu..Thai Character Yamakkan */
122 R(0x000eb1, 0x000eb1, 0), /* Lao Vowel Sign Mai Kan */
123 R(0x000eb4, 0x000ebc, 0), /* Lao Vowel Sign I ..Lao Semivowel Sign Lo */
124 R(0x000ec8, 0x000ece, 0), /* Lao Tone Mai Ek ..Lao Yamakkan */
125 R(0x000f18, 0x000f19, 0), /* Tibetan Astrological Sig..Tibetan Astrological Sig */
126 R(0x000f35, 0x000f35, 0), /* Tibetan Mark Ngas Bzung Nyi Zla */
127 R(0x000f37, 0x000f37, 0), /* Tibetan Mark Ngas Bzung Sgor Rtags */
128 R(0x000f39, 0x000f39, 0), /* Tibetan Mark Tsa -phru */
129 R(0x000f3e, 0x000f3f, 0), /* Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes */
130 R(0x000f71, 0x000f84, 0), /* Tibetan Vowel Sign Aa ..Tibetan Mark Halanta */
131 R(0x000f86, 0x000f87, 0), /* Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags */
132 R(0x000f8d, 0x000f97, 0), /* Tibetan Subjoined Sign L..Tibetan Subjoined Letter */
133 R(0x000f99, 0x000fbc, 0), /* Tibetan Subjoined Letter..Tibetan Subjoined Letter */
134 R(0x000fc6, 0x000fc6, 0), /* Tibetan Symbol Padma Gdan */
135 R(0x00102b, 0x00103e, 0), /* Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M */
136 R(0x001056, 0x001059, 0), /* Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal */
137 R(0x00105e, 0x001060, 0), /* Myanmar Consonant Sign M..Myanmar Consonant Sign M */
138 R(0x001062, 0x001064, 0), /* Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K */
139 R(0x001067, 0x00106d, 0), /* Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo */
140 R(0x001071, 0x001074, 0), /* Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah */
141 R(0x001082, 0x00108d, 0), /* Myanmar Consonant Sign S..Myanmar Sign Shan Counci */
142 R(0x00108f, 0x00108f, 0), /* Myanmar Sign Rumai Palaung Tone-5 */
143 R(0x00109a, 0x00109d, 0), /* Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton */
144 R(0x001100, 0x00115f, 2), /* Hangul Choseong Kiyeok ..Hangul Choseong Filler */
145 R(0x001160, 0x0011ff, 0), /* Hangul Jungseong Filler ..Hangul Jongseong Ssangni */
146 R(0x00135d, 0x00135f, 0), /* Ethiopic Combining Gemin..Ethiopic Combining Gemin */
147 R(0x001712, 0x001715, 0), /* Tagalog Vowel Sign I ..Tagalog Sign Pamudpod */
148 R(0x001732, 0x001734, 0), /* Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod */
149 R(0x001752, 0x001753, 0), /* Buhid Vowel Sign I ..Buhid Vowel Sign U */
150 R(0x001772, 0x001773, 0), /* Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U */
151 R(0x0017b4, 0x0017d3, 0), /* Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat */
152 R(0x0017dd, 0x0017dd, 0), /* Khmer Sign Atthacan */
153 R(0x00180b, 0x00180f, 0), /* Mongolian Free Variation..Mongolian Free Variation */
154 R(0x001885, 0x001886, 0), /* Mongolian Letter Ali Gal..Mongolian Letter Ali Gal */
155 R(0x0018a9, 0x0018a9, 0), /* Mongolian Letter Ali Gali Dagalga */
156 R(0x001920, 0x00192b, 0), /* Limbu Vowel Sign A ..Limbu Subjoined Letter W */
157 R(0x001930, 0x00193b, 0), /* Limbu Small Letter Ka ..Limbu Sign Sa-i */
158 R(0x001a17, 0x001a1b, 0), /* Buginese Vowel Sign I ..Buginese Vowel Sign Ae */
159 R(0x001a55, 0x001a5e, 0), /* Tai Tham Consonant Sign ..Tai Tham Consonant Sign */
160 R(0x001a60, 0x001a7c, 0), /* Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue */
161 R(0x001a7f, 0x001a7f, 0), /* Tai Tham Combining Cryptogrammic Dot */
162 R(0x001ab0, 0x001ace, 0), /* Combining Doubled Circum..Combining Latin Small Le */
163 R(0x001b00, 0x001b04, 0), /* Balinese Sign Ulu Ricem ..Balinese Sign Bisah */
164 R(0x001b34, 0x001b44, 0), /* Balinese Sign Rerekan ..Balinese Adeg Adeg */
165 R(0x001b6b, 0x001b73, 0), /* Balinese Musical Symbol ..Balinese Musical Symbol */
166 R(0x001b80, 0x001b82, 0), /* Sundanese Sign Panyecek ..Sundanese Sign Pangwisad */
167 R(0x001ba1, 0x001bad, 0), /* Sundanese Consonant Sign..Sundanese Consonant Sign */
168 R(0x001be6, 0x001bf3, 0), /* Batak Sign Tompi ..Batak Panongonan */
169 R(0x001c24, 0x001c37, 0), /* Lepcha Subjoined Letter ..Lepcha Sign Nukta */
170 R(0x001cd0, 0x001cd2, 0), /* Vedic Tone Karshana ..Vedic Tone Prenkha */
171 R(0x001cd4, 0x001ce8, 0), /* Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda */
172 R(0x001ced, 0x001ced, 0), /* Vedic Sign Tiryak */
173 R(0x001cf4, 0x001cf4, 0), /* Vedic Tone Candra Above */
174 R(0x001cf7, 0x001cf9, 0), /* Vedic Sign Atikrama ..Vedic Tone Double Ring A */
175 R(0x001dc0, 0x001dff, 0), /* Combining Dotted Grave A..Combining Right Arrowhea */
176 R(0x00200b, 0x00200f, 0), /* Zero Width Space ..Right-to-left Mark */
177 R(0x002028, 0x00202e, 0), /* Line Separator ..Right-to-left Override */
178 R(0x002060, 0x002064, 0), /* Word Joiner ..Invisible Plus */
179 R(0x002066, 0x00206f, 0), /* Left-to-right Isolate ..Nominal Digit Shapes */
180 R(0x0020d0, 0x0020f0, 0), /* Combining Left Harpoon A..Combining Asterisk Above */
181 R(0x00231a, 0x00231b, 2), /* Watch ..Hourglass */
182 R(0x002329, 0x00232a, 2), /* Left-pointing Angle Brac..Right-pointing Angle Bra */
183 R(0x0023e9, 0x0023ec, 2), /* Black Right-pointing Dou..Black Down-pointing Doub */
184 R(0x0023f0, 0x0023f0, 2), /* Alarm Clock */
185 R(0x0023f3, 0x0023f3, 2), /* Hourglass With Flowing Sand */
186 R(0x0025fd, 0x0025fe, 2), /* White Medium Small Squar..Black Medium Small Squar */
187 R(0x002614, 0x002615, 2), /* Umbrella With Rain Drops..Hot Beverage */
188 R(0x002648, 0x002653, 2), /* Aries ..Pisces */
189 R(0x00267f, 0x00267f, 2), /* Wheelchair Symbol */
190 R(0x002693, 0x002693, 2), /* Anchor */
191 R(0x0026a1, 0x0026a1, 2), /* High Voltage Sign */
192 R(0x0026aa, 0x0026ab, 2), /* Medium White Circle ..Medium Black Circle */
193 R(0x0026bd, 0x0026be, 2), /* Soccer Ball ..Baseball */
194 R(0x0026c4, 0x0026c5, 2), /* Snowman Without Snow ..Sun Behind Cloud */
195 R(0x0026ce, 0x0026ce, 2), /* Ophiuchus */
196 R(0x0026d4, 0x0026d4, 2), /* No Entry */
197 R(0x0026ea, 0x0026ea, 2), /* Church */
198 R(0x0026f2, 0x0026f3, 2), /* Fountain ..Flag In Hole */
199 R(0x0026f5, 0x0026f5, 2), /* Sailboat */
200 R(0x0026fa, 0x0026fa, 2), /* Tent */
201 R(0x0026fd, 0x0026fd, 2), /* Fuel Pump */
202 R(0x002705, 0x002705, 2), /* White Heavy Check Mark */
203 R(0x00270a, 0x00270b, 2), /* Raised Fist ..Raised Hand */
204 R(0x002728, 0x002728, 2), /* Sparkles */
205 R(0x00274c, 0x00274c, 2), /* Cross Mark */
206 R(0x00274e, 0x00274e, 2), /* Negative Squared Cross Mark */
207 R(0x002753, 0x002755, 2), /* Black Question Mark Orna..White Exclamation Mark O */
208 R(0x002757, 0x002757, 2), /* Heavy Exclamation Mark Symbol */
209 R(0x002795, 0x002797, 2), /* Heavy Plus Sign ..Heavy Division Sign */
210 R(0x0027b0, 0x0027b0, 2), /* Curly Loop */
211 R(0x0027bf, 0x0027bf, 2), /* Double Curly Loop */
212 R(0x002b1b, 0x002b1c, 2), /* Black Large Square ..White Large Square */
213 R(0x002b50, 0x002b50, 2), /* White Medium Star */
214 R(0x002b55, 0x002b55, 2), /* Heavy Large Circle */
215 R(0x002cef, 0x002cf1, 0), /* Coptic Combining Ni Abov..Coptic Combining Spiritu */
216 R(0x002d7f, 0x002d7f, 0), /* Tifinagh Consonant Joiner */
217 R(0x002de0, 0x002dff, 0), /* Combining Cyrillic Lette..Combining Cyrillic Lette */
218 R(0x002e80, 0x002e99, 2), /* Cjk Radical Repeat ..Cjk Radical Rap */
219 R(0x002e9b, 0x002ef3, 2), /* Cjk Radical Choke ..Cjk Radical C-simplified */
220 R(0x002f00, 0x002fd5, 2), /* Kangxi Radical One ..Kangxi Radical Flute */
221 R(0x002ff0, 0x003029, 2), /* Ideographic Description ..Hangzhou Numeral Nine */
222 R(0x00302a, 0x00302f, 0), /* Ideographic Level Tone M..Hangul Double Dot Tone M */
223 R(0x003030, 0x00303e, 2), /* Wavy Dash ..Ideographic Variation In */
224 R(0x003041, 0x003096, 2), /* Hiragana Letter Small A ..Hiragana Letter Small Ke */
225 R(0x003099, 0x00309a, 0), /* Combining Katakana-hirag..Combining Katakana-hirag */
226 R(0x00309b, 0x0030ff, 2), /* Katakana-hiragana Voiced..Katakana Digraph Koto */
227 R(0x003105, 0x00312f, 2), /* Bopomofo Letter B ..Bopomofo Letter Nn */
228 R(0x003131, 0x00318e, 2), /* Hangul Letter Kiyeok ..Hangul Letter Araeae */
229 R(0x003190, 0x0031e3, 2), /* Ideographic Annotation L..Cjk Stroke Q */
230 R(0x0031ef, 0x00321e, 2), /* nil ..Parenthesized Korean Cha */
231 R(0x003220, 0x003247, 2), /* Parenthesized Ideograph ..Circled Ideograph Koto */
232 R(0x003250, 0x004dbf, 2), /* Partnership Sign ..Cjk Unified Ideograph-4d */
233 R(0x004e00, 0x00a48c, 2), /* Cjk Unified Ideograph-4e..Yi Syllable Yyr */
234 R(0x00a490, 0x00a4c6, 2), /* Yi Radical Qot ..Yi Radical Ke */
235 R(0x00a66f, 0x00a672, 0), /* Combining Cyrillic Vzmet..Combining Cyrillic Thous */
236 R(0x00a674, 0x00a67d, 0), /* Combining Cyrillic Lette..Combining Cyrillic Payer */
237 R(0x00a69e, 0x00a69f, 0), /* Combining Cyrillic Lette..Combining Cyrillic Lette */
238 R(0x00a6f0, 0x00a6f1, 0), /* Bamum Combining Mark Koq..Bamum Combining Mark Tuk */
239 R(0x00a802, 0x00a802, 0), /* Syloti Nagri Sign Dvisvara */
240 R(0x00a806, 0x00a806, 0), /* Syloti Nagri Sign Hasanta */
241 R(0x00a80b, 0x00a80b, 0), /* Syloti Nagri Sign Anusvara */
242 R(0x00a823, 0x00a827, 0), /* Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign */
243 R(0x00a82c, 0x00a82c, 0), /* Syloti Nagri Sign Alternate Hasanta */
244 R(0x00a880, 0x00a881, 0), /* Saurashtra Sign Anusvara..Saurashtra Sign Visarga */
245 R(0x00a8b4, 0x00a8c5, 0), /* Saurashtra Consonant Sig..Saurashtra Sign Candrabi */
246 R(0x00a8e0, 0x00a8f1, 0), /* Combining Devanagari Dig..Combining Devanagari Sig */
247 R(0x00a8ff, 0x00a8ff, 0), /* Devanagari Vowel Sign Ay */
248 R(0x00a926, 0x00a92d, 0), /* Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop */
249 R(0x00a947, 0x00a953, 0), /* Rejang Vowel Sign I ..Rejang Virama */
250 R(0x00a960, 0x00a97c, 2), /* Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo */
251 R(0x00a980, 0x00a983, 0), /* Javanese Sign Panyangga ..Javanese Sign Wignyan */
252 R(0x00a9b3, 0x00a9c0, 0), /* Javanese Sign Cecak Telu..Javanese Pangkon */
253 R(0x00a9e5, 0x00a9e5, 0), /* Myanmar Sign Shan Saw */
254 R(0x00aa29, 0x00aa36, 0), /* Cham Vowel Sign Aa ..Cham Consonant Sign Wa */
255 R(0x00aa43, 0x00aa43, 0), /* Cham Consonant Sign Final Ng */
256 R(0x00aa4c, 0x00aa4d, 0), /* Cham Consonant Sign Fina..Cham Consonant Sign Fina */
257 R(0x00aa7b, 0x00aa7d, 0), /* Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T */
258 R(0x00aab0, 0x00aab0, 0), /* Tai Viet Mai Kang */
259 R(0x00aab2, 0x00aab4, 0), /* Tai Viet Vowel I ..Tai Viet Vowel U */
260 R(0x00aab7, 0x00aab8, 0), /* Tai Viet Mai Khit ..Tai Viet Vowel Ia */
261 R(0x00aabe, 0x00aabf, 0), /* Tai Viet Vowel Am ..Tai Viet Tone Mai Ek */
262 R(0x00aac1, 0x00aac1, 0), /* Tai Viet Tone Mai Tho */
263 R(0x00aaeb, 0x00aaef, 0), /* Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign */
264 R(0x00aaf5, 0x00aaf6, 0), /* Meetei Mayek Vowel Sign ..Meetei Mayek Virama */
265 R(0x00abe3, 0x00abea, 0), /* Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign */
266 R(0x00abec, 0x00abed, 0), /* Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek */
267 R(0x00ac00, 0x00d7a3, 2), /* Hangul Syllable Ga ..Hangul Syllable Hih */
268 R(0x00d7b0, 0x00d7ff, 0), /* Hangul Jungseong O-yeo .. nil */
269 R(0x00f900, 0x00faff, 2), /* Cjk Compatibility Ideogr.. nil */
270 R(0x00fb1e, 0x00fb1e, 0), /* Hebrew Point Judeo-spanish Varika */
271 R(0x00fe00, 0x00fe0f, 0), /* Variation Selector-1 ..Variation Selector-16 */
272 R(0x00fe10, 0x00fe19, 2), /* Presentation Form For Ve..Presentation Form For Ve */
273 R(0x00fe20, 0x00fe2f, 0), /* Combining Ligature Left ..Combining Cyrillic Titlo */
274 R(0x00fe30, 0x00fe52, 2), /* Presentation Form For Ve..Small Full Stop */
275 R(0x00fe54, 0x00fe66, 2), /* Small Semicolon ..Small Equals Sign */
276 R(0x00fe68, 0x00fe6b, 2), /* Small Reverse Solidus ..Small Commercial At */
277 R(0x00feff, 0x00feff, 0), /* Zero Width No-break Space */
278 R(0x00ff01, 0x00ff60, 2), /* Fullwidth Exclamation Ma..Fullwidth Right White Pa */
279 R(0x00ffe0, 0x00ffe6, 2), /* Fullwidth Cent Sign ..Fullwidth Won Sign */
280 R(0x00fff9, 0x00fffb, 0), /* Interlinear Annotation A..Interlinear Annotation T */
281 R(0x0101fd, 0x0101fd, 0), /* Phaistos Disc Sign Combining Oblique Stroke */
282 R(0x0102e0, 0x0102e0, 0), /* Coptic Epact Thousands Mark */
283 R(0x010376, 0x01037a, 0), /* Combining Old Permic Let..Combining Old Permic Let */
284 R(0x010a01, 0x010a03, 0), /* Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo */
285 R(0x010a05, 0x010a06, 0), /* Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O */
286 R(0x010a0c, 0x010a0f, 0), /* Kharoshthi Vowel Length ..Kharoshthi Sign Visarga */
287 R(0x010a38, 0x010a3a, 0), /* Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo */
288 R(0x010a3f, 0x010a3f, 0), /* Kharoshthi Virama */
289 R(0x010ae5, 0x010ae6, 0), /* Manichaean Abbreviation ..Manichaean Abbreviation */
290 R(0x010d24, 0x010d27, 0), /* Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas */
291 R(0x010eab, 0x010eac, 0), /* Yezidi Combining Hamza M..Yezidi Combining Madda M */
292 R(0x010efd, 0x010eff, 0), /* Arabic Small Low Word Sa..Arabic Small Low Word Ma */
293 R(0x010f46, 0x010f50, 0), /* Sogdian Combining Dot Be..Sogdian Combining Stroke */
294 R(0x010f82, 0x010f85, 0), /* Old Uyghur Combining Dot..Old Uyghur Combining Two */
295 R(0x011000, 0x011002, 0), /* Brahmi Sign Candrabindu ..Brahmi Sign Visarga */
296 R(0x011038, 0x011046, 0), /* Brahmi Vowel Sign Aa ..Brahmi Virama */
297 R(0x011070, 0x011070, 0), /* Brahmi Sign Old Tamil Virama */
298 R(0x011073, 0x011074, 0), /* Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta */
299 R(0x01107f, 0x011082, 0), /* Brahmi Number Joiner ..Kaithi Sign Visarga */
300 R(0x0110b0, 0x0110ba, 0), /* Kaithi Vowel Sign Aa ..Kaithi Sign Nukta */
301 R(0x0110bd, 0x0110bd, 0), /* Kaithi Number Sign */
302 R(0x0110c2, 0x0110c2, 0), /* Kaithi Vowel Sign Vocalic R */
303 R(0x0110cd, 0x0110cd, 0), /* Kaithi Number Sign Above */
304 R(0x011100, 0x011102, 0), /* Chakma Sign Candrabindu ..Chakma Sign Visarga */
305 R(0x011127, 0x011134, 0), /* Chakma Vowel Sign A ..Chakma Maayyaa */
306 R(0x011145, 0x011146, 0), /* Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei */
307 R(0x011173, 0x011173, 0), /* Mahajani Sign Nukta */
308 R(0x011180, 0x011182, 0), /* Sharada Sign Candrabindu..Sharada Sign Visarga */
309 R(0x0111b3, 0x0111c0, 0), /* Sharada Vowel Sign Aa ..Sharada Sign Virama */
310 R(0x0111c9, 0x0111cc, 0), /* Sharada Sandhi Mark ..Sharada Extra Short Vowe */
311 R(0x0111ce, 0x0111cf, 0), /* Sharada Vowel Sign Prish..Sharada Sign Inverted Ca */
312 R(0x01122c, 0x011237, 0), /* Khojki Vowel Sign Aa ..Khojki Sign Shadda */
313 R(0x01123e, 0x01123e, 0), /* Khojki Sign Sukun */
314 R(0x011241, 0x011241, 0), /* Khojki Vowel Sign Vocalic R */
315 R(0x0112df, 0x0112ea, 0), /* Khudawadi Sign Anusvara ..Khudawadi Sign Virama */
316 R(0x011300, 0x011303, 0), /* Grantha Sign Combining A..Grantha Sign Visarga */
317 R(0x01133b, 0x01133c, 0), /* Combining Bindu Below ..Grantha Sign Nukta */
318 R(0x01133e, 0x011344, 0), /* Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal */
319 R(0x011347, 0x011348, 0), /* Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai */
320 R(0x01134b, 0x01134d, 0), /* Grantha Vowel Sign Oo ..Grantha Sign Virama */
321 R(0x011357, 0x011357, 0), /* Grantha Au Length Mark */
322 R(0x011362, 0x011363, 0), /* Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal */
323 R(0x011366, 0x01136c, 0), /* Combining Grantha Digit ..Combining Grantha Digit */
324 R(0x011370, 0x011374, 0), /* Combining Grantha Letter..Combining Grantha Letter */
325 R(0x011435, 0x011446, 0), /* Newa Vowel Sign Aa ..Newa Sign Nukta */
326 R(0x01145e, 0x01145e, 0), /* Newa Sandhi Mark */
327 R(0x0114b0, 0x0114c3, 0), /* Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta */
328 R(0x0115af, 0x0115b5, 0), /* Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal */
329 R(0x0115b8, 0x0115c0, 0), /* Siddham Vowel Sign E ..Siddham Sign Nukta */
330 R(0x0115dc, 0x0115dd, 0), /* Siddham Vowel Sign Alter..Siddham Vowel Sign Alter */
331 R(0x011630, 0x011640, 0), /* Modi Vowel Sign Aa ..Modi Sign Ardhacandra */
332 R(0x0116ab, 0x0116b7, 0), /* Takri Sign Anusvara ..Takri Sign Nukta */
333 R(0x01171d, 0x01172b, 0), /* Ahom Consonant Sign Medi..Ahom Sign Killer */
334 R(0x01182c, 0x01183a, 0), /* Dogra Vowel Sign Aa ..Dogra Sign Nukta */
335 R(0x011930, 0x011935, 0), /* Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign E */
336 R(0x011937, 0x011938, 0), /* Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O */
337 R(0x01193b, 0x01193e, 0), /* Dives Akuru Sign Anusvar..Dives Akuru Virama */
338 R(0x011940, 0x011940, 0), /* Dives Akuru Medial Ya */
339 R(0x011942, 0x011943, 0), /* Dives Akuru Medial Ra ..Dives Akuru Sign Nukta */
340 R(0x0119d1, 0x0119d7, 0), /* Nandinagari Vowel Sign A..Nandinagari Vowel Sign V */
341 R(0x0119da, 0x0119e0, 0), /* Nandinagari Vowel Sign E..Nandinagari Sign Virama */
342 R(0x0119e4, 0x0119e4, 0), /* Nandinagari Vowel Sign Prishthamatra E */
343 R(0x011a01, 0x011a0a, 0), /* Zanabazar Square Vowel S..Zanabazar Square Vowel L */
344 R(0x011a33, 0x011a39, 0), /* Zanabazar Square Final C..Zanabazar Square Sign Vi */
345 R(0x011a3b, 0x011a3e, 0), /* Zanabazar Square Cluster..Zanabazar Square Cluster */
346 R(0x011a47, 0x011a47, 0), /* Zanabazar Square Subjoiner */
347 R(0x011a51, 0x011a5b, 0), /* Soyombo Vowel Sign I ..Soyombo Vowel Length Mar */
348 R(0x011a8a, 0x011a99, 0), /* Soyombo Final Consonant ..Soyombo Subjoiner */
349 R(0x011c2f, 0x011c36, 0), /* Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc */
350 R(0x011c38, 0x011c3f, 0), /* Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama */
351 R(0x011c92, 0x011ca7, 0), /* Marchen Subjoined Letter..Marchen Subjoined Letter */
352 R(0x011ca9, 0x011cb6, 0), /* Marchen Subjoined Letter..Marchen Sign Candrabindu */
353 R(0x011d31, 0x011d36, 0), /* Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign */
354 R(0x011d3a, 0x011d3a, 0), /* Masaram Gondi Vowel Sign E */
355 R(0x011d3c, 0x011d3d, 0), /* Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign */
356 R(0x011d3f, 0x011d45, 0), /* Masaram Gondi Vowel Sign..Masaram Gondi Virama */
357 R(0x011d47, 0x011d47, 0), /* Masaram Gondi Ra-kara */
358 R(0x011d8a, 0x011d8e, 0), /* Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign */
359 R(0x011d90, 0x011d91, 0), /* Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign */
360 R(0x011d93, 0x011d97, 0), /* Gunjala Gondi Vowel Sign..Gunjala Gondi Virama */
361 R(0x011ef3, 0x011ef6, 0), /* Makasar Vowel Sign I ..Makasar Vowel Sign O */
362 R(0x011f00, 0x011f01, 0), /* Kawi Sign Candrabindu ..Kawi Sign Anusvara */
363 R(0x011f03, 0x011f03, 0), /* Kawi Sign Visarga */
364 R(0x011f34, 0x011f3a, 0), /* Kawi Vowel Sign Aa ..Kawi Vowel Sign Vocalic */
365 R(0x011f3e, 0x011f42, 0), /* Kawi Vowel Sign E ..Kawi Conjoiner */
366 R(0x013430, 0x013440, 0), /* Egyptian Hieroglyph Vert..Egyptian Hieroglyph Mirr */
367 R(0x013447, 0x013455, 0), /* Egyptian Hieroglyph Modi..Egyptian Hieroglyph Modi */
368 R(0x016af0, 0x016af4, 0), /* Bassa Vah Combining High..Bassa Vah Combining High */
369 R(0x016b30, 0x016b36, 0), /* Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta */
370 R(0x016f4f, 0x016f4f, 0), /* Miao Sign Consonant Modifier Bar */
371 R(0x016f51, 0x016f87, 0), /* Miao Sign Aspiration ..Miao Vowel Sign Ui */
372 R(0x016f8f, 0x016f92, 0), /* Miao Tone Right ..Miao Tone Below */
373 R(0x016fe0, 0x016fe3, 2), /* Tangut Iteration Mark ..Old Chinese Iteration Ma */
374 R(0x016fe4, 0x016fe4, 0), /* Khitan Small Script Filler */
375 R(0x016ff0, 0x016ff1, 0), /* Vietnamese Alternate Rea..Vietnamese Alternate Rea */
376 R(0x017000, 0x0187f7, 2), /* nil */
377 R(0x018800, 0x018cd5, 2), /* Tangut Component-001 ..Khitan Small Script Char */
378 R(0x018d00, 0x018d08, 2), /* nil */
379 R(0x01aff0, 0x01aff3, 2), /* Katakana Letter Minnan T..Katakana Letter Minnan T */
380 R(0x01aff5, 0x01affb, 2), /* Katakana Letter Minnan T..Katakana Letter Minnan N */
381 R(0x01affd, 0x01affe, 2), /* Katakana Letter Minnan N..Katakana Letter Minnan N */
382 R(0x01b000, 0x01b122, 2), /* Katakana Letter Archaic ..Katakana Letter Archaic */
383 R(0x01b132, 0x01b132, 2), /* Hiragana Letter Small Ko */
384 R(0x01b150, 0x01b152, 2), /* Hiragana Letter Small Wi..Hiragana Letter Small Wo */
385 R(0x01b155, 0x01b155, 2), /* Katakana Letter Small Ko */
386 R(0x01b164, 0x01b167, 2), /* Katakana Letter Small Wi..Katakana Letter Small N */
387 R(0x01b170, 0x01b2fb, 2), /* Nushu Character-1b170 ..Nushu Character-1b2fb */
388 R(0x01bc9d, 0x01bc9e, 0), /* Duployan Thick Letter Se..Duployan Double Mark */
389 R(0x01bca0, 0x01bca3, 0), /* Shorthand Format Letter ..Shorthand Format Up Step */
390 R(0x01cf00, 0x01cf2d, 0), /* Znamenny Combining Mark ..Znamenny Combining Mark */
391 R(0x01cf30, 0x01cf46, 0), /* Znamenny Combining Tonal..Znamenny Priznak Modifie */
392 R(0x01d165, 0x01d169, 0), /* Musical Symbol Combining..Musical Symbol Combining */
393 R(0x01d16d, 0x01d182, 0), /* Musical Symbol Combining..Musical Symbol Combining */
394 R(0x01d185, 0x01d18b, 0), /* Musical Symbol Combining..Musical Symbol Combining */
395 R(0x01d1aa, 0x01d1ad, 0), /* Musical Symbol Combining..Musical Symbol Combining */
396 R(0x01d242, 0x01d244, 0), /* Combining Greek Musical ..Combining Greek Musical */
397 R(0x01da00, 0x01da36, 0), /* Signwriting Head Rim ..Signwriting Air Sucking */
398 R(0x01da3b, 0x01da6c, 0), /* Signwriting Mouth Closed..Signwriting Excitement */
399 R(0x01da75, 0x01da75, 0), /* Signwriting Upper Body Tilting From Hip Joints */
400 R(0x01da84, 0x01da84, 0), /* Signwriting Location Head Neck */
401 R(0x01da9b, 0x01da9f, 0), /* Signwriting Fill Modifie..Signwriting Fill Modifie */
402 R(0x01daa1, 0x01daaf, 0), /* Signwriting Rotation Mod..Signwriting Rotation Mod */
403 R(0x01e000, 0x01e006, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */
404 R(0x01e008, 0x01e018, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */
405 R(0x01e01b, 0x01e021, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */
406 R(0x01e023, 0x01e024, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */
407 R(0x01e026, 0x01e02a, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */
408 R(0x01e08f, 0x01e08f, 0), /* Combining Cyrillic Small Letter Byelorussian-ukr */
409 R(0x01e130, 0x01e136, 0), /* Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T */
410 R(0x01e2ae, 0x01e2ae, 0), /* Toto Sign Rising Tone */
411 R(0x01e2ec, 0x01e2ef, 0), /* Wancho Tone Tup ..Wancho Tone Koini */
412 R(0x01e4ec, 0x01e4ef, 0), /* Nag Mundari Sign Muhor ..Nag Mundari Sign Sutuh */
413 R(0x01e8d0, 0x01e8d6, 0), /* Mende Kikakui Combining ..Mende Kikakui Combining */
414 R(0x01e944, 0x01e94a, 0), /* Adlam Alif Lengthener ..Adlam Nukta */
415 R(0x01f004, 0x01f004, 2), /* Mahjong Tile Red Dragon */
416 R(0x01f0cf, 0x01f0cf, 2), /* Playing Card Black Joker */
417 R(0x01f18e, 0x01f18e, 2), /* Negative Squared Ab */
418 R(0x01f191, 0x01f19a, 2), /* Squared Cl ..Squared Vs */
419 R(0x01f200, 0x01f202, 2), /* Square Hiragana Hoka ..Squared Katakana Sa */
420 R(0x01f210, 0x01f23b, 2), /* Squared Cjk Unified Ideo..Squared Cjk Unified Ideo */
421 R(0x01f240, 0x01f248, 2), /* Tortoise Shell Bracketed..Tortoise Shell Bracketed */
422 R(0x01f250, 0x01f251, 2), /* Circled Ideograph Advant..Circled Ideograph Accept */
423 R(0x01f260, 0x01f265, 2), /* Rounded Symbol For Fu ..Rounded Symbol For Cai */
424 R(0x01f300, 0x01f320, 2), /* Cyclone ..Shooting Star */
425 R(0x01f32d, 0x01f335, 2), /* Hot Dog ..Cactus */
426 R(0x01f337, 0x01f37c, 2), /* Tulip ..Baby Bottle */
427 R(0x01f37e, 0x01f393, 2), /* Bottle With Popping Cork..Graduation Cap */
428 R(0x01f3a0, 0x01f3ca, 2), /* Carousel Horse ..Swimmer */
429 R(0x01f3cf, 0x01f3d3, 2), /* Cricket Bat And Ball ..Table Tennis Paddle And */
430 R(0x01f3e0, 0x01f3f0, 2), /* House Building ..European Castle */
431 R(0x01f3f4, 0x01f3f4, 2), /* Waving Black Flag */
432 R(0x01f3f8, 0x01f3fa, 2), /* Badminton Racquet And Sh..Amphora */
433 R(0x01f3fb, 0x01f3ff, 0), /* Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri */
434 R(0x01f400, 0x01f43e, 2), /* Rat ..Paw Prints */
435 R(0x01f440, 0x01f440, 2), /* Eyes */
436 R(0x01f442, 0x01f4fc, 2), /* Ear ..Videocassette */
437 R(0x01f4ff, 0x01f53d, 2), /* Prayer Beads ..Down-pointing Small Red */
438 R(0x01f54b, 0x01f54e, 2), /* Kaaba ..Menorah With Nine Branch */
439 R(0x01f550, 0x01f567, 2), /* Clock Face One Oclock ..Clock Face Twelve-thirty */
440 R(0x01f57a, 0x01f57a, 2), /* Man Dancing */
441 R(0x01f595, 0x01f596, 2), /* Reversed Hand With Middl..Raised Hand With Part Be */
442 R(0x01f5a4, 0x01f5a4, 2), /* Black Heart */
443 R(0x01f5fb, 0x01f64f, 2), /* Mount Fuji ..Person With Folded Hands */
444 R(0x01f680, 0x01f6c5, 2), /* Rocket ..Left Luggage */
445 R(0x01f6cc, 0x01f6cc, 2), /* Sleeping Accommodation */
446 R(0x01f6d0, 0x01f6d2, 2), /* Place Of Worship ..Shopping Trolley */
447 R(0x01f6d5, 0x01f6d7, 2), /* Hindu Temple ..Elevator */
448 R(0x01f6dc, 0x01f6df, 2), /* Wireless ..Ring Buoy */
449 R(0x01f6eb, 0x01f6ec, 2), /* Airplane Departure ..Airplane Arriving */
450 R(0x01f6f4, 0x01f6fc, 2), /* Scooter ..Roller Skate */
451 R(0x01f7e0, 0x01f7eb, 2), /* Large Orange Circle ..Large Brown Square */
452 R(0x01f7f0, 0x01f7f0, 2), /* Heavy Equals Sign */
453 R(0x01f90c, 0x01f93a, 2), /* Pinched Fingers ..Fencer */
454 R(0x01f93c, 0x01f945, 2), /* Wrestlers ..Goal Net */
455 R(0x01f947, 0x01f9ff, 2), /* First Place Medal ..Nazar Amulet */
456 R(0x01fa70, 0x01fa7c, 2), /* Ballet Shoes ..Crutch */
457 R(0x01fa80, 0x01fa88, 2), /* Yo-yo ..Flute */
458 R(0x01fa90, 0x01fabd, 2), /* Ringed Planet ..Wing */
459 R(0x01fabf, 0x01fac5, 2), /* Goose ..Person With Crown */
460 R(0x01face, 0x01fadb, 2), /* Moose ..Pea Pod */
461 R(0x01fae0, 0x01fae8, 2), /* Melting Face ..Shaking Face */
462 R(0x01faf0, 0x01faf8, 2), /* Hand With Index Finger A..Rightwards Pushing Hand */
463 R(0x020000, 0x027fff, 2), /* Cjk Unified Ideograph-20.. nil */
464 R(0x028000, 0x02fffd, 2), /* (continued...) */
465 R(0x030000, 0x037fff, 2), /* Cjk Unified Ideograph-30.. nil */
466 R(0x038000, 0x03fffd, 2), /* (continued...) */
467 R(0x0e0001, 0x0e0001, 0), /* Language Tag */
468 R(0x0e0020, 0x0e007f, 0), /* Tag Space ..Cancel Tag */
469 R(0x0e0100, 0x0e01ef, 0), /* Variation Selector-17 ..Variation Selector-256 */
470 #undef R
471 };
472
473 /* planes[p], planes[p+1] are [from, to) at "ranges" for plane p */
474 static const uint16_t planes[/* 18 */] = {
475 0, 261, 443, 445, 447, 447, 447, 447, 447, 447, 447, 447,
476 447, 447, 447, 450, 450, 450,
477 };
478
479 /******* END OF STATIC DATA *******/
480
481 uint32_t p, bot, top;
482
483 /* 0:0, 1..31:-1 (C0), 32..126:1 (isprint), 127..159:-1 (DEL, C1) */
484 if (ucs < 160)
485 return ((ucs + 1) & 127) > 32 ? 1 : ucs ? -1 : 0;
486
487 /* out of range for "planes" (and non-unicode), non-characters. */
488 /* (some also test surrogate halves, but not required by POSIX) */
489 if (ucs > 0x10ffff || (ucs & 0xfffe) == 0xfffe)
490 return -1;
491
492 p = ucs >> 16;
493 ucs &= 0xffff;
494
495 for (bot = planes[p], top = planes[p+1]; bot < top; ) {
496 uint32_t mid = (bot + top) / 2;
497 if (ucs < ranges[mid].first)
498 top = mid;
499 else if (ucs > ranges[mid].first + ranges[mid].difflast)
500 bot = mid + 1;
501 else
502 return 2 * ranges[mid].iswide;
503 }
504
505 return 1;
506} /* wcwidth - Unicode 15.1.0 */
diff --git a/scripts/mkwcwidth b/scripts/mkwcwidth
new file mode 100755
index 000000000..792045a29
--- /dev/null
+++ b/scripts/mkwcwidth
@@ -0,0 +1,169 @@
1#!/bin/sh
2#
3# Generate a C implementation of wcwidth, with latest unicode data
4# from a local clone of https://github.com/jquast/wcwidth
5#
6# The MIT License (MIT)
7#
8# Copyright (C) 2024 Avi Halachmi <avihpit at yahoo.com>
9#
10# Permission is hereby granted, free of charge, to any person obtaining a copy
11# of this software and associated documentation files (the "Software"), to deal
12# in the Software without restriction, including without limitation the rights
13# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the Software is
15# furnished to do so, subject to the following conditions:
16#
17# The above copyright notice and this permission notice shall be included in all
18# copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26# SOFTWARE.
27
# Run every tool below in the C locale.
LC_ALL=C
export LC_ALL

# Script basename; used as the prefix of diagnostics and in the usage text.
self=${0##*/}

# C type names written into the generated code. Bigger types work too but
# waste memory; the uintN_t names need <stdint.h>.
u32=uint32_t    # "unsigned" is also typically 32 bit
u16=uint16_t    # "unsigned short" is also typically 16 bits

# Attribute emitted between the return type and the function name.
# Delete this line if not generating a busybox function.
FUNC_ATTR=FAST_FUNC
35
36
# Report a fatal error: write "<script name>: <arguments>" to stderr,
# then terminate the script with exit status 1.
err() {
    printf '%s\n' "$self: $*" >&2
    exit 1
}
38
# -h / --help: print the usage text and stop. Without the explicit exit
# the script would carry on and treat the option itself as the path of
# the python-wcwidth clone, ending with a bogus "missing ..." error.
case ${1-} in
-h | --help)
    echo "Usage: $self [path/to/python-wcwidth] (default path is '.')"
    echo "Prints a wcwidth C implementation, with latest Unicode data"
    echo "imported from a local https://github.com/jquast/wcwidth repo."
    echo "Assumptions about table_zero.py and table_wide.py at the repo:"
    echo "- Each range is in one Unicode plane (a>>16 == b>>16) (enforced)."
    echo "- Commit 04d6d90c (2023-10-30) or later, where table_zero.py"
    echo " includes zero-width Cf chars (else need to add manual tests)."
    exit 0
    ;;
esac
48
# An optional leading "--" is skipped.
if [ "${1-}" = -- ]; then
    shift
fi

# Root of the local python-wcwidth clone; defaults to the current directory.
pwc_root=${1:-.}

# Run git with the python-wcwidth clone as its working directory.
pwc_git() {
    git -C "$pwc_root" "$@"
}

# The two input tables inside the clone.
zerowidth_py=$pwc_root/wcwidth/table_zero.py
widewidth_py=$pwc_root/wcwidth/table_wide.py

if ! [ -r "$zerowidth_py" ] || ! [ -r "$widewidth_py" ]; then
    err "missing $zerowidth_py or $widewidth_py. abort."
fi

# latest unicode version from table_wide.py (e.g. from "    '10.0.0': (").
# [[:space:]] is used instead of \s, which is a GNU grep extension.
ver=$(grep "^[[:space:]]*'[0-9]" < "$widewidth_py" \
    | tail -n 1 \
    | sed "s/.*'\(.*\)'.*/\1/")
62
# stdin -> stdout: extract the data of the last table (latest spec) from
# wcwidth/table_{wide,zero}.py (from https://github.com/jquast/wcwidth).
# A line whose first non-blank text is a quote followed by a digit starts
# a new table; lines whose first non-blank text is "(0x" are its ranges.
# Only the ranges collected after the last table header are printed.
# The patterns use [[:space:]] rather than \s, which POSIX awk does not define.
last_table() {
    awk "
        /^[[:space:]]*'[0-9]/ { n = 0 }             # new table -> reset
        /^[[:space:]]*\(0x/   { rows[++n] = \$0 }   # range (first, last)
        END { for (k = 1; k <= n; ++k) print rows[k] }
    "
}
70
# stdin -> stdout: convert python range tuples into C "R(...)" initializers.
# $1 is the (wc)width of every range read from stdin (0 or 2), e.g.
#   from: (0x0123a, 0x0123c,), # comment
#   to  : R(0x00123a, 0x00123c, 2), /* comment */
# Ranges bigger than half a plane (32769+ codepoints) are split in two,
# so that (last - first) of every emitted range fits in 15 bits.
# Calls err if the two ends of a range are in different planes.
py_data_to_c() {
    sed -e 's/[(),]/ /g' -e 's|#\(.*\)|/*\1 */|' |
    while read -r first last note; do   # -r: keep backslashes in the comment
        # to support cross-plane ranges, we'd need to split them here,
        # but unlikely required, as all planes end in non-characters.
        [ "$(($first >> 16))" = "$(($last >> 16))" ] ||
            err "not same plane -- $first $last"

        # expand the hex text once: some shells want decimal vars in $(())
        first=$(($first))
        last=$(($last))

        if [ "$((last - first))" -ge 32768 ]; then  # split to 15 bit ranges
            printf 'R(0x%06x, 0x%06x, %s), %s\n' \
                "$first" "$((first + 32767))" "$1" "$note"
            first=$((first + 32768))
            note="/* (continued...) */"
        fi
        printf 'R(0x%06x, 0x%06x, %s), %s\n' "$first" "$last" "$1" "$note"
    done
}
89
# Convert the last table of each input file: zero-width ranges get width 0,
# wide ranges get width 2. Abort if either conversion reports failure.
if ! data=$(
    last_table < "$zerowidth_py" | py_data_to_c 0 &&
    last_table < "$widewidth_py" | py_data_to_c 2
); then
    err abort
fi

# Merge both lists into one sorted list; lexicographic here is also numeric.
data=$(printf '%s\n' "$data" | sort)

# Print the sorted hex ranges and their (wc)width, one per line:
# R(first, last, {0|2}),[ /* ... */]
data() {
    printf '%s\n' "$data"
}
96
# repeat STRING COUNT: print STRING COUNT times with no separator and no
# trailing newline. Prints nothing when COUNT is 0 or negative.
# Uses the global variable R as its countdown.
repeat() {
    R=$2
    while [ "$R" -gt 0 ]; do
        printf '%s' "$1"
        R=$((R - 1))
    done
}
98
# data -> stdout: array such that a[p], a[p+1] are [from, to) of plane p data.
# Reads the sorted "R(first, last, width), ..." lines and prints 18
# comma-separated indices: for each plane 0..16 the index of its first
# range, followed by the total number of ranges as the final entry.
mkplanes() {
    idx=0       # index of the range being read
    prev=-1     # plane of the previous range
    while read _first last _rest; do
        plane=$((${last%?} >> 16))      # drop trailing comma; plane is last >> 16
        # the current index starts this plane and every skipped (empty) plane
        repeat "$idx, " "$((plane - prev))"
        idx=$((idx + 1))
        prev=$plane
    done
    # remaining planes up to 16, plus the closing entry
    repeat "$idx, " "$((17 - prev))"
}
109
# stdin -> stdout: prefix every line with two tabs, then trim trailing
# whitespace (so an empty input line stays empty).
# The tabs come from printf and the trim uses [[:space:]], because "\t" in
# a sed replacement and "\s" in a sed pattern are GNU extensions.
indent() {
    sed -e "s/^/$(printf '\t\t')/" -e 's/[[:space:]]*$//'
}
111
# Emit the generated C function on stdout. The heredoc delimiter is
# unquoted, so $var and $(...) below are expanded by the shell.
# - The struct fields use $u16, so the type configured at the top of the
#   script applies to the whole output (identical output by default).
# - The import date uses an explicit format: "date -I" is not in POSIX.
cat << CFUNCTION
/* wcwidth - Unicode $ver, generated by $0.
 * Copyright (C) 2024 Avi Halachmi <avihpit at yahoo.com>
 * License: MIT
 *
 * Data imported on $(date -u +%Y-%m-%d) from https://github.com/jquast/wcwidth
 * commit $(pwc_git describe --tags) ($(pwc_git show --no-patch --format=%ci))
 */
int ${FUNC_ATTR-} wcwidth($u32 ucs)
{
	/* sorted ranges, "first" is clipped to 16 bit, and its high bits
	 * (plane) are deduced from the "planes" array below.
	 * (imported from ${zerowidth_py##*/} and ${widewidth_py##*/})
	 */
	static const struct range {
		$u16 first;
		$u16 iswide: 1; /* bitfield order empirically faster */
		$u16 difflast: 15;
	} ranges[] = {
		#define R(first, last, width) {first & 0xffff, width/2, last-first}
$(data | indent)
		#undef R
	};

	/* planes[p], planes[p+1] are [from, to) at "ranges" for plane p */
	static const $u16 planes[/* 18 */] = {
$(data | mkplanes | fold -s -w 60 | indent)
	};

	/******* END OF STATIC DATA *******/

	$u32 p, bot, top;

	/* 0:0, 1..31:-1 (C0), 32..126:1 (isprint), 127..159:-1 (DEL, C1) */
	if (ucs < 160)
		return ((ucs + 1) & 127) > 32 ? 1 : ucs ? -1 : 0;

	/* out of range for "planes" (and non-unicode), non-characters. */
	/* (some also test surrogate halves, but not required by POSIX) */
	if (ucs > 0x10ffff || (ucs & 0xfffe) == 0xfffe)
		return -1;

	p = ucs >> 16;
	ucs &= 0xffff;

	for (bot = planes[p], top = planes[p+1]; bot < top; ) {
		$u32 mid = (bot + top) / 2;
		if (ucs < ranges[mid].first)
			top = mid;
		else if (ucs > ranges[mid].first + ranges[mid].difflast)
			bot = mid + 1;
		else
			return 2 * ranges[mid].iswide;
	}

	return 1;
} /* wcwidth - Unicode $ver */
CFUNCTION