diff options
-rw-r--r-- | include/unicode.h | 4 | ||||
-rw-r--r-- | libbb/unicode.c | 4 | ||||
-rw-r--r-- | libbb/wcwidth_alt.c | 506 | ||||
-rwxr-xr-x | scripts/mkwcwidth | 169 |
4 files changed, 683 insertions, 0 deletions
diff --git a/include/unicode.h b/include/unicode.h index e894f7148..cdf35acb7 100644 --- a/include/unicode.h +++ b/include/unicode.h | |||
@@ -33,7 +33,11 @@ enum { | |||
33 | 33 | ||
34 | # if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000 | 34 | # if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000 |
35 | # undef CONFIG_LAST_SUPPORTED_WCHAR | 35 | # undef CONFIG_LAST_SUPPORTED_WCHAR |
36 | # if ENABLE_PLATFORM_MINGW32 | ||
37 | # define CONFIG_LAST_SUPPORTED_WCHAR 0x10ffff /* full unicode range */ | ||
38 | # else | ||
36 | # define CONFIG_LAST_SUPPORTED_WCHAR 0x2ffff | 39 | # define CONFIG_LAST_SUPPORTED_WCHAR 0x2ffff |
40 | # endif | ||
37 | # endif | 41 | # endif |
38 | 42 | ||
39 | # if CONFIG_LAST_SUPPORTED_WCHAR < 0x300 | 43 | # if CONFIG_LAST_SUPPORTED_WCHAR < 0x300 |
diff --git a/libbb/unicode.c b/libbb/unicode.c index 206ec0dcb..a0b2db625 100644 --- a/libbb/unicode.c +++ b/libbb/unicode.c | |||
@@ -276,6 +276,7 @@ int FAST_FUNC iswpunct(wint_t wc) | |||
276 | return (unsigned)wc <= 0x7f && ispunct(wc); | 276 | return (unsigned)wc <= 0x7f && ispunct(wc); |
277 | } | 277 | } |
278 | 278 | ||
279 | # if !ENABLE_PLATFORM_MINGW32 || CONFIG_LAST_SUPPORTED_WCHAR < 0x30000 | ||
279 | 280 | ||
280 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300 | 281 | # if CONFIG_LAST_SUPPORTED_WCHAR >= 0x300 |
281 | struct interval { | 282 | struct interval { |
@@ -711,6 +712,9 @@ int FAST_FUNC wcwidth(unsigned ucs) | |||
711 | # endif /* >= 0x300 */ | 712 | # endif /* >= 0x300 */ |
712 | } | 713 | } |
713 | 714 | ||
715 | # else /* ENABLE_PLATFORM_MINGW32 && CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000 */ | ||
716 | # include "wcwidth_alt.c" /* simpler and more up-to-date implementation */ | ||
717 | # endif | ||
714 | 718 | ||
715 | # if ENABLE_UNICODE_BIDI_SUPPORT | 719 | # if ENABLE_UNICODE_BIDI_SUPPORT |
716 | int FAST_FUNC unicode_bidi_isrtl(wint_t wc) | 720 | int FAST_FUNC unicode_bidi_isrtl(wint_t wc) |
diff --git a/libbb/wcwidth_alt.c b/libbb/wcwidth_alt.c new file mode 100644 index 000000000..9a45ab0e9 --- /dev/null +++ b/libbb/wcwidth_alt.c | |||
@@ -0,0 +1,506 @@ | |||
1 | /* wcwidth - Unicode 15.1.0, generated by scripts/mkwcwidth. | ||
2 | * Copyright (C) 2024 Avi Halachmi <avihpit at yahoo.com> | ||
3 | * License: MIT | ||
4 | * | ||
5 | * Data imported on 2024-03-29 from https://github.com/jquast/wcwidth | ||
6 | * commit 0.2.13-3-g056ee4b (2024-02-14 15:05:06 -0500) | ||
7 | */ | ||
8 | int FAST_FUNC wcwidth(uint32_t ucs) | ||
9 | { | ||
10 | /* sorted ranges, "first" is clipped to 16 bit, and its high bits | ||
11 | * (plane) are deduced from the "planes" array below. | ||
12 | * (imported from table_zero.py and table_wide.py) | ||
13 | */ | ||
14 | static const struct range { | ||
15 | uint16_t first; | ||
16 | uint16_t iswide: 1; /* bitfield order empirically faster */ | ||
17 | uint16_t difflast: 15; | ||
18 | } ranges[] = { | ||
19 | #define R(first, last, width) {first & 0xffff, width/2, last-first} | ||
20 | R(0x000000, 0x000000, 0), /* nil */ | ||
21 | R(0x0000ad, 0x0000ad, 0), /* Soft Hyphen */ | ||
22 | R(0x000300, 0x00036f, 0), /* Combining Grave Accent ..Combining Latin Small Le */ | ||
23 | R(0x000483, 0x000489, 0), /* Combining Cyrillic Titlo..Combining Cyrillic Milli */ | ||
24 | R(0x000591, 0x0005bd, 0), /* Hebrew Accent Etnahta ..Hebrew Point Meteg */ | ||
25 | R(0x0005bf, 0x0005bf, 0), /* Hebrew Point Rafe */ | ||
26 | R(0x0005c1, 0x0005c2, 0), /* Hebrew Point Shin Dot ..Hebrew Point Sin Dot */ | ||
27 | R(0x0005c4, 0x0005c5, 0), /* Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot */ | ||
28 | R(0x0005c7, 0x0005c7, 0), /* Hebrew Point Qamats Qatan */ | ||
29 | R(0x000600, 0x000605, 0), /* Arabic Number Sign ..Arabic Number Mark Above */ | ||
30 | R(0x000610, 0x00061a, 0), /* Arabic Sign Sallallahou ..Arabic Small Kasra */ | ||
31 | R(0x00061c, 0x00061c, 0), /* Arabic Letter Mark */ | ||
32 | R(0x00064b, 0x00065f, 0), /* Arabic Fathatan ..Arabic Wavy Hamza Below */ | ||
33 | R(0x000670, 0x000670, 0), /* Arabic Letter Superscript Alef */ | ||
34 | R(0x0006d6, 0x0006dd, 0), /* Arabic Small High Ligatu..Arabic End Of Ayah */ | ||
35 | R(0x0006df, 0x0006e4, 0), /* Arabic Small High Rounde..Arabic Small High Madda */ | ||
36 | R(0x0006e7, 0x0006e8, 0), /* Arabic Small High Yeh ..Arabic Small High Noon */ | ||
37 | R(0x0006ea, 0x0006ed, 0), /* Arabic Empty Centre Low ..Arabic Small Low Meem */ | ||
38 | R(0x00070f, 0x00070f, 0), /* Syriac Abbreviation Mark */ | ||
39 | R(0x000711, 0x000711, 0), /* Syriac Letter Superscript Alaph */ | ||
40 | R(0x000730, 0x00074a, 0), /* Syriac Pthaha Above ..Syriac Barrekh */ | ||
41 | R(0x0007a6, 0x0007b0, 0), /* Thaana Abafili ..Thaana Sukun */ | ||
42 | R(0x0007eb, 0x0007f3, 0), /* Nko Combining Short High..Nko Combining Double Dot */ | ||
43 | R(0x0007fd, 0x0007fd, 0), /* Nko Dantayalan */ | ||
44 | R(0x000816, 0x000819, 0), /* Samaritan Mark In ..Samaritan Mark Dagesh */ | ||
45 | R(0x00081b, 0x000823, 0), /* Samaritan Mark Epentheti..Samaritan Vowel Sign A */ | ||
46 | R(0x000825, 0x000827, 0), /* Samaritan Vowel Sign Sho..Samaritan Vowel Sign U */ | ||
47 | R(0x000829, 0x00082d, 0), /* Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa */ | ||
48 | R(0x000859, 0x00085b, 0), /* Mandaic Affrication Mark..Mandaic Gemination Mark */ | ||
49 | R(0x000890, 0x000891, 0), /* Arabic Pound Mark Above ..Arabic Piastre Mark Abov */ | ||
50 | R(0x000898, 0x00089f, 0), /* Arabic Small High Word A..Arabic Half Madda Over M */ | ||
51 | R(0x0008ca, 0x000903, 0), /* Arabic Small High Farsi ..Devanagari Sign Visarga */ | ||
52 | R(0x00093a, 0x00093c, 0), /* Devanagari Vowel Sign Oe..Devanagari Sign Nukta */ | ||
53 | R(0x00093e, 0x00094f, 0), /* Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw */ | ||
54 | R(0x000951, 0x000957, 0), /* Devanagari Stress Sign U..Devanagari Vowel Sign Uu */ | ||
55 | R(0x000962, 0x000963, 0), /* Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo */ | ||
56 | R(0x000981, 0x000983, 0), /* Bengali Sign Candrabindu..Bengali Sign Visarga */ | ||
57 | R(0x0009bc, 0x0009bc, 0), /* Bengali Sign Nukta */ | ||
58 | R(0x0009be, 0x0009c4, 0), /* Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal */ | ||
59 | R(0x0009c7, 0x0009c8, 0), /* Bengali Vowel Sign E ..Bengali Vowel Sign Ai */ | ||
60 | R(0x0009cb, 0x0009cd, 0), /* Bengali Vowel Sign O ..Bengali Sign Virama */ | ||
61 | R(0x0009d7, 0x0009d7, 0), /* Bengali Au Length Mark */ | ||
62 | R(0x0009e2, 0x0009e3, 0), /* Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal */ | ||
63 | R(0x0009fe, 0x0009fe, 0), /* Bengali Sandhi Mark */ | ||
64 | R(0x000a01, 0x000a03, 0), /* Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga */ | ||
65 | R(0x000a3c, 0x000a3c, 0), /* Gurmukhi Sign Nukta */ | ||
66 | R(0x000a3e, 0x000a42, 0), /* Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu */ | ||
67 | R(0x000a47, 0x000a48, 0), /* Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai */ | ||
68 | R(0x000a4b, 0x000a4d, 0), /* Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama */ | ||
69 | R(0x000a51, 0x000a51, 0), /* Gurmukhi Sign Udaat */ | ||
70 | R(0x000a70, 0x000a71, 0), /* Gurmukhi Tippi ..Gurmukhi Addak */ | ||
71 | R(0x000a75, 0x000a75, 0), /* Gurmukhi Sign Yakash */ | ||
72 | R(0x000a81, 0x000a83, 0), /* Gujarati Sign Candrabind..Gujarati Sign Visarga */ | ||
73 | R(0x000abc, 0x000abc, 0), /* Gujarati Sign Nukta */ | ||
74 | R(0x000abe, 0x000ac5, 0), /* Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand */ | ||
75 | R(0x000ac7, 0x000ac9, 0), /* Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand */ | ||
76 | R(0x000acb, 0x000acd, 0), /* Gujarati Vowel Sign O ..Gujarati Sign Virama */ | ||
77 | R(0x000ae2, 0x000ae3, 0), /* Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca */ | ||
78 | R(0x000afa, 0x000aff, 0), /* Gujarati Sign Sukun ..Gujarati Sign Two-circle */ | ||
79 | R(0x000b01, 0x000b03, 0), /* Oriya Sign Candrabindu ..Oriya Sign Visarga */ | ||
80 | R(0x000b3c, 0x000b3c, 0), /* Oriya Sign Nukta */ | ||
81 | R(0x000b3e, 0x000b44, 0), /* Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic */ | ||
82 | R(0x000b47, 0x000b48, 0), /* Oriya Vowel Sign E ..Oriya Vowel Sign Ai */ | ||
83 | R(0x000b4b, 0x000b4d, 0), /* Oriya Vowel Sign O ..Oriya Sign Virama */ | ||
84 | R(0x000b55, 0x000b57, 0), /* Oriya Sign Overline ..Oriya Au Length Mark */ | ||
85 | R(0x000b62, 0x000b63, 0), /* Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic */ | ||
86 | R(0x000b82, 0x000b82, 0), /* Tamil Sign Anusvara */ | ||
87 | R(0x000bbe, 0x000bc2, 0), /* Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu */ | ||
88 | R(0x000bc6, 0x000bc8, 0), /* Tamil Vowel Sign E ..Tamil Vowel Sign Ai */ | ||
89 | R(0x000bca, 0x000bcd, 0), /* Tamil Vowel Sign O ..Tamil Sign Virama */ | ||
90 | R(0x000bd7, 0x000bd7, 0), /* Tamil Au Length Mark */ | ||
91 | R(0x000c00, 0x000c04, 0), /* Telugu Sign Combining Ca..Telugu Sign Combining An */ | ||
92 | R(0x000c3c, 0x000c3c, 0), /* Telugu Sign Nukta */ | ||
93 | R(0x000c3e, 0x000c44, 0), /* Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali */ | ||
94 | R(0x000c46, 0x000c48, 0), /* Telugu Vowel Sign E ..Telugu Vowel Sign Ai */ | ||
95 | R(0x000c4a, 0x000c4d, 0), /* Telugu Vowel Sign O ..Telugu Sign Virama */ | ||
96 | R(0x000c55, 0x000c56, 0), /* Telugu Length Mark ..Telugu Ai Length Mark */ | ||
97 | R(0x000c62, 0x000c63, 0), /* Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali */ | ||
98 | R(0x000c81, 0x000c83, 0), /* Kannada Sign Candrabindu..Kannada Sign Visarga */ | ||
99 | R(0x000cbc, 0x000cbc, 0), /* Kannada Sign Nukta */ | ||
100 | R(0x000cbe, 0x000cc4, 0), /* Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal */ | ||
101 | R(0x000cc6, 0x000cc8, 0), /* Kannada Vowel Sign E ..Kannada Vowel Sign Ai */ | ||
102 | R(0x000cca, 0x000ccd, 0), /* Kannada Vowel Sign O ..Kannada Sign Virama */ | ||
103 | R(0x000cd5, 0x000cd6, 0), /* Kannada Length Mark ..Kannada Ai Length Mark */ | ||
104 | R(0x000ce2, 0x000ce3, 0), /* Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal */ | ||
105 | R(0x000cf3, 0x000cf3, 0), /* Kannada Sign Combining Anusvara Above Right */ | ||
106 | R(0x000d00, 0x000d03, 0), /* Malayalam Sign Combining..Malayalam Sign Visarga */ | ||
107 | R(0x000d3b, 0x000d3c, 0), /* Malayalam Sign Vertical ..Malayalam Sign Circular */ | ||
108 | R(0x000d3e, 0x000d44, 0), /* Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc */ | ||
109 | R(0x000d46, 0x000d48, 0), /* Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai */ | ||
110 | R(0x000d4a, 0x000d4d, 0), /* Malayalam Vowel Sign O ..Malayalam Sign Virama */ | ||
111 | R(0x000d57, 0x000d57, 0), /* Malayalam Au Length Mark */ | ||
112 | R(0x000d62, 0x000d63, 0), /* Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc */ | ||
113 | R(0x000d81, 0x000d83, 0), /* Sinhala Sign Candrabindu..Sinhala Sign Visargaya */ | ||
114 | R(0x000dca, 0x000dca, 0), /* Sinhala Sign Al-lakuna */ | ||
115 | R(0x000dcf, 0x000dd4, 0), /* Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti */ | ||
116 | R(0x000dd6, 0x000dd6, 0), /* Sinhala Vowel Sign Diga Paa-pilla */ | ||
117 | R(0x000dd8, 0x000ddf, 0), /* Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan */ | ||
118 | R(0x000df2, 0x000df3, 0), /* Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga */ | ||
119 | R(0x000e31, 0x000e31, 0), /* Thai Character Mai Han-akat */ | ||
120 | R(0x000e34, 0x000e3a, 0), /* Thai Character Sara I ..Thai Character Phinthu */ | ||
121 | R(0x000e47, 0x000e4e, 0), /* Thai Character Maitaikhu..Thai Character Yamakkan */ | ||
122 | R(0x000eb1, 0x000eb1, 0), /* Lao Vowel Sign Mai Kan */ | ||
123 | R(0x000eb4, 0x000ebc, 0), /* Lao Vowel Sign I ..Lao Semivowel Sign Lo */ | ||
124 | R(0x000ec8, 0x000ece, 0), /* Lao Tone Mai Ek ..Lao Yamakkan */ | ||
125 | R(0x000f18, 0x000f19, 0), /* Tibetan Astrological Sig..Tibetan Astrological Sig */ | ||
126 | R(0x000f35, 0x000f35, 0), /* Tibetan Mark Ngas Bzung Nyi Zla */ | ||
127 | R(0x000f37, 0x000f37, 0), /* Tibetan Mark Ngas Bzung Sgor Rtags */ | ||
128 | R(0x000f39, 0x000f39, 0), /* Tibetan Mark Tsa -phru */ | ||
129 | R(0x000f3e, 0x000f3f, 0), /* Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes */ | ||
130 | R(0x000f71, 0x000f84, 0), /* Tibetan Vowel Sign Aa ..Tibetan Mark Halanta */ | ||
131 | R(0x000f86, 0x000f87, 0), /* Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags */ | ||
132 | R(0x000f8d, 0x000f97, 0), /* Tibetan Subjoined Sign L..Tibetan Subjoined Letter */ | ||
133 | R(0x000f99, 0x000fbc, 0), /* Tibetan Subjoined Letter..Tibetan Subjoined Letter */ | ||
134 | R(0x000fc6, 0x000fc6, 0), /* Tibetan Symbol Padma Gdan */ | ||
135 | R(0x00102b, 0x00103e, 0), /* Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M */ | ||
136 | R(0x001056, 0x001059, 0), /* Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal */ | ||
137 | R(0x00105e, 0x001060, 0), /* Myanmar Consonant Sign M..Myanmar Consonant Sign M */ | ||
138 | R(0x001062, 0x001064, 0), /* Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K */ | ||
139 | R(0x001067, 0x00106d, 0), /* Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo */ | ||
140 | R(0x001071, 0x001074, 0), /* Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah */ | ||
141 | R(0x001082, 0x00108d, 0), /* Myanmar Consonant Sign S..Myanmar Sign Shan Counci */ | ||
142 | R(0x00108f, 0x00108f, 0), /* Myanmar Sign Rumai Palaung Tone-5 */ | ||
143 | R(0x00109a, 0x00109d, 0), /* Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton */ | ||
144 | R(0x001100, 0x00115f, 2), /* Hangul Choseong Kiyeok ..Hangul Choseong Filler */ | ||
145 | R(0x001160, 0x0011ff, 0), /* Hangul Jungseong Filler ..Hangul Jongseong Ssangni */ | ||
146 | R(0x00135d, 0x00135f, 0), /* Ethiopic Combining Gemin..Ethiopic Combining Gemin */ | ||
147 | R(0x001712, 0x001715, 0), /* Tagalog Vowel Sign I ..Tagalog Sign Pamudpod */ | ||
148 | R(0x001732, 0x001734, 0), /* Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod */ | ||
149 | R(0x001752, 0x001753, 0), /* Buhid Vowel Sign I ..Buhid Vowel Sign U */ | ||
150 | R(0x001772, 0x001773, 0), /* Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U */ | ||
151 | R(0x0017b4, 0x0017d3, 0), /* Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat */ | ||
152 | R(0x0017dd, 0x0017dd, 0), /* Khmer Sign Atthacan */ | ||
153 | R(0x00180b, 0x00180f, 0), /* Mongolian Free Variation..Mongolian Free Variation */ | ||
154 | R(0x001885, 0x001886, 0), /* Mongolian Letter Ali Gal..Mongolian Letter Ali Gal */ | ||
155 | R(0x0018a9, 0x0018a9, 0), /* Mongolian Letter Ali Gali Dagalga */ | ||
156 | R(0x001920, 0x00192b, 0), /* Limbu Vowel Sign A ..Limbu Subjoined Letter W */ | ||
157 | R(0x001930, 0x00193b, 0), /* Limbu Small Letter Ka ..Limbu Sign Sa-i */ | ||
158 | R(0x001a17, 0x001a1b, 0), /* Buginese Vowel Sign I ..Buginese Vowel Sign Ae */ | ||
159 | R(0x001a55, 0x001a5e, 0), /* Tai Tham Consonant Sign ..Tai Tham Consonant Sign */ | ||
160 | R(0x001a60, 0x001a7c, 0), /* Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue */ | ||
161 | R(0x001a7f, 0x001a7f, 0), /* Tai Tham Combining Cryptogrammic Dot */ | ||
162 | R(0x001ab0, 0x001ace, 0), /* Combining Doubled Circum..Combining Latin Small Le */ | ||
163 | R(0x001b00, 0x001b04, 0), /* Balinese Sign Ulu Ricem ..Balinese Sign Bisah */ | ||
164 | R(0x001b34, 0x001b44, 0), /* Balinese Sign Rerekan ..Balinese Adeg Adeg */ | ||
165 | R(0x001b6b, 0x001b73, 0), /* Balinese Musical Symbol ..Balinese Musical Symbol */ | ||
166 | R(0x001b80, 0x001b82, 0), /* Sundanese Sign Panyecek ..Sundanese Sign Pangwisad */ | ||
167 | R(0x001ba1, 0x001bad, 0), /* Sundanese Consonant Sign..Sundanese Consonant Sign */ | ||
168 | R(0x001be6, 0x001bf3, 0), /* Batak Sign Tompi ..Batak Panongonan */ | ||
169 | R(0x001c24, 0x001c37, 0), /* Lepcha Subjoined Letter ..Lepcha Sign Nukta */ | ||
170 | R(0x001cd0, 0x001cd2, 0), /* Vedic Tone Karshana ..Vedic Tone Prenkha */ | ||
171 | R(0x001cd4, 0x001ce8, 0), /* Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda */ | ||
172 | R(0x001ced, 0x001ced, 0), /* Vedic Sign Tiryak */ | ||
173 | R(0x001cf4, 0x001cf4, 0), /* Vedic Tone Candra Above */ | ||
174 | R(0x001cf7, 0x001cf9, 0), /* Vedic Sign Atikrama ..Vedic Tone Double Ring A */ | ||
175 | R(0x001dc0, 0x001dff, 0), /* Combining Dotted Grave A..Combining Right Arrowhea */ | ||
176 | R(0x00200b, 0x00200f, 0), /* Zero Width Space ..Right-to-left Mark */ | ||
177 | R(0x002028, 0x00202e, 0), /* Line Separator ..Right-to-left Override */ | ||
178 | R(0x002060, 0x002064, 0), /* Word Joiner ..Invisible Plus */ | ||
179 | R(0x002066, 0x00206f, 0), /* Left-to-right Isolate ..Nominal Digit Shapes */ | ||
180 | R(0x0020d0, 0x0020f0, 0), /* Combining Left Harpoon A..Combining Asterisk Above */ | ||
181 | R(0x00231a, 0x00231b, 2), /* Watch ..Hourglass */ | ||
182 | R(0x002329, 0x00232a, 2), /* Left-pointing Angle Brac..Right-pointing Angle Bra */ | ||
183 | R(0x0023e9, 0x0023ec, 2), /* Black Right-pointing Dou..Black Down-pointing Doub */ | ||
184 | R(0x0023f0, 0x0023f0, 2), /* Alarm Clock */ | ||
185 | R(0x0023f3, 0x0023f3, 2), /* Hourglass With Flowing Sand */ | ||
186 | R(0x0025fd, 0x0025fe, 2), /* White Medium Small Squar..Black Medium Small Squar */ | ||
187 | R(0x002614, 0x002615, 2), /* Umbrella With Rain Drops..Hot Beverage */ | ||
188 | R(0x002648, 0x002653, 2), /* Aries ..Pisces */ | ||
189 | R(0x00267f, 0x00267f, 2), /* Wheelchair Symbol */ | ||
190 | R(0x002693, 0x002693, 2), /* Anchor */ | ||
191 | R(0x0026a1, 0x0026a1, 2), /* High Voltage Sign */ | ||
192 | R(0x0026aa, 0x0026ab, 2), /* Medium White Circle ..Medium Black Circle */ | ||
193 | R(0x0026bd, 0x0026be, 2), /* Soccer Ball ..Baseball */ | ||
194 | R(0x0026c4, 0x0026c5, 2), /* Snowman Without Snow ..Sun Behind Cloud */ | ||
195 | R(0x0026ce, 0x0026ce, 2), /* Ophiuchus */ | ||
196 | R(0x0026d4, 0x0026d4, 2), /* No Entry */ | ||
197 | R(0x0026ea, 0x0026ea, 2), /* Church */ | ||
198 | R(0x0026f2, 0x0026f3, 2), /* Fountain ..Flag In Hole */ | ||
199 | R(0x0026f5, 0x0026f5, 2), /* Sailboat */ | ||
200 | R(0x0026fa, 0x0026fa, 2), /* Tent */ | ||
201 | R(0x0026fd, 0x0026fd, 2), /* Fuel Pump */ | ||
202 | R(0x002705, 0x002705, 2), /* White Heavy Check Mark */ | ||
203 | R(0x00270a, 0x00270b, 2), /* Raised Fist ..Raised Hand */ | ||
204 | R(0x002728, 0x002728, 2), /* Sparkles */ | ||
205 | R(0x00274c, 0x00274c, 2), /* Cross Mark */ | ||
206 | R(0x00274e, 0x00274e, 2), /* Negative Squared Cross Mark */ | ||
207 | R(0x002753, 0x002755, 2), /* Black Question Mark Orna..White Exclamation Mark O */ | ||
208 | R(0x002757, 0x002757, 2), /* Heavy Exclamation Mark Symbol */ | ||
209 | R(0x002795, 0x002797, 2), /* Heavy Plus Sign ..Heavy Division Sign */ | ||
210 | R(0x0027b0, 0x0027b0, 2), /* Curly Loop */ | ||
211 | R(0x0027bf, 0x0027bf, 2), /* Double Curly Loop */ | ||
212 | R(0x002b1b, 0x002b1c, 2), /* Black Large Square ..White Large Square */ | ||
213 | R(0x002b50, 0x002b50, 2), /* White Medium Star */ | ||
214 | R(0x002b55, 0x002b55, 2), /* Heavy Large Circle */ | ||
215 | R(0x002cef, 0x002cf1, 0), /* Coptic Combining Ni Abov..Coptic Combining Spiritu */ | ||
216 | R(0x002d7f, 0x002d7f, 0), /* Tifinagh Consonant Joiner */ | ||
217 | R(0x002de0, 0x002dff, 0), /* Combining Cyrillic Lette..Combining Cyrillic Lette */ | ||
218 | R(0x002e80, 0x002e99, 2), /* Cjk Radical Repeat ..Cjk Radical Rap */ | ||
219 | R(0x002e9b, 0x002ef3, 2), /* Cjk Radical Choke ..Cjk Radical C-simplified */ | ||
220 | R(0x002f00, 0x002fd5, 2), /* Kangxi Radical One ..Kangxi Radical Flute */ | ||
221 | R(0x002ff0, 0x003029, 2), /* Ideographic Description ..Hangzhou Numeral Nine */ | ||
222 | R(0x00302a, 0x00302f, 0), /* Ideographic Level Tone M..Hangul Double Dot Tone M */ | ||
223 | R(0x003030, 0x00303e, 2), /* Wavy Dash ..Ideographic Variation In */ | ||
224 | R(0x003041, 0x003096, 2), /* Hiragana Letter Small A ..Hiragana Letter Small Ke */ | ||
225 | R(0x003099, 0x00309a, 0), /* Combining Katakana-hirag..Combining Katakana-hirag */ | ||
226 | R(0x00309b, 0x0030ff, 2), /* Katakana-hiragana Voiced..Katakana Digraph Koto */ | ||
227 | R(0x003105, 0x00312f, 2), /* Bopomofo Letter B ..Bopomofo Letter Nn */ | ||
228 | R(0x003131, 0x00318e, 2), /* Hangul Letter Kiyeok ..Hangul Letter Araeae */ | ||
229 | R(0x003190, 0x0031e3, 2), /* Ideographic Annotation L..Cjk Stroke Q */ | ||
230 | R(0x0031ef, 0x00321e, 2), /* nil ..Parenthesized Korean Cha */ | ||
231 | R(0x003220, 0x003247, 2), /* Parenthesized Ideograph ..Circled Ideograph Koto */ | ||
232 | R(0x003250, 0x004dbf, 2), /* Partnership Sign ..Cjk Unified Ideograph-4d */ | ||
233 | R(0x004e00, 0x00a48c, 2), /* Cjk Unified Ideograph-4e..Yi Syllable Yyr */ | ||
234 | R(0x00a490, 0x00a4c6, 2), /* Yi Radical Qot ..Yi Radical Ke */ | ||
235 | R(0x00a66f, 0x00a672, 0), /* Combining Cyrillic Vzmet..Combining Cyrillic Thous */ | ||
236 | R(0x00a674, 0x00a67d, 0), /* Combining Cyrillic Lette..Combining Cyrillic Payer */ | ||
237 | R(0x00a69e, 0x00a69f, 0), /* Combining Cyrillic Lette..Combining Cyrillic Lette */ | ||
238 | R(0x00a6f0, 0x00a6f1, 0), /* Bamum Combining Mark Koq..Bamum Combining Mark Tuk */ | ||
239 | R(0x00a802, 0x00a802, 0), /* Syloti Nagri Sign Dvisvara */ | ||
240 | R(0x00a806, 0x00a806, 0), /* Syloti Nagri Sign Hasanta */ | ||
241 | R(0x00a80b, 0x00a80b, 0), /* Syloti Nagri Sign Anusvara */ | ||
242 | R(0x00a823, 0x00a827, 0), /* Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign */ | ||
243 | R(0x00a82c, 0x00a82c, 0), /* Syloti Nagri Sign Alternate Hasanta */ | ||
244 | R(0x00a880, 0x00a881, 0), /* Saurashtra Sign Anusvara..Saurashtra Sign Visarga */ | ||
245 | R(0x00a8b4, 0x00a8c5, 0), /* Saurashtra Consonant Sig..Saurashtra Sign Candrabi */ | ||
246 | R(0x00a8e0, 0x00a8f1, 0), /* Combining Devanagari Dig..Combining Devanagari Sig */ | ||
247 | R(0x00a8ff, 0x00a8ff, 0), /* Devanagari Vowel Sign Ay */ | ||
248 | R(0x00a926, 0x00a92d, 0), /* Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop */ | ||
249 | R(0x00a947, 0x00a953, 0), /* Rejang Vowel Sign I ..Rejang Virama */ | ||
250 | R(0x00a960, 0x00a97c, 2), /* Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo */ | ||
251 | R(0x00a980, 0x00a983, 0), /* Javanese Sign Panyangga ..Javanese Sign Wignyan */ | ||
252 | R(0x00a9b3, 0x00a9c0, 0), /* Javanese Sign Cecak Telu..Javanese Pangkon */ | ||
253 | R(0x00a9e5, 0x00a9e5, 0), /* Myanmar Sign Shan Saw */ | ||
254 | R(0x00aa29, 0x00aa36, 0), /* Cham Vowel Sign Aa ..Cham Consonant Sign Wa */ | ||
255 | R(0x00aa43, 0x00aa43, 0), /* Cham Consonant Sign Final Ng */ | ||
256 | R(0x00aa4c, 0x00aa4d, 0), /* Cham Consonant Sign Fina..Cham Consonant Sign Fina */ | ||
257 | R(0x00aa7b, 0x00aa7d, 0), /* Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T */ | ||
258 | R(0x00aab0, 0x00aab0, 0), /* Tai Viet Mai Kang */ | ||
259 | R(0x00aab2, 0x00aab4, 0), /* Tai Viet Vowel I ..Tai Viet Vowel U */ | ||
260 | R(0x00aab7, 0x00aab8, 0), /* Tai Viet Mai Khit ..Tai Viet Vowel Ia */ | ||
261 | R(0x00aabe, 0x00aabf, 0), /* Tai Viet Vowel Am ..Tai Viet Tone Mai Ek */ | ||
262 | R(0x00aac1, 0x00aac1, 0), /* Tai Viet Tone Mai Tho */ | ||
263 | R(0x00aaeb, 0x00aaef, 0), /* Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign */ | ||
264 | R(0x00aaf5, 0x00aaf6, 0), /* Meetei Mayek Vowel Sign ..Meetei Mayek Virama */ | ||
265 | R(0x00abe3, 0x00abea, 0), /* Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign */ | ||
266 | R(0x00abec, 0x00abed, 0), /* Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek */ | ||
267 | R(0x00ac00, 0x00d7a3, 2), /* Hangul Syllable Ga ..Hangul Syllable Hih */ | ||
268 | R(0x00d7b0, 0x00d7ff, 0), /* Hangul Jungseong O-yeo .. nil */ | ||
269 | R(0x00f900, 0x00faff, 2), /* Cjk Compatibility Ideogr.. nil */ | ||
270 | R(0x00fb1e, 0x00fb1e, 0), /* Hebrew Point Judeo-spanish Varika */ | ||
271 | R(0x00fe00, 0x00fe0f, 0), /* Variation Selector-1 ..Variation Selector-16 */ | ||
272 | R(0x00fe10, 0x00fe19, 2), /* Presentation Form For Ve..Presentation Form For Ve */ | ||
273 | R(0x00fe20, 0x00fe2f, 0), /* Combining Ligature Left ..Combining Cyrillic Titlo */ | ||
274 | R(0x00fe30, 0x00fe52, 2), /* Presentation Form For Ve..Small Full Stop */ | ||
275 | R(0x00fe54, 0x00fe66, 2), /* Small Semicolon ..Small Equals Sign */ | ||
276 | R(0x00fe68, 0x00fe6b, 2), /* Small Reverse Solidus ..Small Commercial At */ | ||
277 | R(0x00feff, 0x00feff, 0), /* Zero Width No-break Space */ | ||
278 | R(0x00ff01, 0x00ff60, 2), /* Fullwidth Exclamation Ma..Fullwidth Right White Pa */ | ||
279 | R(0x00ffe0, 0x00ffe6, 2), /* Fullwidth Cent Sign ..Fullwidth Won Sign */ | ||
280 | R(0x00fff9, 0x00fffb, 0), /* Interlinear Annotation A..Interlinear Annotation T */ | ||
281 | R(0x0101fd, 0x0101fd, 0), /* Phaistos Disc Sign Combining Oblique Stroke */ | ||
282 | R(0x0102e0, 0x0102e0, 0), /* Coptic Epact Thousands Mark */ | ||
283 | R(0x010376, 0x01037a, 0), /* Combining Old Permic Let..Combining Old Permic Let */ | ||
284 | R(0x010a01, 0x010a03, 0), /* Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo */ | ||
285 | R(0x010a05, 0x010a06, 0), /* Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O */ | ||
286 | R(0x010a0c, 0x010a0f, 0), /* Kharoshthi Vowel Length ..Kharoshthi Sign Visarga */ | ||
287 | R(0x010a38, 0x010a3a, 0), /* Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo */ | ||
288 | R(0x010a3f, 0x010a3f, 0), /* Kharoshthi Virama */ | ||
289 | R(0x010ae5, 0x010ae6, 0), /* Manichaean Abbreviation ..Manichaean Abbreviation */ | ||
290 | R(0x010d24, 0x010d27, 0), /* Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas */ | ||
291 | R(0x010eab, 0x010eac, 0), /* Yezidi Combining Hamza M..Yezidi Combining Madda M */ | ||
292 | R(0x010efd, 0x010eff, 0), /* Arabic Small Low Word Sa..Arabic Small Low Word Ma */ | ||
293 | R(0x010f46, 0x010f50, 0), /* Sogdian Combining Dot Be..Sogdian Combining Stroke */ | ||
294 | R(0x010f82, 0x010f85, 0), /* Old Uyghur Combining Dot..Old Uyghur Combining Two */ | ||
295 | R(0x011000, 0x011002, 0), /* Brahmi Sign Candrabindu ..Brahmi Sign Visarga */ | ||
296 | R(0x011038, 0x011046, 0), /* Brahmi Vowel Sign Aa ..Brahmi Virama */ | ||
297 | R(0x011070, 0x011070, 0), /* Brahmi Sign Old Tamil Virama */ | ||
298 | R(0x011073, 0x011074, 0), /* Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta */ | ||
299 | R(0x01107f, 0x011082, 0), /* Brahmi Number Joiner ..Kaithi Sign Visarga */ | ||
300 | R(0x0110b0, 0x0110ba, 0), /* Kaithi Vowel Sign Aa ..Kaithi Sign Nukta */ | ||
301 | R(0x0110bd, 0x0110bd, 0), /* Kaithi Number Sign */ | ||
302 | R(0x0110c2, 0x0110c2, 0), /* Kaithi Vowel Sign Vocalic R */ | ||
303 | R(0x0110cd, 0x0110cd, 0), /* Kaithi Number Sign Above */ | ||
304 | R(0x011100, 0x011102, 0), /* Chakma Sign Candrabindu ..Chakma Sign Visarga */ | ||
305 | R(0x011127, 0x011134, 0), /* Chakma Vowel Sign A ..Chakma Maayyaa */ | ||
306 | R(0x011145, 0x011146, 0), /* Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei */ | ||
307 | R(0x011173, 0x011173, 0), /* Mahajani Sign Nukta */ | ||
308 | R(0x011180, 0x011182, 0), /* Sharada Sign Candrabindu..Sharada Sign Visarga */ | ||
309 | R(0x0111b3, 0x0111c0, 0), /* Sharada Vowel Sign Aa ..Sharada Sign Virama */ | ||
310 | R(0x0111c9, 0x0111cc, 0), /* Sharada Sandhi Mark ..Sharada Extra Short Vowe */ | ||
311 | R(0x0111ce, 0x0111cf, 0), /* Sharada Vowel Sign Prish..Sharada Sign Inverted Ca */ | ||
312 | R(0x01122c, 0x011237, 0), /* Khojki Vowel Sign Aa ..Khojki Sign Shadda */ | ||
313 | R(0x01123e, 0x01123e, 0), /* Khojki Sign Sukun */ | ||
314 | R(0x011241, 0x011241, 0), /* Khojki Vowel Sign Vocalic R */ | ||
315 | R(0x0112df, 0x0112ea, 0), /* Khudawadi Sign Anusvara ..Khudawadi Sign Virama */ | ||
316 | R(0x011300, 0x011303, 0), /* Grantha Sign Combining A..Grantha Sign Visarga */ | ||
317 | R(0x01133b, 0x01133c, 0), /* Combining Bindu Below ..Grantha Sign Nukta */ | ||
318 | R(0x01133e, 0x011344, 0), /* Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal */ | ||
319 | R(0x011347, 0x011348, 0), /* Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai */ | ||
320 | R(0x01134b, 0x01134d, 0), /* Grantha Vowel Sign Oo ..Grantha Sign Virama */ | ||
321 | R(0x011357, 0x011357, 0), /* Grantha Au Length Mark */ | ||
322 | R(0x011362, 0x011363, 0), /* Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal */ | ||
323 | R(0x011366, 0x01136c, 0), /* Combining Grantha Digit ..Combining Grantha Digit */ | ||
324 | R(0x011370, 0x011374, 0), /* Combining Grantha Letter..Combining Grantha Letter */ | ||
325 | R(0x011435, 0x011446, 0), /* Newa Vowel Sign Aa ..Newa Sign Nukta */ | ||
326 | R(0x01145e, 0x01145e, 0), /* Newa Sandhi Mark */ | ||
327 | R(0x0114b0, 0x0114c3, 0), /* Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta */ | ||
328 | R(0x0115af, 0x0115b5, 0), /* Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal */ | ||
329 | R(0x0115b8, 0x0115c0, 0), /* Siddham Vowel Sign E ..Siddham Sign Nukta */ | ||
330 | R(0x0115dc, 0x0115dd, 0), /* Siddham Vowel Sign Alter..Siddham Vowel Sign Alter */ | ||
331 | R(0x011630, 0x011640, 0), /* Modi Vowel Sign Aa ..Modi Sign Ardhacandra */ | ||
332 | R(0x0116ab, 0x0116b7, 0), /* Takri Sign Anusvara ..Takri Sign Nukta */ | ||
333 | R(0x01171d, 0x01172b, 0), /* Ahom Consonant Sign Medi..Ahom Sign Killer */ | ||
334 | R(0x01182c, 0x01183a, 0), /* Dogra Vowel Sign Aa ..Dogra Sign Nukta */ | ||
335 | R(0x011930, 0x011935, 0), /* Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign E */ | ||
336 | R(0x011937, 0x011938, 0), /* Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O */ | ||
337 | R(0x01193b, 0x01193e, 0), /* Dives Akuru Sign Anusvar..Dives Akuru Virama */ | ||
338 | R(0x011940, 0x011940, 0), /* Dives Akuru Medial Ya */ | ||
339 | R(0x011942, 0x011943, 0), /* Dives Akuru Medial Ra ..Dives Akuru Sign Nukta */ | ||
340 | R(0x0119d1, 0x0119d7, 0), /* Nandinagari Vowel Sign A..Nandinagari Vowel Sign V */ | ||
341 | R(0x0119da, 0x0119e0, 0), /* Nandinagari Vowel Sign E..Nandinagari Sign Virama */ | ||
342 | R(0x0119e4, 0x0119e4, 0), /* Nandinagari Vowel Sign Prishthamatra E */ | ||
343 | R(0x011a01, 0x011a0a, 0), /* Zanabazar Square Vowel S..Zanabazar Square Vowel L */ | ||
344 | R(0x011a33, 0x011a39, 0), /* Zanabazar Square Final C..Zanabazar Square Sign Vi */ | ||
345 | R(0x011a3b, 0x011a3e, 0), /* Zanabazar Square Cluster..Zanabazar Square Cluster */ | ||
346 | R(0x011a47, 0x011a47, 0), /* Zanabazar Square Subjoiner */ | ||
347 | R(0x011a51, 0x011a5b, 0), /* Soyombo Vowel Sign I ..Soyombo Vowel Length Mar */ | ||
348 | R(0x011a8a, 0x011a99, 0), /* Soyombo Final Consonant ..Soyombo Subjoiner */ | ||
349 | R(0x011c2f, 0x011c36, 0), /* Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc */ | ||
350 | R(0x011c38, 0x011c3f, 0), /* Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama */ | ||
351 | R(0x011c92, 0x011ca7, 0), /* Marchen Subjoined Letter..Marchen Subjoined Letter */ | ||
352 | R(0x011ca9, 0x011cb6, 0), /* Marchen Subjoined Letter..Marchen Sign Candrabindu */ | ||
353 | R(0x011d31, 0x011d36, 0), /* Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign */ | ||
354 | R(0x011d3a, 0x011d3a, 0), /* Masaram Gondi Vowel Sign E */ | ||
355 | R(0x011d3c, 0x011d3d, 0), /* Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign */ | ||
356 | R(0x011d3f, 0x011d45, 0), /* Masaram Gondi Vowel Sign..Masaram Gondi Virama */ | ||
357 | R(0x011d47, 0x011d47, 0), /* Masaram Gondi Ra-kara */ | ||
358 | R(0x011d8a, 0x011d8e, 0), /* Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign */ | ||
359 | R(0x011d90, 0x011d91, 0), /* Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign */ | ||
360 | R(0x011d93, 0x011d97, 0), /* Gunjala Gondi Vowel Sign..Gunjala Gondi Virama */ | ||
361 | R(0x011ef3, 0x011ef6, 0), /* Makasar Vowel Sign I ..Makasar Vowel Sign O */ | ||
362 | R(0x011f00, 0x011f01, 0), /* Kawi Sign Candrabindu ..Kawi Sign Anusvara */ | ||
363 | R(0x011f03, 0x011f03, 0), /* Kawi Sign Visarga */ | ||
364 | R(0x011f34, 0x011f3a, 0), /* Kawi Vowel Sign Aa ..Kawi Vowel Sign Vocalic */ | ||
365 | R(0x011f3e, 0x011f42, 0), /* Kawi Vowel Sign E ..Kawi Conjoiner */ | ||
366 | R(0x013430, 0x013440, 0), /* Egyptian Hieroglyph Vert..Egyptian Hieroglyph Mirr */ | ||
367 | R(0x013447, 0x013455, 0), /* Egyptian Hieroglyph Modi..Egyptian Hieroglyph Modi */ | ||
368 | R(0x016af0, 0x016af4, 0), /* Bassa Vah Combining High..Bassa Vah Combining High */ | ||
369 | R(0x016b30, 0x016b36, 0), /* Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta */ | ||
370 | R(0x016f4f, 0x016f4f, 0), /* Miao Sign Consonant Modifier Bar */ | ||
371 | R(0x016f51, 0x016f87, 0), /* Miao Sign Aspiration ..Miao Vowel Sign Ui */ | ||
372 | R(0x016f8f, 0x016f92, 0), /* Miao Tone Right ..Miao Tone Below */ | ||
373 | R(0x016fe0, 0x016fe3, 2), /* Tangut Iteration Mark ..Old Chinese Iteration Ma */ | ||
374 | R(0x016fe4, 0x016fe4, 0), /* Khitan Small Script Filler */ | ||
375 | R(0x016ff0, 0x016ff1, 0), /* Vietnamese Alternate Rea..Vietnamese Alternate Rea */ | ||
376 | R(0x017000, 0x0187f7, 2), /* nil */ | ||
377 | R(0x018800, 0x018cd5, 2), /* Tangut Component-001 ..Khitan Small Script Char */ | ||
378 | R(0x018d00, 0x018d08, 2), /* nil */ | ||
379 | R(0x01aff0, 0x01aff3, 2), /* Katakana Letter Minnan T..Katakana Letter Minnan T */ | ||
380 | R(0x01aff5, 0x01affb, 2), /* Katakana Letter Minnan T..Katakana Letter Minnan N */ | ||
381 | R(0x01affd, 0x01affe, 2), /* Katakana Letter Minnan N..Katakana Letter Minnan N */ | ||
382 | R(0x01b000, 0x01b122, 2), /* Katakana Letter Archaic ..Katakana Letter Archaic */ | ||
383 | R(0x01b132, 0x01b132, 2), /* Hiragana Letter Small Ko */ | ||
384 | R(0x01b150, 0x01b152, 2), /* Hiragana Letter Small Wi..Hiragana Letter Small Wo */ | ||
385 | R(0x01b155, 0x01b155, 2), /* Katakana Letter Small Ko */ | ||
386 | R(0x01b164, 0x01b167, 2), /* Katakana Letter Small Wi..Katakana Letter Small N */ | ||
387 | R(0x01b170, 0x01b2fb, 2), /* Nushu Character-1b170 ..Nushu Character-1b2fb */ | ||
388 | R(0x01bc9d, 0x01bc9e, 0), /* Duployan Thick Letter Se..Duployan Double Mark */ | ||
389 | R(0x01bca0, 0x01bca3, 0), /* Shorthand Format Letter ..Shorthand Format Up Step */ | ||
390 | R(0x01cf00, 0x01cf2d, 0), /* Znamenny Combining Mark ..Znamenny Combining Mark */ | ||
391 | R(0x01cf30, 0x01cf46, 0), /* Znamenny Combining Tonal..Znamenny Priznak Modifie */ | ||
392 | R(0x01d165, 0x01d169, 0), /* Musical Symbol Combining..Musical Symbol Combining */ | ||
393 | R(0x01d16d, 0x01d182, 0), /* Musical Symbol Combining..Musical Symbol Combining */ | ||
394 | R(0x01d185, 0x01d18b, 0), /* Musical Symbol Combining..Musical Symbol Combining */ | ||
395 | R(0x01d1aa, 0x01d1ad, 0), /* Musical Symbol Combining..Musical Symbol Combining */ | ||
396 | R(0x01d242, 0x01d244, 0), /* Combining Greek Musical ..Combining Greek Musical */ | ||
397 | R(0x01da00, 0x01da36, 0), /* Signwriting Head Rim ..Signwriting Air Sucking */ | ||
398 | R(0x01da3b, 0x01da6c, 0), /* Signwriting Mouth Closed..Signwriting Excitement */ | ||
399 | R(0x01da75, 0x01da75, 0), /* Signwriting Upper Body Tilting From Hip Joints */ | ||
400 | R(0x01da84, 0x01da84, 0), /* Signwriting Location Head Neck */ | ||
401 | R(0x01da9b, 0x01da9f, 0), /* Signwriting Fill Modifie..Signwriting Fill Modifie */ | ||
402 | R(0x01daa1, 0x01daaf, 0), /* Signwriting Rotation Mod..Signwriting Rotation Mod */ | ||
403 | R(0x01e000, 0x01e006, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */ | ||
404 | R(0x01e008, 0x01e018, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */ | ||
405 | R(0x01e01b, 0x01e021, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */ | ||
406 | R(0x01e023, 0x01e024, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */ | ||
407 | R(0x01e026, 0x01e02a, 0), /* Combining Glagolitic Let..Combining Glagolitic Let */ | ||
408 | R(0x01e08f, 0x01e08f, 0), /* Combining Cyrillic Small Letter Byelorussian-ukr */ | ||
409 | R(0x01e130, 0x01e136, 0), /* Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T */ | ||
410 | R(0x01e2ae, 0x01e2ae, 0), /* Toto Sign Rising Tone */ | ||
411 | R(0x01e2ec, 0x01e2ef, 0), /* Wancho Tone Tup ..Wancho Tone Koini */ | ||
412 | R(0x01e4ec, 0x01e4ef, 0), /* Nag Mundari Sign Muhor ..Nag Mundari Sign Sutuh */ | ||
413 | R(0x01e8d0, 0x01e8d6, 0), /* Mende Kikakui Combining ..Mende Kikakui Combining */ | ||
414 | R(0x01e944, 0x01e94a, 0), /* Adlam Alif Lengthener ..Adlam Nukta */ | ||
415 | R(0x01f004, 0x01f004, 2), /* Mahjong Tile Red Dragon */ | ||
416 | R(0x01f0cf, 0x01f0cf, 2), /* Playing Card Black Joker */ | ||
417 | R(0x01f18e, 0x01f18e, 2), /* Negative Squared Ab */ | ||
418 | R(0x01f191, 0x01f19a, 2), /* Squared Cl ..Squared Vs */ | ||
419 | R(0x01f200, 0x01f202, 2), /* Square Hiragana Hoka ..Squared Katakana Sa */ | ||
420 | R(0x01f210, 0x01f23b, 2), /* Squared Cjk Unified Ideo..Squared Cjk Unified Ideo */ | ||
421 | R(0x01f240, 0x01f248, 2), /* Tortoise Shell Bracketed..Tortoise Shell Bracketed */ | ||
422 | R(0x01f250, 0x01f251, 2), /* Circled Ideograph Advant..Circled Ideograph Accept */ | ||
423 | R(0x01f260, 0x01f265, 2), /* Rounded Symbol For Fu ..Rounded Symbol For Cai */ | ||
424 | R(0x01f300, 0x01f320, 2), /* Cyclone ..Shooting Star */ | ||
425 | R(0x01f32d, 0x01f335, 2), /* Hot Dog ..Cactus */ | ||
426 | R(0x01f337, 0x01f37c, 2), /* Tulip ..Baby Bottle */ | ||
427 | R(0x01f37e, 0x01f393, 2), /* Bottle With Popping Cork..Graduation Cap */ | ||
428 | R(0x01f3a0, 0x01f3ca, 2), /* Carousel Horse ..Swimmer */ | ||
429 | R(0x01f3cf, 0x01f3d3, 2), /* Cricket Bat And Ball ..Table Tennis Paddle And */ | ||
430 | R(0x01f3e0, 0x01f3f0, 2), /* House Building ..European Castle */ | ||
431 | R(0x01f3f4, 0x01f3f4, 2), /* Waving Black Flag */ | ||
432 | R(0x01f3f8, 0x01f3fa, 2), /* Badminton Racquet And Sh..Amphora */ | ||
433 | R(0x01f3fb, 0x01f3ff, 0), /* Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri */ | ||
434 | R(0x01f400, 0x01f43e, 2), /* Rat ..Paw Prints */ | ||
435 | R(0x01f440, 0x01f440, 2), /* Eyes */ | ||
436 | R(0x01f442, 0x01f4fc, 2), /* Ear ..Videocassette */ | ||
437 | R(0x01f4ff, 0x01f53d, 2), /* Prayer Beads ..Down-pointing Small Red */ | ||
438 | R(0x01f54b, 0x01f54e, 2), /* Kaaba ..Menorah With Nine Branch */ | ||
439 | R(0x01f550, 0x01f567, 2), /* Clock Face One Oclock ..Clock Face Twelve-thirty */ | ||
440 | R(0x01f57a, 0x01f57a, 2), /* Man Dancing */ | ||
441 | R(0x01f595, 0x01f596, 2), /* Reversed Hand With Middl..Raised Hand With Part Be */ | ||
442 | R(0x01f5a4, 0x01f5a4, 2), /* Black Heart */ | ||
443 | R(0x01f5fb, 0x01f64f, 2), /* Mount Fuji ..Person With Folded Hands */ | ||
444 | R(0x01f680, 0x01f6c5, 2), /* Rocket ..Left Luggage */ | ||
445 | R(0x01f6cc, 0x01f6cc, 2), /* Sleeping Accommodation */ | ||
446 | R(0x01f6d0, 0x01f6d2, 2), /* Place Of Worship ..Shopping Trolley */ | ||
447 | R(0x01f6d5, 0x01f6d7, 2), /* Hindu Temple ..Elevator */ | ||
448 | R(0x01f6dc, 0x01f6df, 2), /* Wireless ..Ring Buoy */ | ||
449 | R(0x01f6eb, 0x01f6ec, 2), /* Airplane Departure ..Airplane Arriving */ | ||
450 | R(0x01f6f4, 0x01f6fc, 2), /* Scooter ..Roller Skate */ | ||
451 | R(0x01f7e0, 0x01f7eb, 2), /* Large Orange Circle ..Large Brown Square */ | ||
452 | R(0x01f7f0, 0x01f7f0, 2), /* Heavy Equals Sign */ | ||
453 | R(0x01f90c, 0x01f93a, 2), /* Pinched Fingers ..Fencer */ | ||
454 | R(0x01f93c, 0x01f945, 2), /* Wrestlers ..Goal Net */ | ||
455 | R(0x01f947, 0x01f9ff, 2), /* First Place Medal ..Nazar Amulet */ | ||
456 | R(0x01fa70, 0x01fa7c, 2), /* Ballet Shoes ..Crutch */ | ||
457 | R(0x01fa80, 0x01fa88, 2), /* Yo-yo ..Flute */ | ||
458 | R(0x01fa90, 0x01fabd, 2), /* Ringed Planet ..Wing */ | ||
459 | R(0x01fabf, 0x01fac5, 2), /* Goose ..Person With Crown */ | ||
460 | R(0x01face, 0x01fadb, 2), /* Moose ..Pea Pod */ | ||
461 | R(0x01fae0, 0x01fae8, 2), /* Melting Face ..Shaking Face */ | ||
462 | R(0x01faf0, 0x01faf8, 2), /* Hand With Index Finger A..Rightwards Pushing Hand */ | ||
463 | R(0x020000, 0x027fff, 2), /* Cjk Unified Ideograph-20.. nil */ | ||
464 | R(0x028000, 0x02fffd, 2), /* (continued...) */ | ||
465 | R(0x030000, 0x037fff, 2), /* Cjk Unified Ideograph-30.. nil */ | ||
466 | R(0x038000, 0x03fffd, 2), /* (continued...) */ | ||
467 | R(0x0e0001, 0x0e0001, 0), /* Language Tag */ | ||
468 | R(0x0e0020, 0x0e007f, 0), /* Tag Space ..Cancel Tag */ | ||
469 | R(0x0e0100, 0x0e01ef, 0), /* Variation Selector-17 ..Variation Selector-256 */ | ||
470 | #undef R | ||
471 | }; | ||
472 | |||
473 | /* planes[p], planes[p+1] are [from, to) at "ranges" for plane p */ | ||
474 | static const uint16_t planes[/* 18 */] = { | ||
475 | 0, 261, 443, 445, 447, 447, 447, 447, 447, 447, 447, 447, | ||
476 | 447, 447, 447, 450, 450, 450, | ||
477 | }; | ||
478 | |||
479 | /******* END OF STATIC DATA *******/ | ||
480 | |||
481 | uint32_t p, bot, top; | ||
482 | |||
483 | /* 0:0, 1..31:-1 (C0), 32..126:1 (isprint), 127..159:-1 (DEL, C1) */ | ||
484 | if (ucs < 160) | ||
485 | return ((ucs + 1) & 127) > 32 ? 1 : ucs ? -1 : 0; | ||
486 | |||
487 | /* out of range for "planes" (and non-unicode), non-characters. */ | ||
488 | /* (some also test surrogate halves, but not required by POSIX) */ | ||
489 | if (ucs > 0x10ffff || (ucs & 0xfffe) == 0xfffe) | ||
490 | return -1; | ||
491 | |||
492 | p = ucs >> 16; | ||
493 | ucs &= 0xffff; | ||
494 | |||
495 | for (bot = planes[p], top = planes[p+1]; bot < top; ) { | ||
496 | uint32_t mid = (bot + top) / 2; | ||
497 | if (ucs < ranges[mid].first) | ||
498 | top = mid; | ||
499 | else if (ucs > ranges[mid].first + ranges[mid].difflast) | ||
500 | bot = mid + 1; | ||
501 | else | ||
502 | return 2 * ranges[mid].iswide; | ||
503 | } | ||
504 | |||
505 | return 1; | ||
506 | } /* wcwidth - Unicode 15.1.0 */ | ||
diff --git a/scripts/mkwcwidth b/scripts/mkwcwidth new file mode 100755 index 000000000..792045a29 --- /dev/null +++ b/scripts/mkwcwidth | |||
@@ -0,0 +1,169 @@ | |||
1 | #!/bin/sh | ||
2 | # | ||
3 | # Generate a C implementation of wcwidth, with latest unicode data | ||
4 | # from a local clone of https://github.com/jquast/wcwidth | ||
5 | # | ||
6 | # The MIT License (MIT) | ||
7 | # | ||
8 | # Copyright (C) 2024 Avi Halachmi <avihpit at yahoo.com> | ||
9 | # | ||
10 | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||
11 | # of this software and associated documentation files (the "Software"), to deal | ||
12 | # in the Software without restriction, including without limitation the rights | ||
13 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
14 | # copies of the Software, and to permit persons to whom the Software is | ||
15 | # furnished to do so, subject to the following conditions: | ||
16 | # | ||
17 | # The above copyright notice and this permission notice shall be included in all | ||
18 | # copies or substantial portions of the Software. | ||
19 | # | ||
20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
21 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
22 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
23 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
24 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
25 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
26 | # SOFTWARE. | ||
27 | |||
# Run every tool invoked below in the C locale.
LC_ALL=C
export LC_ALL

# Name this script was invoked as, without any leading directories.
self=${0##*/}

# C type names substituted into the generated code. Bigger types work too
# but waste memory; the uintN_t names need <stdint.h>.
u32=uint32_t  # "unsigned" is also typically 32 bit
u16=uint16_t  # "unsigned short" is also typically 16 bits

# Attribute emitted between the return type and the function name;
# delete this assignment if not generating a busybox function.
FUNC_ATTR=FAST_FUNC
35 | |||
36 | |||
# Print "<script name>: <arguments>" on stderr, then exit with status 1.
err() {
	printf '%s\n' "$self: $*" >&2
	exit 1
}
38 | |||
# -h/--help: print the usage text and stop. Without the explicit exit the
# script would carry on and treat the option itself as the repo path.
case ${1-} in -h | --help)
	echo "Usage: $self [path/to/python-wcwidth] (default path is '.')"
	echo "Prints a wcwidth C implementation, with latest Unicode data"
	echo "imported from a local https://github.com/jquast/wcwidth repo."
	echo "Assumptions about table_zero.py and table_wide.py at the repo:"
	echo "- Each range is in one Unicode plane (a>>16 == b>>16) (enforced)."
	echo "- Commit 04d6d90c (2023-10-30) or later, where table_zero.py"
	echo " includes zero-width Cf chars (else need to add manual tests)."
	exit 0
esac
48 | |||
# Accept an optional "--" in front of the path operand.
if [ "${1-}" = -- ]; then
	shift
fi

# Root of the local python-wcwidth clone; the current directory by default.
pwc_root=${1:-.}

# Run git inside that clone.
pwc_git() {
	git -C "$pwc_root" "$@"
}

# The two python tables the C ranges are imported from.
zerowidth_py=$pwc_root/wcwidth/table_zero.py
widewidth_py=$pwc_root/wcwidth/table_wide.py

# Both tables must be readable before anything is generated.
if ! { [ -r "$zerowidth_py" ] && [ -r "$widewidth_py" ]; }; then
	err "missing $zerowidth_py or $widewidth_py. abort."
fi
59 | |||
# Latest unicode version in table_wide.py: the quoted text on the last line
# that starts a table (e.g. from "    '10.0.0': (" take 10.0.0).
# [[:space:]] is used instead of the non-POSIX \s so any grep works.
ver=$(grep "^[[:space:]]*'[0-9]" < "$widewidth_py" |
	tail -n 1 |
	sed "s/.*'\(.*\)'.*/\1/")
62 | |||
# stdin -> stdout: print only the range lines of the LAST table (the latest
# spec) found in wcwidth/table_{wide,zero}.py
# (from https://github.com/jquast/wcwidth).
#
# A line starting with optional blanks and a quoted version ('N...) opens a
# new table and discards the ranges collected so far; a line starting with
# optional blanks and "(0x" is a range. Whatever is collected at EOF is
# printed in input order.
# [[:space:]] is used rather than \s, which is not part of POSIX awk EREs.
last_table() {
	awk "/^[[:space:]]*'[0-9]/ { n = 0 }           # new table -> reset
	     /^[[:space:]]*\(0x/   { row[++n] = \$0 }  # range (first, last)
	     END { for (k = 1; k <= n; ++k) print row[k] }"
}
70 | |||
# stdin -> stdout; $1 is the (wc)width of every range read (0 or 2).
# Turns python range lines into R() initializers, e.g.
#   from: (0x0123a, 0x0123c,), # comment
#   to  : R(0x00123a, 0x00123c, 2), /* comment */
# A range bigger than half a plane (32769+ codepoints) is emitted as two
# entries, the second one commented "(continued...)".
py_data_to_c() {
	sed -e 's/[(),]/ /g' -e 's|#\(.*\)|/*\1 */|' |
	while read lo hi note; do
		# Supporting cross-plane ranges would mean splitting them here,
		# but that is unlikely to be needed, as all planes end in
		# non-characters.
		if [ $(($lo>>16)) != $(($hi>>16)) ]; then
			err "not same plane -- $lo $hi"
		fi

		# some shells want decimal vars in $(())
		lo=$(($lo))
		hi=$(($hi))

		# split so that last-first never needs more than 15 bits
		if [ "$((hi-lo))" -ge 32768 ]; then
			printf "R(0x%06x, 0x%06x, $1), %s\n" $lo $((lo+32767)) "$note"
			lo=$((lo+32768))
			note="/* (continued...) */"
		fi
		printf "R(0x%06x, 0x%06x, $1), %s\n" $lo $hi "$note"
	done
}
89 | |||
# Convert the latest zero-width table, then the latest wide table; give up
# if either conversion fails.
data=$(
	last_table < "$zerowidth_py" | py_data_to_c 0 &&
	last_table < "$widewidth_py" | py_data_to_c 2
) || err abort

# Merge both sets into one ordered list (lexicographic here is also numeric).
data=$(printf '%s\n' "$data" | sort)

# Print the sorted hex ranges and their (wc)width, one per line:
# R(first, last, {0|2}),[ /* ... */]
data() {
	printf '%s\n' "$data"
}
96 | |||
# Print the string $1 exactly $2 times, with nothing added between or after
# the copies; prints nothing when $2 is not positive.
repeat() {
	R=$2
	while [ "$R" -gt 0 ]; do
		printf %s "$1"
		R=$((R-1))
	done
}
98 | |||
# data -> stdout: the elements ("N, " each) of an array such that a[p] and
# a[p+1] are the [from, to) indices of plane p's entries in the data.
# Reads the sorted R(first, last, width), lines on stdin.
mkplanes() {
	i=0       # index of the entry being read
	lastp=-1  # plane of the previous entry (none yet)
	while read first last rest; do
		# plane (last >> 16); %? drops the comma glued to the field
		p=$((${last%?} >> 16))
		# Emit the start index once for every plane entered since the
		# previous entry (nothing while still in the same plane).
		repeat "$i, " $((p - lastp))
		lastp=$p
		i=$((i + 1))
	done
	# Close the last plane seen and every later one with the total count.
	repeat "$i, " $((17 - lastp))
}
109 | |||
# stdin -> stdout: prefix every line with two tabs, then trim trailing
# whitespace (so empty lines stay empty).
# The tabs come from printf and the trim uses [[:space:]], because "\t" in a
# sed replacement and "\s" in a sed pattern are GNU extensions.
indent() {
	sed -e "s/^/$(printf '\t\t')/" -e 's/[[:space:]]*$//'
}
111 | |||
# Emit the generated C function. The here-document is unquoted, so $ver, $0,
# $u32, $u16, ${FUNC_ATTR-} and the $(...) command substitutions are expanded
# by the shell; everything else is copied verbatim.
# The struct members use $u16 like the "planes" array does, so changing the
# u16 setting affects all 16-bit data, and the date is formatted with a
# POSIX format string rather than the non-standard "date -I".
cat << CFUNCTION
/* wcwidth - Unicode $ver, generated by $0.
 * Copyright (C) 2024 Avi Halachmi <avihpit at yahoo.com>
 * License: MIT
 *
 * Data imported on $(date -u +%Y-%m-%d) from https://github.com/jquast/wcwidth
 * commit $(pwc_git describe --tags) ($(pwc_git show --no-patch --format=%ci))
 */
int ${FUNC_ATTR-} wcwidth($u32 ucs)
{
	/* sorted ranges, "first" is clipped to 16 bit, and its high bits
	 * (plane) are deduced from the "planes" array below.
	 * (imported from ${zerowidth_py##*/} and ${widewidth_py##*/})
	 */
	static const struct range {
		$u16 first;
		$u16 iswide: 1; /* bitfield order empirically faster */
		$u16 difflast: 15;
	} ranges[] = {
#define R(first, last, width) {first & 0xffff, width/2, last-first}
$(data | indent)
#undef R
	};

	/* planes[p], planes[p+1] are [from, to) at "ranges" for plane p */
	static const $u16 planes[/* 18 */] = {
$(data | mkplanes | fold -s -w 60 | indent)
	};

	/******* END OF STATIC DATA *******/

	$u32 p, bot, top;

	/* 0:0, 1..31:-1 (C0), 32..126:1 (isprint), 127..159:-1 (DEL, C1) */
	if (ucs < 160)
		return ((ucs + 1) & 127) > 32 ? 1 : ucs ? -1 : 0;

	/* out of range for "planes" (and non-unicode), non-characters. */
	/* (some also test surrogate halves, but not required by POSIX) */
	if (ucs > 0x10ffff || (ucs & 0xfffe) == 0xfffe)
		return -1;

	p = ucs >> 16;
	ucs &= 0xffff;

	for (bot = planes[p], top = planes[p+1]; bot < top; ) {
		$u32 mid = (bot + top) / 2;
		if (ucs < ranges[mid].first)
			top = mid;
		else if (ucs > ranges[mid].first + ranges[mid].difflast)
			bot = mid + 1;
		else
			return 2 * ranges[mid].iswide;
	}

	return 1;
} /* wcwidth - Unicode $ver */
CFUNCTION