diff options
author | Thijs Schreijer <thijs@thijsschreijer.nl> | 2025-01-26 20:15:39 +0100 |
---|---|---|
committer | Thijs Schreijer <thijs@thijsschreijer.nl> | 2025-01-26 20:15:39 +0100 |
commit | f697ea7e7603e916d5ee13327fcdaf9c811a00aa (patch) | |
tree | e044dec90ee1cc497ac0cf1d9892aee11089a3e5 | |
parent | 85ad15fbd8c81807a1a662f5b6060641fa3a6357 (diff) | |
download | luasystem-unicode/ambiguous.tar.gz luasystem-unicode/ambiguous.tar.bz2 luasystem-unicode/ambiguous.zip |
feat(terminal): check ambiguous unicode width (unicode/ambiguous)
-rw-r--r-- | luasystem-scm-0.rockspec | 1 | ||||
-rw-r--r-- | spec/04-term_spec.lua | 6 | ||||
-rw-r--r-- | src/term.c | 11 | ||||
-rw-r--r-- | src/wcwidtha.c | 259 | ||||
-rw-r--r-- | src/wcwidtha.h | 12 |
5 files changed, 288 insertions, 1 deletions
diff --git a/luasystem-scm-0.rockspec b/luasystem-scm-0.rockspec index 00a442c..ab83080 100644 --- a/luasystem-scm-0.rockspec +++ b/luasystem-scm-0.rockspec | |||
@@ -61,6 +61,7 @@ local function make_platform(plat) | |||
61 | 'src/term.c', | 61 | 'src/term.c', |
62 | 'src/bitflags.c', | 62 | 'src/bitflags.c', |
63 | 'src/wcwidth.c', | 63 | 'src/wcwidth.c', |
64 | 'src/wcwidtha.c', | ||
64 | }, | 65 | }, |
65 | defines = defines[plat], | 66 | defines = defines[plat], |
66 | libraries = libraries[plat], | 67 | libraries = libraries[plat], |
diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua index 813947a..711059b 100644 --- a/spec/04-term_spec.lua +++ b/spec/04-term_spec.lua | |||
@@ -539,6 +539,12 @@ describe("Terminal:", function() | |||
539 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) | 539 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) |
540 | end) | 540 | end) |
541 | 541 | ||
542 | it("returns 2nd ambigious boolean value only if requested", function() | ||
543 | assert.same({1}, {system.utf8cwidth("¡", false)}) | ||
544 | assert.same({1, true}, {system.utf8cwidth("¡", true)}) | ||
545 | assert.same({1, false}, {system.utf8cwidth("a", true)}) | ||
546 | end) | ||
547 | |||
542 | end) | 548 | end) |
543 | 549 | ||
544 | 550 | ||
@@ -36,6 +36,7 @@ | |||
36 | // Windows does not have a wcwidth function, so we use compatibility code from | 36 | // Windows does not have a wcwidth function, so we use compatibility code from |
37 | // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn | 37 | // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn |
38 | #include "wcwidth.h" | 38 | #include "wcwidth.h" |
39 | #include "wcwidtha.h" // ambiguous width checks for East Asian characters | ||
39 | 40 | ||
40 | 41 | ||
41 | #ifdef _WIN32 | 42 | #ifdef _WIN32 |
@@ -950,14 +951,16 @@ int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) { | |||
950 | Get the width of a utf8 character for terminal display. | 951 | Get the width of a utf8 character for terminal display. |
951 | @function utf8cwidth | 952 | @function utf8cwidth |
952 | @tparam string utf8_char the utf8 character to check, only the width of the first character will be returned | 953 | @tparam string utf8_char the utf8 character to check, only the width of the first character will be returned |
954 | @tparam bool ambiguous if `true` a second return value will be returned; boolean indicating if the character is ambiguous | ||
953 | @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) | 955 | @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) |
954 | @treturn[2] nil | 956 | @treturn[2] nil|bool if `ambiguous` is `true`, a boolean indicating if the character is ambiguous |
955 | @treturn[2] string error message | 957 | @treturn[2] string error message |
956 | */ | 958 | */ |
957 | int lst_utf8cwidth(lua_State *L) { | 959 | int lst_utf8cwidth(lua_State *L) { |
958 | const char *utf8_char; | 960 | const char *utf8_char; |
959 | size_t utf8_len; | 961 | size_t utf8_len; |
960 | utf8_char = luaL_checklstring(L, 1, &utf8_len); | 962 | utf8_char = luaL_checklstring(L, 1, &utf8_len); |
963 | int ambiguous = lua_toboolean(L, 2); | ||
961 | int width = 0; | 964 | int width = 0; |
962 | 965 | ||
963 | mk_wchar_t wc; | 966 | mk_wchar_t wc; |
@@ -984,6 +987,12 @@ int lst_utf8cwidth(lua_State *L) { | |||
984 | } | 987 | } |
985 | 988 | ||
986 | lua_pushinteger(L, width); | 989 | lua_pushinteger(L, width); |
990 | |||
991 | if (ambiguous) { | ||
992 | // also check if the width is ambiguous | ||
993 | lua_pushboolean(L, mk_wcwidth_a(wc)); | ||
994 | return 2; | ||
995 | } | ||
987 | return 1; | 996 | return 1; |
988 | } | 997 | } |
989 | 998 | ||
diff --git a/src/wcwidtha.c b/src/wcwidtha.c new file mode 100644 index 0000000..2936ee0 --- /dev/null +++ b/src/wcwidtha.c | |||
@@ -0,0 +1,259 @@ | |||
1 | // To update this file to the latest version of the Unicode standard | ||
2 | // save the Lua script below to a file named 'getranges.lua' | ||
3 | // execute as: | ||
4 | // curl -s https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt | lua getranges.lua | ||
5 | |||
6 | /* the script: | ||
7 | local function singleline(line) | ||
8 | if line:find("; A", 1, true) then -- handle ambiguous characters only | ||
9 | local s,e = line:match("^([0-9a-fA-F]+)%.?%.?([0-9a-fA-F]*)") | ||
10 | e = (e == "" and s) or e -- single char, so end-range == start-range | ||
11 | local cmmnt = "// "..line:match("(; A.*)$") | ||
12 | local range = " {0x"..s..", 0x"..e.."}," | ||
13 | print(range..(" "):rep(30-#range)..cmmnt) -- print formatted output line | ||
14 | end | ||
15 | end | ||
16 | |||
17 | -- read all lines from stdin and iterate over them | ||
18 | local t = {} | ||
19 | for line in io.lines() do | ||
20 | line = line:match("^%s*(.-)%s*$") -- strip whitespace | ||
21 | if line ~= "" and line:sub(1,1) ~= "#" then -- skip comments and empty lines | ||
22 | singleline(line) | ||
23 | end | ||
24 | end | ||
25 | */ | ||
26 | |||
27 | |||
28 | #include "wcwidtha.h" | ||
29 | |||
30 | struct interval { /* one contiguous range of code points; both bounds are compared inclusively by mk_wcwidth_a */ | ||
31 | mk_wchar_t start; /* first code point of the range */ | ||
32 | mk_wchar_t end; /* last code point of the range */ | ||
33 | }; | ||
34 | |||
35 | |||
36 | // Takes a unicode code point and returns 1 if it lies within one of the | ||
37 | // ambiguous-width ranges listed below, or 0 if it does not. | ||
38 | int mk_wcwidth_a(mk_wchar_t ucs) | ||
39 | { | ||
40 | /* list of inclusive {start, end} ranges of characters with ambiguous width in East Asian displays; kept sorted in ascending order because the lookup below is a binary search */ | ||
41 | /* generated by the Lua script in the comment at the top of this file */ | ||
42 | static const struct interval ranges[] = { | ||
43 | {0x00A1, 0x00A1}, // ; A # Po INVERTED EXCLAMATION MARK | ||
44 | {0x00A4, 0x00A4}, // ; A # Sc CURRENCY SIGN | ||
45 | {0x00A7, 0x00A7}, // ; A # Po SECTION SIGN | ||
46 | {0x00A8, 0x00A8}, // ; A # Sk DIAERESIS | ||
47 | {0x00AA, 0x00AA}, // ; A # Lo FEMININE ORDINAL INDICATOR | ||
48 | {0x00AD, 0x00AD}, // ; A # Cf SOFT HYPHEN | ||
49 | {0x00AE, 0x00AE}, // ; A # So REGISTERED SIGN | ||
50 | {0x00B0, 0x00B0}, // ; A # So DEGREE SIGN | ||
51 | {0x00B1, 0x00B1}, // ; A # Sm PLUS-MINUS SIGN | ||
52 | {0x00B2, 0x00B3}, // ; A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE | ||
53 | {0x00B4, 0x00B4}, // ; A # Sk ACUTE ACCENT | ||
54 | {0x00B6, 0x00B7}, // ; A # Po [2] PILCROW SIGN..MIDDLE DOT | ||
55 | {0x00B8, 0x00B8}, // ; A # Sk CEDILLA | ||
56 | {0x00B9, 0x00B9}, // ; A # No SUPERSCRIPT ONE | ||
57 | {0x00BA, 0x00BA}, // ; A # Lo MASCULINE ORDINAL INDICATOR | ||
58 | {0x00BC, 0x00BE}, // ; A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS | ||
59 | {0x00BF, 0x00BF}, // ; A # Po INVERTED QUESTION MARK | ||
60 | {0x00C6, 0x00C6}, // ; A # Lu LATIN CAPITAL LETTER AE | ||
61 | {0x00D0, 0x00D0}, // ; A # Lu LATIN CAPITAL LETTER ETH | ||
62 | {0x00D7, 0x00D7}, // ; A # Sm MULTIPLICATION SIGN | ||
63 | {0x00D8, 0x00D8}, // ; A # Lu LATIN CAPITAL LETTER O WITH STROKE | ||
64 | {0x00DE, 0x00E1}, // ; A # L& [4] LATIN CAPITAL LETTER THORN..LATIN SMALL LETTER A WITH ACUTE | ||
65 | {0x00E6, 0x00E6}, // ; A # Ll LATIN SMALL LETTER AE | ||
66 | {0x00E8, 0x00EA}, // ; A # Ll [3] LATIN SMALL LETTER E WITH GRAVE..LATIN SMALL LETTER E WITH CIRCUMFLEX | ||
67 | {0x00EC, 0x00ED}, // ; A # Ll [2] LATIN SMALL LETTER I WITH GRAVE..LATIN SMALL LETTER I WITH ACUTE | ||
68 | {0x00F0, 0x00F0}, // ; A # Ll LATIN SMALL LETTER ETH | ||
69 | {0x00F2, 0x00F3}, // ; A # Ll [2] LATIN SMALL LETTER O WITH GRAVE..LATIN SMALL LETTER O WITH ACUTE | ||
70 | {0x00F7, 0x00F7}, // ; A # Sm DIVISION SIGN | ||
71 | {0x00F8, 0x00FA}, // ; A # Ll [3] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER U WITH ACUTE | ||
72 | {0x00FC, 0x00FC}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS | ||
73 | {0x00FE, 0x00FE}, // ; A # Ll LATIN SMALL LETTER THORN | ||
74 | {0x0101, 0x0101}, // ; A # Ll LATIN SMALL LETTER A WITH MACRON | ||
75 | {0x0111, 0x0111}, // ; A # Ll LATIN SMALL LETTER D WITH STROKE | ||
76 | {0x0113, 0x0113}, // ; A # Ll LATIN SMALL LETTER E WITH MACRON | ||
77 | {0x011B, 0x011B}, // ; A # Ll LATIN SMALL LETTER E WITH CARON | ||
78 | {0x0126, 0x0127}, // ; A # L& [2] LATIN CAPITAL LETTER H WITH STROKE..LATIN SMALL LETTER H WITH STROKE | ||
79 | {0x012B, 0x012B}, // ; A # Ll LATIN SMALL LETTER I WITH MACRON | ||
80 | {0x0131, 0x0133}, // ; A # L& [3] LATIN SMALL LETTER DOTLESS I..LATIN SMALL LIGATURE IJ | ||
81 | {0x0138, 0x0138}, // ; A # Ll LATIN SMALL LETTER KRA | ||
82 | {0x013F, 0x0142}, // ; A # L& [4] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH STROKE | ||
83 | {0x0144, 0x0144}, // ; A # Ll LATIN SMALL LETTER N WITH ACUTE | ||
84 | {0x0148, 0x014B}, // ; A # L& [4] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER ENG | ||
85 | {0x014D, 0x014D}, // ; A # Ll LATIN SMALL LETTER O WITH MACRON | ||
86 | {0x0152, 0x0153}, // ; A # L& [2] LATIN CAPITAL LIGATURE OE..LATIN SMALL LIGATURE OE | ||
87 | {0x0166, 0x0167}, // ; A # L& [2] LATIN CAPITAL LETTER T WITH STROKE..LATIN SMALL LETTER T WITH STROKE | ||
88 | {0x016B, 0x016B}, // ; A # Ll LATIN SMALL LETTER U WITH MACRON | ||
89 | {0x01CE, 0x01CE}, // ; A # Ll LATIN SMALL LETTER A WITH CARON | ||
90 | {0x01D0, 0x01D0}, // ; A # Ll LATIN SMALL LETTER I WITH CARON | ||
91 | {0x01D2, 0x01D2}, // ; A # Ll LATIN SMALL LETTER O WITH CARON | ||
92 | {0x01D4, 0x01D4}, // ; A # Ll LATIN SMALL LETTER U WITH CARON | ||
93 | {0x01D6, 0x01D6}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND MACRON | ||
94 | {0x01D8, 0x01D8}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE | ||
95 | {0x01DA, 0x01DA}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND CARON | ||
96 | {0x01DC, 0x01DC}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE | ||
97 | {0x0251, 0x0251}, // ; A # Ll LATIN SMALL LETTER ALPHA | ||
98 | {0x0261, 0x0261}, // ; A # Ll LATIN SMALL LETTER SCRIPT G | ||
99 | {0x02C4, 0x02C4}, // ; A # Sk MODIFIER LETTER UP ARROWHEAD | ||
100 | {0x02C7, 0x02C7}, // ; A # Lm CARON | ||
101 | {0x02C9, 0x02CB}, // ; A # Lm [3] MODIFIER LETTER MACRON..MODIFIER LETTER GRAVE ACCENT | ||
102 | {0x02CD, 0x02CD}, // ; A # Lm MODIFIER LETTER LOW MACRON | ||
103 | {0x02D0, 0x02D0}, // ; A # Lm MODIFIER LETTER TRIANGULAR COLON | ||
104 | {0x02D8, 0x02DB}, // ; A # Sk [4] BREVE..OGONEK | ||
105 | {0x02DD, 0x02DD}, // ; A # Sk DOUBLE ACUTE ACCENT | ||
106 | {0x02DF, 0x02DF}, // ; A # Sk MODIFIER LETTER CROSS ACCENT | ||
107 | {0x0300, 0x036F}, // ; A # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X | ||
108 | {0x0391, 0x03A1}, // ; A # Lu [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO | ||
109 | {0x03A3, 0x03A9}, // ; A # Lu [7] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER OMEGA | ||
110 | {0x03B1, 0x03C1}, // ; A # Ll [17] GREEK SMALL LETTER ALPHA..GREEK SMALL LETTER RHO | ||
111 | {0x03C3, 0x03C9}, // ; A # Ll [7] GREEK SMALL LETTER SIGMA..GREEK SMALL LETTER OMEGA | ||
112 | {0x0401, 0x0401}, // ; A # Lu CYRILLIC CAPITAL LETTER IO | ||
113 | {0x0410, 0x044F}, // ; A # L& [64] CYRILLIC CAPITAL LETTER A..CYRILLIC SMALL LETTER YA | ||
114 | {0x0451, 0x0451}, // ; A # Ll CYRILLIC SMALL LETTER IO | ||
115 | {0x2010, 0x2010}, // ; A # Pd HYPHEN | ||
116 | {0x2013, 0x2015}, // ; A # Pd [3] EN DASH..HORIZONTAL BAR | ||
117 | {0x2016, 0x2016}, // ; A # Po DOUBLE VERTICAL LINE | ||
118 | {0x2018, 0x2018}, // ; A # Pi LEFT SINGLE QUOTATION MARK | ||
119 | {0x2019, 0x2019}, // ; A # Pf RIGHT SINGLE QUOTATION MARK | ||
120 | {0x201C, 0x201C}, // ; A # Pi LEFT DOUBLE QUOTATION MARK | ||
121 | {0x201D, 0x201D}, // ; A # Pf RIGHT DOUBLE QUOTATION MARK | ||
122 | {0x2020, 0x2022}, // ; A # Po [3] DAGGER..BULLET | ||
123 | {0x2024, 0x2027}, // ; A # Po [4] ONE DOT LEADER..HYPHENATION POINT | ||
124 | {0x2030, 0x2030}, // ; A # Po PER MILLE SIGN | ||
125 | {0x2032, 0x2033}, // ; A # Po [2] PRIME..DOUBLE PRIME | ||
126 | {0x2035, 0x2035}, // ; A # Po REVERSED PRIME | ||
127 | {0x203B, 0x203B}, // ; A # Po REFERENCE MARK | ||
128 | {0x203E, 0x203E}, // ; A # Po OVERLINE | ||
129 | {0x2074, 0x2074}, // ; A # No SUPERSCRIPT FOUR | ||
130 | {0x207F, 0x207F}, // ; A # Lm SUPERSCRIPT LATIN SMALL LETTER N | ||
131 | {0x2081, 0x2084}, // ; A # No [4] SUBSCRIPT ONE..SUBSCRIPT FOUR | ||
132 | {0x20AC, 0x20AC}, // ; A # Sc EURO SIGN | ||
133 | {0x2103, 0x2103}, // ; A # So DEGREE CELSIUS | ||
134 | {0x2105, 0x2105}, // ; A # So CARE OF | ||
135 | {0x2109, 0x2109}, // ; A # So DEGREE FAHRENHEIT | ||
136 | {0x2113, 0x2113}, // ; A # Ll SCRIPT SMALL L | ||
137 | {0x2116, 0x2116}, // ; A # So NUMERO SIGN | ||
138 | {0x2121, 0x2122}, // ; A # So [2] TELEPHONE SIGN..TRADE MARK SIGN | ||
139 | {0x2126, 0x2126}, // ; A # Lu OHM SIGN | ||
140 | {0x212B, 0x212B}, // ; A # Lu ANGSTROM SIGN | ||
141 | {0x2153, 0x2154}, // ; A # No [2] VULGAR FRACTION ONE THIRD..VULGAR FRACTION TWO THIRDS | ||
142 | {0x215B, 0x215E}, // ; A # No [4] VULGAR FRACTION ONE EIGHTH..VULGAR FRACTION SEVEN EIGHTHS | ||
143 | {0x2160, 0x216B}, // ; A # Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE | ||
144 | {0x2170, 0x2179}, // ; A # Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN | ||
145 | {0x2189, 0x2189}, // ; A # No VULGAR FRACTION ZERO THIRDS | ||
146 | {0x2190, 0x2194}, // ; A # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW | ||
147 | {0x2195, 0x2199}, // ; A # So [5] UP DOWN ARROW..SOUTH WEST ARROW | ||
148 | {0x21B8, 0x21B9}, // ; A # So [2] NORTH WEST ARROW TO LONG BAR..LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR | ||
149 | {0x21D2, 0x21D2}, // ; A # Sm RIGHTWARDS DOUBLE ARROW | ||
150 | {0x21D4, 0x21D4}, // ; A # Sm LEFT RIGHT DOUBLE ARROW | ||
151 | {0x21E7, 0x21E7}, // ; A # So UPWARDS WHITE ARROW | ||
152 | {0x2200, 0x2200}, // ; A # Sm FOR ALL | ||
153 | {0x2202, 0x2203}, // ; A # Sm [2] PARTIAL DIFFERENTIAL..THERE EXISTS | ||
154 | {0x2207, 0x2208}, // ; A # Sm [2] NABLA..ELEMENT OF | ||
155 | {0x220B, 0x220B}, // ; A # Sm CONTAINS AS MEMBER | ||
156 | {0x220F, 0x220F}, // ; A # Sm N-ARY PRODUCT | ||
157 | {0x2211, 0x2211}, // ; A # Sm N-ARY SUMMATION | ||
158 | {0x2215, 0x2215}, // ; A # Sm DIVISION SLASH | ||
159 | {0x221A, 0x221A}, // ; A # Sm SQUARE ROOT | ||
160 | {0x221D, 0x2220}, // ; A # Sm [4] PROPORTIONAL TO..ANGLE | ||
161 | {0x2223, 0x2223}, // ; A # Sm DIVIDES | ||
162 | {0x2225, 0x2225}, // ; A # Sm PARALLEL TO | ||
163 | {0x2227, 0x222C}, // ; A # Sm [6] LOGICAL AND..DOUBLE INTEGRAL | ||
164 | {0x222E, 0x222E}, // ; A # Sm CONTOUR INTEGRAL | ||
165 | {0x2234, 0x2237}, // ; A # Sm [4] THEREFORE..PROPORTION | ||
166 | {0x223C, 0x223D}, // ; A # Sm [2] TILDE OPERATOR..REVERSED TILDE | ||
167 | {0x2248, 0x2248}, // ; A # Sm ALMOST EQUAL TO | ||
168 | {0x224C, 0x224C}, // ; A # Sm ALL EQUAL TO | ||
169 | {0x2252, 0x2252}, // ; A # Sm APPROXIMATELY EQUAL TO OR THE IMAGE OF | ||
170 | {0x2260, 0x2261}, // ; A # Sm [2] NOT EQUAL TO..IDENTICAL TO | ||
171 | {0x2264, 0x2267}, // ; A # Sm [4] LESS-THAN OR EQUAL TO..GREATER-THAN OVER EQUAL TO | ||
172 | {0x226A, 0x226B}, // ; A # Sm [2] MUCH LESS-THAN..MUCH GREATER-THAN | ||
173 | {0x226E, 0x226F}, // ; A # Sm [2] NOT LESS-THAN..NOT GREATER-THAN | ||
174 | {0x2282, 0x2283}, // ; A # Sm [2] SUBSET OF..SUPERSET OF | ||
175 | {0x2286, 0x2287}, // ; A # Sm [2] SUBSET OF OR EQUAL TO..SUPERSET OF OR EQUAL TO | ||
176 | {0x2295, 0x2295}, // ; A # Sm CIRCLED PLUS | ||
177 | {0x2299, 0x2299}, // ; A # Sm CIRCLED DOT OPERATOR | ||
178 | {0x22A5, 0x22A5}, // ; A # Sm UP TACK | ||
179 | {0x22BF, 0x22BF}, // ; A # Sm RIGHT TRIANGLE | ||
180 | {0x2312, 0x2312}, // ; A # So ARC | ||
181 | {0x2460, 0x249B}, // ; A # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP | ||
182 | {0x249C, 0x24E9}, // ; A # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z | ||
183 | {0x24EB, 0x24FF}, // ; A # No [21] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED DIGIT ZERO | ||
184 | {0x2500, 0x254B}, // ; A # So [76] BOX DRAWINGS LIGHT HORIZONTAL..BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL | ||
185 | {0x2550, 0x2573}, // ; A # So [36] BOX DRAWINGS DOUBLE HORIZONTAL..BOX DRAWINGS LIGHT DIAGONAL CROSS | ||
186 | {0x2580, 0x258F}, // ; A # So [16] UPPER HALF BLOCK..LEFT ONE EIGHTH BLOCK | ||
187 | {0x2592, 0x2595}, // ; A # So [4] MEDIUM SHADE..RIGHT ONE EIGHTH BLOCK | ||
188 | {0x25A0, 0x25A1}, // ; A # So [2] BLACK SQUARE..WHITE SQUARE | ||
189 | {0x25A3, 0x25A9}, // ; A # So [7] WHITE SQUARE CONTAINING BLACK SMALL SQUARE..SQUARE WITH DIAGONAL CROSSHATCH FILL | ||
190 | {0x25B2, 0x25B3}, // ; A # So [2] BLACK UP-POINTING TRIANGLE..WHITE UP-POINTING TRIANGLE | ||
191 | {0x25B6, 0x25B6}, // ; A # So BLACK RIGHT-POINTING TRIANGLE | ||
192 | {0x25B7, 0x25B7}, // ; A # Sm WHITE RIGHT-POINTING TRIANGLE | ||
193 | {0x25BC, 0x25BD}, // ; A # So [2] BLACK DOWN-POINTING TRIANGLE..WHITE DOWN-POINTING TRIANGLE | ||
194 | {0x25C0, 0x25C0}, // ; A # So BLACK LEFT-POINTING TRIANGLE | ||
195 | {0x25C1, 0x25C1}, // ; A # Sm WHITE LEFT-POINTING TRIANGLE | ||
196 | {0x25C6, 0x25C8}, // ; A # So [3] BLACK DIAMOND..WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND | ||
197 | {0x25CB, 0x25CB}, // ; A # So WHITE CIRCLE | ||
198 | {0x25CE, 0x25D1}, // ; A # So [4] BULLSEYE..CIRCLE WITH RIGHT HALF BLACK | ||
199 | {0x25E2, 0x25E5}, // ; A # So [4] BLACK LOWER RIGHT TRIANGLE..BLACK UPPER RIGHT TRIANGLE | ||
200 | {0x25EF, 0x25EF}, // ; A # So LARGE CIRCLE | ||
201 | {0x2605, 0x2606}, // ; A # So [2] BLACK STAR..WHITE STAR | ||
202 | {0x2609, 0x2609}, // ; A # So SUN | ||
203 | {0x260E, 0x260F}, // ; A # So [2] BLACK TELEPHONE..WHITE TELEPHONE | ||
204 | {0x261C, 0x261C}, // ; A # So WHITE LEFT POINTING INDEX | ||
205 | {0x261E, 0x261E}, // ; A # So WHITE RIGHT POINTING INDEX | ||
206 | {0x2640, 0x2640}, // ; A # So FEMALE SIGN | ||
207 | {0x2642, 0x2642}, // ; A # So MALE SIGN | ||
208 | {0x2660, 0x2661}, // ; A # So [2] BLACK SPADE SUIT..WHITE HEART SUIT | ||
209 | {0x2663, 0x2665}, // ; A # So [3] BLACK CLUB SUIT..BLACK HEART SUIT | ||
210 | {0x2667, 0x266A}, // ; A # So [4] WHITE CLUB SUIT..EIGHTH NOTE | ||
211 | {0x266C, 0x266D}, // ; A # So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN | ||
212 | {0x266F, 0x266F}, // ; A # Sm MUSIC SHARP SIGN | ||
213 | {0x269E, 0x269F}, // ; A # So [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT | ||
214 | {0x26BF, 0x26BF}, // ; A # So SQUARED KEY | ||
215 | {0x26C6, 0x26CD}, // ; A # So [8] RAIN..DISABLED CAR | ||
216 | {0x26CF, 0x26D3}, // ; A # So [5] PICK..CHAINS | ||
217 | {0x26D5, 0x26E1}, // ; A # So [13] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..RESTRICTED LEFT ENTRY-2 | ||
218 | {0x26E3, 0x26E3}, // ; A # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE | ||
219 | {0x26E8, 0x26E9}, // ; A # So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE | ||
220 | {0x26EB, 0x26F1}, // ; A # So [7] CASTLE..UMBRELLA ON GROUND | ||
221 | {0x26F4, 0x26F4}, // ; A # So FERRY | ||
222 | {0x26F6, 0x26F9}, // ; A # So [4] SQUARE FOUR CORNERS..PERSON WITH BALL | ||
223 | {0x26FB, 0x26FC}, // ; A # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL | ||
224 | {0x26FE, 0x26FF}, // ; A # So [2] CUP ON BLACK SQUARE..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE | ||
225 | {0x273D, 0x273D}, // ; A # So HEAVY TEARDROP-SPOKED ASTERISK | ||
226 | {0x2776, 0x277F}, // ; A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN | ||
227 | {0x2B56, 0x2B59}, // ; A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE | ||
228 | {0x3248, 0x324F}, // ; A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE | ||
229 | {0xE000, 0xF8FF}, // ; A # Co [6400] <private-use-E000>..<private-use-F8FF> | ||
230 | {0xFE00, 0xFE0F}, // ; A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 | ||
231 | {0xFFFD, 0xFFFD}, // ; A # So REPLACEMENT CHARACTER | ||
232 | {0x1F100, 0x1F10A}, // ; A # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA | ||
233 | {0x1F110, 0x1F12D}, // ; A # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD | ||
234 | {0x1F130, 0x1F169}, // ; A # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z | ||
235 | {0x1F170, 0x1F18D}, // ; A # So [30] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED SA | ||
236 | {0x1F18F, 0x1F190}, // ; A # So [2] NEGATIVE SQUARED WC..SQUARE DJ | ||
237 | {0x1F19B, 0x1F1AC}, // ; A # So [18] SQUARED THREE D..SQUARED VOD | ||
238 | {0xE0100, 0xE01EF}, // ; A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 | ||
239 | {0xF0000, 0xFFFFD}, // ; A # Co [65534] <private-use-F0000>..<private-use-FFFFD> | ||
240 | {0x100000, 0x10FFFD} // ; A # Co [65534] <private-use-100000>..<private-use-10FFFD> | ||
241 | }; | ||
242 | const size_t num_ranges = sizeof(ranges) / sizeof(ranges[0]); /* number of entries in ranges[] */ | ||
243 | |||
244 | int left = 0, right = num_ranges - 1; /* inclusive index bounds of the part of ranges[] still to be searched; NOTE(review): the size_t value is narrowed to int here */ | ||
245 | |||
246 | while (left <= right) { /* binary search: stops once the bounds have crossed */ | ||
247 | int mid = left + (right - left) / 2; /* midpoint, computed without adding left and right together */ | ||
248 | |||
249 | if (ucs >= ranges[mid].start && ucs <= ranges[mid].end) { | ||
250 | return 1; // ucs lies within ranges[mid], both bounds included | ||
251 | } else if (ucs < ranges[mid].start) { /* ucs is below this range: continue with the lower half */ | ||
252 | right = mid - 1; | ||
253 | } else { /* ucs is above this range: continue with the upper half */ | ||
254 | left = mid + 1; | ||
255 | } | ||
256 | } | ||
257 | return 0; // search exhausted: ucs is not in any of the ranges | ||
258 | } | ||
259 | |||
diff --git a/src/wcwidtha.h b/src/wcwidtha.h new file mode 100644 index 0000000..9931b01 --- /dev/null +++ b/src/wcwidtha.h | |||
@@ -0,0 +1,12 @@ | |||
1 | // wcwidtha.h | ||
2 | |||
3 | #ifndef MK_WCWIDTHA_H | ||
4 | #define MK_WCWIDTHA_H | ||
5 | |||
6 | |||
7 | #include "wcwidth.h" | ||
8 | |||
9 | // Returns 1 if the character is in the list of ambiguous width characters (for East Asian display), 0 otherwise | ||
10 | int mk_wcwidth_a(mk_wchar_t ucs); | ||
11 | |||
12 | #endif // MK_WCWIDTHA_H | ||