aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThijs Schreijer <thijs@thijsschreijer.nl>2025-01-26 20:15:39 +0100
committerThijs Schreijer <thijs@thijsschreijer.nl>2025-01-26 20:15:39 +0100
commitf697ea7e7603e916d5ee13327fcdaf9c811a00aa (patch)
treee044dec90ee1cc497ac0cf1d9892aee11089a3e5
parent85ad15fbd8c81807a1a662f5b6060641fa3a6357 (diff)
downloadluasystem-unicode/ambiguous.tar.gz
luasystem-unicode/ambiguous.tar.bz2
luasystem-unicode/ambiguous.zip
feat(terminal): check ambiguous unicode widthunicode/ambiguous
-rw-r--r--luasystem-scm-0.rockspec1
-rw-r--r--spec/04-term_spec.lua6
-rw-r--r--src/term.c11
-rw-r--r--src/wcwidtha.c259
-rw-r--r--src/wcwidtha.h12
5 files changed, 288 insertions, 1 deletions
diff --git a/luasystem-scm-0.rockspec b/luasystem-scm-0.rockspec
index 00a442c..ab83080 100644
--- a/luasystem-scm-0.rockspec
+++ b/luasystem-scm-0.rockspec
@@ -61,6 +61,7 @@ local function make_platform(plat)
61 'src/term.c', 61 'src/term.c',
62 'src/bitflags.c', 62 'src/bitflags.c',
63 'src/wcwidth.c', 63 'src/wcwidth.c',
64 'src/wcwidtha.c',
64 }, 65 },
65 defines = defines[plat], 66 defines = defines[plat],
66 libraries = libraries[plat], 67 libraries = libraries[plat],
diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua
index 813947a..711059b 100644
--- a/spec/04-term_spec.lua
+++ b/spec/04-term_spec.lua
@@ -539,6 +539,12 @@ describe("Terminal:", function()
539 assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) 539 assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)})
540 end) 540 end)
541 541
542 it("returns 2nd ambigious boolean value only if requested", function()
543 assert.same({1}, {system.utf8cwidth("¡", false)})
544 assert.same({1, true}, {system.utf8cwidth("¡", true)})
545 assert.same({1, false}, {system.utf8cwidth("a", true)})
546 end)
547
542 end) 548 end)
543 549
544 550
diff --git a/src/term.c b/src/term.c
index 4deb5f1..650b838 100644
--- a/src/term.c
+++ b/src/term.c
@@ -36,6 +36,7 @@
36// Windows does not have a wcwidth function, so we use compatibility code from 36// Windows does not have a wcwidth function, so we use compatibility code from
37// http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn 37// http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn
38#include "wcwidth.h" 38#include "wcwidth.h"
39#include "wcwidtha.h" // ambiguous width checks for East Asian characters
39 40
40 41
41#ifdef _WIN32 42#ifdef _WIN32
@@ -950,14 +951,16 @@ int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) {
950Get the width of a utf8 character for terminal display. 951Get the width of a utf8 character for terminal display.
951@function utf8cwidth 952@function utf8cwidth
952@tparam string utf8_char the utf8 character to check, only the width of the first character will be returned 953@tparam string utf8_char the utf8 character to check, only the width of the first character will be returned
954@tparam[opt=false] bool ambiguous if `true`, a second value is returned: a boolean indicating whether the character's width is ambiguous
953@treturn[1] int the display width in columns of the first character in the string (0 for an empty string) 955@treturn[1] int the display width in columns of the first character in the string (0 for an empty string)
954@treturn[2] nil 956@treturn[2] nil|bool if `ambiguous` is `true`, a boolean indicating if the character is ambiguous
955@treturn[2] string error message 957@treturn[2] string error message
956*/ 958*/
957int lst_utf8cwidth(lua_State *L) { 959int lst_utf8cwidth(lua_State *L) {
958 const char *utf8_char; 960 const char *utf8_char;
959 size_t utf8_len; 961 size_t utf8_len;
960 utf8_char = luaL_checklstring(L, 1, &utf8_len); 962 utf8_char = luaL_checklstring(L, 1, &utf8_len);
963 int ambiguous = lua_toboolean(L, 2);
961 int width = 0; 964 int width = 0;
962 965
963 mk_wchar_t wc; 966 mk_wchar_t wc;
@@ -984,6 +987,12 @@ int lst_utf8cwidth(lua_State *L) {
984 } 987 }
985 988
986 lua_pushinteger(L, width); 989 lua_pushinteger(L, width);
990
991 if (ambiguous) {
992 // also check if the width is ambiguous
993 lua_pushboolean(L, mk_wcwidth_a(wc));
994 return 2;
995 }
987 return 1; 996 return 1;
988} 997}
989 998
diff --git a/src/wcwidtha.c b/src/wcwidtha.c
new file mode 100644
index 0000000..2936ee0
--- /dev/null
+++ b/src/wcwidtha.c
@@ -0,0 +1,259 @@
1// To update this file to the latest version of the Unicode standard,
2// save the Lua script below to a file named 'getranges.lua' and
3// execute it as:
4// curl -s https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt | lua getranges.lua
5
6/* the script:
7local function singleline(line)
8 if line:find("; A", 1, true) then -- handle ambiguous characters only
9 local s,e = line:match("^([0-9a-fA-F]+)%.?%.?([0-9a-fA-F]*)")
10 e = (e == "" and s) or e -- single char, so end-range == start-range
11 local cmmnt = "// "..line:match("(; A.*)$")
12 local range = " {0x"..s..", 0x"..e.."},"
13 print(range..(" "):rep(30-#range)..cmmnt) -- print formatted output line
14 end
15end
16
17-- read all lines from stdin and iterate over them
18local t = {}
19for line in io.lines() do
20 line = line:match("^%s*(.-)%s*$") -- strip whitespace
21 if line ~= "" and line:sub(1,1) ~= "#" then -- skip comments and empty lines
22 singleline(line)
23 end
24end
25*/
26
27
28#include "wcwidtha.h"
29
30struct interval {
31 mk_wchar_t start;
32 mk_wchar_t end;
33};
34
35
36// Takes a unicode character, and return whether the character is in the list of
37// ambiguous width characters.
38int mk_wcwidth_a(mk_wchar_t ucs)
39{
40 /* sorted list of ambiguous width characters in East Asian displays */
41 /* generated by script in the comments above */
42 static const struct interval ranges[] = {
43 {0x00A1, 0x00A1}, // ; A # Po INVERTED EXCLAMATION MARK
44 {0x00A4, 0x00A4}, // ; A # Sc CURRENCY SIGN
45 {0x00A7, 0x00A7}, // ; A # Po SECTION SIGN
46 {0x00A8, 0x00A8}, // ; A # Sk DIAERESIS
47 {0x00AA, 0x00AA}, // ; A # Lo FEMININE ORDINAL INDICATOR
48 {0x00AD, 0x00AD}, // ; A # Cf SOFT HYPHEN
49 {0x00AE, 0x00AE}, // ; A # So REGISTERED SIGN
50 {0x00B0, 0x00B0}, // ; A # So DEGREE SIGN
51 {0x00B1, 0x00B1}, // ; A # Sm PLUS-MINUS SIGN
52 {0x00B2, 0x00B3}, // ; A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
53 {0x00B4, 0x00B4}, // ; A # Sk ACUTE ACCENT
54 {0x00B6, 0x00B7}, // ; A # Po [2] PILCROW SIGN..MIDDLE DOT
55 {0x00B8, 0x00B8}, // ; A # Sk CEDILLA
56 {0x00B9, 0x00B9}, // ; A # No SUPERSCRIPT ONE
57 {0x00BA, 0x00BA}, // ; A # Lo MASCULINE ORDINAL INDICATOR
58 {0x00BC, 0x00BE}, // ; A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
59 {0x00BF, 0x00BF}, // ; A # Po INVERTED QUESTION MARK
60 {0x00C6, 0x00C6}, // ; A # Lu LATIN CAPITAL LETTER AE
61 {0x00D0, 0x00D0}, // ; A # Lu LATIN CAPITAL LETTER ETH
62 {0x00D7, 0x00D7}, // ; A # Sm MULTIPLICATION SIGN
63 {0x00D8, 0x00D8}, // ; A # Lu LATIN CAPITAL LETTER O WITH STROKE
64 {0x00DE, 0x00E1}, // ; A # L& [4] LATIN CAPITAL LETTER THORN..LATIN SMALL LETTER A WITH ACUTE
65 {0x00E6, 0x00E6}, // ; A # Ll LATIN SMALL LETTER AE
66 {0x00E8, 0x00EA}, // ; A # Ll [3] LATIN SMALL LETTER E WITH GRAVE..LATIN SMALL LETTER E WITH CIRCUMFLEX
67 {0x00EC, 0x00ED}, // ; A # Ll [2] LATIN SMALL LETTER I WITH GRAVE..LATIN SMALL LETTER I WITH ACUTE
68 {0x00F0, 0x00F0}, // ; A # Ll LATIN SMALL LETTER ETH
69 {0x00F2, 0x00F3}, // ; A # Ll [2] LATIN SMALL LETTER O WITH GRAVE..LATIN SMALL LETTER O WITH ACUTE
70 {0x00F7, 0x00F7}, // ; A # Sm DIVISION SIGN
71 {0x00F8, 0x00FA}, // ; A # Ll [3] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER U WITH ACUTE
72 {0x00FC, 0x00FC}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS
73 {0x00FE, 0x00FE}, // ; A # Ll LATIN SMALL LETTER THORN
74 {0x0101, 0x0101}, // ; A # Ll LATIN SMALL LETTER A WITH MACRON
75 {0x0111, 0x0111}, // ; A # Ll LATIN SMALL LETTER D WITH STROKE
76 {0x0113, 0x0113}, // ; A # Ll LATIN SMALL LETTER E WITH MACRON
77 {0x011B, 0x011B}, // ; A # Ll LATIN SMALL LETTER E WITH CARON
78 {0x0126, 0x0127}, // ; A # L& [2] LATIN CAPITAL LETTER H WITH STROKE..LATIN SMALL LETTER H WITH STROKE
79 {0x012B, 0x012B}, // ; A # Ll LATIN SMALL LETTER I WITH MACRON
80 {0x0131, 0x0133}, // ; A # L& [3] LATIN SMALL LETTER DOTLESS I..LATIN SMALL LIGATURE IJ
81 {0x0138, 0x0138}, // ; A # Ll LATIN SMALL LETTER KRA
82 {0x013F, 0x0142}, // ; A # L& [4] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH STROKE
83 {0x0144, 0x0144}, // ; A # Ll LATIN SMALL LETTER N WITH ACUTE
84 {0x0148, 0x014B}, // ; A # L& [4] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER ENG
85 {0x014D, 0x014D}, // ; A # Ll LATIN SMALL LETTER O WITH MACRON
86 {0x0152, 0x0153}, // ; A # L& [2] LATIN CAPITAL LIGATURE OE..LATIN SMALL LIGATURE OE
87 {0x0166, 0x0167}, // ; A # L& [2] LATIN CAPITAL LETTER T WITH STROKE..LATIN SMALL LETTER T WITH STROKE
88 {0x016B, 0x016B}, // ; A # Ll LATIN SMALL LETTER U WITH MACRON
89 {0x01CE, 0x01CE}, // ; A # Ll LATIN SMALL LETTER A WITH CARON
90 {0x01D0, 0x01D0}, // ; A # Ll LATIN SMALL LETTER I WITH CARON
91 {0x01D2, 0x01D2}, // ; A # Ll LATIN SMALL LETTER O WITH CARON
92 {0x01D4, 0x01D4}, // ; A # Ll LATIN SMALL LETTER U WITH CARON
93 {0x01D6, 0x01D6}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
94 {0x01D8, 0x01D8}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
95 {0x01DA, 0x01DA}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND CARON
96 {0x01DC, 0x01DC}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
97 {0x0251, 0x0251}, // ; A # Ll LATIN SMALL LETTER ALPHA
98 {0x0261, 0x0261}, // ; A # Ll LATIN SMALL LETTER SCRIPT G
99 {0x02C4, 0x02C4}, // ; A # Sk MODIFIER LETTER UP ARROWHEAD
100 {0x02C7, 0x02C7}, // ; A # Lm CARON
101 {0x02C9, 0x02CB}, // ; A # Lm [3] MODIFIER LETTER MACRON..MODIFIER LETTER GRAVE ACCENT
102 {0x02CD, 0x02CD}, // ; A # Lm MODIFIER LETTER LOW MACRON
103 {0x02D0, 0x02D0}, // ; A # Lm MODIFIER LETTER TRIANGULAR COLON
104 {0x02D8, 0x02DB}, // ; A # Sk [4] BREVE..OGONEK
105 {0x02DD, 0x02DD}, // ; A # Sk DOUBLE ACUTE ACCENT
106 {0x02DF, 0x02DF}, // ; A # Sk MODIFIER LETTER CROSS ACCENT
107 {0x0300, 0x036F}, // ; A # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
108 {0x0391, 0x03A1}, // ; A # Lu [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO
109 {0x03A3, 0x03A9}, // ; A # Lu [7] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER OMEGA
110 {0x03B1, 0x03C1}, // ; A # Ll [17] GREEK SMALL LETTER ALPHA..GREEK SMALL LETTER RHO
111 {0x03C3, 0x03C9}, // ; A # Ll [7] GREEK SMALL LETTER SIGMA..GREEK SMALL LETTER OMEGA
112 {0x0401, 0x0401}, // ; A # Lu CYRILLIC CAPITAL LETTER IO
113 {0x0410, 0x044F}, // ; A # L& [64] CYRILLIC CAPITAL LETTER A..CYRILLIC SMALL LETTER YA
114 {0x0451, 0x0451}, // ; A # Ll CYRILLIC SMALL LETTER IO
115 {0x2010, 0x2010}, // ; A # Pd HYPHEN
116 {0x2013, 0x2015}, // ; A # Pd [3] EN DASH..HORIZONTAL BAR
117 {0x2016, 0x2016}, // ; A # Po DOUBLE VERTICAL LINE
118 {0x2018, 0x2018}, // ; A # Pi LEFT SINGLE QUOTATION MARK
119 {0x2019, 0x2019}, // ; A # Pf RIGHT SINGLE QUOTATION MARK
120 {0x201C, 0x201C}, // ; A # Pi LEFT DOUBLE QUOTATION MARK
121 {0x201D, 0x201D}, // ; A # Pf RIGHT DOUBLE QUOTATION MARK
122 {0x2020, 0x2022}, // ; A # Po [3] DAGGER..BULLET
123 {0x2024, 0x2027}, // ; A # Po [4] ONE DOT LEADER..HYPHENATION POINT
124 {0x2030, 0x2030}, // ; A # Po PER MILLE SIGN
125 {0x2032, 0x2033}, // ; A # Po [2] PRIME..DOUBLE PRIME
126 {0x2035, 0x2035}, // ; A # Po REVERSED PRIME
127 {0x203B, 0x203B}, // ; A # Po REFERENCE MARK
128 {0x203E, 0x203E}, // ; A # Po OVERLINE
129 {0x2074, 0x2074}, // ; A # No SUPERSCRIPT FOUR
130 {0x207F, 0x207F}, // ; A # Lm SUPERSCRIPT LATIN SMALL LETTER N
131 {0x2081, 0x2084}, // ; A # No [4] SUBSCRIPT ONE..SUBSCRIPT FOUR
132 {0x20AC, 0x20AC}, // ; A # Sc EURO SIGN
133 {0x2103, 0x2103}, // ; A # So DEGREE CELSIUS
134 {0x2105, 0x2105}, // ; A # So CARE OF
135 {0x2109, 0x2109}, // ; A # So DEGREE FAHRENHEIT
136 {0x2113, 0x2113}, // ; A # Ll SCRIPT SMALL L
137 {0x2116, 0x2116}, // ; A # So NUMERO SIGN
138 {0x2121, 0x2122}, // ; A # So [2] TELEPHONE SIGN..TRADE MARK SIGN
139 {0x2126, 0x2126}, // ; A # Lu OHM SIGN
140 {0x212B, 0x212B}, // ; A # Lu ANGSTROM SIGN
141 {0x2153, 0x2154}, // ; A # No [2] VULGAR FRACTION ONE THIRD..VULGAR FRACTION TWO THIRDS
142 {0x215B, 0x215E}, // ; A # No [4] VULGAR FRACTION ONE EIGHTH..VULGAR FRACTION SEVEN EIGHTHS
143 {0x2160, 0x216B}, // ; A # Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE
144 {0x2170, 0x2179}, // ; A # Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN
145 {0x2189, 0x2189}, // ; A # No VULGAR FRACTION ZERO THIRDS
146 {0x2190, 0x2194}, // ; A # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
147 {0x2195, 0x2199}, // ; A # So [5] UP DOWN ARROW..SOUTH WEST ARROW
148 {0x21B8, 0x21B9}, // ; A # So [2] NORTH WEST ARROW TO LONG BAR..LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
149 {0x21D2, 0x21D2}, // ; A # Sm RIGHTWARDS DOUBLE ARROW
150 {0x21D4, 0x21D4}, // ; A # Sm LEFT RIGHT DOUBLE ARROW
151 {0x21E7, 0x21E7}, // ; A # So UPWARDS WHITE ARROW
152 {0x2200, 0x2200}, // ; A # Sm FOR ALL
153 {0x2202, 0x2203}, // ; A # Sm [2] PARTIAL DIFFERENTIAL..THERE EXISTS
154 {0x2207, 0x2208}, // ; A # Sm [2] NABLA..ELEMENT OF
155 {0x220B, 0x220B}, // ; A # Sm CONTAINS AS MEMBER
156 {0x220F, 0x220F}, // ; A # Sm N-ARY PRODUCT
157 {0x2211, 0x2211}, // ; A # Sm N-ARY SUMMATION
158 {0x2215, 0x2215}, // ; A # Sm DIVISION SLASH
159 {0x221A, 0x221A}, // ; A # Sm SQUARE ROOT
160 {0x221D, 0x2220}, // ; A # Sm [4] PROPORTIONAL TO..ANGLE
161 {0x2223, 0x2223}, // ; A # Sm DIVIDES
162 {0x2225, 0x2225}, // ; A # Sm PARALLEL TO
163 {0x2227, 0x222C}, // ; A # Sm [6] LOGICAL AND..DOUBLE INTEGRAL
164 {0x222E, 0x222E}, // ; A # Sm CONTOUR INTEGRAL
165 {0x2234, 0x2237}, // ; A # Sm [4] THEREFORE..PROPORTION
166 {0x223C, 0x223D}, // ; A # Sm [2] TILDE OPERATOR..REVERSED TILDE
167 {0x2248, 0x2248}, // ; A # Sm ALMOST EQUAL TO
168 {0x224C, 0x224C}, // ; A # Sm ALL EQUAL TO
169 {0x2252, 0x2252}, // ; A # Sm APPROXIMATELY EQUAL TO OR THE IMAGE OF
170 {0x2260, 0x2261}, // ; A # Sm [2] NOT EQUAL TO..IDENTICAL TO
171 {0x2264, 0x2267}, // ; A # Sm [4] LESS-THAN OR EQUAL TO..GREATER-THAN OVER EQUAL TO
172 {0x226A, 0x226B}, // ; A # Sm [2] MUCH LESS-THAN..MUCH GREATER-THAN
173 {0x226E, 0x226F}, // ; A # Sm [2] NOT LESS-THAN..NOT GREATER-THAN
174 {0x2282, 0x2283}, // ; A # Sm [2] SUBSET OF..SUPERSET OF
175 {0x2286, 0x2287}, // ; A # Sm [2] SUBSET OF OR EQUAL TO..SUPERSET OF OR EQUAL TO
176 {0x2295, 0x2295}, // ; A # Sm CIRCLED PLUS
177 {0x2299, 0x2299}, // ; A # Sm CIRCLED DOT OPERATOR
178 {0x22A5, 0x22A5}, // ; A # Sm UP TACK
179 {0x22BF, 0x22BF}, // ; A # Sm RIGHT TRIANGLE
180 {0x2312, 0x2312}, // ; A # So ARC
181 {0x2460, 0x249B}, // ; A # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
182 {0x249C, 0x24E9}, // ; A # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
183 {0x24EB, 0x24FF}, // ; A # No [21] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED DIGIT ZERO
184 {0x2500, 0x254B}, // ; A # So [76] BOX DRAWINGS LIGHT HORIZONTAL..BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL
185 {0x2550, 0x2573}, // ; A # So [36] BOX DRAWINGS DOUBLE HORIZONTAL..BOX DRAWINGS LIGHT DIAGONAL CROSS
186 {0x2580, 0x258F}, // ; A # So [16] UPPER HALF BLOCK..LEFT ONE EIGHTH BLOCK
187 {0x2592, 0x2595}, // ; A # So [4] MEDIUM SHADE..RIGHT ONE EIGHTH BLOCK
188 {0x25A0, 0x25A1}, // ; A # So [2] BLACK SQUARE..WHITE SQUARE
189 {0x25A3, 0x25A9}, // ; A # So [7] WHITE SQUARE CONTAINING BLACK SMALL SQUARE..SQUARE WITH DIAGONAL CROSSHATCH FILL
190 {0x25B2, 0x25B3}, // ; A # So [2] BLACK UP-POINTING TRIANGLE..WHITE UP-POINTING TRIANGLE
191 {0x25B6, 0x25B6}, // ; A # So BLACK RIGHT-POINTING TRIANGLE
192 {0x25B7, 0x25B7}, // ; A # Sm WHITE RIGHT-POINTING TRIANGLE
193 {0x25BC, 0x25BD}, // ; A # So [2] BLACK DOWN-POINTING TRIANGLE..WHITE DOWN-POINTING TRIANGLE
194 {0x25C0, 0x25C0}, // ; A # So BLACK LEFT-POINTING TRIANGLE
195 {0x25C1, 0x25C1}, // ; A # Sm WHITE LEFT-POINTING TRIANGLE
196 {0x25C6, 0x25C8}, // ; A # So [3] BLACK DIAMOND..WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND
197 {0x25CB, 0x25CB}, // ; A # So WHITE CIRCLE
198 {0x25CE, 0x25D1}, // ; A # So [4] BULLSEYE..CIRCLE WITH RIGHT HALF BLACK
199 {0x25E2, 0x25E5}, // ; A # So [4] BLACK LOWER RIGHT TRIANGLE..BLACK UPPER RIGHT TRIANGLE
200 {0x25EF, 0x25EF}, // ; A # So LARGE CIRCLE
201 {0x2605, 0x2606}, // ; A # So [2] BLACK STAR..WHITE STAR
202 {0x2609, 0x2609}, // ; A # So SUN
203 {0x260E, 0x260F}, // ; A # So [2] BLACK TELEPHONE..WHITE TELEPHONE
204 {0x261C, 0x261C}, // ; A # So WHITE LEFT POINTING INDEX
205 {0x261E, 0x261E}, // ; A # So WHITE RIGHT POINTING INDEX
206 {0x2640, 0x2640}, // ; A # So FEMALE SIGN
207 {0x2642, 0x2642}, // ; A # So MALE SIGN
208 {0x2660, 0x2661}, // ; A # So [2] BLACK SPADE SUIT..WHITE HEART SUIT
209 {0x2663, 0x2665}, // ; A # So [3] BLACK CLUB SUIT..BLACK HEART SUIT
210 {0x2667, 0x266A}, // ; A # So [4] WHITE CLUB SUIT..EIGHTH NOTE
211 {0x266C, 0x266D}, // ; A # So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN
212 {0x266F, 0x266F}, // ; A # Sm MUSIC SHARP SIGN
213 {0x269E, 0x269F}, // ; A # So [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT
214 {0x26BF, 0x26BF}, // ; A # So SQUARED KEY
215 {0x26C6, 0x26CD}, // ; A # So [8] RAIN..DISABLED CAR
216 {0x26CF, 0x26D3}, // ; A # So [5] PICK..CHAINS
217 {0x26D5, 0x26E1}, // ; A # So [13] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..RESTRICTED LEFT ENTRY-2
218 {0x26E3, 0x26E3}, // ; A # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
219 {0x26E8, 0x26E9}, // ; A # So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE
220 {0x26EB, 0x26F1}, // ; A # So [7] CASTLE..UMBRELLA ON GROUND
221 {0x26F4, 0x26F4}, // ; A # So FERRY
222 {0x26F6, 0x26F9}, // ; A # So [4] SQUARE FOUR CORNERS..PERSON WITH BALL
223 {0x26FB, 0x26FC}, // ; A # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL
224 {0x26FE, 0x26FF}, // ; A # So [2] CUP ON BLACK SQUARE..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
225 {0x273D, 0x273D}, // ; A # So HEAVY TEARDROP-SPOKED ASTERISK
226 {0x2776, 0x277F}, // ; A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN
227 {0x2B56, 0x2B59}, // ; A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE
228 {0x3248, 0x324F}, // ; A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
229 {0xE000, 0xF8FF}, // ; A # Co [6400] <private-use-E000>..<private-use-F8FF>
230 {0xFE00, 0xFE0F}, // ; A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
231 {0xFFFD, 0xFFFD}, // ; A # So REPLACEMENT CHARACTER
232 {0x1F100, 0x1F10A}, // ; A # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
233 {0x1F110, 0x1F12D}, // ; A # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD
234 {0x1F130, 0x1F169}, // ; A # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
235 {0x1F170, 0x1F18D}, // ; A # So [30] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED SA
236 {0x1F18F, 0x1F190}, // ; A # So [2] NEGATIVE SQUARED WC..SQUARE DJ
237 {0x1F19B, 0x1F1AC}, // ; A # So [18] SQUARED THREE D..SQUARED VOD
238 {0xE0100, 0xE01EF}, // ; A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
239 {0xF0000, 0xFFFFD}, // ; A # Co [65534] <private-use-F0000>..<private-use-FFFFD>
240 {0x100000, 0x10FFFD} // ; A # Co [65534] <private-use-100000>..<private-use-10FFFD>
241 };
242 const size_t num_ranges = sizeof(ranges) / sizeof(ranges[0]);
243
244 int left = 0, right = num_ranges - 1;
245
246 while (left <= right) {
247 int mid = left + (right - left) / 2;
248
249 if (ucs >= ranges[mid].start && ucs <= ranges[mid].end) {
250 return 1; // Character is in the range
251 } else if (ucs < ranges[mid].start) {
252 right = mid - 1;
253 } else {
254 left = mid + 1;
255 }
256 }
257 return 0; // Character is not in any of the ranges
258}
259
diff --git a/src/wcwidtha.h b/src/wcwidtha.h
new file mode 100644
index 0000000..9931b01
--- /dev/null
+++ b/src/wcwidtha.h
@@ -0,0 +1,12 @@
1// wcwidtha.h
2
3#ifndef MK_WCWIDTHA_H
4#define MK_WCWIDTHA_H
5
6
7#include "wcwidth.h"
8
9// Returns 1 if the character is in the list of ambiguous-width characters (for East Asian display), 0 otherwise
10int mk_wcwidth_a(mk_wchar_t ucs);
11
12#endif // MK_WCWIDTHA_H