diff options
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | CHANGELOG.md | 8 | ||||
| -rw-r--r-- | spec/04-term_spec.lua | 59 | ||||
| -rw-r--r-- | src/term.c | 15 | ||||
| -rw-r--r-- | src/wcwidth.c | 245 | ||||
| -rw-r--r-- | src/wcwidth.h | 7 | ||||
| -rw-r--r-- | src/wcwidth_ambiguous_width.c | 64 | ||||
| -rw-r--r-- | src/wcwidth_double_width.c | 45 | ||||
| -rwxr-xr-x | src/wcwidth_update.lua | 404 | ||||
| -rw-r--r-- | src/wcwidth_zero_width.c | 128 |
10 files changed, 745 insertions, 232 deletions
| @@ -50,3 +50,5 @@ luac.out | |||
| 50 | # VS Code files | 50 | # VS Code files |
| 51 | .vscode/ | 51 | .vscode/ |
| 52 | 52 | ||
| 53 | # generated/downloaded unicode files | ||
| 54 | src/unicode_data/ | ||
diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b00fb6..1d006ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md | |||
| @@ -27,6 +27,14 @@ The scope of what is covered by the version number excludes: | |||
| 27 | 27 | ||
| 28 | ## Version history | 28 | ## Version history |
| 29 | 29 | ||
| 30 | ### version x.x.x, unreleased | ||
| 31 | |||
| 32 | - Fix: fix the display-width functions that occasionally reported a bad width. The source code now | ||
| 33 | includes a code-generator that parses the official Unicode files to create static ranges in the | ||
| 34 | source code. Beyond that the ambiguous width can now be specified by the user, as a second parameter | ||
| 35 | to `utf8cwidth` and `utf8swidth`. | ||
| 36 | See [#82](https://github.com/lunarmodules/luasystem/pull/82). | ||
| 37 | |||
| 30 | ### version 0.6.3, released 11-Jul-2025 | 38 | ### version 0.6.3, released 11-Jul-2025 |
| 31 | 39 | ||
| 32 | - Fix: maximum key-delay, reduced from 0.2 to 0.1 seconds to reduce the sluggish feel on some key presses. | 40 | - Fix: maximum key-delay, reduced from 0.2 to 0.1 seconds to reduce the sluggish feel on some key presses. |
diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua index 5dea046..2d50a6b 100644 --- a/spec/04-term_spec.lua +++ b/spec/04-term_spec.lua | |||
| @@ -512,21 +512,35 @@ describe("Terminal:", function() | |||
| 512 | describe("utf8cwidth()", function() | 512 | describe("utf8cwidth()", function() |
| 513 | 513 | ||
| 514 | -- utf-8 strings | 514 | -- utf-8 strings |
| 515 | local ch1 = string.char(226, 130, 172) -- "€" single | 515 | local ch1 = string.char(65) -- "A" single |
| 516 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double | 516 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double |
| 517 | local ch3 = string.char(228, 189, 160) -- "你" double | 517 | local ch3 = string.char(228, 189, 160) -- "你" double |
| 518 | local ch4 = string.char(229, 165, 189) -- "好" double | 518 | local ch4 = string.char(229, 165, 189) -- "好" double |
| 519 | local ch5 = string.char(226, 130, 172) -- "€" ambiguous | ||
| 519 | 520 | ||
| 520 | -- unicode codepoints | 521 | -- unicode codepoints |
| 521 | local cp1 = 8364 -- "€" single | 522 | local cp1 = 65 -- "A" single |
| 522 | local cp2 = 128640 -- "🚀" double | 523 | local cp2 = 128640 -- "🚀" double |
| 523 | local cp3 = 20320 -- "你" double | 524 | local cp3 = 20320 -- "你" double |
| 524 | local cp4 = 22909 -- "好" double | 525 | local cp4 = 22909 -- "好" double |
| 526 | local cp5 = 8364 -- "€" ambiguous | ||
| 525 | 527 | ||
| 526 | it("handles zero width characters", function() | 528 | it("handles zero width characters", function() |
| 527 | assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size | 529 | assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size |
| 528 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character | 530 | assert.same({0}, {system.utf8cwidth("\0")}) -- null character |
| 529 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\27")}) -- escape character | 531 | |
| 532 | -- zero-width (from wcwidth_zero_width.c / wcwidth_update.lua) | ||
| 533 | local zw_sp = string.char(0xE2, 0x80, 0x8B) -- U+200B Zero Width Space | ||
| 534 | local zw_nj = string.char(0xE2, 0x80, 0x8C) -- U+200C Zero Width Non-Joiner | ||
| 535 | local zw_j = string.char(0xE2, 0x80, 0x8D) -- U+200D Zero Width Joiner | ||
| 536 | local zw_nb = string.char(0xEF, 0xBB, 0xBF) -- U+FEFF Zero Width No-Break Space (BOM) | ||
| 537 | local soft_hy = string.char(0xC2, 0xAD) -- U+00AD Soft hyphen | ||
| 538 | |||
| 539 | assert.same({0}, {system.utf8cwidth(zw_sp)}) | ||
| 540 | assert.same({0}, {system.utf8cwidth(zw_nj)}) | ||
| 541 | assert.same({0}, {system.utf8cwidth(zw_j)}) | ||
| 542 | assert.same({0}, {system.utf8cwidth(zw_nb)}) | ||
| 543 | assert.same({0}, {system.utf8cwidth(soft_hy)}) | ||
| 530 | end) | 544 | end) |
| 531 | 545 | ||
| 532 | it("handles single width characters", function() | 546 | it("handles single width characters", function() |
| @@ -540,8 +554,16 @@ describe("Terminal:", function() | |||
| 540 | assert.same({2}, {system.utf8cwidth(ch4)}) | 554 | assert.same({2}, {system.utf8cwidth(ch4)}) |
| 541 | end) | 555 | end) |
| 542 | 556 | ||
| 557 | it("handles ambiguous width characters", function() | ||
| 558 | assert.same({99}, {system.utf8cwidth(ch5, 99)}) | ||
| 559 | end) | ||
| 560 | |||
| 561 | it("ambiguous width defaults to 1", function() | ||
| 562 | assert.same({1}, {system.utf8cwidth(ch5, nil)}) | ||
| 563 | end) | ||
| 564 | |||
| 543 | it("returns the width of the first character in the string", function() | 565 | it("returns the width of the first character in the string", function() |
| 544 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a" .. ch1)}) -- bell character + EURO | 566 | assert.same({nil, 'Control characters have no width'}, {system.utf8cwidth("\a" .. ch1)}) -- bell character + "A" |
| 545 | assert.same({1}, {system.utf8cwidth(ch1 .. ch2)}) | 567 | assert.same({1}, {system.utf8cwidth(ch1 .. ch2)}) |
| 546 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) | 568 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) |
| 547 | end) | 569 | end) |
| @@ -551,6 +573,12 @@ describe("Terminal:", function() | |||
| 551 | assert.same({2}, {system.utf8cwidth(cp2)}) | 573 | assert.same({2}, {system.utf8cwidth(cp2)}) |
| 552 | assert.same({2}, {system.utf8cwidth(cp3)}) | 574 | assert.same({2}, {system.utf8cwidth(cp3)}) |
| 553 | assert.same({2}, {system.utf8cwidth(cp4)}) | 575 | assert.same({2}, {system.utf8cwidth(cp4)}) |
| 576 | assert.same({99}, {system.utf8cwidth(cp5, 99)}) -- ambiguous width | ||
| 577 | end) | ||
| 578 | |||
| 579 | it("returns an error on control characters", function() | ||
| 580 | assert.same({nil, 'Control characters have no width'}, {system.utf8cwidth("\a")}) -- bell character | ||
| 581 | assert.same({nil, 'Control characters have no width'}, {system.utf8cwidth("\27")}) -- escape character | ||
| 554 | end) | 582 | end) |
| 555 | 583 | ||
| 556 | it("returns an error on bad argument", function() | 584 | it("returns an error on bad argument", function() |
| @@ -570,15 +598,16 @@ describe("Terminal:", function() | |||
| 570 | 598 | ||
| 571 | describe("utf8swidth()", function() | 599 | describe("utf8swidth()", function() |
| 572 | 600 | ||
| 573 | local ch1 = string.char(226, 130, 172) -- "€" single | 601 | local ch1 = string.char(65) -- "A" single |
| 574 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double | 602 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double |
| 575 | local ch3 = string.char(228, 189, 160) -- "你" double | 603 | local ch3 = string.char(228, 189, 160) -- "你" double |
| 576 | local ch4 = string.char(229, 165, 189) -- "好" double | 604 | local ch4 = string.char(229, 165, 189) -- "好" double |
| 605 | local ch5 = string.char(226, 130, 172) -- "€" ambiguous | ||
| 577 | 606 | ||
| 578 | it("handles zero width characters", function() | 607 | it("handles zero width characters", function() |
| 579 | assert.same({0}, {system.utf8swidth("")}) -- empty string returns 0-size | 608 | assert.same({0}, {system.utf8swidth("")}) -- empty string returns 0-size |
| 580 | assert.same({nil, 'Character width determination failed'}, {system.utf8swidth("\a")}) -- bell character | 609 | assert.same({nil, 'Control characters have no width'}, {system.utf8swidth("\a")}) -- bell character |
| 581 | assert.same({nil, 'Character width determination failed'}, {system.utf8swidth("\27")}) -- escape character | 610 | assert.same({nil, 'Control characters have no width'}, {system.utf8swidth("\27")}) -- escape character |
| 582 | end) | 611 | end) |
| 583 | 612 | ||
| 584 | it("handles multi-character UTF8 strings", function() | 613 | it("handles multi-character UTF8 strings", function() |
| @@ -586,6 +615,20 @@ describe("Terminal:", function() | |||
| 586 | assert.same({16}, {system.utf8swidth("hello " .. ch3 .. ch4 .. " world")}) | 615 | assert.same({16}, {system.utf8swidth("hello " .. ch3 .. ch4 .. " world")}) |
| 587 | end) | 616 | end) |
| 588 | 617 | ||
| 618 | it("handles ambiguous width characters", function() | ||
| 619 | assert.same({12}, {system.utf8swidth(ch5 .. "1234567890", 2)}) | ||
| 620 | end) | ||
| 621 | |||
| 622 | it("ambiguous width defaults to 1", function() | ||
| 623 | assert.same({1}, {system.utf8swidth(ch5, nil)}) | ||
| 624 | end) | ||
| 625 | |||
| 626 | it("ambiguous width must be 1 or 2", function() | ||
| 627 | assert.has.error(function() | ||
| 628 | system.utf8swidth(ch5, 3) | ||
| 629 | end, "bad argument #2 to 'utf8swidth' (Ambiguous width must be 1 or 2)") | ||
| 630 | end) | ||
| 631 | |||
| 589 | end) | 632 | end) |
| 590 | 633 | ||
| 591 | 634 | ||
| @@ -1085,6 +1085,7 @@ int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) { | |||
| 1085 | Get the width of a utf8 character for terminal display. | 1085 | Get the width of a utf8 character for terminal display. |
| 1086 | @function utf8cwidth | 1086 | @function utf8cwidth |
| 1087 | @tparam string|int utf8_char the utf8 character, or unicode codepoint, to check, only the width of the first character will be returned | 1087 | @tparam string|int utf8_char the utf8 character, or unicode codepoint, to check, only the width of the first character will be returned |
| 1088 | @tparam[opt=1] int ambiguous_width the width to return for ambiguous width characters (usually 1 or 2) | ||
| 1088 | @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) | 1089 | @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) |
| 1089 | @treturn[2] nil | 1090 | @treturn[2] nil |
| 1090 | @treturn[2] string error message | 1091 | @treturn[2] string error message |
| @@ -1093,6 +1094,7 @@ Get the width of a utf8 character for terminal display. | |||
| 1093 | int lst_utf8cwidth(lua_State *L) { | 1094 | int lst_utf8cwidth(lua_State *L) { |
| 1094 | int width = 0; | 1095 | int width = 0; |
| 1095 | mk_wchar_t wc; | 1096 | mk_wchar_t wc; |
| 1097 | int ambiguous_width = luaL_optinteger(L, 2, 1); | ||
| 1096 | 1098 | ||
| 1097 | if (lua_type(L, 1) == LUA_TSTRING) { | 1099 | if (lua_type(L, 1) == LUA_TSTRING) { |
| 1098 | // Handle UTF8 as string input | 1100 | // Handle UTF8 as string input |
| @@ -1129,10 +1131,10 @@ int lst_utf8cwidth(lua_State *L) { | |||
| 1129 | } | 1131 | } |
| 1130 | 1132 | ||
| 1131 | // Get the width of the wide character | 1133 | // Get the width of the wide character |
| 1132 | width = mk_wcwidth(wc); | 1134 | width = mk_wcwidth(wc, ambiguous_width); |
| 1133 | if (width == -1) { | 1135 | if (width == -1) { |
| 1134 | lua_pushnil(L); | 1136 | lua_pushnil(L); |
| 1135 | lua_pushstring(L, "Character width determination failed"); | 1137 | lua_pushstring(L, "Control characters have no width"); |
| 1136 | return 2; | 1138 | return 2; |
| 1137 | } | 1139 | } |
| 1138 | 1140 | ||
| @@ -1147,6 +1149,7 @@ int lst_utf8cwidth(lua_State *L) { | |||
| 1147 | Get the width of a utf8 string for terminal display. | 1149 | Get the width of a utf8 string for terminal display. |
| 1148 | @function utf8swidth | 1150 | @function utf8swidth |
| 1149 | @tparam string utf8_string the utf8 string to check | 1151 | @tparam string utf8_string the utf8 string to check |
| 1152 | @tparam[opt=1] int ambiguous_width the width to return for ambiguous width characters (1 or 2) | ||
| 1150 | @treturn[1] int the display width of the string in columns (0 for an empty string) | 1153 | @treturn[1] int the display width of the string in columns (0 for an empty string) |
| 1151 | @treturn[2] nil | 1154 | @treturn[2] nil |
| 1152 | @treturn[2] string error message | 1155 | @treturn[2] string error message |
| @@ -1156,6 +1159,10 @@ int lst_utf8swidth(lua_State *L) { | |||
| 1156 | const char *utf8_str; | 1159 | const char *utf8_str; |
| 1157 | size_t utf8_len; | 1160 | size_t utf8_len; |
| 1158 | utf8_str = luaL_checklstring(L, 1, &utf8_len); | 1161 | utf8_str = luaL_checklstring(L, 1, &utf8_len); |
| 1162 | int ambiguous_width = luaL_optinteger(L, 2, 1); | ||
| 1163 | if (ambiguous_width != 1 && ambiguous_width != 2) { | ||
| 1164 | return luaL_argerror(L, 2, "Ambiguous width must be 1 or 2"); | ||
| 1165 | } | ||
| 1159 | int total_width = 0; | 1166 | int total_width = 0; |
| 1160 | 1167 | ||
| 1161 | if (utf8_len == 0) { | 1168 | if (utf8_len == 0) { |
| @@ -1175,10 +1182,10 @@ int lst_utf8swidth(lua_State *L) { | |||
| 1175 | return 2; | 1182 | return 2; |
| 1176 | } | 1183 | } |
| 1177 | 1184 | ||
| 1178 | int width = mk_wcwidth(wc); | 1185 | int width = mk_wcwidth(wc, ambiguous_width); |
| 1179 | if (width == -1) { | 1186 | if (width == -1) { |
| 1180 | lua_pushnil(L); | 1187 | lua_pushnil(L); |
| 1181 | lua_pushstring(L, "Character width determination failed"); | 1188 | lua_pushstring(L, "Control characters have no width"); |
| 1182 | return 2; | 1189 | return 2; |
| 1183 | } | 1190 | } |
| 1184 | 1191 | ||
diff --git a/src/wcwidth.c b/src/wcwidth.c index 6032158..ea293c9 100644 --- a/src/wcwidth.c +++ b/src/wcwidth.c | |||
| @@ -1,57 +1,6 @@ | |||
| 1 | // This file was modified from the original versions, check "modified:" comments for details | 1 | // This file was modified from the original version by Markus Kuhn |
| 2 | // Character range updates (both the table and the +1 check) were generated using ChatGPT. | ||
| 3 | 2 | ||
| 4 | /* | 3 | /* Original copyrights: |
| 5 | * This is an implementation of wcwidth() and wcswidth() (defined in | ||
| 6 | * IEEE Std 1002.1-2001) for Unicode. | ||
| 7 | * | ||
| 8 | * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html | ||
| 9 | * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html | ||
| 10 | * | ||
| 11 | * In fixed-width output devices, Latin characters all occupy a single | ||
| 12 | * "cell" position of equal width, whereas ideographic CJK characters | ||
| 13 | * occupy two such cells. Interoperability between terminal-line | ||
| 14 | * applications and (teletype-style) character terminals using the | ||
| 15 | * UTF-8 encoding requires agreement on which character should advance | ||
| 16 | * the cursor by how many cell positions. No established formal | ||
| 17 | * standards exist at present on which Unicode character shall occupy | ||
| 18 | * how many cell positions on character terminals. These routines are | ||
| 19 | * a first attempt of defining such behavior based on simple rules | ||
| 20 | * applied to data provided by the Unicode Consortium. | ||
| 21 | * | ||
| 22 | * For some graphical characters, the Unicode standard explicitly | ||
| 23 | * defines a character-cell width via the definition of the East Asian | ||
| 24 | * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. | ||
| 25 | * In all these cases, there is no ambiguity about which width a | ||
| 26 | * terminal shall use. For characters in the East Asian Ambiguous (A) | ||
| 27 | * class, the width choice depends purely on a preference of backward | ||
| 28 | * compatibility with either historic CJK or Western practice. | ||
| 29 | * Choosing single-width for these characters is easy to justify as | ||
| 30 | * the appropriate long-term solution, as the CJK practice of | ||
| 31 | * displaying these characters as double-width comes from historic | ||
| 32 | * implementation simplicity (8-bit encoded characters were displayed | ||
| 33 | * single-width and 16-bit ones double-width, even for Greek, | ||
| 34 | * Cyrillic, etc.) and not any typographic considerations. | ||
| 35 | * | ||
| 36 | * Much less clear is the choice of width for the Not East Asian | ||
| 37 | * (Neutral) class. Existing practice does not dictate a width for any | ||
| 38 | * of these characters. It would nevertheless make sense | ||
| 39 | * typographically to allocate two character cells to characters such | ||
| 40 | * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be | ||
| 41 | * represented adequately with a single-width glyph. The following | ||
| 42 | * routines at present merely assign a single-cell width to all | ||
| 43 | * neutral characters, in the interest of simplicity. This is not | ||
| 44 | * entirely satisfactory and should be reconsidered before | ||
| 45 | * establishing a formal standard in this area. At the moment, the | ||
| 46 | * decision which Not East Asian (Neutral) characters should be | ||
| 47 | * represented by double-width glyphs cannot yet be answered by | ||
| 48 | * applying a simple rule from the Unicode database content. Setting | ||
| 49 | * up a proper standard for the behavior of UTF-8 character terminals | ||
| 50 | * will require a careful analysis not only of each Unicode character, | ||
| 51 | * but also of each presentation form, something the author of these | ||
| 52 | * routines has avoided to do so far. | ||
| 53 | * | ||
| 54 | * http://www.unicode.org/unicode/reports/tr11/ | ||
| 55 | * | 4 | * |
| 56 | * Markus Kuhn -- 2007-05-26 (Unicode 5.0) | 5 | * Markus Kuhn -- 2007-05-26 (Unicode 5.0) |
| 57 | * | 6 | * |
| @@ -62,7 +11,7 @@ | |||
| 62 | * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c | 11 | * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c |
| 63 | */ | 12 | */ |
| 64 | 13 | ||
| 65 | #include "wcwidth.h" // modified: used to define mk_wchar_t | 14 | #include "wcwidth.h" |
| 66 | 15 | ||
| 67 | struct interval { | 16 | struct interval { |
| 68 | int first; | 17 | int first; |
| @@ -70,7 +19,7 @@ struct interval { | |||
| 70 | }; | 19 | }; |
| 71 | 20 | ||
| 72 | /* auxiliary function for binary search in interval table */ | 21 | /* auxiliary function for binary search in interval table */ |
| 73 | static int bisearch(mk_wchar_t ucs, const struct interval *table, int max) { // modified: use mk_wchar_t | 22 | static int bisearch(mk_wchar_t ucs, const struct interval *table, int max) { |
| 74 | int min = 0; | 23 | int min = 0; |
| 75 | int mid; | 24 | int mid; |
| 76 | 25 | ||
| @@ -91,150 +40,23 @@ static int bisearch(mk_wchar_t ucs, const struct interval *table, int max) { // | |||
| 91 | 40 | ||
| 92 | 41 | ||
| 93 | /* The following two functions define the column width of an ISO 10646 | 42 | /* The following two functions define the column width of an ISO 10646 |
| 94 | * character as follows: | 43 | * character. |
| 95 | * | ||
| 96 | * - The null character (U+0000) has a column width of 0. | ||
| 97 | * | ||
| 98 | * - Other C0/C1 control characters and DEL will lead to a return | ||
| 99 | * value of -1. | ||
| 100 | * | ||
| 101 | * - Non-spacing and enclosing combining characters (general | ||
| 102 | * category code Mn or Me in the Unicode database) have a | ||
| 103 | * column width of 0. | ||
| 104 | * | 44 | * |
| 105 | * - SOFT HYPHEN (U+00AD) has a column width of 1. | 45 | * @param ucs the Unicode code point to check |
| 106 | * | 46 | * @param ambiguous_width the width to return for ambiguous width characters (1 or 2) |
| 107 | * - Other format characters (general category code Cf in the Unicode | 47 | * @return the width of the character, or -1 if the character is a control character |
| 108 | * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. | ||
| 109 | * | ||
| 110 | * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) | ||
| 111 | * have a column width of 0. | ||
| 112 | * | ||
| 113 | * - Spacing characters in the East Asian Wide (W) or East Asian | ||
| 114 | * Full-width (F) category as defined in Unicode Technical | ||
| 115 | * Report #11 have a column width of 2. | ||
| 116 | * | ||
| 117 | * - All remaining characters (including all printable | ||
| 118 | * ISO 8859-1 and WGL4 characters, Unicode control characters, | ||
| 119 | * etc.) have a column width of 1. | ||
| 120 | * | ||
| 121 | * This implementation assumes that mk_wchar_t characters are encoded | ||
| 122 | * in ISO 10646. | ||
| 123 | */ | 48 | */ |
| 124 | 49 | ||
| 125 | int mk_wcwidth(mk_wchar_t ucs) // modified: use mk_wchar_t | 50 | int mk_wcwidth(mk_wchar_t ucs, int ambiguous_width) |
| 126 | { | 51 | { |
| 127 | /* sorted list of non-overlapping intervals of non-spacing characters */ | 52 | static const struct interval zero_width_ranges[] = { |
| 128 | /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ | 53 | #include "wcwidth_zero_width.c" |
| 129 | static const struct interval combining[] = { // modified: added new ranges to the list | 54 | }; |
| 130 | { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD }, | 55 | static const struct interval ambiguous_width_ranges[] = { |
| 131 | { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, | 56 | #include "wcwidth_ambiguous_width.c" |
| 132 | { 0x05C7, 0x05C7 }, { 0x0600, 0x0605 }, { 0x0610, 0x061A }, | 57 | }; |
| 133 | { 0x061C, 0x061C }, { 0x064B, 0x065F }, { 0x0670, 0x0670 }, | 58 | static const struct interval double_width_ranges[] = { |
| 134 | { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 }, { 0x06E7, 0x06E8 }, | 59 | #include "wcwidth_double_width.c" |
| 135 | { 0x06EA, 0x06ED }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, | ||
| 136 | { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x07FD, 0x07FD }, | ||
| 137 | { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, | ||
| 138 | { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08D3, 0x08E1 }, | ||
| 139 | { 0x08E3, 0x0903 }, { 0x093A, 0x093C }, { 0x093E, 0x094F }, | ||
| 140 | { 0x0951, 0x0957 }, { 0x0962, 0x0963 }, { 0x0981, 0x0983 }, | ||
| 141 | { 0x09BC, 0x09BC }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 }, | ||
| 142 | { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 }, | ||
| 143 | { 0x09FE, 0x09FE }, { 0x0A01, 0x0A03 }, { 0x0A3C, 0x0A3C }, | ||
| 144 | { 0x0A3E, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, | ||
| 145 | { 0x0A51, 0x0A51 }, { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, | ||
| 146 | { 0x0A81, 0x0A83 }, { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, | ||
| 147 | { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0AE2, 0x0AE3 }, | ||
| 148 | { 0x0AFA, 0x0AFF }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C }, | ||
| 149 | { 0x0B3E, 0x0B44 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D }, | ||
| 150 | { 0x0B55, 0x0B57 }, { 0x0B62, 0x0B63 }, { 0x0B82, 0x0B82 }, | ||
| 151 | { 0x0BBE, 0x0BC2 }, { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, | ||
| 152 | { 0x0BD7, 0x0BD7 }, { 0x0C00, 0x0C04 }, { 0x0C3E, 0x0C44 }, | ||
| 153 | { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, | ||
| 154 | { 0x0C62, 0x0C63 }, { 0x0C81, 0x0C83 }, { 0x0CBC, 0x0CBC }, | ||
| 155 | { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD }, | ||
| 156 | { 0x0CD5, 0x0CD6 }, { 0x0CE2, 0x0CE3 }, { 0x0D00, 0x0D03 }, | ||
| 157 | { 0x0D3B, 0x0D3C }, { 0x0D3E, 0x0D44 }, { 0x0D46, 0x0D48 }, | ||
| 158 | { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 }, { 0x0D62, 0x0D63 }, | ||
| 159 | { 0x0D82, 0x0D83 }, { 0x0DCF, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, | ||
| 160 | { 0x0DD8, 0x0DDF }, { 0x0DF2, 0x0DF3 }, { 0x0E31, 0x0E31 }, | ||
| 161 | { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, | ||
| 162 | { 0x0EB4, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, | ||
| 163 | { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, | ||
| 164 | { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, | ||
| 165 | { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, | ||
| 166 | { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A }, | ||
| 167 | { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 }, | ||
| 168 | { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 }, | ||
| 169 | { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x135D, 0x135F }, | ||
| 170 | { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, | ||
| 171 | { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, | ||
| 172 | { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, | ||
| 173 | { 0x180B, 0x180E }, { 0x1885, 0x1886 }, { 0x18A9, 0x18A9 }, | ||
| 174 | { 0x1920, 0x1922 }, { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, | ||
| 175 | { 0x1939, 0x193B }, { 0x1A17, 0x1A18 }, { 0x1A1B, 0x1A1B }, | ||
| 176 | { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, { 0x1A60, 0x1A60 }, | ||
| 177 | { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, { 0x1A73, 0x1A7C }, | ||
| 178 | { 0x1A7F, 0x1A7F }, { 0x1AB0, 0x1ACE }, { 0x1B00, 0x1B03 }, | ||
| 179 | { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, | ||
| 180 | { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B82 }, | ||
| 181 | { 0x1BA1, 0x1BA1 }, { 0x1BA6, 0x1BA7 }, { 0x1BAA, 0x1BAA }, | ||
| 182 | { 0x1BAB, 0x1BAD }, { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, | ||
| 183 | { 0x1BED, 0x1BED }, { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, | ||
| 184 | { 0x1C36, 0x1C37 }, { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE8 }, | ||
| 185 | { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1CF8, 0x1CF9 }, | ||
| 186 | { 0x1DC0, 0x1DF9 }, { 0x1DFB, 0x1DFF }, { 0x20D0, 0x20DC }, | ||
| 187 | { 0x20E1, 0x20E1 }, { 0x20E5, 0x20F0 }, { 0x2CEF, 0x2CF1 }, | ||
| 188 | { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D }, | ||
| 189 | { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D }, | ||
| 190 | { 0xA69E, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 }, | ||
| 191 | { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, | ||
| 192 | { 0xA82C, 0xA82C }, { 0xA8C4, 0xA8C5 }, { 0xA8E0, 0xA8F1 }, | ||
| 193 | { 0xA8FF, 0xA8FF }, { 0xA926, 0xA92D }, { 0xA947, 0xA951 }, | ||
| 194 | { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, { 0xA9B6, 0xA9B9 }, | ||
| 195 | { 0xA9BC, 0xA9BD }, { 0xA9E5, 0xA9E5 }, { 0xAA29, 0xAA2E }, | ||
| 196 | { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 }, | ||
| 197 | { 0xAA4C, 0xAA4C }, { 0xAA7C, 0xAA7C }, { 0xAAB0, 0xAAB0 }, | ||
| 198 | { 0xAAB2, 0xAAB4 }, { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, | ||
| 199 | { 0xAAC1, 0xAAC1 }, { 0xAAEB, 0xAAEB }, { 0xAAEE, 0xAAEF }, | ||
| 200 | { 0xAAF5, 0xAAF6 }, { 0xABE3, 0xABE4 }, { 0xABE6, 0xABE7 }, | ||
| 201 | { 0xABE9, 0xABEA }, { 0xABEC, 0xABED }, { 0xFB1E, 0xFB1E }, | ||
| 202 | { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE2F }, { 0x101FD, 0x101FD }, | ||
| 203 | { 0x102E0, 0x102E0 }, { 0x10376, 0x1037A }, { 0x10A01, 0x10A03 }, | ||
| 204 | { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, { 0x10A38, 0x10A3A }, | ||
| 205 | { 0x10A3F, 0x10A3F }, { 0x10AE5, 0x10AE6 }, { 0x10D24, 0x10D27 }, | ||
| 206 | { 0x10EAB, 0x10EAC }, { 0x10F46, 0x10F50 }, { 0x10F82, 0x10F85 }, | ||
| 207 | { 0x11000, 0x11002 }, { 0x11038, 0x11046 }, { 0x1107F, 0x11082 }, | ||
| 208 | { 0x110B0, 0x110BA }, { 0x11100, 0x11102 }, { 0x11127, 0x11134 }, | ||
| 209 | { 0x11145, 0x11146 }, { 0x11173, 0x11173 }, { 0x11180, 0x11182 }, | ||
| 210 | { 0x111B3, 0x111C0 }, { 0x111C9, 0x111CC }, { 0x1122C, 0x11237 }, | ||
| 211 | { 0x1123E, 0x1123E }, { 0x112DF, 0x112EA }, { 0x11300, 0x11303 }, | ||
| 212 | { 0x1133B, 0x1133C }, { 0x1133E, 0x11344 }, { 0x11347, 0x11348 }, | ||
| 213 | { 0x1134B, 0x1134D }, { 0x11357, 0x11357 }, { 0x11362, 0x11363 }, | ||
| 214 | { 0x11435, 0x11446 }, { 0x1145E, 0x1145E }, { 0x114B0, 0x114C3 }, | ||
| 215 | { 0x115AF, 0x115B5 }, { 0x115B8, 0x115C0 }, { 0x115DC, 0x115DD }, | ||
| 216 | { 0x11630, 0x11640 }, { 0x116AB, 0x116B7 }, { 0x1171D, 0x1172B }, | ||
| 217 | { 0x1182C, 0x1183A }, { 0x11930, 0x11935 }, { 0x11937, 0x11938 }, | ||
| 218 | { 0x1193B, 0x1193E }, { 0x11940, 0x11940 }, { 0x11942, 0x11942 }, | ||
| 219 | { 0x119D1, 0x119D7 }, { 0x119DA, 0x119E0 }, { 0x11A01, 0x11A0A }, | ||
| 220 | { 0x11A33, 0x11A39 }, { 0x11A3B, 0x11A3E }, { 0x11A47, 0x11A47 }, | ||
| 221 | { 0x11A51, 0x11A5B }, { 0x11A8A, 0x11A96 }, { 0x11A98, 0x11A99 }, | ||
| 222 | { 0x11C30, 0x11C36 }, { 0x11C38, 0x11C3D }, { 0x11C3F, 0x11C3F }, | ||
| 223 | { 0x11C92, 0x11CA7 }, { 0x11CAA, 0x11CB0 }, { 0x11CB2, 0x11CB3 }, | ||
| 224 | { 0x11CB5, 0x11CB6 }, { 0x11D31, 0x11D36 }, { 0x11D3A, 0x11D3A }, | ||
| 225 | { 0x11D3C, 0x11D3D }, { 0x11D3F, 0x11D45 }, { 0x11D47, 0x11D47 }, | ||
| 226 | { 0x11D90, 0x11D91 }, { 0x11D95, 0x11D95 }, { 0x11D97, 0x11D97 }, | ||
| 227 | { 0x11EF3, 0x11EF4 }, { 0x13430, 0x13438 }, { 0x16AF0, 0x16AF4 }, | ||
| 228 | { 0x16B30, 0x16B36 }, { 0x16F4F, 0x16F4F }, { 0x16F8F, 0x16F92 }, | ||
| 229 | { 0x1BC9D, 0x1BC9E }, { 0x1BCA0, 0x1BCA3 }, { 0x1D167, 0x1D169 }, | ||
| 230 | { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, | ||
| 231 | { 0x1D242, 0x1D244 }, { 0x1DA00, 0x1DA36 }, { 0x1DA3B, 0x1DA6C }, | ||
| 232 | { 0x1DA75, 0x1DA75 }, { 0x1DA84, 0x1DA84 }, { 0x1DA9B, 0x1DA9F }, | ||
| 233 | { 0x1DAA1, 0x1DAAF }, { 0x1E000, 0x1E006 }, { 0x1E008, 0x1E018 }, | ||
| 234 | { 0x1E01B, 0x1E021 }, { 0x1E023, 0x1E024 }, { 0x1E026, 0x1E02A }, | ||
| 235 | { 0x1E130, 0x1E136 }, { 0x1E2AE, 0x1E2AE }, { 0x1E2EC, 0x1E2EF }, | ||
| 236 | { 0x1E4EC, 0x1E4EF }, { 0x1E8D0, 0x1E8D6 }, { 0x1E944, 0x1E94A }, | ||
| 237 | { 0x1E947, 0x1E94A }, { 0xE0100, 0xE01EF } | ||
| 238 | }; | 60 | }; |
| 239 | 61 | ||
| 240 | /* test for 8-bit control characters */ | 62 | /* test for 8-bit control characters */ |
| @@ -244,38 +66,27 @@ int mk_wcwidth(mk_wchar_t ucs) // modified: use mk_wchar_t | |||
| 244 | return -1; | 66 | return -1; |
| 245 | 67 | ||
| 246 | /* binary search in table of non-spacing characters */ | 68 | /* binary search in table of non-spacing characters */ |
| 247 | if (bisearch(ucs, combining, | 69 | if (bisearch(ucs, zero_width_ranges, |
| 248 | sizeof(combining) / sizeof(struct interval) - 1)) | 70 | sizeof(zero_width_ranges) / sizeof(struct interval) - 1)) |
| 249 | return 0; | 71 | return 0; |
| 250 | 72 | ||
| 251 | /* if we arrive here, ucs is not a combining or C0/C1 control character */ | 73 | /* binary search in table of ambiguous width characters */ |
| 74 | if (bisearch(ucs, ambiguous_width_ranges, | ||
| 75 | sizeof(ambiguous_width_ranges) / sizeof(struct interval) - 1)) | ||
| 76 | return ambiguous_width; | ||
| 252 | 77 | ||
| 253 | return 1 + | 78 | /* binary search in table of double width characters, default to 1 width */ |
| 254 | (ucs >= 0x1100 && | 79 | return 1 + (bisearch(ucs, double_width_ranges, |
| 255 | (ucs <= 0x115f || /* Hangul Jamo init. consonants */ | 80 | sizeof(double_width_ranges) / sizeof(struct interval) - 1)); |
| 256 | ucs == 0x2329 || ucs == 0x232a || | ||
| 257 | (ucs >= 0x2e80 && ucs <= 0xa4cf && | ||
| 258 | ucs != 0x303f) || /* CJK ... Yi */ | ||
| 259 | (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */ | ||
| 260 | (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */ | ||
| 261 | (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */ | ||
| 262 | (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ | ||
| 263 | (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */ | ||
| 264 | (ucs >= 0xffe0 && ucs <= 0xffe6) || | ||
| 265 | (ucs >= 0x1f300 && ucs <= 0x1f64f) || /* modified: added Emoticons */ | ||
| 266 | (ucs >= 0x1f680 && ucs <= 0x1f6ff) || /* modified: added Transport and Map Symbols */ | ||
| 267 | (ucs >= 0x1f900 && ucs <= 0x1f9ff) || /* modified: added Supplemental Symbols and Pictographs */ | ||
| 268 | (ucs >= 0x20000 && ucs <= 0x2fffd) || | ||
| 269 | (ucs >= 0x30000 && ucs <= 0x3fffd))); | ||
| 270 | } | 81 | } |
| 271 | 82 | ||
| 272 | 83 | ||
| 273 | int mk_wcswidth(const mk_wchar_t *pwcs, size_t n) // modified: use mk_wchar_t | 84 | int mk_wcswidth(const mk_wchar_t *pwcs, size_t n, int ambiguous_width) |
| 274 | { | 85 | { |
| 275 | int w, width = 0; | 86 | int w, width = 0; |
| 276 | 87 | ||
| 277 | for (;*pwcs && n-- > 0; pwcs++) | 88 | for (;*pwcs && n-- > 0; pwcs++) |
| 278 | if ((w = mk_wcwidth(*pwcs)) < 0) | 89 | if ((w = mk_wcwidth(*pwcs, ambiguous_width)) < 0) |
| 279 | return -1; | 90 | return -1; |
| 280 | else | 91 | else |
| 281 | width += w; | 92 | width += w; |
diff --git a/src/wcwidth.h b/src/wcwidth.h index 6cb6f6d..9d345f9 100644 --- a/src/wcwidth.h +++ b/src/wcwidth.h | |||
| @@ -1,7 +1,8 @@ | |||
| 1 | // wcwidth.h | 1 | // wcwidth.h |
| 2 | 2 | ||
| 3 | // Windows does not have a wcwidth function, so we use compatibilty code from | 3 | // Windows does not have a wcwidth function, so we use compatibility code from |
| 4 | // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn | 4 | // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn, this is |
| 5 | // however heavily modified. | ||
| 5 | 6 | ||
| 6 | #ifndef MK_WCWIDTH_H | 7 | #ifndef MK_WCWIDTH_H |
| 7 | #define MK_WCWIDTH_H | 8 | #define MK_WCWIDTH_H |
| @@ -16,7 +17,7 @@ typedef uint32_t mk_wchar_t; // Windows wchar_t can be 16-bit, we need 32-bit | |||
| 16 | typedef wchar_t mk_wchar_t; // Posix wchar_t is 32-bit so just use that | 17 | typedef wchar_t mk_wchar_t; // Posix wchar_t is 32-bit so just use that |
| 17 | #endif | 18 | #endif |
| 18 | 19 | ||
| 19 | int mk_wcwidth(mk_wchar_t ucs); | 20 | int mk_wcwidth(mk_wchar_t ucs, int ambiguous_width); |
| 20 | int mk_wcswidth(const mk_wchar_t *pwcs, size_t n); | 21 | int mk_wcswidth(const mk_wchar_t *pwcs, size_t n, int ambiguous_width); |
| 21 | 22 | ||
| 22 | #endif // MK_WCWIDTH_H | 23 | #endif // MK_WCWIDTH_H |
diff --git a/src/wcwidth_ambiguous_width.c b/src/wcwidth_ambiguous_width.c new file mode 100644 index 0000000..264258e --- /dev/null +++ b/src/wcwidth_ambiguous_width.c | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // Do not modify this file directly, it is generated by the wcwidth_update.lua script | ||
| 2 | // Contains unicode character-ranges handled as ambiguous (either 1 or 2 width) | ||
| 3 | // Generated from Unicode 17.0.0 | ||
| 4 | // Generated on 2026-01-29 | ||
| 5 | { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 }, | ||
| 6 | { 0x00AA, 0x00AA }, { 0x00AD, 0x00AE }, { 0x00B0, 0x00B4 }, | ||
| 7 | { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 }, | ||
| 8 | { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 }, | ||
| 9 | { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED }, | ||
| 10 | { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA }, | ||
| 11 | { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 }, | ||
| 12 | { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B }, | ||
| 13 | { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 }, | ||
| 14 | { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 }, | ||
| 15 | { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 }, | ||
| 16 | { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE }, | ||
| 17 | { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 }, | ||
| 18 | { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA }, | ||
| 19 | { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 }, | ||
| 20 | { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB }, | ||
| 21 | { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB }, | ||
| 22 | { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0300, 0x036F }, | ||
| 23 | { 0x0391, 0x03A1 }, { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, | ||
| 24 | { 0x03C3, 0x03C9 }, { 0x0401, 0x0401 }, { 0x0410, 0x044F }, | ||
| 25 | { 0x0451, 0x0451 }, { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, | ||
| 26 | { 0x2018, 0x2019 }, { 0x201C, 0x201D }, { 0x2020, 0x2022 }, | ||
| 27 | { 0x2024, 0x2027 }, { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, | ||
| 28 | { 0x2035, 0x2035 }, { 0x203B, 0x203B }, { 0x203E, 0x203E }, | ||
| 29 | { 0x2074, 0x2074 }, { 0x207F, 0x207F }, { 0x2081, 0x2084 }, | ||
| 30 | { 0x20AC, 0x20AC }, { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, | ||
| 31 | { 0x2109, 0x2109 }, { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, | ||
| 32 | { 0x2121, 0x2122 }, { 0x2126, 0x2126 }, { 0x212B, 0x212B }, | ||
| 33 | { 0x2153, 0x2154 }, { 0x215B, 0x215E }, { 0x2160, 0x216B }, | ||
| 34 | { 0x2170, 0x2179 }, { 0x2189, 0x2189 }, { 0x2190, 0x2199 }, | ||
| 35 | { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 }, { 0x21D4, 0x21D4 }, | ||
| 36 | { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 }, { 0x2202, 0x2203 }, | ||
| 37 | { 0x2207, 0x2208 }, { 0x220B, 0x220B }, { 0x220F, 0x220F }, | ||
| 38 | { 0x2211, 0x2211 }, { 0x2215, 0x2215 }, { 0x221A, 0x221A }, | ||
| 39 | { 0x221D, 0x2220 }, { 0x2223, 0x2223 }, { 0x2225, 0x2225 }, | ||
| 40 | { 0x2227, 0x222C }, { 0x222E, 0x222E }, { 0x2234, 0x2237 }, | ||
| 41 | { 0x223C, 0x223D }, { 0x2248, 0x2248 }, { 0x224C, 0x224C }, | ||
| 42 | { 0x2252, 0x2252 }, { 0x2260, 0x2261 }, { 0x2264, 0x2267 }, | ||
| 43 | { 0x226A, 0x226B }, { 0x226E, 0x226F }, { 0x2282, 0x2283 }, | ||
| 44 | { 0x2286, 0x2287 }, { 0x2295, 0x2295 }, { 0x2299, 0x2299 }, | ||
| 45 | { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF }, { 0x2312, 0x2312 }, | ||
| 46 | { 0x2460, 0x24E9 }, { 0x24EB, 0x254B }, { 0x2550, 0x2573 }, | ||
| 47 | { 0x2580, 0x258F }, { 0x2592, 0x2595 }, { 0x25A0, 0x25A1 }, | ||
| 48 | { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 }, { 0x25B6, 0x25B7 }, | ||
| 49 | { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 }, { 0x25C6, 0x25C8 }, | ||
| 50 | { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 }, { 0x25E2, 0x25E5 }, | ||
| 51 | { 0x25EF, 0x25EF }, { 0x2605, 0x2606 }, { 0x2609, 0x2609 }, | ||
| 52 | { 0x260E, 0x260F }, { 0x261C, 0x261C }, { 0x261E, 0x261E }, | ||
| 53 | { 0x2640, 0x2640 }, { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, | ||
| 54 | { 0x2663, 0x2665 }, { 0x2667, 0x266A }, { 0x266C, 0x266D }, | ||
| 55 | { 0x266F, 0x266F }, { 0x269E, 0x269F }, { 0x26BF, 0x26BF }, | ||
| 56 | { 0x26C6, 0x26CD }, { 0x26CF, 0x26D3 }, { 0x26D5, 0x26E1 }, | ||
| 57 | { 0x26E3, 0x26E3 }, { 0x26E8, 0x26E9 }, { 0x26EB, 0x26F1 }, | ||
| 58 | { 0x26F4, 0x26F4 }, { 0x26F6, 0x26F9 }, { 0x26FB, 0x26FC }, | ||
| 59 | { 0x26FE, 0x26FF }, { 0x273D, 0x273D }, { 0x2776, 0x277F }, | ||
| 60 | { 0x2B56, 0x2B59 }, { 0x3248, 0x324F }, { 0xE000, 0xF8FF }, | ||
| 61 | { 0xFE00, 0xFE0F }, { 0xFFFD, 0xFFFD }, { 0x1F100, 0x1F10A }, | ||
| 62 | { 0x1F110, 0x1F12D }, { 0x1F130, 0x1F169 }, { 0x1F170, 0x1F18D }, | ||
| 63 | { 0x1F18F, 0x1F190 }, { 0x1F19B, 0x1F1AC }, { 0xE0100, 0xE01EF }, | ||
| 64 | { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD } | ||
diff --git a/src/wcwidth_double_width.c b/src/wcwidth_double_width.c new file mode 100644 index 0000000..a0c1b65 --- /dev/null +++ b/src/wcwidth_double_width.c | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Do not modify this file directly, it is generated by the wcwidth_update.lua script | ||
| 2 | // Contains unicode character-ranges handled as double width | ||
| 3 | // Generated from Unicode 17.0.0 | ||
| 4 | // Generated on 2026-01-29 | ||
| 5 | { 0x1100, 0x115F }, { 0x231A, 0x231B }, { 0x2329, 0x232A }, | ||
| 6 | { 0x23E9, 0x23EC }, { 0x23F0, 0x23F0 }, { 0x23F3, 0x23F3 }, | ||
| 7 | { 0x25FD, 0x25FE }, { 0x2614, 0x2615 }, { 0x2630, 0x2637 }, | ||
| 8 | { 0x2648, 0x2653 }, { 0x267F, 0x267F }, { 0x268A, 0x268F }, | ||
| 9 | { 0x2693, 0x2693 }, { 0x26A1, 0x26A1 }, { 0x26AA, 0x26AB }, | ||
| 10 | { 0x26BD, 0x26BE }, { 0x26C4, 0x26C5 }, { 0x26CE, 0x26CE }, | ||
| 11 | { 0x26D4, 0x26D4 }, { 0x26EA, 0x26EA }, { 0x26F2, 0x26F3 }, | ||
| 12 | { 0x26F5, 0x26F5 }, { 0x26FA, 0x26FA }, { 0x26FD, 0x26FD }, | ||
| 13 | { 0x2705, 0x2705 }, { 0x270A, 0x270B }, { 0x2728, 0x2728 }, | ||
| 14 | { 0x274C, 0x274C }, { 0x274E, 0x274E }, { 0x2753, 0x2755 }, | ||
| 15 | { 0x2757, 0x2757 }, { 0x2795, 0x2797 }, { 0x27B0, 0x27B0 }, | ||
| 16 | { 0x27BF, 0x27BF }, { 0x2B1B, 0x2B1C }, { 0x2B50, 0x2B50 }, | ||
| 17 | { 0x2B55, 0x2B55 }, { 0x2E80, 0x2E99 }, { 0x2E9B, 0x2EF3 }, | ||
| 18 | { 0x2F00, 0x2FD5 }, { 0x2FF0, 0x303E }, { 0x3041, 0x3096 }, | ||
| 19 | { 0x3099, 0x30FF }, { 0x3105, 0x312F }, { 0x3131, 0x318E }, | ||
| 20 | { 0x3190, 0x31E5 }, { 0x31EF, 0x321E }, { 0x3220, 0x3247 }, | ||
| 21 | { 0x3250, 0xA48C }, { 0xA490, 0xA4C6 }, { 0xA960, 0xA97C }, | ||
| 22 | { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, { 0xFE10, 0xFE19 }, | ||
| 23 | { 0xFE30, 0xFE52 }, { 0xFE54, 0xFE66 }, { 0xFE68, 0xFE6B }, | ||
| 24 | { 0xFF01, 0xFF60 }, { 0xFFE0, 0xFFE6 }, { 0x16FE0, 0x16FE4 }, | ||
| 25 | { 0x16FF0, 0x16FF6 }, { 0x17000, 0x18CD5 }, { 0x18CFF, 0x18D1E }, | ||
| 26 | { 0x18D80, 0x18DF2 }, { 0x1AFF0, 0x1AFF3 }, { 0x1AFF5, 0x1AFFB }, | ||
| 27 | { 0x1AFFD, 0x1AFFE }, { 0x1B000, 0x1B122 }, { 0x1B132, 0x1B132 }, | ||
| 28 | { 0x1B150, 0x1B152 }, { 0x1B155, 0x1B155 }, { 0x1B164, 0x1B167 }, | ||
| 29 | { 0x1B170, 0x1B2FB }, { 0x1D300, 0x1D356 }, { 0x1D360, 0x1D376 }, | ||
| 30 | { 0x1F004, 0x1F004 }, { 0x1F0CF, 0x1F0CF }, { 0x1F18E, 0x1F18E }, | ||
| 31 | { 0x1F191, 0x1F19A }, { 0x1F1E6, 0x1F202 }, { 0x1F210, 0x1F23B }, | ||
| 32 | { 0x1F240, 0x1F248 }, { 0x1F250, 0x1F251 }, { 0x1F260, 0x1F265 }, | ||
| 33 | { 0x1F300, 0x1F320 }, { 0x1F32D, 0x1F335 }, { 0x1F337, 0x1F37C }, | ||
| 34 | { 0x1F37E, 0x1F393 }, { 0x1F3A0, 0x1F3CA }, { 0x1F3CF, 0x1F3D3 }, | ||
| 35 | { 0x1F3E0, 0x1F3F0 }, { 0x1F3F4, 0x1F3F4 }, { 0x1F3F8, 0x1F43E }, | ||
| 36 | { 0x1F440, 0x1F440 }, { 0x1F442, 0x1F4FC }, { 0x1F4FF, 0x1F53D }, | ||
| 37 | { 0x1F54B, 0x1F54E }, { 0x1F550, 0x1F567 }, { 0x1F57A, 0x1F57A }, | ||
| 38 | { 0x1F595, 0x1F596 }, { 0x1F5A4, 0x1F5A4 }, { 0x1F5FB, 0x1F64F }, | ||
| 39 | { 0x1F680, 0x1F6C5 }, { 0x1F6CC, 0x1F6CC }, { 0x1F6D0, 0x1F6D2 }, | ||
| 40 | { 0x1F6D5, 0x1F6D8 }, { 0x1F6DC, 0x1F6DF }, { 0x1F6EB, 0x1F6EC }, | ||
| 41 | { 0x1F6F4, 0x1F6FC }, { 0x1F7E0, 0x1F7EB }, { 0x1F7F0, 0x1F7F0 }, | ||
| 42 | { 0x1F90C, 0x1F93A }, { 0x1F93C, 0x1F945 }, { 0x1F947, 0x1F9FF }, | ||
| 43 | { 0x1FA70, 0x1FA7C }, { 0x1FA80, 0x1FA8A }, { 0x1FA8E, 0x1FAC6 }, | ||
| 44 | { 0x1FAC8, 0x1FAC8 }, { 0x1FACD, 0x1FADC }, { 0x1FADF, 0x1FAEA }, | ||
| 45 | { 0x1FAEF, 0x1FAF8 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } | ||
diff --git a/src/wcwidth_update.lua b/src/wcwidth_update.lua new file mode 100755 index 0000000..37f18c3 --- /dev/null +++ b/src/wcwidth_update.lua | |||
| @@ -0,0 +1,404 @@ | |||
| 1 | #!/usr/bin/env lua | ||
| 2 | |||
| 3 | -- This file downloads and parses unicode standard files and updates the wcwidth code | ||
| 4 | -- based on that data. | ||
| 5 | |||
| 6 | local VERSION="17.0.0" -- the unicode standard version to download | ||
| 7 | |||
| 8 | |||
| 9 | |||
-- Verify the prerequisites before doing any work: curl must be on the path,
-- Penlight must be installed, and the script must be started from within the
-- `./src/` directory (detected by being able to read `./wcwidth.c`).
do
  -- os.execute returns only a status number on Lua 5.1, but
  -- `true|nil, "exit"|"signal", code` on Lua 5.2+; normalise both forms.
  local ok, _, code = os.execute("curl --version > /dev/null 2>&1")
  if type(ok) == "number" then
    ok, code = (ok == 0), ok
  end
  if not ok then
    error("curl is not available in the path; exitcode " .. tostring(code))
  end

  local has_penlight, utils = pcall(require, "pl.utils")
  if not has_penlight then
    error("Penlight is not available, please install via `luarocks install penlight`")
  end

  -- readfile returns nil (plus an error message) when the file cannot be read;
  -- the result must be captured, otherwise this check can never fail.
  local contents = utils.readfile("./wcwidth.c")
  if not contents then
    error("failed to read './wcwidth.c', run this script from within the `./src/` directory")
  end
end
| 27 | |||
| 28 | -- files to download from the unicode site | ||
| 29 | local FN_DERIVED_GENERAL_CATEGORY = 1 | ||
| 30 | local FN_EAST_ASIAN_WIDTH = 2 | ||
| 31 | local FN_DERIVED_CORE_PROPERTIES = 3 | ||
| 32 | local FN_EMOJI_DATA = 4 | ||
| 33 | |||
| 34 | local download_file_list = { | ||
| 35 | [FN_DERIVED_GENERAL_CATEGORY] = "extracted/DerivedGeneralCategory.txt", | ||
| 36 | [FN_EAST_ASIAN_WIDTH] = "EastAsianWidth.txt", | ||
| 37 | [FN_DERIVED_CORE_PROPERTIES] = "DerivedCoreProperties.txt", | ||
| 38 | [FN_EMOJI_DATA] = "emoji/emoji-data.txt", | ||
| 39 | } | ||
| 40 | local target_path = "./unicode_data/" | ||
| 41 | |||
| 42 | |||
| 43 | |||
-- Download a fresh copy of every file in `download_file_list` from the unicode
-- site into `target_path`.
do
  local base_url = "https://www.unicode.org/Public/" .. VERSION .. "/ucd/" -- must include trailing slash


  -- Runs a shell command and raises an error (including the exit code) when
  -- it fails. os.execute returns only a status number on Lua 5.1, but
  -- `true|nil, "exit"|"signal", code` on Lua 5.2+; both forms are handled.
  local function execute(cmd)
    local ok, _, code = os.execute(cmd)
    if type(ok) == "number" then
      ok, code = (ok == 0), ok
    end
    if not ok then
      error("Failed to execute: " .. cmd .. "; exitcode " .. tostring(code))
    end
  end


  -- removes a file, and then downloads a new copy from the unicode site
  local function download_file(filename, target_filename)
    print("Downloading " .. filename .. " to " .. target_filename)
    os.remove(target_filename)
    execute("curl --fail -s -o " .. target_filename .. " " .. base_url .. filename)
  end


  -- Downloads all unicode files we need
  local function download_files()
    -- the target directories must exist before curl can write into them
    execute("mkdir -p " .. target_path .. "extracted")
    execute("mkdir -p " .. target_path .. "emoji")
    for _, filename in ipairs(download_file_list) do
      download_file(filename, target_path .. filename)
    end
  end


  download_files()
end
| 72 | |||
| 73 | |||
| 74 | |||
| 75 | -- set up the 3 lists of data (everything else is single-width) | ||
| 76 | local zero_width = {} | ||
| 77 | local double_width = {} | ||
| 78 | local ambiguous_width = {} | ||
| 79 | |||
| 80 | |||
| 81 | |||
local readlines do
  local utils = require("pl.utils")

  -- Reads a file and returns its lines as a list, leaving out comment lines
  -- (first non-whitespace character is "#") and lines that are empty or
  -- contain only whitespace. Raises an error if the file cannot be read.
  function readlines(filename)
    print("Parsing " .. filename)
    local raw = assert(utils.readlines(filename))

    local kept = {}
    for idx = 1, #raw do
      local text = raw[idx]
      local is_comment = text:match("^%s*#") ~= nil
      local is_blank = text:match("^%s*$") ~= nil
      if not (is_comment or is_blank) then
        kept[#kept + 1] = text
      end
    end

    return kept
  end
end
| 99 | |||
| 100 | |||
| 101 | |||
| 102 | |||
-- parse DerivedGeneralCategory.txt
-- Collects the zero-width combining marks; these overlay the previous glyph:
--   Mn (Nonspacing Mark) -> width = 0
--   Me (Enclosing Mark)  -> width = 0
-- Every other category in this file is ignored.
-- This replaces Markus Kuhn's combining[] table.
do
  local entries = readlines(target_path .. download_file_list[FN_DERIVED_GENERAL_CATEGORY])
  local count_before = #zero_width

  for idx = 1, #entries do
    local entry = entries[idx]
    local codepoints, category = string.match(entry, "^([%x%.]+)%s*;%s*(%a+)")
    if codepoints == nil then
      error("Failed to parse line: " .. entry)
    end

    if category == "Mn" or category == "Me" then
      if not string.find(codepoints, "..", 1, true) then
        -- a single code point is stored as a range of one
        codepoints = codepoints .. ".." .. codepoints
      end
      table.insert(zero_width, codepoints)
    end
  end

  print(" found " .. (#zero_width - count_before) .. " zero-width character-ranges")
end
| 134 | |||
| 135 | |||
| 136 | |||
-- parse DerivedCoreProperties.txt
-- Collects the zero-width format / ignorable characters:
--   Default_Ignorable_Code_Point -> width = 0
-- Important examples of what this includes:
--   U+200D ZERO WIDTH JOINER
--   U+200C ZERO WIDTH NON-JOINER
--   U+FE00..U+FE0F (variation selectors)
--   Bidi and other format controls
-- These are not Mn/Me, but terminals treat them as zero-width; they are
-- required for emoji correctness and modern text.
do
  local entries = readlines(target_path .. download_file_list[FN_DERIVED_CORE_PROPERTIES])
  local count_before = #zero_width

  for _, entry in ipairs(entries) do
    local codepoints, property = entry:match("^([%x%.]+)%s*;%s*([%a_]+)")
    if not codepoints then
      error("Failed to parse line: " .. entry)
    end

    if property == "Default_Ignorable_Code_Point" then
      local is_range = codepoints:find("..", 1, true) ~= nil
      if not is_range then
        -- a single code point is stored as a range of one
        codepoints = codepoints .. ".." .. codepoints
      end
      zero_width[#zero_width + 1] = codepoints
    end
  end

  print(" found " .. (#zero_width - count_before) .. " zero-width character-ranges")
end
| 173 | |||
| 174 | |||
| 175 | |||
-- parse EastAsianWidth.txt
-- Determines the double-width and ambiguous-width characters:
--   W (Wide)      -> width = 2
--   F (Fullwidth) -> width = 2
--   A (Ambiguous) -> width = 1 or 2 (caller's choice; usually 1 unless CJK mode)
-- Everything else (H, Na, N) is left at width = 1.
-- This is the only Unicode-sanctioned width-related property, and the core of
-- all wcwidth() implementations.
do
  local entries = readlines(target_path .. download_file_list[FN_EAST_ASIAN_WIDTH])
  local wide_before, ambiguous_before = #double_width, #ambiguous_width

  -- maps an East Asian Width value to the list that collects it
  local targets = {
    W = double_width,
    F = double_width,
    A = ambiguous_width,
  }

  for _, entry in ipairs(entries) do
    local codepoints, width_type = entry:match("^([%x%.]+)%s*;%s*(%a+)")
    if not codepoints then
      error("Failed to parse line: " .. entry)
    end

    local target = targets[width_type]
    if target then
      if not codepoints:find("..", 1, true) then
        -- a single code point is stored as a range of one
        codepoints = codepoints .. ".." .. codepoints
      end
      target[#target + 1] = codepoints
    end
  end

  print(" found " .. (#double_width - wide_before) .. " double-width character-ranges")
  print(" found " .. (#ambiguous_width - ambiguous_before) .. " ambiguous-width character-ranges")
end
| 213 | |||
| 214 | |||
| 215 | |||
-- parse emoji-data.txt
-- Adds the emoji that have emoji presentation to the double-width list:
--   Emoji_Presentation -> width = 2
-- (Extended_Pictographic is not extracted here.)
-- Emoji are not reliably covered by EastAsianWidth, while modern terminals
-- render them as double-width; this is required for correct emoji column
-- alignment.
do
  local entries = readlines(target_path .. download_file_list[FN_EMOJI_DATA])
  local wide_before = #double_width

  for _, entry in ipairs(entries) do
    local codepoints, property = entry:match("^([%x%.]+)%s*;%s*([%a_]+)")
    if not codepoints then
      error("Failed to parse line: " .. entry)
    end

    if property:find("Emoji_Presentation", 1, true) then
      local is_range = codepoints:find("..", 1, true) ~= nil
      if is_range then
        double_width[#double_width + 1] = codepoints
      else
        -- a single code point is stored as a range of one
        double_width[#double_width + 1] = codepoints .. ".." .. codepoints
      end
    end
  end

  print(" found " .. (#double_width - wide_before) .. " double-width character-ranges")
end
| 247 | |||
| 248 | |||
| 249 | |||
-- returns the start and end of a range, numerically, and as hex strings
-- Raises a descriptive error when the input is not of the form
-- "<hex>..<hex>", or when the end lies before the start.
-- @tparam string range the range to parse, e.g. "25FD..25FE"
-- @treturn number sr the start of the range
-- @treturn number er the end of the range
-- @treturn string sh the start of the range as a hex string
-- @treturn string eh the end of the range as a hex string
local parse_range do
  function parse_range(range)
    -- anchored match, so a missing/non-hex part is rejected here instead of
    -- surfacing later as a nil comparison
    local sh, eh = range:match("^(%x+)%.%.(%x+)$")
    local sr = sh and tonumber(sh, 16)
    local er = eh and tonumber(eh, 16)
    if not (sr and er) then
      error("Failed to parse range: " .. range)
    end
    if er < sr then
      error("Failed to parse range: " .. range .. " (end < start)")
    end
    return sr, er, sh, eh
  end

  -- some inline tests for parse_range
  local sr, er = parse_range("25FD..25FE")
  assert(sr == 9725)
  assert(er == 9726)
  local sr, er = parse_range("105C0..105F3")
  assert(sr == 67008)
  assert(er == 67059)
end
| 280 | |||
| 281 | |||
| 282 | |||
-- Sorts the list of "<hex>..<hex>" ranges in-place, ascending by the numeric
-- start of each range. Returns the same list to allow call chaining.
local function sort_ranges(ranges)
  local function starts_before(lhs, rhs)
    local lhs_start = parse_range(lhs)
    local rhs_start = parse_range(rhs)
    return lhs_start < rhs_start
  end

  table.sort(ranges, starts_before)
  return ranges
end
| 290 | |||
| 291 | |||
| 292 | |||
-- Merges adjacent and overlapping ranges in-place. The list is expected to be
-- sorted by range start; surviving entries are compacted to the front of the
-- list and the left-over slots are cleared.
local combine_ranges do
  function combine_ranges(ranges)
    local total = #ranges
    local keep = 1 -- index of the entry that is currently being grown

    for idx = 2, total do
      local prev_start, prev_end, prev_sh, prev_eh = parse_range(ranges[keep])
      local cur_start, cur_end, _, cur_eh = parse_range(ranges[idx])
      local touches = cur_start >= prev_start and cur_start <= (prev_end + 1)

      if not touches then
        -- gap between the two; start growing a new entry
        keep = keep + 1
        ranges[keep] = ranges[idx]
      elseif prev_end > cur_end then
        -- current range lies fully within the previous one
        ranges[keep] = prev_sh .. ".." .. prev_eh
      else
        -- adjacent or overlapping; extend the previous range
        ranges[keep] = prev_sh .. ".." .. cur_eh
      end
    end

    -- drop whatever remains beyond the last kept entry
    for idx = total, keep + 1, -1 do
      ranges[idx] = nil
    end
  end

  -- some inline tests for combine_ranges
  local ranges = {
    "25FD..25FE",
    "25FD..25FE",   -- duplicate range, should be removed
    "105C0..105F3",
    "105D0..105E0", -- range fully within previous range, should be combined
    "10F00..10F10",
    "10F11..10F20", -- adjacent to previous, should be combined
    "11000..11100",
    "11101..11110", -- adjacent + extending the previous, should be combined
    "12000..12010",
    "12011..12020", -- multiple: adjacent, should be combined
    "12015..12030", -- multiple: overlap + extending the previous, should be combined
    "12031..12040", -- multiple: adjacent again, should be combined
  }
  combine_ranges(ranges)
  assert(#ranges == 5)
  assert(ranges[1] == "25FD..25FE")
  assert(ranges[2] == "105C0..105F3")
  assert(ranges[3] == "10F00..10F20")
  assert(ranges[4] == "11000..11110")
  assert(ranges[5] == "12000..12040")
end
| 342 | |||
| 343 | |||
| 344 | |||
-- normalise the three lists: sort each one, then merge adjacent/overlapping ranges
for _, list in ipairs({ zero_width, double_width, ambiguous_width }) do
  combine_ranges(sort_ranges(list))
end



-- Rewrites every "<hex>..<hex>" entry of the list, in-place, as a C
-- initializer for one interval.
-- format: "{ 0x0829, 0x082D }"
local function convert_c_ranges(ranges)
  for idx, range in ipairs(ranges) do
    local _, _, first_hex, last_hex = parse_range(range)
    ranges[idx] = string.format("{ 0x%s, 0x%s }", first_hex, last_hex)
  end
end

for _, list in ipairs({ zero_width, double_width, ambiguous_width }) do
  convert_c_ranges(list)
end
| 363 | |||
| 364 | |||
| 365 | |||
-- prefix for every line written to the generated source files
local SOURCE_INDENT = " "


-- Formats the (already converted) ranges as C source lines, 3 ranges per line.
-- Every line ends in a comma, except for the last one. An empty list of
-- ranges yields an empty list of lines.
-- @tparam table ranges list of strings, one C initializer each
-- @treturn table list of source lines
local function triplet_lines(ranges)
  local lines = {}
  local count = #ranges
  for i = 1, count, 3 do
    lines[#lines + 1] = SOURCE_INDENT .. table.concat(ranges, ", ", i, math.min(i + 2, count)) .. ","
  end
  -- drop trailing comma from last line (there is no last line for empty input)
  if #lines > 0 then
    lines[#lines] = lines[#lines]:sub(1, -2)
  end
  return lines
end
| 379 | |||
| 380 | |||
-- Builds the full text of one generated source file: four header comment
-- lines (generator notice, description, Unicode version, generation date),
-- followed by the ranges formatted three per line.
-- @tparam table ranges list of C initializer strings
-- @tparam string contains description of what the file contains
-- @treturn string the file contents
local function create_file_contents(ranges, contains)
  local header = {
    "// Do not modify this file directly, it is generated by the wcwidth_update.lua script\n",
    "// Contains " .. contains .. "\n",
    "// Generated from Unicode " .. VERSION .. "\n",
    "// Generated on " .. os.date("%Y-%m-%d") .. "\n",
  }

  local parts = {}
  for _, text in ipairs(header) do
    parts[#parts + 1] = SOURCE_INDENT .. text
  end
  parts[#parts + 1] = table.concat(triplet_lines(ranges), "\n")
  parts[#parts + 1] = "\n"

  return table.concat(parts)
end
| 390 | |||
| 391 | |||
| 392 | |||
| 393 | |||
local writefile = require("pl.utils").writefile

-- Write the three generated range tables next to this script; each entry
-- holds the progress message, the target file, the ranges, and the
-- description that ends up in the file header.
do
  local outputs = {
    {
      message = " zero-width: ./wcwidth_zero_width.c",
      path = "./wcwidth_zero_width.c",
      ranges = zero_width,
      contains = "unicode character-ranges handled as 0 width",
    },
    {
      message = " double-width: ./wcwidth_double_width.c",
      path = "./wcwidth_double_width.c",
      ranges = double_width,
      contains = "unicode character-ranges handled as double width",
    },
    {
      message = " ambiguous-width: ./wcwidth_ambiguous_width.c",
      path = "./wcwidth_ambiguous_width.c",
      ranges = ambiguous_width,
      contains = "unicode character-ranges handled as ambiguous (either 1 or 2 width)",
    },
  }

  print("writing source files...")
  for _, output in ipairs(outputs) do
    print(output.message)
    -- writefile returns a falsy value on failure; stop right away in that case
    assert(writefile(output.path, create_file_contents(output.ranges, output.contains)))
  end
end
diff --git a/src/wcwidth_zero_width.c b/src/wcwidth_zero_width.c new file mode 100644 index 0000000..579ca5f --- /dev/null +++ b/src/wcwidth_zero_width.c | |||
| @@ -0,0 +1,128 @@ | |||
| 1 | // Do not modify this file directly, it is generated by the wcwidth_update.lua script | ||
| 2 | // Contains unicode character-ranges handled as 0 width | ||
| 3 | // Generated from Unicode 17.0.0 | ||
| 4 | // Generated on 2026-01-29 | ||
| 5 | { 0x00AD, 0x00AD }, { 0x0300, 0x036F }, { 0x0483, 0x0489 }, | ||
| 6 | { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, | ||
| 7 | { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0610, 0x061A }, | ||
| 8 | { 0x061C, 0x061C }, { 0x064B, 0x065F }, { 0x0670, 0x0670 }, | ||
| 9 | { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 }, { 0x06E7, 0x06E8 }, | ||
| 10 | { 0x06EA, 0x06ED }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, | ||
| 11 | { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x07FD, 0x07FD }, | ||
| 12 | { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, | ||
| 13 | { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x0897, 0x089F }, | ||
| 14 | { 0x08CA, 0x08E1 }, { 0x08E3, 0x0902 }, { 0x093A, 0x093A }, | ||
| 15 | { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, | ||
| 16 | { 0x0951, 0x0957 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, | ||
| 17 | { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, | ||
| 18 | { 0x09E2, 0x09E3 }, { 0x09FE, 0x09FE }, { 0x0A01, 0x0A02 }, | ||
| 19 | { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, | ||
| 20 | { 0x0A4B, 0x0A4D }, { 0x0A51, 0x0A51 }, { 0x0A70, 0x0A71 }, | ||
| 21 | { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, | ||
| 22 | { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, | ||
| 23 | { 0x0AE2, 0x0AE3 }, { 0x0AFA, 0x0AFF }, { 0x0B01, 0x0B01 }, | ||
| 24 | { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B44 }, | ||
| 25 | { 0x0B4D, 0x0B4D }, { 0x0B55, 0x0B56 }, { 0x0B62, 0x0B63 }, | ||
| 26 | { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, | ||
| 27 | { 0x0C00, 0x0C00 }, { 0x0C04, 0x0C04 }, { 0x0C3C, 0x0C3C }, | ||
| 28 | { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, | ||
| 29 | { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 }, { 0x0C81, 0x0C81 }, | ||
| 30 | { 0x0CBC, 0x0CBC }, { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, | ||
| 31 | { 0x0CCC, 0x0CCD }, { 0x0CE2, 0x0CE3 }, { 0x0D00, 0x0D01 }, | ||
| 32 | { 0x0D3B, 0x0D3C }, { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D }, | ||
| 33 | { 0x0D62, 0x0D63 }, { 0x0D81, 0x0D81 }, { 0x0DCA, 0x0DCA }, | ||
| 34 | { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, | ||
| 35 | { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, | ||
| 36 | { 0x0EB4, 0x0EBC }, { 0x0EC8, 0x0ECE }, { 0x0F18, 0x0F19 }, | ||
| 37 | { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, | ||
| 38 | { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, | ||
| 39 | { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, | ||
| 40 | { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A }, | ||
| 41 | { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 }, | ||
| 42 | { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 }, | ||
| 43 | { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x115F, 0x1160 }, | ||
| 44 | { 0x135D, 0x135F }, { 0x1712, 0x1714 }, { 0x1732, 0x1733 }, | ||
| 45 | { 0x1752, 0x1753 }, { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, | ||
| 46 | { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, | ||
| 47 | { 0x17DD, 0x17DD }, { 0x180B, 0x180F }, { 0x1885, 0x1886 }, | ||
| 48 | { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, { 0x1927, 0x1928 }, | ||
| 49 | { 0x1932, 0x1932 }, { 0x1939, 0x193B }, { 0x1A17, 0x1A18 }, | ||
| 50 | { 0x1A1B, 0x1A1B }, { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, | ||
| 51 | { 0x1A60, 0x1A60 }, { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, | ||
| 52 | { 0x1A73, 0x1A7C }, { 0x1A7F, 0x1A7F }, { 0x1AB0, 0x1ADD }, | ||
| 53 | { 0x1AE0, 0x1AEB }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, | ||
| 54 | { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, | ||
| 55 | { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 }, { 0x1BA2, 0x1BA5 }, | ||
| 56 | { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAD }, { 0x1BE6, 0x1BE6 }, | ||
| 57 | { 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED }, { 0x1BEF, 0x1BF1 }, | ||
| 58 | { 0x1C2C, 0x1C33 }, { 0x1C36, 0x1C37 }, { 0x1CD0, 0x1CD2 }, | ||
| 59 | { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 }, { 0x1CED, 0x1CED }, | ||
| 60 | { 0x1CF4, 0x1CF4 }, { 0x1CF8, 0x1CF9 }, { 0x1DC0, 0x1DFF }, | ||
| 61 | { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x206F }, | ||
| 62 | { 0x20D0, 0x20F0 }, { 0x2CEF, 0x2CF1 }, { 0x2D7F, 0x2D7F }, | ||
| 63 | { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D }, { 0x3099, 0x309A }, | ||
| 64 | { 0x3164, 0x3164 }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D }, | ||
| 65 | { 0xA69E, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 }, | ||
| 66 | { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, | ||
| 67 | { 0xA82C, 0xA82C }, { 0xA8C4, 0xA8C5 }, { 0xA8E0, 0xA8F1 }, | ||
| 68 | { 0xA8FF, 0xA8FF }, { 0xA926, 0xA92D }, { 0xA947, 0xA951 }, | ||
| 69 | { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, { 0xA9B6, 0xA9B9 }, | ||
| 70 | { 0xA9BC, 0xA9BD }, { 0xA9E5, 0xA9E5 }, { 0xAA29, 0xAA2E }, | ||
| 71 | { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 }, | ||
| 72 | { 0xAA4C, 0xAA4C }, { 0xAA7C, 0xAA7C }, { 0xAAB0, 0xAAB0 }, | ||
| 73 | { 0xAAB2, 0xAAB4 }, { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, | ||
| 74 | { 0xAAC1, 0xAAC1 }, { 0xAAEC, 0xAAED }, { 0xAAF6, 0xAAF6 }, | ||
| 75 | { 0xABE5, 0xABE5 }, { 0xABE8, 0xABE8 }, { 0xABED, 0xABED }, | ||
| 76 | { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE2F }, | ||
| 77 | { 0xFEFF, 0xFEFF }, { 0xFFA0, 0xFFA0 }, { 0xFFF0, 0xFFF8 }, | ||
| 78 | { 0x101FD, 0x101FD }, { 0x102E0, 0x102E0 }, { 0x10376, 0x1037A }, | ||
| 79 | { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, | ||
| 80 | { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x10AE5, 0x10AE6 }, | ||
| 81 | { 0x10D24, 0x10D27 }, { 0x10D69, 0x10D6D }, { 0x10EAB, 0x10EAC }, | ||
| 82 | { 0x10EFA, 0x10EFF }, { 0x10F46, 0x10F50 }, { 0x10F82, 0x10F85 }, | ||
| 83 | { 0x11001, 0x11001 }, { 0x11038, 0x11046 }, { 0x11070, 0x11070 }, | ||
| 84 | { 0x11073, 0x11074 }, { 0x1107F, 0x11081 }, { 0x110B3, 0x110B6 }, | ||
| 85 | { 0x110B9, 0x110BA }, { 0x110C2, 0x110C2 }, { 0x11100, 0x11102 }, | ||
| 86 | { 0x11127, 0x1112B }, { 0x1112D, 0x11134 }, { 0x11173, 0x11173 }, | ||
| 87 | { 0x11180, 0x11181 }, { 0x111B6, 0x111BE }, { 0x111C9, 0x111CC }, | ||
| 88 | { 0x111CF, 0x111CF }, { 0x1122F, 0x11231 }, { 0x11234, 0x11234 }, | ||
| 89 | { 0x11236, 0x11237 }, { 0x1123E, 0x1123E }, { 0x11241, 0x11241 }, | ||
| 90 | { 0x112DF, 0x112DF }, { 0x112E3, 0x112EA }, { 0x11300, 0x11301 }, | ||
| 91 | { 0x1133B, 0x1133C }, { 0x11340, 0x11340 }, { 0x11366, 0x1136C }, | ||
| 92 | { 0x11370, 0x11374 }, { 0x113BB, 0x113C0 }, { 0x113CE, 0x113CE }, | ||
| 93 | { 0x113D0, 0x113D0 }, { 0x113D2, 0x113D2 }, { 0x113E1, 0x113E2 }, | ||
| 94 | { 0x11438, 0x1143F }, { 0x11442, 0x11444 }, { 0x11446, 0x11446 }, | ||
| 95 | { 0x1145E, 0x1145E }, { 0x114B3, 0x114B8 }, { 0x114BA, 0x114BA }, | ||
| 96 | { 0x114BF, 0x114C0 }, { 0x114C2, 0x114C3 }, { 0x115B2, 0x115B5 }, | ||
| 97 | { 0x115BC, 0x115BD }, { 0x115BF, 0x115C0 }, { 0x115DC, 0x115DD }, | ||
| 98 | { 0x11633, 0x1163A }, { 0x1163D, 0x1163D }, { 0x1163F, 0x11640 }, | ||
| 99 | { 0x116AB, 0x116AB }, { 0x116AD, 0x116AD }, { 0x116B0, 0x116B5 }, | ||
| 100 | { 0x116B7, 0x116B7 }, { 0x1171D, 0x1171D }, { 0x1171F, 0x1171F }, | ||
| 101 | { 0x11722, 0x11725 }, { 0x11727, 0x1172B }, { 0x1182F, 0x11837 }, | ||
| 102 | { 0x11839, 0x1183A }, { 0x1193B, 0x1193C }, { 0x1193E, 0x1193E }, | ||
| 103 | { 0x11943, 0x11943 }, { 0x119D4, 0x119D7 }, { 0x119DA, 0x119DB }, | ||
| 104 | { 0x119E0, 0x119E0 }, { 0x11A01, 0x11A0A }, { 0x11A33, 0x11A38 }, | ||
| 105 | { 0x11A3B, 0x11A3E }, { 0x11A47, 0x11A47 }, { 0x11A51, 0x11A56 }, | ||
| 106 | { 0x11A59, 0x11A5B }, { 0x11A8A, 0x11A96 }, { 0x11A98, 0x11A99 }, | ||
| 107 | { 0x11B60, 0x11B60 }, { 0x11B62, 0x11B64 }, { 0x11B66, 0x11B66 }, | ||
| 108 | { 0x11C30, 0x11C36 }, { 0x11C38, 0x11C3D }, { 0x11C3F, 0x11C3F }, | ||
| 109 | { 0x11C92, 0x11CA7 }, { 0x11CAA, 0x11CB0 }, { 0x11CB2, 0x11CB3 }, | ||
| 110 | { 0x11CB5, 0x11CB6 }, { 0x11D31, 0x11D36 }, { 0x11D3A, 0x11D3A }, | ||
| 111 | { 0x11D3C, 0x11D3D }, { 0x11D3F, 0x11D45 }, { 0x11D47, 0x11D47 }, | ||
| 112 | { 0x11D90, 0x11D91 }, { 0x11D95, 0x11D95 }, { 0x11D97, 0x11D97 }, | ||
| 113 | { 0x11EF3, 0x11EF4 }, { 0x11F00, 0x11F01 }, { 0x11F36, 0x11F3A }, | ||
| 114 | { 0x11F40, 0x11F40 }, { 0x11F42, 0x11F42 }, { 0x11F5A, 0x11F5A }, | ||
| 115 | { 0x13440, 0x13440 }, { 0x13447, 0x13455 }, { 0x1611E, 0x16129 }, | ||
| 116 | { 0x1612D, 0x1612F }, { 0x16AF0, 0x16AF4 }, { 0x16B30, 0x16B36 }, | ||
| 117 | { 0x16F4F, 0x16F4F }, { 0x16F8F, 0x16F92 }, { 0x16FE4, 0x16FE4 }, | ||
| 118 | { 0x1BC9D, 0x1BC9E }, { 0x1BCA0, 0x1BCA3 }, { 0x1CF00, 0x1CF2D }, | ||
| 119 | { 0x1CF30, 0x1CF46 }, { 0x1D167, 0x1D169 }, { 0x1D173, 0x1D182 }, | ||
| 120 | { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, { 0x1D242, 0x1D244 }, | ||
| 121 | { 0x1DA00, 0x1DA36 }, { 0x1DA3B, 0x1DA6C }, { 0x1DA75, 0x1DA75 }, | ||
| 122 | { 0x1DA84, 0x1DA84 }, { 0x1DA9B, 0x1DA9F }, { 0x1DAA1, 0x1DAAF }, | ||
| 123 | { 0x1E000, 0x1E006 }, { 0x1E008, 0x1E018 }, { 0x1E01B, 0x1E021 }, | ||
| 124 | { 0x1E023, 0x1E024 }, { 0x1E026, 0x1E02A }, { 0x1E08F, 0x1E08F }, | ||
| 125 | { 0x1E130, 0x1E136 }, { 0x1E2AE, 0x1E2AE }, { 0x1E2EC, 0x1E2EF }, | ||
| 126 | { 0x1E4EC, 0x1E4EF }, { 0x1E5EE, 0x1E5EF }, { 0x1E6E3, 0x1E6E3 }, | ||
| 127 | { 0x1E6E6, 0x1E6E6 }, { 0x1E6EE, 0x1E6EF }, { 0x1E6F5, 0x1E6F5 }, | ||
| 128 | { 0x1E8D0, 0x1E8D6 }, { 0x1E944, 0x1E94A }, { 0xE0000, 0xE0FFF } | ||
