diff options
| author | Thijs Schreijer <thijs@thijsschreijer.nl> | 2025-02-06 16:52:08 +0100 |
|---|---|---|
| committer | Thijs <thijs@thijsschreijer.nl> | 2025-02-06 21:10:21 +0100 |
| commit | a1d933aae61420364685051dbc5b318a527fef1d (patch) | |
| tree | 771c9ea7e694f6c18d76e76e5a7160f371453db1 | |
| parent | 3c1fdbcc844a55f94dde41591f487ded73eab012 (diff) | |
| download | luasystem-feat/unicode-width.tar.gz luasystem-feat/unicode-width.tar.bz2 luasystem-feat/unicode-width.zip | |
feat(terminal): also accept codepoint integers for width check (branch: feat/unicode-width)
Lua utf8 functions return codepoints, hence it makes sense to accept
those, instead of having to convert to utf8 string and back again.
| -rw-r--r-- | CHANGELOG.md | 2 | ||||
| -rw-r--r-- | spec/04-term_spec.lua | 25 | ||||
| -rw-r--r-- | src/term.c | 47 |
3 files changed, 59 insertions, 15 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ae7189..25114c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md | |||
| @@ -30,6 +30,8 @@ The scope of what is covered by the version number excludes: | |||
| 30 | 30 | ||
| 31 | ### unreleased | 31 | ### unreleased |
| 32 | 32 | ||
| 33 | - Feat: when detecting character display width, also accept unicode codepoints (integers), | ||
| 34 | since the Lua utf8 library returns codepoints, not strings | ||
| 33 | - Fix: NetBSD fix compilation, undeclared directives | 35 | - Fix: NetBSD fix compilation, undeclared directives |
| 34 | 36 | ||
| 35 | ### version 0.4.5, released 18-Dec-2024 | 37 | ### version 0.4.5, released 18-Dec-2024 |
diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua index 813947a..907f903 100644 --- a/spec/04-term_spec.lua +++ b/spec/04-term_spec.lua | |||
| @@ -511,11 +511,18 @@ describe("Terminal:", function() | |||
| 511 | 511 | ||
| 512 | describe("utf8cwidth()", function() | 512 | describe("utf8cwidth()", function() |
| 513 | 513 | ||
| 514 | -- utf-8 strings | ||
| 514 | local ch1 = string.char(226, 130, 172) -- "€" single | 515 | local ch1 = string.char(226, 130, 172) -- "€" single |
| 515 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double | 516 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double |
| 516 | local ch3 = string.char(228, 189, 160) -- "你" double | 517 | local ch3 = string.char(228, 189, 160) -- "你" double |
| 517 | local ch4 = string.char(229, 165, 189) -- "好" double | 518 | local ch4 = string.char(229, 165, 189) -- "好" double |
| 518 | 519 | ||
| 520 | -- unicode codepoints | ||
| 521 | local cp1 = 8364 -- "€" single | ||
| 522 | local cp2 = 128640 -- "🚀" double | ||
| 523 | local cp3 = 20320 -- "你" double | ||
| 524 | local cp4 = 22909 -- "好" double | ||
| 525 | |||
| 519 | it("handles zero width characters", function() | 526 | it("handles zero width characters", function() |
| 520 | assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size | 527 | assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size |
| 521 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character | 528 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character |
| @@ -539,6 +546,24 @@ describe("Terminal:", function() | |||
| 539 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) | 546 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) |
| 540 | end) | 547 | end) |
| 541 | 548 | ||
| 549 | it("handles integer codepoints", function() | ||
| 550 | assert.same({1}, {system.utf8cwidth(cp1)}) | ||
| 551 | assert.same({2}, {system.utf8cwidth(cp2)}) | ||
| 552 | assert.same({2}, {system.utf8cwidth(cp3)}) | ||
| 553 | assert.same({2}, {system.utf8cwidth(cp4)}) | ||
| 554 | end) | ||
| 555 | |||
| 556 | it("returns an error on bad argument", function() | ||
| 557 | assert.has.error(function() | ||
| 558 | system.utf8cwidth(true) | ||
| 559 | end, "bad argument #1 to 'utf8cwidth' (Expected UTF-8-string or codepoint-integer as first argument)") | ||
| 560 | end) | ||
| 561 | |||
| 562 | it("returns an error on bad unicode values", function() | ||
| 563 | assert.same({nil, "Invalid Unicode codepoint"}, {system.utf8cwidth(-10)}) | ||
| 564 | assert.same({nil, "Invalid Unicode codepoint"}, {system.utf8cwidth(999999999999)}) | ||
| 565 | end) | ||
| 566 | |||
| 542 | end) | 567 | end) |
| 543 | 568 | ||
| 544 | 569 | ||
| @@ -953,30 +953,47 @@ int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) { | |||
| 953 | /*** | 953 | /*** |
| 954 | Get the width of a utf8 character for terminal display. | 954 | Get the width of a utf8 character for terminal display. |
| 955 | @function utf8cwidth | 955 | @function utf8cwidth |
| 956 | @tparam string utf8_char the utf8 character to check, only the width of the first character will be returned | 956 | @tparam string|int utf8_char the utf8 character, or unicode codepoint, to check, only the width of the first character will be returned |
| 957 | @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) | 957 | @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) |
| 958 | @treturn[2] nil | 958 | @treturn[2] nil |
| 959 | @treturn[2] string error message | 959 | @treturn[2] string error message |
| 960 | */ | 960 | */ |
| 961 | int lst_utf8cwidth(lua_State *L) { | 961 | int lst_utf8cwidth(lua_State *L) { |
| 962 | const char *utf8_char; | ||
| 963 | size_t utf8_len; | ||
| 964 | utf8_char = luaL_checklstring(L, 1, &utf8_len); | ||
| 965 | int width = 0; | 962 | int width = 0; |
| 966 | |||
| 967 | mk_wchar_t wc; | 963 | mk_wchar_t wc; |
| 968 | 964 | ||
| 969 | if (utf8_len == 0) { | 965 | if (lua_type(L, 1) == LUA_TSTRING) { |
| 970 | lua_pushinteger(L, 0); | 966 | // Handle UTF8 as string input |
| 971 | return 1; | 967 | const char *utf8_char; |
| 972 | } | 968 | size_t utf8_len; |
| 969 | utf8_char = luaL_checklstring(L, 1, &utf8_len); | ||
| 973 | 970 | ||
| 974 | // Convert the UTF-8 string to a wide character | 971 | if (utf8_len == 0) { |
| 975 | int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc); | 972 | lua_pushinteger(L, 0); |
| 976 | if (bytes_processed == -1) { | 973 | return 1; |
| 977 | lua_pushnil(L); | 974 | } |
| 978 | lua_pushstring(L, "Invalid UTF-8 character"); | 975 | |
| 979 | return 2; | 976 | // Convert the UTF-8 string to a wide character |
| 977 | int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc); | ||
| 978 | if (bytes_processed == -1) { | ||
| 979 | lua_pushnil(L); | ||
| 980 | lua_pushstring(L, "Invalid UTF-8 character"); | ||
| 981 | return 2; | ||
| 982 | } | ||
| 983 | |||
| 984 | } else if (lua_type(L, 1) == LUA_TNUMBER) { | ||
| 985 | // Handle codepoint input | ||
| 986 | int codepoint = luaL_checkinteger(L, 1); | ||
| 987 | |||
| 988 | if (codepoint < 0 || codepoint > 0x10FFFF) { | ||
| 989 | lua_pushnil(L); | ||
| 990 | lua_pushstring(L, "Invalid Unicode codepoint"); | ||
| 991 | return 2; | ||
| 992 | } | ||
| 993 | wc = (mk_wchar_t)codepoint; | ||
| 994 | |||
| 995 | } else { | ||
| 996 | return luaL_argerror(L, 1, "Expected UTF-8-string or codepoint-integer as first argument"); | ||
| 980 | } | 997 | } |
| 981 | 998 | ||
| 982 | // Get the width of the wide character | 999 | // Get the width of the wide character |
