diff options
author | Thijs Schreijer <thijs@thijsschreijer.nl> | 2025-02-06 16:52:08 +0100 |
---|---|---|
committer | Thijs Schreijer <thijs@thijsschreijer.nl> | 2025-02-10 09:01:35 +0100 |
commit | 4a128b8969fe4d720f50c1fdb68f0265af8a7117 (patch) | |
tree | 771c9ea7e694f6c18d76e76e5a7160f371453db1 | |
parent | 3c1fdbcc844a55f94dde41591f487ded73eab012 (diff) | |
download | luasystem-4a128b8969fe4d720f50c1fdb68f0265af8a7117.tar.gz luasystem-4a128b8969fe4d720f50c1fdb68f0265af8a7117.tar.bz2 luasystem-4a128b8969fe4d720f50c1fdb68f0265af8a7117.zip |
feat(terminal): also accept codepoint integers for width check
Lua's utf8 functions return codepoints, so it makes sense to accept
them directly instead of having to convert to a UTF-8 string and back again.
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | spec/04-term_spec.lua | 25 | ||||
-rw-r--r-- | src/term.c | 47 |
3 files changed, 59 insertions, 15 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ae7189..25114c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md | |||
@@ -30,6 +30,8 @@ The scope of what is covered by the version number excludes: | |||
30 | 30 | ||
31 | ### unreleased | 31 | ### unreleased |
32 | 32 | ||
33 | - Feat: when detecting character display width, also accept Unicode codepoints (integers), | ||
34 | since the Lua utf8 library returns codepoints, not strings | ||
33 | - Fix: NetBSD fix compilation, undeclared directives | 35 | - Fix: NetBSD fix compilation, undeclared directives |
34 | 36 | ||
35 | ### version 0.4.5, released 18-Dec-2024 | 37 | ### version 0.4.5, released 18-Dec-2024 |
diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua index 813947a..907f903 100644 --- a/spec/04-term_spec.lua +++ b/spec/04-term_spec.lua | |||
@@ -511,11 +511,18 @@ describe("Terminal:", function() | |||
511 | 511 | ||
512 | describe("utf8cwidth()", function() | 512 | describe("utf8cwidth()", function() |
513 | 513 | ||
514 | -- utf-8 strings | ||
514 | local ch1 = string.char(226, 130, 172) -- "€" single | 515 | local ch1 = string.char(226, 130, 172) -- "€" single |
515 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double | 516 | local ch2 = string.char(240, 159, 154, 128) -- "🚀" double |
516 | local ch3 = string.char(228, 189, 160) -- "你" double | 517 | local ch3 = string.char(228, 189, 160) -- "你" double |
517 | local ch4 = string.char(229, 165, 189) -- "好" double | 518 | local ch4 = string.char(229, 165, 189) -- "好" double |
518 | 519 | ||
520 | -- unicode codepoints | ||
521 | local cp1 = 8364 -- "€" single | ||
522 | local cp2 = 128640 -- "🚀" double | ||
523 | local cp3 = 20320 -- "你" double | ||
524 | local cp4 = 22909 -- "好" double | ||
525 | |||
519 | it("handles zero width characters", function() | 526 | it("handles zero width characters", function() |
520 | assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size | 527 | assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size |
521 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character | 528 | assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character |
@@ -539,6 +546,24 @@ describe("Terminal:", function() | |||
539 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) | 546 | assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) |
540 | end) | 547 | end) |
541 | 548 | ||
549 | it("handles integer codepoints", function() | ||
550 | assert.same({1}, {system.utf8cwidth(cp1)}) | ||
551 | assert.same({2}, {system.utf8cwidth(cp2)}) | ||
552 | assert.same({2}, {system.utf8cwidth(cp3)}) | ||
553 | assert.same({2}, {system.utf8cwidth(cp4)}) | ||
554 | end) | ||
555 | |||
556 | it("returns an error on bad argument", function() | ||
557 | assert.has.error(function() | ||
558 | system.utf8cwidth(true) | ||
559 | end, "bad argument #1 to 'utf8cwidth' (Expected UTF-8-string or codepoint-integer as first argument)") | ||
560 | end) | ||
561 | |||
562 | it("returns an error on bad unicode values", function() | ||
563 | assert.same({nil, "Invalid Unicode codepoint"}, {system.utf8cwidth(-10)}) | ||
564 | assert.same({nil, "Invalid Unicode codepoint"}, {system.utf8cwidth(999999999999)}) | ||
565 | end) | ||
566 | |||
542 | end) | 567 | end) |
543 | 568 | ||
544 | 569 | ||
@@ -953,30 +953,47 @@ int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) { | |||
953 | /*** | 953 | /*** |
954 | Get the width of a utf8 character for terminal display. | 954 | Get the width of a utf8 character for terminal display. |
955 | @function utf8cwidth | 955 | @function utf8cwidth |
956 | @tparam string utf8_char the utf8 character to check, only the width of the first character will be returned | 956 | @tparam string|int utf8_char the utf8 character, or unicode codepoint, to check, only the width of the first character will be returned |
957 | @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) | 957 | @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) |
958 | @treturn[2] nil | 958 | @treturn[2] nil |
959 | @treturn[2] string error message | 959 | @treturn[2] string error message |
960 | */ | 960 | */ |
961 | int lst_utf8cwidth(lua_State *L) { | 961 | int lst_utf8cwidth(lua_State *L) { |
962 | const char *utf8_char; | ||
963 | size_t utf8_len; | ||
964 | utf8_char = luaL_checklstring(L, 1, &utf8_len); | ||
965 | int width = 0; | 962 | int width = 0; |
966 | |||
967 | mk_wchar_t wc; | 963 | mk_wchar_t wc; |
968 | 964 | ||
969 | if (utf8_len == 0) { | 965 | if (lua_type(L, 1) == LUA_TSTRING) { |
970 | lua_pushinteger(L, 0); | 966 | // Handle UTF8 as string input |
971 | return 1; | 967 | const char *utf8_char; |
972 | } | 968 | size_t utf8_len; |
969 | utf8_char = luaL_checklstring(L, 1, &utf8_len); | ||
973 | 970 | ||
974 | // Convert the UTF-8 string to a wide character | 971 | if (utf8_len == 0) { |
975 | int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc); | 972 | lua_pushinteger(L, 0); |
976 | if (bytes_processed == -1) { | 973 | return 1; |
977 | lua_pushnil(L); | 974 | } |
978 | lua_pushstring(L, "Invalid UTF-8 character"); | 975 | |
979 | return 2; | 976 | // Convert the UTF-8 string to a wide character |
977 | int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc); | ||
978 | if (bytes_processed == -1) { | ||
979 | lua_pushnil(L); | ||
980 | lua_pushstring(L, "Invalid UTF-8 character"); | ||
981 | return 2; | ||
982 | } | ||
983 | |||
984 | } else if (lua_type(L, 1) == LUA_TNUMBER) { | ||
985 | // Handle codepoint input | ||
986 | int codepoint = luaL_checkinteger(L, 1); | ||
987 | |||
988 | if (codepoint < 0 || codepoint > 0x10FFFF) { | ||
989 | lua_pushnil(L); | ||
990 | lua_pushstring(L, "Invalid Unicode codepoint"); | ||
991 | return 2; | ||
992 | } | ||
993 | wc = (mk_wchar_t)codepoint; | ||
994 | |||
995 | } else { | ||
996 | return luaL_argerror(L, 1, "Expected UTF-8-string or codepoint-integer as first argument"); | ||
980 | } | 997 | } |
981 | 998 | ||
982 | // Get the width of the wide character | 999 | // Get the width of the wide character |