aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Thijs Schreijer <thijs@thijsschreijer.nl> 2025-02-06 16:52:08 +0100
committer: Thijs Schreijer <thijs@thijsschreijer.nl> 2025-02-10 09:01:35 +0100
commit: 4a128b8969fe4d720f50c1fdb68f0265af8a7117 (patch)
tree: 771c9ea7e694f6c18d76e76e5a7160f371453db1
parent: 3c1fdbcc844a55f94dde41591f487ded73eab012 (diff)
download: luasystem-4a128b8969fe4d720f50c1fdb68f0265af8a7117.tar.gz
luasystem-4a128b8969fe4d720f50c1fdb68f0265af8a7117.tar.bz2
luasystem-4a128b8969fe4d720f50c1fdb68f0265af8a7117.zip
feat(terminal): also accept codepoint integers for width check
Lua utf8 functions return codepoints, hence it makes sense to accept those, instead of having to convert to utf8 string and back again.
-rw-r--r-- CHANGELOG.md | 2
-rw-r--r-- spec/04-term_spec.lua | 25
-rw-r--r-- src/term.c | 47
3 files changed, 59 insertions, 15 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ae7189..25114c3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,8 @@ The scope of what is covered by the version number excludes:
30 30
31### unreleased 31### unreleased
32 32
33- Feat: when detecting character display width, also accept unicode codepoints (integers),
34 since the Lua utf8 library returns codepoints, not strings
33- Fix: NetBSD fix compilation, undeclared directives 35- Fix: NetBSD fix compilation, undeclared directives
34 36
35### version 0.4.5, released 18-Dec-2024 37### version 0.4.5, released 18-Dec-2024
diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua
index 813947a..907f903 100644
--- a/spec/04-term_spec.lua
+++ b/spec/04-term_spec.lua
@@ -511,11 +511,18 @@ describe("Terminal:", function()
511 511
512 describe("utf8cwidth()", function() 512 describe("utf8cwidth()", function()
513 513
514 -- utf-8 strings
514 local ch1 = string.char(226, 130, 172) -- "€" single 515 local ch1 = string.char(226, 130, 172) -- "€" single
515 local ch2 = string.char(240, 159, 154, 128) -- "🚀" double 516 local ch2 = string.char(240, 159, 154, 128) -- "🚀" double
516 local ch3 = string.char(228, 189, 160) -- "你" double 517 local ch3 = string.char(228, 189, 160) -- "你" double
517 local ch4 = string.char(229, 165, 189) -- "好" double 518 local ch4 = string.char(229, 165, 189) -- "好" double
518 519
520 -- unicode codepoints
521 local cp1 = 8364 -- "€" single
522 local cp2 = 128640 -- "🚀" double
523 local cp3 = 20320 -- "你" double
524 local cp4 = 22909 -- "好" double
525
519 it("handles zero width characters", function() 526 it("handles zero width characters", function()
520 assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size 527 assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size
521 assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character 528 assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character
@@ -539,6 +546,24 @@ describe("Terminal:", function()
539 assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) 546 assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)})
540 end) 547 end)
541 548
549 it("handles integer codepoints", function()
550 assert.same({1}, {system.utf8cwidth(cp1)})
551 assert.same({2}, {system.utf8cwidth(cp2)})
552 assert.same({2}, {system.utf8cwidth(cp3)})
553 assert.same({2}, {system.utf8cwidth(cp4)})
554 end)
555
556 it("returns an error on bad argument", function()
557 assert.has.error(function()
558 system.utf8cwidth(true)
559 end, "bad argument #1 to 'utf8cwidth' (Expected UTF-8-string or codepoint-integer as first argument)")
560 end)
561
562 it("returns an error on bad unicode values", function()
563 assert.same({nil, "Invalid Unicode codepoint"}, {system.utf8cwidth(-10)})
564 assert.same({nil, "Invalid Unicode codepoint"}, {system.utf8cwidth(999999999999)})
565 end)
566
542 end) 567 end)
543 568
544 569
diff --git a/src/term.c b/src/term.c
index 8c2b87a..2375080 100644
--- a/src/term.c
+++ b/src/term.c
@@ -953,30 +953,47 @@ int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) {
953/*** 953/***
954Get the width of a utf8 character for terminal display. 954Get the width of a utf8 character for terminal display.
955@function utf8cwidth 955@function utf8cwidth
956@tparam string utf8_char the utf8 character to check, only the width of the first character will be returned 956@tparam string|int utf8_char the utf8 character, or unicode codepoint, to check, only the width of the first character will be returned
957@treturn[1] int the display width in columns of the first character in the string (0 for an empty string) 957@treturn[1] int the display width in columns of the first character in the string (0 for an empty string)
958@treturn[2] nil 958@treturn[2] nil
959@treturn[2] string error message 959@treturn[2] string error message
960*/ 960*/
961int lst_utf8cwidth(lua_State *L) { 961int lst_utf8cwidth(lua_State *L) {
962 const char *utf8_char;
963 size_t utf8_len;
964 utf8_char = luaL_checklstring(L, 1, &utf8_len);
965 int width = 0; 962 int width = 0;
966
967 mk_wchar_t wc; 963 mk_wchar_t wc;
968 964
969 if (utf8_len == 0) { 965 if (lua_type(L, 1) == LUA_TSTRING) {
970 lua_pushinteger(L, 0); 966 // Handle UTF8 as string input
971 return 1; 967 const char *utf8_char;
972 } 968 size_t utf8_len;
969 utf8_char = luaL_checklstring(L, 1, &utf8_len);
973 970
974 // Convert the UTF-8 string to a wide character 971 if (utf8_len == 0) {
975 int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc); 972 lua_pushinteger(L, 0);
976 if (bytes_processed == -1) { 973 return 1;
977 lua_pushnil(L); 974 }
978 lua_pushstring(L, "Invalid UTF-8 character"); 975
979 return 2; 976 // Convert the UTF-8 string to a wide character
977 int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc);
978 if (bytes_processed == -1) {
979 lua_pushnil(L);
980 lua_pushstring(L, "Invalid UTF-8 character");
981 return 2;
982 }
983
984 } else if (lua_type(L, 1) == LUA_TNUMBER) {
985 // Handle codepoint input
986 int codepoint = luaL_checkinteger(L, 1);
987
988 if (codepoint < 0 || codepoint > 0x10FFFF) {
989 lua_pushnil(L);
990 lua_pushstring(L, "Invalid Unicode codepoint");
991 return 2;
992 }
993 wc = (mk_wchar_t)codepoint;
994
995 } else {
996 return luaL_argerror(L, 1, "Expected UTF-8-string or codepoint-integer as first argument");
980 } 997 }
981 998
982 // Get the width of the wide character 999 // Get the width of the wide character