From 4a128b8969fe4d720f50c1fdb68f0265af8a7117 Mon Sep 17 00:00:00 2001 From: Thijs Schreijer Date: Thu, 6 Feb 2025 16:52:08 +0100 Subject: feat(terminal): also accept codepoint integers for width check Lua utf8 functions return codepoints, hence it makes sense to accept those, instead of having to convert to utf8 string and back again. --- CHANGELOG.md | 2 ++ spec/04-term_spec.lua | 25 +++++++++++++++++++++++++ src/term.c | 47 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ae7189..25114c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ The scope of what is covered by the version number excludes: ### unreleased +- Feat: when detecting character display width, also accept unicode codepoints (integers), + since the Lua utf8 library returns codepoints, not strings - Fix: NetBSD fix compilation, undeclared directives ### version 0.4.5, released 18-Dec-2024 diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua index 813947a..907f903 100644 --- a/spec/04-term_spec.lua +++ b/spec/04-term_spec.lua @@ -511,11 +511,18 @@ describe("Terminal:", function() describe("utf8cwidth()", function() + -- utf-8 strings local ch1 = string.char(226, 130, 172) -- "€" single local ch2 = string.char(240, 159, 154, 128) -- "🚀" double local ch3 = string.char(228, 189, 160) -- "你" double local ch4 = string.char(229, 165, 189) -- "好" double + -- unicode codepoints + local cp1 = 8364 -- "€" single + local cp2 = 128640 -- "🚀" double + local cp3 = 20320 -- "你" double + local cp4 = 22909 -- "好" double + it("handles zero width characters", function() assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character @@ -539,6 +546,24 @@ describe("Terminal:", function() assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. 
ch4)}) end) + it("handles integer codepoints", function() + assert.same({1}, {system.utf8cwidth(cp1)}) + assert.same({2}, {system.utf8cwidth(cp2)}) + assert.same({2}, {system.utf8cwidth(cp3)}) + assert.same({2}, {system.utf8cwidth(cp4)}) + end) + + it("returns an error on bad argument", function() + assert.has.error(function() + system.utf8cwidth(true) + end, "bad argument #1 to 'utf8cwidth' (Expected UTF-8-string or codepoint-integer as first argument)") + end) + + it("returns an error on bad unicode values", function() + assert.same({nil, "Invalid Unicode codepoint"}, {system.utf8cwidth(-10)}) + assert.same({nil, "Invalid Unicode codepoint"}, {system.utf8cwidth(999999999999)}) + end) + end) diff --git a/src/term.c b/src/term.c index 8c2b87a..2375080 100644 --- a/src/term.c +++ b/src/term.c @@ -953,30 +953,47 @@ int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) { /*** Get the width of a utf8 character for terminal display. @function utf8cwidth -@tparam string utf8_char the utf8 character to check, only the width of the first character will be returned +@tparam string|int utf8_char the utf8 character, or unicode codepoint, to check, only the width of the first character will be returned @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) @treturn[2] nil @treturn[2] string error message */ int lst_utf8cwidth(lua_State *L) { - const char *utf8_char; - size_t utf8_len; - utf8_char = luaL_checklstring(L, 1, &utf8_len); int width = 0; - mk_wchar_t wc; - if (utf8_len == 0) { - lua_pushinteger(L, 0); - return 1; - } + if (lua_type(L, 1) == LUA_TSTRING) { + // Handle UTF8 as string input + const char *utf8_char; + size_t utf8_len; + utf8_char = luaL_checklstring(L, 1, &utf8_len); - // Convert the UTF-8 string to a wide character - int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc); - if (bytes_processed == -1) { - lua_pushnil(L); - lua_pushstring(L, "Invalid UTF-8 character"); - return 2; 
+ if (utf8_len == 0) { + lua_pushinteger(L, 0); + return 1; + } + + // Convert the UTF-8 string to a wide character + int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc); + if (bytes_processed == -1) { + lua_pushnil(L); + lua_pushstring(L, "Invalid UTF-8 character"); + return 2; + } + + } else if (lua_type(L, 1) == LUA_TNUMBER) { + // Handle codepoint input; keep the full lua_Integer width so oversized values fail the range check instead of being truncated to int + lua_Integer codepoint = luaL_checkinteger(L, 1); + + if (codepoint < 0 || codepoint > 0x10FFFF) { + lua_pushnil(L); + lua_pushstring(L, "Invalid Unicode codepoint"); + return 2; + } + wc = (mk_wchar_t)codepoint; + + } else { + return luaL_argerror(L, 1, "Expected UTF-8-string or codepoint-integer as first argument"); } // Get the width of the wide character -- cgit v1.2.3-55-g6feb