From 814213b65fa4ab2b1a7216d06f68a6f3df89efcd Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 27 May 2024 11:29:39 -0300 Subject: utf8.offset returns also final position of character 'utf8.offset' returns two values: the initial and the final position of the given character. --- lutf8lib.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'lutf8lib.c') diff --git a/lutf8lib.c b/lutf8lib.c index 3a5b9bc3..7b747937 100644 --- a/lutf8lib.c +++ b/lutf8lib.c @@ -181,8 +181,8 @@ static int utfchar (lua_State *L) { /* -** offset(s, n, [i]) -> index where n-th character counting from -** position 'i' starts; 0 means character at 'i'. +** offset(s, n, [i]) -> indices where n-th character counting from +** position 'i' starts and ends; 0 means character at 'i'. */ static int byteoffset (lua_State *L) { size_t len; @@ -217,11 +217,19 @@ static int byteoffset (lua_State *L) { } } } - if (n == 0) /* did it find given character? */ - lua_pushinteger(L, posi + 1); - else /* no such character */ + if (n != 0) { /* did not find given character? */ luaL_pushfail(L); - return 1; + return 1; + } + lua_pushinteger(L, posi + 1); /* initial position */ + if ((s[posi] & 0x80) != 0) { /* multi-byte character? */ + do { + posi++; + } while (iscontp(s + posi + 1)); /* skip to final byte */ + } + /* else one-byte character: final position is the initial one */ + lua_pushinteger(L, posi + 1); /* 'posi' now is the final position */ + return 2; } -- cgit v1.2.3-55-g6feb