From ccb8b307f11c7497e61f617b12f3a7f0a697256c Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Fri, 18 Jul 2025 16:10:28 -0300 Subject: Correction in utf8.offset Wrong utf-8 character may have no continuation bytes. --- testes/utf8.lua | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'testes/utf8.lua') diff --git a/testes/utf8.lua b/testes/utf8.lua index 143c6d34..028995a4 100644 --- a/testes/utf8.lua +++ b/testes/utf8.lua @@ -152,11 +152,20 @@ checkerror("position out of bounds", utf8.offset, "", 1, -1) checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) checkerror("continuation byte", utf8.offset, "\x80", 1) +checkerror("continuation byte", utf8.offset, "\x9c", -1) -- error in indices for len checkerror("out of bounds", utf8.len, "abc", 0, 2) checkerror("out of bounds", utf8.len, "abc", 1, 4) +do -- missing continuation bytes + -- get what is available + local p, e = utf8.offset("\xE0", 1) + assert(p == 1 and e == 1) + local p, e = utf8.offset("\xE0\x9e", -1) + assert(p == 1 and e == 2) +end + local s = "hello World" local t = {string.byte(s, 1, -1)} -- cgit v1.2.3-55-g6feb