From 814213b65fa4ab2b1a7216d06f68a6f3df89efcd Mon Sep 17 00:00:00 2001
From: Roberto Ierusalimschy <roberto@inf.puc-rio.br>
Date: Mon, 27 May 2024 11:29:39 -0300
Subject: utf8.offset also returns final position of character

'utf8.offset' now returns two values: the initial and the final
positions of the given character.
---
 testes/utf8.lua | 44 +++++++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 17 deletions(-)

(limited to 'testes')

diff --git a/testes/utf8.lua b/testes/utf8.lua
index efadbd5c..dc0f2f09 100644
--- a/testes/utf8.lua
+++ b/testes/utf8.lua
@@ -52,25 +52,35 @@ local function check (s, t, nonstrict)
   for i = 1, #t do assert(t[i] == t1[i]) end   -- 't' is equal to 't1'
 
   for i = 1, l do   -- for all codepoints
-    local pi = utf8.offset(s, i)        -- position of i-th char
+    local pi, pie = utf8.offset(s, i)        -- position of i-th char
     local pi1 = utf8.offset(s, 2, pi)   -- position of next char
+    assert(pi1 == pie + 1)
     assert(string.find(string.sub(s, pi, pi1 - 1), justone))
     assert(utf8.offset(s, -1, pi1) == pi)
     assert(utf8.offset(s, i - l - 1) == pi)
     assert(pi1 - pi == #utf8.char(utf8.codepoint(s, pi, pi, nonstrict)))
     for j = pi, pi1 - 1 do
-      assert(utf8.offset(s, 0, j) == pi)
+      local off1, off2 = utf8.offset(s, 0, j)
+      assert(off1 == pi and off2 == pi1 - 1)
     end
     for j = pi + 1, pi1 - 1 do
       assert(not utf8.len(s, j))
     end
-   assert(utf8.len(s, pi, pi, nonstrict) == 1)
-   assert(utf8.len(s, pi, pi1 - 1, nonstrict) == 1)
-   assert(utf8.len(s, pi, -1, nonstrict) == l - i + 1)
-   assert(utf8.len(s, pi1, -1, nonstrict) == l - i)
-   assert(utf8.len(s, 1, pi, nonstrict) == i)
+    assert(utf8.len(s, pi, pi, nonstrict) == 1)
+    assert(utf8.len(s, pi, pi1 - 1, nonstrict) == 1)
+    assert(utf8.len(s, pi, -1, nonstrict) == l - i + 1)
+    assert(utf8.len(s, pi1, -1, nonstrict) == l - i)
+    assert(utf8.len(s, 1, pi, nonstrict) == i)
   end
 
+  local expected = 1    -- expected position of "current" character
+  for i = 1, l + 1 do
+    local p, e = utf8.offset(s, i)
+    assert(p == expected)
+    expected = e + 1
+  end
+  assert(expected - 1 == #s + 1)
+
   local i = 0
   for p, c in utf8.codes(s, nonstrict) do
     i = i + 1
@@ -94,20 +104,20 @@ end
 
 
 do    -- error indication in utf8.len
-  local function check (s, p)
+  local function checklen (s, p)
     local a, b = utf8.len(s)
     assert(not a and b == p)
   end
-  check("abc\xE3def", 4)
-  check("\xF4\x9F\xBF", 1)
-  check("\xF4\x9F\xBF\xBF", 1)
+  checklen("abc\xE3def", 4)
+  checklen("\xF4\x9F\xBF", 1)
+  checklen("\xF4\x9F\xBF\xBF", 1)
   -- spurious continuation bytes
-  check("汉字\x80", #("汉字") + 1)
-  check("\x80hello", 1)
-  check("hel\x80lo", 4)
-  check("汉字\xBF", #("汉字") + 1)
-  check("\xBFhello", 1)
-  check("hel\xBFlo", 4)
+  checklen("汉字\x80", #("汉字") + 1)
+  checklen("\x80hello", 1)
+  checklen("hel\x80lo", 4)
+  checklen("汉字\xBF", #("汉字") + 1)
+  checklen("\xBFhello", 1)
+  checklen("hel\xBFlo", 4)
 end
 
 -- errors in utf8.codes
-- 
cgit v1.2.3-55-g6feb