about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lutf8lib.c 7
-rw-r--r-- testes/utf8.lua 9
2 files changed, 13 insertions, 3 deletions
diff --git a/lutf8lib.c b/lutf8lib.c
index be016135..df49c901 100644
--- a/lutf8lib.c
+++ b/lutf8lib.c
@@ -215,9 +215,10 @@ static int byteoffset (lua_State *L) {
215 } 215 }
216 lua_pushinteger(L, posi + 1); /* initial position */ 216 lua_pushinteger(L, posi + 1); /* initial position */
217 if ((s[posi] & 0x80) != 0) { /* multi-byte character? */ 217 if ((s[posi] & 0x80) != 0) { /* multi-byte character? */
218 do { 218 if (iscont(s[posi]))
219 posi++; 219 return luaL_error(L, "initial position is a continuation byte");
220 } while (iscontp(s + posi + 1)); /* skip to final byte */ 220 while (iscontp(s + posi + 1))
221 posi++; /* skip to last continuation byte */
221 } 222 }
222 /* else one-byte character: final position is the initial one */ 223 /* else one-byte character: final position is the initial one */
223 lua_pushinteger(L, posi + 1); /* 'posi' now is the final position */ 224 lua_pushinteger(L, posi + 1); /* 'posi' now is the final position */
diff --git a/testes/utf8.lua b/testes/utf8.lua
index 143c6d34..028995a4 100644
--- a/testes/utf8.lua
+++ b/testes/utf8.lua
@@ -152,11 +152,20 @@ checkerror("position out of bounds", utf8.offset, "", 1, -1)
152checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) 152checkerror("continuation byte", utf8.offset, "𦧺", 1, 2)
153checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) 153checkerror("continuation byte", utf8.offset, "𦧺", 1, 2)
154checkerror("continuation byte", utf8.offset, "\x80", 1) 154checkerror("continuation byte", utf8.offset, "\x80", 1)
155checkerror("continuation byte", utf8.offset, "\x9c", -1)
155 156
156-- error in indices for len 157-- error in indices for len
157checkerror("out of bounds", utf8.len, "abc", 0, 2) 158checkerror("out of bounds", utf8.len, "abc", 0, 2)
158checkerror("out of bounds", utf8.len, "abc", 1, 4) 159checkerror("out of bounds", utf8.len, "abc", 1, 4)
159 160
161do -- missing continuation bytes
162 -- get what is available
163 local p, e = utf8.offset("\xE0", 1)
164 assert(p == 1 and e == 1)
165 local p, e = utf8.offset("\xE0\x9e", -1)
166 assert(p == 1 and e == 2)
167end
168
160 169
161local s = "hello World" 170local s = "hello World"
162local t = {string.byte(s, 1, -1)} 171local t = {string.byte(s, 1, -1)}