diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2025-07-18 16:10:28 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2025-07-18 16:10:28 -0300 |
| commit | ccb8b307f11c7497e61f617b12f3a7f0a697256c (patch) | |
| tree | 91f6eb88650be03ab27433e2a3ee53f6d2dfcafd | |
| parent | 60b6599e8322dd93e3b33c9496ff035a1c45552f (diff) | |
| download | lua-ccb8b307f11c7497e61f617b12f3a7f0a697256c.tar.gz lua-ccb8b307f11c7497e61f617b12f3a7f0a697256c.tar.bz2 lua-ccb8b307f11c7497e61f617b12f3a7f0a697256c.zip | |
Correction in utf8.offset
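A malformed UTF-8 character may have no continuation bytes, so the scan for the character's final byte must not assume that at least one continuation byte follows the lead byte.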
| -rw-r--r-- | lutf8lib.c | 7 | ||||
| -rw-r--r-- | testes/utf8.lua | 9 |
2 files changed, 13 insertions, 3 deletions
| @@ -215,9 +215,10 @@ static int byteoffset (lua_State *L) { | |||
| 215 | } | 215 | } |
| 216 | lua_pushinteger(L, posi + 1); /* initial position */ | 216 | lua_pushinteger(L, posi + 1); /* initial position */ |
| 217 | if ((s[posi] & 0x80) != 0) { /* multi-byte character? */ | 217 | if ((s[posi] & 0x80) != 0) { /* multi-byte character? */ |
| 218 | do { | 218 | if (iscont(s[posi])) |
| 219 | posi++; | 219 | return luaL_error(L, "initial position is a continuation byte"); |
| 220 | } while (iscontp(s + posi + 1)); /* skip to final byte */ | 220 | while (iscontp(s + posi + 1)) |
| 221 | posi++; /* skip to last continuation byte */ | ||
| 221 | } | 222 | } |
| 222 | /* else one-byte character: final position is the initial one */ | 223 | /* else one-byte character: final position is the initial one */ |
| 223 | lua_pushinteger(L, posi + 1); /* 'posi' now is the final position */ | 224 | lua_pushinteger(L, posi + 1); /* 'posi' now is the final position */ |
diff --git a/testes/utf8.lua b/testes/utf8.lua
index 143c6d34..028995a4 100644
--- a/testes/utf8.lua
+++ b/testes/utf8.lua
| @@ -152,11 +152,20 @@ checkerror("position out of bounds", utf8.offset, "", 1, -1) | |||
| 152 | checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) | 152 | checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) |
| 153 | checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) | 153 | checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) |
| 154 | checkerror("continuation byte", utf8.offset, "\x80", 1) | 154 | checkerror("continuation byte", utf8.offset, "\x80", 1) |
| 155 | checkerror("continuation byte", utf8.offset, "\x9c", -1) | ||
| 155 | 156 | ||
| 156 | -- error in indices for len | 157 | -- error in indices for len |
| 157 | checkerror("out of bounds", utf8.len, "abc", 0, 2) | 158 | checkerror("out of bounds", utf8.len, "abc", 0, 2) |
| 158 | checkerror("out of bounds", utf8.len, "abc", 1, 4) | 159 | checkerror("out of bounds", utf8.len, "abc", 1, 4) |
| 159 | 160 | ||
| 161 | do -- missing continuation bytes | ||
| 162 | -- get what is available | ||
| 163 | local p, e = utf8.offset("\xE0", 1) | ||
| 164 | assert(p == 1 and e == 1) | ||
| 165 | local p, e = utf8.offset("\xE0\x9e", -1) | ||
| 166 | assert(p == 1 and e == 2) | ||
| 167 | end | ||
| 168 | |||
| 160 | 169 | ||
| 161 | local s = "hello World" | 170 | local s = "hello World" |
| 162 | local t = {string.byte(s, 1, -1)} | 171 | local t = {string.byte(s, 1, -1)} |
