aboutsummaryrefslogtreecommitdiff
path: root/llex.c
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2019-03-15 13:14:17 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2019-03-15 13:14:17 -0300
commit 1e0c73d5b643707335b06abd2546a83d9439d14c (patch)
tree b80b7d5e2cfeeef888ddf98fcc6276832134c1bf /llex.c
parent 8fa4f1380b9a203bfdf002c2e9e9e13ebb8384c1 (diff)
download lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.gz
lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.bz2
lua-1e0c73d5b643707335b06abd2546a83d9439d14c.zip
Changes in the validation of UTF-8
All UTF-8 encoding functionality (including the escape sequence '\u') accepts all values from the original UTF-8 specification (with sequences of up to six bytes). By default, the decoding functions in the UTF-8 library do not accept invalid Unicode code points, such as surrogates. A new parameter 'nonstrict' makes them accept all code points up to (2^31)-1, as in the original UTF-8 specification.
Diffstat (limited to 'llex.c')
-rw-r--r-- llex.c 2
1 files changed, 1 insertions, 1 deletions
diff --git a/llex.c b/llex.c
index 38c6d92d..1539f525 100644
--- a/llex.c
+++ b/llex.c
@@ -335,7 +335,7 @@ static unsigned long readutf8esc (LexState *ls) {
335 while ((save_and_next(ls), lisxdigit(ls->current))) { 335 while ((save_and_next(ls), lisxdigit(ls->current))) {
336 i++; 336 i++;
337 r = (r << 4) + luaO_hexavalue(ls->current); 337 r = (r << 4) + luaO_hexavalue(ls->current);
338 esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large"); 338 esccheck(ls, r <= 0x7FFFFFFFu, "UTF-8 value too large");
339 } 339 }
340 esccheck(ls, ls->current == '}', "missing '}'"); 340 esccheck(ls, ls->current == '}', "missing '}'");
341 next(ls); /* skip '}' */ 341 next(ls); /* skip '}' */