Changes in the validation of UTF-8

All UTF-8 encoding functionality (including the escape sequence '\u') accepts all values from the original UTF-8 specification (with sequences of up to six bytes). By default, the decoding functions in the UTF-8 library do not accept invalid Unicode code points, such as surrogates. A new parameter 'nonstrict' makes them accept all code points up to (2^31)-1, as in the original UTF-8 specification.
author: Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2019-03-15 13:14:17 -0300
committer: Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2019-03-15 13:14:17 -0300
commit: 1e0c73d5b643707335b06abd2546a83d9439d14c (patch)
tree: b80b7d5e2cfeeef888ddf98fcc6276832134c1bf /llex.c
parent: 8fa4f1380b9a203bfdf002c2e9e9e13ebb8384c1 (diff)
download: lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.gz
lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.bz2
lua-1e0c73d5b643707335b06abd2546a83d9439d14c.zip
1 file changed, 1 insertion, 1 deletion
diff --git a/llex.c b/llex.c
index 38c6d92d..1539f525 100644
--- a/llex.c
+++ b/llex.c
@@ -335,7 +335,7 @@ static unsigned long readutf8esc (LexState *ls) {
  while ((save_and_next(ls), lisxdigit(ls->current))) {
    i++;
    r = (r << 4) + luaO_hexavalue(ls->current);
-    esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");
+    esccheck(ls, r <= 0x7FFFFFFFu, "UTF-8 value too large");
  }
  esccheck(ls, ls->current == '}', "missing '}'");
  next(ls);  /* skip '}' */
author	Roberto Ierusalimschy <roberto@inf.puc-rio.br>	2019-03-15 13:14:17 -0300
committer	Roberto Ierusalimschy <roberto@inf.puc-rio.br>	2019-03-15 13:14:17 -0300
commit	1e0c73d5b643707335b06abd2546a83d9439d14c (patch)
tree	b80b7d5e2cfeeef888ddf98fcc6276832134c1bf /llex.c
parent	8fa4f1380b9a203bfdf002c2e9e9e13ebb8384c1 (diff)
download	lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.gz lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.bz2 lua-1e0c73d5b643707335b06abd2546a83d9439d14c.zip

diff --git a/llex.c b/llex.c
index 38c6d92d..1539f525 100644
--- a/llex.c
+++ b/llex.c
@@ -335,7 +335,7 @@ static unsigned long readutf8esc (LexState *ls) {
335	while ((save_and_next(ls), lisxdigit(ls->current))) {	335	while ((save_and_next(ls), lisxdigit(ls->current))) {
336	i++;	336	i++;
337	r = (r << 4) + luaO_hexavalue(ls->current);	337	r = (r << 4) + luaO_hexavalue(ls->current);
338	esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");	338	esccheck(ls, r <= 0x7FFFFFFFu, "UTF-8 value too large");
339	}	339	}
340	esccheck(ls, ls->current == '}', "missing '}'");	340	esccheck(ls, ls->current == '}', "missing '}'");
341	next(ls); /* skip '}' */	341	next(ls); /* skip '}' */