From 1e0c73d5b643707335b06abd2546a83d9439d14c Mon Sep 17 00:00:00 2001
From: Roberto Ierusalimschy
Date: Fri, 15 Mar 2019 13:14:17 -0300
Subject: Changes in the validation of UTF-8

All UTF-8 encoding functionality (including the escape sequence '\u')
accepts all values from the original UTF-8 specification (with
sequences of up to six bytes).

By default, the decoding functions in the UTF-8 library do not accept
invalid Unicode code points, such as surrogates. A new parameter
'nonstrict' makes them accept all code points up to (2^31)-1, as in
the original UTF-8 specification.
---
 llex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'llex.c')

diff --git a/llex.c b/llex.c
index 38c6d92d..1539f525 100644
--- a/llex.c
+++ b/llex.c
@@ -335,7 +335,7 @@ static unsigned long readutf8esc (LexState *ls) {
   while ((save_and_next(ls), lisxdigit(ls->current))) {
     i++;
     r = (r << 4) + luaO_hexavalue(ls->current);
-    esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");
+    esccheck(ls, r <= 0x7FFFFFFFu, "UTF-8 value too large");
   }
   esccheck(ls, ls->current == '}', "missing '}'");
   next(ls); /* skip '}' */
-- 
cgit v1.2.3-55-g6feb