From 1e0c73d5b643707335b06abd2546a83d9439d14c Mon Sep 17 00:00:00 2001
From: Roberto Ierusalimschy <roberto@inf.puc-rio.br>
Date: Fri, 15 Mar 2019 13:14:17 -0300
Subject: Changes in the validation of UTF-8

All UTF-8 encoding functionality (including the escape
sequence '\u') accepts all values from the original UTF-8
specification (with sequences of up to six bytes).

By default, the decoding functions in the UTF-8 library do not
accept invalid Unicode code points, such as surrogates. A new
parameter 'nonstrict' makes them accept all code points up to
(2^31)-1, as in the original UTF-8 specification.
---
 llex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'llex.c')

diff --git a/llex.c b/llex.c
index 38c6d92d..1539f525 100644
--- a/llex.c
+++ b/llex.c
@@ -335,7 +335,7 @@ static unsigned long readutf8esc (LexState *ls) {
   while ((save_and_next(ls), lisxdigit(ls->current))) {
     i++;
     r = (r << 4) + luaO_hexavalue(ls->current);
-    esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");
+    esccheck(ls, r <= 0x7FFFFFFFu, "UTF-8 value too large");
   }
   esccheck(ls, ls->current == '}', "missing '}'");
   next(ls);  /* skip '}' */
-- 
cgit v1.2.3-55-g6feb