diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-03-15 13:14:17 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-03-15 13:14:17 -0300 |
commit | 1e0c73d5b643707335b06abd2546a83d9439d14c (patch) | |
tree | b80b7d5e2cfeeef888ddf98fcc6276832134c1bf /llex.c | |
parent | 8fa4f1380b9a203bfdf002c2e9e9e13ebb8384c1 (diff) | |
download | lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.gz lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.bz2 lua-1e0c73d5b643707335b06abd2546a83d9439d14c.zip |
Changes in the validation of UTF-8
All UTF-8 encoding functionality (including the escape
sequence '\u') accepts all values from the original UTF-8
specification (with sequences of up to six bytes).
By default, the decoding functions in the UTF-8 library do not
accept invalid Unicode code points, such as surrogates. A new
parameter 'nonstrict' makes them accept all code points up to
(2^31)-1 (0x7FFFFFFF), as in the original UTF-8 specification.
Diffstat (limited to 'llex.c')
-rw-r--r-- | llex.c | 2 |
1 file changed, 1 insertion, 1 deletion
@@ -335,7 +335,7 @@ static unsigned long readutf8esc (LexState *ls) { | |||
335 | while ((save_and_next(ls), lisxdigit(ls->current))) { | 335 | while ((save_and_next(ls), lisxdigit(ls->current))) { |
336 | i++; | 336 | i++; |
337 | r = (r << 4) + luaO_hexavalue(ls->current); | 337 | r = (r << 4) + luaO_hexavalue(ls->current); |
338 | esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large"); | 338 | esccheck(ls, r <= 0x7FFFFFFFu, "UTF-8 value too large"); |
339 | } | 339 | } |
340 | esccheck(ls, ls->current == '}', "missing '}'"); | 340 | esccheck(ls, ls->current == '}', "missing '}'"); |
341 | next(ls); /* skip '}' */ | 341 | next(ls); /* skip '}' */ |