From 1e0c73d5b643707335b06abd2546a83d9439d14c Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Fri, 15 Mar 2019 13:14:17 -0300 Subject: Changes in the validation of UTF-8 All UTF-8 encoding functionality (including the escape sequence '\u') accepts all values from the original UTF-8 specification (with sequences of up to six bytes). By default, the decoding functions in the UTF-8 library do not accept invalid Unicode code points, such as surrogates. A new parameter 'nonstrict' makes them accept all code points up to (2^31)-1, as in the original UTF-8 specification. --- lobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lobject.c') diff --git a/lobject.c b/lobject.c index 3ce052c2..5d340de6 100644 --- a/lobject.c +++ b/lobject.c @@ -343,7 +343,7 @@ size_t luaO_str2num (const char *s, TValue *o) { int luaO_utf8esc (char *buff, unsigned long x) { int n = 1; /* number of bytes put in buffer (backwards) */ - lua_assert(x <= 0x10FFFF); + lua_assert(x <= 0x7FFFFFFFu); if (x < 0x80) /* ascii? */ buff[UTF8BUFFSZ - 1] = cast_char(x); else { /* need continuation bytes */ @@ -435,9 +435,9 @@ const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) { pushstr(L, buff, l); break; } - case 'U': { /* an 'int' as a UTF-8 sequence */ + case 'U': { /* a 'long' as a UTF-8 sequence */ char buff[UTF8BUFFSZ]; - int l = luaO_utf8esc(buff, cast(long, va_arg(argp, long))); + int l = luaO_utf8esc(buff, va_arg(argp, long)); pushstr(L, buff + UTF8BUFFSZ - l, l); break; } -- cgit v1.2.3-55-g6feb