aboutsummaryrefslogtreecommitdiff
path: root/lobject.c
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2019-03-15 13:14:17 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2019-03-15 13:14:17 -0300
commit 1e0c73d5b643707335b06abd2546a83d9439d14c (patch)
tree b80b7d5e2cfeeef888ddf98fcc6276832134c1bf /lobject.c
parent 8fa4f1380b9a203bfdf002c2e9e9e13ebb8384c1 (diff)
download lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.gz
lua-1e0c73d5b643707335b06abd2546a83d9439d14c.tar.bz2
lua-1e0c73d5b643707335b06abd2546a83d9439d14c.zip
Changes in the validation of UTF-8
All UTF-8 encoding functionality (including the escape sequence '\u') accepts all values from the original UTF-8 specification (with sequences of up to six bytes). By default, the decoding functions in the UTF-8 library do not accept invalid Unicode code points, such as surrogates. A new parameter 'nonstrict' makes them accept all code points up to (2^31)-1, as in the original UTF-8 specification.
Diffstat (limited to 'lobject.c')
-rw-r--r-- lobject.c 6
1 file changed, 3 insertions, 3 deletions
diff --git a/lobject.c b/lobject.c
index 3ce052c2..5d340de6 100644
--- a/lobject.c
+++ b/lobject.c
@@ -343,7 +343,7 @@ size_t luaO_str2num (const char *s, TValue *o) {
343 343
344int luaO_utf8esc (char *buff, unsigned long x) { 344int luaO_utf8esc (char *buff, unsigned long x) {
345 int n = 1; /* number of bytes put in buffer (backwards) */ 345 int n = 1; /* number of bytes put in buffer (backwards) */
346 lua_assert(x <= 0x10FFFF); 346 lua_assert(x <= 0x7FFFFFFFu);
347 if (x < 0x80) /* ascii? */ 347 if (x < 0x80) /* ascii? */
348 buff[UTF8BUFFSZ - 1] = cast_char(x); 348 buff[UTF8BUFFSZ - 1] = cast_char(x);
349 else { /* need continuation bytes */ 349 else { /* need continuation bytes */
@@ -435,9 +435,9 @@ const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
435 pushstr(L, buff, l); 435 pushstr(L, buff, l);
436 break; 436 break;
437 } 437 }
438 case 'U': { /* an 'int' as a UTF-8 sequence */ 438 case 'U': { /* a 'long' as a UTF-8 sequence */
439 char buff[UTF8BUFFSZ]; 439 char buff[UTF8BUFFSZ];
440 int l = luaO_utf8esc(buff, cast(long, va_arg(argp, long))); 440 int l = luaO_utf8esc(buff, va_arg(argp, long));
441 pushstr(L, buff + UTF8BUFFSZ - l, l); 441 pushstr(L, buff + UTF8BUFFSZ - l, l);
442 break; 442 break;
443 } 443 }