summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2014-02-06 13:59:24 -0200
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2014-02-06 13:59:24 -0200
commit 4ea60463f5a5cc5c30bf3f20be0dd5141f48aa3c (patch)
tree d5330beaf2e11fc249f6c0aaef0b18ab3d9ec22c
parent d438e1379d24f06f027f8fc0e8fc1ff6673b322f (diff)
download lua-4ea60463f5a5cc5c30bf3f20be0dd5141f48aa3c.tar.gz
lua-4ea60463f5a5cc5c30bf3f20be0dd5141f48aa3c.tar.bz2
lua-4ea60463f5a5cc5c30bf3f20be0dd5141f48aa3c.zip
UTF-8 encoding exported as format '%U' in 'lua_pushfstring'
-rw-r--r-- llex.c 25
-rw-r--r-- lobject.c 28
-rw-r--r-- lobject.h 5
3 files changed, 37 insertions, 21 deletions
diff --git a/llex.c b/llex.c
index 818c0812..514a8150 100644
--- a/llex.c
+++ b/llex.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: llex.c,v 2.71 2014/01/31 15:14:22 roberto Exp roberto $ 2** $Id: llex.c,v 2.72 2014/02/04 18:57:34 roberto Exp roberto $
3** Lexical Analyzer 3** Lexical Analyzer
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -359,22 +359,11 @@ static unsigned int readutf8esc (LexState *ls) {
359} 359}
360 360
361 361
362static void utf8esc (LexState *ls, unsigned int r) { 362static void utf8esc (LexState *ls) {
363 if (r < 0x80) /* ascii? */ 363 char buff[UTF8BUFFSZ];
364 save(ls, r); 364 int n = luaO_utf8esc(buff, readutf8esc(ls));
365 else { /* need continuation bytes */ 365 for (; n > 0; n--) /* add 'buff' to string */
366 int buff[4]; /* to store continuation bytes */ 366 save(ls, buff[UTF8BUFFSZ - n]);
367 int n = 0; /* number of continuation bytes */
368 unsigned int mfb = 0x3f; /* maximum that fits in first byte */
369 do {
370 buff[n++] = 0x80 | (r & 0x3f); /* add continuation byte */
371 r >>= 6; /* remove added bits */
372 mfb >>= 1; /* now there is one less bit in first byte */
373 } while (r > mfb); /* needs continuation byte? */
374 save(ls, (~mfb << 1) | r); /* add first byte */
375 while (n-- > 0) /* add 'buff' to string, reversed */
376 save(ls, buff[n]);
377 }
378} 367}
379 368
380 369
@@ -414,7 +403,7 @@ static void read_string (LexState *ls, int del, SemInfo *seminfo) {
414 case 't': c = '\t'; goto read_save; 403 case 't': c = '\t'; goto read_save;
415 case 'v': c = '\v'; goto read_save; 404 case 'v': c = '\v'; goto read_save;
416 case 'x': c = readhexaesc(ls); goto read_save; 405 case 'x': c = readhexaesc(ls); goto read_save;
417 case 'u': utf8esc(ls, readutf8esc(ls)); goto no_save; 406 case 'u': utf8esc(ls); goto no_save;
418 case '\n': case '\r': 407 case '\n': case '\r':
419 inclinenumber(ls); c = '\n'; goto only_save; 408 inclinenumber(ls); c = '\n'; goto only_save;
420 case '\\': case '\"': case '\'': 409 case '\\': case '\"': case '\'':
diff --git a/lobject.c b/lobject.c
index 90a1e443..90e7d71a 100644
--- a/lobject.c
+++ b/lobject.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lobject.c,v 2.71 2013/12/30 20:47:58 roberto Exp roberto $ 2** $Id: lobject.c,v 2.72 2014/01/27 13:34:32 roberto Exp roberto $
3** Some generic functions over Lua objects 3** Some generic functions over Lua objects
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -284,12 +284,30 @@ int luaO_str2int (const char *s, size_t len, lua_Integer *result) {
284} 284}
285 285
286 286
287int luaO_utf8esc (char *buff, unsigned int x) {
288 int n = 1; /* number of bytes put in buffer (backwards) */
289 if (x < 0x80) /* ascii? */
290 buff[UTF8BUFFSZ - 1] = x;
291 else { /* need continuation bytes */
292 unsigned int mfb = 0x3f; /* maximum that fits in first byte */
293 do {
294 buff[UTF8BUFFSZ - (n++)] = 0x80 | (x & 0x3f); /* add continuation byte */
295 x >>= 6; /* remove added bits */
296 mfb >>= 1; /* now there is one less bit available in first byte */
297 } while (x > mfb); /* still needs continuation byte? */
298 buff[UTF8BUFFSZ - n] = (~mfb << 1) | x; /* add first byte */
299 }
300 return n;
301}
302
303
287static void pushstr (lua_State *L, const char *str, size_t l) { 304static void pushstr (lua_State *L, const char *str, size_t l) {
288 setsvalue2s(L, L->top++, luaS_newlstr(L, str, l)); 305 setsvalue2s(L, L->top++, luaS_newlstr(L, str, l));
289} 306}
290 307
291 308
292/* this function handles only `%d', `%c', %f, %p, and `%s' formats */ 309/* this function handles only '%d', '%c', '%f', '%p', and '%s'
310 conventional formats, plus Lua-specific '%L' and '%U' */
293const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) { 311const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
294 int n = 0; 312 int n = 0;
295 for (;;) { 313 for (;;) {
@@ -328,6 +346,12 @@ const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
328 pushstr(L, buff, l); 346 pushstr(L, buff, l);
329 break; 347 break;
330 } 348 }
349 case 'U': {
350 char buff[UTF8BUFFSZ];
351 int l = luaO_utf8esc(buff, va_arg(argp, int));
352 pushstr(L, buff + UTF8BUFFSZ - l, l);
353 break;
354 }
331 case '%': { 355 case '%': {
332 pushstr(L, "%", 1); 356 pushstr(L, "%", 1);
333 break; 357 break;
diff --git a/lobject.h b/lobject.h
index ca92fc56..23bbe74f 100644
--- a/lobject.h
+++ b/lobject.h
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lobject.h,v 2.82 2013/09/05 19:31:49 roberto Exp roberto $ 2** $Id: lobject.h,v 2.83 2013/12/04 12:15:22 roberto Exp roberto $
3** Type definitions for Lua objects 3** Type definitions for Lua objects
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -479,9 +479,12 @@ typedef struct Table {
479 479
480LUAI_DDEC const TValue luaO_nilobject_; 480LUAI_DDEC const TValue luaO_nilobject_;
481 481
482/* size of buffer for 'luaO_utf8esc' function */
483#define UTF8BUFFSZ 8
482 484
483LUAI_FUNC int luaO_int2fb (unsigned int x); 485LUAI_FUNC int luaO_int2fb (unsigned int x);
484LUAI_FUNC int luaO_fb2int (int x); 486LUAI_FUNC int luaO_fb2int (int x);
487LUAI_FUNC int luaO_utf8esc (char *buff, unsigned int x);
485LUAI_FUNC int luaO_ceillog2 (unsigned int x); 488LUAI_FUNC int luaO_ceillog2 (unsigned int x);
486LUAI_FUNC void luaO_arith (lua_State *L, int op, const TValue *p1, 489LUAI_FUNC void luaO_arith (lua_State *L, int op, const TValue *p1,
487 const TValue *p2, TValue *res); 490 const TValue *p2, TValue *res);