diff options
author | Mike Pall <mike> | 2015-08-29 23:58:28 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2015-08-30 00:10:15 +0200 |
commit | 55c3b29f7b20f3801848e0ab71e9de1d22207b95 (patch) | |
tree | 133d38b282623935b050a6eb799f8bcf22cabdff | |
parent | a3a6866d4c2cc096b478c8f81b73a3b818034b89 (diff) | |
download | luajit-55c3b29f7b20f3801848e0ab71e9de1d22207b95.tar.gz luajit-55c3b29f7b20f3801848e0ab71e9de1d22207b95.tar.bz2 luajit-55c3b29f7b20f3801848e0ab71e9de1d22207b95.zip |
Parse Unicode string escape \u{XX...}.
Thanks to drbo.
-rw-r--r-- | doc/changes.html | 1 | ||||
-rw-r--r-- | doc/extensions.html | 7 | ||||
-rw-r--r-- | src/lj_lex.c | 27 |
3 files changed, 35 insertions, 0 deletions
diff --git a/doc/changes.html b/doc/changes.html index febb03c7..125b58b4 100644 --- a/doc/changes.html +++ b/doc/changes.html | |||
@@ -86,6 +86,7 @@ Please take a look at the commit history for more details. | |||
86 | <li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li> | 86 | <li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li> |
87 | <li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li> | 87 | <li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li> |
88 | <li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li> | 88 | <li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li> |
89 | <li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li> | ||
89 | <li>Parse binary number literals (<tt>0bxxx</tt>).</li> | 90 | <li>Parse binary number literals (<tt>0bxxx</tt>).</li> |
90 | </ul></li> | 91 | </ul></li> |
91 | <li>Improvements to the JIT compiler: | 92 | <li>Improvements to the JIT compiler: |
diff --git a/doc/extensions.html b/doc/extensions.html index 84ca5ce4..e034e1dc 100644 --- a/doc/extensions.html +++ b/doc/extensions.html | |||
@@ -344,6 +344,13 @@ Lua 5.1, which prevents implementing features that would otherwise | |||
344 | break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). | 344 | break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). |
345 | </p> | 345 | </p> |
346 | 346 | ||
347 | <h2 id="lua53">Extensions from Lua 5.3</h2> | ||
348 | <p> | ||
349 | LuaJIT supports some extensions from Lua 5.3: | ||
350 | <ul> | ||
351 | <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li> | ||
352 | </ul> | ||
353 | |||
347 | <h2 id="exceptions">C++ Exception Interoperability</h2> | 354 | <h2 id="exceptions">C++ Exception Interoperability</h2> |
348 | <p> | 355 | <p> |
349 | LuaJIT has built-in support for interoperating with C++ exceptions. | 356 | LuaJIT has built-in support for interoperating with C++ exceptions. |
diff --git a/src/lj_lex.c b/src/lj_lex.c index 8409cd78..5a918f74 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c | |||
@@ -214,6 +214,33 @@ static void lex_string(LexState *ls, TValue *tv) | |||
214 | c += 9; | 214 | c += 9; |
215 | } | 215 | } |
216 | break; | 216 | break; |
217 | case 'u': /* Unicode escape '\u{XX...}'. */ | ||
218 | if (lex_next(ls) != '{') goto err_xesc; | ||
219 | lex_next(ls); | ||
220 | c = 0; | ||
221 | do { | ||
222 | c = (c << 4) | (ls->c & 15u); | ||
223 | if (!lj_char_isdigit(ls->c)) { | ||
224 | if (!lj_char_isxdigit(ls->c)) goto err_xesc; | ||
225 | c += 9; | ||
226 | } | ||
227 | if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */ | ||
228 | } while (lex_next(ls) != '}'); | ||
229 | if (c < 0x800) { | ||
230 | if (c < 0x80) break; | ||
231 | lex_save(ls, 0xc0 | (c >> 6)); | ||
232 | } else { | ||
233 | if (c >= 0x10000) { | ||
234 | lex_save(ls, 0xf0 | (c >> 18)); | ||
235 | lex_save(ls, 0x80 | ((c >> 12) & 0x3f)); | ||
236 | } else { | ||
237 | if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */ | ||
238 | lex_save(ls, 0xe0 | (c >> 12)); | ||
239 | } | ||
240 | lex_save(ls, 0x80 | ((c >> 6) & 0x3f)); | ||
241 | } | ||
242 | c = 0x80 | (c & 0x3f); | ||
243 | break; | ||
217 | case 'z': /* Skip whitespace. */ | 244 | case 'z': /* Skip whitespace. */ |
218 | lex_next(ls); | 245 | lex_next(ls); |
219 | while (lj_char_isspace(ls->c)) | 246 | while (lj_char_isspace(ls->c)) |