aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Pulford <mark@kyne.com.au>2011-05-01 02:11:10 +0930
committerMark Pulford <mark@kyne.com.au>2011-05-01 02:11:10 +0930
commit60fb31cfdd625ea3bc4a12a8440715c8ff0c9242 (patch)
tree58022c23864fde947839cf7ba0eb18749e4e7e07
parent058acaa9f2d52a4854650d19964bb7892c99d9af (diff)
downloadlua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.tar.gz
lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.tar.bz2
lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.zip
Create "cjson" Lua module, support UCS-2 escapes
- Convert lua_json_init() into luaopen_cjson() to support dynamic .so loading.
- Rename "json" to "cjson" to reduce conflicts with other JSON modules.
- Remove unnecessary *_pcall_* API. Lua calls are fast enough, even through C.
- Encode empty tables as objects.
- Add support for decoding all UCS-2 escape codes.
-rw-r--r--lua_cjson.c (renamed from lua_json.c)246
-rw-r--r--strbuf.c7
2 files changed, 154 insertions, 99 deletions
diff --git a/lua_json.c b/lua_cjson.c
index 77a8d90..203b3b8 100644
--- a/lua_json.c
+++ b/lua_cjson.c
@@ -2,11 +2,15 @@
2 */ 2 */
3 3
4/* Caveats: 4/* Caveats:
5 * - No unicode support 5 * - Assumes strings are valid UTF-8 and mostly treats them as opaque
6 * binary data. Will not throw an exception on bad data.
7 * - Will decode \uXXXX escapes, but leaves high codepoints as UTF-8
8 * when encoding.
6 * - JSON "null" values are represented as lightuserdata. Compare with 9 * - JSON "null" values are represented as lightuserdata. Compare with
7 * json.null. 10 * json.null.
8 * - Parsing comments is not support. According to json.org, this isn't 11 * - Parsing comments is not supported. According to json.org, this isn't
9 * part of the spec. 12 * part of the spec.
13 * - Parser accepts number formats beyond the JSON spec.
10 * 14 *
11 * Note: lua_json_decode() probably spends significant time rehashing 15 * Note: lua_json_decode() probably spends significant time rehashing
12 * tables since it is difficult to know their size ahead of time. 16 * tables since it is difficult to know their size ahead of time.
@@ -15,10 +19,10 @@
15 */ 19 */
16 20
17/* FIXME: 21/* FIXME:
18 * - Ensure JSON data is UTF-8. Fail otherwise.
19 * - Alternatively, dynamically support Unicode in JSON string. Return current locale.
20 * - Consider implementing other Unicode standards.
21 * - Option to encode non-printable characters? Only \" \\ are required 22 * - Option to encode non-printable characters? Only \" \\ are required
23 * - Protect against cycles when encoding JSON from a data structure
24 * - Max depth? Notice cycles?
25 * - Handle huge sparse arrays?
22 */ 26 */
23 27
24#include <assert.h> 28#include <assert.h>
@@ -30,17 +34,8 @@
30#include <lua.h> 34#include <lua.h>
31#include <lauxlib.h> 35#include <lauxlib.h>
32 36
33#include "lua_json.h"
34#include "strbuf.h" 37#include "strbuf.h"
35 38
36#include "die.h"
37
38
39static void verify_arg_count(lua_State *l, int nargs)
40{
41 luaL_argcheck(l, lua_gettop(l) <= nargs, nargs + 1, "too many arguments");
42}
43
44/* ===== ENCODING ===== */ 39/* ===== ENCODING ===== */
45 40
46static void json_encode_exception(lua_State *l, strbuf_t *json, 41static void json_encode_exception(lua_State *l, strbuf_t *json,
@@ -125,7 +120,8 @@ static int lua_array_length(lua_State *l)
125 /* table, startkey */ 120 /* table, startkey */
126 while (lua_next(l, -2) != 0) { 121 while (lua_next(l, -2) != 0) {
127 /* table, key, value */ 122 /* table, key, value */
128 if ((k = lua_tonumber(l, -2))) { 123 if (lua_isnumber(l, -2) &&
124 (k = lua_tonumber(l, -2))) {
129 /* Integer >= 1 ? */ 125 /* Integer >= 1 ? */
130 if (floor(k) == k && k >= 1) { 126 if (floor(k) == k && k >= 1) {
131 if (k > max) 127 if (k > max)
@@ -228,7 +224,7 @@ static void json_append_data(lua_State *l, strbuf_t *json)
228 break; 224 break;
229 case LUA_TTABLE: 225 case LUA_TTABLE:
230 len = lua_array_length(l); 226 len = lua_array_length(l);
231 if (len >= 0) 227 if (len > 0)
232 json_append_array(l, json, len); 228 json_append_array(l, json, len);
233 else 229 else
234 json_append_object(l, json); 230 json_append_object(l, json);
@@ -249,43 +245,18 @@ static void json_append_data(lua_State *l, strbuf_t *json)
249 } 245 }
250} 246}
251 247
252/* lua_json_encode can throw an exception */ 248static int json_encode(lua_State *l)
253char *lua_json_encode(lua_State *l, int *len)
254{ 249{
255 strbuf_t buf; 250 strbuf_t buf;
256 char *json; 251 char *json;
257
258 strbuf_init(&buf, 0);
259 json_append_data(l, &buf);
260 json = strbuf_free_to_string(&buf, len);
261
262 return json;
263}
264
265/* lua_json_pcall_encode(object) must be called via lua_pcall().
266 * This allows a C caller to catch any errors without needing
267 * to register the string with Lua for garbage collection. */
268int lua_json_pcall_encode(lua_State *l)
269{
270 char *json;
271 int len; 252 int len;
272 253
273 verify_arg_count(l, 1); 254 luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
274
275 json = lua_json_encode(l, &len);
276
277 lua_pushlightuserdata(l, json);
278 lua_pushnumber(l, len);
279
280 return 2;
281}
282 255
283int lua_api_json_encode(lua_State *l) 256 strbuf_init(&buf, 0);
284{ 257 json_append_data(l, &buf);
285 char *json; 258 json = strbuf_free_to_string(&buf, &len);
286 int len;
287 259
288 json = lua_json_encode(l, &len);
289 lua_pushlstring(l, json, len); 260 lua_pushlstring(l, json, len);
290 free(json); 261 free(json);
291 262
@@ -395,6 +366,98 @@ static void json_global_init()
395 json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */ 366 json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */
396} 367}
397 368
/* Map one ASCII hexadecimal character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.
 * Returns: 0-15 for a hex digit
 *          -1   for any other character (including '\0')
 */
static inline int hexdigit2int(char hex)
{
    int folded;

    if (hex >= '0' && hex <= '9')
        return hex - '0';

    /* Setting bit 0x20 maps ASCII 'A'-'F' onto 'a'-'f', so a single
     * range check covers both cases. */
    folded = hex | 0x20;
    if (folded < 'a' || folded > 'f')
        return -1;

    return folded - 'a' + 10;
}
381
/* Convert exactly four ASCII hex digits into an integer.
 *
 * hex: pointer to the first of the four digits
 *
 * Returns: 0x0000-0xFFFF on success
 *          -1 if any of the four characters is not a hex digit
 *
 * Characters are examined in order and scanning stops at the first
 * invalid one. A NUL terminator counts as invalid, so the bytes after
 * a premature end of string are never read.
 */
static int decode_hex4(const char *hex)
{
    int value = 0;
    int pos;
    int nibble;

    for (pos = 0; pos < 4; pos++) {
        nibble = hexdigit2int(hex[pos]);
        if (nibble < 0)
            return -1;

        /* Shift the digits seen so far up and add the new low nibble */
        value = (value << 4) | nibble;
    }

    return value;
}
402
/* Encode a single codepoint in the range 0x0000-0xFFFF as UTF-8.
 *
 * utf8:      output buffer; between 1 and 3 bytes are written and no
 *            terminator is added
 * codepoint: value to encode
 *
 * Returns: number of bytes written (1-3)
 *          0 if the codepoint is negative or greater than 0xFFFF, in
 *          which case the buffer is left untouched
 *
 * Surrogate values (0xD800-0xDFFF) are not special-cased; they are
 * encoded like any other three byte value.
 */
static int codepoint_to_utf8(char *utf8, int codepoint)
{
    /* A negative value would otherwise pass the "<= 0x7F" test below
     * and be truncated into a meaningless single byte. */
    if (codepoint < 0)
        return 0;

    /* 0xxxxxxx */
    if (codepoint <= 0x7F) {
        utf8[0] = (char)codepoint;
        return 1;
    }

    /* 110xxxxx 10xxxxxx */
    if (codepoint <= 0x7FF) {
        utf8[0] = (char)((codepoint >> 6) | 0xC0);
        utf8[1] = (char)((codepoint & 0x3F) | 0x80);
        return 2;
    }

    /* 1110xxxx 10xxxxxx 10xxxxxx */
    if (codepoint <= 0xFFFF) {
        utf8[0] = (char)((codepoint >> 12) | 0xE0);
        utf8[1] = (char)(((codepoint >> 6) & 0x3F) | 0x80);
        utf8[2] = (char)((codepoint & 0x3F) | 0x80);
        return 3;
    }

    /* Outside the range this encoder supports */
    return 0;
}
425
426
427/* Called when index pointing to beginning of UCS-2 hex code: uXXXX
428 * Translate to UTF-8 and append to temporary token string.
429 * Must advance index to the next character to be processed.
430 * Returns: 0 success
431 * -1 error
432 */
433static int json_append_unicode_escape(json_parse_t *json)
434{
435 char utf8[4]; /* 3 bytes of UTF-8 can handle UCS-2 */
436 int codepoint;
437 int len;
438
439 /* Skip 'u' */
440 json->index++;
441
442 /* Fetch UCS-2 codepoint */
443 codepoint = decode_hex4(&json->data[json->index]);
444 if (codepoint < 0) {
445 return -1;
446 }
447
448 /* Convert to UTF-8 */
449 len = codepoint_to_utf8(utf8, codepoint);
450 if (!len) {
451 return -1;
452 }
453
454 /* Append bytes and advance counter */
455 strbuf_append_mem(json->tmp, utf8, len);
456 json->index += 4;
457
458 return 0;
459}
460
398static void json_next_string_token(json_parse_t *json, json_token_t *token) 461static void json_next_string_token(json_parse_t *json, json_token_t *token)
399{ 462{
400 char ch; 463 char ch;
@@ -402,36 +465,44 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
402 /* Caller must ensure a string is next */ 465 /* Caller must ensure a string is next */
403 assert(json->data[json->index] == '"'); 466 assert(json->data[json->index] == '"');
404 467
405 /* Gobble string. FIXME, ugly */ 468 /* Skip " */
469 json->index++;
406 470
471 /* json->tmp is the temporary strbuf used to accumulate the
472 * decoded string value. */
407 json->tmp->length = 0; 473 json->tmp->length = 0;
408 while ((ch = json->data[++json->index]) != '"') { 474 while ((ch = json->data[json->index]) != '"') {
475 if (!ch) {
476 /* Premature end of the string */
477 token->type = T_ERROR;
478 return;
479 }
480
409 /* Handle escapes */ 481 /* Handle escapes */
410 if (ch == '\\') { 482 if (ch == '\\') {
411 /* Translate escape code */ 483 /* Skip \ and fetch escape character */
412 ch = json_ch2escape[(unsigned char)json->data[++json->index]]; 484 json->index++;
485 ch = json->data[json->index];
486
487 /* Translate escape code and append to tmp string */
488 ch = json_ch2escape[(unsigned char)ch];
489 if (ch == 'u') {
490 if (json_append_unicode_escape(json) < 0)
491 continue;
492
493 token->type = T_ERROR;
494 return;
495 }
413 if (!ch) { 496 if (!ch) {
414 /* Invalid escape code */ 497 /* Invalid escape code */
415 token->type = T_ERROR; 498 token->type = T_ERROR;
416 return; 499 return;
417 } 500 }
418 if (ch == 'u') {
419 /* Process unicode */
420 /* FIXME: cleanup memory handling. Implement iconv(3)
421 * conversion from UCS-2 -> UTF-8
422 */
423 if (!memcmp(&json->data[json->index], "u0000", 5)) {
424 /* Handle NULL */
425 ch = 0;
426 json->index += 4;
427 } else {
428 /* Remaining codepoints unhandled */
429 token->type = T_ERROR;
430 return;
431 }
432 }
433 } 501 }
502 /* Append normal character or translated single character
503 * Unicode escapes are handled above */
434 strbuf_append_char(json->tmp, ch); 504 strbuf_append_char(json->tmp, ch);
505 json->index++;
435 } 506 }
436 json->index++; /* Eat final quote (") */ 507 json->index++; /* Eat final quote (") */
437 508
@@ -646,7 +717,7 @@ static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *t
646} 717}
647 718
648/* json_text must be null terminated string */ 719/* json_text must be null terminated string */
649void lua_json_decode(lua_State *l, const char *json_text) 720static void lua_json_decode(lua_State *l, const char *json_text)
650{ 721{
651 json_parse_t json; 722 json_parse_t json;
652 json_token_t token; 723 json_token_t token;
@@ -667,59 +738,42 @@ void lua_json_decode(lua_State *l, const char *json_text)
667 strbuf_free(json.tmp); 738 strbuf_free(json.tmp);
668} 739}
669 740
670/* lua_json_pcall_decode(string) must be called via lua_pcall(). 741static int json_decode(lua_State *l)
671 * This allows a C caller to catch any errors so the string can
672 * be freed before returning to Lua. */
673int lua_json_pcall_decode(lua_State *l)
674{
675 const char *json;
676
677 verify_arg_count(l, 1);
678 luaL_argcheck(l, lua_islightuserdata(l, 1), 1,
679 "missing lightuserdata");
680
681 json = lua_touserdata(l, 1);
682 lua_pop(l, 1);
683
684 lua_json_decode(l, json);
685
686 return 1;
687}
688
689static int lua_api_json_decode(lua_State *l)
690{ 742{
691 const char *json; 743 const char *json;
692 744
693 verify_arg_count(l, 1); 745 luaL_argcheck(l, lua_gettop(l) <= 1, 2, "found too many arguments");
694 json = luaL_checkstring(l, 1); 746 json = luaL_checkstring(l, 1);
695 747
696 lua_json_decode(l, json); 748 lua_json_decode(l, json);
697 749
698 lua_remove(l, 1);
699
700 return 1; 750 return 1;
701} 751}
702 752
703/* ===== INITIALISATION ===== */ 753/* ===== INITIALISATION ===== */
704 754
755/* FIXME: Rewrite to keep lookup tables within Lua (userdata?)
756 * Remove pthread dependency */
705static pthread_once_t json_global_init_once = PTHREAD_ONCE_INIT; 757static pthread_once_t json_global_init_once = PTHREAD_ONCE_INIT;
706 758
707void lua_json_init(lua_State *l) 759int luaopen_cjson(lua_State *l)
708{ 760{
709 luaL_Reg reg[] = { 761 luaL_Reg reg[] = {
710 { "encode", lua_api_json_encode }, 762 { "encode", json_encode },
711 { "decode", lua_api_json_decode }, 763 { "decode", json_decode },
712 { NULL, NULL } 764 { NULL, NULL }
713 }; 765 };
714 766
715 luaL_register(l, "json", reg); 767 luaL_register(l, "cjson", reg);
716 768
717 /* Set json.null, and pop "json" table from the stack */ 769 /* Set cjson.null */
718 lua_pushlightuserdata(l, NULL); 770 lua_pushlightuserdata(l, NULL);
719 lua_setfield(l, -2, "null"); 771 lua_setfield(l, -2, "null");
720 lua_pop(l, 1);
721 772
722 SYS_NOFAIL(pthread_once(&json_global_init_once, json_global_init)); 773 pthread_once(&json_global_init_once, json_global_init);
774
775 /* Return cjson table */
776 return 1;
723} 777}
724 778
725/* vi:ai et sw=4 ts=4: 779/* vi:ai et sw=4 ts=4:
diff --git a/strbuf.c b/strbuf.c
index 44145dd..208a7be 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -5,13 +5,14 @@
5 5
6#include "strbuf.h" 6#include "strbuf.h"
7 7
8static void die(const char *format, ...) 8void die(const char *fmt, ...)
9{ 9{
10 va_list arg; 10 va_list arg;
11 11
12 va_start(arg, format); 12 va_start(arg, fmt);
13 vfprintf(stderr, format, arg); 13 vfprintf(stderr, fmt, arg);
14 va_end(arg); 14 va_end(arg);
15 fprintf(stderr, "\n");
15 16
16 exit(-1); 17 exit(-1);
17} 18}