diff options
author | Mark Pulford <mark@kyne.com.au> | 2011-05-01 02:11:10 +0930 |
---|---|---|
committer | Mark Pulford <mark@kyne.com.au> | 2011-05-01 02:11:10 +0930 |
commit | 60fb31cfdd625ea3bc4a12a8440715c8ff0c9242 (patch) | |
tree | 58022c23864fde947839cf7ba0eb18749e4e7e07 | |
parent | 058acaa9f2d52a4854650d19964bb7892c99d9af (diff) | |
download | lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.tar.gz lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.tar.bz2 lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.zip |
Create "cjson" Lua module, support UCS-2 escapes
- Convert lua_json_init() into luaopen_cjson() to support dynamic .so
loading.
- Rename "json" to "cjson" to reduce conflicts with other JSON modules.
- Remove unnecessary *_pcall_* API. Lua calls are fast enough,
even through C.
- Encode empty tables as objects
- Add support for decoding all UCS-2 escape codes.
-rw-r--r-- | lua_cjson.c (renamed from lua_json.c) | 246 | ||||
-rw-r--r-- | strbuf.c | 7 |
2 files changed, 154 insertions, 99 deletions
@@ -2,11 +2,15 @@ | |||
2 | */ | 2 | */ |
3 | 3 | ||
4 | /* Caveats: | 4 | /* Caveats: |
5 | * - No unicode support | 5 | * - Assumes strings are valid UTF-8 and mostly treats them as opaque |
6 | * binary data. Will not throw an exception on bad data. | ||
7 | * - Will decode \uXXXX escapes, but leaves high codepoints as UTF-8 | ||
8 | * when encoding. | ||
6 | * - JSON "null" values are represented as lightuserdata. Compare with | 9 | * - JSON "null" values are represented as lightuserdata. Compare with |
7 | * json.null. | 10 | * json.null. |
8 | * - Parsing comments is not support. According to json.org, this isn't | 11 | * - Parsing comments is not supported. According to json.org, this isn't |
9 | * part of the spec. | 12 | * part of the spec. |
13 | * - Parser accepts number formats beyond the JSON spec. | ||
10 | * | 14 | * |
11 | * Note: lua_json_decode() probably spends significant time rehashing | 15 | * Note: lua_json_decode() probably spends significant time rehashing |
12 | * tables since it is difficult to know their size ahead of time. | 16 | * tables since it is difficult to know their size ahead of time. |
@@ -15,10 +19,10 @@ | |||
15 | */ | 19 | */ |
16 | 20 | ||
17 | /* FIXME: | 21 | /* FIXME: |
18 | * - Ensure JSON data is UTF-8. Fail otherwise. | ||
19 | * - Alternatively, dynamically support Unicode in JSON string. Return current locale. | ||
20 | * - Consider implementing other Unicode standards. | ||
21 | * - Option to encode non-printable characters? Only \" \\ are required | 22 | * - Option to encode non-printable characters? Only \" \\ are required |
23 | * - Protect against cycles when encoding JSON from a data structure | ||
24 | * - Max depth? Notice cycles? | ||
25 | * - Handle huge sparse arrays? | ||
22 | */ | 26 | */ |
23 | 27 | ||
24 | #include <assert.h> | 28 | #include <assert.h> |
@@ -30,17 +34,8 @@ | |||
30 | #include <lua.h> | 34 | #include <lua.h> |
31 | #include <lauxlib.h> | 35 | #include <lauxlib.h> |
32 | 36 | ||
33 | #include "lua_json.h" | ||
34 | #include "strbuf.h" | 37 | #include "strbuf.h" |
35 | 38 | ||
36 | #include "die.h" | ||
37 | |||
38 | |||
39 | static void verify_arg_count(lua_State *l, int nargs) | ||
40 | { | ||
41 | luaL_argcheck(l, lua_gettop(l) <= nargs, nargs + 1, "too many arguments"); | ||
42 | } | ||
43 | |||
44 | /* ===== ENCODING ===== */ | 39 | /* ===== ENCODING ===== */ |
45 | 40 | ||
46 | static void json_encode_exception(lua_State *l, strbuf_t *json, | 41 | static void json_encode_exception(lua_State *l, strbuf_t *json, |
@@ -125,7 +120,8 @@ static int lua_array_length(lua_State *l) | |||
125 | /* table, startkey */ | 120 | /* table, startkey */ |
126 | while (lua_next(l, -2) != 0) { | 121 | while (lua_next(l, -2) != 0) { |
127 | /* table, key, value */ | 122 | /* table, key, value */ |
128 | if ((k = lua_tonumber(l, -2))) { | 123 | if (lua_isnumber(l, -2) && |
124 | (k = lua_tonumber(l, -2))) { | ||
129 | /* Integer >= 1 ? */ | 125 | /* Integer >= 1 ? */ |
130 | if (floor(k) == k && k >= 1) { | 126 | if (floor(k) == k && k >= 1) { |
131 | if (k > max) | 127 | if (k > max) |
@@ -228,7 +224,7 @@ static void json_append_data(lua_State *l, strbuf_t *json) | |||
228 | break; | 224 | break; |
229 | case LUA_TTABLE: | 225 | case LUA_TTABLE: |
230 | len = lua_array_length(l); | 226 | len = lua_array_length(l); |
231 | if (len >= 0) | 227 | if (len > 0) |
232 | json_append_array(l, json, len); | 228 | json_append_array(l, json, len); |
233 | else | 229 | else |
234 | json_append_object(l, json); | 230 | json_append_object(l, json); |
@@ -249,43 +245,18 @@ static void json_append_data(lua_State *l, strbuf_t *json) | |||
249 | } | 245 | } |
250 | } | 246 | } |
251 | 247 | ||
252 | /* lua_json_encode can throw an exception */ | 248 | static int json_encode(lua_State *l) |
253 | char *lua_json_encode(lua_State *l, int *len) | ||
254 | { | 249 | { |
255 | strbuf_t buf; | 250 | strbuf_t buf; |
256 | char *json; | 251 | char *json; |
257 | |||
258 | strbuf_init(&buf, 0); | ||
259 | json_append_data(l, &buf); | ||
260 | json = strbuf_free_to_string(&buf, len); | ||
261 | |||
262 | return json; | ||
263 | } | ||
264 | |||
265 | /* lua_json_pcall_encode(object) must be called via lua_pcall(). | ||
266 | * This allows a C caller to catch any errors without needing | ||
267 | * to register the string with Lua for garbage collection. */ | ||
268 | int lua_json_pcall_encode(lua_State *l) | ||
269 | { | ||
270 | char *json; | ||
271 | int len; | 252 | int len; |
272 | 253 | ||
273 | verify_arg_count(l, 1); | 254 | luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument"); |
274 | |||
275 | json = lua_json_encode(l, &len); | ||
276 | |||
277 | lua_pushlightuserdata(l, json); | ||
278 | lua_pushnumber(l, len); | ||
279 | |||
280 | return 2; | ||
281 | } | ||
282 | 255 | ||
283 | int lua_api_json_encode(lua_State *l) | 256 | strbuf_init(&buf, 0); |
284 | { | 257 | json_append_data(l, &buf); |
285 | char *json; | 258 | json = strbuf_free_to_string(&buf, &len); |
286 | int len; | ||
287 | 259 | ||
288 | json = lua_json_encode(l, &len); | ||
289 | lua_pushlstring(l, json, len); | 260 | lua_pushlstring(l, json, len); |
290 | free(json); | 261 | free(json); |
291 | 262 | ||
@@ -395,6 +366,98 @@ static void json_global_init() | |||
395 | json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */ | 366 | json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */ |
396 | } | 367 | } |
397 | 368 | ||
/* Convert a single ASCII hex digit to its numeric value.
 *
 * hex: character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are accepted.
 * Returns: 0-15 for a valid hex digit
 *          -1 for any other character (including the NUL terminator)
 */
static inline int hexdigit2int(char hex)
{
    if ('0' <= hex && hex <= '9')
        return hex - '0';

    /* Force lowercase: setting bit 0x20 maps 'A'-'F' onto 'a'-'f'.
     * No other character lands in the 'a'-'f' range as a result. */
    hex |= 0x20;
    if ('a' <= hex && hex <= 'f')
        return 10 + hex - 'a';

    return -1;
}
381 | |||
/* Decode four consecutive ASCII hex digits into an integer.
 *
 * hex: pointer to the first digit. Decoding stops at the first
 *      character that is not a hex digit, and the NUL terminator is
 *      not a hex digit, so a NUL-terminated string shorter than four
 *      characters is never read past its terminator.
 * Returns: 0x0000-0xFFFF on success
 *          -1 if any of the four characters is not a hex digit
 */
static int decode_hex4(const char *hex)
{
    int digit[4];
    int i;

    /* Convert each ASCII hex digit to its numeric value, bailing out
     * on the first invalid one */
    for (i = 0; i < 4; i++) {
        digit[i] = hexdigit2int(hex[i]);
        if (digit[i] < 0) {
            return -1;
        }
    }

    /* Most significant digit first */
    return (digit[0] << 12) +
           (digit[1] << 8) +
           (digit[2] << 4) +
            digit[3];
}
402 | |||
/* Encode a codepoint in the range 0x0000-0xFFFF as UTF-8.
 *
 * utf8:      output buffer; up to 3 bytes are written. No NUL
 *            terminator is appended.
 * codepoint: value to encode. Values in 0xD800-0xDFFF are encoded
 *            like any other 3 byte value; no surrogate pairing is
 *            performed here.
 * Returns: number of bytes written (1-3)
 *          0 if the codepoint is negative or above 0xFFFF
 */
static int codepoint_to_utf8(char *utf8, int codepoint)
{
    /* A negative value is not a codepoint; reject it rather than
     * emitting a truncated single byte */
    if (codepoint < 0)
        return 0;

    /* 0xxxxxxx */
    if (codepoint <= 0x7F) {
        utf8[0] = codepoint;
        return 1;
    }

    /* 110xxxxx 10xxxxxx */
    if (codepoint <= 0x7FF) {
        utf8[0] = (codepoint >> 6) | 0xC0;
        utf8[1] = (codepoint & 0x3F) | 0x80;
        return 2;
    }

    /* 1110xxxx 10xxxxxx 10xxxxxx */
    if (codepoint <= 0xFFFF) {
        utf8[0] = (codepoint >> 12) | 0xE0;
        utf8[1] = ((codepoint >> 6) & 0x3F) | 0x80;
        utf8[2] = (codepoint & 0x3F) | 0x80;
        return 3;
    }

    return 0;
}
425 | |||
426 | |||
427 | /* Called when index pointing to beginning of UCS-2 hex code: uXXXX | ||
428 | * Translate to UTF-8 and append to temporary token string. | ||
429 | * Must advance index to the next character to be processed. | ||
430 | * Returns: 0 success | ||
431 | * -1 error | ||
432 | */ | ||
433 | static int json_append_unicode_escape(json_parse_t *json) | ||
434 | { | ||
435 | char utf8[4]; /* 3 bytes of UTF-8 can handle UCS-2 */ | ||
436 | int codepoint; | ||
437 | int len; | ||
438 | |||
439 | /* Skip 'u' */ | ||
440 | json->index++; | ||
441 | |||
442 | /* Fetch UCS-2 codepoint */ | ||
443 | codepoint = decode_hex4(&json->data[json->index]); | ||
444 | if (codepoint < 0) { | ||
445 | return -1; | ||
446 | } | ||
447 | |||
448 | /* Convert to UTF-8 */ | ||
449 | len = codepoint_to_utf8(utf8, codepoint); | ||
450 | if (!len) { | ||
451 | return -1; | ||
452 | } | ||
453 | |||
454 | /* Append bytes and advance counter */ | ||
455 | strbuf_append_mem(json->tmp, utf8, len); | ||
456 | json->index += 4; | ||
457 | |||
458 | return 0; | ||
459 | } | ||
460 | |||
398 | static void json_next_string_token(json_parse_t *json, json_token_t *token) | 461 | static void json_next_string_token(json_parse_t *json, json_token_t *token) |
399 | { | 462 | { |
400 | char ch; | 463 | char ch; |
@@ -402,36 +465,44 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
402 | /* Caller must ensure a string is next */ | 465 | /* Caller must ensure a string is next */ |
403 | assert(json->data[json->index] == '"'); | 466 | assert(json->data[json->index] == '"'); |
404 | 467 | ||
405 | /* Gobble string. FIXME, ugly */ | 468 | /* Skip " */ |
469 | json->index++; | ||
406 | 470 | ||
471 | /* json->tmp is the temporary strbuf used to accumulate the | ||
472 | * decoded string value. */ | ||
407 | json->tmp->length = 0; | 473 | json->tmp->length = 0; |
408 | while ((ch = json->data[++json->index]) != '"') { | 474 | while ((ch = json->data[json->index]) != '"') { |
475 | if (!ch) { | ||
476 | /* Premature end of the string */ | ||
477 | token->type = T_ERROR; | ||
478 | return; | ||
479 | } | ||
480 | |||
409 | /* Handle escapes */ | 481 | /* Handle escapes */ |
410 | if (ch == '\\') { | 482 | if (ch == '\\') { |
411 | /* Translate escape code */ | 483 | /* Skip \ and fetch escape character */ |
412 | ch = json_ch2escape[(unsigned char)json->data[++json->index]]; | 484 | json->index++; |
485 | ch = json->data[json->index]; | ||
486 | |||
487 | /* Translate escape code and append to tmp string */ | ||
488 | ch = json_ch2escape[(unsigned char)ch]; | ||
489 | if (ch == 'u') { | ||
490 | if (json_append_unicode_escape(json) < 0) | ||
491 | continue; | ||
492 | |||
493 | token->type = T_ERROR; | ||
494 | return; | ||
495 | } | ||
413 | if (!ch) { | 496 | if (!ch) { |
414 | /* Invalid escape code */ | 497 | /* Invalid escape code */ |
415 | token->type = T_ERROR; | 498 | token->type = T_ERROR; |
416 | return; | 499 | return; |
417 | } | 500 | } |
418 | if (ch == 'u') { | ||
419 | /* Process unicode */ | ||
420 | /* FIXME: cleanup memory handling. Implement iconv(3) | ||
421 | * conversion from UCS-2 -> UTF-8 | ||
422 | */ | ||
423 | if (!memcmp(&json->data[json->index], "u0000", 5)) { | ||
424 | /* Handle NULL */ | ||
425 | ch = 0; | ||
426 | json->index += 4; | ||
427 | } else { | ||
428 | /* Remaining codepoints unhandled */ | ||
429 | token->type = T_ERROR; | ||
430 | return; | ||
431 | } | ||
432 | } | ||
433 | } | 501 | } |
502 | /* Append normal character or translated single character | ||
503 | * Unicode escapes are handled above */ | ||
434 | strbuf_append_char(json->tmp, ch); | 504 | strbuf_append_char(json->tmp, ch); |
505 | json->index++; | ||
435 | } | 506 | } |
436 | json->index++; /* Eat final quote (") */ | 507 | json->index++; /* Eat final quote (") */ |
437 | 508 | ||
@@ -646,7 +717,7 @@ static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *t | |||
646 | } | 717 | } |
647 | 718 | ||
648 | /* json_text must be null terminated string */ | 719 | /* json_text must be null terminated string */ |
649 | void lua_json_decode(lua_State *l, const char *json_text) | 720 | static void lua_json_decode(lua_State *l, const char *json_text) |
650 | { | 721 | { |
651 | json_parse_t json; | 722 | json_parse_t json; |
652 | json_token_t token; | 723 | json_token_t token; |
@@ -667,59 +738,42 @@ void lua_json_decode(lua_State *l, const char *json_text) | |||
667 | strbuf_free(json.tmp); | 738 | strbuf_free(json.tmp); |
668 | } | 739 | } |
669 | 740 | ||
670 | /* lua_json_pcall_decode(string) must be called via lua_pcall(). | 741 | static int json_decode(lua_State *l) |
671 | * This allows a C caller to catch any errors so the string can | ||
672 | * be freed before returning to Lua. */ | ||
673 | int lua_json_pcall_decode(lua_State *l) | ||
674 | { | ||
675 | const char *json; | ||
676 | |||
677 | verify_arg_count(l, 1); | ||
678 | luaL_argcheck(l, lua_islightuserdata(l, 1), 1, | ||
679 | "missing lightuserdata"); | ||
680 | |||
681 | json = lua_touserdata(l, 1); | ||
682 | lua_pop(l, 1); | ||
683 | |||
684 | lua_json_decode(l, json); | ||
685 | |||
686 | return 1; | ||
687 | } | ||
688 | |||
689 | static int lua_api_json_decode(lua_State *l) | ||
690 | { | 742 | { |
691 | const char *json; | 743 | const char *json; |
692 | 744 | ||
693 | verify_arg_count(l, 1); | 745 | luaL_argcheck(l, lua_gettop(l) <= 1, 2, "found too many arguments"); |
694 | json = luaL_checkstring(l, 1); | 746 | json = luaL_checkstring(l, 1); |
695 | 747 | ||
696 | lua_json_decode(l, json); | 748 | lua_json_decode(l, json); |
697 | 749 | ||
698 | lua_remove(l, 1); | ||
699 | |||
700 | return 1; | 750 | return 1; |
701 | } | 751 | } |
702 | 752 | ||
703 | /* ===== INITIALISATION ===== */ | 753 | /* ===== INITIALISATION ===== */ |
704 | 754 | ||
755 | /* FIXME: Rewrite to keep lookup tables within Lua (userdata?) | ||
756 | * Remove pthread dependency */ | ||
705 | static pthread_once_t json_global_init_once = PTHREAD_ONCE_INIT; | 757 | static pthread_once_t json_global_init_once = PTHREAD_ONCE_INIT; |
706 | 758 | ||
707 | void lua_json_init(lua_State *l) | 759 | int luaopen_cjson(lua_State *l) |
708 | { | 760 | { |
709 | luaL_Reg reg[] = { | 761 | luaL_Reg reg[] = { |
710 | { "encode", lua_api_json_encode }, | 762 | { "encode", json_encode }, |
711 | { "decode", lua_api_json_decode }, | 763 | { "decode", json_decode }, |
712 | { NULL, NULL } | 764 | { NULL, NULL } |
713 | }; | 765 | }; |
714 | 766 | ||
715 | luaL_register(l, "json", reg); | 767 | luaL_register(l, "cjson", reg); |
716 | 768 | ||
717 | /* Set json.null, and pop "json" table from the stack */ | 769 | /* Set cjson.null */ |
718 | lua_pushlightuserdata(l, NULL); | 770 | lua_pushlightuserdata(l, NULL); |
719 | lua_setfield(l, -2, "null"); | 771 | lua_setfield(l, -2, "null"); |
720 | lua_pop(l, 1); | ||
721 | 772 | ||
722 | SYS_NOFAIL(pthread_once(&json_global_init_once, json_global_init)); | 773 | pthread_once(&json_global_init_once, json_global_init); |
774 | |||
775 | /* Return cjson table */ | ||
776 | return 1; | ||
723 | } | 777 | } |
724 | 778 | ||
725 | /* vi:ai et sw=4 ts=4: | 779 | /* vi:ai et sw=4 ts=4: |
@@ -5,13 +5,14 @@ | |||
5 | 5 | ||
6 | #include "strbuf.h" | 6 | #include "strbuf.h" |
7 | 7 | ||
8 | static void die(const char *format, ...) | 8 | void die(const char *fmt, ...) |
9 | { | 9 | { |
10 | va_list arg; | 10 | va_list arg; |
11 | 11 | ||
12 | va_start(arg, format); | 12 | va_start(arg, fmt); |
13 | vfprintf(stderr, format, arg); | 13 | vfprintf(stderr, fmt, arg); |
14 | va_end(arg); | 14 | va_end(arg); |
15 | fprintf(stderr, "\n"); | ||
15 | 16 | ||
16 | exit(-1); | 17 | exit(-1); |
17 | } | 18 | } |