diff options
| author | Mark Pulford <mark@kyne.com.au> | 2011-05-01 02:11:10 +0930 |
|---|---|---|
| committer | Mark Pulford <mark@kyne.com.au> | 2011-05-01 02:11:10 +0930 |
| commit | 60fb31cfdd625ea3bc4a12a8440715c8ff0c9242 (patch) | |
| tree | 58022c23864fde947839cf7ba0eb18749e4e7e07 /lua_cjson.c | |
| parent | 058acaa9f2d52a4854650d19964bb7892c99d9af (diff) | |
| download | lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.tar.gz lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.tar.bz2 lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.zip | |
Create "cjson" Lua module, support UCS-2 escapes

- Convert lua_json_init() into luaopen_cjson() to support dynamic .so
  loading.
- Rename "json" to "cjson" to reduce conflicts with other JSON modules.
- Remove unnecessary *_pcall_* API. Lua calls are fast enough,
  even through C.
- Encode empty tables as objects.
- Add support for decoding all UCS-2 escape codes.
Diffstat (limited to '')
| -rw-r--r-- | lua_cjson.c (renamed from lua_json.c) | 246 |
1 files changed, 150 insertions, 96 deletions
| @@ -2,11 +2,15 @@ | |||
| 2 | */ | 2 | */ |
| 3 | 3 | ||
| 4 | /* Caveats: | 4 | /* Caveats: |
| 5 | * - No unicode support | 5 | * - Assumes strings are valid UTF-8 and mostly treats them as opaque |
| 6 | * binary data. Will not throw an exception on bad data. | ||
| 7 | * - Will decode \uXXXX escapes, but leaves high codepoints as UTF-8 | ||
| 8 | * when encoding. | ||
| 6 | * - JSON "null" values are represented as lightuserdata. Compare with | 9 | * - JSON "null" values are represented as lightuserdata. Compare with |
| 7 | * json.null. | 10 | * json.null. |
| 8 | * - Parsing comments is not support. According to json.org, this isn't | 11 | * - Parsing comments is not supported. According to json.org, this isn't |
| 9 | * part of the spec. | 12 | * part of the spec. |
| 13 | * - Parser accepts number formats beyond the JSON spec. | ||
| 10 | * | 14 | * |
| 11 | * Note: lua_json_decode() probably spends significant time rehashing | 15 | * Note: lua_json_decode() probably spends significant time rehashing |
| 12 | * tables since it is difficult to know their size ahead of time. | 16 | * tables since it is difficult to know their size ahead of time. |
| @@ -15,10 +19,10 @@ | |||
| 15 | */ | 19 | */ |
| 16 | 20 | ||
| 17 | /* FIXME: | 21 | /* FIXME: |
| 18 | * - Ensure JSON data is UTF-8. Fail otherwise. | ||
| 19 | * - Alternatively, dynamically support Unicode in JSON string. Return current locale. | ||
| 20 | * - Consider implementing other Unicode standards. | ||
| 21 | * - Option to encode non-printable characters? Only \" \\ are required | 22 | * - Option to encode non-printable characters? Only \" \\ are required |
| 23 | * - Protect against cycles when encoding JSON from a data structure | ||
| 24 | * - Max depth? Notice cycles? | ||
| 25 | * - Handle huge sparse arrays? | ||
| 22 | */ | 26 | */ |
| 23 | 27 | ||
| 24 | #include <assert.h> | 28 | #include <assert.h> |
| @@ -30,17 +34,8 @@ | |||
| 30 | #include <lua.h> | 34 | #include <lua.h> |
| 31 | #include <lauxlib.h> | 35 | #include <lauxlib.h> |
| 32 | 36 | ||
| 33 | #include "lua_json.h" | ||
| 34 | #include "strbuf.h" | 37 | #include "strbuf.h" |
| 35 | 38 | ||
| 36 | #include "die.h" | ||
| 37 | |||
| 38 | |||
| 39 | static void verify_arg_count(lua_State *l, int nargs) | ||
| 40 | { | ||
| 41 | luaL_argcheck(l, lua_gettop(l) <= nargs, nargs + 1, "too many arguments"); | ||
| 42 | } | ||
| 43 | |||
| 44 | /* ===== ENCODING ===== */ | 39 | /* ===== ENCODING ===== */ |
| 45 | 40 | ||
| 46 | static void json_encode_exception(lua_State *l, strbuf_t *json, | 41 | static void json_encode_exception(lua_State *l, strbuf_t *json, |
| @@ -125,7 +120,8 @@ static int lua_array_length(lua_State *l) | |||
| 125 | /* table, startkey */ | 120 | /* table, startkey */ |
| 126 | while (lua_next(l, -2) != 0) { | 121 | while (lua_next(l, -2) != 0) { |
| 127 | /* table, key, value */ | 122 | /* table, key, value */ |
| 128 | if ((k = lua_tonumber(l, -2))) { | 123 | if (lua_isnumber(l, -2) && |
| 124 | (k = lua_tonumber(l, -2))) { | ||
| 129 | /* Integer >= 1 ? */ | 125 | /* Integer >= 1 ? */ |
| 130 | if (floor(k) == k && k >= 1) { | 126 | if (floor(k) == k && k >= 1) { |
| 131 | if (k > max) | 127 | if (k > max) |
| @@ -228,7 +224,7 @@ static void json_append_data(lua_State *l, strbuf_t *json) | |||
| 228 | break; | 224 | break; |
| 229 | case LUA_TTABLE: | 225 | case LUA_TTABLE: |
| 230 | len = lua_array_length(l); | 226 | len = lua_array_length(l); |
| 231 | if (len >= 0) | 227 | if (len > 0) |
| 232 | json_append_array(l, json, len); | 228 | json_append_array(l, json, len); |
| 233 | else | 229 | else |
| 234 | json_append_object(l, json); | 230 | json_append_object(l, json); |
| @@ -249,43 +245,18 @@ static void json_append_data(lua_State *l, strbuf_t *json) | |||
| 249 | } | 245 | } |
| 250 | } | 246 | } |
| 251 | 247 | ||
| 252 | /* lua_json_encode can throw an exception */ | 248 | static int json_encode(lua_State *l) |
| 253 | char *lua_json_encode(lua_State *l, int *len) | ||
| 254 | { | 249 | { |
| 255 | strbuf_t buf; | 250 | strbuf_t buf; |
| 256 | char *json; | 251 | char *json; |
| 257 | |||
| 258 | strbuf_init(&buf, 0); | ||
| 259 | json_append_data(l, &buf); | ||
| 260 | json = strbuf_free_to_string(&buf, len); | ||
| 261 | |||
| 262 | return json; | ||
| 263 | } | ||
| 264 | |||
| 265 | /* lua_json_pcall_encode(object) must be called via lua_pcall(). | ||
| 266 | * This allows a C caller to catch any errors without needing | ||
| 267 | * to register the string with Lua for garbage collection. */ | ||
| 268 | int lua_json_pcall_encode(lua_State *l) | ||
| 269 | { | ||
| 270 | char *json; | ||
| 271 | int len; | 252 | int len; |
| 272 | 253 | ||
| 273 | verify_arg_count(l, 1); | 254 | luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument"); |
| 274 | |||
| 275 | json = lua_json_encode(l, &len); | ||
| 276 | |||
| 277 | lua_pushlightuserdata(l, json); | ||
| 278 | lua_pushnumber(l, len); | ||
| 279 | |||
| 280 | return 2; | ||
| 281 | } | ||
| 282 | 255 | ||
| 283 | int lua_api_json_encode(lua_State *l) | 256 | strbuf_init(&buf, 0); |
| 284 | { | 257 | json_append_data(l, &buf); |
| 285 | char *json; | 258 | json = strbuf_free_to_string(&buf, &len); |
| 286 | int len; | ||
| 287 | 259 | ||
| 288 | json = lua_json_encode(l, &len); | ||
| 289 | lua_pushlstring(l, json, len); | 260 | lua_pushlstring(l, json, len); |
| 290 | free(json); | 261 | free(json); |
| 291 | 262 | ||
| @@ -395,6 +366,98 @@ static void json_global_init() | |||
| 395 | json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */ | 366 | json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */ |
| 396 | } | 367 | } |
| 397 | 368 | ||
| 369 | static inline int hexdigit2int(char hex) | ||
| 370 | { | ||
| 371 | if ('0' <= hex && hex <= '9') | ||
| 372 | return hex - '0'; | ||
| 373 | |||
| 374 | /* Force lowercase */ | ||
| 375 | hex |= 0x20; | ||
| 376 | if ('a' <= hex && hex <= 'f') | ||
| 377 | return 10 + hex - 'a'; | ||
| 378 | |||
| 379 | return -1; | ||
| 380 | } | ||
| 381 | |||
| 382 | static int decode_hex4(const char *hex) | ||
| 383 | { | ||
| 384 | int digit[4]; | ||
| 385 | int i; | ||
| 386 | |||
| 387 | /* Convert ASCII hex digit to numeric digit | ||
| 388 | * Note: this returns an error for invalid hex digits, including | ||
| 389 | * NULL */ | ||
| 390 | for (i = 0; i < 4; i++) { | ||
| 391 | digit[i] = hexdigit2int(hex[i]); | ||
| 392 | if (digit[i] < 0) { | ||
| 393 | return -1; | ||
| 394 | } | ||
| 395 | } | ||
| 396 | |||
| 397 | return (digit[0] << 12) + | ||
| 398 | (digit[1] << 8) + | ||
| 399 | (digit[2] << 4) + | ||
| 400 | digit[3]; | ||
| 401 | } | ||
| 402 | |||
| 403 | static int codepoint_to_utf8(char *utf8, int codepoint) | ||
| 404 | { | ||
| 405 | if (codepoint <= 0x7F) { | ||
| 406 | utf8[0] = codepoint; | ||
| 407 | return 1; | ||
| 408 | } | ||
| 409 | |||
| 410 | if (codepoint <= 0x7FF) { | ||
| 411 | utf8[0] = (codepoint >> 6) | 0xC0; | ||
| 412 | utf8[1] = (codepoint & 0x3F) | 0x80; | ||
| 413 | return 2; | ||
| 414 | } | ||
| 415 | |||
| 416 | if (codepoint <= 0xFFFF) { | ||
| 417 | utf8[0] = (codepoint >> 12) | 0xE0; | ||
| 418 | utf8[1] = ((codepoint >> 6) & 0x3F) | 0x80; | ||
| 419 | utf8[2] = (codepoint & 0x3F) | 0x80; | ||
| 420 | return 3; | ||
| 421 | } | ||
| 422 | |||
| 423 | return 0; | ||
| 424 | } | ||
| 425 | |||
| 426 | |||
| 427 | /* Called when index pointing to beginning of UCS-2 hex code: uXXXX | ||
| 428 | * Translate to UTF-8 and append to temporary token string. | ||
| 429 | * Must advance index to the next character to be processed. | ||
| 430 | * Returns: 0 success | ||
| 431 | * -1 error | ||
| 432 | */ | ||
| 433 | static int json_append_unicode_escape(json_parse_t *json) | ||
| 434 | { | ||
| 435 | char utf8[4]; /* 3 bytes of UTF-8 can handle UCS-2 */ | ||
| 436 | int codepoint; | ||
| 437 | int len; | ||
| 438 | |||
| 439 | /* Skip 'u' */ | ||
| 440 | json->index++; | ||
| 441 | |||
| 442 | /* Fetch UCS-2 codepoint */ | ||
| 443 | codepoint = decode_hex4(&json->data[json->index]); | ||
| 444 | if (codepoint < 0) { | ||
| 445 | return -1; | ||
| 446 | } | ||
| 447 | |||
| 448 | /* Convert to UTF-8 */ | ||
| 449 | len = codepoint_to_utf8(utf8, codepoint); | ||
| 450 | if (!len) { | ||
| 451 | return -1; | ||
| 452 | } | ||
| 453 | |||
| 454 | /* Append bytes and advance counter */ | ||
| 455 | strbuf_append_mem(json->tmp, utf8, len); | ||
| 456 | json->index += 4; | ||
| 457 | |||
| 458 | return 0; | ||
| 459 | } | ||
| 460 | |||
| 398 | static void json_next_string_token(json_parse_t *json, json_token_t *token) | 461 | static void json_next_string_token(json_parse_t *json, json_token_t *token) |
| 399 | { | 462 | { |
| 400 | char ch; | 463 | char ch; |
| @@ -402,36 +465,44 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
| 402 | /* Caller must ensure a string is next */ | 465 | /* Caller must ensure a string is next */ |
| 403 | assert(json->data[json->index] == '"'); | 466 | assert(json->data[json->index] == '"'); |
| 404 | 467 | ||
| 405 | /* Gobble string. FIXME, ugly */ | 468 | /* Skip " */ |
| 469 | json->index++; | ||
| 406 | 470 | ||
| 471 | /* json->tmp is the temporary strbuf used to accumulate the | ||
| 472 | * decoded string value. */ | ||
| 407 | json->tmp->length = 0; | 473 | json->tmp->length = 0; |
| 408 | while ((ch = json->data[++json->index]) != '"') { | 474 | while ((ch = json->data[json->index]) != '"') { |
| 475 | if (!ch) { | ||
| 476 | /* Premature end of the string */ | ||
| 477 | token->type = T_ERROR; | ||
| 478 | return; | ||
| 479 | } | ||
| 480 | |||
| 409 | /* Handle escapes */ | 481 | /* Handle escapes */ |
| 410 | if (ch == '\\') { | 482 | if (ch == '\\') { |
| 411 | /* Translate escape code */ | 483 | /* Skip \ and fetch escape character */ |
| 412 | ch = json_ch2escape[(unsigned char)json->data[++json->index]]; | 484 | json->index++; |
| 485 | ch = json->data[json->index]; | ||
| 486 | |||
| 487 | /* Translate escape code and append to tmp string */ | ||
| 488 | ch = json_ch2escape[(unsigned char)ch]; | ||
| 489 | if (ch == 'u') { | ||
| 490 | if (json_append_unicode_escape(json) < 0) | ||
| 491 | continue; | ||
| 492 | |||
| 493 | token->type = T_ERROR; | ||
| 494 | return; | ||
| 495 | } | ||
| 413 | if (!ch) { | 496 | if (!ch) { |
| 414 | /* Invalid escape code */ | 497 | /* Invalid escape code */ |
| 415 | token->type = T_ERROR; | 498 | token->type = T_ERROR; |
| 416 | return; | 499 | return; |
| 417 | } | 500 | } |
| 418 | if (ch == 'u') { | ||
| 419 | /* Process unicode */ | ||
| 420 | /* FIXME: cleanup memory handling. Implement iconv(3) | ||
| 421 | * conversion from UCS-2 -> UTF-8 | ||
| 422 | */ | ||
| 423 | if (!memcmp(&json->data[json->index], "u0000", 5)) { | ||
| 424 | /* Handle NULL */ | ||
| 425 | ch = 0; | ||
| 426 | json->index += 4; | ||
| 427 | } else { | ||
| 428 | /* Remaining codepoints unhandled */ | ||
| 429 | token->type = T_ERROR; | ||
| 430 | return; | ||
| 431 | } | ||
| 432 | } | ||
| 433 | } | 501 | } |
| 502 | /* Append normal character or translated single character | ||
| 503 | * Unicode escapes are handled above */ | ||
| 434 | strbuf_append_char(json->tmp, ch); | 504 | strbuf_append_char(json->tmp, ch); |
| 505 | json->index++; | ||
| 435 | } | 506 | } |
| 436 | json->index++; /* Eat final quote (") */ | 507 | json->index++; /* Eat final quote (") */ |
| 437 | 508 | ||
| @@ -646,7 +717,7 @@ static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *t | |||
| 646 | } | 717 | } |
| 647 | 718 | ||
| 648 | /* json_text must be null terminated string */ | 719 | /* json_text must be null terminated string */ |
| 649 | void lua_json_decode(lua_State *l, const char *json_text) | 720 | static void lua_json_decode(lua_State *l, const char *json_text) |
| 650 | { | 721 | { |
| 651 | json_parse_t json; | 722 | json_parse_t json; |
| 652 | json_token_t token; | 723 | json_token_t token; |
| @@ -667,59 +738,42 @@ void lua_json_decode(lua_State *l, const char *json_text) | |||
| 667 | strbuf_free(json.tmp); | 738 | strbuf_free(json.tmp); |
| 668 | } | 739 | } |
| 669 | 740 | ||
| 670 | /* lua_json_pcall_decode(string) must be called via lua_pcall(). | 741 | static int json_decode(lua_State *l) |
| 671 | * This allows a C caller to catch any errors so the string can | ||
| 672 | * be freed before returning to Lua. */ | ||
| 673 | int lua_json_pcall_decode(lua_State *l) | ||
| 674 | { | ||
| 675 | const char *json; | ||
| 676 | |||
| 677 | verify_arg_count(l, 1); | ||
| 678 | luaL_argcheck(l, lua_islightuserdata(l, 1), 1, | ||
| 679 | "missing lightuserdata"); | ||
| 680 | |||
| 681 | json = lua_touserdata(l, 1); | ||
| 682 | lua_pop(l, 1); | ||
| 683 | |||
| 684 | lua_json_decode(l, json); | ||
| 685 | |||
| 686 | return 1; | ||
| 687 | } | ||
| 688 | |||
| 689 | static int lua_api_json_decode(lua_State *l) | ||
| 690 | { | 742 | { |
| 691 | const char *json; | 743 | const char *json; |
| 692 | 744 | ||
| 693 | verify_arg_count(l, 1); | 745 | luaL_argcheck(l, lua_gettop(l) <= 1, 2, "found too many arguments"); |
| 694 | json = luaL_checkstring(l, 1); | 746 | json = luaL_checkstring(l, 1); |
| 695 | 747 | ||
| 696 | lua_json_decode(l, json); | 748 | lua_json_decode(l, json); |
| 697 | 749 | ||
| 698 | lua_remove(l, 1); | ||
| 699 | |||
| 700 | return 1; | 750 | return 1; |
| 701 | } | 751 | } |
| 702 | 752 | ||
| 703 | /* ===== INITIALISATION ===== */ | 753 | /* ===== INITIALISATION ===== */ |
| 704 | 754 | ||
| 755 | /* FIXME: Rewrite to keep lookup tables within Lua (userdata?) | ||
| 756 | * Remove pthread dependency */ | ||
| 705 | static pthread_once_t json_global_init_once = PTHREAD_ONCE_INIT; | 757 | static pthread_once_t json_global_init_once = PTHREAD_ONCE_INIT; |
| 706 | 758 | ||
| 707 | void lua_json_init(lua_State *l) | 759 | int luaopen_cjson(lua_State *l) |
| 708 | { | 760 | { |
| 709 | luaL_Reg reg[] = { | 761 | luaL_Reg reg[] = { |
| 710 | { "encode", lua_api_json_encode }, | 762 | { "encode", json_encode }, |
| 711 | { "decode", lua_api_json_decode }, | 763 | { "decode", json_decode }, |
| 712 | { NULL, NULL } | 764 | { NULL, NULL } |
| 713 | }; | 765 | }; |
| 714 | 766 | ||
| 715 | luaL_register(l, "json", reg); | 767 | luaL_register(l, "cjson", reg); |
| 716 | 768 | ||
| 717 | /* Set json.null, and pop "json" table from the stack */ | 769 | /* Set cjson.null */ |
| 718 | lua_pushlightuserdata(l, NULL); | 770 | lua_pushlightuserdata(l, NULL); |
| 719 | lua_setfield(l, -2, "null"); | 771 | lua_setfield(l, -2, "null"); |
| 720 | lua_pop(l, 1); | ||
| 721 | 772 | ||
| 722 | SYS_NOFAIL(pthread_once(&json_global_init_once, json_global_init)); | 773 | pthread_once(&json_global_init_once, json_global_init); |
| 774 | |||
| 775 | /* Return cjson table */ | ||
| 776 | return 1; | ||
| 723 | } | 777 | } |
| 724 | 778 | ||
| 725 | /* vi:ai et sw=4 ts=4: | 779 | /* vi:ai et sw=4 ts=4: |
