aboutsummaryrefslogtreecommitdiff
path: root/lua_json.c
diff options
context:
space:
mode:
authorMark Pulford <mark@kyne.com.au>2011-05-01 02:11:10 +0930
committerMark Pulford <mark@kyne.com.au>2011-05-01 02:11:10 +0930
commit60fb31cfdd625ea3bc4a12a8440715c8ff0c9242 (patch)
tree58022c23864fde947839cf7ba0eb18749e4e7e07 /lua_json.c
parent058acaa9f2d52a4854650d19964bb7892c99d9af (diff)
downloadlua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.tar.gz
lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.tar.bz2
lua-cjson-60fb31cfdd625ea3bc4a12a8440715c8ff0c9242.zip
Create "cjson" Lua module, support UCS-2 escapes
- Convert lua_json_init() into luaopen_cjson() to support dynamic .so loading. - Rename "json" to "cjson" to reduce conflicts with other JSON modules. - Remove unnecessary *_pcall_* API. Lua calls are fast enough, even through C. - Encode empty tables as objects - Add support for decoding all UCS-2 escape codes.
Diffstat (limited to 'lua_json.c')
-rw-r--r--lua_json.c726
1 files changed, 0 insertions, 726 deletions
diff --git a/lua_json.c b/lua_json.c
deleted file mode 100644
index 77a8d90..0000000
--- a/lua_json.c
+++ /dev/null
@@ -1,726 +0,0 @@
1/* Lua JSON routines
2 */
3
4/* Caveats:
5 * - No unicode support
6 * - JSON "null" values are represented as lightuserdata. Compare with
7 * json.null.
8 * - Parsing comments is not supported. According to json.org, this isn't
9 * part of the spec.
10 *
11 * Note: lua_json_decode() probably spends significant time rehashing
12 * tables since it is difficult to know their size ahead of time.
13 * Earlier JSON libraries didn't have this problem but the intermediate
14 * storage (and their implementations) were much slower anyway..
15 */
16
17/* FIXME:
18 * - Ensure JSON data is UTF-8. Fail otherwise.
19 * - Alternatively, dynamically support Unicode in JSON string. Return current locale.
20 * - Consider implementing other Unicode standards.
21 * - Option to encode non-printable characters? Only \" \\ are required
22 */
23
24#include <assert.h>
25#include <string.h>
26#include <math.h>
27
28#include <pthread.h>
29
30#include <lua.h>
31#include <lauxlib.h>
32
33#include "lua_json.h"
34#include "strbuf.h"
35
36#include "die.h"
37
38
39static void verify_arg_count(lua_State *l, int nargs)
40{
41 luaL_argcheck(l, lua_gettop(l) <= nargs, nargs + 1, "too many arguments");
42}
43
44/* ===== ENCODING ===== */
45
46static void json_encode_exception(lua_State *l, strbuf_t *json,
47 char *location, int lindex)
48
49{
50 strbuf_free(json);
51
52 luaL_error(l, "Cannot serialise %s: %s", location,
53 lua_typename(l, lua_type(l, lindex)));
54}
55
/* JSON escape a character if required, or return NULL.
 *
 * - c: character value; negative values (high bytes of a signed char)
 *      and anything >= 0x20 other than '"' and '\\' need no escaping.
 *
 * Returns a pointer to a static escape sequence (at most 6 characters
 * long), or NULL when the character can be emitted unchanged.
 *
 * JSON forbids raw control characters (0x00-0x1F) inside strings, so
 * every one of them is mapped to either its short form or \u00XX. */
static inline char *json_escape_char(int c)
{
    /* Indexed by character value for the control range 0x00-0x1F */
    static char *const control_escapes[0x20] = {
        "\\u0000", "\\u0001", "\\u0002", "\\u0003",
        "\\u0004", "\\u0005", "\\u0006", "\\u0007",
        "\\b",     "\\t",     "\\n",     "\\u000b",
        "\\f",     "\\r",     "\\u000e", "\\u000f",
        "\\u0010", "\\u0011", "\\u0012", "\\u0013",
        "\\u0014", "\\u0015", "\\u0016", "\\u0017",
        "\\u0018", "\\u0019", "\\u001a", "\\u001b",
        "\\u001c", "\\u001d", "\\u001e", "\\u001f"
    };

    if (c >= 0 && c < 0x20)
        return control_escapes[c];

    switch (c) {
    case '\\':
        return "\\\\";
    case '"':
        return "\\\"";
    default:
        return NULL;
    }
}
80
81/* json_append_string args:
82 * - lua_State
83 * - JSON strbuf
84 * - String (Lua stack index)
85 *
86 * Returns nothing. Doesn't remove string from Lua stack */
87static void json_append_string(lua_State *l, strbuf_t *json, int lindex)
88{
89 char *p;
90 int i;
91 const char *str;
92 size_t len;
93
94 str = lua_tolstring(l, lindex, &len);
95
96 /* Worst case is len * 6 (all unicode escapes).
97 * This buffer is reused constantly for small strings
98 * If there are any excess pages, they won't be hit anyway.
99 * This gains ~5% speedup. */
100 strbuf_ensure_empty_length(json, len * 6);
101
102 strbuf_append_char(json, '\"');
103 for (i = 0; i < len; i++) {
104 p = json_escape_char(str[i]);
105 if (p)
106 strbuf_append_string(json, p);
107 else
108 strbuf_append_char_unsafe(json, str[i]);
109 }
110 strbuf_append_char(json, '\"');
111}
112
113/* Find the size of the array on the top of the Lua stack
114 * -1 object (not a pure array)
115 * >=0 elements in array
116 */
117static int lua_array_length(lua_State *l)
118{
119 double k;
120 int max;
121
122 max = 0;
123
124 lua_pushnil(l);
125 /* table, startkey */
126 while (lua_next(l, -2) != 0) {
127 /* table, key, value */
128 if ((k = lua_tonumber(l, -2))) {
129 /* Integer >= 1 ? */
130 if (floor(k) == k && k >= 1) {
131 if (k > max)
132 max = k;
133 lua_pop(l, 1);
134 continue;
135 }
136 }
137
138 /* Must not be an array (non integer key) */
139 lua_pop(l, 2);
140 return -1;
141 }
142
143 return max;
144}
145
146static void json_append_data(lua_State *l, strbuf_t *json);
147
148/* json_append_array args:
149 * - lua_State
150 * - JSON strbuf
151 * - Size of passwd Lua array (top of stack) */
152static void json_append_array(lua_State *l, strbuf_t *json, int array_length)
153{
154 int comma, i;
155
156 strbuf_append_string(json, "[ ");
157
158 comma = 0;
159 for (i = 1; i <= array_length; i++) {
160 if (comma)
161 strbuf_append_string(json, ", ");
162 else
163 comma = 1;
164
165 lua_rawgeti(l, -1, i);
166 json_append_data(l, json);
167 lua_pop(l, 1);
168 }
169
170 strbuf_append_string(json, " ]");
171}
172
173static void json_append_object(lua_State *l, strbuf_t *json)
174{
175 int comma, keytype;
176
177 /* Object */
178 strbuf_append_string(json, "{ ");
179
180 lua_pushnil(l);
181 /* table, startkey */
182 comma = 0;
183 while (lua_next(l, -2) != 0) {
184 if (comma)
185 strbuf_append_string(json, ", ");
186 else
187 comma = 1;
188
189 /* table, key, value */
190 keytype = lua_type(l, -2);
191 if (keytype == LUA_TNUMBER) {
192 strbuf_append_fmt(json, "\"" LUA_NUMBER_FMT "\": ",
193 lua_tonumber(l, -2));
194 } else if (keytype == LUA_TSTRING) {
195 json_append_string(l, json, -2);
196 strbuf_append_string(json, ": ");
197 } else {
198 json_encode_exception(l, json, "table key", -2);
199 /* never returns */
200 }
201
202 /* table, key, value */
203 json_append_data(l, json);
204 lua_pop(l, 1);
205 /* table, key */
206 }
207
208 strbuf_append_string(json, " }");
209}
210
211/* Serialise Lua data into JSON string. */
212static void json_append_data(lua_State *l, strbuf_t *json)
213{
214 int len;
215
216 switch (lua_type(l, -1)) {
217 case LUA_TSTRING:
218 json_append_string(l, json, -1);
219 break;
220 case LUA_TNUMBER:
221 strbuf_append_fmt(json, "%lf", lua_tonumber(l, -1));
222 break;
223 case LUA_TBOOLEAN:
224 if (lua_toboolean(l, -1))
225 strbuf_append_string(json, "true");
226 else
227 strbuf_append_string(json, "false");
228 break;
229 case LUA_TTABLE:
230 len = lua_array_length(l);
231 if (len >= 0)
232 json_append_array(l, json, len);
233 else
234 json_append_object(l, json);
235 break;
236 case LUA_TNIL:
237 strbuf_append_string(json, "null");
238 break;
239 case LUA_TLIGHTUSERDATA:
240 if (lua_touserdata(l, -1) == NULL) {
241 strbuf_append_string(json, "null");
242 break;
243 }
244 default:
245 /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD,
246 * and LUA_TLIGHTUSERDATA) cannot be serialised */
247 json_encode_exception(l, json, "value", -1);
248 /* never returns */
249 }
250}
251
252/* lua_json_encode can throw an exception */
253char *lua_json_encode(lua_State *l, int *len)
254{
255 strbuf_t buf;
256 char *json;
257
258 strbuf_init(&buf, 0);
259 json_append_data(l, &buf);
260 json = strbuf_free_to_string(&buf, len);
261
262 return json;
263}
264
265/* lua_json_pcall_encode(object) must be called via lua_pcall().
266 * This allows a C caller to catch any errors without needing
267 * to register the string with Lua for garbage collection. */
268int lua_json_pcall_encode(lua_State *l)
269{
270 char *json;
271 int len;
272
273 verify_arg_count(l, 1);
274
275 json = lua_json_encode(l, &len);
276
277 lua_pushlightuserdata(l, json);
278 lua_pushnumber(l, len);
279
280 return 2;
281}
282
283int lua_api_json_encode(lua_State *l)
284{
285 char *json;
286 int len;
287
288 json = lua_json_encode(l, &len);
289 lua_pushlstring(l, json, len);
290 free(json);
291
292 return 1;
293}
294
295/* ===== DECODING ===== */
296
297typedef struct {
298 const char *data;
299 int index;
300 strbuf_t *tmp; /* Temporary storage for strings */
301} json_parse_t;
302
/* Token classes produced by the tokeniser. T_WHITESPACE, T_ERROR and
 * T_UNKNOWN are also used as character classes in the lookup table. */
typedef enum {
    T_OBJ_BEGIN,
    T_OBJ_END,
    T_ARR_BEGIN,
    T_ARR_END,
    T_STRING,
    T_NUMBER,
    T_BOOLEAN,
    T_NULL,
    T_COLON,
    T_COMMA,
    T_END,
    T_WHITESPACE,
    T_ERROR,
    T_UNKNOWN
} json_token_type_t;

/* Printable name for each token type, indexed by json_token_type_t and
 * terminated by a NULL entry. Designated initialisers keep each name
 * attached to its enum value. */
static const char *json_token_type_name[] = {
    [T_OBJ_BEGIN]   = "T_OBJ_BEGIN",
    [T_OBJ_END]     = "T_OBJ_END",
    [T_ARR_BEGIN]   = "T_ARR_BEGIN",
    [T_ARR_END]     = "T_ARR_END",
    [T_STRING]      = "T_STRING",
    [T_NUMBER]      = "T_NUMBER",
    [T_BOOLEAN]     = "T_BOOLEAN",
    [T_NULL]        = "T_NULL",
    [T_COLON]       = "T_COLON",
    [T_COMMA]       = "T_COMMA",
    [T_END]         = "T_END",
    [T_WHITESPACE]  = "T_WHITESPACE",
    [T_ERROR]       = "T_ERROR",
    [T_UNKNOWN]     = "T_UNKNOWN",
    [T_UNKNOWN + 1] = NULL
};
337
338typedef struct {
339 json_token_type_t type;
340 int index;
341 union {
342 char *string;
343 double number;
344 int boolean;
345 } value;
346 int length; /* FIXME: Merge into union? Won't save memory, but more logical */
347} json_token_t;
348
349static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token);
350
351static json_token_type_t json_ch2token[256];
352static char json_ch2escape[256];
353
354static void json_global_init()
355{
356 int i;
357
358 /* Tag all characters as an error */
359 for (i = 0; i < 256; i++)
360 json_ch2token[i] = T_ERROR;
361
362 /* Set tokens that require no further processing */
363 json_ch2token['{'] = T_OBJ_BEGIN;
364 json_ch2token['}'] = T_OBJ_END;
365 json_ch2token['['] = T_ARR_BEGIN;
366 json_ch2token[']'] = T_ARR_END;
367 json_ch2token[','] = T_COMMA;
368 json_ch2token[':'] = T_COLON;
369 json_ch2token['\0'] = T_END;
370 json_ch2token[' '] = T_WHITESPACE;
371 json_ch2token['\t'] = T_WHITESPACE;
372 json_ch2token['\n'] = T_WHITESPACE;
373 json_ch2token['\r'] = T_WHITESPACE;
374
375 /* Update characters that require further processing */
376 json_ch2token['n'] = T_UNKNOWN;
377 json_ch2token['t'] = T_UNKNOWN;
378 json_ch2token['f'] = T_UNKNOWN;
379 json_ch2token['"'] = T_UNKNOWN;
380 json_ch2token['-'] = T_UNKNOWN;
381 for (i = 0; i < 10; i++)
382 json_ch2token['0' + i] = T_UNKNOWN;
383
384 for (i = 0; i < 256; i++)
385 json_ch2escape[i] = 0; /* String error */
386
387 json_ch2escape['"'] = '"';
388 json_ch2escape['\\'] = '\\';
389 json_ch2escape['/'] = '/';
390 json_ch2escape['b'] = '\b';
391 json_ch2escape['t'] = '\t';
392 json_ch2escape['n'] = '\n';
393 json_ch2escape['f'] = '\f';
394 json_ch2escape['r'] = '\r';
395 json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */
396}
397
398static void json_next_string_token(json_parse_t *json, json_token_t *token)
399{
400 char ch;
401
402 /* Caller must ensure a string is next */
403 assert(json->data[json->index] == '"');
404
405 /* Gobble string. FIXME, ugly */
406
407 json->tmp->length = 0;
408 while ((ch = json->data[++json->index]) != '"') {
409 /* Handle escapes */
410 if (ch == '\\') {
411 /* Translate escape code */
412 ch = json_ch2escape[(unsigned char)json->data[++json->index]];
413 if (!ch) {
414 /* Invalid escape code */
415 token->type = T_ERROR;
416 return;
417 }
418 if (ch == 'u') {
419 /* Process unicode */
420 /* FIXME: cleanup memory handling. Implement iconv(3)
421 * conversion from UCS-2 -> UTF-8
422 */
423 if (!memcmp(&json->data[json->index], "u0000", 5)) {
424 /* Handle NULL */
425 ch = 0;
426 json->index += 4;
427 } else {
428 /* Remaining codepoints unhandled */
429 token->type = T_ERROR;
430 return;
431 }
432 }
433 }
434 strbuf_append_char(json->tmp, ch);
435 }
436 json->index++; /* Eat final quote (") */
437
438 strbuf_ensure_null(json->tmp);
439
440 token->type = T_STRING;
441 token->value.string = strbuf_string(json->tmp, NULL);
442 token->length = json->tmp->length;
443}
444
445static void json_next_number_token(json_parse_t *json, json_token_t *token)
446{
447 const char *startptr;
448 char *endptr;
449
450 /* FIXME:
451 * Verify that the number takes the following form:
452 * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
453 * strtod() below allows other forms (Hex, infinity, NaN,..) */
454 /* i = json->index;
455 if (json->data[i] == '-')
456 i++;
457 j = i;
458 while ('0' <= json->data[i] && json->data[i] <= '9')
459 i++;
460 if (i == j)
461 return T_ERROR; */
462
463 token->type = T_NUMBER;
464 startptr = &json->data[json->index];
465 token->value.number = strtod(&json->data[json->index], &endptr);
466 if (startptr == endptr)
467 token->type = T_ERROR;
468 else
469 json->index += endptr - startptr; /* Skip the processed number */
470
471 return;
472}
473
474/* Fills in the token struct.
475 * T_STRING will return a pointer to the json_parse_t temporary string
476 * T_ERROR will leave the json->index pointer at the error.
477 */
478static void json_next_token(json_parse_t *json, json_token_t *token)
479{
480 int ch;
481
482 /* Eat whitespace. FIXME: UGLY */
483 token->type = json_ch2token[(unsigned char)json->data[json->index]];
484 while (token->type == T_WHITESPACE)
485 token->type = json_ch2token[(unsigned char)json->data[++json->index]];
486
487 token->index = json->index;
488
489 /* Don't advance the pointer for an error or the end */
490 if (token->type == T_ERROR || token->type == T_END)
491 return;
492
493 /* Found a known token, advance index and return */
494 if (token->type != T_UNKNOWN) {
495 json->index++;
496 return;
497 }
498
499 /* Process characters which triggered T_UNKNOWN */
500 ch = json->data[json->index];
501
502 if (ch == '"') {
503 json_next_string_token(json, token);
504 return;
505 } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
506 json_next_number_token(json, token);
507 return;
508 } else if (!strncmp(&json->data[json->index], "true", 4)) {
509 token->type = T_BOOLEAN;
510 token->value.boolean = 1;
511 json->index += 4;
512 return;
513 } else if (!strncmp(&json->data[json->index], "false", 5)) {
514 token->type = T_BOOLEAN;
515 token->value.boolean = 0;
516 json->index += 5;
517 return;
518 } else if (!strncmp(&json->data[json->index], "null", 4)) {
519 token->type = T_NULL;
520 json->index += 4;
521 return;
522 }
523
524 token->type = T_ERROR;
525}
526
527/* This function does not return.
528 * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
529 * The only supported exception is the temporary parser string
530 * json->tmp struct.
531 * json and token should exist on the stack somewhere.
532 * luaL_error() will long_jmp and release the stack */
533static void json_throw_parse_error(lua_State *l, json_parse_t *json,
534 const char *exp, json_token_t *token)
535{
536 strbuf_free(json->tmp);
537 luaL_error(l, "Expected %s but found type <%s> at character %d",
538 exp, json_token_type_name[token->type], token->index);
539}
540
541static void json_parse_object_context(lua_State *l, json_parse_t *json)
542{
543 json_token_t token;
544
545 /* 3 slots required:
546 * .., table, key, value */
547 luaL_checkstack(l, 3, "too many nested data structures");
548
549 lua_newtable(l);
550
551 json_next_token(json, &token);
552
553 /* Handle empty objects */
554 if (token.type == T_OBJ_END)
555 return;
556
557 while (1) {
558 if (token.type != T_STRING)
559 json_throw_parse_error(l, json, "object key", &token);
560
561 lua_pushlstring(l, token.value.string, token.length); /* Push key */
562
563 json_next_token(json, &token);
564 if (token.type != T_COLON)
565 json_throw_parse_error(l, json, "colon", &token);
566
567 json_next_token(json, &token);
568 json_process_value(l, json, &token);
569 lua_rawset(l, -3); /* Set key = value */
570
571 json_next_token(json, &token);
572
573 if (token.type == T_OBJ_END)
574 return;
575
576 if (token.type != T_COMMA)
577 json_throw_parse_error(l, json, "comma or object end", &token);
578
579 json_next_token(json, &token);
580 }
581}
582
583/* Handle the array context */
584static void json_parse_array_context(lua_State *l, json_parse_t *json)
585{
586 json_token_t token;
587 int i;
588
589 /* 2 slots required:
590 * .., table, value */
591 luaL_checkstack(l, 2, "too many nested data structures");
592
593 lua_newtable(l);
594
595 json_next_token(json, &token);
596
597 /* Handle empty arrays */
598 if (token.type == T_ARR_END)
599 return;
600
601 i = 1;
602 while (1) {
603 json_process_value(l, json, &token);
604 lua_rawseti(l, -2, i); /* arr[i] = value */
605
606 json_next_token(json, &token);
607
608 if (token.type == T_ARR_END)
609 return;
610
611 if (token.type != T_COMMA)
612 json_throw_parse_error(l, json, "comma or array end", &token);
613
614 json_next_token(json, &token);
615 i++;
616 }
617}
618
619/* Handle the "value" context */
620static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token)
621{
622 switch (token->type) {
623 case T_STRING:
624 lua_pushlstring(l, token->value.string, token->length);
625 break;;
626 case T_NUMBER:
627 lua_pushnumber(l, token->value.number);
628 break;;
629 case T_BOOLEAN:
630 lua_pushboolean(l, token->value.boolean);
631 break;;
632 case T_OBJ_BEGIN:
633 json_parse_object_context(l, json);
634 break;;
635 case T_ARR_BEGIN:
636 json_parse_array_context(l, json);
637 break;;
638 case T_NULL:
639 /* In Lua, setting "t[k] = nil" will delete k from the table.
640 * Hence a NULL pointer lightuserdata object is used instead */
641 lua_pushlightuserdata(l, NULL);
642 break;;
643 default:
644 json_throw_parse_error(l, json, "value", token);
645 }
646}
647
648/* json_text must be null terminated string */
649void lua_json_decode(lua_State *l, const char *json_text)
650{
651 json_parse_t json;
652 json_token_t token;
653
654 json.data = json_text;
655 json.index = 0;
656 json.tmp = strbuf_new(0);
657
658 json_next_token(&json, &token);
659 json_process_value(l, &json, &token);
660
661 /* Ensure there is no more input left */
662 json_next_token(&json, &token);
663
664 if (token.type != T_END)
665 json_throw_parse_error(l, &json, "the end", &token);
666
667 strbuf_free(json.tmp);
668}
669
670/* lua_json_pcall_decode(string) must be called via lua_pcall().
671 * This allows a C caller to catch any errors so the string can
672 * be freed before returning to Lua. */
673int lua_json_pcall_decode(lua_State *l)
674{
675 const char *json;
676
677 verify_arg_count(l, 1);
678 luaL_argcheck(l, lua_islightuserdata(l, 1), 1,
679 "missing lightuserdata");
680
681 json = lua_touserdata(l, 1);
682 lua_pop(l, 1);
683
684 lua_json_decode(l, json);
685
686 return 1;
687}
688
689static int lua_api_json_decode(lua_State *l)
690{
691 const char *json;
692
693 verify_arg_count(l, 1);
694 json = luaL_checkstring(l, 1);
695
696 lua_json_decode(l, json);
697
698 lua_remove(l, 1);
699
700 return 1;
701}
702
703/* ===== INITIALISATION ===== */
704
705static pthread_once_t json_global_init_once = PTHREAD_ONCE_INIT;
706
707void lua_json_init(lua_State *l)
708{
709 luaL_Reg reg[] = {
710 { "encode", lua_api_json_encode },
711 { "decode", lua_api_json_decode },
712 { NULL, NULL }
713 };
714
715 luaL_register(l, "json", reg);
716
717 /* Set json.null, and pop "json" table from the stack */
718 lua_pushlightuserdata(l, NULL);
719 lua_setfield(l, -2, "null");
720 lua_pop(l, 1);
721
722 SYS_NOFAIL(pthread_once(&json_global_init_once, json_global_init));
723}
724
725/* vi:ai et sw=4 ts=4:
726 */