aboutsummaryrefslogtreecommitdiff
path: root/lua_cjson.c
diff options
context:
space:
mode:
Diffstat (limited to 'lua_cjson.c')
-rw-r--r--lua_cjson.c780
1 files changed, 780 insertions, 0 deletions
diff --git a/lua_cjson.c b/lua_cjson.c
new file mode 100644
index 0000000..203b3b8
--- /dev/null
+++ b/lua_cjson.c
@@ -0,0 +1,780 @@
1/* Lua JSON routines
2 */
3
4/* Caveats:
5 * - Assumes strings are valid UTF-8 and mostly treats them as opaque
6 * binary data. Will not throw an exception on bad data.
7 * - Will decode \uXXXX escapes, but leaves high codepoints as UTF-8
8 * when encoding.
9 * - JSON "null" values are represented as lightuserdata. Compare with
10 * json.null.
11 * - Parsing comments is not supported. According to json.org, this isn't
12 * part of the spec.
13 * - Parser accepts number formats beyond the JSON spec.
14 *
15 * Note: lua_json_decode() probably spends significant time rehashing
16 * tables since it is difficult to know their size ahead of time.
17 * Earlier JSON libraries didn't have this problem but the intermediate
18 * storage (and their implementations) were much slower anyway..
19 */
20
21/* FIXME:
22 * - Option to encode non-printable characters? Only \" \\ are required
23 * - Protect against cycles when encoding JSON from a data structure
24 * - Max depth? Notice cycles?
25 * - Handle huge sparse arrays?
26 */
27
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include <pthread.h>

#include <lua.h>
#include <lauxlib.h>

#include "strbuf.h"
38
39/* ===== ENCODING ===== */
40
41static void json_encode_exception(lua_State *l, strbuf_t *json,
42 char *location, int lindex)
43
44{
45 strbuf_free(json);
46
47 luaL_error(l, "Cannot serialise %s: %s", location,
48 lua_typename(l, lua_type(l, lindex)));
49}
50
/* JSON escape a character if required, or return NULL.
 *
 * JSON requires '"', '\\' and every control character U+0000..U+001F to be
 * escaped inside strings. Control characters with a short form (\b \t \n
 * \f \r) use it; the others use the six character \u00XX form, so the
 * longest escape returned is 6 bytes.
 *
 * - c: character code. Values outside 0..0x1F other than '"' and '\\'
 *      (including negative values from a signed char) need no escaping.
 *
 * Returns a pointer to a static string, or NULL when c can be copied as is. */
static inline char *json_escape_char(int c)
{
    /* Indexed by character code 0x00..0x1F */
    static char *const control_escapes[32] = {
        "\\u0000", "\\u0001", "\\u0002", "\\u0003",
        "\\u0004", "\\u0005", "\\u0006", "\\u0007",
        "\\b",     "\\t",     "\\n",     "\\u000b",
        "\\f",     "\\r",     "\\u000e", "\\u000f",
        "\\u0010", "\\u0011", "\\u0012", "\\u0013",
        "\\u0014", "\\u0015", "\\u0016", "\\u0017",
        "\\u0018", "\\u0019", "\\u001a", "\\u001b",
        "\\u001c", "\\u001d", "\\u001e", "\\u001f"
    };

    if (c >= 0 && c < 0x20)
        return control_escapes[c];

    switch (c) {
    case '\\':
        return "\\\\";
    case '"':
        return "\\\"";
    }

    return NULL;
}
75
76/* json_append_string args:
77 * - lua_State
78 * - JSON strbuf
79 * - String (Lua stack index)
80 *
81 * Returns nothing. Doesn't remove string from Lua stack */
82static void json_append_string(lua_State *l, strbuf_t *json, int lindex)
83{
84 char *p;
85 int i;
86 const char *str;
87 size_t len;
88
89 str = lua_tolstring(l, lindex, &len);
90
91 /* Worst case is len * 6 (all unicode escapes).
92 * This buffer is reused constantly for small strings
93 * If there are any excess pages, they won't be hit anyway.
94 * This gains ~5% speedup. */
95 strbuf_ensure_empty_length(json, len * 6);
96
97 strbuf_append_char(json, '\"');
98 for (i = 0; i < len; i++) {
99 p = json_escape_char(str[i]);
100 if (p)
101 strbuf_append_string(json, p);
102 else
103 strbuf_append_char_unsafe(json, str[i]);
104 }
105 strbuf_append_char(json, '\"');
106}
107
108/* Find the size of the array on the top of the Lua stack
109 * -1 object (not a pure array)
110 * >=0 elements in array
111 */
112static int lua_array_length(lua_State *l)
113{
114 double k;
115 int max;
116
117 max = 0;
118
119 lua_pushnil(l);
120 /* table, startkey */
121 while (lua_next(l, -2) != 0) {
122 /* table, key, value */
123 if (lua_isnumber(l, -2) &&
124 (k = lua_tonumber(l, -2))) {
125 /* Integer >= 1 ? */
126 if (floor(k) == k && k >= 1) {
127 if (k > max)
128 max = k;
129 lua_pop(l, 1);
130 continue;
131 }
132 }
133
134 /* Must not be an array (non integer key) */
135 lua_pop(l, 2);
136 return -1;
137 }
138
139 return max;
140}
141
142static void json_append_data(lua_State *l, strbuf_t *json);
143
144/* json_append_array args:
145 * - lua_State
146 * - JSON strbuf
147 * - Size of passwd Lua array (top of stack) */
148static void json_append_array(lua_State *l, strbuf_t *json, int array_length)
149{
150 int comma, i;
151
152 strbuf_append_string(json, "[ ");
153
154 comma = 0;
155 for (i = 1; i <= array_length; i++) {
156 if (comma)
157 strbuf_append_string(json, ", ");
158 else
159 comma = 1;
160
161 lua_rawgeti(l, -1, i);
162 json_append_data(l, json);
163 lua_pop(l, 1);
164 }
165
166 strbuf_append_string(json, " ]");
167}
168
169static void json_append_object(lua_State *l, strbuf_t *json)
170{
171 int comma, keytype;
172
173 /* Object */
174 strbuf_append_string(json, "{ ");
175
176 lua_pushnil(l);
177 /* table, startkey */
178 comma = 0;
179 while (lua_next(l, -2) != 0) {
180 if (comma)
181 strbuf_append_string(json, ", ");
182 else
183 comma = 1;
184
185 /* table, key, value */
186 keytype = lua_type(l, -2);
187 if (keytype == LUA_TNUMBER) {
188 strbuf_append_fmt(json, "\"" LUA_NUMBER_FMT "\": ",
189 lua_tonumber(l, -2));
190 } else if (keytype == LUA_TSTRING) {
191 json_append_string(l, json, -2);
192 strbuf_append_string(json, ": ");
193 } else {
194 json_encode_exception(l, json, "table key", -2);
195 /* never returns */
196 }
197
198 /* table, key, value */
199 json_append_data(l, json);
200 lua_pop(l, 1);
201 /* table, key */
202 }
203
204 strbuf_append_string(json, " }");
205}
206
207/* Serialise Lua data into JSON string. */
208static void json_append_data(lua_State *l, strbuf_t *json)
209{
210 int len;
211
212 switch (lua_type(l, -1)) {
213 case LUA_TSTRING:
214 json_append_string(l, json, -1);
215 break;
216 case LUA_TNUMBER:
217 strbuf_append_fmt(json, "%lf", lua_tonumber(l, -1));
218 break;
219 case LUA_TBOOLEAN:
220 if (lua_toboolean(l, -1))
221 strbuf_append_string(json, "true");
222 else
223 strbuf_append_string(json, "false");
224 break;
225 case LUA_TTABLE:
226 len = lua_array_length(l);
227 if (len > 0)
228 json_append_array(l, json, len);
229 else
230 json_append_object(l, json);
231 break;
232 case LUA_TNIL:
233 strbuf_append_string(json, "null");
234 break;
235 case LUA_TLIGHTUSERDATA:
236 if (lua_touserdata(l, -1) == NULL) {
237 strbuf_append_string(json, "null");
238 break;
239 }
240 default:
241 /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD,
242 * and LUA_TLIGHTUSERDATA) cannot be serialised */
243 json_encode_exception(l, json, "value", -1);
244 /* never returns */
245 }
246}
247
248static int json_encode(lua_State *l)
249{
250 strbuf_t buf;
251 char *json;
252 int len;
253
254 luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
255
256 strbuf_init(&buf, 0);
257 json_append_data(l, &buf);
258 json = strbuf_free_to_string(&buf, &len);
259
260 lua_pushlstring(l, json, len);
261 free(json);
262
263 return 1;
264}
265
266/* ===== DECODING ===== */
267
268typedef struct {
269 const char *data;
270 int index;
271 strbuf_t *tmp; /* Temporary storage for strings */
272} json_parse_t;
273
/* Token kinds produced by the tokeniser.
 * The order must match json_token_type_name[]. */
typedef enum {
    T_OBJ_BEGIN,    /* { */
    T_OBJ_END,      /* } */
    T_ARR_BEGIN,    /* [ */
    T_ARR_END,      /* ] */
    T_STRING,
    T_NUMBER,
    T_BOOLEAN,
    T_NULL,
    T_COLON,        /* : */
    T_COMMA,        /* , */
    T_END,          /* NUL terminator of the input */
    T_WHITESPACE,   /* Skipped by json_next_token() */
    T_ERROR,        /* Character or sequence that cannot start/form a token */
    T_UNKNOWN       /* First character needs further processing */
} json_token_type_t;
290
/* Printable token names for error messages, indexed by json_token_type_t.
 * NULL terminated. */
static const char *json_token_type_name[] = {
    "T_OBJ_BEGIN", "T_OBJ_END",
    "T_ARR_BEGIN", "T_ARR_END",
    "T_STRING",    "T_NUMBER",
    "T_BOOLEAN",   "T_NULL",
    "T_COLON",     "T_COMMA",
    "T_END",       "T_WHITESPACE",
    "T_ERROR",     "T_UNKNOWN",
    NULL
};
308
309typedef struct {
310 json_token_type_t type;
311 int index;
312 union {
313 char *string;
314 double number;
315 int boolean;
316 } value;
317 int length; /* FIXME: Merge into union? Won't save memory, but more logical */
318} json_token_t;
319
320static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token);
321
322static json_token_type_t json_ch2token[256];
323static char json_ch2escape[256];
324
325static void json_global_init()
326{
327 int i;
328
329 /* Tag all characters as an error */
330 for (i = 0; i < 256; i++)
331 json_ch2token[i] = T_ERROR;
332
333 /* Set tokens that require no further processing */
334 json_ch2token['{'] = T_OBJ_BEGIN;
335 json_ch2token['}'] = T_OBJ_END;
336 json_ch2token['['] = T_ARR_BEGIN;
337 json_ch2token[']'] = T_ARR_END;
338 json_ch2token[','] = T_COMMA;
339 json_ch2token[':'] = T_COLON;
340 json_ch2token['\0'] = T_END;
341 json_ch2token[' '] = T_WHITESPACE;
342 json_ch2token['\t'] = T_WHITESPACE;
343 json_ch2token['\n'] = T_WHITESPACE;
344 json_ch2token['\r'] = T_WHITESPACE;
345
346 /* Update characters that require further processing */
347 json_ch2token['n'] = T_UNKNOWN;
348 json_ch2token['t'] = T_UNKNOWN;
349 json_ch2token['f'] = T_UNKNOWN;
350 json_ch2token['"'] = T_UNKNOWN;
351 json_ch2token['-'] = T_UNKNOWN;
352 for (i = 0; i < 10; i++)
353 json_ch2token['0' + i] = T_UNKNOWN;
354
355 for (i = 0; i < 256; i++)
356 json_ch2escape[i] = 0; /* String error */
357
358 json_ch2escape['"'] = '"';
359 json_ch2escape['\\'] = '\\';
360 json_ch2escape['/'] = '/';
361 json_ch2escape['b'] = '\b';
362 json_ch2escape['t'] = '\t';
363 json_ch2escape['n'] = '\n';
364 json_ch2escape['f'] = '\f';
365 json_ch2escape['r'] = '\r';
366 json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */
367}
368
/* Convert one ASCII hex digit (0-9, a-f, A-F) to its value 0..15.
 * Returns -1 for any other character. */
static inline int hexdigit2int(char hex)
{
    int lower;

    if (hex >= '0' && hex <= '9')
        return hex - '0';

    /* Setting bit 0x20 folds 'A'..'F' onto 'a'..'f' */
    lower = hex | 0x20;
    if (lower >= 'a' && lower <= 'f')
        return lower - 'a' + 10;

    return -1;
}
381
/* Decode exactly four hex digits starting at hex into a value 0..0xFFFF.
 * Returns -1 at the first character that is not a hex digit, which
 * includes the NUL terminator, so nothing past it is read. */
static int decode_hex4(const char *hex)
{
    int value = 0;
    int nibble;
    int pos;

    for (pos = 0; pos < 4; pos++) {
        nibble = hexdigit2int(hex[pos]);
        if (nibble < 0)
            return -1;

        /* Most significant digit first */
        value = (value << 4) | nibble;
    }

    return value;
}
402
/* Encode a codepoint of at most 0xFFFF as UTF-8.
 * - utf8: output buffer, must have room for 3 bytes
 * Returns the number of bytes written (1-3), or 0 if the codepoint is
 * larger than 0xFFFF (nothing is written in that case). */
static int codepoint_to_utf8(char *utf8, int codepoint)
{
    if (codepoint > 0xFFFF)
        return 0;

    if (codepoint <= 0x7F) {
        /* 0xxxxxxx */
        utf8[0] = codepoint;
        return 1;
    }

    if (codepoint <= 0x7FF) {
        /* 110xxxxx 10xxxxxx */
        utf8[0] = 0xC0 | (codepoint >> 6);
        utf8[1] = 0x80 | (codepoint & 0x3F);
        return 2;
    }

    /* 1110xxxx 10xxxxxx 10xxxxxx */
    utf8[0] = 0xE0 | (codepoint >> 12);
    utf8[1] = 0x80 | ((codepoint >> 6) & 0x3F);
    utf8[2] = 0x80 | (codepoint & 0x3F);
    return 3;
}
425
426
427/* Called when index pointing to beginning of UCS-2 hex code: uXXXX
428 * Translate to UTF-8 and append to temporary token string.
429 * Must advance index to the next character to be processed.
430 * Returns: 0 success
431 * -1 error
432 */
433static int json_append_unicode_escape(json_parse_t *json)
434{
435 char utf8[4]; /* 3 bytes of UTF-8 can handle UCS-2 */
436 int codepoint;
437 int len;
438
439 /* Skip 'u' */
440 json->index++;
441
442 /* Fetch UCS-2 codepoint */
443 codepoint = decode_hex4(&json->data[json->index]);
444 if (codepoint < 0) {
445 return -1;
446 }
447
448 /* Convert to UTF-8 */
449 len = codepoint_to_utf8(utf8, codepoint);
450 if (!len) {
451 return -1;
452 }
453
454 /* Append bytes and advance counter */
455 strbuf_append_mem(json->tmp, utf8, len);
456 json->index += 4;
457
458 return 0;
459}
460
461static void json_next_string_token(json_parse_t *json, json_token_t *token)
462{
463 char ch;
464
465 /* Caller must ensure a string is next */
466 assert(json->data[json->index] == '"');
467
468 /* Skip " */
469 json->index++;
470
471 /* json->tmp is the temporary strbuf used to accumulate the
472 * decoded string value. */
473 json->tmp->length = 0;
474 while ((ch = json->data[json->index]) != '"') {
475 if (!ch) {
476 /* Premature end of the string */
477 token->type = T_ERROR;
478 return;
479 }
480
481 /* Handle escapes */
482 if (ch == '\\') {
483 /* Skip \ and fetch escape character */
484 json->index++;
485 ch = json->data[json->index];
486
487 /* Translate escape code and append to tmp string */
488 ch = json_ch2escape[(unsigned char)ch];
489 if (ch == 'u') {
490 if (json_append_unicode_escape(json) < 0)
491 continue;
492
493 token->type = T_ERROR;
494 return;
495 }
496 if (!ch) {
497 /* Invalid escape code */
498 token->type = T_ERROR;
499 return;
500 }
501 }
502 /* Append normal character or translated single character
503 * Unicode escapes are handled above */
504 strbuf_append_char(json->tmp, ch);
505 json->index++;
506 }
507 json->index++; /* Eat final quote (") */
508
509 strbuf_ensure_null(json->tmp);
510
511 token->type = T_STRING;
512 token->value.string = strbuf_string(json->tmp, NULL);
513 token->length = json->tmp->length;
514}
515
516static void json_next_number_token(json_parse_t *json, json_token_t *token)
517{
518 const char *startptr;
519 char *endptr;
520
521 /* FIXME:
522 * Verify that the number takes the following form:
523 * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
524 * strtod() below allows other forms (Hex, infinity, NaN,..) */
525 /* i = json->index;
526 if (json->data[i] == '-')
527 i++;
528 j = i;
529 while ('0' <= json->data[i] && json->data[i] <= '9')
530 i++;
531 if (i == j)
532 return T_ERROR; */
533
534 token->type = T_NUMBER;
535 startptr = &json->data[json->index];
536 token->value.number = strtod(&json->data[json->index], &endptr);
537 if (startptr == endptr)
538 token->type = T_ERROR;
539 else
540 json->index += endptr - startptr; /* Skip the processed number */
541
542 return;
543}
544
545/* Fills in the token struct.
546 * T_STRING will return a pointer to the json_parse_t temporary string
547 * T_ERROR will leave the json->index pointer at the error.
548 */
549static void json_next_token(json_parse_t *json, json_token_t *token)
550{
551 int ch;
552
553 /* Eat whitespace. FIXME: UGLY */
554 token->type = json_ch2token[(unsigned char)json->data[json->index]];
555 while (token->type == T_WHITESPACE)
556 token->type = json_ch2token[(unsigned char)json->data[++json->index]];
557
558 token->index = json->index;
559
560 /* Don't advance the pointer for an error or the end */
561 if (token->type == T_ERROR || token->type == T_END)
562 return;
563
564 /* Found a known token, advance index and return */
565 if (token->type != T_UNKNOWN) {
566 json->index++;
567 return;
568 }
569
570 /* Process characters which triggered T_UNKNOWN */
571 ch = json->data[json->index];
572
573 if (ch == '"') {
574 json_next_string_token(json, token);
575 return;
576 } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
577 json_next_number_token(json, token);
578 return;
579 } else if (!strncmp(&json->data[json->index], "true", 4)) {
580 token->type = T_BOOLEAN;
581 token->value.boolean = 1;
582 json->index += 4;
583 return;
584 } else if (!strncmp(&json->data[json->index], "false", 5)) {
585 token->type = T_BOOLEAN;
586 token->value.boolean = 0;
587 json->index += 5;
588 return;
589 } else if (!strncmp(&json->data[json->index], "null", 4)) {
590 token->type = T_NULL;
591 json->index += 4;
592 return;
593 }
594
595 token->type = T_ERROR;
596}
597
598/* This function does not return.
599 * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
600 * The only supported exception is the temporary parser string
601 * json->tmp struct.
602 * json and token should exist on the stack somewhere.
603 * luaL_error() will long_jmp and release the stack */
604static void json_throw_parse_error(lua_State *l, json_parse_t *json,
605 const char *exp, json_token_t *token)
606{
607 strbuf_free(json->tmp);
608 luaL_error(l, "Expected %s but found type <%s> at character %d",
609 exp, json_token_type_name[token->type], token->index);
610}
611
612static void json_parse_object_context(lua_State *l, json_parse_t *json)
613{
614 json_token_t token;
615
616 /* 3 slots required:
617 * .., table, key, value */
618 luaL_checkstack(l, 3, "too many nested data structures");
619
620 lua_newtable(l);
621
622 json_next_token(json, &token);
623
624 /* Handle empty objects */
625 if (token.type == T_OBJ_END)
626 return;
627
628 while (1) {
629 if (token.type != T_STRING)
630 json_throw_parse_error(l, json, "object key", &token);
631
632 lua_pushlstring(l, token.value.string, token.length); /* Push key */
633
634 json_next_token(json, &token);
635 if (token.type != T_COLON)
636 json_throw_parse_error(l, json, "colon", &token);
637
638 json_next_token(json, &token);
639 json_process_value(l, json, &token);
640 lua_rawset(l, -3); /* Set key = value */
641
642 json_next_token(json, &token);
643
644 if (token.type == T_OBJ_END)
645 return;
646
647 if (token.type != T_COMMA)
648 json_throw_parse_error(l, json, "comma or object end", &token);
649
650 json_next_token(json, &token);
651 }
652}
653
654/* Handle the array context */
655static void json_parse_array_context(lua_State *l, json_parse_t *json)
656{
657 json_token_t token;
658 int i;
659
660 /* 2 slots required:
661 * .., table, value */
662 luaL_checkstack(l, 2, "too many nested data structures");
663
664 lua_newtable(l);
665
666 json_next_token(json, &token);
667
668 /* Handle empty arrays */
669 if (token.type == T_ARR_END)
670 return;
671
672 i = 1;
673 while (1) {
674 json_process_value(l, json, &token);
675 lua_rawseti(l, -2, i); /* arr[i] = value */
676
677 json_next_token(json, &token);
678
679 if (token.type == T_ARR_END)
680 return;
681
682 if (token.type != T_COMMA)
683 json_throw_parse_error(l, json, "comma or array end", &token);
684
685 json_next_token(json, &token);
686 i++;
687 }
688}
689
690/* Handle the "value" context */
691static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token)
692{
693 switch (token->type) {
694 case T_STRING:
695 lua_pushlstring(l, token->value.string, token->length);
696 break;;
697 case T_NUMBER:
698 lua_pushnumber(l, token->value.number);
699 break;;
700 case T_BOOLEAN:
701 lua_pushboolean(l, token->value.boolean);
702 break;;
703 case T_OBJ_BEGIN:
704 json_parse_object_context(l, json);
705 break;;
706 case T_ARR_BEGIN:
707 json_parse_array_context(l, json);
708 break;;
709 case T_NULL:
710 /* In Lua, setting "t[k] = nil" will delete k from the table.
711 * Hence a NULL pointer lightuserdata object is used instead */
712 lua_pushlightuserdata(l, NULL);
713 break;;
714 default:
715 json_throw_parse_error(l, json, "value", token);
716 }
717}
718
719/* json_text must be null terminated string */
720static void lua_json_decode(lua_State *l, const char *json_text)
721{
722 json_parse_t json;
723 json_token_t token;
724
725 json.data = json_text;
726 json.index = 0;
727 json.tmp = strbuf_new(0);
728
729 json_next_token(&json, &token);
730 json_process_value(l, &json, &token);
731
732 /* Ensure there is no more input left */
733 json_next_token(&json, &token);
734
735 if (token.type != T_END)
736 json_throw_parse_error(l, &json, "the end", &token);
737
738 strbuf_free(json.tmp);
739}
740
741static int json_decode(lua_State *l)
742{
743 const char *json;
744
745 luaL_argcheck(l, lua_gettop(l) <= 1, 2, "found too many arguments");
746 json = luaL_checkstring(l, 1);
747
748 lua_json_decode(l, json);
749
750 return 1;
751}
752
753/* ===== INITIALISATION ===== */
754
755/* FIXME: Rewrite to keep lookup tables within Lua (userdata?)
756 * Remove pthread dependency */
757static pthread_once_t json_global_init_once = PTHREAD_ONCE_INIT;
758
759int luaopen_cjson(lua_State *l)
760{
761 luaL_Reg reg[] = {
762 { "encode", json_encode },
763 { "decode", json_decode },
764 { NULL, NULL }
765 };
766
767 luaL_register(l, "cjson", reg);
768
769 /* Set cjson.null */
770 lua_pushlightuserdata(l, NULL);
771 lua_setfield(l, -2, "null");
772
773 pthread_once(&json_global_init_once, json_global_init);
774
775 /* Return cjson table */
776 return 1;
777}
778
779/* vi:ai et sw=4 ts=4:
780 */