summaryrefslogtreecommitdiff
path: root/lua_json_decode.c
diff options
context:
space:
mode:
Diffstat (limited to 'lua_json_decode.c')
-rw-r--r--lua_json_decode.c405
1 files changed, 405 insertions, 0 deletions
diff --git a/lua_json_decode.c b/lua_json_decode.c
new file mode 100644
index 0000000..ae35574
--- /dev/null
+++ b/lua_json_decode.c
@@ -0,0 +1,405 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <lua.h>
#include <lauxlib.h>
#include "strbuf.h"
6
/* Caveats:
 * - NULL values do not work in objects (unsupported by Lua tables).
 *   - Could use a special "null" table object, that is unique
 * - NULL values work in arrays (probably not at the end)
 */
12
/* FIXME:
 * - Ensure JSON data is UTF-8. Fail otherwise.
 *   - Alternatively, dynamically support Unicode in JSON string. Return current locale.
 * - Use lua_checkstack() to ensure there is enough stack space left to
 *   fulfill an operation. What happens if we don't, is that acceptable too?
 *   Does lua_checkstack grow the stack, or merely check if it is possible?
 * - Merge encode/decode files
 */
21
22typedef struct {
23 const char *data;
24 int index;
25 strbuf_t *tmp; /* Temporary storage for strings */
26} json_parse_t;
27
/* Token classes produced by the tokeniser.
 *
 * The enumerators are used as indices into json_token_type_name[], so the
 * two lists must be kept in the same order. */
typedef enum {
    T_OBJ_BEGIN,    /* '{' */
    T_OBJ_END,      /* '}' */
    T_ARR_BEGIN,    /* '[' */
    T_ARR_END,      /* ']' */
    T_STRING,       /* Quoted string */
    T_NUMBER,       /* Numeric literal */
    T_BOOLEAN,      /* true / false */
    T_NULL,         /* null */
    T_COLON,        /* ':' */
    T_COMMA,        /* ',' */
    T_END,          /* Terminating NUL of the input */
    T_WHITESPACE,   /* Space, tab, newline or carriage return */
    T_ERROR,        /* Character or sequence that is not valid here */
    T_UNKNOWN       /* Character that needs further inspection */
} json_token_type_t;
44
/* Printable names for json_token_type_t values, indexed by the enumerator
 * and terminated by a NULL sentinel. Both the strings and the pointer
 * array itself are const: the table is read-only. */
static const char *const json_token_type_name[] = {
    "T_OBJ_BEGIN",
    "T_OBJ_END",
    "T_ARR_BEGIN",
    "T_ARR_END",
    "T_STRING",
    "T_NUMBER",
    "T_BOOLEAN",
    "T_NULL",
    "T_COLON",
    "T_COMMA",
    "T_END",
    "T_WHITESPACE",
    "T_ERROR",
    "T_UNKNOWN",
    NULL
};
62
63typedef struct {
64 json_token_type_t type;
65 int index;
66 union {
67 char *string;
68 double number;
69 int boolean;
70 } value;
71 int length; /* FIXME: Merge into union? Won't save memory, but more logical */
72} json_token_t;
73
74static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token);
75
76static json_token_type_t json_ch2token[256];
77static char json_ch2escape[256];
78
79void json_init_lookup_tables()
80{
81 int i;
82
83 /* Tag all characters as an error */
84 for (i = 0; i < 256; i++)
85 json_ch2token[i] = T_ERROR;
86
87 /* Set tokens that require no further processing */
88 json_ch2token['{'] = T_OBJ_BEGIN;
89 json_ch2token['}'] = T_OBJ_END;
90 json_ch2token['['] = T_ARR_BEGIN;
91 json_ch2token[']'] = T_ARR_END;
92 json_ch2token[','] = T_COMMA;
93 json_ch2token[':'] = T_COLON;
94 json_ch2token['\0'] = T_END;
95 json_ch2token[' '] = T_WHITESPACE;
96 json_ch2token['\t'] = T_WHITESPACE;
97 json_ch2token['\n'] = T_WHITESPACE;
98 json_ch2token['\r'] = T_WHITESPACE;
99
100 /* Update characters that require further processing */
101 json_ch2token['n'] = T_UNKNOWN;
102 json_ch2token['t'] = T_UNKNOWN;
103 json_ch2token['f'] = T_UNKNOWN;
104 json_ch2token['"'] = T_UNKNOWN;
105 json_ch2token['-'] = T_UNKNOWN;
106 for (i = 0; i < 10; i++)
107 json_ch2token['0' + i] = T_UNKNOWN;
108
109 for (i = 0; i < 256; i++)
110 json_ch2escape[i] = 0; /* String error */
111
112 json_ch2escape['"'] = '"';
113 json_ch2escape['\\'] = '\\';
114 json_ch2escape['/'] = '/';
115 json_ch2escape['b'] = '\b';
116 json_ch2escape['t'] = '\t';
117 json_ch2escape['n'] = '\n';
118 json_ch2escape['f'] = '\f';
119 json_ch2escape['r'] = '\r';
120 json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */
121}
122
123static void json_next_string_token(json_parse_t *json, json_token_t *token)
124{
125 char ch;
126
127 /* Caller must ensure a string is next */
128 assert(json->data[json->index] == '"');
129
130 /* Gobble string. FIXME, ugly */
131
132 json->tmp->length = 0;
133 while ((ch = json->data[++json->index]) != '"') {
134 /* Handle escapes */
135 if (ch == '\\') {
136 /* Translate escape code */
137 ch = json_ch2escape[(unsigned char)json->data[++json->index]];
138 if (!ch) {
139 /* Invalid escape code */
140 token->type = T_ERROR;
141 return;
142 }
143 if (ch == 'u') {
144 /* Process unicode */
145 /* FIXME: cleanup memory handling. Implement iconv(3)
146 * conversion from UCS-2 -> UTF-8
147 */
148 if (!memcmp(&json->data[json->index], "u0000", 5)) {
149 /* Handle NULL */
150 ch = 0;
151 json->index += 4;
152 } else {
153 /* Remaining codepoints unhandled */
154 token->type = T_ERROR;
155 return;
156 }
157 }
158 }
159 strbuf_append_char(json->tmp, ch);
160 }
161 json->index++; /* Eat final quote (") */
162
163 strbuf_ensure_null(json->tmp);
164
165 token->type = T_STRING;
166 token->value.string = json->tmp->data;
167 token->length = json->tmp->length;
168}
169
170static void json_next_number_token(json_parse_t *json, json_token_t *token)
171{
172 const char *startptr;
173 char *endptr;
174
175 /* FIXME:
176 * Verify that the number takes the following form:
177 * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
178 * strtod() below allows other forms (Hex, infinity, NaN,..) */
179 /* i = json->index;
180 if (json->data[i] == '-')
181 i++;
182 j = i;
183 while ('0' <= json->data[i] && json->data[i] <= '9')
184 i++;
185 if (i == j)
186 return T_ERROR; */
187
188 token->type = T_NUMBER;
189 startptr = &json->data[json->index];
190 token->value.number = strtod(&json->data[json->index], &endptr);
191 if (startptr == endptr)
192 token->type = T_ERROR;
193 else
194 json->index += endptr - startptr; /* Skip the processed number */
195
196 return;
197}
198
199/* Fills in the token struct.
200 * T_STRING will return a pointer to the json_parse_t temporary string
201 * T_ERROR will leave the json->index pointer at the error.
202 */
203static void json_next_token(json_parse_t *json, json_token_t *token)
204{
205 int ch;
206
207 /* Eat whitespace. FIXME: UGLY */
208 token->type = json_ch2token[(unsigned char)json->data[json->index]];
209 while (token->type == T_WHITESPACE)
210 token->type = json_ch2token[(unsigned char)json->data[++json->index]];
211
212 token->index = json->index;
213
214 /* Don't advance the pointer for an error or the end */
215 if (token->type == T_ERROR || token->type == T_END)
216 return;
217
218 /* Found a known token, advance index and return */
219 if (token->type != T_UNKNOWN) {
220 json->index++;
221 return;
222 }
223
224 ch = json->data[json->index];
225
226 /* Process characters which triggered T_UNKNOWN */
227 if (ch == '"') {
228 json_next_string_token(json, token);
229 return;
230 } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
231 json_next_number_token(json, token);
232 return;
233 } else if (!strncmp(&json->data[json->index], "true", 4)) {
234 token->type = T_BOOLEAN;
235 token->value.boolean = 1;
236 json->index += 4;
237 return;
238 } else if (!strncmp(&json->data[json->index], "false", 5)) {
239 token->type = T_BOOLEAN;
240 token->value.boolean = 0;
241 json->index += 5;
242 return;
243 } else if (!strncmp(&json->data[json->index], "null", 4)) {
244 token->type = T_NULL;
245 json->index += 4;
246 return;
247 }
248
249 token->type = T_ERROR;
250}
251
252/* This function does not return.
253 * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
254 * The only allowed exception is the temporary parser string
255 * json->tmp struct.
256 * json and token should exist on the stack somewhere.
257 * luaL_error() will long_jmp and release the stack */
258static void json_throw_parse_error(lua_State *l, json_parse_t *json,
259 const char *exp, json_token_t *token)
260{
261 strbuf_free(json->tmp);
262 luaL_error(l, "Expected %s but found type <%s> at character %d",
263 exp, json_token_type_name[token->type], token->index);
264}
265
266static void json_parse_object_context(lua_State *l, json_parse_t *json)
267{
268 json_token_t token;
269
270 lua_newtable(l);
271
272 json_next_token(json, &token);
273
274 /* Handle empty objects */
275 if (token.type == T_OBJ_END)
276 return;
277
278 while (1) {
279 if (token.type != T_STRING)
280 json_throw_parse_error(l, json, "object key", &token);
281
282 lua_pushlstring(l, token.value.string, token.length); /* Push key */
283
284 json_next_token(json, &token);
285 if (token.type != T_COLON)
286 json_throw_parse_error(l, json, "colon", &token);
287
288 json_next_token(json, &token);
289 json_process_value(l, json, &token);
290 lua_rawset(l, -3); /* Set key = value */
291
292 json_next_token(json, &token);
293
294 if (token.type == T_OBJ_END)
295 return;
296
297 if (token.type != T_COMMA)
298 json_throw_parse_error(l, json, "comma or object end", &token);
299
300 json_next_token(json, &token);
301 } while (1);
302
303}
304
305/* Handle the array context */
306static void json_parse_array_context(lua_State *l, json_parse_t *json)
307{
308 json_token_t token;
309 int i;
310
311 lua_newtable(l);
312
313 json_next_token(json, &token);
314
315 /* Handle empty arrays */
316 if (token.type == T_ARR_END)
317 return;
318
319 i = 1;
320 while (1) {
321 json_process_value(l, json, &token);
322 lua_rawseti(l, -2, i); /* arr[i] = value */
323
324 json_next_token(json, &token);
325
326 if (token.type == T_ARR_END)
327 return;
328
329 if (token.type != T_COMMA)
330 json_throw_parse_error(l, json, "comma or array end", &token);
331
332 json_next_token(json, &token);
333 i++;
334 }
335}
336
337/* Handle the "value" context */
338static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token)
339{
340 switch (token->type) {
341 case T_STRING:
342 lua_pushlstring(l, token->value.string, token->length);
343 break;;
344 case T_NUMBER:
345 lua_pushnumber(l, token->value.number);
346 break;;
347 case T_BOOLEAN:
348 lua_pushboolean(l, token->value.boolean);
349 break;;
350 case T_OBJ_BEGIN:
351 json_parse_object_context(l, json);
352 break;;
353 case T_ARR_BEGIN:
354 json_parse_array_context(l, json);
355 break;;
356 case T_NULL:
357 lua_pushnil(l);
358 break;;
359 default:
360 json_throw_parse_error(l, json, "value", token);
361 }
362}
363
364/* json_text must be null terminated string */
365void json_parse(lua_State *l, const char *json_text)
366{
367 json_parse_t json;
368 json_token_t token;
369
370 json.data = json_text;
371 json.index = 0;
372 json.tmp = strbuf_new();
373 json.tmp->scale = 256;
374
375 json_next_token(&json, &token);
376 json_process_value(l, &json, &token);
377
378 /* Ensure there is no more input left */
379 json_next_token(&json, &token);
380
381 if (token.type != T_END)
382 json_throw_parse_error(l, &json, "the end", &token);
383
384 strbuf_free(json.tmp);
385}
386
387int lua_json_decode(lua_State *l)
388{
389 int i, n;
390
391 n = lua_gettop(l);
392
393 for (i = 1; i <= n; i++) {
394 if (lua_isstring(l, i)) {
395 json_parse(l, lua_tostring(l, i));
396 } else {
397 lua_pushnil(l);
398 }
399 }
400
401 return n; /* Number of results */
402}
403
404/* vi:ai et sw=4 ts=4:
405 */