From bbf1f5d35e8312fb7373a997664309adf9527af4 Mon Sep 17 00:00:00 2001
From: Mark Pulford <mark@kyne.com.au>
Date: Fri, 15 Apr 2011 20:58:53 +0930
Subject: Initial commit

Split Lua JSON from parent project to create standalone module.
Remove unnecessary files from new repo.
---
 lua_json_decode.c | 405 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 405 insertions(+)
 create mode 100644 lua_json_decode.c

(limited to 'lua_json_decode.c')

diff --git a/lua_json_decode.c b/lua_json_decode.c
new file mode 100644
index 0000000..ae35574
--- /dev/null
+++ b/lua_json_decode.c
@@ -0,0 +1,405 @@
+#include <assert.h>
+#include <string.h>
+#include <lua.h>
+#include <lauxlib.h>
+#include "strbuf.h"
+
+/* Caveats:
+ * - NULL values do not work in objects (unsupported by Lua tables).
+ *   - Could use a special "null" table object, that is unique
+ * - NULL values work in arrays (probably not at the end)
+ */
+
+/* FIXME:
+ * - Ensure JSON data is UTF-8. Fail otherwise.
+ *   - Alternatively, dynamically support Unicode in JSON string. Return current locale.
+ * - Use lua_checkstack() to ensure there is enough stack space left to
+ *   fulfill an operation. What happens if we don't, is that acceptable too?
+ *   Does lua_checkstack grow the stack, or merely check if it is possible?
+ * - Merge encode/decode files
+ */
+
+typedef struct {      /* Parser state passed to every tokenizer/parser function */
+    const char *data; /* JSON text being parsed (json_parse() requires a null terminated string) */
+    int index;        /* Offset into data of the next character to examine */
+    strbuf_t *tmp;    /* Temporary storage for strings */
+} json_parse_t;
+
+typedef enum {       /* Token types; keep in step with json_token_type_name[] */
+    T_OBJ_BEGIN,     /* '{' */
+    T_OBJ_END,       /* '}' */
+    T_ARR_BEGIN,     /* '[' */
+    T_ARR_END,       /* ']' */
+    T_STRING,        /* Quoted string */
+    T_NUMBER,        /* Number parsed by strtod() */
+    T_BOOLEAN,       /* "true" or "false" */
+    T_NULL,          /* "null" */
+    T_COLON,         /* ':' */
+    T_COMMA,         /* ',' */
+    T_END,           /* Terminating '\0' of the input */
+    T_WHITESPACE,    /* Space, tab, LF or CR; skipped by json_next_token() */
+    T_ERROR,         /* Byte cannot start a token, or tokenizing failed */
+    T_UNKNOWN        /* First byte alone is not enough; needs further processing */
+} json_token_type_t;
+
+static const char *json_token_type_name[] = {  /* Names for error messages, indexed by json_token_type_t (same order as the enum) */
+    "T_OBJ_BEGIN",
+    "T_OBJ_END",
+    "T_ARR_BEGIN",
+    "T_ARR_END",
+    "T_STRING",
+    "T_NUMBER",
+    "T_BOOLEAN",
+    "T_NULL",
+    "T_COLON",
+    "T_COMMA",
+    "T_END",
+    "T_WHITESPACE",
+    "T_ERROR",
+    "T_UNKNOWN",
+    NULL
+};
+
+typedef struct {                /* One token as filled in by json_next_token() */
+    json_token_type_t type;
+    int index;                  /* Offset in the input where the token starts */
+    union {
+        char *string;           /* T_STRING: points at the parser's temporary string buffer */
+        double number;          /* T_NUMBER */
+        int boolean;            /* T_BOOLEAN: 1 for true, 0 for false */
+    } value;
+    int length; /* Length of value.string. FIXME: Merge into union? Won't save memory, but more logical */
+} json_token_t;
+
+static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token);
+
+static json_token_type_t json_ch2token[256]; /* First byte of a token -> token type */
+static char json_ch2escape[256];             /* Byte after a backslash -> decoded char; 0 = invalid escape */
+
+/* Build json_ch2token[] and json_ch2escape[]; the tokenizer reads both tables. */
+void json_init_lookup_tables(void)
+{
+    int i;
+
+    /* Default: any byte not listed below is an error */
+    for (i = 0; i < 256; i++)
+        json_ch2token[i] = T_ERROR;
+
+    /* Set tokens that require no further processing */
+    json_ch2token['{'] = T_OBJ_BEGIN;
+    json_ch2token['}'] = T_OBJ_END;
+    json_ch2token['['] = T_ARR_BEGIN;
+    json_ch2token[']'] = T_ARR_END;
+    json_ch2token[','] = T_COMMA;
+    json_ch2token[':'] = T_COLON;
+    json_ch2token['\0'] = T_END;
+    json_ch2token[' '] = T_WHITESPACE;
+    json_ch2token['\t'] = T_WHITESPACE;
+    json_ch2token['\n'] = T_WHITESPACE;
+    json_ch2token['\r'] = T_WHITESPACE;
+
+    /* Update characters that require further processing */
+    json_ch2token['n'] = T_UNKNOWN;
+    json_ch2token['t'] = T_UNKNOWN;
+    json_ch2token['f'] = T_UNKNOWN;
+    json_ch2token['"'] = T_UNKNOWN;
+    json_ch2token['-'] = T_UNKNOWN;
+    for (i = 0; i < 10; i++)
+        json_ch2token['0' + i] = T_UNKNOWN;
+
+    /* 0 marks an invalid escape; valid escapes map to their decoded char */
+    memset(json_ch2escape, 0, sizeof(json_ch2escape));
+    json_ch2escape['"'] = '"';
+    json_ch2escape['\\'] = '\\';
+    json_ch2escape['/'] = '/';
+    json_ch2escape['b'] = '\b';
+    json_ch2escape['t'] = '\t';
+    json_ch2escape['n'] = '\n';
+    json_ch2escape['f'] = '\f';
+    json_ch2escape['r'] = '\r';
+    json_ch2escape['u'] = 'u';  /* This needs to be parsed as unicode */
+}
+
+/* Decode the quoted string at json->index into json->tmp and fill in token.
+ * Sets T_ERROR on an unterminated string or an unsupported escape. */
+static void json_next_string_token(json_parse_t *json, json_token_t *token)
+{
+    char ch;
+
+    /* Caller must ensure a string is next */
+    assert(json->data[json->index] == '"');
+
+    json->tmp->length = 0;
+    while ((ch = json->data[++json->index]) != '"') {
+        if (ch == '\0') {
+            /* Unterminated string: stop at the NUL, never read past it */
+            token->type = T_ERROR;
+            return;
+        }
+        /* Handle escapes */
+        if (ch == '\\') {
+            /* Translate escape code */
+            ch = json_ch2escape[(unsigned char)json->data[++json->index]];
+            if (!ch) {
+                /* Invalid escape code */
+                token->type = T_ERROR;
+                return;
+            }
+            if (ch == 'u') {
+                /* FIXME: only \u0000 is handled. Implement iconv(3)
+                 * conversion from UCS-2 -> UTF-8. strncmp() (not memcmp())
+                 * so the comparison cannot run past the terminating NUL. */
+                if (strncmp(&json->data[json->index], "u0000", 5)) {
+                    token->type = T_ERROR;
+                    return;
+                }
+                ch = 0;
+                json->index += 4;
+            }
+        }
+        strbuf_append_char(json->tmp, ch);
+    }
+    json->index++;  /* Eat final quote (") */
+
+    strbuf_ensure_null(json->tmp);
+    token->type = T_STRING;
+    token->value.string = json->tmp->data;
+    token->length = json->tmp->length;
+}
+
+/* Parse the number at json->index with strtod() and fill in token.
+ * Sets T_ERROR (leaving json->index untouched) when the text is not a
+ * decimal number. */
+static void json_next_number_token(json_parse_t *json, json_token_t *token)
+{
+    const char *startptr = &json->data[json->index];
+    const char *digits = startptr;
+    char *endptr;
+
+    /* strtod() also accepts hex, infinity and NaN, which JSON forbids.
+     * Requiring a decimal digit after the optional sign, and rejecting a
+     * "0x"/"0X" prefix, rules those forms out.
+     * FIXME: leading zeros (e.g. "012") are still accepted. */
+    if (*digits == '-')
+        digits++;
+    if (*digits < '0' || *digits > '9' ||
+        (digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X'))) {
+        token->type = T_ERROR;
+        return;
+    }
+
+    token->type = T_NUMBER;
+    token->value.number = strtod(startptr, &endptr);
+    if (startptr == endptr)
+        token->type = T_ERROR;
+    else
+        json->index += endptr - startptr;   /* Skip the processed number */
+}
+
+/* Read the next token from json->data into *token.
+ * For T_STRING, token->value.string points at the json_parse_t temporary
+ * string. On T_ERROR, json->index is left at the error.
+ */
+static void json_next_token(json_parse_t *json, json_token_t *token)
+{
+    const char *text;
+
+    /* Skip leading whitespace, classifying each byte via the lookup table */
+    for (;;) {
+        token->type = json_ch2token[(unsigned char)json->data[json->index]];
+        if (token->type != T_WHITESPACE)
+            break;
+        json->index++;
+    }
+    token->index = json->index;
+
+    switch (token->type) {
+    case T_ERROR:
+    case T_END:
+        /* Don't advance the index for an error or the end */
+        return;
+    case T_UNKNOWN:
+        /* Needs a closer look at the text; handled below */
+        break;
+    default:
+        /* Single-character token: consume it */
+        json->index++;
+        return;
+    }
+
+    text = &json->data[json->index];
+
+    if (*text == '"') {
+        json_next_string_token(json, token);
+    } else if (*text == '-' || (*text >= '0' && *text <= '9')) {
+        json_next_number_token(json, token);
+    } else if (strncmp(text, "true", 4) == 0) {
+        token->type = T_BOOLEAN;
+        token->value.boolean = 1;
+        json->index += 4;
+    } else if (strncmp(text, "false", 5) == 0) {
+        token->type = T_BOOLEAN;
+        token->value.boolean = 0;
+        json->index += 5;
+    } else if (strncmp(text, "null", 4) == 0) {
+        token->type = T_NULL;
+        json->index += 4;
+    } else {
+        token->type = T_ERROR;    /* Starts like a literal but is not one */
+    }
+}
+
+/* Raise a Lua error describing an unexpected token. Does not return:
+ * luaL_error() will long_jmp and release the stack, so DO NOT CALL WITH
+ * DYNAMIC MEMORY ALLOCATED other than json->tmp (freed here).
+ * json and token should exist on the stack somewhere. */
+static void json_throw_parse_error(lua_State *l, json_parse_t *json,
+                                   const char *exp, json_token_t *token)
+{
+    const char *found = json_token_type_name[token->type];
+
+    strbuf_free(json->tmp);
+    luaL_error(l, "Expected %s but found type <%s> at character %d", exp,
+               found, token->index);
+}
+
+/* Parse the members of a JSON object (the '{' is already consumed) and
+ * leave the resulting Lua table on top of the stack. */
+static void json_parse_object_context(lua_State *l, json_parse_t *json)
+{
+    json_token_t token;
+
+    /* Table, key and value each need a stack slot. json->tmp is freed
+     * before raising because luaL_error() does not return. */
+    if (!lua_checkstack(l, 3)) {
+        strbuf_free(json->tmp);
+        luaL_error(l, "Too many nested data structures");
+    }
+    lua_newtable(l);
+
+    /* Handle empty objects */
+    json_next_token(json, &token);
+    if (token.type == T_OBJ_END)
+        return;
+
+    while (1) {
+        if (token.type != T_STRING)
+            json_throw_parse_error(l, json, "object key", &token);
+        lua_pushlstring(l, token.value.string, token.length);     /* Push key */
+        json_next_token(json, &token);
+        if (token.type != T_COLON)
+            json_throw_parse_error(l, json, "colon", &token);
+        json_next_token(json, &token);
+        json_process_value(l, json, &token);
+        lua_rawset(l, -3);            /* Set key = value */
+
+        json_next_token(json, &token);
+        if (token.type == T_OBJ_END)
+            return;
+        if (token.type != T_COMMA)
+            json_throw_parse_error(l, json, "comma or object end", &token);
+        json_next_token(json, &token);
+    }
+}
+
+/* Handle the array context: parse elements (the '[' is already consumed)
+ * and leave the resulting Lua table on top of the stack. */
+static void json_parse_array_context(lua_State *l, json_parse_t *json)
+{
+    json_token_t token;
+    int i;
+
+    /* The table and one element each need a stack slot. json->tmp is freed
+     * before raising because luaL_error() does not return. */
+    if (!lua_checkstack(l, 2)) {
+        strbuf_free(json->tmp);
+        luaL_error(l, "Too many nested data structures");
+    }
+    lua_newtable(l);
+
+    /* Handle empty arrays */
+    json_next_token(json, &token);
+    if (token.type == T_ARR_END)
+        return;
+
+    for (i = 1; ; i++) {
+        json_process_value(l, json, &token);
+        lua_rawseti(l, -2, i);            /* arr[i] = value */
+        json_next_token(json, &token);
+        if (token.type == T_ARR_END)
+            return;
+        if (token.type != T_COMMA)
+            json_throw_parse_error(l, json, "comma or array end", &token);
+        json_next_token(json, &token);
+    }
+}
+
+/* Handle the "value" context: push the Lua value for *token, or raise a parse error */
+static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token)
+{
+    switch (token->type) {
+    case T_OBJ_BEGIN:
+        json_parse_object_context(l, json);
+        return;
+    case T_ARR_BEGIN:
+        json_parse_array_context(l, json);
+        return;
+    case T_STRING:
+        lua_pushlstring(l, token->value.string, token->length);
+        return;
+    case T_NUMBER:
+        lua_pushnumber(l, token->value.number);
+        return;
+    case T_BOOLEAN:
+        lua_pushboolean(l, token->value.boolean);
+        return;
+    case T_NULL:
+        lua_pushnil(l);    /* JSON null becomes Lua nil */
+        return;
+    default:
+        json_throw_parse_error(l, json, "value", token);
+    }
+}
+
+/* Decode json_text (which must be a null terminated string) and push the
+ * resulting Lua value. Malformed input raises a Lua error. */
+void json_parse(lua_State *l, const char *json_text)
+{
+    json_parse_t parser;
+    json_token_t tok;
+
+    parser.tmp = strbuf_new();
+    parser.tmp->scale = 256;
+    parser.data = json_text;
+    parser.index = 0;
+
+    /* The input must hold exactly one value ... */
+    json_next_token(&parser, &tok);
+    json_process_value(l, &parser, &tok);
+
+    /* ... with no more input left after it */
+    json_next_token(&parser, &tok);
+    if (tok.type != T_END)
+        json_throw_parse_error(l, &parser, "the end", &tok);
+    strbuf_free(parser.tmp);
+}
+
+/* Lua entry point: decode each argument as JSON; non-strings yield nil. */
+int lua_json_decode(lua_State *l)
+{
+    int i, n = lua_gettop(l);
+
+    /* One result is pushed per argument; make sure they all fit */
+    luaL_checkstack(l, n, "too many arguments to decode");
+    for (i = 1; i <= n; i++) {
+        if (lua_isstring(l, i)) {
+            json_parse(l, lua_tostring(l, i));
+        } else {
+            lua_pushnil(l);
+        }
+    }
+    return n;   /* Number of results */
+}
+
+/* vi:ai et sw=4 ts=4:
+ */
-- 
cgit v1.2.3-55-g6feb