From bbf1f5d35e8312fb7373a997664309adf9527af4 Mon Sep 17 00:00:00 2001
From: Mark Pulford <mark@kyne.com.au>
Date: Fri, 15 Apr 2011 20:58:53 +0930
Subject: Initial commit

Split Lua JSON from parent project to create standalone module.
Remove unnecessary files from new repo.
---
 lua_json_decode.c | 405 +++++++++++++++++++++++++++++++++++++++
 lua_json_encode.c | 256 +++++++++++++++++++++++++
 rfc4627.txt       | 563 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 strbuf.c          | 130 +++++++++++++
 strbuf.h          |  48 +++++
 5 files changed, 1402 insertions(+)
 create mode 100644 lua_json_decode.c
 create mode 100644 lua_json_encode.c
 create mode 100644 rfc4627.txt
 create mode 100644 strbuf.c
 create mode 100644 strbuf.h

diff --git a/lua_json_decode.c b/lua_json_decode.c
new file mode 100644
index 0000000..ae35574
--- /dev/null
+++ b/lua_json_decode.c
@@ -0,0 +1,405 @@
+#include <assert.h>
+#include <string.h>
+#include <lua.h>
+#include <lauxlib.h>
+#include "strbuf.h"
+
+/* Caveats:
+ * - NULL values do not work in objects (unsupported by Lua tables).
+ *   - Could use a special "null" table object, that is unique
+ * - NULL values work in arrays (probably not at the end)
+ */
+
+/* FIXME:
+ * - Ensure JSON data is UTF-8. Fail otherwise.
+ *   - Alternatively, dynamically support Unicode in JSON string. Return current locale.
+ * - Use lua_checkstack() to ensure there is enough stack space left to
+ *   fulfill an operation. What happens if we don't, is that acceptable too?
+ *   Does lua_checkstack grow the stack, or merely check if it is possible?
+ * - Merge encode/decode files
+ */
+
+typedef struct {  
+    const char *data; /* NUL-terminated JSON text being parsed */
+    int index;        /* Offset in data of the next character to read */
+    strbuf_t *tmp;    /* Temporary storage for strings */
+} json_parse_t;
+
+typedef enum {          /* Order must match json_token_type_name[] */
+    T_OBJ_BEGIN,        /* '{' */
+    T_OBJ_END,          /* '}' */
+    T_ARR_BEGIN,        /* '[' */
+    T_ARR_END,          /* ']' */
+    T_STRING,
+    T_NUMBER,
+    T_BOOLEAN,          /* "true" or "false" */
+    T_NULL,             /* "null" */
+    T_COLON,
+    T_COMMA,
+    T_END,              /* NUL terminator of the input */
+    T_WHITESPACE,       /* Space, tab, LF or CR; skipped by json_next_token() */
+    T_ERROR,            /* Input that cannot start or form a token */
+    T_UNKNOWN           /* Lookup-table marker: first byte needs further processing */
+} json_token_type_t;
+
+static const char *json_token_type_name[] = {   /* Indexed by json_token_type_t; used in parse error messages */
+    "T_OBJ_BEGIN",
+    "T_OBJ_END",
+    "T_ARR_BEGIN",
+    "T_ARR_END",
+    "T_STRING",
+    "T_NUMBER",
+    "T_BOOLEAN",
+    "T_NULL",
+    "T_COLON",
+    "T_COMMA",
+    "T_END",
+    "T_WHITESPACE",
+    "T_ERROR",
+    "T_UNKNOWN",
+    NULL
+};
+
+typedef struct {
+    json_token_type_t type;
+    int index;              /* Offset in the input where the token starts */
+    union {
+        char *string;       /* T_STRING: points into the parser's tmp buffer */
+        double number;      /* T_NUMBER */
+        int boolean;        /* T_BOOLEAN: 1 for true, 0 for false */
+    } value;
+    int length; /* T_STRING length in bytes. FIXME: Merge into union? Won't save memory, but more logical */
+} json_token_t;
+
+static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token);
+
+static json_token_type_t json_ch2token[256];    /* First byte of a token -> token type */
+static char json_ch2escape[256];                /* Escape letter -> character it denotes, 0 if invalid */
+
+void json_init_lookup_tables()     /* Fills json_ch2token[] and json_ch2escape[] */
+{
+    int i;
+
+    /* Tag all characters as an error */
+    for (i = 0; i < 256; i++)
+        json_ch2token[i] = T_ERROR;
+
+    /* Set tokens that require no further processing */
+    json_ch2token['{'] = T_OBJ_BEGIN;
+    json_ch2token['}'] = T_OBJ_END;
+    json_ch2token['['] = T_ARR_BEGIN;
+    json_ch2token[']'] = T_ARR_END;
+    json_ch2token[','] = T_COMMA;
+    json_ch2token[':'] = T_COLON;
+    json_ch2token['\0'] = T_END;
+    json_ch2token[' '] = T_WHITESPACE;
+    json_ch2token['\t'] = T_WHITESPACE;
+    json_ch2token['\n'] = T_WHITESPACE;
+    json_ch2token['\r'] = T_WHITESPACE;
+
+    /* Update characters that require further processing */
+    json_ch2token['n'] = T_UNKNOWN;    /* null */
+    json_ch2token['t'] = T_UNKNOWN;    /* true */
+    json_ch2token['f'] = T_UNKNOWN;    /* false */
+    json_ch2token['"'] = T_UNKNOWN;    /* string */
+    json_ch2token['-'] = T_UNKNOWN;    /* negative number */
+    for (i = 0; i < 10; i++)
+        json_ch2token['0' + i] = T_UNKNOWN;    /* number */
+
+    for (i = 0; i < 256; i++)      /* Default: not a valid escape letter */
+        json_ch2escape[i] = 0;  /* String error */
+
+    json_ch2escape['"'] = '"';
+    json_ch2escape['\\'] = '\\';
+    json_ch2escape['/'] = '/';
+    json_ch2escape['b'] = '\b';
+    json_ch2escape['t'] = '\t';
+    json_ch2escape['n'] = '\n';
+    json_ch2escape['f'] = '\f';
+    json_ch2escape['r'] = '\r';
+    json_ch2escape['u'] = 'u';  /* This needs to be parsed as unicode */
+}
+
+static void json_next_string_token(json_parse_t *json, json_token_t *token)
+{
+    char ch;
+
+    /* Caller must ensure a string is next */
+    assert(json->data[json->index] == '"');
+
+    json->tmp->length = 0;
+    while ((ch = json->data[++json->index]) != '"') {
+        if (!ch) {  /* Unterminated string: never scan past the end of the input */
+            token->type = T_ERROR;
+            return;
+        }
+        if (ch == '\\') {   /* Escape sequence: translate the escape code */
+            ch = json_ch2escape[(unsigned char)json->data[++json->index]];
+            if (!ch) {
+                /* Invalid escape code */
+                token->type = T_ERROR;
+                return;
+            }
+            if (ch == 'u') {
+                /* Process unicode */
+                /* FIXME: cleanup memory handling. Implement iconv(3)
+                 * conversion from UCS-2 -> UTF-8
+                 */
+                if (!strncmp(&json->data[json->index], "u0000", 5)) {
+                    /* Handle NULL (strncmp stops at the input's terminator) */
+                    ch = 0;
+                    json->index += 4;
+                } else {
+                    /* Remaining codepoints unhandled */
+                    token->type = T_ERROR;
+                    return;
+                }
+            }
+        }
+        strbuf_append_char(json->tmp, ch);
+    }
+    json->index++;  /* Eat final quote (") */
+
+    strbuf_ensure_null(json->tmp);
+
+    token->type = T_STRING;
+    token->value.string = json->tmp->data;  /* Only valid until the next string token */
+    token->length = json->tmp->length;
+}
+
+static void json_next_number_token(json_parse_t *json, json_token_t *token)
+{
+    const char *startptr, *p;
+    char *endptr;
+
+    /* A JSON number takes the following form:
+     *   -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?
+     * strtod() also accepts hex, infinity, NaN and leading zeros, so the
+     * integer part is validated here first; fraction/exponent syntax is
+     * still left to strtod(). NOTE(review): strtod() needs <stdlib.h>,
+     * which this file does not include directly - confirm it is in scope. */
+    startptr = &json->data[json->index];
+    p = startptr + (*startptr == '-');
+    if (*p < '0' || *p > '9' ||
+        (*p == '0' && (p[1] == 'x' || p[1] == 'X' || ('0' <= p[1] && p[1] <= '9')))) {
+        token->type = T_ERROR;
+        return;
+    }
+
+    token->type = T_NUMBER;
+    token->value.number = strtod(startptr, &endptr);
+    if (startptr == endptr)
+        token->type = T_ERROR;
+    else
+        json->index += endptr - startptr;   /* Skip the processed number */
+
+    return;
+}
+
+/* Fills in the token struct with the next token, skipping leading whitespace.
+ * T_STRING will return a pointer to the json_parse_t temporary string
+ * T_ERROR will leave the json->index pointer at the error.
+ */
+static void json_next_token(json_parse_t *json, json_token_t *token)
+{
+    int ch;
+
+    /* Eat whitespace. FIXME: UGLY */
+    token->type = json_ch2token[(unsigned char)json->data[json->index]];
+    while (token->type == T_WHITESPACE)
+        token->type = json_ch2token[(unsigned char)json->data[++json->index]];
+
+    token->index = json->index;     /* Start offset, reported in parse errors */
+
+    /* Don't advance the pointer for an error or the end */
+    if (token->type == T_ERROR || token->type == T_END)
+        return;
+
+    /* Found a known token, advance index and return */
+    if (token->type != T_UNKNOWN) {
+        json->index++;
+        return;
+    }
+
+    ch = json->data[json->index];
+
+    /* Process characters which triggered T_UNKNOWN */
+    if (ch == '"') {
+        json_next_string_token(json, token);
+        return;
+    } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
+        json_next_number_token(json, token);
+        return;
+    } else if (!strncmp(&json->data[json->index], "true", 4)) {
+        token->type = T_BOOLEAN;
+        token->value.boolean = 1;
+        json->index += 4;
+        return;
+    } else if (!strncmp(&json->data[json->index], "false", 5)) {
+        token->type = T_BOOLEAN;
+        token->value.boolean = 0;
+        json->index += 5;
+        return;
+    } else if (!strncmp(&json->data[json->index], "null", 4)) {
+        token->type = T_NULL;
+        json->index += 4;
+        return;
+    }
+
+    token->type = T_ERROR;  /* Started like a literal ('n', 't', 'f') but did not match one */
+}
+
+/* This function does not return.
+ * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
+ * The only allowed exception is the temporary parser string
+ * json->tmp struct, which is freed here before the error is raised.
+ * json and token should exist on the stack somewhere.
+ * luaL_error() will longjmp and release the stack */
+static void json_throw_parse_error(lua_State *l, json_parse_t *json,
+                                   const char *exp, json_token_t *token)
+{
+    strbuf_free(json->tmp);
+    luaL_error(l, "Expected %s but found type <%s> at character %d",
+               exp, json_token_type_name[token->type], token->index);
+}
+
+static void json_parse_object_context(lua_State *l, json_parse_t *json)
+{
+    json_token_t token;
+
+    /* Build the object as a new Lua table, left on top of the stack */
+    lua_newtable(l);
+
+    json_next_token(json, &token);
+
+    /* Handle empty objects */
+    if (token.type == T_OBJ_END)
+        return;
+
+    while (1) {
+        if (token.type != T_STRING)
+            json_throw_parse_error(l, json, "object key", &token);
+
+        lua_pushlstring(l, token.value.string, token.length);     /* Push key */
+
+        json_next_token(json, &token);
+        if (token.type != T_COLON)
+            json_throw_parse_error(l, json, "colon", &token);
+
+        json_next_token(json, &token);
+        json_process_value(l, json, &token);    /* Push value */
+        lua_rawset(l, -3);            /* Set key = value */
+
+        json_next_token(json, &token);
+
+        if (token.type == T_OBJ_END)
+            return;
+
+        if (token.type != T_COMMA)
+            json_throw_parse_error(l, json, "comma or object end", &token);
+
+        json_next_token(json, &token);  /* Next key */
+    }
+}
+
+/* Handle the array context: build a new Lua table, left on top of the stack */
+static void json_parse_array_context(lua_State *l, json_parse_t *json)
+{
+    json_token_t token;
+    int i;
+
+    lua_newtable(l);
+
+    json_next_token(json, &token);
+
+    /* Handle empty arrays */
+    if (token.type == T_ARR_END)
+        return;
+
+    i = 1;      /* Elements are stored from index 1 */
+    while (1) {
+        json_process_value(l, json, &token);
+        lua_rawseti(l, -2, i);            /* arr[i] = value */
+
+        json_next_token(json, &token);
+
+        if (token.type == T_ARR_END)
+            return;
+
+        if (token.type != T_COMMA)
+            json_throw_parse_error(l, json, "comma or array end", &token);
+
+        json_next_token(json, &token);
+        i++;
+    }
+}
+
+/* Handle the "value" context: push the Lua value for token, or raise a parse error */
+static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *token)
+{
+    switch (token->type) {
+    case T_STRING:
+        lua_pushlstring(l, token->value.string, token->length);
+        break;
+    case T_NUMBER:
+        lua_pushnumber(l, token->value.number);
+        break;
+    case T_BOOLEAN:
+        lua_pushboolean(l, token->value.boolean);
+        break;
+    case T_OBJ_BEGIN:
+        json_parse_object_context(l, json);     /* Recurses for nested values */
+        break;
+    case T_ARR_BEGIN:
+        json_parse_array_context(l, json);      /* Recurses for nested values */
+        break;
+    case T_NULL:
+        lua_pushnil(l);
+        break;
+    default:
+        json_throw_parse_error(l, json, "value", token);    /* Does not return */
+    }
+}
+
+/* Parse json_text and push the decoded value; json_text must be null terminated string */
+void json_parse(lua_State *l, const char *json_text)
+{
+    json_parse_t json;
+    json_token_t token;
+
+    json.data = json_text;
+    json.index = 0;
+    json.tmp = strbuf_new();
+    strbuf_set_increment(json.tmp, 256);    /* strbuf.c names this field "increment" */
+
+    json_next_token(&json, &token);
+    json_process_value(l, &json, &token);
+
+    /* Ensure there is no more input left */
+    json_next_token(&json, &token);
+
+    if (token.type != T_END)
+        json_throw_parse_error(l, &json, "the end", &token);    /* Frees json.tmp */
+
+    strbuf_free(json.tmp);
+}
+
+int lua_json_decode(lua_State *l)   /* Pushes one result per argument */
+{
+    int i, n;
+
+    n = lua_gettop(l);      /* Number of arguments */
+
+    for (i = 1; i <= n; i++) {
+        if (lua_isstring(l, i)) {
+            json_parse(l, lua_tostring(l, i));
+        } else {
+            lua_pushnil(l);     /* Argument is not a string: result is nil */
+        }
+    }
+
+    return n;   /* Number of results */
+}
+
+/* vi:ai et sw=4 ts=4:
+ */
diff --git a/lua_json_encode.c b/lua_json_encode.c
new file mode 100644
index 0000000..201f769
--- /dev/null
+++ b/lua_json_encode.c
@@ -0,0 +1,256 @@
+/*
+ * Lua JSON routines
+ *
+ * CAVEATS:
+ * - JSON "null" handling:
+ *   - Decoding a "null" in an array will leave a "nil" placeholder in Lua, but will not show up at the end of the array.
+ *   - Decoding a "null" in an object will ensure that particular key is deleted in the Lua table.
+ */
+
+#include <string.h>
+#include <math.h>
+
+#include <lua.h>
+#include <lauxlib.h>
+#include <json/json.h>
+
+#include "lua_json.h"
+#include "utils.h"
+#include "str.h"
+
+/* FIXME:
+ * - Don't just pushnil on error and return?
+ * - Review all strbuf usage for NULL termination
+ */
+
+/* Return the JSON escape sequence for c, or NULL if c is emitted as-is */
+static inline char *json_escape_char(int c)
+{
+    switch(c) {
+    case 0:
+        return "\\u0000";
+    case '\\':
+        return "\\\\";
+    case '"':
+        return "\\\"";
+    case '\b':
+        return "\\b";
+    case '\t':
+        return "\\t";
+    case '\n':
+        return "\\n";
+    case '\f':
+        return "\\f";
+    case '\r':
+        return "\\r";
+    }
+
+    return NULL;    /* Includes the remaining control characters, which are not escaped here */
+}
+
+/* FIXME:
+ * - Use luaL_checklstring() instead of lua_tolstring()?
+ */
+
+/* FIXME:
+ * - Option to encode non-printable characters? Only \" \\ are required
+ * - Unicode?
+ * - Improve performance?
+ */
+static void json_append_string(lua_State *l, struct str *json, int index)
+{
+    char *p;
+    size_t i;   /* Unsigned, to match len from lua_tolstring() */
+    const char *str;
+    size_t len;
+
+    /* Emit the Lua string at stack index as a quoted, escaped JSON string */
+    str = lua_tolstring(l, index, &len);
+    strbuf_append_char(json, '\"');
+    for (i = 0; i < len; i++) {
+        p = json_escape_char(str[i]);
+        if (p)
+            strbuf_append_mem(json, p, strlen(p));
+        else
+            strbuf_append_char(json, str[i]);
+    }
+    strbuf_append_char(json, '\"');
+}
+
+/* Classify the table on the top of the Lua stack:
+ * -1   object (a key is not a number, or is not an integer >= 1)
+ * >=0  array; the value is the largest integer key (0 for an empty table)
+ */
+static int lua_array_length(lua_State *l)
+{
+    double k;
+    int max;
+
+    max = 0;
+    lua_pushnil(l);
+    /* table, startkey */
+    while (lua_next(l, -2) != 0) {
+        /* table, key, value */
+        /* Only real numbers count: lua_tonumber() also converts "1" */
+        if (lua_type(l, -2) == LUA_TNUMBER && (k = lua_tonumber(l, -2))) {
+            /* Integer >= 1 ? */
+            if (floor(k) == k && k >= 1) {
+                if (k > max)
+                    max = k;
+                lua_pop(l, 1);
+                continue;
+            }
+        }
+
+        /* Must not be an array (non integer key) */
+        lua_pop(l, 2);
+        return -1;
+    }
+
+    return max;
+}
+
+static void json_append_data(lua_State *l, struct str *s);
+
+static void json_append_array(lua_State *l, struct str *s, int size)
+{
+    int comma, i;
+
+    strbuf_append_mem(s, "[ ", 2);
+
+    comma = 0;      /* No separator before the first element */
+    for (i = 1; i <= size; i++) {
+        if (comma)
+            strbuf_append_mem(s, ", ", 2);
+        else
+            comma = 1;
+
+        lua_rawgeti(l, -1, i);      /* Push table[i] */
+        json_append_data(l, s);
+        lua_pop(l, 1);
+    }
+
+    strbuf_append_mem(s, " ]", 2);
+}
+
+static void json_append_object(lua_State *l, struct str *s)
+{
+    int comma, keytype;
+
+    /* Emit the table on top of the Lua stack as a JSON object */
+    strbuf_append_mem(s, "{ ", 2);
+
+    lua_pushnil(l);
+    /* table, startkey */
+    comma = 0;
+    while (lua_next(l, -2) != 0) {
+        if (comma)
+            strbuf_append_mem(s, ", ", 2);
+        else
+            comma = 1;
+
+        /* table, key, value */
+        keytype = lua_type(l, -2);
+        if (keytype == LUA_TNUMBER) {
+            strbuf_append(s, "\"" LUA_NUMBER_FMT "\": ", lua_tonumber(l, -2));
+        } else if (keytype == LUA_TSTRING) {
+            json_append_string(l, s, -2);
+            strbuf_append_mem(s, ": ", 2);
+        } else {
+            die("Cannot serialise table key %s", lua_typename(l, lua_type(l, -2)));
+        }
+
+        /* table, key, value */
+        json_append_data(l, s);
+        lua_pop(l, 1);
+        /* table, key */
+    }
+
+    strbuf_append_mem(s, " }", 2);
+}
+
+/* Serialise the Lua value on top of the stack into the JSON string s.
+ *
+ * FIXME:
+ * - Error handling when cannot serialise key or value (return to script)
+ */
+static void json_append_data(lua_State *l, struct str *s)
+{
+    int len;
+
+    switch (lua_type(l, -1)) {
+    case LUA_TSTRING:
+        json_append_string(l, s, -1);
+        break;
+    case LUA_TNUMBER:
+        strbuf_append(s, LUA_NUMBER_FMT, lua_tonumber(l, -1));  /* Same format as numeric keys; "%lf" fixed 6 decimals */
+        break;
+    case LUA_TBOOLEAN:
+        if (lua_toboolean(l, -1))
+            strbuf_append_mem(s, "true", 4);
+        else
+            strbuf_append_mem(s, "false", 5);
+        break;
+    case LUA_TTABLE:
+        len = lua_array_length(l);  /* -1 means the table is not an array */
+        if (len >= 0)
+            json_append_array(l, s, len);
+        else
+            json_append_object(l, s);
+        break;
+    case LUA_TNIL:
+        strbuf_append_mem(s, "null", 4);
+        break;
+    default:
+        /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD, and LUA_TLIGHTUSERDATA)
+         * cannot be serialised */
+        /* FIXME: return error */
+        die("Cannot serialise %s", lua_typename(l, lua_type(l, -1)));
+    }
+}
+
+char *lua_to_json(lua_State *l, int *len)
+{
+    struct str *s;
+    char *data;
+
+    s = strbuf_new();
+    strbuf_set_increment(s, 256);
+    json_append_data(l, s);             /* Serialise the value on top of the Lua stack */
+    data = strbuf_to_char(s, len);      /* NOTE(review): looks like the caller must free() this - lua_json_encode() does */
+
+    return data;
+}
+
+int lua_json_encode(lua_State *l)
+{
+    char *json;
+    int len;
+    luaL_checkany(l, 1);    /* Raise a Lua error if called with no argument */
+    json = lua_to_json(l, &len);    /* Encodes the value on top of the stack */
+    lua_pushlstring(l, json, len);  /* Lua keeps its own copy of the bytes */
+    free(json);
+
+    return 1;
+}
+
+void lua_json_init(lua_State *l)
+{
+    luaL_Reg reg[] = {
+        { "encode", lua_json_encode },
+        { "decode", lua_json_decode },
+        { NULL, NULL }
+    };
+
+    /* Create "json" table.
+     * Added functions as table entries
+     */
+
+    luaL_register(l, "json", reg);
+
+    /* Fill the character lookup tables used by the decoder */
+    json_init_lookup_tables();
+}
+
+/* vi:ai et sw=4 ts=4:
+ */
diff --git a/rfc4627.txt b/rfc4627.txt
new file mode 100644
index 0000000..67b8909
--- /dev/null
+++ b/rfc4627.txt
@@ -0,0 +1,563 @@
+
+
+
+
+
+
+Network Working Group                                       D. Crockford
+Request for Comments: 4627                                      JSON.org
+Category: Informational                                        July 2006
+
+
+ The application/json Media Type for JavaScript Object Notation (JSON)
+
+Status of This Memo
+
+   This memo provides information for the Internet community.  It does
+   not specify an Internet standard of any kind.  Distribution of this
+   memo is unlimited.
+
+Copyright Notice
+
+   Copyright (C) The Internet Society (2006).
+
+Abstract
+
+   JavaScript Object Notation (JSON) is a lightweight, text-based,
+   language-independent data interchange format.  It was derived from
+   the ECMAScript Programming Language Standard.  JSON defines a small
+   set of formatting rules for the portable representation of structured
+   data.
+
+1.  Introduction
+
+   JavaScript Object Notation (JSON) is a text format for the
+   serialization of structured data.  It is derived from the object
+   literals of JavaScript, as defined in the ECMAScript Programming
+   Language Standard, Third Edition [ECMA].
+
+   JSON can represent four primitive types (strings, numbers, booleans,
+   and null) and two structured types (objects and arrays).
+
+   A string is a sequence of zero or more Unicode characters [UNICODE].
+
+   An object is an unordered collection of zero or more name/value
+   pairs, where a name is a string and a value is a string, number,
+   boolean, null, object, or array.
+
+   An array is an ordered sequence of zero or more values.
+
+   The terms "object" and "array" come from the conventions of
+   JavaScript.
+
+   JSON's design goals were for it to be minimal, portable, textual, and
+   a subset of JavaScript.
+
+
+
+Crockford                    Informational                      [Page 1]
+
+RFC 4627                          JSON                         July 2006
+
+
+1.1.  Conventions Used in This Document
+
+   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+   document are to be interpreted as described in [RFC2119].
+
+   The grammatical rules in this document are to be interpreted as
+   described in [RFC4234].
+
+2.  JSON Grammar
+
+   A JSON text is a sequence of tokens.  The set of tokens includes six
+   structural characters, strings, numbers, and three literal names.
+
+   A JSON text is a serialized object or array.
+
+      JSON-text = object / array
+
+   These are the six structural characters:
+
+      begin-array     = ws %x5B ws  ; [ left square bracket
+
+      begin-object    = ws %x7B ws  ; { left curly bracket
+
+      end-array       = ws %x5D ws  ; ] right square bracket
+
+      end-object      = ws %x7D ws  ; } right curly bracket
+
+      name-separator  = ws %x3A ws  ; : colon
+
+      value-separator = ws %x2C ws  ; , comma
+
+   Insignificant whitespace is allowed before or after any of the six
+   structural characters.
+
+      ws = *(
+                %x20 /              ; Space
+                %x09 /              ; Horizontal tab
+                %x0A /              ; Line feed or New line
+                %x0D                ; Carriage return
+            )
+
+2.1.  Values
+
+   A JSON value MUST be an object, array, number, or string, or one of
+   the following three literal names:
+
+      false null true
+
+
+
+Crockford                    Informational                      [Page 2]
+
+RFC 4627                          JSON                         July 2006
+
+
+   The literal names MUST be lowercase.  No other literal names are
+   allowed.
+
+         value = false / null / true / object / array / number / string
+
+         false = %x66.61.6c.73.65   ; false
+
+         null  = %x6e.75.6c.6c      ; null
+
+         true  = %x74.72.75.65      ; true
+
+2.2.  Objects
+
+   An object structure is represented as a pair of curly brackets
+   surrounding zero or more name/value pairs (or members).  A name is a
+   string.  A single colon comes after each name, separating the name
+   from the value.  A single comma separates a value from a following
+   name.  The names within an object SHOULD be unique.
+
+      object = begin-object [ member *( value-separator member ) ]
+      end-object
+
+      member = string name-separator value
+
+2.3.  Arrays
+
+   An array structure is represented as square brackets surrounding zero
+   or more values (or elements).  Elements are separated by commas.
+
+      array = begin-array [ value *( value-separator value ) ] end-array
+
+2.4.  Numbers
+
+   The representation of numbers is similar to that used in most
+   programming languages.  A number contains an integer component that
+   may be prefixed with an optional minus sign, which may be followed by
+   a fraction part and/or an exponent part.
+
+   Octal and hex forms are not allowed.  Leading zeros are not allowed.
+
+   A fraction part is a decimal point followed by one or more digits.
+
+   An exponent part begins with the letter E in upper or lowercase,
+   which may be followed by a plus or minus sign.  The E and optional
+   sign are followed by one or more digits.
+
+   Numeric values that cannot be represented as sequences of digits
+   (such as Infinity and NaN) are not permitted.
+
+
+
+Crockford                    Informational                      [Page 3]
+
+RFC 4627                          JSON                         July 2006
+
+
+         number = [ minus ] int [ frac ] [ exp ]
+
+         decimal-point = %x2E       ; .
+
+         digit1-9 = %x31-39         ; 1-9
+
+         e = %x65 / %x45            ; e E
+
+         exp = e [ minus / plus ] 1*DIGIT
+
+         frac = decimal-point 1*DIGIT
+
+         int = zero / ( digit1-9 *DIGIT )
+
+         minus = %x2D               ; -
+
+         plus = %x2B                ; +
+
+         zero = %x30                ; 0
+
+2.5.  Strings
+
+   The representation of strings is similar to conventions used in the C
+   family of programming languages.  A string begins and ends with
+   quotation marks.  All Unicode characters may be placed within the
+   quotation marks except for the characters that must be escaped:
+   quotation mark, reverse solidus, and the control characters (U+0000
+   through U+001F).
+
+   Any character may be escaped.  If the character is in the Basic
+   Multilingual Plane (U+0000 through U+FFFF), then it may be
+   represented as a six-character sequence: a reverse solidus, followed
+   by the lowercase letter u, followed by four hexadecimal digits that
+   encode the character's code point.  The hexadecimal letters A though
+   F can be upper or lowercase.  So, for example, a string containing
+   only a single reverse solidus character may be represented as
+   "\u005C".
+
+   Alternatively, there are two-character sequence escape
+   representations of some popular characters.  So, for example, a
+   string containing only a single reverse solidus character may be
+   represented more compactly as "\\".
+
+   To escape an extended character that is not in the Basic Multilingual
+   Plane, the character is represented as a twelve-character sequence,
+   encoding the UTF-16 surrogate pair.  So, for example, a string
+   containing only the G clef character (U+1D11E) may be represented as
+   "\uD834\uDD1E".
+
+
+
+Crockford                    Informational                      [Page 4]
+
+RFC 4627                          JSON                         July 2006
+
+
+         string = quotation-mark *char quotation-mark
+
+         char = unescaped /
+                escape (
+                    %x22 /          ; "    quotation mark  U+0022
+                    %x5C /          ; \    reverse solidus U+005C
+                    %x2F /          ; /    solidus         U+002F
+                    %x62 /          ; b    backspace       U+0008
+                    %x66 /          ; f    form feed       U+000C
+                    %x6E /          ; n    line feed       U+000A
+                    %x72 /          ; r    carriage return U+000D
+                    %x74 /          ; t    tab             U+0009
+                    %x75 4HEXDIG )  ; uXXXX                U+XXXX
+
+         escape = %x5C              ; \
+
+         quotation-mark = %x22      ; "
+
+         unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+
+3.  Encoding
+
+   JSON text SHALL be encoded in Unicode.  The default encoding is
+   UTF-8.
+
+   Since the first two characters of a JSON text will always be ASCII
+   characters [RFC0020], it is possible to determine whether an octet
+   stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
+   at the pattern of nulls in the first four octets.
+
+           00 00 00 xx  UTF-32BE
+           00 xx 00 xx  UTF-16BE
+           xx 00 00 00  UTF-32LE
+           xx 00 xx 00  UTF-16LE
+           xx xx xx xx  UTF-8
+
+4.  Parsers
+
+   A JSON parser transforms a JSON text into another representation.  A
+   JSON parser MUST accept all texts that conform to the JSON grammar.
+   A JSON parser MAY accept non-JSON forms or extensions.
+
+   An implementation may set limits on the size of texts that it
+   accepts.  An implementation may set limits on the maximum depth of
+   nesting.  An implementation may set limits on the range of numbers.
+   An implementation may set limits on the length and character contents
+   of strings.
+
+
+
+
+Crockford                    Informational                      [Page 5]
+
+RFC 4627                          JSON                         July 2006
+
+
+5. Generators
+
+   A JSON generator produces JSON text.  The resulting text MUST
+   strictly conform to the JSON grammar.
+
+6. IANA Considerations
+
+   The MIME media type for JSON text is application/json.
+
+   Type name: application
+
+   Subtype name: json
+
+   Required parameters: n/a
+
+   Optional parameters: n/a
+
+   Encoding considerations: 8bit if UTF-8; binary if UTF-16 or UTF-32
+
+      JSON may be represented using UTF-8, UTF-16, or UTF-32.  When JSON
+      is written in UTF-8, JSON is 8bit compatible.  When JSON is
+      written in UTF-16 or UTF-32, the binary content-transfer-encoding
+      must be used.
+
+   Security considerations:
+
+   Generally there are security issues with scripting languages.  JSON
+   is a subset of JavaScript, but it is a safe subset that excludes
+   assignment and invocation.
+
+   A JSON text can be safely passed into JavaScript's eval() function
+   (which compiles and executes a string) if all the characters not
+   enclosed in strings are in the set of characters that form JSON
+   tokens.  This can be quickly determined in JavaScript with two
+   regular expressions and calls to the test and replace methods.
+
+      var my_JSON_object = !(/[^,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]/.test(
+             text.replace(/"(\\.|[^"\\])*"/g, ''))) &&
+         eval('(' + text + ')');
+
+   Interoperability considerations: n/a
+
+   Published specification: RFC 4627
+
+
+
+
+
+
+
+
+Crockford                    Informational                      [Page 6]
+
+RFC 4627                          JSON                         July 2006
+
+
+   Applications that use this media type:
+
+      JSON has been used to exchange data between applications written
+      in all of these programming languages: ActionScript, C, C#,
+      ColdFusion, Common Lisp, E, Erlang, Java, JavaScript, Lua,
+      Objective CAML, Perl, PHP, Python, Rebol, Ruby, and Scheme.
+
+   Additional information:
+
+      Magic number(s): n/a
+      File extension(s): .json
+      Macintosh file type code(s): TEXT
+
+   Person & email address to contact for further information:
+      Douglas Crockford
+      douglas@crockford.com
+
+   Intended usage: COMMON
+
+   Restrictions on usage: none
+
+   Author:
+      Douglas Crockford
+      douglas@crockford.com
+
+   Change controller:
+      Douglas Crockford
+      douglas@crockford.com
+
+7. Security Considerations
+
+   See Security Considerations in Section 6.
+
+8. Examples
+
+   This is a JSON object:
+
+   {
+      "Image": {
+          "Width":  800,
+          "Height": 600,
+          "Title":  "View from 15th Floor",
+          "Thumbnail": {
+              "Url":    "http://www.example.com/image/481989943",
+              "Height": 125,
+              "Width":  "100"
+          },
+          "IDs": [116, 943, 234, 38793]
+
+
+
+Crockford                    Informational                      [Page 7]
+
+RFC 4627                          JSON                         July 2006
+
+
+        }
+   }
+
+   Its Image member is an object whose Thumbnail member is an object
+   and whose IDs member is an array of numbers.
+
+   This is a JSON array containing two objects:
+
+   [
+      {
+         "precision": "zip",
+         "Latitude":  37.7668,
+         "Longitude": -122.3959,
+         "Address":   "",
+         "City":      "SAN FRANCISCO",
+         "State":     "CA",
+         "Zip":       "94107",
+         "Country":   "US"
+      },
+      {
+         "precision": "zip",
+         "Latitude":  37.371991,
+         "Longitude": -122.026020,
+         "Address":   "",
+         "City":      "SUNNYVALE",
+         "State":     "CA",
+         "Zip":       "94085",
+         "Country":   "US"
+      }
+   ]
+
+9. References
+
+9.1.  Normative References
+
+   [ECMA]    European Computer Manufacturers Association, "ECMAScript
+             Language Specification 3rd Edition", December 1999,
+             <http://www.ecma-international.org/publications/files/
+             ecma-st/ECMA-262.pdf>.
+
+   [RFC0020] Cerf, V., "ASCII format for network interchange", RFC 20,
+             October 1969.
+
+   [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+             Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+   [RFC4234] Crocker, D. and P.  Overell, "Augmented BNF for Syntax
+             Specifications: ABNF", RFC 4234, October 2005.
+
+
+
+Crockford                    Informational                      [Page 8]
+
+RFC 4627                          JSON                         July 2006
+
+
+   [UNICODE] The Unicode Consortium, "The Unicode Standard Version 4.0",
+             2003, <http://www.unicode.org/versions/Unicode4.1.0/>.
+
+Author's Address
+
+   Douglas Crockford
+   JSON.org
+   EMail: douglas@crockford.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Crockford                    Informational                      [Page 9]
+
+RFC 4627                          JSON                         July 2006
+
+
+Full Copyright Statement
+
+   Copyright (C) The Internet Society (2006).
+
+   This document is subject to the rights, licenses and restrictions
+   contained in BCP 78, and except as set forth therein, the authors
+   retain all their rights.
+
+   This document and the information contained herein are provided on an
+   "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+   OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
+   ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
+   INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
+   INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+   WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+   The IETF takes no position regarding the validity or scope of any
+   Intellectual Property Rights or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; nor does it represent that it has
+   made any independent effort to identify any such rights.  Information
+   on the procedures with respect to rights in RFC documents can be
+   found in BCP 78 and BCP 79.
+
+   Copies of IPR disclosures made to the IETF Secretariat and any
+   assurances of licenses to be made available, or the result of an
+   attempt made to obtain a general license or permission for the use of
+   such proprietary rights by implementers or users of this
+   specification can be obtained from the IETF on-line IPR repository at
+   http://www.ietf.org/ipr.
+
+   The IETF invites any interested party to bring to its attention any
+   copyrights, patents or patent applications, or other proprietary
+   rights that may cover technology that may be required to implement
+   this standard.  Please address the information to the IETF at
+   ietf-ipr@ietf.org.
+
+Acknowledgement
+
+   Funding for the RFC Editor function is provided by the IETF
+   Administrative Support Activity (IASA).
+
+
+
+
+
+
+
+Crockford                    Informational                     [Page 10]
+
diff --git a/strbuf.c b/strbuf.c
new file mode 100644
index 0000000..f823884
--- /dev/null
+++ b/strbuf.c
@@ -0,0 +1,130 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "strbuf.h"
+
+/* Fatal error: print the message and a newline to stderr, then exit(-1). */
+static void die(const char *format, ...)
+{
+    va_list ap;
+    va_start(ap, format);
+    vfprintf(stderr, format, ap);
+    va_end(ap);
+    fputc('\n', stderr);    /* callers pass messages without a newline */
+    exit(-1);
+}
+
+/* Put *s into the empty state: no storage, zero length, default increment. */
+void strbuf_init(strbuf_t *s)
+{
+    s->increment = STRBUF_DEFAULT_INCREMENT;
+    s->size = s->length = 0;
+    s->data = NULL;
+}
+
+/* Heap-allocate a strbuf_t and initialise it through strbuf_init().
+ * Terminates the process via die() when malloc() fails. */
+strbuf_t *strbuf_new()
+{
+    strbuf_t *buf = malloc(sizeof(*buf));
+
+    if (buf == NULL)
+        die("Out of memory");
+    strbuf_init(buf);
+
+    return buf;
+}
+
+/* Set the allocation granularity; a non-positive value is fatal via die(). */
+void strbuf_set_increment(strbuf_t *s, int increment)
+{
+    if (increment < 1)
+        die("BUG: Invalid string increment");
+    s->increment = increment;
+}
+
+/* Free the character storage and then s itself (s must be heap-allocated). */
+void strbuf_free(strbuf_t *s)
+{
+    free(s->data);  /* free(NULL) is a no-op, so no guard is required */
+    free(s);
+}
+
+/* Detach and return the character data, destroying the strbuf_t wrapper.
+ * The current length is stored in *len when len is non-NULL. The returned
+ * pointer is whatever s->data held (NULL if nothing was ever allocated);
+ * this function does not write a terminator. */
+char *strbuf_to_char(strbuf_t *s, int *len)
+{
+    char *detached = s->data;
+    if (len != NULL)
+        *len = s->length;
+    free(s);
+    return detached;
+}
+
+/* Reallocate the buffer so it can hold a string of len bytes plus one
+ * spare byte for an optional NUL terminator. The allocation is rounded up
+ * to a multiple of s->increment; s->length is left untouched. A failed
+ * realloc() is fatal via die(). */
+void strbuf_resize(strbuf_t *s, int len)
+{
+    int step = s->increment;
+    /* len + 1 reserves the terminator byte; then count whole increments */
+    int blocks = (len + 1 + step - 1) / step;
+
+    s->size = blocks * step;
+    s->data = realloc(s->data, s->size);
+    if (s->data == NULL)
+        die("Out of memory");
+}
+
+/* Append len raw bytes from c, growing the buffer first if they do not fit. */
+void strbuf_append_mem(strbuf_t *s, const char *c, int len)
+{
+    if (strbuf_emptylen(s) < len)
+        strbuf_resize(s, s->length + len);
+    memcpy(&s->data[s->length], c, len);
+    s->length += len;
+}
+
+void strbuf_ensure_null(strbuf_t *s)
+{
+    s->data[s->length] = '\0';  /* NOTE(review): NULL deref if never allocated */
+}
+
+/* Append printf-style formatted text to s, growing the buffer on demand.
+ * The result is NUL-terminated by vsnprintf(), but the terminator is not
+ * counted in s->length. A formatting failure is fatal via die(). */
+void strbuf_append_fmt(strbuf_t *s, const char *fmt, ...)
+{
+    va_list arg;
+    int fmt_len, try;
+    int empty_len;
+
+    /* First pass formats into whatever room is left; if the text does not
+     * fit, grow the buffer to the size reported and format once more. */
+    for (try = 0; ; try++) {
+        /* -1 when nothing is allocated yet, making the size below 0 so
+         * vsnprintf() only measures and never writes through s->data. */
+        empty_len = strbuf_emptylen(s);
+        va_start(arg, fmt);
+        /* empty_len excludes the byte reserved for the NUL, hence the + 1 */
+        fmt_len = vsnprintf(s->data + s->length, empty_len + 1, fmt, arg);
+        va_end(arg);
+
+        if (fmt_len < 0)    /* vsnprintf() error: must not count as success */
+            die("BUG: Unable to format string");
+        if (fmt_len <= empty_len)
+            break;  /* SUCCESS */
+        if (try > 0)
+            die("BUG: length of formatted string changed");
+        strbuf_resize(s, s->length + fmt_len);
+    }
+    s->length += fmt_len;
+}
+
+/* vi:ai et sw=4 ts=4:
+ */
diff --git a/strbuf.h b/strbuf.h
new file mode 100644
index 0000000..fb07e6f
--- /dev/null
+++ b/strbuf.h
@@ -0,0 +1,48 @@
+#include <stdlib.h>
+#include <stdarg.h>
+
+typedef struct {
+    char *data; /* Heap storage for the string; NULL until first allocated */
+    int size;   /* Bytes allocated for data */
+    int length; /* Bytes of string stored, not counting any NUL terminator */
+    int increment;  /* Allocations are rounded up to a multiple of this */
+} strbuf_t;
+
+#ifndef STRBUF_DEFAULT_INCREMENT /* may be defined before including this file */
+#define STRBUF_DEFAULT_INCREMENT 8 /* increment installed by strbuf_init() */
+#endif
+
+extern void strbuf_init(strbuf_t *s);   /* reset to empty, nothing allocated */
+extern strbuf_t *strbuf_new(void);      /* (void): a real prototype, no args */
+extern void strbuf_free(strbuf_t *s);   /* frees s->data and s itself */
+extern char *strbuf_to_char(strbuf_t *s, int *len); /* frees s, returns data */
+
+extern void strbuf_set_increment(strbuf_t *s, int increment); /* must be > 0 */
+extern void strbuf_resize(strbuf_t *s, int len);    /* room for len + NUL */
+extern void strbuf_append_fmt(strbuf_t *s, const char *format, ...);
+extern void strbuf_append_mem(strbuf_t *s, const char *c, int len);
+extern void strbuf_ensure_null(strbuf_t *s);    /* writes NUL at data[length] */
+
+/* Bytes that can still be appended with one byte kept spare for a NUL
+ * terminator; -1 when nothing has been allocated (size == length == 0). */
+static inline int strbuf_emptylen(strbuf_t *s)
+{
+    int unused = s->size - s->length;
+    return unused - 1;
+}
+
+static inline int strbuf_length(strbuf_t *s)
+{
+    return s->length;   /* bytes stored; a NUL terminator is not counted */
+}
+
+/* Append one byte, enlarging the buffer first when no free byte remains. */
+static inline void strbuf_append_char(strbuf_t *s, const char c)
+{
+    if (strbuf_emptylen(s) <= 0)
+        strbuf_resize(s, s->length + 1);
+    s->data[s->length++] = c;
+}
+
+/* vi:ai et sw=4 ts=4:
+ */
-- 
cgit v1.2.3-55-g6feb