From bbf1f5d35e8312fb7373a997664309adf9527af4 Mon Sep 17 00:00:00 2001 From: Mark Pulford Date: Fri, 15 Apr 2011 20:58:53 +0930 Subject: Initial commit Split Lua JSON from parent project to create standalone module. Remove unnecessary files from new repo. --- lua_json_decode.c | 405 +++++++++++++++++++++++++++++++++++++++ lua_json_encode.c | 256 +++++++++++++++++++++++++ rfc4627.txt | 563 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ strbuf.c | 130 +++++++++++++ strbuf.h | 48 +++++ 5 files changed, 1402 insertions(+) create mode 100644 lua_json_decode.c create mode 100644 lua_json_encode.c create mode 100644 rfc4627.txt create mode 100644 strbuf.c create mode 100644 strbuf.h diff --git a/lua_json_decode.c b/lua_json_decode.c new file mode 100644 index 0000000..ae35574 --- /dev/null +++ b/lua_json_decode.c @@ -0,0 +1,405 @@ +#include +#include +#include +#include +#include "strbuf.h" + +/* Caveats: + * - NULL values do not work in objects (unsupported by Lua tables). + * - Could use a special "null" table object, that is unique + * - NULL values work in arrays (probably not at the end) + */ + +/* FIXME: + * - Ensure JSON data is UTF-8. Fail otherwise. + * - Alternatively, dynamically support Unicode in JSON string. Return current locale. + * - Use lua_checkstack() to ensure there is enough stack space left to + * fulfill an operation. What happens if we don't, is that acceptable too? + * Does lua_checkstack grow the stack, or merely check if it is possible? 
/* One lexical token, filled in by json_next_token(). */
typedef struct {
    json_token_type_t type; /* Kind of token; T_ERROR when lexing failed */
    int index;              /* Offset into the JSON text where this token starts
                             * (recorded after leading whitespace is skipped) */
    union {
        char *string;       /* T_STRING: points into the parser's temporary
                             * strbuf, so it is overwritten by the next string */
        double number;      /* T_NUMBER: value converted by strtod() */
        int boolean;        /* T_BOOLEAN: 1 for "true", 0 for "false" */
    } value;
    int length;             /* T_STRING only: byte length of value.string.
                             * FIXME: Merge into union? Won't save memory, but more logical */
} json_token_t;
json_ch2escape['"'] = '"'; + json_ch2escape['\\'] = '\\'; + json_ch2escape['/'] = '/'; + json_ch2escape['b'] = '\b'; + json_ch2escape['t'] = '\t'; + json_ch2escape['n'] = '\n'; + json_ch2escape['f'] = '\f'; + json_ch2escape['r'] = '\r'; + json_ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */ +} + +static void json_next_string_token(json_parse_t *json, json_token_t *token) +{ + char ch; + + /* Caller must ensure a string is next */ + assert(json->data[json->index] == '"'); + + /* Gobble string. FIXME, ugly */ + + json->tmp->length = 0; + while ((ch = json->data[++json->index]) != '"') { + /* Handle escapes */ + if (ch == '\\') { + /* Translate escape code */ + ch = json_ch2escape[(unsigned char)json->data[++json->index]]; + if (!ch) { + /* Invalid escape code */ + token->type = T_ERROR; + return; + } + if (ch == 'u') { + /* Process unicode */ + /* FIXME: cleanup memory handling. Implement iconv(3) + * conversion from UCS-2 -> UTF-8 + */ + if (!memcmp(&json->data[json->index], "u0000", 5)) { + /* Handle NULL */ + ch = 0; + json->index += 4; + } else { + /* Remaining codepoints unhandled */ + token->type = T_ERROR; + return; + } + } + } + strbuf_append_char(json->tmp, ch); + } + json->index++; /* Eat final quote (") */ + + strbuf_ensure_null(json->tmp); + + token->type = T_STRING; + token->value.string = json->tmp->data; + token->length = json->tmp->length; +} + +static void json_next_number_token(json_parse_t *json, json_token_t *token) +{ + const char *startptr; + char *endptr; + + /* FIXME: + * Verify that the number takes the following form: + * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)? + * strtod() below allows other forms (Hex, infinity, NaN,..) 
*/ + /* i = json->index; + if (json->data[i] == '-') + i++; + j = i; + while ('0' <= json->data[i] && json->data[i] <= '9') + i++; + if (i == j) + return T_ERROR; */ + + token->type = T_NUMBER; + startptr = &json->data[json->index]; + token->value.number = strtod(&json->data[json->index], &endptr); + if (startptr == endptr) + token->type = T_ERROR; + else + json->index += endptr - startptr; /* Skip the processed number */ + + return; +} + +/* Fills in the token struct. + * T_STRING will return a pointer to the json_parse_t temporary string + * T_ERROR will leave the json->index pointer at the error. + */ +static void json_next_token(json_parse_t *json, json_token_t *token) +{ + int ch; + + /* Eat whitespace. FIXME: UGLY */ + token->type = json_ch2token[(unsigned char)json->data[json->index]]; + while (token->type == T_WHITESPACE) + token->type = json_ch2token[(unsigned char)json->data[++json->index]]; + + token->index = json->index; + + /* Don't advance the pointer for an error or the end */ + if (token->type == T_ERROR || token->type == T_END) + return; + + /* Found a known token, advance index and return */ + if (token->type != T_UNKNOWN) { + json->index++; + return; + } + + ch = json->data[json->index]; + + /* Process characters which triggered T_UNKNOWN */ + if (ch == '"') { + json_next_string_token(json, token); + return; + } else if (ch == '-' || ('0' <= ch && ch <= '9')) { + json_next_number_token(json, token); + return; + } else if (!strncmp(&json->data[json->index], "true", 4)) { + token->type = T_BOOLEAN; + token->value.boolean = 1; + json->index += 4; + return; + } else if (!strncmp(&json->data[json->index], "false", 5)) { + token->type = T_BOOLEAN; + token->value.boolean = 0; + json->index += 5; + return; + } else if (!strncmp(&json->data[json->index], "null", 4)) { + token->type = T_NULL; + json->index += 4; + return; + } + + token->type = T_ERROR; +} + +/* This function does not return. + * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED. 
+ * The only allowed exception is the temporary parser string + * json->tmp struct. + * json and token should exist on the stack somewhere. + * luaL_error() will long_jmp and release the stack */ +static void json_throw_parse_error(lua_State *l, json_parse_t *json, + const char *exp, json_token_t *token) +{ + strbuf_free(json->tmp); + luaL_error(l, "Expected %s but found type <%s> at character %d", + exp, json_token_type_name[token->type], token->index); +} + +static void json_parse_object_context(lua_State *l, json_parse_t *json) +{ + json_token_t token; + + lua_newtable(l); + + json_next_token(json, &token); + + /* Handle empty objects */ + if (token.type == T_OBJ_END) + return; + + while (1) { + if (token.type != T_STRING) + json_throw_parse_error(l, json, "object key", &token); + + lua_pushlstring(l, token.value.string, token.length); /* Push key */ + + json_next_token(json, &token); + if (token.type != T_COLON) + json_throw_parse_error(l, json, "colon", &token); + + json_next_token(json, &token); + json_process_value(l, json, &token); + lua_rawset(l, -3); /* Set key = value */ + + json_next_token(json, &token); + + if (token.type == T_OBJ_END) + return; + + if (token.type != T_COMMA) + json_throw_parse_error(l, json, "comma or object end", &token); + + json_next_token(json, &token); + } while (1); + +} + +/* Handle the array context */ +static void json_parse_array_context(lua_State *l, json_parse_t *json) +{ + json_token_t token; + int i; + + lua_newtable(l); + + json_next_token(json, &token); + + /* Handle empty arrays */ + if (token.type == T_ARR_END) + return; + + i = 1; + while (1) { + json_process_value(l, json, &token); + lua_rawseti(l, -2, i); /* arr[i] = value */ + + json_next_token(json, &token); + + if (token.type == T_ARR_END) + return; + + if (token.type != T_COMMA) + json_throw_parse_error(l, json, "comma or array end", &token); + + json_next_token(json, &token); + i++; + } +} + +/* Handle the "value" context */ +static void 
/* Decode one complete JSON text and push the resulting Lua value.
 *
 * json_text must be a null terminated string. Exactly one value is pushed
 * onto the Lua stack on success. If the text is malformed, or anything
 * other than whitespace follows the value, json_throw_parse_error() frees
 * the temporary buffer and raises a Lua error (this function then does not
 * return).
 */
void json_parse(lua_State *l, const char *json_text)
{
    json_parse_t json;
    json_token_t token;

    json.data = json_text;
    json.index = 0;
    json.tmp = strbuf_new();
    /* strbuf_t has no "scale" member; its growth step is "increment",
     * which is set through strbuf_set_increment() */
    strbuf_set_increment(json.tmp, 256);

    json_next_token(&json, &token);
    json_process_value(l, &json, &token);

    /* Ensure there is no more input left */
    json_next_token(&json, &token);

    if (token.type != T_END)
        json_throw_parse_error(l, &json, "the end", &token);

    strbuf_free(json.tmp);
}
/* JSON escape a character if required, or return NULL.
 *
 * Returns a pointer to a static escape sequence for characters that may not
 * appear raw inside a JSON string: the quotation mark, the reverse solidus
 * and every control character U+0000..U+001F (RFC 4627, section 2.5).
 * Control characters with a short form (\b \t \n \f \r) use it; the rest
 * are written as \u00XX. Any other value, including negative values
 * produced by a signed char holding a byte >= 0x80, returns NULL, meaning
 * "emit the byte unchanged".
 */
static inline char *json_escape_char(int c)
{
    /* Indexed by control character code */
    static char *const control_escapes[0x20] = {
        "\\u0000", "\\u0001", "\\u0002", "\\u0003",
        "\\u0004", "\\u0005", "\\u0006", "\\u0007",
        "\\b",     "\\t",     "\\n",     "\\u000b",
        "\\f",     "\\r",     "\\u000e", "\\u000f",
        "\\u0010", "\\u0011", "\\u0012", "\\u0013",
        "\\u0014", "\\u0015", "\\u0016", "\\u0017",
        "\\u0018", "\\u0019", "\\u001a", "\\u001b",
        "\\u001c", "\\u001d", "\\u001e", "\\u001f"
    };

    if (c >= 0 && c < 0x20)
        return control_escapes[c];

    switch (c) {
    case '\\':
        return "\\\\";
    case '"':
        return "\\\"";
    default:
        return NULL;
    }
}
*/ + if (floor(k) == k && k >= 1) { + if (k > max) + max = k; + lua_pop(l, 1); + continue; + } + } + + /* Must not be an array (non integer key) */ + lua_pop(l, 2); + return -1; + } + + return max; +} + +static void json_append_data(lua_State *l, struct str *s); + +static void json_append_array(lua_State *l, struct str *s, int size) +{ + int comma, i; + + strbuf_append_mem(s, "[ ", 2); + + comma = 0; + for (i = 1; i <= size; i++) { + if (comma) + strbuf_append_mem(s, ", ", 2); + else + comma = 1; + + lua_rawgeti(l, -1, i); + json_append_data(l, s); + lua_pop(l, 1); + } + + strbuf_append_mem(s, " ]", 2); +} + +static void json_append_object(lua_State *l, struct str *s) +{ + int comma, keytype; + + /* Object */ + strbuf_append_mem(s, "{ ", 2); + + lua_pushnil(l); + /* table, startkey */ + comma = 0; + while (lua_next(l, -2) != 0) { + if (comma) + strbuf_append_mem(s, ", ", 2); + else + comma = 1; + + /* table, key, value */ + keytype = lua_type(l, -2); + if (keytype == LUA_TNUMBER) { + strbuf_append(s, "\"" LUA_NUMBER_FMT "\": ", lua_tonumber(l, -2)); + } else if (keytype == LUA_TSTRING) { + json_append_string(l, s, -2); + strbuf_append_mem(s, ": ", 2); + } else { + die("Cannot serialise table key %s", lua_typename(l, lua_type(l, -2))); + } + + /* table, key, value */ + json_append_data(l, s); + lua_pop(l, 1); + /* table, key */ + } + + strbuf_append_mem(s, " }", 2); +} + +/* Serialise Lua data into JSON string. 
+ * + * FIXME: + * - Error handling when cannot serialise key or value (return to script) + */ +static void json_append_data(lua_State *l, struct str *s) +{ + int len; + + switch (lua_type(l, -1)) { + case LUA_TSTRING: + json_append_string(l, s, -1); + break; + case LUA_TNUMBER: + strbuf_append(s, "%lf", lua_tonumber(l, -1)); + break; + case LUA_TBOOLEAN: + if (lua_toboolean(l, -1)) + strbuf_append_mem(s, "true", 4); + else + strbuf_append_mem(s, "false", 5); + break; + case LUA_TTABLE: + len = lua_array_length(l); + if (len >= 0) + json_append_array(l, s, len); + else + json_append_object(l, s); + break; + case LUA_TNIL: + strbuf_append_mem(s, "null", 4); + break; + default: + /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD, and LUA_TLIGHTUSERDATA) + * cannot be serialised */ + /* FIXME: return error */ + die("Cannot serialise %s", lua_typename(l, lua_type(l, -1))); + } +} + +char *lua_to_json(lua_State *l, int *len) +{ + struct str *s; + char *data; + + s = strbuf_new(); + strbuf_set_increment(s, 256); + json_append_data(l, s); + data = strbuf_to_char(s, len); + + return data; +} + +int lua_json_encode(lua_State *l) +{ + char *json; + int len; + + json = lua_to_json(l, &len); + lua_pushlstring(l, json, len); + free(json); + + return 1; +} + +void lua_json_init(lua_State *l) +{ + luaL_Reg reg[] = { + { "encode", lua_json_encode }, + { "decode", lua_json_decode }, + { NULL, NULL } + }; + + /* Create "db" table. + * Added functions as table entries + */ + + luaL_register(l, "json", reg); + + /* FIXME: Debugging */ + json_init_lookup_tables(); +} + +/* vi:ai et sw=4 ts=4: + */ diff --git a/rfc4627.txt b/rfc4627.txt new file mode 100644 index 0000000..67b8909 --- /dev/null +++ b/rfc4627.txt @@ -0,0 +1,563 @@ + + + + + + +Network Working Group D. 
Crockford +Request for Comments: 4627 JSON.org +Category: Informational July 2006 + + + The application/json Media Type for JavaScript Object Notation (JSON) + +Status of This Memo + + This memo provides information for the Internet community. It does + not specify an Internet standard of any kind. Distribution of this + memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2006). + +Abstract + + JavaScript Object Notation (JSON) is a lightweight, text-based, + language-independent data interchange format. It was derived from + the ECMAScript Programming Language Standard. JSON defines a small + set of formatting rules for the portable representation of structured + data. + +1. Introduction + + JavaScript Object Notation (JSON) is a text format for the + serialization of structured data. It is derived from the object + literals of JavaScript, as defined in the ECMAScript Programming + Language Standard, Third Edition [ECMA]. + + JSON can represent four primitive types (strings, numbers, booleans, + and null) and two structured types (objects and arrays). + + A string is a sequence of zero or more Unicode characters [UNICODE]. + + An object is an unordered collection of zero or more name/value + pairs, where a name is a string and a value is a string, number, + boolean, null, object, or array. + + An array is an ordered sequence of zero or more values. + + The terms "object" and "array" come from the conventions of + JavaScript. + + JSON's design goals were for it to be minimal, portable, textual, and + a subset of JavaScript. + + + +Crockford Informational [Page 1] + +RFC 4627 JSON July 2006 + + +1.1. Conventions Used in This Document + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in [RFC2119]. + + The grammatical rules in this document are to be interpreted as + described in [RFC4234]. + +2. 
JSON Grammar + + A JSON text is a sequence of tokens. The set of tokens includes six + structural characters, strings, numbers, and three literal names. + + A JSON text is a serialized object or array. + + JSON-text = object / array + + These are the six structural characters: + + begin-array = ws %x5B ws ; [ left square bracket + + begin-object = ws %x7B ws ; { left curly bracket + + end-array = ws %x5D ws ; ] right square bracket + + end-object = ws %x7D ws ; } right curly bracket + + name-separator = ws %x3A ws ; : colon + + value-separator = ws %x2C ws ; , comma + + Insignificant whitespace is allowed before or after any of the six + structural characters. + + ws = *( + %x20 / ; Space + %x09 / ; Horizontal tab + %x0A / ; Line feed or New line + %x0D ; Carriage return + ) + +2.1. Values + + A JSON value MUST be an object, array, number, or string, or one of + the following three literal names: + + false null true + + + +Crockford Informational [Page 2] + +RFC 4627 JSON July 2006 + + + The literal names MUST be lowercase. No other literal names are + allowed. + + value = false / null / true / object / array / number / string + + false = %x66.61.6c.73.65 ; false + + null = %x6e.75.6c.6c ; null + + true = %x74.72.75.65 ; true + +2.2. Objects + + An object structure is represented as a pair of curly brackets + surrounding zero or more name/value pairs (or members). A name is a + string. A single colon comes after each name, separating the name + from the value. A single comma separates a value from a following + name. The names within an object SHOULD be unique. + + object = begin-object [ member *( value-separator member ) ] + end-object + + member = string name-separator value + +2.3. Arrays + + An array structure is represented as square brackets surrounding zero + or more values (or elements). Elements are separated by commas. + + array = begin-array [ value *( value-separator value ) ] end-array + +2.4. 
Numbers + + The representation of numbers is similar to that used in most + programming languages. A number contains an integer component that + may be prefixed with an optional minus sign, which may be followed by + a fraction part and/or an exponent part. + + Octal and hex forms are not allowed. Leading zeros are not allowed. + + A fraction part is a decimal point followed by one or more digits. + + An exponent part begins with the letter E in upper or lowercase, + which may be followed by a plus or minus sign. The E and optional + sign are followed by one or more digits. + + Numeric values that cannot be represented as sequences of digits + (such as Infinity and NaN) are not permitted. + + + +Crockford Informational [Page 3] + +RFC 4627 JSON July 2006 + + + number = [ minus ] int [ frac ] [ exp ] + + decimal-point = %x2E ; . + + digit1-9 = %x31-39 ; 1-9 + + e = %x65 / %x45 ; e E + + exp = e [ minus / plus ] 1*DIGIT + + frac = decimal-point 1*DIGIT + + int = zero / ( digit1-9 *DIGIT ) + + minus = %x2D ; - + + plus = %x2B ; + + + zero = %x30 ; 0 + +2.5. Strings + + The representation of strings is similar to conventions used in the C + family of programming languages. A string begins and ends with + quotation marks. All Unicode characters may be placed within the + quotation marks except for the characters that must be escaped: + quotation mark, reverse solidus, and the control characters (U+0000 + through U+001F). + + Any character may be escaped. If the character is in the Basic + Multilingual Plane (U+0000 through U+FFFF), then it may be + represented as a six-character sequence: a reverse solidus, followed + by the lowercase letter u, followed by four hexadecimal digits that + encode the character's code point. The hexadecimal letters A though + F can be upper or lowercase. So, for example, a string containing + only a single reverse solidus character may be represented as + "\u005C". 
+ + Alternatively, there are two-character sequence escape + representations of some popular characters. So, for example, a + string containing only a single reverse solidus character may be + represented more compactly as "\\". + + To escape an extended character that is not in the Basic Multilingual + Plane, the character is represented as a twelve-character sequence, + encoding the UTF-16 surrogate pair. So, for example, a string + containing only the G clef character (U+1D11E) may be represented as + "\uD834\uDD1E". + + + +Crockford Informational [Page 4] + +RFC 4627 JSON July 2006 + + + string = quotation-mark *char quotation-mark + + char = unescaped / + escape ( + %x22 / ; " quotation mark U+0022 + %x5C / ; \ reverse solidus U+005C + %x2F / ; / solidus U+002F + %x62 / ; b backspace U+0008 + %x66 / ; f form feed U+000C + %x6E / ; n line feed U+000A + %x72 / ; r carriage return U+000D + %x74 / ; t tab U+0009 + %x75 4HEXDIG ) ; uXXXX U+XXXX + + escape = %x5C ; \ + + quotation-mark = %x22 ; " + + unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + +3. Encoding + + JSON text SHALL be encoded in Unicode. The default encoding is + UTF-8. + + Since the first two characters of a JSON text will always be ASCII + characters [RFC0020], it is possible to determine whether an octet + stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking + at the pattern of nulls in the first four octets. + + 00 00 00 xx UTF-32BE + 00 xx 00 xx UTF-16BE + xx 00 00 00 UTF-32LE + xx 00 xx 00 UTF-16LE + xx xx xx xx UTF-8 + +4. Parsers + + A JSON parser transforms a JSON text into another representation. A + JSON parser MUST accept all texts that conform to the JSON grammar. + A JSON parser MAY accept non-JSON forms or extensions. + + An implementation may set limits on the size of texts that it + accepts. An implementation may set limits on the maximum depth of + nesting. An implementation may set limits on the range of numbers. 
+ An implementation may set limits on the length and character contents + of strings. + + + + +Crockford Informational [Page 5] + +RFC 4627 JSON July 2006 + + +5. Generators + + A JSON generator produces JSON text. The resulting text MUST + strictly conform to the JSON grammar. + +6. IANA Considerations + + The MIME media type for JSON text is application/json. + + Type name: application + + Subtype name: json + + Required parameters: n/a + + Optional parameters: n/a + + Encoding considerations: 8bit if UTF-8; binary if UTF-16 or UTF-32 + + JSON may be represented using UTF-8, UTF-16, or UTF-32. When JSON + is written in UTF-8, JSON is 8bit compatible. When JSON is + written in UTF-16 or UTF-32, the binary content-transfer-encoding + must be used. + + Security considerations: + + Generally there are security issues with scripting languages. JSON + is a subset of JavaScript, but it is a safe subset that excludes + assignment and invocation. + + A JSON text can be safely passed into JavaScript's eval() function + (which compiles and executes a string) if all the characters not + enclosed in strings are in the set of characters that form JSON + tokens. This can be quickly determined in JavaScript with two + regular expressions and calls to the test and replace methods. + + var my_JSON_object = !(/[^,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]/.test( + text.replace(/"(\\.|[^"\\])*"/g, ''))) && + eval('(' + text + ')'); + + Interoperability considerations: n/a + + Published specification: RFC 4627 + + + + + + + + +Crockford Informational [Page 6] + +RFC 4627 JSON July 2006 + + + Applications that use this media type: + + JSON has been used to exchange data between applications written + in all of these programming languages: ActionScript, C, C#, + ColdFusion, Common Lisp, E, Erlang, Java, JavaScript, Lua, + Objective CAML, Perl, PHP, Python, Rebol, Ruby, and Scheme. 
+ + Additional information: + + Magic number(s): n/a + File extension(s): .json + Macintosh file type code(s): TEXT + + Person & email address to contact for further information: + Douglas Crockford + douglas@crockford.com + + Intended usage: COMMON + + Restrictions on usage: none + + Author: + Douglas Crockford + douglas@crockford.com + + Change controller: + Douglas Crockford + douglas@crockford.com + +7. Security Considerations + + See Security Considerations in Section 6. + +8. Examples + + This is a JSON object: + + { + "Image": { + "Width": 800, + "Height": 600, + "Title": "View from 15th Floor", + "Thumbnail": { + "Url": "http://www.example.com/image/481989943", + "Height": 125, + "Width": "100" + }, + "IDs": [116, 943, 234, 38793] + + + +Crockford Informational [Page 7] + +RFC 4627 JSON July 2006 + + + } + } + + Its Image member is an object whose Thumbnail member is an object + and whose IDs member is an array of numbers. + + This is a JSON array containing two objects: + + [ + { + "precision": "zip", + "Latitude": 37.7668, + "Longitude": -122.3959, + "Address": "", + "City": "SAN FRANCISCO", + "State": "CA", + "Zip": "94107", + "Country": "US" + }, + { + "precision": "zip", + "Latitude": 37.371991, + "Longitude": -122.026020, + "Address": "", + "City": "SUNNYVALE", + "State": "CA", + "Zip": "94085", + "Country": "US" + } + ] + +9. References + +9.1. Normative References + + [ECMA] European Computer Manufacturers Association, "ECMAScript + Language Specification 3rd Edition", December 1999, + . + + [RFC0020] Cerf, V., "ASCII format for network interchange", RFC 20, + October 1969. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC4234] Crocker, D. and P. Overell, "Augmented BNF for Syntax + Specifications: ABNF", RFC 4234, October 2005. 
+ + + +Crockford Informational [Page 8] + +RFC 4627 JSON July 2006 + + + [UNICODE] The Unicode Consortium, "The Unicode Standard Version 4.0", + 2003, . + +Author's Address + + Douglas Crockford + JSON.org + EMail: douglas@crockford.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Crockford Informational [Page 9] + +RFC 4627 JSON July 2006 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2006). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. 
+ + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at + ietf-ipr@ietf.org. + +Acknowledgement + + Funding for the RFC Editor function is provided by the IETF + Administrative Support Activity (IASA). + + + + + + + +Crockford Informational [Page 10] + diff --git a/strbuf.c b/strbuf.c new file mode 100644 index 0000000..f823884 --- /dev/null +++ b/strbuf.c @@ -0,0 +1,130 @@ +#include +#include +#include +#include + +#include "strbuf.h" + +static void die(const char *format, ...) +{ + va_list arg; + + va_start(arg, format); + vfprintf(stderr, format, arg); + va_end(arg); + + exit(-1); +} + +void strbuf_init(strbuf_t *s) +{ + s->data = NULL; + s->size = 0; + s->length = 0; + s->increment = STRBUF_DEFAULT_INCREMENT; +} + +strbuf_t *strbuf_new() +{ + strbuf_t *s; + + s = malloc(sizeof(strbuf_t)); + if (!s) + die("Out of memory"); + + strbuf_init(s); + + return s; +} + +void strbuf_set_increment(strbuf_t *s, int increment) +{ + if (increment <= 0) + die("BUG: Invalid string increment"); + + s->increment = increment; +} + +void strbuf_free(strbuf_t *s) +{ + if (s->data) + free(s->data); + free(s); +} + +char *strbuf_to_char(strbuf_t *s, int *len) +{ + char *data; + + data = s->data; + if (len) + *len = s->length; + + free(s); + + return data; +} + +/* Ensure strbuf can handle a string length bytes long (ignoring NULL + * optional termination). 
*/ +void strbuf_resize(strbuf_t *s, int len) +{ + int newsize; + + /* Esnure there is room for optional NULL termination */ + newsize = len + 1; + /* Round up to the next increment */ + newsize = ((newsize + s->increment - 1) / s->increment) * s->increment; + s->size = newsize; + s->data = realloc(s->data, s->size); + if (!s->data) + die("Out of memory"); +} + +void strbuf_append_mem(strbuf_t *s, const char *c, int len) +{ + if (len > strbuf_emptylen(s)) + strbuf_resize(s, s->length + len); + + memcpy(s->data + s->length, c, len); + s->length += len; +} + +void strbuf_ensure_null(strbuf_t *s) +{ + s->data[s->length] = 0; +} + +void strbuf_append_fmt(strbuf_t *s, const char *fmt, ...) +{ + va_list arg; + int fmt_len, try; + int empty_len; + + /* If the first attempt to append fails, resize the buffer appropriately + * and try again */ + for (try = 0; ; try++) { + va_start(arg, fmt); + /* Append the new formatted string */ + /* fmt_len is the length of the string required, excluding the + * trailing NULL */ + empty_len = strbuf_emptylen(s); + /* Add 1 since there is also space for the terminating NULL. 
+ * If the string hasn't been allocated then empty_len == -1, + * and vsprintf() won't store anything on the first pass */ + fmt_len = vsnprintf(s->data + s->length, empty_len + 1, fmt, arg); + va_end(arg); + + if (fmt_len <= empty_len) + break; /* SUCCESS */ + if (try > 0) + die("BUG: length of formatted string changed"); + + strbuf_resize(s, s->length + fmt_len); + } + + s->length += fmt_len; +} + +/* vi:ai et sw=4 ts=4: + */ diff --git a/strbuf.h b/strbuf.h new file mode 100644 index 0000000..fb07e6f --- /dev/null +++ b/strbuf.h @@ -0,0 +1,48 @@ +#include +#include + +typedef struct { + char *data; + int size; /* Bytes allocated */ + int length; /* Current length of string, not including NULL */ + int increment; /* Allocation Increments */ +} strbuf_t; + +#ifndef STRBUF_DEFAULT_INCREMENT +#define STRBUF_DEFAULT_INCREMENT 8 +#endif + +extern void strbuf_init(strbuf_t *s); +extern strbuf_t *strbuf_new(); +extern void strbuf_free(strbuf_t *s); +extern char *strbuf_to_char(strbuf_t *s, int *len); + +extern void strbuf_set_increment(strbuf_t *s, int increment); +extern void strbuf_resize(strbuf_t *s, int len); +extern void strbuf_append_fmt(strbuf_t *s, const char *format, ...); +extern void strbuf_append_mem(strbuf_t *s, const char *c, int len); +extern void strbuf_ensure_null(strbuf_t *s); + +/* Return bytes remaining in the string buffer + * Ensure there is space for a NULL. + * Returns -1 if the string has not been allocated yet */ +static inline int strbuf_emptylen(strbuf_t *s) +{ + return s->size - s->length - 1; +} + +static inline int strbuf_length(strbuf_t *s) +{ + return s->length; +} + +static inline void strbuf_append_char(strbuf_t *s, const char c) +{ + if (strbuf_emptylen(s) < 1) + strbuf_resize(s, s->length + 1); + + s->data[s->length++] = c; +} + +/* vi:ai et sw=4 ts=4: + */ -- cgit v1.2.3-55-g6feb