From 024dd94968e60fa3177c869a0c200d116f78f924 Mon Sep 17 00:00:00 2001
From: Mark Pulford <mark@kyne.com.au>
Date: Sun, 1 May 2011 18:19:42 +0930
Subject: Support optionally parsing Inf/NaN/Hex numbers

Change strict_numbers to control whether json.decode will parse an
expanded set of numbers (Hex, Inf, NaN).
---
 lua_cjson.c | 104 ++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 73 insertions(+), 31 deletions(-)

(limited to 'lua_cjson.c')

diff --git a/lua_cjson.c b/lua_cjson.c
index 379ea23..abf2bfc 100644
--- a/lua_cjson.c
+++ b/lua_cjson.c
@@ -29,7 +29,6 @@
  *   JSON. Most unprintable characters are not escaped.
  * - Invalid UTF-8 characters are not detected and will be passed
  *   untouched.
- * - Cannot parse NaN/Inf numbers when strict_numbers has been disabled.
  * - Javascript comments are not part of the JSON spec, and are not
  *   supported.
  *
@@ -175,6 +174,9 @@ static int json_max_depth(lua_State *l)
     return 1;
 }
 
+/* When disabled, supports:
+ * - encoding/decoding NaN/Infinity.
+ * - decoding hexidecimal numbers. */
 static int json_strict_numbers(lua_State *l)
 {
     json_config_t *cfg;
@@ -214,17 +216,21 @@ static void json_create_config(lua_State *l)
     cfg->ch2token['\r'] = T_WHITESPACE;
 
     /* Update characters that require further processing */
-    cfg->ch2token['n'] = T_UNKNOWN;
-    cfg->ch2token['t'] = T_UNKNOWN;
-    cfg->ch2token['f'] = T_UNKNOWN;
-    cfg->ch2token['"'] = T_UNKNOWN;
+    cfg->ch2token['f'] = T_UNKNOWN;     /* false? */
+    cfg->ch2token['i'] = T_UNKNOWN;     /* inf, ininity? */
+    cfg->ch2token['I'] = T_UNKNOWN;
+    cfg->ch2token['n'] = T_UNKNOWN;     /* null, nan? */
+    cfg->ch2token['N'] = T_UNKNOWN;
+    cfg->ch2token['t'] = T_UNKNOWN;     /* true? */
+    cfg->ch2token['"'] = T_UNKNOWN;     /* string? */
+    cfg->ch2token['+'] = T_UNKNOWN;     /* number? */
     cfg->ch2token['-'] = T_UNKNOWN;
     for (i = 0; i < 10; i++)
         cfg->ch2token['0' + i] = T_UNKNOWN;
 
+    /* Lookup table for parsing escape characters */
     for (i = 0; i < 256; i++)
-        cfg->ch2escape[i] = 0;  /* String error */
-
+        cfg->ch2escape[i] = 0;          /* String error */
     cfg->ch2escape['"'] = '"';
     cfg->ch2escape['\\'] = '\\';
     cfg->ch2escape['/'] = '/';
@@ -233,7 +239,7 @@ static void json_create_config(lua_State *l)
     cfg->ch2escape['n'] = '\n';
     cfg->ch2escape['f'] = '\f';
     cfg->ch2escape['r'] = '\r';
-    cfg->ch2escape['u'] = 'u';  /* This needs to be parsed as unicode */
+    cfg->ch2escape['u'] = 'u';          /* Unicode parsing required */
 
     cfg->sparse_ratio = DEFAULT_SPARSE_RATIO;
     cfg->max_depth = DEFAULT_MAX_DEPTH;
@@ -675,33 +681,53 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
     token->value.string = strbuf_string(json->tmp, &token->string_len);
 }
 
-static void json_next_number_token(json_parse_t *json, json_token_t *token)
+/* JSON numbers should take the following form:
+ *      -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
+ *
+ * json_next_number_token() uses strtod() which allows other forms:
+ * - numbers starting with '+'
+ * - NaN, -NaN, infinity, -infinity
+ * - hexidecimal numbers
+ *
+ * json_is_invalid_number() detects "numbers" which may pass strtod()'s
+ * error checking, but should not be allowed with strict JSON.
+ *
+ * json_is_invalid_number() may pass numbers which cause strtod()
+ * to generate an error.
+ */
+static int json_is_invalid_number(json_parse_t *json)
 {
-    const char *startptr;
-    char *endptr;
-    int i;
+    int i = json->index;
+    char ch;
+
+    /* Reject numbers starting with + */
+    if (json->data[i] == '+')
+        return 1;
 
-    /* JSON numbers should take the following form:
-     *      -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
-     *
-     * strtod() below allows other forms:
-     * - numbers starting with '+'
-     * - infinity, NaN
-     * - hexidecimal numbers
-     *
-     * Infinity/NaN and numbers starting with '+' can't occur due to
-     * earlier parser error checking.
-     *
-     * Generate an error if a hexidecimal number has been
-     * provided ("0x" or "0X").
-     */
-    i = json->index;
     if (json->data[i] == '-')
         i++;
-    if (json->data[i] == '0' && (json->data[i + 1] | 0x20) == 'x') {
-        json_set_token_error(token, json, "invalid number (hexidecimal)");
-        return;
-    }
+
+    /* Reject numbers starting with 0x, pass other numbers starting
+     * with 0 */
+    if (json->data[i] == '0')
+        return ((json->data[i + 1] | 0x20) == 'x');
+
+    /* Reject inf/nan */
+    ch = json->data[i] | 0x20;
+    if (ch == 'i' && !strncasecmp(&json->data[i], "inf", 3))
+        return 1;
+    if (ch == 'n' && !strncasecmp(&json->data[i], "nan", 3))
+        return 1;
+
+    /* Pass all other numbers which may still be invalid, but
+     * strtod() will catch them. */
+    return 0;
+}
+
+static void json_next_number_token(json_parse_t *json, json_token_t *token)
+{
+    const char *startptr;
+    char *endptr;
 
     token->type = T_NUMBER;
     startptr = &json->data[json->index];
@@ -748,10 +774,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
     /* Process characters which triggered T_UNKNOWN */
     ch = json->data[json->index];
 
+    /* Must use strncmp() to match the front of the JSON string
+     * JSON identifier must be lowercase.
+     * When strict_numbers if disabled, either case is allowed for
+     * Infinity/NaN (since we are no longer following the spec..) */
     if (ch == '"') {
         json_next_string_token(json, token);
         return;
     } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
+        if (json->cfg->strict_numbers && json_is_invalid_number(json)) {
+            json_set_token_error(token, json, "invalid number");
+            return;
+        }
         json_next_number_token(json, token);
         return;
     } else if (!strncmp(&json->data[json->index], "true", 4)) {
@@ -768,6 +802,14 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
         token->type = T_NULL;
         json->index += 4;
         return;
+    } else if (!json->cfg->strict_numbers && json_is_invalid_number(json)) {
+        /* When strict_numbers is disabled, only attempt to process
+         * numbers we know are invalid JSON (Inf, NaN, hex)
+         * This is required to generate an appropriate token error,
+         * otherwise all bad tokens will register as "invalid number"
+         */
+        json_next_number_token(json, token);
+        return;
     }
 
     /* Token starts with t/f/n but isn't recognised above. */
-- 
cgit v1.2.3-55-g6feb