Support optionally parsing Inf/NaN/Hex numbers

Change strict_numbers to control whether json.decode will parse an expanded set of numbers (Hex, Inf, NaN).
author: Mark Pulford <mark@kyne.com.au> 2011-05-01 18:19:42 +0930
committer: Mark Pulford <mark@kyne.com.au> 2011-05-01 18:19:42 +0930
commit: 024dd94968e60fa3177c869a0c200d116f78f924 (patch)
tree: 5b10db3833a5b16e94888da581b94c839d431279 /lua_cjson.c
parent: 439e03c6f9296ade78985a8d0b5c892846b6b06a (diff)
download: lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.gz
lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.bz2
lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.zip
1 files changed, 73 insertions, 31 deletions
diff --git a/lua_cjson.c b/lua_cjson.c
index 379ea23..abf2bfc 100644
--- a/lua_cjson.c
+++ b/lua_cjson.c
@@ -29,7 +29,6 @@
 *   JSON. Most unprintable characters are not escaped.
 * - Invalid UTF-8 characters are not detected and will be passed
 *   untouched.
- * - Cannot parse NaN/Inf numbers when strict_numbers has been disabled.
 * - Javascript comments are not part of the JSON spec, and are not
 *   supported.
 *
@@ -175,6 +174,9 @@ static int json_max_depth(lua_State *l)
    return 1;
 }
+/* When disabled, supports:
+ * - encoding/decoding NaN/Infinity.
+ * - decoding hexadecimal numbers. */
 static int json_strict_numbers(lua_State *l)
 {
    json_config_t *cfg;
@@ -214,17 +216,21 @@ static void json_create_config(lua_State *l)
    cfg->ch2token['\r'] = T_WHITESPACE;
    /* Update characters that require further processing */
-    cfg->ch2token['n'] = T_UNKNOWN;
+    cfg->ch2token['f'] = T_UNKNOWN;     /* false? */
-    cfg->ch2token['t'] = T_UNKNOWN;
+    cfg->ch2token['i'] = T_UNKNOWN;     /* inf, infinity? */
-    cfg->ch2token['f'] = T_UNKNOWN;
+    cfg->ch2token['I'] = T_UNKNOWN;
-    cfg->ch2token['"'] = T_UNKNOWN;
+    cfg->ch2token['n'] = T_UNKNOWN;     /* null, nan? */
+    cfg->ch2token['N'] = T_UNKNOWN;
+    cfg->ch2token['t'] = T_UNKNOWN;     /* true? */
+    cfg->ch2token['"'] = T_UNKNOWN;     /* string? */
+    cfg->ch2token['+'] = T_UNKNOWN;     /* number? */
    cfg->ch2token['-'] = T_UNKNOWN;
    for (i = 0; i < 10; i++)
        cfg->ch2token['0' + i] = T_UNKNOWN;
+    /* Lookup table for parsing escape characters */
    for (i = 0; i < 256; i++)
-        cfg->ch2escape[i] = 0;  /* String error */
+        cfg->ch2escape[i] = 0;          /* String error */
    cfg->ch2escape['"'] = '"';
    cfg->ch2escape['\\'] = '\\';
    cfg->ch2escape['/'] = '/';
@@ -233,7 +239,7 @@ static void json_create_config(lua_State *l)
    cfg->ch2escape['n'] = '\n';
    cfg->ch2escape['f'] = '\f';
    cfg->ch2escape['r'] = '\r';
-    cfg->ch2escape['u'] = 'u';  /* This needs to be parsed as unicode */
+    cfg->ch2escape['u'] = 'u';          /* Unicode parsing required */
    cfg->sparse_ratio = DEFAULT_SPARSE_RATIO;
    cfg->max_depth = DEFAULT_MAX_DEPTH;
@@ -675,33 +681,53 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
    token->value.string = strbuf_string(json->tmp, &token->string_len);
 }
-static void json_next_number_token(json_parse_t *json, json_token_t *token)
+/* JSON numbers should take the following form:
+ *      -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
+ *
+ * json_next_number_token() uses strtod() which allows other forms:
+ * - numbers starting with '+'
+ * - NaN, -NaN, infinity, -infinity
+ * - hexadecimal numbers
+ *
+ * json_is_invalid_number() detects "numbers" which may pass strtod()'s
+ * error checking, but should not be allowed with strict JSON.
+ *
+ * json_is_invalid_number() may pass numbers which cause strtod()
+ * to generate an error.
+ */
+static int json_is_invalid_number(json_parse_t *json)
 {
-    const char *startptr;
+    int i = json->index;
-    char *endptr;
+    char ch;
-    int i;
+    /* Reject numbers starting with + */
+    if (json->data[i] == '+')
+        return 1;
-    /* JSON numbers should take the following form:
-     *      -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
-     *
-     * strtod() below allows other forms:
-     * - numbers starting with '+'
-     * - infinity, NaN
-     * - hexidecimal numbers
-     *
-     * Infinity/NaN and numbers starting with '+' can't occur due to
-     * earlier parser error checking.
-     *
-     * Generate an error if a hexidecimal number has been
-     * provided ("0x" or "0X").
-     */
-    i = json->index;
    if (json->data[i] == '-')
        i++;
-    if (json->data[i] == '0' && (json->data[i + 1] | 0x20) == 'x') {
-        json_set_token_error(token, json, "invalid number (hexidecimal)");
+    /* Reject numbers starting with 0x, pass other numbers starting
-        return;
+     * with 0 */
-    }
+    if (json->data[i] == '0')
+        return ((json->data[i + 1] | 0x20) == 'x');
+    /* Reject inf/nan */
+    ch = json->data[i] | 0x20;
+    if (ch == 'i' && !strncasecmp(&json->data[i], "inf", 3))
+        return 1;
+    if (ch == 'n' && !strncasecmp(&json->data[i], "nan", 3))
+        return 1;
+    /* Pass all other numbers which may still be invalid, but
+     * strtod() will catch them. */
+    return 0;
+}
+static void json_next_number_token(json_parse_t *json, json_token_t *token)
+{
+    const char *startptr;
+    char *endptr;
    token->type = T_NUMBER;
    startptr = &json->data[json->index];
@@ -748,10 +774,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
    /* Process characters which triggered T_UNKNOWN */
    ch = json->data[json->index];
+    /* Must use strncmp() to match the front of the JSON string
+     * JSON identifier must be lowercase.
+     * When strict_numbers is disabled, either case is allowed for
+     * Infinity/NaN (since we are no longer following the spec..) */
    if (ch == '"') {
        json_next_string_token(json, token);
        return;
    } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
+        if (json->cfg->strict_numbers && json_is_invalid_number(json)) {
+            json_set_token_error(token, json, "invalid number");
+            return;
+        }
        json_next_number_token(json, token);
        return;
    } else if (!strncmp(&json->data[json->index], "true", 4)) {
@@ -768,6 +802,14 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
        token->type = T_NULL;
        json->index += 4;
        return;
+    } else if (!json->cfg->strict_numbers && json_is_invalid_number(json)) {
+        /* When strict_numbers is disabled, only attempt to process
+         * numbers we know are invalid JSON (Inf, NaN, hex)
+         * This is required to generate an appropriate token error,
+         * otherwise all bad tokens will register as "invalid number"
+         */
+        json_next_number_token(json, token);
+        return;
    }
    /* Token starts with t/f/n but isn't recognised above. */
author	Mark Pulford <mark@kyne.com.au>	2011-05-01 18:19:42 +0930
committer	Mark Pulford <mark@kyne.com.au>	2011-05-01 18:19:42 +0930
commit	024dd94968e60fa3177c869a0c200d116f78f924 (patch)
tree	5b10db3833a5b16e94888da581b94c839d431279 /lua_cjson.c
parent	439e03c6f9296ade78985a8d0b5c892846b6b06a (diff)
download	lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.gz lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.bz2 lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.zip

diff --git a/lua_cjson.c b/lua_cjson.c index 379ea23..abf2bfc 100644 --- a/lua_cjson.c +++ b/lua_cjson.c
@@ -29,7 +29,6 @@
29	* JSON. Most unprintable characters are not escaped.	29	* JSON. Most unprintable characters are not escaped.
30	* - Invalid UTF-8 characters are not detected and will be passed	30	* - Invalid UTF-8 characters are not detected and will be passed
31	* untouched.	31	* untouched.
32	* - Cannot parse NaN/Inf numbers when strict_numbers has been disabled.
33	* - Javascript comments are not part of the JSON spec, and are not	32	* - Javascript comments are not part of the JSON spec, and are not
34	* supported.	33	* supported.
35	*	34	*
@@ -175,6 +174,9 @@ static int json_max_depth(lua_State *l)
175	return 1;	174	return 1;
176	}	175	}
177		176
		177	/* When disabled, supports:
		178	* - encoding/decoding NaN/Infinity.
		179	* - decoding hexadecimal numbers. */
178	static int json_strict_numbers(lua_State *l)	180	static int json_strict_numbers(lua_State *l)
179	{	181	{
180	json_config_t *cfg;	182	json_config_t *cfg;
@@ -214,17 +216,21 @@ static void json_create_config(lua_State *l)
214	cfg->ch2token['\r'] = T_WHITESPACE;	216	cfg->ch2token['\r'] = T_WHITESPACE;
215		217
216	/* Update characters that require further processing */	218	/* Update characters that require further processing */
217	cfg->ch2token['n'] = T_UNKNOWN;	219	cfg->ch2token['f'] = T_UNKNOWN; /* false? */
218	cfg->ch2token['t'] = T_UNKNOWN;	220	cfg->ch2token['i'] = T_UNKNOWN; /* inf, infinity? */
219	cfg->ch2token['f'] = T_UNKNOWN;	221	cfg->ch2token['I'] = T_UNKNOWN;
220	cfg->ch2token['"'] = T_UNKNOWN;	222	cfg->ch2token['n'] = T_UNKNOWN; /* null, nan? */
		223	cfg->ch2token['N'] = T_UNKNOWN;
		224	cfg->ch2token['t'] = T_UNKNOWN; /* true? */
		225	cfg->ch2token['"'] = T_UNKNOWN; /* string? */
		226	cfg->ch2token['+'] = T_UNKNOWN; /* number? */
221	cfg->ch2token['-'] = T_UNKNOWN;	227	cfg->ch2token['-'] = T_UNKNOWN;
222	for (i = 0; i < 10; i++)	228	for (i = 0; i < 10; i++)
223	cfg->ch2token['0' + i] = T_UNKNOWN;	229	cfg->ch2token['0' + i] = T_UNKNOWN;
224		230
		231	/* Lookup table for parsing escape characters */
225	for (i = 0; i < 256; i++)	232	for (i = 0; i < 256; i++)
226	cfg->ch2escape[i] = 0; /* String error */	233	cfg->ch2escape[i] = 0; /* String error */
227
228	cfg->ch2escape['"'] = '"';	234	cfg->ch2escape['"'] = '"';
229	cfg->ch2escape['\\'] = '\\';	235	cfg->ch2escape['\\'] = '\\';
230	cfg->ch2escape['/'] = '/';	236	cfg->ch2escape['/'] = '/';
@@ -233,7 +239,7 @@ static void json_create_config(lua_State *l)
233	cfg->ch2escape['n'] = '\n';	239	cfg->ch2escape['n'] = '\n';
234	cfg->ch2escape['f'] = '\f';	240	cfg->ch2escape['f'] = '\f';
235	cfg->ch2escape['r'] = '\r';	241	cfg->ch2escape['r'] = '\r';
236	cfg->ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */	242	cfg->ch2escape['u'] = 'u'; /* Unicode parsing required */
237		243
238	cfg->sparse_ratio = DEFAULT_SPARSE_RATIO;	244	cfg->sparse_ratio = DEFAULT_SPARSE_RATIO;
239	cfg->max_depth = DEFAULT_MAX_DEPTH;	245	cfg->max_depth = DEFAULT_MAX_DEPTH;
@@ -675,33 +681,53 @@ static void json_next_string_token(json_parse_t json, json_token_t token)
675	token->value.string = strbuf_string(json->tmp, &token->string_len);	681	token->value.string = strbuf_string(json->tmp, &token->string_len);
676	}	682	}
677		683
678	static void json_next_number_token(json_parse_t json, json_token_t token)	684	/* JSON numbers should take the following form:
		685	* -?(0\|[1-9]\|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
		686	*
		687	* json_next_number_token() uses strtod() which allows other forms:
		688	* - numbers starting with '+'
		689	* - NaN, -NaN, infinity, -infinity
		690	* - hexidecimal numbers
		691	*
		692	* json_is_invalid_number() detects "numbers" which may pass strtod()'s
		693	* error checking, but should not be allowed with strict JSON.
		694	*
		695	* json_is_invalid_number() may pass numbers which cause strtod()
		696	* to generate an error.
		697	*/
		698	static int json_is_invalid_number(json_parse_t *json)
679	{	699	{
680	const char *startptr;	700	int i = json->index;
681	char *endptr;	701	char ch;
682	int i;	702
		703	/* Reject numbers starting with + */
		704	if (json->data[i] == '+')
		705	return 1;
683		706
684	/* JSON numbers should take the following form:
685	* -?(0\|[1-9]\|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
686	*
687	* strtod() below allows other forms:
688	* - numbers starting with '+'
689	* - infinity, NaN
690	* - hexidecimal numbers
691	*
692	* Infinity/NaN and numbers starting with '+' can't occur due to
693	* earlier parser error checking.
694	*
695	* Generate an error if a hexidecimal number has been
696	* provided ("0x" or "0X").
697	*/
698	i = json->index;
699	if (json->data[i] == '-')	707	if (json->data[i] == '-')
700	i++;	708	i++;
701	if (json->data[i] == '0' && (json->data[i + 1] \| 0x20) == 'x') {	709
702	json_set_token_error(token, json, "invalid number (hexidecimal)");	710	/* Reject numbers starting with 0x, pass other numbers starting
703	return;	711	* with 0 */
704	}	712	if (json->data[i] == '0')
		713	return ((json->data[i + 1] \| 0x20) == 'x');
		714
		715	/* Reject inf/nan */
		716	ch = json->data[i] \| 0x20;
		717	if (ch == 'i' && !strncasecmp(&json->data[i], "inf", 3))
		718	return 1;
		719	if (ch == 'n' && !strncasecmp(&json->data[i], "nan", 3))
		720	return 1;
		721
		722	/* Pass all other numbers which may still be invalid, but
		723	* strtod() will catch them. */
		724	return 0;
		725	}
		726
		727	static void json_next_number_token(json_parse_t json, json_token_t token)
		728	{
		729	const char *startptr;
		730	char *endptr;
705		731
706	token->type = T_NUMBER;	732	token->type = T_NUMBER;
707	startptr = &json->data[json->index];	733	startptr = &json->data[json->index];
@@ -748,10 +774,18 @@ static void json_next_token(json_parse_t json, json_token_t token)
748	/* Process characters which triggered T_UNKNOWN */	774	/* Process characters which triggered T_UNKNOWN */
749	ch = json->data[json->index];	775	ch = json->data[json->index];
750		776
		777	/* Must use strncmp() to match the front of the JSON string
		778	* JSON identifier must be lowercase.
		779	* When strict_numbers is disabled, either case is allowed for
		780	* Infinity/NaN (since we are no longer following the spec..) */
751	if (ch == '"') {	781	if (ch == '"') {
752	json_next_string_token(json, token);	782	json_next_string_token(json, token);
753	return;	783	return;
754	} else if (ch == '-' \|\| ('0' <= ch && ch <= '9')) {	784	} else if (ch == '-' \|\| ('0' <= ch && ch <= '9')) {
		785	if (json->cfg->strict_numbers && json_is_invalid_number(json)) {
		786	json_set_token_error(token, json, "invalid number");
		787	return;
		788	}
755	json_next_number_token(json, token);	789	json_next_number_token(json, token);
756	return;	790	return;
757	} else if (!strncmp(&json->data[json->index], "true", 4)) {	791	} else if (!strncmp(&json->data[json->index], "true", 4)) {
@@ -768,6 +802,14 @@ static void json_next_token(json_parse_t json, json_token_t token)
768	token->type = T_NULL;	802	token->type = T_NULL;
769	json->index += 4;	803	json->index += 4;
770	return;	804	return;
		805	} else if (!json->cfg->strict_numbers && json_is_invalid_number(json)) {
		806	/* When strict_numbers is disabled, only attempt to process
		807	* numbers we know are invalid JSON (Inf, NaN, hex)
		808	* This is required to generate an appropriate token error,
		809	* otherwise all bad tokens will register as "invalid number"
		810	*/
		811	json_next_number_token(json, token);
		812	return;
771	}	813	}
772		814
773	/* Token starts with t/f/n but isn't recognised above. */	815	/* Token starts with t/f/n but isn't recognised above. */