aboutsummaryrefslogtreecommitdiff
path: root/lua_cjson.c
diff options
context:
space:
mode:
authorMark Pulford <mark@kyne.com.au>2011-05-01 18:19:42 +0930
committerMark Pulford <mark@kyne.com.au>2011-05-01 18:19:42 +0930
commit024dd94968e60fa3177c869a0c200d116f78f924 (patch)
tree5b10db3833a5b16e94888da581b94c839d431279 /lua_cjson.c
parent439e03c6f9296ade78985a8d0b5c892846b6b06a (diff)
downloadlua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.gz
lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.bz2
lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.zip
Support optionally parsing Inf/NaN/Hex numbers
Change strict_numbers to control whether json.decode will parse an expanded set of numbers (Hex, Inf, NaN).
Diffstat (limited to 'lua_cjson.c')
-rw-r--r--lua_cjson.c104
1 files changed, 73 insertions, 31 deletions
diff --git a/lua_cjson.c b/lua_cjson.c
index 379ea23..abf2bfc 100644
--- a/lua_cjson.c
+++ b/lua_cjson.c
@@ -29,7 +29,6 @@
29 * JSON. Most unprintable characters are not escaped. 29 * JSON. Most unprintable characters are not escaped.
30 * - Invalid UTF-8 characters are not detected and will be passed 30 * - Invalid UTF-8 characters are not detected and will be passed
31 * untouched. 31 * untouched.
32 * - Cannot parse NaN/Inf numbers when strict_numbers has been disabled.
33 * - Javascript comments are not part of the JSON spec, and are not 32 * - Javascript comments are not part of the JSON spec, and are not
34 * supported. 33 * supported.
35 * 34 *
@@ -175,6 +174,9 @@ static int json_max_depth(lua_State *l)
175 return 1; 174 return 1;
176} 175}
177 176
177/* When disabled, supports:
178 * - encoding/decoding NaN/Infinity.
179 * - decoding hexadecimal numbers. */
178static int json_strict_numbers(lua_State *l) 180static int json_strict_numbers(lua_State *l)
179{ 181{
180 json_config_t *cfg; 182 json_config_t *cfg;
@@ -214,17 +216,21 @@ static void json_create_config(lua_State *l)
214 cfg->ch2token['\r'] = T_WHITESPACE; 216 cfg->ch2token['\r'] = T_WHITESPACE;
215 217
216 /* Update characters that require further processing */ 218 /* Update characters that require further processing */
217 cfg->ch2token['n'] = T_UNKNOWN; 219 cfg->ch2token['f'] = T_UNKNOWN; /* false? */
218 cfg->ch2token['t'] = T_UNKNOWN; 220 cfg->ch2token['i'] = T_UNKNOWN; /* inf, infinity? */
219 cfg->ch2token['f'] = T_UNKNOWN; 221 cfg->ch2token['I'] = T_UNKNOWN;
220 cfg->ch2token['"'] = T_UNKNOWN; 222 cfg->ch2token['n'] = T_UNKNOWN; /* null, nan? */
223 cfg->ch2token['N'] = T_UNKNOWN;
224 cfg->ch2token['t'] = T_UNKNOWN; /* true? */
225 cfg->ch2token['"'] = T_UNKNOWN; /* string? */
226 cfg->ch2token['+'] = T_UNKNOWN; /* number? */
221 cfg->ch2token['-'] = T_UNKNOWN; 227 cfg->ch2token['-'] = T_UNKNOWN;
222 for (i = 0; i < 10; i++) 228 for (i = 0; i < 10; i++)
223 cfg->ch2token['0' + i] = T_UNKNOWN; 229 cfg->ch2token['0' + i] = T_UNKNOWN;
224 230
231 /* Lookup table for parsing escape characters */
225 for (i = 0; i < 256; i++) 232 for (i = 0; i < 256; i++)
226 cfg->ch2escape[i] = 0; /* String error */ 233 cfg->ch2escape[i] = 0; /* String error */
227
228 cfg->ch2escape['"'] = '"'; 234 cfg->ch2escape['"'] = '"';
229 cfg->ch2escape['\\'] = '\\'; 235 cfg->ch2escape['\\'] = '\\';
230 cfg->ch2escape['/'] = '/'; 236 cfg->ch2escape['/'] = '/';
@@ -233,7 +239,7 @@ static void json_create_config(lua_State *l)
233 cfg->ch2escape['n'] = '\n'; 239 cfg->ch2escape['n'] = '\n';
234 cfg->ch2escape['f'] = '\f'; 240 cfg->ch2escape['f'] = '\f';
235 cfg->ch2escape['r'] = '\r'; 241 cfg->ch2escape['r'] = '\r';
236 cfg->ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */ 242 cfg->ch2escape['u'] = 'u'; /* Unicode parsing required */
237 243
238 cfg->sparse_ratio = DEFAULT_SPARSE_RATIO; 244 cfg->sparse_ratio = DEFAULT_SPARSE_RATIO;
239 cfg->max_depth = DEFAULT_MAX_DEPTH; 245 cfg->max_depth = DEFAULT_MAX_DEPTH;
@@ -675,33 +681,53 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
675 token->value.string = strbuf_string(json->tmp, &token->string_len); 681 token->value.string = strbuf_string(json->tmp, &token->string_len);
676} 682}
677 683
678static void json_next_number_token(json_parse_t *json, json_token_t *token) 684/* JSON numbers should take the following form:
685 * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
686 *
687 * json_next_number_token() uses strtod() which allows other forms:
688 * - numbers starting with '+'
689 * - NaN, -NaN, infinity, -infinity
690 * - hexadecimal numbers
691 *
692 * json_is_invalid_number() detects "numbers" which may pass strtod()'s
693 * error checking, but should not be allowed with strict JSON.
694 *
695 * json_is_invalid_number() may pass numbers which cause strtod()
696 * to generate an error.
697 */
698static int json_is_invalid_number(json_parse_t *json)
679{ 699{
680 const char *startptr; 700 int i = json->index;
681 char *endptr; 701 char ch;
682 int i; 702
703 /* Reject numbers starting with + */
704 if (json->data[i] == '+')
705 return 1;
683 706
684 /* JSON numbers should take the following form:
685 * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
686 *
687 * strtod() below allows other forms:
688 * - numbers starting with '+'
689 * - infinity, NaN
690 * - hexadecimal numbers
691 *
692 * Infinity/NaN and numbers starting with '+' can't occur due to
693 * earlier parser error checking.
694 *
695 * Generate an error if a hexadecimal number has been
696 * provided ("0x" or "0X").
697 */
698 i = json->index;
699 if (json->data[i] == '-') 707 if (json->data[i] == '-')
700 i++; 708 i++;
701 if (json->data[i] == '0' && (json->data[i + 1] | 0x20) == 'x') { 709
702 json_set_token_error(token, json, "invalid number (hexidecimal)"); 710 /* Reject numbers starting with 0x, pass other numbers starting
703 return; 711 * with 0 */
704 } 712 if (json->data[i] == '0')
713 return ((json->data[i + 1] | 0x20) == 'x');
714
715 /* Reject inf/nan */
716 ch = json->data[i] | 0x20;
717 if (ch == 'i' && !strncasecmp(&json->data[i], "inf", 3))
718 return 1;
719 if (ch == 'n' && !strncasecmp(&json->data[i], "nan", 3))
720 return 1;
721
722 /* Pass all other numbers which may still be invalid, but
723 * strtod() will catch them. */
724 return 0;
725}
726
727static void json_next_number_token(json_parse_t *json, json_token_t *token)
728{
729 const char *startptr;
730 char *endptr;
705 731
706 token->type = T_NUMBER; 732 token->type = T_NUMBER;
707 startptr = &json->data[json->index]; 733 startptr = &json->data[json->index];
@@ -748,10 +774,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
748 /* Process characters which triggered T_UNKNOWN */ 774 /* Process characters which triggered T_UNKNOWN */
749 ch = json->data[json->index]; 775 ch = json->data[json->index];
750 776
777 /* Must use strncmp() to match the front of the JSON string.
778 * JSON identifiers must be lowercase.
779 * When strict_numbers is disabled, either case is allowed for
780 * Infinity/NaN (since we are no longer following the spec). */
751 if (ch == '"') { 781 if (ch == '"') {
752 json_next_string_token(json, token); 782 json_next_string_token(json, token);
753 return; 783 return;
754 } else if (ch == '-' || ('0' <= ch && ch <= '9')) { 784 } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
785 if (json->cfg->strict_numbers && json_is_invalid_number(json)) {
786 json_set_token_error(token, json, "invalid number");
787 return;
788 }
755 json_next_number_token(json, token); 789 json_next_number_token(json, token);
756 return; 790 return;
757 } else if (!strncmp(&json->data[json->index], "true", 4)) { 791 } else if (!strncmp(&json->data[json->index], "true", 4)) {
@@ -768,6 +802,14 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
768 token->type = T_NULL; 802 token->type = T_NULL;
769 json->index += 4; 803 json->index += 4;
770 return; 804 return;
805 } else if (!json->cfg->strict_numbers && json_is_invalid_number(json)) {
806 /* When strict_numbers is disabled, only attempt to process
807 * numbers we know are invalid JSON (Inf, NaN, hex)
808 * This is required to generate an appropriate token error,
809 * otherwise all bad tokens will register as "invalid number"
810 */
811 json_next_number_token(json, token);
812 return;
771 } 813 }
772 814
773 /* Token starts with t/f/n but isn't recognised above. */ 815 /* Token starts with t/f/n but isn't recognised above. */