diff options
author | Mark Pulford <mark@kyne.com.au> | 2011-05-01 18:19:42 +0930 |
---|---|---|
committer | Mark Pulford <mark@kyne.com.au> | 2011-05-01 18:19:42 +0930 |
commit | 024dd94968e60fa3177c869a0c200d116f78f924 (patch) | |
tree | 5b10db3833a5b16e94888da581b94c839d431279 /lua_cjson.c | |
parent | 439e03c6f9296ade78985a8d0b5c892846b6b06a (diff) | |
download | lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.gz lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.bz2 lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.zip |
Support optionally parsing Inf/NaN/Hex numbers
Change strict_numbers to control whether json.decode will parse an
expanded set of numbers (Hex, Inf, NaN).
Diffstat (limited to 'lua_cjson.c')
-rw-r--r-- | lua_cjson.c | 104 |
1 files changed, 73 insertions, 31 deletions
diff --git a/lua_cjson.c b/lua_cjson.c index 379ea23..abf2bfc 100644 --- a/lua_cjson.c +++ b/lua_cjson.c | |||
@@ -29,7 +29,6 @@ | |||
29 | * JSON. Most unprintable characters are not escaped. | 29 | * JSON. Most unprintable characters are not escaped. |
30 | * - Invalid UTF-8 characters are not detected and will be passed | 30 | * - Invalid UTF-8 characters are not detected and will be passed |
31 | * untouched. | 31 | * untouched. |
32 | * - Cannot parse NaN/Inf numbers when strict_numbers has been disabled. | ||
33 | * - Javascript comments are not part of the JSON spec, and are not | 32 | * - Javascript comments are not part of the JSON spec, and are not |
34 | * supported. | 33 | * supported. |
35 | * | 34 | * |
@@ -175,6 +174,9 @@ static int json_max_depth(lua_State *l) | |||
175 | return 1; | 174 | return 1; |
176 | } | 175 | } |
177 | 176 | ||
177 | /* When disabled, supports: | ||
178 | * - encoding/decoding NaN/Infinity. | ||
179 | * - decoding hexadecimal numbers. */ | ||
178 | static int json_strict_numbers(lua_State *l) | 180 | static int json_strict_numbers(lua_State *l) |
179 | { | 181 | { |
180 | json_config_t *cfg; | 182 | json_config_t *cfg; |
@@ -214,17 +216,21 @@ static void json_create_config(lua_State *l) | |||
214 | cfg->ch2token['\r'] = T_WHITESPACE; | 216 | cfg->ch2token['\r'] = T_WHITESPACE; |
215 | 217 | ||
216 | /* Update characters that require further processing */ | 218 | /* Update characters that require further processing */ |
217 | cfg->ch2token['n'] = T_UNKNOWN; | 219 | cfg->ch2token['f'] = T_UNKNOWN; /* false? */ |
218 | cfg->ch2token['t'] = T_UNKNOWN; | 220 | cfg->ch2token['i'] = T_UNKNOWN; /* inf, infinity? */ |
219 | cfg->ch2token['f'] = T_UNKNOWN; | 221 | cfg->ch2token['I'] = T_UNKNOWN; |
220 | cfg->ch2token['"'] = T_UNKNOWN; | 222 | cfg->ch2token['n'] = T_UNKNOWN; /* null, nan? */ |
223 | cfg->ch2token['N'] = T_UNKNOWN; | ||
224 | cfg->ch2token['t'] = T_UNKNOWN; /* true? */ | ||
225 | cfg->ch2token['"'] = T_UNKNOWN; /* string? */ | ||
226 | cfg->ch2token['+'] = T_UNKNOWN; /* number? */ | ||
221 | cfg->ch2token['-'] = T_UNKNOWN; | 227 | cfg->ch2token['-'] = T_UNKNOWN; |
222 | for (i = 0; i < 10; i++) | 228 | for (i = 0; i < 10; i++) |
223 | cfg->ch2token['0' + i] = T_UNKNOWN; | 229 | cfg->ch2token['0' + i] = T_UNKNOWN; |
224 | 230 | ||
231 | /* Lookup table for parsing escape characters */ | ||
225 | for (i = 0; i < 256; i++) | 232 | for (i = 0; i < 256; i++) |
226 | cfg->ch2escape[i] = 0; /* String error */ | 233 | cfg->ch2escape[i] = 0; /* String error */ |
227 | |||
228 | cfg->ch2escape['"'] = '"'; | 234 | cfg->ch2escape['"'] = '"'; |
229 | cfg->ch2escape['\\'] = '\\'; | 235 | cfg->ch2escape['\\'] = '\\'; |
230 | cfg->ch2escape['/'] = '/'; | 236 | cfg->ch2escape['/'] = '/'; |
@@ -233,7 +239,7 @@ static void json_create_config(lua_State *l) | |||
233 | cfg->ch2escape['n'] = '\n'; | 239 | cfg->ch2escape['n'] = '\n'; |
234 | cfg->ch2escape['f'] = '\f'; | 240 | cfg->ch2escape['f'] = '\f'; |
235 | cfg->ch2escape['r'] = '\r'; | 241 | cfg->ch2escape['r'] = '\r'; |
236 | cfg->ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */ | 242 | cfg->ch2escape['u'] = 'u'; /* Unicode parsing required */ |
237 | 243 | ||
238 | cfg->sparse_ratio = DEFAULT_SPARSE_RATIO; | 244 | cfg->sparse_ratio = DEFAULT_SPARSE_RATIO; |
239 | cfg->max_depth = DEFAULT_MAX_DEPTH; | 245 | cfg->max_depth = DEFAULT_MAX_DEPTH; |
@@ -675,33 +681,53 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
675 | token->value.string = strbuf_string(json->tmp, &token->string_len); | 681 | token->value.string = strbuf_string(json->tmp, &token->string_len); |
676 | } | 682 | } |
677 | 683 | ||
678 | static void json_next_number_token(json_parse_t *json, json_token_t *token) | 684 | /* JSON numbers should take the following form: |
685 | * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)? | ||
686 | * | ||
687 | * json_next_number_token() uses strtod() which allows other forms: | ||
688 | * - numbers starting with '+' | ||
689 | * - NaN, -NaN, infinity, -infinity | ||
690 | * - hexadecimal numbers | ||
691 | * | ||
692 | * json_is_invalid_number() detects "numbers" which may pass strtod()'s | ||
693 | * error checking, but should not be allowed with strict JSON. | ||
694 | * | ||
695 | * json_is_invalid_number() may pass numbers which cause strtod() | ||
696 | * to generate an error. | ||
697 | */ | ||
698 | static int json_is_invalid_number(json_parse_t *json) | ||
679 | { | 699 | { |
680 | const char *startptr; | 700 | int i = json->index; |
681 | char *endptr; | 701 | char ch; |
682 | int i; | 702 | |
703 | /* Reject numbers starting with + */ | ||
704 | if (json->data[i] == '+') | ||
705 | return 1; | ||
683 | 706 | ||
684 | /* JSON numbers should take the following form: | ||
685 | * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)? | ||
686 | * | ||
687 | * strtod() below allows other forms: | ||
688 | * - numbers starting with '+' | ||
689 | * - infinity, NaN | ||
690 | * - hexidecimal numbers | ||
691 | * | ||
692 | * Infinity/NaN and numbers starting with '+' can't occur due to | ||
693 | * earlier parser error checking. | ||
694 | * | ||
695 | * Generate an error if a hexidecimal number has been | ||
696 | * provided ("0x" or "0X"). | ||
697 | */ | ||
698 | i = json->index; | ||
699 | if (json->data[i] == '-') | 707 | if (json->data[i] == '-') |
700 | i++; | 708 | i++; |
701 | if (json->data[i] == '0' && (json->data[i + 1] | 0x20) == 'x') { | 709 | |
702 | json_set_token_error(token, json, "invalid number (hexidecimal)"); | 710 | /* Reject numbers starting with 0x, pass other numbers starting |
703 | return; | 711 | * with 0 */ |
704 | } | 712 | if (json->data[i] == '0') |
713 | return ((json->data[i + 1] | 0x20) == 'x'); | ||
714 | |||
715 | /* Reject inf/nan */ | ||
716 | ch = json->data[i] | 0x20; | ||
717 | if (ch == 'i' && !strncasecmp(&json->data[i], "inf", 3)) | ||
718 | return 1; | ||
719 | if (ch == 'n' && !strncasecmp(&json->data[i], "nan", 3)) | ||
720 | return 1; | ||
721 | |||
722 | /* Pass all other numbers which may still be invalid, but | ||
723 | * strtod() will catch them. */ | ||
724 | return 0; | ||
725 | } | ||
726 | |||
727 | static void json_next_number_token(json_parse_t *json, json_token_t *token) | ||
728 | { | ||
729 | const char *startptr; | ||
730 | char *endptr; | ||
705 | 731 | ||
706 | token->type = T_NUMBER; | 732 | token->type = T_NUMBER; |
707 | startptr = &json->data[json->index]; | 733 | startptr = &json->data[json->index]; |
@@ -748,10 +774,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
748 | /* Process characters which triggered T_UNKNOWN */ | 774 | /* Process characters which triggered T_UNKNOWN */ |
749 | ch = json->data[json->index]; | 775 | ch = json->data[json->index]; |
750 | 776 | ||
777 | /* Must use strncmp() to match the front of the JSON string | ||
778 | * JSON identifier must be lowercase. | ||
779 | * When strict_numbers is disabled, either case is allowed for | ||
780 | * Infinity/NaN (since we are no longer following the spec..) */ | ||
751 | if (ch == '"') { | 781 | if (ch == '"') { |
752 | json_next_string_token(json, token); | 782 | json_next_string_token(json, token); |
753 | return; | 783 | return; |
754 | } else if (ch == '-' || ('0' <= ch && ch <= '9')) { | 784 | } else if (ch == '-' || ('0' <= ch && ch <= '9')) { |
785 | if (json->cfg->strict_numbers && json_is_invalid_number(json)) { | ||
786 | json_set_token_error(token, json, "invalid number"); | ||
787 | return; | ||
788 | } | ||
755 | json_next_number_token(json, token); | 789 | json_next_number_token(json, token); |
756 | return; | 790 | return; |
757 | } else if (!strncmp(&json->data[json->index], "true", 4)) { | 791 | } else if (!strncmp(&json->data[json->index], "true", 4)) { |
@@ -768,6 +802,14 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
768 | token->type = T_NULL; | 802 | token->type = T_NULL; |
769 | json->index += 4; | 803 | json->index += 4; |
770 | return; | 804 | return; |
805 | } else if (!json->cfg->strict_numbers && json_is_invalid_number(json)) { | ||
806 | /* When strict_numbers is disabled, only attempt to process | ||
807 | * numbers we know are invalid JSON (Inf, NaN, hex) | ||
808 | * This is required to generate an appropriate token error, | ||
809 | * otherwise all bad tokens will register as "invalid number" | ||
810 | */ | ||
811 | json_next_number_token(json, token); | ||
812 | return; | ||
771 | } | 813 | } |
772 | 814 | ||
773 | /* Token starts with t/f/n but isn't recognised above. */ | 815 | /* Token starts with t/f/n but isn't recognised above. */ |