diff options
| author | Mark Pulford <mark@kyne.com.au> | 2011-05-01 18:19:42 +0930 | 
|---|---|---|
| committer | Mark Pulford <mark@kyne.com.au> | 2011-05-01 18:19:42 +0930 | 
| commit | 024dd94968e60fa3177c869a0c200d116f78f924 (patch) | |
| tree | 5b10db3833a5b16e94888da581b94c839d431279 | |
| parent | 439e03c6f9296ade78985a8d0b5c892846b6b06a (diff) | |
| download | lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.gz lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.tar.bz2 lua-cjson-024dd94968e60fa3177c869a0c200d116f78f924.zip  | |
Support optionally parsing Inf/NaN/Hex numbers
Change strict_numbers to control whether json.decode will parse an
expanded set of numbers (Hex, Inf, NaN).
Diffstat (limited to '')
| -rw-r--r-- | lua_cjson.c | 104 | 
1 files changed, 73 insertions, 31 deletions
diff --git a/lua_cjson.c b/lua_cjson.c index 379ea23..abf2bfc 100644 --- a/lua_cjson.c +++ b/lua_cjson.c  | |||
| @@ -29,7 +29,6 @@ | |||
| 29 | * JSON. Most unprintable characters are not escaped. | 29 | * JSON. Most unprintable characters are not escaped. | 
| 30 | * - Invalid UTF-8 characters are not detected and will be passed | 30 | * - Invalid UTF-8 characters are not detected and will be passed | 
| 31 | * untouched. | 31 | * untouched. | 
| 32 | * - Cannot parse NaN/Inf numbers when strict_numbers has been disabled. | ||
| 33 | * - Javascript comments are not part of the JSON spec, and are not | 32 | * - Javascript comments are not part of the JSON spec, and are not | 
| 34 | * supported. | 33 | * supported. | 
| 35 | * | 34 | * | 
| @@ -175,6 +174,9 @@ static int json_max_depth(lua_State *l) | |||
| 175 | return 1; | 174 | return 1; | 
| 176 | } | 175 | } | 
| 177 | 176 | ||
| 177 | /* When disabled, supports: | ||
| 178 | * - encoding/decoding NaN/Infinity. | ||
| | | 179 | * - decoding hexadecimal numbers. */ | 
| 178 | static int json_strict_numbers(lua_State *l) | 180 | static int json_strict_numbers(lua_State *l) | 
| 179 | { | 181 | { | 
| 180 | json_config_t *cfg; | 182 | json_config_t *cfg; | 
| @@ -214,17 +216,21 @@ static void json_create_config(lua_State *l) | |||
| 214 | cfg->ch2token['\r'] = T_WHITESPACE; | 216 | cfg->ch2token['\r'] = T_WHITESPACE; | 
| 215 | 217 | ||
| 216 | /* Update characters that require further processing */ | 218 | /* Update characters that require further processing */ | 
| 217 | cfg->ch2token['n'] = T_UNKNOWN; | 219 | cfg->ch2token['f'] = T_UNKNOWN; /* false? */ | 
| 218 | cfg->ch2token['t'] = T_UNKNOWN; | 220 | cfg->ch2token['i'] = T_UNKNOWN; /* inf, infinity? */ | 
| 219 | cfg->ch2token['f'] = T_UNKNOWN; | 221 | cfg->ch2token['I'] = T_UNKNOWN; | 
| 220 | cfg->ch2token['"'] = T_UNKNOWN; | 222 | cfg->ch2token['n'] = T_UNKNOWN; /* null, nan? */ | 
| 223 | cfg->ch2token['N'] = T_UNKNOWN; | ||
| 224 | cfg->ch2token['t'] = T_UNKNOWN; /* true? */ | ||
| 225 | cfg->ch2token['"'] = T_UNKNOWN; /* string? */ | ||
| 226 | cfg->ch2token['+'] = T_UNKNOWN; /* number? */ | ||
| 221 | cfg->ch2token['-'] = T_UNKNOWN; | 227 | cfg->ch2token['-'] = T_UNKNOWN; | 
| 222 | for (i = 0; i < 10; i++) | 228 | for (i = 0; i < 10; i++) | 
| 223 | cfg->ch2token['0' + i] = T_UNKNOWN; | 229 | cfg->ch2token['0' + i] = T_UNKNOWN; | 
| 224 | 230 | ||
| 231 | /* Lookup table for parsing escape characters */ | ||
| 225 | for (i = 0; i < 256; i++) | 232 | for (i = 0; i < 256; i++) | 
| 226 | cfg->ch2escape[i] = 0; /* String error */ | 233 | cfg->ch2escape[i] = 0; /* String error */ | 
| 227 | |||
| 228 | cfg->ch2escape['"'] = '"'; | 234 | cfg->ch2escape['"'] = '"'; | 
| 229 | cfg->ch2escape['\\'] = '\\'; | 235 | cfg->ch2escape['\\'] = '\\'; | 
| 230 | cfg->ch2escape['/'] = '/'; | 236 | cfg->ch2escape['/'] = '/'; | 
| @@ -233,7 +239,7 @@ static void json_create_config(lua_State *l) | |||
| 233 | cfg->ch2escape['n'] = '\n'; | 239 | cfg->ch2escape['n'] = '\n'; | 
| 234 | cfg->ch2escape['f'] = '\f'; | 240 | cfg->ch2escape['f'] = '\f'; | 
| 235 | cfg->ch2escape['r'] = '\r'; | 241 | cfg->ch2escape['r'] = '\r'; | 
| 236 | cfg->ch2escape['u'] = 'u'; /* This needs to be parsed as unicode */ | 242 | cfg->ch2escape['u'] = 'u'; /* Unicode parsing required */ | 
| 237 | 243 | ||
| 238 | cfg->sparse_ratio = DEFAULT_SPARSE_RATIO; | 244 | cfg->sparse_ratio = DEFAULT_SPARSE_RATIO; | 
| 239 | cfg->max_depth = DEFAULT_MAX_DEPTH; | 245 | cfg->max_depth = DEFAULT_MAX_DEPTH; | 
| @@ -675,33 +681,53 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
| 675 | token->value.string = strbuf_string(json->tmp, &token->string_len); | 681 | token->value.string = strbuf_string(json->tmp, &token->string_len); | 
| 676 | } | 682 | } | 
| 677 | 683 | ||
| 678 | static void json_next_number_token(json_parse_t *json, json_token_t *token) | 684 | /* JSON numbers should take the following form: | 
| 685 | * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)? | ||
| 686 | * | ||
| 687 | * json_next_number_token() uses strtod() which allows other forms: | ||
| 688 | * - numbers starting with '+' | ||
| 689 | * - NaN, -NaN, infinity, -infinity | ||
| | | 690 | * - hexadecimal numbers | 
| 691 | * | ||
| 692 | * json_is_invalid_number() detects "numbers" which may pass strtod()'s | ||
| 693 | * error checking, but should not be allowed with strict JSON. | ||
| 694 | * | ||
| 695 | * json_is_invalid_number() may pass numbers which cause strtod() | ||
| 696 | * to generate an error. | ||
| 697 | */ | ||
| 698 | static int json_is_invalid_number(json_parse_t *json) | ||
| 679 | { | 699 | { | 
| 680 | const char *startptr; | 700 | int i = json->index; | 
| 681 | char *endptr; | 701 | char ch; | 
| 682 | int i; | 702 | |
| 703 | /* Reject numbers starting with + */ | ||
| 704 | if (json->data[i] == '+') | ||
| 705 | return 1; | ||
| 683 | 706 | ||
| 684 | /* JSON numbers should take the following form: | ||
| 685 | * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)? | ||
| 686 | * | ||
| 687 | * strtod() below allows other forms: | ||
| 688 | * - numbers starting with '+' | ||
| 689 | * - infinity, NaN | ||
| 690 | * - hexidecimal numbers | ||
| 691 | * | ||
| 692 | * Infinity/NaN and numbers starting with '+' can't occur due to | ||
| 693 | * earlier parser error checking. | ||
| 694 | * | ||
| 695 | * Generate an error if a hexidecimal number has been | ||
| 696 | * provided ("0x" or "0X"). | ||
| 697 | */ | ||
| 698 | i = json->index; | ||
| 699 | if (json->data[i] == '-') | 707 | if (json->data[i] == '-') | 
| 700 | i++; | 708 | i++; | 
| 701 | if (json->data[i] == '0' && (json->data[i + 1] | 0x20) == 'x') { | 709 | |
| 702 | json_set_token_error(token, json, "invalid number (hexidecimal)"); | 710 | /* Reject numbers starting with 0x, pass other numbers starting | 
| 703 | return; | 711 | * with 0 */ | 
| 704 | } | 712 | if (json->data[i] == '0') | 
| 713 | return ((json->data[i + 1] | 0x20) == 'x'); | ||
| 714 | |||
| 715 | /* Reject inf/nan */ | ||
| 716 | ch = json->data[i] | 0x20; | ||
| 717 | if (ch == 'i' && !strncasecmp(&json->data[i], "inf", 3)) | ||
| 718 | return 1; | ||
| 719 | if (ch == 'n' && !strncasecmp(&json->data[i], "nan", 3)) | ||
| 720 | return 1; | ||
| 721 | |||
| 722 | /* Pass all other numbers which may still be invalid, but | ||
| 723 | * strtod() will catch them. */ | ||
| 724 | return 0; | ||
| 725 | } | ||
| 726 | |||
| 727 | static void json_next_number_token(json_parse_t *json, json_token_t *token) | ||
| 728 | { | ||
| 729 | const char *startptr; | ||
| 730 | char *endptr; | ||
| 705 | 731 | ||
| 706 | token->type = T_NUMBER; | 732 | token->type = T_NUMBER; | 
| 707 | startptr = &json->data[json->index]; | 733 | startptr = &json->data[json->index]; | 
| @@ -748,10 +774,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
| 748 | /* Process characters which triggered T_UNKNOWN */ | 774 | /* Process characters which triggered T_UNKNOWN */ | 
| 749 | ch = json->data[json->index]; | 775 | ch = json->data[json->index]; | 
| 750 | 776 | ||
| 777 | /* Must use strncmp() to match the front of the JSON string | ||
| 778 | * JSON identifier must be lowercase. | ||
| | | 780 | * When strict_numbers is disabled, either case is allowed for | 
| 780 | * Infinity/NaN (since we are no longer following the spec..) */ | ||
| 751 | if (ch == '"') { | 781 | if (ch == '"') { | 
| 752 | json_next_string_token(json, token); | 782 | json_next_string_token(json, token); | 
| 753 | return; | 783 | return; | 
| 754 | } else if (ch == '-' || ('0' <= ch && ch <= '9')) { | 784 | } else if (ch == '-' || ('0' <= ch && ch <= '9')) { | 
| 785 | if (json->cfg->strict_numbers && json_is_invalid_number(json)) { | ||
| 786 | json_set_token_error(token, json, "invalid number"); | ||
| 787 | return; | ||
| 788 | } | ||
| 755 | json_next_number_token(json, token); | 789 | json_next_number_token(json, token); | 
| 756 | return; | 790 | return; | 
| 757 | } else if (!strncmp(&json->data[json->index], "true", 4)) { | 791 | } else if (!strncmp(&json->data[json->index], "true", 4)) { | 
| @@ -768,6 +802,14 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
| 768 | token->type = T_NULL; | 802 | token->type = T_NULL; | 
| 769 | json->index += 4; | 803 | json->index += 4; | 
| 770 | return; | 804 | return; | 
| 805 | } else if (!json->cfg->strict_numbers && json_is_invalid_number(json)) { | ||
| 806 | /* When strict_numbers is disabled, only attempt to process | ||
| 807 | * numbers we know are invalid JSON (Inf, NaN, hex) | ||
| 808 | * This is required to generate an appropriate token error, | ||
| 809 | * otherwise all bad tokens will register as "invalid number" | ||
| 810 | */ | ||
| 811 | json_next_number_token(json, token); | ||
| 812 | return; | ||
| 771 | } | 813 | } | 
| 772 | 814 | ||
| 773 | /* Token starts with t/f/n but isn't recognised above. */ | 815 | /* Token starts with t/f/n but isn't recognised above. */ | 
