diff options
| author | Mark Pulford <mark@kyne.com.au> | 2012-01-08 13:28:58 +1030 |
|---|---|---|
| committer | Mark Pulford <mark@kyne.com.au> | 2012-03-04 18:54:34 +1030 |
| commit | e8163b395942b0e5ec0c4aad07b66472d195e61a (patch) | |
| tree | 06ce595403fb25f8c92d3912d8c1d9f06fe05a25 | |
| parent | c7fbb8e441b6a62e0d6d016add8ed6b44d90d981 (diff) | |
| download | lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.gz lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.bz2 lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.zip | |
Improve performance by tracking decode ptr
Track a pointer to the current location in the JSON string instead of
an index into the string array. This improves decode performance by 1-10%.
json_next_token():
- Clean up whitespace handling and leave "ch" containing the current
  non-whitespace character.
| -rw-r--r-- | lua_cjson.c | 95 |
1 file changed, 49 insertions, 46 deletions
diff --git a/lua_cjson.c b/lua_cjson.c index 708e695..344cb43 100644 --- a/lua_cjson.c +++ b/lua_cjson.c | |||
| @@ -127,7 +127,7 @@ typedef struct { | |||
| 127 | 127 | ||
| 128 | typedef struct { | 128 | typedef struct { |
| 129 | const char *data; | 129 | const char *data; |
| 130 | int index; | 130 | const char *ptr; |
| 131 | strbuf_t *tmp; /* Temporary storage for strings */ | 131 | strbuf_t *tmp; /* Temporary storage for strings */ |
| 132 | json_config_t *cfg; | 132 | json_config_t *cfg; |
| 133 | } json_parse_t; | 133 | } json_parse_t; |
| @@ -806,7 +806,7 @@ static int json_append_unicode_escape(json_parse_t *json) | |||
| 806 | int escape_len = 6; | 806 | int escape_len = 6; |
| 807 | 807 | ||
| 808 | /* Fetch UTF-16 code unit */ | 808 | /* Fetch UTF-16 code unit */ |
| 809 | codepoint = decode_hex4(&json->data[json->index + 2]); | 809 | codepoint = decode_hex4(json->ptr + 2); |
| 810 | if (codepoint < 0) | 810 | if (codepoint < 0) |
| 811 | return -1; | 811 | return -1; |
| 812 | 812 | ||
| @@ -822,13 +822,13 @@ static int json_append_unicode_escape(json_parse_t *json) | |||
| 822 | return -1; | 822 | return -1; |
| 823 | 823 | ||
| 824 | /* Ensure the next code is a unicode escape */ | 824 | /* Ensure the next code is a unicode escape */ |
| 825 | if (json->data[json->index + escape_len] != '\\' || | 825 | if (*(json->ptr + escape_len) != '\\' || |
| 826 | json->data[json->index + escape_len + 1] != 'u') { | 826 | *(json->ptr + escape_len + 1) != 'u') { |
| 827 | return -1; | 827 | return -1; |
| 828 | } | 828 | } |
| 829 | 829 | ||
| 830 | /* Fetch the next codepoint */ | 830 | /* Fetch the next codepoint */ |
| 831 | surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]); | 831 | surrogate_low = decode_hex4(json->ptr + 2 + escape_len); |
| 832 | if (surrogate_low < 0) | 832 | if (surrogate_low < 0) |
| 833 | return -1; | 833 | return -1; |
| 834 | 834 | ||
| @@ -850,7 +850,7 @@ static int json_append_unicode_escape(json_parse_t *json) | |||
| 850 | 850 | ||
| 851 | /* Append bytes and advance parse index */ | 851 | /* Append bytes and advance parse index */ |
| 852 | strbuf_append_mem_unsafe(json->tmp, utf8, len); | 852 | strbuf_append_mem_unsafe(json->tmp, utf8, len); |
| 853 | json->index += escape_len; | 853 | json->ptr += escape_len; |
| 854 | 854 | ||
| 855 | return 0; | 855 | return 0; |
| 856 | } | 856 | } |
| @@ -859,7 +859,7 @@ static void json_set_token_error(json_token_t *token, json_parse_t *json, | |||
| 859 | const char *errtype) | 859 | const char *errtype) |
| 860 | { | 860 | { |
| 861 | token->type = T_ERROR; | 861 | token->type = T_ERROR; |
| 862 | token->index = json->index; | 862 | token->index = json->ptr - json->data; |
| 863 | token->value.string = errtype; | 863 | token->value.string = errtype; |
| 864 | } | 864 | } |
| 865 | 865 | ||
| @@ -869,10 +869,10 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
| 869 | char ch; | 869 | char ch; |
| 870 | 870 | ||
| 871 | /* Caller must ensure a string is next */ | 871 | /* Caller must ensure a string is next */ |
| 872 | assert(json->data[json->index] == '"'); | 872 | assert(*json->ptr == '"'); |
| 873 | 873 | ||
| 874 | /* Skip " */ | 874 | /* Skip " */ |
| 875 | json->index++; | 875 | json->ptr++; |
| 876 | 876 | ||
| 877 | /* json->tmp is the temporary strbuf used to accumulate the | 877 | /* json->tmp is the temporary strbuf used to accumulate the |
| 878 | * decoded string value. | 878 | * decoded string value. |
| @@ -880,7 +880,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
| 880 | */ | 880 | */ |
| 881 | strbuf_reset(json->tmp); | 881 | strbuf_reset(json->tmp); |
| 882 | 882 | ||
| 883 | while ((ch = json->data[json->index]) != '"') { | 883 | while ((ch = *json->ptr) != '"') { |
| 884 | if (!ch) { | 884 | if (!ch) { |
| 885 | /* Premature end of the string */ | 885 | /* Premature end of the string */ |
| 886 | json_set_token_error(token, json, "unexpected end of string"); | 886 | json_set_token_error(token, json, "unexpected end of string"); |
| @@ -890,7 +890,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
| 890 | /* Handle escapes */ | 890 | /* Handle escapes */ |
| 891 | if (ch == '\\') { | 891 | if (ch == '\\') { |
| 892 | /* Fetch escape character */ | 892 | /* Fetch escape character */ |
| 893 | ch = json->data[json->index + 1]; | 893 | ch = *(json->ptr + 1); |
| 894 | 894 | ||
| 895 | /* Translate escape code and append to tmp string */ | 895 | /* Translate escape code and append to tmp string */ |
| 896 | ch = escape2char[(unsigned char)ch]; | 896 | ch = escape2char[(unsigned char)ch]; |
| @@ -908,14 +908,14 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
| 908 | } | 908 | } |
| 909 | 909 | ||
| 910 | /* Skip '\' */ | 910 | /* Skip '\' */ |
| 911 | json->index++; | 911 | json->ptr++; |
| 912 | } | 912 | } |
| 913 | /* Append normal character or translated single character | 913 | /* Append normal character or translated single character |
| 914 | * Unicode escapes are handled above */ | 914 | * Unicode escapes are handled above */ |
| 915 | strbuf_append_char_unsafe(json->tmp, ch); | 915 | strbuf_append_char_unsafe(json->tmp, ch); |
| 916 | json->index++; | 916 | json->ptr++; |
| 917 | } | 917 | } |
| 918 | json->index++; /* Eat final quote (") */ | 918 | json->ptr++; /* Eat final quote (") */ |
| 919 | 919 | ||
| 920 | strbuf_ensure_null(json->tmp); | 920 | strbuf_ensure_null(json->tmp); |
| 921 | 921 | ||
| @@ -940,33 +940,33 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
| 940 | */ | 940 | */ |
| 941 | static int json_is_invalid_number(json_parse_t *json) | 941 | static int json_is_invalid_number(json_parse_t *json) |
| 942 | { | 942 | { |
| 943 | int i = json->index; | 943 | const char *p = json->ptr; |
| 944 | 944 | ||
| 945 | /* Reject numbers starting with + */ | 945 | /* Reject numbers starting with + */ |
| 946 | if (json->data[i] == '+') | 946 | if (*p == '+') |
| 947 | return 1; | 947 | return 1; |
| 948 | 948 | ||
| 949 | /* Skip minus sign if it exists */ | 949 | /* Skip minus sign if it exists */ |
| 950 | if (json->data[i] == '-') | 950 | if (*p == '-') |
| 951 | i++; | 951 | p++; |
| 952 | 952 | ||
| 953 | /* Reject numbers starting with 0x, or leading zeros */ | 953 | /* Reject numbers starting with 0x, or leading zeros */ |
| 954 | if (json->data[i] == '0') { | 954 | if (*p == '0') { |
| 955 | int ch2 = json->data[i + 1]; | 955 | int ch2 = *(p + 1); |
| 956 | 956 | ||
| 957 | if ((ch2 | 0x20) == 'x' || /* Hex */ | 957 | if ((ch2 | 0x20) == 'x' || /* Hex */ |
| 958 | ('0' <= ch2 && ch2 <= '9')) /* Leading zero */ | 958 | ('0' <= ch2 && ch2 <= '9')) /* Leading zero */ |
| 959 | return 1; | 959 | return 1; |
| 960 | 960 | ||
| 961 | return 0; | 961 | return 0; |
| 962 | } else if (json->data[i] <= '9') { | 962 | } else if (*p <= '9') { |
| 963 | return 0; /* Ordinary number */ | 963 | return 0; /* Ordinary number */ |
| 964 | } | 964 | } |
| 965 | 965 | ||
| 966 | /* Reject inf/nan */ | 966 | /* Reject inf/nan */ |
| 967 | if (!strncasecmp(&json->data[i], "inf", 3)) | 967 | if (!strncasecmp(p, "inf", 3)) |
| 968 | return 1; | 968 | return 1; |
| 969 | if (!strncasecmp(&json->data[i], "nan", 3)) | 969 | if (!strncasecmp(p, "nan", 3)) |
| 970 | return 1; | 970 | return 1; |
| 971 | 971 | ||
| 972 | /* Pass all other numbers which may still be invalid, but | 972 | /* Pass all other numbers which may still be invalid, but |
| @@ -976,35 +976,39 @@ static int json_is_invalid_number(json_parse_t *json) | |||
| 976 | 976 | ||
| 977 | static void json_next_number_token(json_parse_t *json, json_token_t *token) | 977 | static void json_next_number_token(json_parse_t *json, json_token_t *token) |
| 978 | { | 978 | { |
| 979 | const char *startptr; | ||
| 980 | char *endptr; | 979 | char *endptr; |
| 981 | 980 | ||
| 982 | token->type = T_NUMBER; | 981 | token->type = T_NUMBER; |
| 983 | startptr = &json->data[json->index]; | 982 | token->value.number = fpconv_strtod(json->ptr, &endptr); |
| 984 | token->value.number = fpconv_strtod(&json->data[json->index], &endptr); | 983 | if (json->ptr == endptr) |
| 985 | if (startptr == endptr) | ||
| 986 | json_set_token_error(token, json, "invalid number"); | 984 | json_set_token_error(token, json, "invalid number"); |
| 987 | else | 985 | else |
| 988 | json->index += endptr - startptr; /* Skip the processed number */ | 986 | json->ptr = endptr; /* Skip the processed number */ |
| 989 | 987 | ||
| 990 | return; | 988 | return; |
| 991 | } | 989 | } |
| 992 | 990 | ||
| 993 | /* Fills in the token struct. | 991 | /* Fills in the token struct. |
| 994 | * T_STRING will return a pointer to the json_parse_t temporary string | 992 | * T_STRING will return a pointer to the json_parse_t temporary string |
| 995 | * T_ERROR will leave the json->index pointer at the error. | 993 | * T_ERROR will leave the json->ptr pointer at the error. |
| 996 | */ | 994 | */ |
| 997 | static void json_next_token(json_parse_t *json, json_token_t *token) | 995 | static void json_next_token(json_parse_t *json, json_token_t *token) |
| 998 | { | 996 | { |
| 999 | json_token_type_t *ch2token = json->cfg->ch2token; | 997 | json_token_type_t *ch2token = json->cfg->ch2token; |
| 1000 | int ch; | 998 | int ch; |
| 1001 | 999 | ||
| 1002 | /* Eat whitespace. FIXME: UGLY */ | 1000 | /* Eat whitespace. */ |
| 1003 | token->type = ch2token[(unsigned char)json->data[json->index]]; | 1001 | while (1) { |
| 1004 | while (token->type == T_WHITESPACE) | 1002 | ch = (unsigned char)*(json->ptr); |
| 1005 | token->type = ch2token[(unsigned char)json->data[++json->index]]; | 1003 | token->type = ch2token[ch]; |
| 1004 | if (token->type != T_WHITESPACE) | ||
| 1005 | break; | ||
| 1006 | json->ptr++; | ||
| 1007 | } | ||
| 1006 | 1008 | ||
| 1007 | token->index = json->index; | 1009 | /* Store location of new token. Required when throwing errors |
| 1010 | * for unexpected tokens (syntax errors). */ | ||
| 1011 | token->index = json->ptr - json->data; | ||
| 1008 | 1012 | ||
| 1009 | /* Don't advance the pointer for an error or the end */ | 1013 | /* Don't advance the pointer for an error or the end */ |
| 1010 | if (token->type == T_ERROR) { | 1014 | if (token->type == T_ERROR) { |
| @@ -1018,14 +1022,13 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
| 1018 | 1022 | ||
| 1019 | /* Found a known single character token, advance index and return */ | 1023 | /* Found a known single character token, advance index and return */ |
| 1020 | if (token->type != T_UNKNOWN) { | 1024 | if (token->type != T_UNKNOWN) { |
| 1021 | json->index++; | 1025 | json->ptr++; |
| 1022 | return; | 1026 | return; |
| 1023 | } | 1027 | } |
| 1024 | 1028 | ||
| 1025 | /* Process characters which triggered T_UNKNOWN */ | 1029 | /* Process characters which triggered T_UNKNOWN |
| 1026 | ch = json->data[json->index]; | 1030 | * |
| 1027 | 1031 | * Must use strncmp() to match the front of the JSON string. | |
| 1028 | /* Must use strncmp() to match the front of the JSON string. | ||
| 1029 | * JSON identifier must be lowercase. | 1032 | * JSON identifier must be lowercase. |
| 1030 | * When strict_numbers if disabled, either case is allowed for | 1033 | * When strict_numbers if disabled, either case is allowed for |
| 1031 | * Infinity/NaN (since we are no longer following the spec..) */ | 1034 | * Infinity/NaN (since we are no longer following the spec..) */ |
| @@ -1039,19 +1042,19 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
| 1039 | } | 1042 | } |
| 1040 | json_next_number_token(json, token); | 1043 | json_next_number_token(json, token); |
| 1041 | return; | 1044 | return; |
| 1042 | } else if (!strncmp(&json->data[json->index], "true", 4)) { | 1045 | } else if (!strncmp(json->ptr, "true", 4)) { |
| 1043 | token->type = T_BOOLEAN; | 1046 | token->type = T_BOOLEAN; |
| 1044 | token->value.boolean = 1; | 1047 | token->value.boolean = 1; |
| 1045 | json->index += 4; | 1048 | json->ptr += 4; |
| 1046 | return; | 1049 | return; |
| 1047 | } else if (!strncmp(&json->data[json->index], "false", 5)) { | 1050 | } else if (!strncmp(json->ptr, "false", 5)) { |
| 1048 | token->type = T_BOOLEAN; | 1051 | token->type = T_BOOLEAN; |
| 1049 | token->value.boolean = 0; | 1052 | token->value.boolean = 0; |
| 1050 | json->index += 5; | 1053 | json->ptr += 5; |
| 1051 | return; | 1054 | return; |
| 1052 | } else if (!strncmp(&json->data[json->index], "null", 4)) { | 1055 | } else if (!strncmp(json->ptr, "null", 4)) { |
| 1053 | token->type = T_NULL; | 1056 | token->type = T_NULL; |
| 1054 | json->index += 4; | 1057 | json->ptr += 4; |
| 1055 | return; | 1058 | return; |
| 1056 | } else if (!json->cfg->decode_refuse_badnum && | 1059 | } else if (!json->cfg->decode_refuse_badnum && |
| 1057 | json_is_invalid_number(json)) { | 1060 | json_is_invalid_number(json)) { |
| @@ -1219,7 +1222,7 @@ static void lua_json_decode(lua_State *l, const char *json_text, int json_len) | |||
| 1219 | 1222 | ||
| 1220 | json.cfg = json_fetch_config(l); | 1223 | json.cfg = json_fetch_config(l); |
| 1221 | json.data = json_text; | 1224 | json.data = json_text; |
| 1222 | json.index = 0; | 1225 | json.ptr = json.data; |
| 1223 | 1226 | ||
| 1224 | /* Ensure the temporary buffer can hold the entire string. | 1227 | /* Ensure the temporary buffer can hold the entire string. |
| 1225 | * This means we no longer need to do length checks since the decoded | 1228 | * This means we no longer need to do length checks since the decoded |
