diff options
author | Mark Pulford <mark@kyne.com.au> | 2012-01-08 13:28:58 +1030 |
---|---|---|
committer | Mark Pulford <mark@kyne.com.au> | 2012-03-04 18:54:34 +1030 |
commit | e8163b395942b0e5ec0c4aad07b66472d195e61a (patch) | |
tree | 06ce595403fb25f8c92d3912d8c1d9f06fe05a25 /lua_cjson.c | |
parent | c7fbb8e441b6a62e0d6d016add8ed6b44d90d981 (diff) | |
download | lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.gz lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.bz2 lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.zip |
Improve performance by tracking decode ptr
Track a pointer to the current location in the JSON string, instead of
an index into the string array. Improves decode performance by 1-10%.
json_next_token():
- Clean up whitespace handling and leave "ch" containing the current
non-whitespace character.
Diffstat (limited to 'lua_cjson.c')
-rw-r--r-- | lua_cjson.c | 95 |
1 files changed, 49 insertions, 46 deletions
diff --git a/lua_cjson.c b/lua_cjson.c index 708e695..344cb43 100644 --- a/lua_cjson.c +++ b/lua_cjson.c | |||
@@ -127,7 +127,7 @@ typedef struct { | |||
127 | 127 | ||
128 | typedef struct { | 128 | typedef struct { |
129 | const char *data; | 129 | const char *data; |
130 | int index; | 130 | const char *ptr; |
131 | strbuf_t *tmp; /* Temporary storage for strings */ | 131 | strbuf_t *tmp; /* Temporary storage for strings */ |
132 | json_config_t *cfg; | 132 | json_config_t *cfg; |
133 | } json_parse_t; | 133 | } json_parse_t; |
@@ -806,7 +806,7 @@ static int json_append_unicode_escape(json_parse_t *json) | |||
806 | int escape_len = 6; | 806 | int escape_len = 6; |
807 | 807 | ||
808 | /* Fetch UTF-16 code unit */ | 808 | /* Fetch UTF-16 code unit */ |
809 | codepoint = decode_hex4(&json->data[json->index + 2]); | 809 | codepoint = decode_hex4(json->ptr + 2); |
810 | if (codepoint < 0) | 810 | if (codepoint < 0) |
811 | return -1; | 811 | return -1; |
812 | 812 | ||
@@ -822,13 +822,13 @@ static int json_append_unicode_escape(json_parse_t *json) | |||
822 | return -1; | 822 | return -1; |
823 | 823 | ||
824 | /* Ensure the next code is a unicode escape */ | 824 | /* Ensure the next code is a unicode escape */ |
825 | if (json->data[json->index + escape_len] != '\\' || | 825 | if (*(json->ptr + escape_len) != '\\' || |
826 | json->data[json->index + escape_len + 1] != 'u') { | 826 | *(json->ptr + escape_len + 1) != 'u') { |
827 | return -1; | 827 | return -1; |
828 | } | 828 | } |
829 | 829 | ||
830 | /* Fetch the next codepoint */ | 830 | /* Fetch the next codepoint */ |
831 | surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]); | 831 | surrogate_low = decode_hex4(json->ptr + 2 + escape_len); |
832 | if (surrogate_low < 0) | 832 | if (surrogate_low < 0) |
833 | return -1; | 833 | return -1; |
834 | 834 | ||
@@ -850,7 +850,7 @@ static int json_append_unicode_escape(json_parse_t *json) | |||
850 | 850 | ||
851 | /* Append bytes and advance parse index */ | 851 | /* Append bytes and advance parse index */ |
852 | strbuf_append_mem_unsafe(json->tmp, utf8, len); | 852 | strbuf_append_mem_unsafe(json->tmp, utf8, len); |
853 | json->index += escape_len; | 853 | json->ptr += escape_len; |
854 | 854 | ||
855 | return 0; | 855 | return 0; |
856 | } | 856 | } |
@@ -859,7 +859,7 @@ static void json_set_token_error(json_token_t *token, json_parse_t *json, | |||
859 | const char *errtype) | 859 | const char *errtype) |
860 | { | 860 | { |
861 | token->type = T_ERROR; | 861 | token->type = T_ERROR; |
862 | token->index = json->index; | 862 | token->index = json->ptr - json->data; |
863 | token->value.string = errtype; | 863 | token->value.string = errtype; |
864 | } | 864 | } |
865 | 865 | ||
@@ -869,10 +869,10 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
869 | char ch; | 869 | char ch; |
870 | 870 | ||
871 | /* Caller must ensure a string is next */ | 871 | /* Caller must ensure a string is next */ |
872 | assert(json->data[json->index] == '"'); | 872 | assert(*json->ptr == '"'); |
873 | 873 | ||
874 | /* Skip " */ | 874 | /* Skip " */ |
875 | json->index++; | 875 | json->ptr++; |
876 | 876 | ||
877 | /* json->tmp is the temporary strbuf used to accumulate the | 877 | /* json->tmp is the temporary strbuf used to accumulate the |
878 | * decoded string value. | 878 | * decoded string value. |
@@ -880,7 +880,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
880 | */ | 880 | */ |
881 | strbuf_reset(json->tmp); | 881 | strbuf_reset(json->tmp); |
882 | 882 | ||
883 | while ((ch = json->data[json->index]) != '"') { | 883 | while ((ch = *json->ptr) != '"') { |
884 | if (!ch) { | 884 | if (!ch) { |
885 | /* Premature end of the string */ | 885 | /* Premature end of the string */ |
886 | json_set_token_error(token, json, "unexpected end of string"); | 886 | json_set_token_error(token, json, "unexpected end of string"); |
@@ -890,7 +890,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
890 | /* Handle escapes */ | 890 | /* Handle escapes */ |
891 | if (ch == '\\') { | 891 | if (ch == '\\') { |
892 | /* Fetch escape character */ | 892 | /* Fetch escape character */ |
893 | ch = json->data[json->index + 1]; | 893 | ch = *(json->ptr + 1); |
894 | 894 | ||
895 | /* Translate escape code and append to tmp string */ | 895 | /* Translate escape code and append to tmp string */ |
896 | ch = escape2char[(unsigned char)ch]; | 896 | ch = escape2char[(unsigned char)ch]; |
@@ -908,14 +908,14 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
908 | } | 908 | } |
909 | 909 | ||
910 | /* Skip '\' */ | 910 | /* Skip '\' */ |
911 | json->index++; | 911 | json->ptr++; |
912 | } | 912 | } |
913 | /* Append normal character or translated single character | 913 | /* Append normal character or translated single character |
914 | * Unicode escapes are handled above */ | 914 | * Unicode escapes are handled above */ |
915 | strbuf_append_char_unsafe(json->tmp, ch); | 915 | strbuf_append_char_unsafe(json->tmp, ch); |
916 | json->index++; | 916 | json->ptr++; |
917 | } | 917 | } |
918 | json->index++; /* Eat final quote (") */ | 918 | json->ptr++; /* Eat final quote (") */ |
919 | 919 | ||
920 | strbuf_ensure_null(json->tmp); | 920 | strbuf_ensure_null(json->tmp); |
921 | 921 | ||
@@ -940,33 +940,33 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
940 | */ | 940 | */ |
941 | static int json_is_invalid_number(json_parse_t *json) | 941 | static int json_is_invalid_number(json_parse_t *json) |
942 | { | 942 | { |
943 | int i = json->index; | 943 | const char *p = json->ptr; |
944 | 944 | ||
945 | /* Reject numbers starting with + */ | 945 | /* Reject numbers starting with + */ |
946 | if (json->data[i] == '+') | 946 | if (*p == '+') |
947 | return 1; | 947 | return 1; |
948 | 948 | ||
949 | /* Skip minus sign if it exists */ | 949 | /* Skip minus sign if it exists */ |
950 | if (json->data[i] == '-') | 950 | if (*p == '-') |
951 | i++; | 951 | p++; |
952 | 952 | ||
953 | /* Reject numbers starting with 0x, or leading zeros */ | 953 | /* Reject numbers starting with 0x, or leading zeros */ |
954 | if (json->data[i] == '0') { | 954 | if (*p == '0') { |
955 | int ch2 = json->data[i + 1]; | 955 | int ch2 = *(p + 1); |
956 | 956 | ||
957 | if ((ch2 | 0x20) == 'x' || /* Hex */ | 957 | if ((ch2 | 0x20) == 'x' || /* Hex */ |
958 | ('0' <= ch2 && ch2 <= '9')) /* Leading zero */ | 958 | ('0' <= ch2 && ch2 <= '9')) /* Leading zero */ |
959 | return 1; | 959 | return 1; |
960 | 960 | ||
961 | return 0; | 961 | return 0; |
962 | } else if (json->data[i] <= '9') { | 962 | } else if (*p <= '9') { |
963 | return 0; /* Ordinary number */ | 963 | return 0; /* Ordinary number */ |
964 | } | 964 | } |
965 | 965 | ||
966 | /* Reject inf/nan */ | 966 | /* Reject inf/nan */ |
967 | if (!strncasecmp(&json->data[i], "inf", 3)) | 967 | if (!strncasecmp(p, "inf", 3)) |
968 | return 1; | 968 | return 1; |
969 | if (!strncasecmp(&json->data[i], "nan", 3)) | 969 | if (!strncasecmp(p, "nan", 3)) |
970 | return 1; | 970 | return 1; |
971 | 971 | ||
972 | /* Pass all other numbers which may still be invalid, but | 972 | /* Pass all other numbers which may still be invalid, but |
@@ -976,35 +976,39 @@ static int json_is_invalid_number(json_parse_t *json) | |||
976 | 976 | ||
977 | static void json_next_number_token(json_parse_t *json, json_token_t *token) | 977 | static void json_next_number_token(json_parse_t *json, json_token_t *token) |
978 | { | 978 | { |
979 | const char *startptr; | ||
980 | char *endptr; | 979 | char *endptr; |
981 | 980 | ||
982 | token->type = T_NUMBER; | 981 | token->type = T_NUMBER; |
983 | startptr = &json->data[json->index]; | 982 | token->value.number = fpconv_strtod(json->ptr, &endptr); |
984 | token->value.number = fpconv_strtod(&json->data[json->index], &endptr); | 983 | if (json->ptr == endptr) |
985 | if (startptr == endptr) | ||
986 | json_set_token_error(token, json, "invalid number"); | 984 | json_set_token_error(token, json, "invalid number"); |
987 | else | 985 | else |
988 | json->index += endptr - startptr; /* Skip the processed number */ | 986 | json->ptr = endptr; /* Skip the processed number */ |
989 | 987 | ||
990 | return; | 988 | return; |
991 | } | 989 | } |
992 | 990 | ||
993 | /* Fills in the token struct. | 991 | /* Fills in the token struct. |
994 | * T_STRING will return a pointer to the json_parse_t temporary string | 992 | * T_STRING will return a pointer to the json_parse_t temporary string |
995 | * T_ERROR will leave the json->index pointer at the error. | 993 | * T_ERROR will leave the json->ptr pointer at the error. |
996 | */ | 994 | */ |
997 | static void json_next_token(json_parse_t *json, json_token_t *token) | 995 | static void json_next_token(json_parse_t *json, json_token_t *token) |
998 | { | 996 | { |
999 | json_token_type_t *ch2token = json->cfg->ch2token; | 997 | json_token_type_t *ch2token = json->cfg->ch2token; |
1000 | int ch; | 998 | int ch; |
1001 | 999 | ||
1002 | /* Eat whitespace. FIXME: UGLY */ | 1000 | /* Eat whitespace. */ |
1003 | token->type = ch2token[(unsigned char)json->data[json->index]]; | 1001 | while (1) { |
1004 | while (token->type == T_WHITESPACE) | 1002 | ch = (unsigned char)*(json->ptr); |
1005 | token->type = ch2token[(unsigned char)json->data[++json->index]]; | 1003 | token->type = ch2token[ch]; |
1004 | if (token->type != T_WHITESPACE) | ||
1005 | break; | ||
1006 | json->ptr++; | ||
1007 | } | ||
1006 | 1008 | ||
1007 | token->index = json->index; | 1009 | /* Store location of new token. Required when throwing errors |
1010 | * for unexpected tokens (syntax errors). */ | ||
1011 | token->index = json->ptr - json->data; | ||
1008 | 1012 | ||
1009 | /* Don't advance the pointer for an error or the end */ | 1013 | /* Don't advance the pointer for an error or the end */ |
1010 | if (token->type == T_ERROR) { | 1014 | if (token->type == T_ERROR) { |
@@ -1018,14 +1022,13 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
1018 | 1022 | ||
1019 | /* Found a known single character token, advance index and return */ | 1023 | /* Found a known single character token, advance index and return */ |
1020 | if (token->type != T_UNKNOWN) { | 1024 | if (token->type != T_UNKNOWN) { |
1021 | json->index++; | 1025 | json->ptr++; |
1022 | return; | 1026 | return; |
1023 | } | 1027 | } |
1024 | 1028 | ||
1025 | /* Process characters which triggered T_UNKNOWN */ | 1029 | /* Process characters which triggered T_UNKNOWN |
1026 | ch = json->data[json->index]; | 1030 | * |
1027 | 1031 | * Must use strncmp() to match the front of the JSON string. | |
1028 | /* Must use strncmp() to match the front of the JSON string. | ||
1029 | * JSON identifier must be lowercase. | 1032 | * JSON identifier must be lowercase. |
1030 | * When strict_numbers is disabled, either case is allowed for | 1033 | * When strict_numbers is disabled, either case is allowed for |
1031 | * Infinity/NaN (since we are no longer following the spec..) */ | 1034 | * Infinity/NaN (since we are no longer following the spec..) */ |
@@ -1039,19 +1042,19 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
1039 | } | 1042 | } |
1040 | json_next_number_token(json, token); | 1043 | json_next_number_token(json, token); |
1041 | return; | 1044 | return; |
1042 | } else if (!strncmp(&json->data[json->index], "true", 4)) { | 1045 | } else if (!strncmp(json->ptr, "true", 4)) { |
1043 | token->type = T_BOOLEAN; | 1046 | token->type = T_BOOLEAN; |
1044 | token->value.boolean = 1; | 1047 | token->value.boolean = 1; |
1045 | json->index += 4; | 1048 | json->ptr += 4; |
1046 | return; | 1049 | return; |
1047 | } else if (!strncmp(&json->data[json->index], "false", 5)) { | 1050 | } else if (!strncmp(json->ptr, "false", 5)) { |
1048 | token->type = T_BOOLEAN; | 1051 | token->type = T_BOOLEAN; |
1049 | token->value.boolean = 0; | 1052 | token->value.boolean = 0; |
1050 | json->index += 5; | 1053 | json->ptr += 5; |
1051 | return; | 1054 | return; |
1052 | } else if (!strncmp(&json->data[json->index], "null", 4)) { | 1055 | } else if (!strncmp(json->ptr, "null", 4)) { |
1053 | token->type = T_NULL; | 1056 | token->type = T_NULL; |
1054 | json->index += 4; | 1057 | json->ptr += 4; |
1055 | return; | 1058 | return; |
1056 | } else if (!json->cfg->decode_refuse_badnum && | 1059 | } else if (!json->cfg->decode_refuse_badnum && |
1057 | json_is_invalid_number(json)) { | 1060 | json_is_invalid_number(json)) { |
@@ -1219,7 +1222,7 @@ static void lua_json_decode(lua_State *l, const char *json_text, int json_len) | |||
1219 | 1222 | ||
1220 | json.cfg = json_fetch_config(l); | 1223 | json.cfg = json_fetch_config(l); |
1221 | json.data = json_text; | 1224 | json.data = json_text; |
1222 | json.index = 0; | 1225 | json.ptr = json.data; |
1223 | 1226 | ||
1224 | /* Ensure the temporary buffer can hold the entire string. | 1227 | /* Ensure the temporary buffer can hold the entire string. |
1225 | * This means we no longer need to do length checks since the decoded | 1228 | * This means we no longer need to do length checks since the decoded |