aboutsummaryrefslogtreecommitdiff
path: root/lua_cjson.c
diff options
context:
space:
mode:
author: Mark Pulford <mark@kyne.com.au> 2012-01-08 13:28:58 +1030
committer: Mark Pulford <mark@kyne.com.au> 2012-03-04 18:54:34 +1030
commit: e8163b395942b0e5ec0c4aad07b66472d195e61a (patch)
tree: 06ce595403fb25f8c92d3912d8c1d9f06fe05a25 /lua_cjson.c
parent: c7fbb8e441b6a62e0d6d016add8ed6b44d90d981 (diff)
download: lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.gz
lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.bz2
lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.zip
Improve performance by tracking decode ptr
Track a pointer to the current location in the JSON string, instead of an index into the string array. This improves decode performance by 1-10%.
json_next_token():
- Clean up whitespace handling and leave "ch" containing the current non-whitespace character.
Diffstat (limited to 'lua_cjson.c')
-rw-r--r-- lua_cjson.c 95
1 files changed, 49 insertions, 46 deletions
diff --git a/lua_cjson.c b/lua_cjson.c
index 708e695..344cb43 100644
--- a/lua_cjson.c
+++ b/lua_cjson.c
@@ -127,7 +127,7 @@ typedef struct {
127 127
128typedef struct { 128typedef struct {
129 const char *data; 129 const char *data;
130 int index; 130 const char *ptr;
131 strbuf_t *tmp; /* Temporary storage for strings */ 131 strbuf_t *tmp; /* Temporary storage for strings */
132 json_config_t *cfg; 132 json_config_t *cfg;
133} json_parse_t; 133} json_parse_t;
@@ -806,7 +806,7 @@ static int json_append_unicode_escape(json_parse_t *json)
806 int escape_len = 6; 806 int escape_len = 6;
807 807
808 /* Fetch UTF-16 code unit */ 808 /* Fetch UTF-16 code unit */
809 codepoint = decode_hex4(&json->data[json->index + 2]); 809 codepoint = decode_hex4(json->ptr + 2);
810 if (codepoint < 0) 810 if (codepoint < 0)
811 return -1; 811 return -1;
812 812
@@ -822,13 +822,13 @@ static int json_append_unicode_escape(json_parse_t *json)
822 return -1; 822 return -1;
823 823
824 /* Ensure the next code is a unicode escape */ 824 /* Ensure the next code is a unicode escape */
825 if (json->data[json->index + escape_len] != '\\' || 825 if (*(json->ptr + escape_len) != '\\' ||
826 json->data[json->index + escape_len + 1] != 'u') { 826 *(json->ptr + escape_len + 1) != 'u') {
827 return -1; 827 return -1;
828 } 828 }
829 829
830 /* Fetch the next codepoint */ 830 /* Fetch the next codepoint */
831 surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]); 831 surrogate_low = decode_hex4(json->ptr + 2 + escape_len);
832 if (surrogate_low < 0) 832 if (surrogate_low < 0)
833 return -1; 833 return -1;
834 834
@@ -850,7 +850,7 @@ static int json_append_unicode_escape(json_parse_t *json)
850 850
851 /* Append bytes and advance parse index */ 851 /* Append bytes and advance parse index */
852 strbuf_append_mem_unsafe(json->tmp, utf8, len); 852 strbuf_append_mem_unsafe(json->tmp, utf8, len);
853 json->index += escape_len; 853 json->ptr += escape_len;
854 854
855 return 0; 855 return 0;
856} 856}
@@ -859,7 +859,7 @@ static void json_set_token_error(json_token_t *token, json_parse_t *json,
859 const char *errtype) 859 const char *errtype)
860{ 860{
861 token->type = T_ERROR; 861 token->type = T_ERROR;
862 token->index = json->index; 862 token->index = json->ptr - json->data;
863 token->value.string = errtype; 863 token->value.string = errtype;
864} 864}
865 865
@@ -869,10 +869,10 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
869 char ch; 869 char ch;
870 870
871 /* Caller must ensure a string is next */ 871 /* Caller must ensure a string is next */
872 assert(json->data[json->index] == '"'); 872 assert(*json->ptr == '"');
873 873
874 /* Skip " */ 874 /* Skip " */
875 json->index++; 875 json->ptr++;
876 876
877 /* json->tmp is the temporary strbuf used to accumulate the 877 /* json->tmp is the temporary strbuf used to accumulate the
878 * decoded string value. 878 * decoded string value.
@@ -880,7 +880,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
880 */ 880 */
881 strbuf_reset(json->tmp); 881 strbuf_reset(json->tmp);
882 882
883 while ((ch = json->data[json->index]) != '"') { 883 while ((ch = *json->ptr) != '"') {
884 if (!ch) { 884 if (!ch) {
885 /* Premature end of the string */ 885 /* Premature end of the string */
886 json_set_token_error(token, json, "unexpected end of string"); 886 json_set_token_error(token, json, "unexpected end of string");
@@ -890,7 +890,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
890 /* Handle escapes */ 890 /* Handle escapes */
891 if (ch == '\\') { 891 if (ch == '\\') {
892 /* Fetch escape character */ 892 /* Fetch escape character */
893 ch = json->data[json->index + 1]; 893 ch = *(json->ptr + 1);
894 894
895 /* Translate escape code and append to tmp string */ 895 /* Translate escape code and append to tmp string */
896 ch = escape2char[(unsigned char)ch]; 896 ch = escape2char[(unsigned char)ch];
@@ -908,14 +908,14 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
908 } 908 }
909 909
910 /* Skip '\' */ 910 /* Skip '\' */
911 json->index++; 911 json->ptr++;
912 } 912 }
913 /* Append normal character or translated single character 913 /* Append normal character or translated single character
914 * Unicode escapes are handled above */ 914 * Unicode escapes are handled above */
915 strbuf_append_char_unsafe(json->tmp, ch); 915 strbuf_append_char_unsafe(json->tmp, ch);
916 json->index++; 916 json->ptr++;
917 } 917 }
918 json->index++; /* Eat final quote (") */ 918 json->ptr++; /* Eat final quote (") */
919 919
920 strbuf_ensure_null(json->tmp); 920 strbuf_ensure_null(json->tmp);
921 921
@@ -940,33 +940,33 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
940 */ 940 */
941static int json_is_invalid_number(json_parse_t *json) 941static int json_is_invalid_number(json_parse_t *json)
942{ 942{
943 int i = json->index; 943 const char *p = json->ptr;
944 944
945 /* Reject numbers starting with + */ 945 /* Reject numbers starting with + */
946 if (json->data[i] == '+') 946 if (*p == '+')
947 return 1; 947 return 1;
948 948
949 /* Skip minus sign if it exists */ 949 /* Skip minus sign if it exists */
950 if (json->data[i] == '-') 950 if (*p == '-')
951 i++; 951 p++;
952 952
953 /* Reject numbers starting with 0x, or leading zeros */ 953 /* Reject numbers starting with 0x, or leading zeros */
954 if (json->data[i] == '0') { 954 if (*p == '0') {
955 int ch2 = json->data[i + 1]; 955 int ch2 = *(p + 1);
956 956
957 if ((ch2 | 0x20) == 'x' || /* Hex */ 957 if ((ch2 | 0x20) == 'x' || /* Hex */
958 ('0' <= ch2 && ch2 <= '9')) /* Leading zero */ 958 ('0' <= ch2 && ch2 <= '9')) /* Leading zero */
959 return 1; 959 return 1;
960 960
961 return 0; 961 return 0;
962 } else if (json->data[i] <= '9') { 962 } else if (*p <= '9') {
963 return 0; /* Ordinary number */ 963 return 0; /* Ordinary number */
964 } 964 }
965 965
966 /* Reject inf/nan */ 966 /* Reject inf/nan */
967 if (!strncasecmp(&json->data[i], "inf", 3)) 967 if (!strncasecmp(p, "inf", 3))
968 return 1; 968 return 1;
969 if (!strncasecmp(&json->data[i], "nan", 3)) 969 if (!strncasecmp(p, "nan", 3))
970 return 1; 970 return 1;
971 971
972 /* Pass all other numbers which may still be invalid, but 972 /* Pass all other numbers which may still be invalid, but
@@ -976,35 +976,39 @@ static int json_is_invalid_number(json_parse_t *json)
976 976
977static void json_next_number_token(json_parse_t *json, json_token_t *token) 977static void json_next_number_token(json_parse_t *json, json_token_t *token)
978{ 978{
979 const char *startptr;
980 char *endptr; 979 char *endptr;
981 980
982 token->type = T_NUMBER; 981 token->type = T_NUMBER;
983 startptr = &json->data[json->index]; 982 token->value.number = fpconv_strtod(json->ptr, &endptr);
984 token->value.number = fpconv_strtod(&json->data[json->index], &endptr); 983 if (json->ptr == endptr)
985 if (startptr == endptr)
986 json_set_token_error(token, json, "invalid number"); 984 json_set_token_error(token, json, "invalid number");
987 else 985 else
988 json->index += endptr - startptr; /* Skip the processed number */ 986 json->ptr = endptr; /* Skip the processed number */
989 987
990 return; 988 return;
991} 989}
992 990
993/* Fills in the token struct. 991/* Fills in the token struct.
994 * T_STRING will return a pointer to the json_parse_t temporary string 992 * T_STRING will return a pointer to the json_parse_t temporary string
995 * T_ERROR will leave the json->index pointer at the error. 993 * T_ERROR will leave the json->ptr pointer at the error.
996 */ 994 */
997static void json_next_token(json_parse_t *json, json_token_t *token) 995static void json_next_token(json_parse_t *json, json_token_t *token)
998{ 996{
999 json_token_type_t *ch2token = json->cfg->ch2token; 997 json_token_type_t *ch2token = json->cfg->ch2token;
1000 int ch; 998 int ch;
1001 999
1002 /* Eat whitespace. FIXME: UGLY */ 1000 /* Eat whitespace. */
1003 token->type = ch2token[(unsigned char)json->data[json->index]]; 1001 while (1) {
1004 while (token->type == T_WHITESPACE) 1002 ch = (unsigned char)*(json->ptr);
1005 token->type = ch2token[(unsigned char)json->data[++json->index]]; 1003 token->type = ch2token[ch];
1004 if (token->type != T_WHITESPACE)
1005 break;
1006 json->ptr++;
1007 }
1006 1008
1007 token->index = json->index; 1009 /* Store location of new token. Required when throwing errors
1010 * for unexpected tokens (syntax errors). */
1011 token->index = json->ptr - json->data;
1008 1012
1009 /* Don't advance the pointer for an error or the end */ 1013 /* Don't advance the pointer for an error or the end */
1010 if (token->type == T_ERROR) { 1014 if (token->type == T_ERROR) {
@@ -1018,14 +1022,13 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
1018 1022
1019 /* Found a known single character token, advance index and return */ 1023 /* Found a known single character token, advance index and return */
1020 if (token->type != T_UNKNOWN) { 1024 if (token->type != T_UNKNOWN) {
1021 json->index++; 1025 json->ptr++;
1022 return; 1026 return;
1023 } 1027 }
1024 1028
1025 /* Process characters which triggered T_UNKNOWN */ 1029 /* Process characters which triggered T_UNKNOWN
1026 ch = json->data[json->index]; 1030 *
1027 1031 * Must use strncmp() to match the front of the JSON string.
1028 /* Must use strncmp() to match the front of the JSON string.
1029 * JSON identifier must be lowercase. 1032 * JSON identifier must be lowercase.
1030 * When strict_numbers if disabled, either case is allowed for 1033 * When strict_numbers if disabled, either case is allowed for
1031 * Infinity/NaN (since we are no longer following the spec..) */ 1034 * Infinity/NaN (since we are no longer following the spec..) */
@@ -1039,19 +1042,19 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
1039 } 1042 }
1040 json_next_number_token(json, token); 1043 json_next_number_token(json, token);
1041 return; 1044 return;
1042 } else if (!strncmp(&json->data[json->index], "true", 4)) { 1045 } else if (!strncmp(json->ptr, "true", 4)) {
1043 token->type = T_BOOLEAN; 1046 token->type = T_BOOLEAN;
1044 token->value.boolean = 1; 1047 token->value.boolean = 1;
1045 json->index += 4; 1048 json->ptr += 4;
1046 return; 1049 return;
1047 } else if (!strncmp(&json->data[json->index], "false", 5)) { 1050 } else if (!strncmp(json->ptr, "false", 5)) {
1048 token->type = T_BOOLEAN; 1051 token->type = T_BOOLEAN;
1049 token->value.boolean = 0; 1052 token->value.boolean = 0;
1050 json->index += 5; 1053 json->ptr += 5;
1051 return; 1054 return;
1052 } else if (!strncmp(&json->data[json->index], "null", 4)) { 1055 } else if (!strncmp(json->ptr, "null", 4)) {
1053 token->type = T_NULL; 1056 token->type = T_NULL;
1054 json->index += 4; 1057 json->ptr += 4;
1055 return; 1058 return;
1056 } else if (!json->cfg->decode_refuse_badnum && 1059 } else if (!json->cfg->decode_refuse_badnum &&
1057 json_is_invalid_number(json)) { 1060 json_is_invalid_number(json)) {
@@ -1219,7 +1222,7 @@ static void lua_json_decode(lua_State *l, const char *json_text, int json_len)
1219 1222
1220 json.cfg = json_fetch_config(l); 1223 json.cfg = json_fetch_config(l);
1221 json.data = json_text; 1224 json.data = json_text;
1222 json.index = 0; 1225 json.ptr = json.data;
1223 1226
1224 /* Ensure the temporary buffer can hold the entire string. 1227 /* Ensure the temporary buffer can hold the entire string.
1225 * This means we no longer need to do length checks since the decoded 1228 * This means we no longer need to do length checks since the decoded