Improve performance by tracking decode ptr

Track a pointer to the current location in the JSON string instead of an index into the string array. Improves decode performance by 1-10%. json_next_token(): clean up whitespace handling and leave "ch" containing the current non-whitespace character.
author: Mark Pulford <mark@kyne.com.au> 2012-01-08 13:28:58 +1030
committer: Mark Pulford <mark@kyne.com.au> 2012-03-04 18:54:34 +1030
commit: e8163b395942b0e5ec0c4aad07b66472d195e61a (patch)
tree: 06ce595403fb25f8c92d3912d8c1d9f06fe05a25 /lua_cjson.c
parent: c7fbb8e441b6a62e0d6d016add8ed6b44d90d981 (diff)
download: lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.gz
lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.bz2
lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.zip
1 files changed, 49 insertions, 46 deletions
diff --git a/lua_cjson.c b/lua_cjson.c
index 708e695..344cb43 100644
--- a/lua_cjson.c
+++ b/lua_cjson.c
@@ -127,7 +127,7 @@ typedef struct {
 typedef struct {
    const char *data;
-    int index;
+    const char *ptr;
    strbuf_t *tmp;    /* Temporary storage for strings */
    json_config_t *cfg;
 } json_parse_t;
@@ -806,7 +806,7 @@ static int json_append_unicode_escape(json_parse_t *json)
    int escape_len = 6;
    /* Fetch UTF-16 code unit */
-    codepoint = decode_hex4(&json->data[json->index + 2]);
+    codepoint = decode_hex4(json->ptr + 2);
    if (codepoint < 0)
        return -1;
@@ -822,13 +822,13 @@ static int json_append_unicode_escape(json_parse_t *json)
            return -1;
        /* Ensure the next code is a unicode escape */
-        if (json->data[json->index + escape_len] != '\\' ||
+        if (*(json->ptr + escape_len) != '\\' ||
-            json->data[json->index + escape_len + 1] != 'u') {
+            *(json->ptr + escape_len + 1) != 'u') {
            return -1;
        }
        /* Fetch the next codepoint */
-        surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]);
+        surrogate_low = decode_hex4(json->ptr + 2 + escape_len);
        if (surrogate_low < 0)
            return -1;
@@ -850,7 +850,7 @@ static int json_append_unicode_escape(json_parse_t *json)
    /* Append bytes and advance parse index */
    strbuf_append_mem_unsafe(json->tmp, utf8, len);
-    json->index += escape_len;
+    json->ptr += escape_len;
    return 0;
 }
@@ -859,7 +859,7 @@ static void json_set_token_error(json_token_t *token, json_parse_t *json,
                                 const char *errtype)
 {
    token->type = T_ERROR;
-    token->index = json->index;
+    token->index = json->ptr - json->data;
    token->value.string = errtype;
 }
@@ -869,10 +869,10 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
    char ch;
    /* Caller must ensure a string is next */
-    assert(json->data[json->index] == '"');
+    assert(*json->ptr == '"');
    /* Skip " */
-    json->index++;
+    json->ptr++;
    /* json->tmp is the temporary strbuf used to accumulate the
     * decoded string value.
@@ -880,7 +880,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
     */
    strbuf_reset(json->tmp);
-    while ((ch = json->data[json->index]) != '"') {
+    while ((ch = *json->ptr) != '"') {
        if (!ch) {
            /* Premature end of the string */
            json_set_token_error(token, json, "unexpected end of string");
@@ -890,7 +890,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
        /* Handle escapes */
        if (ch == '\\') {
            /* Fetch escape character */
-            ch = json->data[json->index + 1];
+            ch = *(json->ptr + 1);
            /* Translate escape code and append to tmp string */
            ch = escape2char[(unsigned char)ch];
@@ -908,14 +908,14 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
            }
            /* Skip '\' */
-            json->index++;
+            json->ptr++;
        }
        /* Append normal character or translated single character
         * Unicode escapes are handled above */
        strbuf_append_char_unsafe(json->tmp, ch);
-        json->index++;
+        json->ptr++;
    }
-    json->index++;  /* Eat final quote (") */
+    json->ptr++;    /* Eat final quote (") */
    strbuf_ensure_null(json->tmp);
@@ -940,33 +940,33 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
 */
 static int json_is_invalid_number(json_parse_t *json)
 {
-    int i = json->index;
+    const char *p = json->ptr;
    /* Reject numbers starting with + */
-    if (json->data[i] == '+')
+    if (*p == '+')
        return 1;
    /* Skip minus sign if it exists */
-    if (json->data[i] == '-')
+    if (*p == '-')
-        i++;
+        p++;
    /* Reject numbers starting with 0x, or leading zeros */
-    if (json->data[i] == '0') {
+    if (*p == '0') {
-        int ch2 = json->data[i + 1];
+        int ch2 = *(p + 1);
        if ((ch2 | 0x20) == 'x' ||          /* Hex */
            ('0' <= ch2 && ch2 <= '9'))     /* Leading zero */
            return 1;
        return 0;
-    } else if (json->data[i] <= '9') {
+    } else if (*p <= '9') {
        return 0;                           /* Ordinary number */
    }
    /* Reject inf/nan */
-    if (!strncasecmp(&json->data[i], "inf", 3))
+    if (!strncasecmp(p, "inf", 3))
        return 1;
-    if (!strncasecmp(&json->data[i], "nan", 3))
+    if (!strncasecmp(p, "nan", 3))
        return 1;
    /* Pass all other numbers which may still be invalid, but
@@ -976,35 +976,39 @@ static int json_is_invalid_number(json_parse_t *json)
 static void json_next_number_token(json_parse_t *json, json_token_t *token)
 {
-    const char *startptr;
    char *endptr;
    token->type = T_NUMBER;
-    startptr = &json->data[json->index];
+    token->value.number = fpconv_strtod(json->ptr, &endptr);
-    token->value.number = fpconv_strtod(&json->data[json->index], &endptr);
+    if (json->ptr == endptr)
-    if (startptr == endptr)
        json_set_token_error(token, json, "invalid number");
    else
-        json->index += endptr - startptr;   /* Skip the processed number */
+        json->ptr = endptr;     /* Skip the processed number */
    return;
 }
 /* Fills in the token struct.
 * T_STRING will return a pointer to the json_parse_t temporary string
- * T_ERROR will leave the json->index pointer at the error.
+ * T_ERROR will leave the json->ptr pointer at the error.
 */
 static void json_next_token(json_parse_t *json, json_token_t *token)
 {
    json_token_type_t *ch2token = json->cfg->ch2token;
    int ch;
-    /* Eat whitespace. FIXME: UGLY */
+    /* Eat whitespace. */
-    token->type = ch2token[(unsigned char)json->data[json->index]];
+    while (1) {
-    while (token->type == T_WHITESPACE)
+        ch = (unsigned char)*(json->ptr);
-        token->type = ch2token[(unsigned char)json->data[++json->index]];
+        token->type = ch2token[ch];
+        if (token->type != T_WHITESPACE)
+            break;
+        json->ptr++;
+    }
-    token->index = json->index;
+    /* Store location of new token. Required when throwing errors
+     * for unexpected tokens (syntax errors). */
+    token->index = json->ptr - json->data;
    /* Don't advance the pointer for an error or the end */
    if (token->type == T_ERROR) {
@@ -1018,14 +1022,13 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
    /* Found a known single character token, advance index and return */
    if (token->type != T_UNKNOWN) {
-        json->index++;
+        json->ptr++;
        return;
    }
-    /* Process characters which triggered T_UNKNOWN */
+    /* Process characters which triggered T_UNKNOWN
-    ch = json->data[json->index];
+     *
+     * Must use strncmp() to match the front of the JSON string.
-    /* Must use strncmp() to match the front of the JSON string.
     * JSON identifier must be lowercase.
     * When strict_numbers if disabled, either case is allowed for
     * Infinity/NaN (since we are no longer following the spec..) */
@@ -1039,19 +1042,19 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
        }
        json_next_number_token(json, token);
        return;
-    } else if (!strncmp(&json->data[json->index], "true", 4)) {
+    } else if (!strncmp(json->ptr, "true", 4)) {
        token->type = T_BOOLEAN;
        token->value.boolean = 1;
-        json->index += 4;
+        json->ptr += 4;
        return;
-    } else if (!strncmp(&json->data[json->index], "false", 5)) {
+    } else if (!strncmp(json->ptr, "false", 5)) {
        token->type = T_BOOLEAN;
        token->value.boolean = 0;
-        json->index += 5;
+        json->ptr += 5;
        return;
-    } else if (!strncmp(&json->data[json->index], "null", 4)) {
+    } else if (!strncmp(json->ptr, "null", 4)) {
        token->type = T_NULL;
-        json->index += 4;
+        json->ptr += 4;
        return;
    } else if (!json->cfg->decode_refuse_badnum &&
               json_is_invalid_number(json)) {
@@ -1219,7 +1222,7 @@ static void lua_json_decode(lua_State *l, const char *json_text, int json_len)
    json.cfg = json_fetch_config(l);
    json.data = json_text;
-    json.index = 0;
+    json.ptr = json.data;
    /* Ensure the temporary buffer can hold the entire string.
     * This means we no longer need to do length checks since the decoded
author	Mark Pulford <mark@kyne.com.au>	2012-01-08 13:28:58 +1030
committer	Mark Pulford <mark@kyne.com.au>	2012-03-04 18:54:34 +1030
commit	e8163b395942b0e5ec0c4aad07b66472d195e61a (patch)
tree	06ce595403fb25f8c92d3912d8c1d9f06fe05a25 /lua_cjson.c
parent	c7fbb8e441b6a62e0d6d016add8ed6b44d90d981 (diff)
download	lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.gz lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.tar.bz2 lua-cjson-e8163b395942b0e5ec0c4aad07b66472d195e61a.zip

diff --git a/lua_cjson.c b/lua_cjson.c
index 708e695..344cb43 100644
--- a/lua_cjson.c
+++ b/lua_cjson.c
@@ -127,7 +127,7 @@ typedef struct {
127		127
128	typedef struct {	128	typedef struct {
129	const char *data;	129	const char *data;
130	int index;	130	const char *ptr;
131	strbuf_t *tmp; /* Temporary storage for strings */	131	strbuf_t *tmp; /* Temporary storage for strings */
132	json_config_t *cfg;	132	json_config_t *cfg;
133	} json_parse_t;	133	} json_parse_t;
@@ -806,7 +806,7 @@ static int json_append_unicode_escape(json_parse_t *json)
806	int escape_len = 6;	806	int escape_len = 6;
807		807
808	/* Fetch UTF-16 code unit */	808	/* Fetch UTF-16 code unit */
809	codepoint = decode_hex4(&json->data[json->index + 2]);	809	codepoint = decode_hex4(json->ptr + 2);
810	if (codepoint < 0)	810	if (codepoint < 0)
811	return -1;	811	return -1;
812		812
@@ -822,13 +822,13 @@ static int json_append_unicode_escape(json_parse_t *json)
822	return -1;	822	return -1;
823		823
824	/* Ensure the next code is a unicode escape */	824	/* Ensure the next code is a unicode escape */
825	if (json->data[json->index + escape_len] != '\\' ||	825	if (*(json->ptr + escape_len) != '\\' ||
826	json->data[json->index + escape_len + 1] != 'u') {	826	*(json->ptr + escape_len + 1) != 'u') {
827	return -1;	827	return -1;
828	}	828	}
829		829
830	/* Fetch the next codepoint */	830	/* Fetch the next codepoint */
831	surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]);	831	surrogate_low = decode_hex4(json->ptr + 2 + escape_len);
832	if (surrogate_low < 0)	832	if (surrogate_low < 0)
833	return -1;	833	return -1;
834		834
@@ -850,7 +850,7 @@ static int json_append_unicode_escape(json_parse_t *json)
850		850
851	/* Append bytes and advance parse index */	851	/* Append bytes and advance parse index */
852	strbuf_append_mem_unsafe(json->tmp, utf8, len);	852	strbuf_append_mem_unsafe(json->tmp, utf8, len);
853	json->index += escape_len;	853	json->ptr += escape_len;
854		854
855	return 0;	855	return 0;
856	}	856	}
@@ -859,7 +859,7 @@ static void json_set_token_error(json_token_t *token, json_parse_t *json,
859	const char *errtype)	859	const char *errtype)
860	{	860	{
861	token->type = T_ERROR;	861	token->type = T_ERROR;
862	token->index = json->index;	862	token->index = json->ptr - json->data;
863	token->value.string = errtype;	863	token->value.string = errtype;
864	}	864	}
865		865
@@ -869,10 +869,10 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
869	char ch;	869	char ch;
870		870
871	/* Caller must ensure a string is next */	871	/* Caller must ensure a string is next */
872	assert(json->data[json->index] == '"');	872	assert(*json->ptr == '"');
873		873
874	/* Skip " */	874	/* Skip " */
875	json->index++;	875	json->ptr++;
876		876
877	/* json->tmp is the temporary strbuf used to accumulate the	877	/* json->tmp is the temporary strbuf used to accumulate the
878	* decoded string value.	878	* decoded string value.
@@ -880,7 +880,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
880	*/	880	*/
881	strbuf_reset(json->tmp);	881	strbuf_reset(json->tmp);
882		882
883	while ((ch = json->data[json->index]) != '"') {	883	while ((ch = *json->ptr) != '"') {
884	if (!ch) {	884	if (!ch) {
885	/* Premature end of the string */	885	/* Premature end of the string */
886	json_set_token_error(token, json, "unexpected end of string");	886	json_set_token_error(token, json, "unexpected end of string");
@@ -890,7 +890,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
890	/* Handle escapes */	890	/* Handle escapes */
891	if (ch == '\\') {	891	if (ch == '\\') {
892	/* Fetch escape character */	892	/* Fetch escape character */
893	ch = json->data[json->index + 1];	893	ch = *(json->ptr + 1);
894		894
895	/* Translate escape code and append to tmp string */	895	/* Translate escape code and append to tmp string */
896	ch = escape2char[(unsigned char)ch];	896	ch = escape2char[(unsigned char)ch];
@@ -908,14 +908,14 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
908	}	908	}
909		909
910	/* Skip '\' */	910	/* Skip '\' */
911	json->index++;	911	json->ptr++;
912	}	912	}
913	/* Append normal character or translated single character	913	/* Append normal character or translated single character
914	* Unicode escapes are handled above */	914	* Unicode escapes are handled above */
915	strbuf_append_char_unsafe(json->tmp, ch);	915	strbuf_append_char_unsafe(json->tmp, ch);
916	json->index++;	916	json->ptr++;
917	}	917	}
918	json->index++; /* Eat final quote (") */	918	json->ptr++; /* Eat final quote (") */
919		919
920	strbuf_ensure_null(json->tmp);	920	strbuf_ensure_null(json->tmp);
921		921
@@ -940,33 +940,33 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
940	*/	940	*/
941	static int json_is_invalid_number(json_parse_t *json)	941	static int json_is_invalid_number(json_parse_t *json)
942	{	942	{
943	int i = json->index;	943	const char *p = json->ptr;
944		944
945	/* Reject numbers starting with + */	945	/* Reject numbers starting with + */
946	if (json->data[i] == '+')	946	if (*p == '+')
947	return 1;	947	return 1;
948		948
949	/* Skip minus sign if it exists */	949	/* Skip minus sign if it exists */
950	if (json->data[i] == '-')	950	if (*p == '-')
951	i++;	951	p++;
952		952
953	/* Reject numbers starting with 0x, or leading zeros */	953	/* Reject numbers starting with 0x, or leading zeros */
954	if (json->data[i] == '0') {	954	if (*p == '0') {
955	int ch2 = json->data[i + 1];	955	int ch2 = *(p + 1);
956		956
957	if ((ch2 | 0x20) == 'x' || /* Hex */	957	if ((ch2 | 0x20) == 'x' || /* Hex */
958	('0' <= ch2 && ch2 <= '9')) /* Leading zero */	958	('0' <= ch2 && ch2 <= '9')) /* Leading zero */
959	return 1;	959	return 1;
960		960
961	return 0;	961	return 0;
962	} else if (json->data[i] <= '9') {	962	} else if (*p <= '9') {
963	return 0; /* Ordinary number */	963	return 0; /* Ordinary number */
964	}	964	}
965		965
966	/* Reject inf/nan */	966	/* Reject inf/nan */
967	if (!strncasecmp(&json->data[i], "inf", 3))	967	if (!strncasecmp(p, "inf", 3))
968	return 1;	968	return 1;
969	if (!strncasecmp(&json->data[i], "nan", 3))	969	if (!strncasecmp(p, "nan", 3))
970	return 1;	970	return 1;
971		971
972	/* Pass all other numbers which may still be invalid, but	972	/* Pass all other numbers which may still be invalid, but
@@ -976,35 +976,39 @@ static int json_is_invalid_number(json_parse_t *json)
976		976
977	static void json_next_number_token(json_parse_t *json, json_token_t *token)	977	static void json_next_number_token(json_parse_t *json, json_token_t *token)
978	{	978	{
979	const char *startptr;
980	char *endptr;	979	char *endptr;
981		980
982	token->type = T_NUMBER;	981	token->type = T_NUMBER;
983	startptr = &json->data[json->index];	982	token->value.number = fpconv_strtod(json->ptr, &endptr);
984	token->value.number = fpconv_strtod(&json->data[json->index], &endptr);	983	if (json->ptr == endptr)
985	if (startptr == endptr)
986	json_set_token_error(token, json, "invalid number");	984	json_set_token_error(token, json, "invalid number");
987	else	985	else
988	json->index += endptr - startptr; /* Skip the processed number */	986	json->ptr = endptr; /* Skip the processed number */
989		987
990	return;	988	return;
991	}	989	}
992		990
993	/* Fills in the token struct.	991	/* Fills in the token struct.
994	* T_STRING will return a pointer to the json_parse_t temporary string	992	* T_STRING will return a pointer to the json_parse_t temporary string
995	* T_ERROR will leave the json->index pointer at the error.	993	* T_ERROR will leave the json->ptr pointer at the error.
996	*/	994	*/
997	static void json_next_token(json_parse_t *json, json_token_t *token)	995	static void json_next_token(json_parse_t *json, json_token_t *token)
998	{	996	{
999	json_token_type_t *ch2token = json->cfg->ch2token;	997	json_token_type_t *ch2token = json->cfg->ch2token;
1000	int ch;	998	int ch;
1001		999
1002	/* Eat whitespace. FIXME: UGLY */	1000	/* Eat whitespace. */
1003	token->type = ch2token[(unsigned char)json->data[json->index]];	1001	while (1) {
1004	while (token->type == T_WHITESPACE)	1002	ch = (unsigned char)*(json->ptr);
1005	token->type = ch2token[(unsigned char)json->data[++json->index]];	1003	token->type = ch2token[ch];
		1004	if (token->type != T_WHITESPACE)
		1005	break;
		1006	json->ptr++;
		1007	}
1006		1008
1007	token->index = json->index;	1009	/* Store location of new token. Required when throwing errors
		1010	* for unexpected tokens (syntax errors). */
		1011	token->index = json->ptr - json->data;
1008		1012
1009	/* Don't advance the pointer for an error or the end */	1013	/* Don't advance the pointer for an error or the end */
1010	if (token->type == T_ERROR) {	1014	if (token->type == T_ERROR) {
@@ -1018,14 +1022,13 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
1018		1022
1019	/* Found a known single character token, advance index and return */	1023	/* Found a known single character token, advance index and return */
1020	if (token->type != T_UNKNOWN) {	1024	if (token->type != T_UNKNOWN) {
1021	json->index++;	1025	json->ptr++;
1022	return;	1026	return;
1023	}	1027	}
1024		1028
1025	/* Process characters which triggered T_UNKNOWN */	1029	/* Process characters which triggered T_UNKNOWN
1026	ch = json->data[json->index];	1030	*
1027		1031	* Must use strncmp() to match the front of the JSON string.
1028	/* Must use strncmp() to match the front of the JSON string.
1029	* JSON identifier must be lowercase.	1032	* JSON identifier must be lowercase.
1030	* When strict_numbers if disabled, either case is allowed for	1033	* When strict_numbers if disabled, either case is allowed for
1031	* Infinity/NaN (since we are no longer following the spec..) */	1034	* Infinity/NaN (since we are no longer following the spec..) */
@@ -1039,19 +1042,19 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
1039	}	1042	}
1040	json_next_number_token(json, token);	1043	json_next_number_token(json, token);
1041	return;	1044	return;
1042	} else if (!strncmp(&json->data[json->index], "true", 4)) {	1045	} else if (!strncmp(json->ptr, "true", 4)) {
1043	token->type = T_BOOLEAN;	1046	token->type = T_BOOLEAN;
1044	token->value.boolean = 1;	1047	token->value.boolean = 1;
1045	json->index += 4;	1048	json->ptr += 4;
1046	return;	1049	return;
1047	} else if (!strncmp(&json->data[json->index], "false", 5)) {	1050	} else if (!strncmp(json->ptr, "false", 5)) {
1048	token->type = T_BOOLEAN;	1051	token->type = T_BOOLEAN;
1049	token->value.boolean = 0;	1052	token->value.boolean = 0;
1050	json->index += 5;	1053	json->ptr += 5;
1051	return;	1054	return;
1052	} else if (!strncmp(&json->data[json->index], "null", 4)) {	1055	} else if (!strncmp(json->ptr, "null", 4)) {
1053	token->type = T_NULL;	1056	token->type = T_NULL;
1054	json->index += 4;	1057	json->ptr += 4;
1055	return;	1058	return;
1056	} else if (!json->cfg->decode_refuse_badnum &&	1059	} else if (!json->cfg->decode_refuse_badnum &&
1057	json_is_invalid_number(json)) {	1060	json_is_invalid_number(json)) {
@@ -1219,7 +1222,7 @@ static void lua_json_decode(lua_State *l, const char *json_text, int json_len)
1219		1222
1220	json.cfg = json_fetch_config(l);	1223	json.cfg = json_fetch_config(l);
1221	json.data = json_text;	1224	json.data = json_text;
1222	json.index = 0;	1225	json.ptr = json.data;
1223		1226
1224	/* Ensure the temporary buffer can hold the entire string.	1227	/* Ensure the temporary buffer can hold the entire string.
1225	* This means we no longer need to do length checks since the decoded	1228	* This means we no longer need to do length checks since the decoded