From e8163b395942b0e5ec0c4aad07b66472d195e61a Mon Sep 17 00:00:00 2001
From: Mark Pulford <mark@kyne.com.au>
Date: Sun, 8 Jan 2012 13:28:58 +1030
Subject: Improve performance by tracking decode ptr

Track pointer to the current location in the JSON string, instead of
an index to the string array. Improves decode performance 1-10%.

json_next_token():
- Clean up white space handling and leave "ch" containing the current
  non-whitespace character.
---
 lua_cjson.c | 95 +++++++++++++++++++++++++++++++------------------------------
 1 file changed, 49 insertions(+), 46 deletions(-)

(limited to 'lua_cjson.c')

diff --git a/lua_cjson.c b/lua_cjson.c
index 708e695..344cb43 100644
--- a/lua_cjson.c
+++ b/lua_cjson.c
@@ -127,7 +127,7 @@ typedef struct {
 
 typedef struct {
     const char *data;
-    int index;
+    const char *ptr;
     strbuf_t *tmp;    /* Temporary storage for strings */
     json_config_t *cfg;
 } json_parse_t;
@@ -806,7 +806,7 @@ static int json_append_unicode_escape(json_parse_t *json)
     int escape_len = 6;
 
     /* Fetch UTF-16 code unit */
-    codepoint = decode_hex4(&json->data[json->index + 2]);
+    codepoint = decode_hex4(json->ptr + 2);
     if (codepoint < 0)
         return -1;
 
@@ -822,13 +822,13 @@ static int json_append_unicode_escape(json_parse_t *json)
             return -1;
 
         /* Ensure the next code is a unicode escape */
-        if (json->data[json->index + escape_len] != '\\' ||
-            json->data[json->index + escape_len + 1] != 'u') {
+        if (*(json->ptr + escape_len) != '\\' ||
+            *(json->ptr + escape_len + 1) != 'u') {
             return -1;
         }
 
         /* Fetch the next codepoint */
-        surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]);
+        surrogate_low = decode_hex4(json->ptr + 2 + escape_len);
         if (surrogate_low < 0)
             return -1;
 
@@ -850,7 +850,7 @@ static int json_append_unicode_escape(json_parse_t *json)
 
     /* Append bytes and advance parse index */
     strbuf_append_mem_unsafe(json->tmp, utf8, len);
-    json->index += escape_len;
+    json->ptr += escape_len;
 
     return 0;
 }
@@ -859,7 +859,7 @@ static void json_set_token_error(json_token_t *token, json_parse_t *json,
                                  const char *errtype)
 {
     token->type = T_ERROR;
-    token->index = json->index;
+    token->index = json->ptr - json->data;
     token->value.string = errtype;
 }
 
@@ -869,10 +869,10 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
     char ch;
 
     /* Caller must ensure a string is next */
-    assert(json->data[json->index] == '"');
+    assert(*json->ptr == '"');
 
     /* Skip " */
-    json->index++;
+    json->ptr++;
 
     /* json->tmp is the temporary strbuf used to accumulate the
      * decoded string value.
@@ -880,7 +880,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
      */
     strbuf_reset(json->tmp);
 
-    while ((ch = json->data[json->index]) != '"') {
+    while ((ch = *json->ptr) != '"') {
         if (!ch) {
             /* Premature end of the string */
             json_set_token_error(token, json, "unexpected end of string");
@@ -890,7 +890,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
         /* Handle escapes */
         if (ch == '\\') {
             /* Fetch escape character */
-            ch = json->data[json->index + 1];
+            ch = *(json->ptr + 1);
 
             /* Translate escape code and append to tmp string */
             ch = escape2char[(unsigned char)ch];
@@ -908,14 +908,14 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
             }
 
             /* Skip '\' */
-            json->index++;
+            json->ptr++;
         }
         /* Append normal character or translated single character
          * Unicode escapes are handled above */
         strbuf_append_char_unsafe(json->tmp, ch);
-        json->index++;
+        json->ptr++;
     }
-    json->index++;  /* Eat final quote (") */
+    json->ptr++;    /* Eat final quote (") */
 
     strbuf_ensure_null(json->tmp);
 
@@ -940,33 +940,33 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
  */
 static int json_is_invalid_number(json_parse_t *json)
 {
-    int i = json->index;
+    const char *p = json->ptr;
 
     /* Reject numbers starting with + */
-    if (json->data[i] == '+')
+    if (*p == '+')
         return 1;
 
     /* Skip minus sign if it exists */
-    if (json->data[i] == '-')
-        i++;
+    if (*p == '-')
+        p++;
 
     /* Reject numbers starting with 0x, or leading zeros */
-    if (json->data[i] == '0') {
-        int ch2 = json->data[i + 1];
+    if (*p == '0') {
+        int ch2 = *(p + 1);
 
         if ((ch2 | 0x20) == 'x' ||          /* Hex */
             ('0' <= ch2 && ch2 <= '9'))     /* Leading zero */
             return 1;
 
         return 0;
-    } else if (json->data[i] <= '9') {
+    } else if (*p <= '9') {
         return 0;                           /* Ordinary number */
     }
 
     /* Reject inf/nan */
-    if (!strncasecmp(&json->data[i], "inf", 3))
+    if (!strncasecmp(p, "inf", 3))
         return 1;
-    if (!strncasecmp(&json->data[i], "nan", 3))
+    if (!strncasecmp(p, "nan", 3))
         return 1;
 
     /* Pass all other numbers which may still be invalid, but
@@ -976,35 +976,39 @@ static int json_is_invalid_number(json_parse_t *json)
 
 static void json_next_number_token(json_parse_t *json, json_token_t *token)
 {
-    const char *startptr;
     char *endptr;
 
     token->type = T_NUMBER;
-    startptr = &json->data[json->index];
-    token->value.number = fpconv_strtod(&json->data[json->index], &endptr);
-    if (startptr == endptr)
+    token->value.number = fpconv_strtod(json->ptr, &endptr);
+    if (json->ptr == endptr)
         json_set_token_error(token, json, "invalid number");
     else
-        json->index += endptr - startptr;   /* Skip the processed number */
+        json->ptr = endptr;     /* Skip the processed number */
 
     return;
 }
 
 /* Fills in the token struct.
  * T_STRING will return a pointer to the json_parse_t temporary string
- * T_ERROR will leave the json->index pointer at the error.
+ * T_ERROR will leave the json->ptr pointer at the error.
  */
 static void json_next_token(json_parse_t *json, json_token_t *token)
 {
     json_token_type_t *ch2token = json->cfg->ch2token;
     int ch;
 
-    /* Eat whitespace. FIXME: UGLY */
-    token->type = ch2token[(unsigned char)json->data[json->index]];
-    while (token->type == T_WHITESPACE)
-        token->type = ch2token[(unsigned char)json->data[++json->index]];
+    /* Eat whitespace. */
+    while (1) {
+        ch = (unsigned char)*(json->ptr);
+        token->type = ch2token[ch];
+        if (token->type != T_WHITESPACE)
+            break;
+        json->ptr++;
+    }
 
-    token->index = json->index;
+    /* Store location of new token. Required when throwing errors
+     * for unexpected tokens (syntax errors). */
+    token->index = json->ptr - json->data;
 
     /* Don't advance the pointer for an error or the end */
     if (token->type == T_ERROR) {
@@ -1018,14 +1022,13 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
 
     /* Found a known single character token, advance index and return */
     if (token->type != T_UNKNOWN) {
-        json->index++;
+        json->ptr++;
         return;
     }
 
-    /* Process characters which triggered T_UNKNOWN */
-    ch = json->data[json->index];
-
-    /* Must use strncmp() to match the front of the JSON string.
+    /* Process characters which triggered T_UNKNOWN
+     *
+     * Must use strncmp() to match the front of the JSON string.
      * JSON identifier must be lowercase.
      * When strict_numbers if disabled, either case is allowed for
      * Infinity/NaN (since we are no longer following the spec..) */
@@ -1039,19 +1042,19 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
         }
         json_next_number_token(json, token);
         return;
-    } else if (!strncmp(&json->data[json->index], "true", 4)) {
+    } else if (!strncmp(json->ptr, "true", 4)) {
         token->type = T_BOOLEAN;
         token->value.boolean = 1;
-        json->index += 4;
+        json->ptr += 4;
         return;
-    } else if (!strncmp(&json->data[json->index], "false", 5)) {
+    } else if (!strncmp(json->ptr, "false", 5)) {
         token->type = T_BOOLEAN;
         token->value.boolean = 0;
-        json->index += 5;
+        json->ptr += 5;
         return;
-    } else if (!strncmp(&json->data[json->index], "null", 4)) {
+    } else if (!strncmp(json->ptr, "null", 4)) {
         token->type = T_NULL;
-        json->index += 4;
+        json->ptr += 4;
         return;
     } else if (!json->cfg->decode_refuse_badnum &&
                json_is_invalid_number(json)) {
@@ -1219,7 +1222,7 @@ static void lua_json_decode(lua_State *l, const char *json_text, int json_len)
 
     json.cfg = json_fetch_config(l);
     json.data = json_text;
-    json.index = 0;
+    json.ptr = json.data;
 
     /* Ensure the temporary buffer can hold the entire string.
      * This means we no longer need to do length checks since the decoded
-- 
cgit v1.2.3-55-g6feb