Add detailed parse error reporting

- Always report the correct index of the token error.
- Use value.string to report what was found instead of just T_ERROR.
- Fix inverted unicode escape error detection.
author: Mark Pulford <mark@kyne.com.au> 2011-05-01 14:24:05 +0930
committer: Mark Pulford <mark@kyne.com.au> 2011-05-01 14:24:05 +0930
commit: fb5405eacede2f357fe496de54830ffff06e14d3 (patch)
tree: dd3bd89b4a14c882fbc95a5e2128b32777f69217
parent: 2fc7477b155cdecfee3b0a47203c0706c27db73e (diff)
download: lua-cjson-fb5405eacede2f357fe496de54830ffff06e14d3.tar.gz
lua-cjson-fb5405eacede2f357fe496de54830ffff06e14d3.tar.bz2
lua-cjson-fb5405eacede2f357fe496de54830ffff06e14d3.zip
1 files changed, 45 insertions, 19 deletions
diff --git a/lua_cjson.c b/lua_cjson.c
index b2869c1..24b9704 100644
--- a/lua_cjson.c
+++ b/lua_cjson.c
@@ -93,7 +93,7 @@ typedef struct {
        double number;
        int boolean;
    } value;
-    int length; /* FIXME: Merge into union? Won't save memory, but more logical */
+    int string_len;
 } json_token_t;
 /* ===== CONFIGURATION ===== */
@@ -540,11 +540,8 @@ static int json_append_unicode_escape(json_parse_t *json)
    int codepoint;
    int len;
-    /* Skip 'u' */
-    json->index++;
    /* Fetch UCS-2 codepoint */
-    codepoint = decode_hex4(&json->data[json->index]);
+    codepoint = decode_hex4(&json->data[json->index + 1]);
    if (codepoint < 0) {
        return -1;
    }
@@ -557,7 +554,7 @@ static int json_append_unicode_escape(json_parse_t *json)
    /* Append bytes and advance counter */
    strbuf_append_mem(json->tmp, utf8, len);
-    json->index += 4;
+    json->index += 5;
    return 0;
 }
@@ -580,6 +577,8 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
        if (!ch) {
            /* Premature end of the string */
            token->type = T_ERROR;
+            token->index = json->index;
+            token->value.string = "unexpected end of string";
            return;
        }
        
@@ -592,15 +591,19 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
            /* Translate escape code and append to tmp string */
            ch = ch2escape[(unsigned char)ch];
            if (ch == 'u') {
-                if (json_append_unicode_escape(json) < 0)
+                if (json_append_unicode_escape(json) == 0)
                    continue;
                token->type = T_ERROR;
+                token->index = json->index - 1;     /* point at '\' */
+                token->value.string = "invalid unicode escape";
                return;
            }
            if (!ch) {
                /* Invalid escape code */
                token->type = T_ERROR;
+                token->index = json->index - 1;
+                token->value.string = "invalid escape";
                return;
            }
        }
@@ -614,8 +617,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
    strbuf_ensure_null(json->tmp);
    token->type = T_STRING;
-    token->value.string = strbuf_string(json->tmp, NULL);
+    token->value.string = strbuf_string(json->tmp, &token->string_len);
-    token->length = json->tmp->length;
 }
 static void json_next_number_token(json_parse_t *json, json_token_t *token)
@@ -639,10 +641,13 @@ static void json_next_number_token(json_parse_t *json, json_token_t *token)
    token->type = T_NUMBER;
    startptr = &json->data[json->index];
    token->value.number = strtod(&json->data[json->index], &endptr);
-    if (startptr == endptr)
+    if (startptr == endptr) {
        token->type = T_ERROR;
-    else
+        token->index = json->index;
+        token->value.string = "invalid number";
+    } else {
        json->index += endptr - startptr;   /* Skip the processed number */
+    }
    return;
 }
@@ -664,10 +669,15 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
    token->index = json->index;
    /* Don't advance the pointer for an error or the end */
-    if (token->type == T_ERROR || token->type == T_END)
+    if (token->type == T_ERROR) {
+        token->value.string = "invalid token";
+        return;
+    }
+    if (token->type == T_END)
        return;
-    /* Found a known token, advance index and return */
+    /* Found a known single character token, advance index and return */
    if (token->type != T_UNKNOWN) {
        json->index++;
        return;
@@ -698,7 +708,10 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
        return;
    }
+    /* We can fall through here if a token starts with t/f/n but isn't
+     * recognised above */
    token->type = T_ERROR;
+    token->value.string = "invalid token";
 }
 /* This function does not return.
@@ -710,9 +723,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
 static void json_throw_parse_error(lua_State *l, json_parse_t *json,
                                   const char *exp, json_token_t *token)
 {
+    const char *found;
    strbuf_free(json->tmp);
-    luaL_error(l, "Expected %s but found type <%s> at character %d",
-               exp, json_token_type_name[token->type], token->index);
+    if (token->type == T_ERROR)
+        found = token->value.string;
+    else
+        found = json_token_type_name[token->type];
+    /* Note: token->index is 0 based, display starting from 1 */
+    luaL_error(l, "Expected %s but found %s at character %d",
+               exp, found, token->index + 1);
 }
 static void json_parse_object_context(lua_State *l, json_parse_t *json)
@@ -733,17 +755,21 @@ static void json_parse_object_context(lua_State *l, json_parse_t *json)
    while (1) {
        if (token.type != T_STRING)
-            json_throw_parse_error(l, json, "object key", &token);
+            json_throw_parse_error(l, json, "object key string", &token);
-        lua_pushlstring(l, token.value.string, token.length);     /* Push key */
+        /* Push key */
+        lua_pushlstring(l, token.value.string, token.string_len);
        json_next_token(json, &token);
        if (token.type != T_COLON)
            json_throw_parse_error(l, json, "colon", &token);
+        /* Fetch value */
        json_next_token(json, &token);
        json_process_value(l, json, &token);
-        lua_rawset(l, -3);            /* Set key = value */
+        /* Set key = value */
+        lua_rawset(l, -3);
        json_next_token(json, &token);
@@ -798,7 +824,7 @@ static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *t
 {
    switch (token->type) {
    case T_STRING:
-        lua_pushlstring(l, token->value.string, token->length);
+        lua_pushlstring(l, token->value.string, token->string_len);
        break;;
    case T_NUMBER:
        lua_pushnumber(l, token->value.number);
author	Mark Pulford <mark@kyne.com.au>	2011-05-01 14:24:05 +0930
committer	Mark Pulford <mark@kyne.com.au>	2011-05-01 14:24:05 +0930
commit	fb5405eacede2f357fe496de54830ffff06e14d3 (patch)
tree	dd3bd89b4a14c882fbc95a5e2128b32777f69217
parent	2fc7477b155cdecfee3b0a47203c0706c27db73e (diff)
download	lua-cjson-fb5405eacede2f357fe496de54830ffff06e14d3.tar.gz lua-cjson-fb5405eacede2f357fe496de54830ffff06e14d3.tar.bz2 lua-cjson-fb5405eacede2f357fe496de54830ffff06e14d3.zip

diff --git a/lua_cjson.c b/lua_cjson.c
index b2869c1..24b9704 100644
--- a/lua_cjson.c
+++ b/lua_cjson.c
@@ -93,7 +93,7 @@ typedef struct {
93	double number;	93	double number;
94	int boolean;	94	int boolean;
95	} value;	95	} value;
96	int length; /* FIXME: Merge into union? Won't save memory, but more logical */	96	int string_len;
97	} json_token_t;	97	} json_token_t;
98		98
99	/* ===== CONFIGURATION ===== */	99	/* ===== CONFIGURATION ===== */
@@ -540,11 +540,8 @@ static int json_append_unicode_escape(json_parse_t *json)
540	int codepoint;	540	int codepoint;
541	int len;	541	int len;
542		542
543	/* Skip 'u' */
544	json->index++;
545
546	/* Fetch UCS-2 codepoint */	543	/* Fetch UCS-2 codepoint */
547	codepoint = decode_hex4(&json->data[json->index]);	544	codepoint = decode_hex4(&json->data[json->index + 1]);
548	if (codepoint < 0) {	545	if (codepoint < 0) {
549	return -1;	546	return -1;
550	}	547	}
@@ -557,7 +554,7 @@ static int json_append_unicode_escape(json_parse_t *json)
557		554
558	/* Append bytes and advance counter */	555	/* Append bytes and advance counter */
559	strbuf_append_mem(json->tmp, utf8, len);	556	strbuf_append_mem(json->tmp, utf8, len);
560	json->index += 4;	557	json->index += 5;
561		558
562	return 0;	559	return 0;
563	}	560	}
@@ -580,6 +577,8 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
580	if (!ch) {	577	if (!ch) {
581	/* Premature end of the string */	578	/* Premature end of the string */
582	token->type = T_ERROR;	579	token->type = T_ERROR;
		580	token->index = json->index;
		581	token->value.string = "unexpected end of string";
583	return;	582	return;
584	}	583	}
585		584
@@ -592,15 +591,19 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
592	/* Translate escape code and append to tmp string */	591	/* Translate escape code and append to tmp string */
593	ch = ch2escape[(unsigned char)ch];	592	ch = ch2escape[(unsigned char)ch];
594	if (ch == 'u') {	593	if (ch == 'u') {
595	if (json_append_unicode_escape(json) < 0)	594	if (json_append_unicode_escape(json) == 0)
596	continue;	595	continue;
597		596
598	token->type = T_ERROR;	597	token->type = T_ERROR;
		598	token->index = json->index - 1; /* point at '\' */
		599	token->value.string = "invalid unicode escape";
599	return;	600	return;
600	}	601	}
601	if (!ch) {	602	if (!ch) {
602	/* Invalid escape code */	603	/* Invalid escape code */
603	token->type = T_ERROR;	604	token->type = T_ERROR;
		605	token->index = json->index - 1;
		606	token->value.string = "invalid escape";
604	return;	607	return;
605	}	608	}
606	}	609	}
@@ -614,8 +617,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
614	strbuf_ensure_null(json->tmp);	617	strbuf_ensure_null(json->tmp);
615		618
616	token->type = T_STRING;	619	token->type = T_STRING;
617	token->value.string = strbuf_string(json->tmp, NULL);	620	token->value.string = strbuf_string(json->tmp, &token->string_len);
618	token->length = json->tmp->length;
619	}	621	}
620		622
621	static void json_next_number_token(json_parse_t *json, json_token_t *token)	623	static void json_next_number_token(json_parse_t *json, json_token_t *token)
@@ -639,10 +641,13 @@ static void json_next_number_token(json_parse_t *json, json_token_t *token)
639	token->type = T_NUMBER;	641	token->type = T_NUMBER;
640	startptr = &json->data[json->index];	642	startptr = &json->data[json->index];
641	token->value.number = strtod(&json->data[json->index], &endptr);	643	token->value.number = strtod(&json->data[json->index], &endptr);
642	if (startptr == endptr)	644	if (startptr == endptr) {
643	token->type = T_ERROR;	645	token->type = T_ERROR;
644	else	646	token->index = json->index;
		647	token->value.string = "invalid number";
		648	} else {
645	json->index += endptr - startptr; /* Skip the processed number */	649	json->index += endptr - startptr; /* Skip the processed number */
		650	}
646		651
647	return;	652	return;
648	}	653	}
@@ -664,10 +669,15 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
664	token->index = json->index;	669	token->index = json->index;
665		670
666	/* Don't advance the pointer for an error or the end */	671	/* Don't advance the pointer for an error or the end */
667	if (token->type == T_ERROR || token->type == T_END)	672	if (token->type == T_ERROR) {
		673	token->value.string = "invalid token";
		674	return;
		675	}
		676
		677	if (token->type == T_END)
668	return;	678	return;
669		679
670	/* Found a known token, advance index and return */	680	/* Found a known single character token, advance index and return */
671	if (token->type != T_UNKNOWN) {	681	if (token->type != T_UNKNOWN) {
672	json->index++;	682	json->index++;
673	return;	683	return;
@@ -698,7 +708,10 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
698	return;	708	return;
699	}	709	}
700		710
		711	/* We can fall through here if a token starts with t/f/n but isn't
		712	* recognised above */
701	token->type = T_ERROR;	713	token->type = T_ERROR;
		714	token->value.string = "invalid token";
702	}	715	}
703		716
704	/* This function does not return.	717	/* This function does not return.
@@ -710,9 +723,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
710	static void json_throw_parse_error(lua_State *l, json_parse_t *json,	723	static void json_throw_parse_error(lua_State *l, json_parse_t *json,
711	const char *exp, json_token_t *token)	724	const char *exp, json_token_t *token)
712	{	725	{
		726	const char *found;
		727
713	strbuf_free(json->tmp);	728	strbuf_free(json->tmp);
714	luaL_error(l, "Expected %s but found type <%s> at character %d",	729
715	exp, json_token_type_name[token->type], token->index);	730	if (token->type == T_ERROR)
		731	found = token->value.string;
		732	else
		733	found = json_token_type_name[token->type];
		734
		735	/* Note: token->index is 0 based, display starting from 1 */
		736	luaL_error(l, "Expected %s but found %s at character %d",
		737	exp, found, token->index + 1);
716	}	738	}
717		739
718	static void json_parse_object_context(lua_State *l, json_parse_t *json)	740	static void json_parse_object_context(lua_State *l, json_parse_t *json)
@@ -733,17 +755,21 @@ static void json_parse_object_context(lua_State *l, json_parse_t *json)
733		755
734	while (1) {	756	while (1) {
735	if (token.type != T_STRING)	757	if (token.type != T_STRING)
736	json_throw_parse_error(l, json, "object key", &token);	758	json_throw_parse_error(l, json, "object key string", &token);
737		759
738	lua_pushlstring(l, token.value.string, token.length); /* Push key */	760	/* Push key */
		761	lua_pushlstring(l, token.value.string, token.string_len);
739		762
740	json_next_token(json, &token);	763	json_next_token(json, &token);
741	if (token.type != T_COLON)	764	if (token.type != T_COLON)
742	json_throw_parse_error(l, json, "colon", &token);	765	json_throw_parse_error(l, json, "colon", &token);
743		766
		767	/* Fetch value */
744	json_next_token(json, &token);	768	json_next_token(json, &token);
745	json_process_value(l, json, &token);	769	json_process_value(l, json, &token);
746	lua_rawset(l, -3); /* Set key = value */	770
		771	/* Set key = value */
		772	lua_rawset(l, -3);
747		773
748	json_next_token(json, &token);	774	json_next_token(json, &token);
749		775
@@ -798,7 +824,7 @@ static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *t
798	{	824	{
799	switch (token->type) {	825	switch (token->type) {
800	case T_STRING:	826	case T_STRING:
801	lua_pushlstring(l, token->value.string, token->length);	827	lua_pushlstring(l, token->value.string, token->string_len);
802	break;;	828	break;;
803	case T_NUMBER:	829	case T_NUMBER:
804	lua_pushnumber(l, token->value.number);	830	lua_pushnumber(l, token->value.number);