diff options
author | Mark Pulford <mark@kyne.com.au> | 2011-05-01 14:24:05 +0930 |
---|---|---|
committer | Mark Pulford <mark@kyne.com.au> | 2011-05-01 14:24:05 +0930 |
commit | fb5405eacede2f357fe496de54830ffff06e14d3 (patch) | |
tree | dd3bd89b4a14c882fbc95a5e2128b32777f69217 | |
parent | 2fc7477b155cdecfee3b0a47203c0706c27db73e (diff) | |
download | lua-cjson-fb5405eacede2f357fe496de54830ffff06e14d3.tar.gz lua-cjson-fb5405eacede2f357fe496de54830ffff06e14d3.tar.bz2 lua-cjson-fb5405eacede2f357fe496de54830ffff06e14d3.zip |
Add detailed parse error reporting
- Always report the correct index of the token error.
- Use value.string to report what was found instead of just T_ERROR.
- Fix inverted unicode escape error detection.
-rw-r--r-- | lua_cjson.c | 64 |
1 file changed, 45 insertions, 19 deletions
diff --git a/lua_cjson.c b/lua_cjson.c index b2869c1..24b9704 100644 --- a/lua_cjson.c +++ b/lua_cjson.c | |||
@@ -93,7 +93,7 @@ typedef struct { | |||
93 | double number; | 93 | double number; |
94 | int boolean; | 94 | int boolean; |
95 | } value; | 95 | } value; |
96 | int length; /* FIXME: Merge into union? Won't save memory, but more logical */ | 96 | int string_len; |
97 | } json_token_t; | 97 | } json_token_t; |
98 | 98 | ||
99 | /* ===== CONFIGURATION ===== */ | 99 | /* ===== CONFIGURATION ===== */ |
@@ -540,11 +540,8 @@ static int json_append_unicode_escape(json_parse_t *json) | |||
540 | int codepoint; | 540 | int codepoint; |
541 | int len; | 541 | int len; |
542 | 542 | ||
543 | /* Skip 'u' */ | ||
544 | json->index++; | ||
545 | |||
546 | /* Fetch UCS-2 codepoint */ | 543 | /* Fetch UCS-2 codepoint */ |
547 | codepoint = decode_hex4(&json->data[json->index]); | 544 | codepoint = decode_hex4(&json->data[json->index + 1]); |
548 | if (codepoint < 0) { | 545 | if (codepoint < 0) { |
549 | return -1; | 546 | return -1; |
550 | } | 547 | } |
@@ -557,7 +554,7 @@ static int json_append_unicode_escape(json_parse_t *json) | |||
557 | 554 | ||
558 | /* Append bytes and advance counter */ | 555 | /* Append bytes and advance counter */ |
559 | strbuf_append_mem(json->tmp, utf8, len); | 556 | strbuf_append_mem(json->tmp, utf8, len); |
560 | json->index += 4; | 557 | json->index += 5; |
561 | 558 | ||
562 | return 0; | 559 | return 0; |
563 | } | 560 | } |
@@ -580,6 +577,8 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
580 | if (!ch) { | 577 | if (!ch) { |
581 | /* Premature end of the string */ | 578 | /* Premature end of the string */ |
582 | token->type = T_ERROR; | 579 | token->type = T_ERROR; |
580 | token->index = json->index; | ||
581 | token->value.string = "unexpected end of string"; | ||
583 | return; | 582 | return; |
584 | } | 583 | } |
585 | 584 | ||
@@ -592,15 +591,19 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
592 | /* Translate escape code and append to tmp string */ | 591 | /* Translate escape code and append to tmp string */ |
593 | ch = ch2escape[(unsigned char)ch]; | 592 | ch = ch2escape[(unsigned char)ch]; |
594 | if (ch == 'u') { | 593 | if (ch == 'u') { |
595 | if (json_append_unicode_escape(json) < 0) | 594 | if (json_append_unicode_escape(json) == 0) |
596 | continue; | 595 | continue; |
597 | 596 | ||
598 | token->type = T_ERROR; | 597 | token->type = T_ERROR; |
598 | token->index = json->index - 1; /* point at '\' */ | ||
599 | token->value.string = "invalid unicode escape"; | ||
599 | return; | 600 | return; |
600 | } | 601 | } |
601 | if (!ch) { | 602 | if (!ch) { |
602 | /* Invalid escape code */ | 603 | /* Invalid escape code */ |
603 | token->type = T_ERROR; | 604 | token->type = T_ERROR; |
605 | token->index = json->index - 1; | ||
606 | token->value.string = "invalid escape"; | ||
604 | return; | 607 | return; |
605 | } | 608 | } |
606 | } | 609 | } |
@@ -614,8 +617,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token) | |||
614 | strbuf_ensure_null(json->tmp); | 617 | strbuf_ensure_null(json->tmp); |
615 | 618 | ||
616 | token->type = T_STRING; | 619 | token->type = T_STRING; |
617 | token->value.string = strbuf_string(json->tmp, NULL); | 620 | token->value.string = strbuf_string(json->tmp, &token->string_len); |
618 | token->length = json->tmp->length; | ||
619 | } | 621 | } |
620 | 622 | ||
621 | static void json_next_number_token(json_parse_t *json, json_token_t *token) | 623 | static void json_next_number_token(json_parse_t *json, json_token_t *token) |
@@ -639,10 +641,13 @@ static void json_next_number_token(json_parse_t *json, json_token_t *token) | |||
639 | token->type = T_NUMBER; | 641 | token->type = T_NUMBER; |
640 | startptr = &json->data[json->index]; | 642 | startptr = &json->data[json->index]; |
641 | token->value.number = strtod(&json->data[json->index], &endptr); | 643 | token->value.number = strtod(&json->data[json->index], &endptr); |
642 | if (startptr == endptr) | 644 | if (startptr == endptr) { |
643 | token->type = T_ERROR; | 645 | token->type = T_ERROR; |
644 | else | 646 | token->index = json->index; |
647 | token->value.string = "invalid number"; | ||
648 | } else { | ||
645 | json->index += endptr - startptr; /* Skip the processed number */ | 649 | json->index += endptr - startptr; /* Skip the processed number */ |
650 | } | ||
646 | 651 | ||
647 | return; | 652 | return; |
648 | } | 653 | } |
@@ -664,10 +669,15 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
664 | token->index = json->index; | 669 | token->index = json->index; |
665 | 670 | ||
666 | /* Don't advance the pointer for an error or the end */ | 671 | /* Don't advance the pointer for an error or the end */ |
667 | if (token->type == T_ERROR || token->type == T_END) | 672 | if (token->type == T_ERROR) { |
673 | token->value.string = "invalid token"; | ||
674 | return; | ||
675 | } | ||
676 | |||
677 | if (token->type == T_END) | ||
668 | return; | 678 | return; |
669 | 679 | ||
670 | /* Found a known token, advance index and return */ | 680 | /* Found a known single character token, advance index and return */ |
671 | if (token->type != T_UNKNOWN) { | 681 | if (token->type != T_UNKNOWN) { |
672 | json->index++; | 682 | json->index++; |
673 | return; | 683 | return; |
@@ -698,7 +708,10 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
698 | return; | 708 | return; |
699 | } | 709 | } |
700 | 710 | ||
711 | /* We can fall through here if a token starts with t/f/n but isn't | ||
712 | * recognised above */ | ||
701 | token->type = T_ERROR; | 713 | token->type = T_ERROR; |
714 | token->value.string = "invalid token"; | ||
702 | } | 715 | } |
703 | 716 | ||
704 | /* This function does not return. | 717 | /* This function does not return. |
@@ -710,9 +723,18 @@ static void json_next_token(json_parse_t *json, json_token_t *token) | |||
710 | static void json_throw_parse_error(lua_State *l, json_parse_t *json, | 723 | static void json_throw_parse_error(lua_State *l, json_parse_t *json, |
711 | const char *exp, json_token_t *token) | 724 | const char *exp, json_token_t *token) |
712 | { | 725 | { |
726 | const char *found; | ||
727 | |||
713 | strbuf_free(json->tmp); | 728 | strbuf_free(json->tmp); |
714 | luaL_error(l, "Expected %s but found type <%s> at character %d", | 729 | |
715 | exp, json_token_type_name[token->type], token->index); | 730 | if (token->type == T_ERROR) |
731 | found = token->value.string; | ||
732 | else | ||
733 | found = json_token_type_name[token->type]; | ||
734 | |||
735 | /* Note: token->index is 0 based, display starting from 1 */ | ||
736 | luaL_error(l, "Expected %s but found %s at character %d", | ||
737 | exp, found, token->index + 1); | ||
716 | } | 738 | } |
717 | 739 | ||
718 | static void json_parse_object_context(lua_State *l, json_parse_t *json) | 740 | static void json_parse_object_context(lua_State *l, json_parse_t *json) |
@@ -733,17 +755,21 @@ static void json_parse_object_context(lua_State *l, json_parse_t *json) | |||
733 | 755 | ||
734 | while (1) { | 756 | while (1) { |
735 | if (token.type != T_STRING) | 757 | if (token.type != T_STRING) |
736 | json_throw_parse_error(l, json, "object key", &token); | 758 | json_throw_parse_error(l, json, "object key string", &token); |
737 | 759 | ||
738 | lua_pushlstring(l, token.value.string, token.length); /* Push key */ | 760 | /* Push key */ |
761 | lua_pushlstring(l, token.value.string, token.string_len); | ||
739 | 762 | ||
740 | json_next_token(json, &token); | 763 | json_next_token(json, &token); |
741 | if (token.type != T_COLON) | 764 | if (token.type != T_COLON) |
742 | json_throw_parse_error(l, json, "colon", &token); | 765 | json_throw_parse_error(l, json, "colon", &token); |
743 | 766 | ||
767 | /* Fetch value */ | ||
744 | json_next_token(json, &token); | 768 | json_next_token(json, &token); |
745 | json_process_value(l, json, &token); | 769 | json_process_value(l, json, &token); |
746 | lua_rawset(l, -3); /* Set key = value */ | 770 | |
771 | /* Set key = value */ | ||
772 | lua_rawset(l, -3); | ||
747 | 773 | ||
748 | json_next_token(json, &token); | 774 | json_next_token(json, &token); |
749 | 775 | ||
@@ -798,7 +824,7 @@ static void json_process_value(lua_State *l, json_parse_t *json, json_token_t *t | |||
798 | { | 824 | { |
799 | switch (token->type) { | 825 | switch (token->type) { |
800 | case T_STRING: | 826 | case T_STRING: |
801 | lua_pushlstring(l, token->value.string, token->length); | 827 | lua_pushlstring(l, token->value.string, token->string_len); |
802 | break;; | 828 | break;; |
803 | case T_NUMBER: | 829 | case T_NUMBER: |
804 | lua_pushnumber(l, token->value.number); | 830 | lua_pushnumber(l, token->value.number); |