about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2018-12-26 18:32:43 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2018-12-26 18:32:43 +0100
commit 7d32e25bf33c98af6878715907ebbf297ef3d64a (patch)
tree 94de45b3d5b0ab4b093e48a53b85d94d1c3327d9
parent 63ad7993845fbf59264e22295ae9c06250832119 (diff)
download busybox-w32-7d32e25bf33c98af6878715907ebbf297ef3d64a.tar.gz
busybox-w32-7d32e25bf33c98af6878715907ebbf297ef3d64a.tar.bz2
busybox-w32-7d32e25bf33c98af6878715907ebbf297ef3d64a.zip
bc: prepare for char-by-char input handling
function                                             old     new   delta
peek_inbuf                                             -     292    +292
parse_lex_by_checking_eq_sign                          -      26     +26
eat_inbuf                                              -      22     +22
zbc_vm_execute_FILE                                   52      61      +9
bc_lex_lineComment                                    29      30      +1
zbc_lex_number                                       174     172      -2
bc_vm_run                                            104      99      -5
zbc_num_divmod                                       156     150      -6
bc_lex_file                                           24       -     -24
bc_lex_assign                                         26       -     -26
zbc_lex_next                                        1982    1587    -395
------------------------------------------------------------------------------
(add/remove: 3/2 grow/shrink: 2/4 up/down: 350/-458)         Total: -108 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- miscutils/bc.c 443
-rw-r--r-- testsuite/dc_strings_results.txt 8
2 files changed, 214 insertions, 237 deletions
diff --git a/miscutils/bc.c b/miscutils/bc.c
index f4e499f13..5789869a7 100644
--- a/miscutils/bc.c
+++ b/miscutils/bc.c
@@ -713,7 +713,6 @@ dc_LEX_to_INST[] = { // starts at XC_LEX_OP_POWER // corresponding XC/DC_L
713typedef struct BcParse { 713typedef struct BcParse {
714 smallint lex; // was BcLexType // first member is most used 714 smallint lex; // was BcLexType // first member is most used
715 smallint lex_last; // was BcLexType 715 smallint lex_last; // was BcLexType
716 bool lex_newline;
717 size_t lex_line; 716 size_t lex_line;
718 const char *lex_inbuf; 717 const char *lex_inbuf;
719 const char *lex_end; 718 const char *lex_end;
@@ -2741,147 +2740,6 @@ static BC_STATUS zbc_num_parse(BcNum *n, const char *val, unsigned base_t)
2741} 2740}
2742#define zbc_num_parse(...) (zbc_num_parse(__VA_ARGS__) COMMA_SUCCESS) 2741#define zbc_num_parse(...) (zbc_num_parse(__VA_ARGS__) COMMA_SUCCESS)
2743 2742
2744static void bc_lex_lineComment(void)
2745{
2746 BcParse *p = &G.prs;
2747 // Try: echo -n '#foo' | bc
2748 p->lex = XC_LEX_WHITESPACE;
2749 while (p->lex_inbuf < p->lex_end && *p->lex_inbuf != '\n')
2750 p->lex_inbuf++;
2751}
2752
2753static void bc_lex_whitespace(void)
2754{
2755 BcParse *p = &G.prs;
2756 p->lex = XC_LEX_WHITESPACE;
2757 for (;;) {
2758 char c = *p->lex_inbuf;
2759 if (c == '\n') // this is XC_LEX_NLINE, not XC_LEX_WHITESPACE
2760 break;
2761 if (!isspace(c))
2762 break;
2763 p->lex_inbuf++;
2764 }
2765}
2766
2767static BC_STATUS zbc_lex_number(char start)
2768{
2769 BcParse *p = &G.prs;
2770 const char *buf = p->lex_inbuf;
2771 size_t len, i, ccnt;
2772 bool pt;
2773
2774 pt = (start == '.');
2775 p->lex = XC_LEX_NUMBER;
2776 ccnt = i = 0;
2777 for (;;) {
2778 char c = buf[i];
2779 if (c == '\0')
2780 break;
2781 if (c == '\\' && buf[i + 1] == '\n') {
2782 i += 2;
2783 //number_of_backslashes++ - see comment below
2784 continue;
2785 }
2786 if (!isdigit(c) && (c < 'A' || c > 'F')) {
2787 if (c != '.') break;
2788 // if '.' was already seen, stop on second one:
2789 if (pt) break;
2790 pt = true;
2791 }
2792 // buf[i] is one of "0-9A-F."
2793 i++;
2794 if (c != '.')
2795 ccnt = i;
2796 }
2797 //ccnt is the number of chars in the number string, excluding possible
2798 //trailing "[\<newline>].[\<newline>]" (with any number of \<NL> repetitions).
2799 //i is buf[i] index of the first not-yet-parsed char after that.
2800 p->lex_inbuf += i;
2801
2802 // This might overestimate the size, if there are "\<NL>"'s
2803 // in the number. Subtracting number_of_backslashes*2 correctly
2804 // is not that easy: consider that in the case of "NNN.\<NL>"
2805 // loop above will count "\<NL>" before it realizes it is not
2806 // in fact *inside* the number:
2807 len = ccnt + 1; // +1 byte for NUL termination
2808
2809 // This check makes sense only if size_t is (much) larger than BC_MAX_NUM.
2810 if (SIZE_MAX > (BC_MAX_NUM | 0xff)) {
2811 if (len > BC_MAX_NUM)
2812 RETURN_STATUS(bc_error("number too long: must be [1,"BC_MAX_NUM_STR"]"));
2813 }
2814
2815 bc_vec_pop_all(&p->lex_strnumbuf);
2816 bc_vec_expand(&p->lex_strnumbuf, 1 + len);
2817 bc_vec_push(&p->lex_strnumbuf, &start);
2818
2819 while (ccnt != 0) {
2820 // If we have hit a backslash, skip it. We don't have
2821 // to check for a newline because it's guaranteed.
2822 if (*buf == '\\') {
2823 buf += 2;
2824 ccnt -= 2;
2825 continue;
2826 }
2827 bc_vec_push(&p->lex_strnumbuf, buf);
2828 buf++;
2829 ccnt--;
2830 }
2831
2832 bc_vec_pushZeroByte(&p->lex_strnumbuf);
2833
2834 RETURN_STATUS(BC_STATUS_SUCCESS);
2835}
2836#define zbc_lex_number(...) (zbc_lex_number(__VA_ARGS__) COMMA_SUCCESS)
2837
2838static void bc_lex_name(void)
2839{
2840 BcParse *p = &G.prs;
2841 size_t i;
2842 const char *buf;
2843
2844 p->lex = XC_LEX_NAME;
2845
2846 i = 0;
2847 buf = p->lex_inbuf - 1;
2848 for (;;) {
2849 char c = buf[i];
2850 if ((c < 'a' || c > 'z') && !isdigit(c) && c != '_') break;
2851 i++;
2852 }
2853
2854#if 0 // We do not protect against people with gigabyte-long names
2855 // This check makes sense only if size_t is (much) larger than BC_MAX_STRING.
2856 if (SIZE_MAX > (BC_MAX_STRING | 0xff)) {
2857 if (i > BC_MAX_STRING)
2858 return bc_error("name too long: must be [1,"BC_MAX_STRING_STR"]");
2859 }
2860#endif
2861 bc_vec_string(&p->lex_strnumbuf, i, buf);
2862
2863 // Increment the index. We minus 1 because it has already been incremented.
2864 p->lex_inbuf += i - 1;
2865
2866 //return BC_STATUS_SUCCESS;
2867}
2868
2869static void bc_lex_init(void)
2870{
2871 bc_char_vec_init(&G.prs.lex_strnumbuf);
2872}
2873
2874static void bc_lex_free(void)
2875{
2876 bc_vec_free(&G.prs.lex_strnumbuf);
2877}
2878
2879static void bc_lex_file(void)
2880{
2881 G.err_line = G.prs.lex_line = 1;
2882 G.prs.lex_newline = false;
2883}
2884
2885static bool bc_lex_more_input(void) 2743static bool bc_lex_more_input(void)
2886{ 2744{
2887 BcParse *p = &G.prs; 2745 BcParse *p = &G.prs;
@@ -2996,6 +2854,147 @@ static bool bc_lex_more_input(void)
2996 return G.input_buffer.len > 1; 2854 return G.input_buffer.len > 1;
2997} 2855}
2998 2856
2857// p->lex_inbuf points to the current string to be parsed.
2858// if p->lex_inbuf points to '\0', it's either EOF or it points after
2859// last processed line's terminating '\n' (and more reading needs to be done
2860// to get next character).
2861//
2862// If you are in a situation where that is a possibility, call peek_inbuf().
2863// If necessary, it performs more reading and changes p->lex_inbuf,
2864// then it returns *p->lex_inbuf (which will be '\0' only if it's EOF).
2865// After it, just referencing *p->lex_inbuf is valid, and if it wasn't '\0',
2866// it's ok to do p->lex_inbuf++ once without end-of-buffer checking.
2867//
2868// eat_inbuf() is equivalent to "peek_inbuf(); if (c) p->lex_inbuf++":
2869// it returns current char and advances the pointer (if not EOF).
2870// After eat_inbuf(), referencing p->lex_inbuf[-1] and *p->lex_inbuf is valid.
2871//
2872// In many cases, you can use fast *p->lex_inbuf instead of peek_inbuf():
2873// unless prev char might have been '\n', *p->lex_inbuf is '\0' ONLY
2874// on real EOF, not end-of-buffer.
2875static char peek_inbuf(void)
2876{
2877 if (G.prs.lex_inbuf == G.prs.lex_end) {
2878 if (G.prs.lex_input_fp)
2879 if (!bc_lex_more_input())
2880 G.prs.lex_input_fp = NULL;
2881 }
2882 return *G.prs.lex_inbuf;
2883}
2884static char eat_inbuf(void)
2885{
2886 char c = peek_inbuf();
2887 if (c) G.prs.lex_inbuf++;
2888 return c;
2889}
2890
2891static void bc_lex_lineComment(void)
2892{
2893 BcParse *p = &G.prs;
2894 char c;
2895
2896 // Try: echo -n '#foo' | bc
2897 p->lex = XC_LEX_WHITESPACE;
2898
2899 // We depend here on input being done in whole lines:
2900 // '\0' which isn't the EOF can only be seen after '\n'.
2901 while ((c = *p->lex_inbuf) != '\n' && c != '\0')
2902 p->lex_inbuf++;
2903}
2904
2905static void bc_lex_whitespace(void)
2906{
2907 BcParse *p = &G.prs;
2908
2909 p->lex = XC_LEX_WHITESPACE;
2910 for (;;) {
2911 // We depend here on input being done in whole lines:
2912 // '\0' which isn't the EOF can only be seen after '\n'.
2913 char c = *p->lex_inbuf;
2914 if (c == '\n') // this is XC_LEX_NLINE, not XC_LEX_WHITESPACE
2915 break;
2916 if (!isspace(c))
2917 break;
2918 p->lex_inbuf++;
2919 }
2920}
2921
2922static BC_STATUS zbc_lex_number(char last)
2923{
2924 BcParse *p = &G.prs;
2925 bool pt;
2926
2927 bc_vec_pop_all(&p->lex_strnumbuf);
2928 bc_vec_pushByte(&p->lex_strnumbuf, last);
2929
2930 pt = (last == '.');
2931 p->lex = XC_LEX_NUMBER;
2932 for (;;) {
2933 // We depend here on input being done in whole lines:
2934 // '\0' which isn't the EOF can only be seen after '\n'.
2935 char c = *p->lex_inbuf;
2936 check_c:
2937 if (c == '\0')
2938 break;
2939 if (c == '\\' && p->lex_inbuf[1] == '\n') {
2940 p->lex_inbuf += 2;
2941 p->lex_line++;
2942 c = peek_inbuf(); // force next line to be read
2943 goto check_c;
2944 }
2945 if (!isdigit(c) && (c < 'A' || c > 'F')) {
2946 if (c != '.') break;
2947 // if '.' was already seen, stop on second one:
2948 if (pt) break;
2949 pt = true;
2950 }
2951 // c is one of "0-9A-F."
2952 last = c;
2953 bc_vec_push(&p->lex_strnumbuf, p->lex_inbuf);
2954 p->lex_inbuf++;
2955 }
2956 if (last == '.') // remove trailing '.' if any
2957 bc_vec_pop(&p->lex_strnumbuf);
2958 bc_vec_pushZeroByte(&p->lex_strnumbuf);
2959
2960 G.err_line = G.prs.lex_line;
2961 RETURN_STATUS(BC_STATUS_SUCCESS);
2962}
2963#define zbc_lex_number(...) (zbc_lex_number(__VA_ARGS__) COMMA_SUCCESS)
2964
2965static void bc_lex_name(void)
2966{
2967 BcParse *p = &G.prs;
2968 size_t i;
2969 const char *buf;
2970
2971 p->lex = XC_LEX_NAME;
2972
2973 // Since names can't cross lines with \<newline>,
2974 // we depend on the fact that whole line is in the buffer
2975 i = 0;
2976 buf = p->lex_inbuf - 1;
2977 for (;;) {
2978 char c = buf[i];
2979 if ((c < 'a' || c > 'z') && !isdigit(c) && c != '_') break;
2980 i++;
2981 }
2982
2983#if 0 // We do not protect against people with gigabyte-long names
2984 // This check makes sense only if size_t is (much) larger than BC_MAX_STRING.
2985 if (SIZE_MAX > (BC_MAX_STRING | 0xff)) {
2986 if (i > BC_MAX_STRING)
2987 return bc_error("name too long: must be [1,"BC_MAX_STRING_STR"]");
2988 }
2989#endif
2990 bc_vec_string(&p->lex_strnumbuf, i, buf);
2991
2992 // Increment the index. We minus 1 because it has already been incremented.
2993 p->lex_inbuf += i - 1;
2994
2995 //return BC_STATUS_SUCCESS;
2996}
2997
2999IF_BC(static BC_STATUS zbc_lex_token(void);) 2998IF_BC(static BC_STATUS zbc_lex_token(void);)
3000IF_DC(static BC_STATUS zdc_lex_token(void);) 2999IF_DC(static BC_STATUS zdc_lex_token(void);)
3001#define zbc_lex_token(...) (zbc_lex_token(__VA_ARGS__) COMMA_SUCCESS) 3000#define zbc_lex_token(...) (zbc_lex_token(__VA_ARGS__) COMMA_SUCCESS)
@@ -3007,26 +3006,18 @@ static BC_STATUS zbc_lex_next(void)
3007 BcStatus s; 3006 BcStatus s;
3008 3007
3009 p->lex_last = p->lex; 3008 p->lex_last = p->lex;
3010 if (p->lex_last == XC_LEX_EOF) RETURN_STATUS(bc_error("end of file")); 3009 if (p->lex_last == XC_LEX_EOF)
3011 3010 RETURN_STATUS(bc_error("end of file"));
3012 p->lex_line += p->lex_newline;
3013 G.err_line = p->lex_line;
3014 p->lex_newline = false;
3015 3011
3016 // Loop until failure or we don't have whitespace. This 3012 // Loop until failure or we don't have whitespace. This
3017 // is so the parser doesn't get inundated with whitespace. 3013 // is so the parser doesn't get inundated with whitespace.
3018 // Comments are also XC_LEX_WHITESPACE tokens and eaten here. 3014 // Comments are also XC_LEX_WHITESPACE tokens and eaten here.
3019 s = BC_STATUS_SUCCESS; 3015 s = BC_STATUS_SUCCESS;
3020 do { 3016 do {
3021 if (p->lex_inbuf == p->lex_end) { 3017 if (*p->lex_inbuf == '\0') {
3022 p->lex = XC_LEX_EOF; 3018 p->lex = XC_LEX_EOF;
3023 if (!G.prs.lex_input_fp) 3019 if (peek_inbuf() == '\0')
3024 RETURN_STATUS(BC_STATUS_SUCCESS); 3020 RETURN_STATUS(BC_STATUS_SUCCESS);
3025 if (!bc_lex_more_input()) {
3026 G.prs.lex_input_fp = NULL;
3027 RETURN_STATUS(BC_STATUS_SUCCESS);
3028 }
3029 // here it's guaranteed that p->lex_ibuf is below p->lex_end
3030 } 3021 }
3031 p->lex_next_at = p->lex_inbuf; 3022 p->lex_next_at = p->lex_inbuf;
3032 dbg_lex("next string to parse:'%.*s'", 3023 dbg_lex("next string to parse:'%.*s'",
@@ -3126,80 +3117,70 @@ static BC_STATUS zbc_lex_identifier(void)
3126static BC_STATUS zbc_lex_string(void) 3117static BC_STATUS zbc_lex_string(void)
3127{ 3118{
3128 BcParse *p = &G.prs; 3119 BcParse *p = &G.prs;
3129 size_t len, nls, i;
3130 3120
3131 p->lex = XC_LEX_STR; 3121 p->lex = XC_LEX_STR;
3132 3122 bc_vec_pop_all(&p->lex_strnumbuf);
3133 nls = 0;
3134 i = 0;
3135 for (;;) { 3123 for (;;) {
3136 char c = p->lex_inbuf[i]; 3124 char c = peek_inbuf(); // strings can cross lines
3137 if (c == '\0') { 3125 if (c == '\0') {
3138 p->lex_inbuf += i; 3126 RETURN_STATUS(bc_error("unterminated string1"));
3139 RETURN_STATUS(bc_error("unterminated string"));
3140 } 3127 }
3141 if (c == '"') 3128 if (c == '"')
3142 break; 3129 break;
3143 nls += (c == '\n'); 3130 if (c == '\n')
3144 i++; 3131 p->lex_line++;
3145 } 3132 bc_vec_push(&p->lex_strnumbuf, p->lex_inbuf);
3146 3133 p->lex_inbuf++;
3147 len = i;
3148 // This check makes sense only if size_t is (much) larger than BC_MAX_STRING.
3149 if (SIZE_MAX > (BC_MAX_STRING | 0xff)) {
3150 if (len > BC_MAX_STRING)
3151 RETURN_STATUS(bc_error("string too long: must be [1,"BC_MAX_STRING_STR"]"));
3152 } 3134 }
3153 bc_vec_string(&p->lex_strnumbuf, len, p->lex_inbuf); 3135 bc_vec_pushZeroByte(&p->lex_strnumbuf);
3136 p->lex_inbuf++;
3154 3137
3155 p->lex_inbuf += i + 1;
3156 p->lex_line += nls;
3157 G.err_line = p->lex_line; 3138 G.err_line = p->lex_line;
3158
3159 RETURN_STATUS(BC_STATUS_SUCCESS); 3139 RETURN_STATUS(BC_STATUS_SUCCESS);
3160} 3140}
3161#define zbc_lex_string(...) (zbc_lex_string(__VA_ARGS__) COMMA_SUCCESS) 3141#define zbc_lex_string(...) (zbc_lex_string(__VA_ARGS__) COMMA_SUCCESS)
3162 3142
3163static void bc_lex_assign(unsigned with_and_without) 3143static void parse_lex_by_checking_eq_sign(unsigned with_and_without)
3164{ 3144{
3165 BcParse *p = &G.prs; 3145 BcParse *p = &G.prs;
3166 if (*p->lex_inbuf == '=') { 3146 if (*p->lex_inbuf == '=') {
3147 // ^^^ not using peek_inbuf() since '==' etc can't be split across lines
3167 p->lex_inbuf++; 3148 p->lex_inbuf++;
3168 with_and_without >>= 8; // store "with" value 3149 with_and_without >>= 8; // store "with" value
3169 } // else store "without" value 3150 } // else store "without" value
3170 p->lex = (with_and_without & 0xff); 3151 p->lex = (with_and_without & 0xff);
3171} 3152}
3172#define bc_lex_assign(with, without) \ 3153#define parse_lex_by_checking_eq_sign(with, without) \
3173 bc_lex_assign(((with)<<8)|(without)) 3154 parse_lex_by_checking_eq_sign(((with)<<8)|(without))
3174 3155
3175static BC_STATUS zbc_lex_comment(void) 3156static BC_STATUS zbc_lex_comment(void)
3176{ 3157{
3177 BcParse *p = &G.prs; 3158 BcParse *p = &G.prs;
3178 size_t i, nls = 0;
3179 const char *buf = p->lex_inbuf;
3180 3159
3181 p->lex = XC_LEX_WHITESPACE; 3160 p->lex = XC_LEX_WHITESPACE;
3182 i = 0; /* here lex_inbuf[0] is the '*' of opening comment delimiter */ 3161 // here lex_inbuf is at '*' of opening comment delimiter
3183 for (;;) { 3162 for (;;) {
3184 char c = buf[++i]; 3163 char c;
3164
3165 p->lex_inbuf++;
3166 c = peek_inbuf();
3185 check_star: 3167 check_star:
3186 if (c == '*') { 3168 if (c == '*') {
3187 c = buf[++i]; 3169 p->lex_inbuf++;
3170 c = peek_inbuf();
3188 if (c == '/') 3171 if (c == '/')
3189 break; 3172 break;
3190 goto check_star; 3173 goto check_star;
3191 } 3174 }
3192 if (c == '\0') { 3175 if (c == '\0') {
3193 p->lex_inbuf += i;
3194 RETURN_STATUS(bc_error("unterminated comment")); 3176 RETURN_STATUS(bc_error("unterminated comment"));
3195 } 3177 }
3196 nls += (c == '\n'); 3178 if (c == '\n')
3179 p->lex_line++;
3197 } 3180 }
3181 p->lex_inbuf++; // skip trailing '/'
3198 3182
3199 p->lex_inbuf += i + 1;
3200 p->lex_line += nls;
3201 G.err_line = p->lex_line; 3183 G.err_line = p->lex_line;
3202
3203 RETURN_STATUS(BC_STATUS_SUCCESS); 3184 RETURN_STATUS(BC_STATUS_SUCCESS);
3204} 3185}
3205#define zbc_lex_comment(...) (zbc_lex_comment(__VA_ARGS__) COMMA_SUCCESS) 3186#define zbc_lex_comment(...) (zbc_lex_comment(__VA_ARGS__) COMMA_SUCCESS)
@@ -3209,7 +3190,7 @@ static BC_STATUS zbc_lex_token(void)
3209{ 3190{
3210 BcParse *p = &G.prs; 3191 BcParse *p = &G.prs;
3211 BcStatus s = BC_STATUS_SUCCESS; 3192 BcStatus s = BC_STATUS_SUCCESS;
3212 char c = *p->lex_inbuf++; 3193 char c = eat_inbuf();
3213 char c2; 3194 char c2;
3214 3195
3215 // This is the workhorse of the lexer. 3196 // This is the workhorse of the lexer.
@@ -3217,11 +3198,10 @@ static BC_STATUS zbc_lex_token(void)
3217// case '\0': // probably never reached 3198// case '\0': // probably never reached
3218// p->lex_inbuf--; 3199// p->lex_inbuf--;
3219// p->lex = XC_LEX_EOF; 3200// p->lex = XC_LEX_EOF;
3220// p->lex_newline = true;
3221// break; 3201// break;
3222 case '\n': 3202 case '\n':
3203 p->lex_line++;
3223 p->lex = XC_LEX_NLINE; 3204 p->lex = XC_LEX_NLINE;
3224 p->lex_newline = true;
3225 break; 3205 break;
3226 case '\t': 3206 case '\t':
3227 case '\v': 3207 case '\v':
@@ -3231,7 +3211,7 @@ static BC_STATUS zbc_lex_token(void)
3231 bc_lex_whitespace(); 3211 bc_lex_whitespace();
3232 break; 3212 break;
3233 case '!': 3213 case '!':
3234 bc_lex_assign(XC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT); 3214 parse_lex_by_checking_eq_sign(XC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT);
3235 if (p->lex == BC_LEX_OP_BOOL_NOT) { 3215 if (p->lex == BC_LEX_OP_BOOL_NOT) {
3236 s = zbc_POSIX_does_not_allow_bool_ops_this_is_bad("!"); 3216 s = zbc_POSIX_does_not_allow_bool_ops_this_is_bad("!");
3237 if (s) RETURN_STATUS(s); 3217 if (s) RETURN_STATUS(s);
@@ -3246,7 +3226,7 @@ static BC_STATUS zbc_lex_token(void)
3246 bc_lex_lineComment(); 3226 bc_lex_lineComment();
3247 break; 3227 break;
3248 case '%': 3228 case '%':
3249 bc_lex_assign(BC_LEX_OP_ASSIGN_MODULUS, XC_LEX_OP_MODULUS); 3229 parse_lex_by_checking_eq_sign(BC_LEX_OP_ASSIGN_MODULUS, XC_LEX_OP_MODULUS);
3250 break; 3230 break;
3251 case '&': 3231 case '&':
3252 c2 = *p->lex_inbuf; 3232 c2 = *p->lex_inbuf;
@@ -3265,7 +3245,7 @@ static BC_STATUS zbc_lex_token(void)
3265 p->lex = (BcLexType)(c - '(' + BC_LEX_LPAREN); 3245 p->lex = (BcLexType)(c - '(' + BC_LEX_LPAREN);
3266 break; 3246 break;
3267 case '*': 3247 case '*':
3268 bc_lex_assign(BC_LEX_OP_ASSIGN_MULTIPLY, XC_LEX_OP_MULTIPLY); 3248 parse_lex_by_checking_eq_sign(BC_LEX_OP_ASSIGN_MULTIPLY, XC_LEX_OP_MULTIPLY);
3269 break; 3249 break;
3270 case '+': 3250 case '+':
3271 c2 = *p->lex_inbuf; 3251 c2 = *p->lex_inbuf;
@@ -3273,7 +3253,7 @@ static BC_STATUS zbc_lex_token(void)
3273 p->lex_inbuf++; 3253 p->lex_inbuf++;
3274 p->lex = BC_LEX_OP_INC; 3254 p->lex = BC_LEX_OP_INC;
3275 } else 3255 } else
3276 bc_lex_assign(BC_LEX_OP_ASSIGN_PLUS, XC_LEX_OP_PLUS); 3256 parse_lex_by_checking_eq_sign(BC_LEX_OP_ASSIGN_PLUS, XC_LEX_OP_PLUS);
3277 break; 3257 break;
3278 case ',': 3258 case ',':
3279 p->lex = BC_LEX_COMMA; 3259 p->lex = BC_LEX_COMMA;
@@ -3284,7 +3264,7 @@ static BC_STATUS zbc_lex_token(void)
3284 p->lex_inbuf++; 3264 p->lex_inbuf++;
3285 p->lex = BC_LEX_OP_DEC; 3265 p->lex = BC_LEX_OP_DEC;
3286 } else 3266 } else
3287 bc_lex_assign(BC_LEX_OP_ASSIGN_MINUS, XC_LEX_OP_MINUS); 3267 parse_lex_by_checking_eq_sign(BC_LEX_OP_ASSIGN_MINUS, XC_LEX_OP_MINUS);
3288 break; 3268 break;
3289 case '.': 3269 case '.':
3290 if (isdigit(*p->lex_inbuf)) 3270 if (isdigit(*p->lex_inbuf))
@@ -3299,7 +3279,7 @@ static BC_STATUS zbc_lex_token(void)
3299 if (c2 == '*') 3279 if (c2 == '*')
3300 s = zbc_lex_comment(); 3280 s = zbc_lex_comment();
3301 else 3281 else
3302 bc_lex_assign(BC_LEX_OP_ASSIGN_DIVIDE, XC_LEX_OP_DIVIDE); 3282 parse_lex_by_checking_eq_sign(BC_LEX_OP_ASSIGN_DIVIDE, XC_LEX_OP_DIVIDE);
3303 break; 3283 break;
3304 case '0': 3284 case '0':
3305 case '1': 3285 case '1':
@@ -3323,13 +3303,13 @@ static BC_STATUS zbc_lex_token(void)
3323 p->lex = BC_LEX_SCOLON; 3303 p->lex = BC_LEX_SCOLON;
3324 break; 3304 break;
3325 case '<': 3305 case '<':
3326 bc_lex_assign(XC_LEX_OP_REL_LE, XC_LEX_OP_REL_LT); 3306 parse_lex_by_checking_eq_sign(XC_LEX_OP_REL_LE, XC_LEX_OP_REL_LT);
3327 break; 3307 break;
3328 case '=': 3308 case '=':
3329 bc_lex_assign(XC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN); 3309 parse_lex_by_checking_eq_sign(XC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN);
3330 break; 3310 break;
3331 case '>': 3311 case '>':
3332 bc_lex_assign(XC_LEX_OP_REL_GE, XC_LEX_OP_REL_GT); 3312 parse_lex_by_checking_eq_sign(XC_LEX_OP_REL_GE, XC_LEX_OP_REL_GT);
3333 break; 3313 break;
3334 case '[': 3314 case '[':
3335 case ']': 3315 case ']':
@@ -3343,7 +3323,7 @@ static BC_STATUS zbc_lex_token(void)
3343 s = bc_error_bad_character(c); 3323 s = bc_error_bad_character(c);
3344 break; 3324 break;
3345 case '^': 3325 case '^':
3346 bc_lex_assign(BC_LEX_OP_ASSIGN_POWER, XC_LEX_OP_POWER); 3326 parse_lex_by_checking_eq_sign(BC_LEX_OP_ASSIGN_POWER, XC_LEX_OP_POWER);
3347 break; 3327 break;
3348 case 'a': 3328 case 'a':
3349 case 'b': 3329 case 'b':
@@ -3422,43 +3402,30 @@ static BC_STATUS zdc_lex_register(void)
3422static BC_STATUS zdc_lex_string(void) 3402static BC_STATUS zdc_lex_string(void)
3423{ 3403{
3424 BcParse *p = &G.prs; 3404 BcParse *p = &G.prs;
3425 size_t depth, nls, i; 3405 size_t depth;
3426 3406
3427 p->lex = XC_LEX_STR; 3407 p->lex = XC_LEX_STR;
3428 bc_vec_pop_all(&p->lex_strnumbuf); 3408 bc_vec_pop_all(&p->lex_strnumbuf);
3429 3409
3430 nls = 0;
3431 depth = 1; 3410 depth = 1;
3432 i = 0;
3433 for (;;) { 3411 for (;;) {
3434 char c = p->lex_inbuf[i]; 3412 char c = peek_inbuf();
3435 if (c == '\0') { 3413 if (c == '\0') {
3436 p->lex_inbuf += i; 3414 RETURN_STATUS(bc_error("unterminated string"));
3437 RETURN_STATUS(bc_error("string end could not be found"));
3438 }
3439 nls += (c == '\n');
3440 if (i == 0 || p->lex_inbuf[i - 1] != '\\') {
3441 if (c == '[') depth++;
3442 if (c == ']')
3443 if (--depth == 0)
3444 break;
3445 } 3415 }
3446 bc_vec_push(&p->lex_strnumbuf, &p->lex_inbuf[i]); 3416 if (c == '[') depth++;
3447 i++; 3417 if (c == ']')
3418 if (--depth == 0)
3419 break;
3420 if (c == '\n')
3421 p->lex_line++;
3422 bc_vec_push(&p->lex_strnumbuf, p->lex_inbuf);
3423 p->lex_inbuf++;
3448 } 3424 }
3449 i++;
3450
3451 bc_vec_pushZeroByte(&p->lex_strnumbuf); 3425 bc_vec_pushZeroByte(&p->lex_strnumbuf);
3452 // This check makes sense only if size_t is (much) larger than BC_MAX_STRING. 3426 p->lex_inbuf++; // skip trailing ']'
3453 if (SIZE_MAX > (BC_MAX_STRING | 0xff)) {
3454 if (i > BC_MAX_STRING)
3455 RETURN_STATUS(bc_error("string too long: must be [1,"BC_MAX_STRING_STR"]"));
3456 }
3457 3427
3458 p->lex_inbuf += i;
3459 p->lex_line += nls;
3460 G.err_line = p->lex_line; 3428 G.err_line = p->lex_line;
3461
3462 RETURN_STATUS(BC_STATUS_SUCCESS); 3429 RETURN_STATUS(BC_STATUS_SUCCESS);
3463} 3430}
3464#define zdc_lex_string(...) (zdc_lex_string(__VA_ARGS__) COMMA_SUCCESS) 3431#define zdc_lex_string(...) (zdc_lex_string(__VA_ARGS__) COMMA_SUCCESS)
@@ -3486,7 +3453,7 @@ static BC_STATUS zdc_lex_token(void)
3486 } 3453 }
3487 3454
3488 s = BC_STATUS_SUCCESS; 3455 s = BC_STATUS_SUCCESS;
3489 c = *p->lex_inbuf++; 3456 c = eat_inbuf();
3490 if (c >= '%' && c <= '~' 3457 if (c >= '%' && c <= '~'
3491 && (p->lex = dc_char_to_LEX[c - '%']) != XC_LEX_INVALID 3458 && (p->lex = dc_char_to_LEX[c - '%']) != XC_LEX_INVALID
3492 ) { 3459 ) {
@@ -3507,15 +3474,14 @@ static BC_STATUS zdc_lex_token(void)
3507 // commands are not executed on pressing <enter>). 3474 // commands are not executed on pressing <enter>).
3508 // IOW: typing "1p<enter>" should print "1" _at once_, 3475 // IOW: typing "1p<enter>" should print "1" _at once_,
3509 // not after some more input. 3476 // not after some more input.
3477 p->lex_line++;
3510 p->lex = XC_LEX_NLINE; 3478 p->lex = XC_LEX_NLINE;
3511 p->lex_newline = true;
3512 break; 3479 break;
3513 case '\t': 3480 case '\t':
3514 case '\v': 3481 case '\v':
3515 case '\f': 3482 case '\f':
3516 case '\r': 3483 case '\r':
3517 case ' ': 3484 case ' ':
3518 p->lex_newline = 0; // was (c == '\n')
3519 bc_lex_whitespace(); 3485 bc_lex_whitespace();
3520 break; 3486 break;
3521 case '!': 3487 case '!':
@@ -3702,7 +3668,7 @@ static void bc_parse_free(void)
3702 IF_BC(bc_vec_free(&p->exits);) 3668 IF_BC(bc_vec_free(&p->exits);)
3703 IF_BC(bc_vec_free(&p->conds);) 3669 IF_BC(bc_vec_free(&p->conds);)
3704 IF_BC(bc_vec_free(&p->ops);) 3670 IF_BC(bc_vec_free(&p->ops);)
3705 bc_lex_free(); 3671 bc_vec_free(&G.prs.lex_strnumbuf);
3706} 3672}
3707 3673
3708static void bc_parse_create(size_t fidx) 3674static void bc_parse_create(size_t fidx)
@@ -3710,7 +3676,7 @@ static void bc_parse_create(size_t fidx)
3710 BcParse *p = &G.prs; 3676 BcParse *p = &G.prs;
3711 memset(p, 0, sizeof(BcParse)); 3677 memset(p, 0, sizeof(BcParse));
3712 3678
3713 bc_lex_init(); 3679 bc_char_vec_init(&G.prs.lex_strnumbuf);
3714 IF_BC(bc_vec_init(&p->exits, sizeof(size_t), NULL);) 3680 IF_BC(bc_vec_init(&p->exits, sizeof(size_t), NULL);)
3715 IF_BC(bc_vec_init(&p->conds, sizeof(size_t), NULL);) 3681 IF_BC(bc_vec_init(&p->conds, sizeof(size_t), NULL);)
3716 IF_BC(bc_vec_init(&p->ops, sizeof(BcLexType), NULL);) 3682 IF_BC(bc_vec_init(&p->ops, sizeof(BcLexType), NULL);)
@@ -4753,11 +4719,13 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4753 switch (t) { 4719 switch (t) {
4754 case BC_LEX_OP_INC: 4720 case BC_LEX_OP_INC:
4755 case BC_LEX_OP_DEC: 4721 case BC_LEX_OP_DEC:
4722 dbg_lex("%s:%d LEX_OP_INC/DEC", __func__, __LINE__);
4756 s = zbc_parse_incdec(&prev, &paren_expr, &nexprs, flags); 4723 s = zbc_parse_incdec(&prev, &paren_expr, &nexprs, flags);
4757 rprn = bin_last = false; 4724 rprn = bin_last = false;
4758 //get_token = false; - already is 4725 //get_token = false; - already is
4759 break; 4726 break;
4760 case XC_LEX_OP_MINUS: 4727 case XC_LEX_OP_MINUS:
4728 dbg_lex("%s:%d LEX_OP_MINUS", __func__, __LINE__);
4761 s = zbc_parse_minus(&prev, ops_bgn, rprn, &nexprs); 4729 s = zbc_parse_minus(&prev, ops_bgn, rprn, &nexprs);
4762 rprn = false; 4730 rprn = false;
4763 //get_token = false; - already is 4731 //get_token = false; - already is
@@ -4770,6 +4738,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4770 case BC_LEX_OP_ASSIGN_PLUS: 4738 case BC_LEX_OP_ASSIGN_PLUS:
4771 case BC_LEX_OP_ASSIGN_MINUS: 4739 case BC_LEX_OP_ASSIGN_MINUS:
4772 case BC_LEX_OP_ASSIGN: 4740 case BC_LEX_OP_ASSIGN:
4741 dbg_lex("%s:%d LEX_ASSIGNxyz", __func__, __LINE__);
4773 if (prev != XC_INST_VAR && prev != XC_INST_ARRAY_ELEM 4742 if (prev != XC_INST_VAR && prev != XC_INST_ARRAY_ELEM
4774 && prev != XC_INST_SCALE && prev != XC_INST_IBASE 4743 && prev != XC_INST_SCALE && prev != XC_INST_IBASE
4775 && prev != XC_INST_OBASE && prev != BC_INST_LAST 4744 && prev != XC_INST_OBASE && prev != BC_INST_LAST
@@ -4794,6 +4763,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4794 case BC_LEX_OP_BOOL_NOT: 4763 case BC_LEX_OP_BOOL_NOT:
4795 case BC_LEX_OP_BOOL_OR: 4764 case BC_LEX_OP_BOOL_OR:
4796 case BC_LEX_OP_BOOL_AND: 4765 case BC_LEX_OP_BOOL_AND:
4766 dbg_lex("%s:%d LEX_OP_xyz", __func__, __LINE__);
4797 if (((t == BC_LEX_OP_BOOL_NOT) != bin_last) 4767 if (((t == BC_LEX_OP_BOOL_NOT) != bin_last)
4798 || (t != BC_LEX_OP_BOOL_NOT && prev == XC_INST_BOOL_NOT) 4768 || (t != BC_LEX_OP_BOOL_NOT && prev == XC_INST_BOOL_NOT)
4799 ) { 4769 ) {
@@ -4808,6 +4778,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4808 bin_last = (t != BC_LEX_OP_BOOL_NOT); 4778 bin_last = (t != BC_LEX_OP_BOOL_NOT);
4809 break; 4779 break;
4810 case BC_LEX_LPAREN: 4780 case BC_LEX_LPAREN:
4781 dbg_lex("%s:%d LEX_LPAREN", __func__, __LINE__);
4811 if (BC_PARSE_LEAF(prev, rprn)) 4782 if (BC_PARSE_LEAF(prev, rprn))
4812 return bc_error_bad_expression(); 4783 return bc_error_bad_expression();
4813 bc_vec_push(&p->ops, &t); 4784 bc_vec_push(&p->ops, &t);
@@ -4817,6 +4788,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4817 rprn = bin_last = false; 4788 rprn = bin_last = false;
4818 break; 4789 break;
4819 case BC_LEX_RPAREN: 4790 case BC_LEX_RPAREN:
4791 dbg_lex("%s:%d LEX_RPAREN", __func__, __LINE__);
4820 if (bin_last || prev == XC_INST_BOOL_NOT) 4792 if (bin_last || prev == XC_INST_BOOL_NOT)
4821 return bc_error_bad_expression(); 4793 return bc_error_bad_expression();
4822 if (nparens == 0) { 4794 if (nparens == 0) {
@@ -4833,6 +4805,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4833 bin_last = false; 4805 bin_last = false;
4834 break; 4806 break;
4835 case XC_LEX_NAME: 4807 case XC_LEX_NAME:
4808 dbg_lex("%s:%d LEX_NAME", __func__, __LINE__);
4836 if (BC_PARSE_LEAF(prev, rprn)) 4809 if (BC_PARSE_LEAF(prev, rprn))
4837 return bc_error_bad_expression(); 4810 return bc_error_bad_expression();
4838 s = zbc_parse_name(&prev, flags & ~BC_PARSE_NOCALL); 4811 s = zbc_parse_name(&prev, flags & ~BC_PARSE_NOCALL);
@@ -4842,6 +4815,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4842 nexprs++; 4815 nexprs++;
4843 break; 4816 break;
4844 case XC_LEX_NUMBER: 4817 case XC_LEX_NUMBER:
4818 dbg_lex("%s:%d LEX_NUMBER", __func__, __LINE__);
4845 if (BC_PARSE_LEAF(prev, rprn)) 4819 if (BC_PARSE_LEAF(prev, rprn))
4846 return bc_error_bad_expression(); 4820 return bc_error_bad_expression();
4847 bc_parse_pushNUM(); 4821 bc_parse_pushNUM();
@@ -4854,6 +4828,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4854 case BC_LEX_KEY_IBASE: 4828 case BC_LEX_KEY_IBASE:
4855 case BC_LEX_KEY_LAST: 4829 case BC_LEX_KEY_LAST:
4856 case BC_LEX_KEY_OBASE: 4830 case BC_LEX_KEY_OBASE:
4831 dbg_lex("%s:%d LEX_IBASE/LAST/OBASE", __func__, __LINE__);
4857 if (BC_PARSE_LEAF(prev, rprn)) 4832 if (BC_PARSE_LEAF(prev, rprn))
4858 return bc_error_bad_expression(); 4833 return bc_error_bad_expression();
4859 prev = (char) (t - BC_LEX_KEY_IBASE + XC_INST_IBASE); 4834 prev = (char) (t - BC_LEX_KEY_IBASE + XC_INST_IBASE);
@@ -4865,6 +4840,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4865 break; 4840 break;
4866 case BC_LEX_KEY_LENGTH: 4841 case BC_LEX_KEY_LENGTH:
4867 case BC_LEX_KEY_SQRT: 4842 case BC_LEX_KEY_SQRT:
4843 dbg_lex("%s:%d LEX_LEN/SQRT", __func__, __LINE__);
4868 if (BC_PARSE_LEAF(prev, rprn)) 4844 if (BC_PARSE_LEAF(prev, rprn))
4869 return bc_error_bad_expression(); 4845 return bc_error_bad_expression();
4870 s = zbc_parse_builtin(t, flags, &prev); 4846 s = zbc_parse_builtin(t, flags, &prev);
@@ -4874,6 +4850,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4874 nexprs++; 4850 nexprs++;
4875 break; 4851 break;
4876 case BC_LEX_KEY_READ: 4852 case BC_LEX_KEY_READ:
4853 dbg_lex("%s:%d LEX_READ", __func__, __LINE__);
4877 if (BC_PARSE_LEAF(prev, rprn)) 4854 if (BC_PARSE_LEAF(prev, rprn))
4878 return bc_error_bad_expression(); 4855 return bc_error_bad_expression();
4879 s = zbc_parse_read(); 4856 s = zbc_parse_read();
@@ -4884,6 +4861,7 @@ static BcStatus bc_parse_expr_empty_ok(uint8_t flags)
4884 nexprs++; 4861 nexprs++;
4885 break; 4862 break;
4886 case BC_LEX_KEY_SCALE: 4863 case BC_LEX_KEY_SCALE:
4864 dbg_lex("%s:%d LEX_SCALE", __func__, __LINE__);
4887 if (BC_PARSE_LEAF(prev, rprn)) 4865 if (BC_PARSE_LEAF(prev, rprn))
4888 return bc_error_bad_expression(); 4866 return bc_error_bad_expression();
4889 s = zbc_parse_scale(&prev, flags); 4867 s = zbc_parse_scale(&prev, flags);
@@ -5348,7 +5326,7 @@ static BC_STATUS zbc_program_read(void)
5348 5326
5349 sv_parse = G.prs; // struct copy 5327 sv_parse = G.prs; // struct copy
5350 bc_parse_create(BC_PROG_READ); 5328 bc_parse_create(BC_PROG_READ);
5351 //bc_lex_file(&G.prs.l); - not needed, error line info is not printed for read() 5329 //G.err_line = G.prs.lex_line = 1; - not needed, error line info is not printed for read()
5352 5330
5353 s = zbc_parse_text_init(buf.v); 5331 s = zbc_parse_text_init(buf.v);
5354 if (s) goto exec_err; 5332 if (s) goto exec_err;
@@ -6950,7 +6928,7 @@ static BC_STATUS zbc_vm_execute_FILE(FILE *fp, const char *filename)
6950 6928
6951 G.prs.lex_filename = filename; 6929 G.prs.lex_filename = filename;
6952 G.prs.lex_input_fp = fp; 6930 G.prs.lex_input_fp = fp;
6953 bc_lex_file(); 6931 G.err_line = G.prs.lex_line = 1;
6954 6932
6955 do { 6933 do {
6956 s = zbc_vm_process(""); 6934 s = zbc_vm_process("");
@@ -7232,7 +7210,6 @@ static BC_STATUS zbc_vm_exec(void)
7232 // We know that internal library is not buggy, 7210 // We know that internal library is not buggy,
7233 // thus error checking is normally disabled. 7211 // thus error checking is normally disabled.
7234# define DEBUG_LIB 0 7212# define DEBUG_LIB 0
7235 bc_lex_file();
7236 s = zbc_vm_process(bc_lib); 7213 s = zbc_vm_process(bc_lib);
7237 if (DEBUG_LIB && s) RETURN_STATUS(s); 7214 if (DEBUG_LIB && s) RETURN_STATUS(s);
7238 } 7215 }
diff --git a/testsuite/dc_strings_results.txt b/testsuite/dc_strings_results.txt
index d606637cc..e49b9b288 100644
--- a/testsuite/dc_strings_results.txt
+++ b/testsuite/dc_strings_results.txt
@@ -1,9 +1,9 @@
113 113
2Hello, World! 2Hello, World!
316 3Hello, \[ World!]ZpR
4Hello, \[ World! 4[Hello, \[ World!]pR
516 5[Hello, \] World!]ZpR
6Hello, \] World! 6[Hello, \] World!
71 71
82 82
93 93