diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1998-05-27 10:08:34 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1998-05-27 10:08:34 -0300 |
| commit | 7e59a8901d063dbea4eb0693c9c2d85bda1fc5f6 (patch) | |
| tree | 1834168cd16e821a017e3d8408978f89e6c2ddaf /llex.c | |
| parent | abc6eac404da8181ad945ac6950f61a65ba7dfa5 (diff) | |
| download | lua-7e59a8901d063dbea4eb0693c9c2d85bda1fc5f6.tar.gz lua-7e59a8901d063dbea4eb0693c9c2d85bda1fc5f6.tar.bz2 lua-7e59a8901d063dbea4eb0693c9c2d85bda1fc5f6.zip | |
NEW LL(1) PARSER
Diffstat (limited to 'llex.c')
| -rw-r--r-- | llex.c | 138 |
1 file changed, 74 insertions, 64 deletions
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: llex.c,v 1.17 1998/03/09 17:22:49 roberto Exp roberto $ | 2 | ** $Id: llex.c,v 1.18 1998/03/20 14:18:18 roberto Exp roberto $ |
| 3 | ** Lexical Analizer | 3 | ** Lexical Analizer |
| 4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
| 5 | */ | 5 | */ |
| @@ -15,7 +15,6 @@ | |||
| 15 | #include "lparser.h" | 15 | #include "lparser.h" |
| 16 | #include "lstate.h" | 16 | #include "lstate.h" |
| 17 | #include "lstring.h" | 17 | #include "lstring.h" |
| 18 | #include "lstx.h" | ||
| 19 | #include "luadebug.h" | 18 | #include "luadebug.h" |
| 20 | #include "lzio.h" | 19 | #include "lzio.h" |
| 21 | 20 | ||
| @@ -27,23 +26,53 @@ int lua_debug=0; | |||
| 27 | #define next(LS) (LS->current = zgetc(LS->lex_z)) | 26 | #define next(LS) (LS->current = zgetc(LS->lex_z)) |
| 28 | 27 | ||
| 29 | 28 | ||
| 30 | static struct { | 29 | #define save(c) luaL_addchar(c) |
| 31 | char *name; | 30 | #define save_and_next(LS) (save(LS->current), next(LS)) |
| 32 | int token; | 31 | |
| 33 | } reserved [] = { | 32 | |
| 34 | {"and", AND}, {"do", DO}, {"else", ELSE}, {"elseif", ELSEIF}, | 33 | char *reserved [] = {"and", "do", "else", "elseif", "end", "function", |
| 35 | {"end", END}, {"function", FUNCTION}, {"if", IF}, {"local", LOCAL}, | 34 | "if", "local", "nil", "not", "or", "repeat", "return", "then", |
| 36 | {"nil", NIL}, {"not", NOT}, {"or", OR}, {"repeat", REPEAT}, | 35 | "until", "while"}; |
| 37 | {"return", RETURN}, {"then", THEN}, {"until", UNTIL}, {"while", WHILE} | 36 | |
| 38 | }; | ||
| 39 | 37 | ||
| 40 | void luaX_init (void) | 38 | void luaX_init (void) |
| 41 | { | 39 | { |
| 42 | int i; | 40 | int i; |
| 43 | for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) { | 41 | for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) { |
| 44 | TaggedString *ts = luaS_new(reserved[i].name); | 42 | TaggedString *ts = luaS_new(reserved[i]); |
| 45 | ts->head.marked = reserved[i].token; /* reserved word (always > 255) */ | 43 | ts->head.marked = FIRST_RESERVED+i; /* reserved word (always > 255) */ |
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | |||
| 48 | void luaX_syntaxerror (LexState *ls, char *s, char *token) { | ||
| 49 | if (token[0] == 0) | ||
| 50 | token = "<eof>"; | ||
| 51 | luaL_verror("%.100s;\n last token read: `%.50s' at line %d in file %.50s", | ||
| 52 | s, token, ls->linenumber, zname(ls->lex_z)); | ||
| 53 | } | ||
| 54 | |||
| 55 | |||
| 56 | void luaX_error (LexState *ls, char *s) { | ||
| 57 | save(0); | ||
| 58 | luaX_syntaxerror(ls, s, luaL_buffer()); | ||
| 59 | } | ||
| 60 | |||
| 61 | |||
| 62 | void luaX_token2str (LexState *ls, int token, char *s) { | ||
| 63 | if (token < 255) { | ||
| 64 | s[0] = token; | ||
| 65 | s[1] = 0; | ||
| 46 | } | 66 | } |
| 67 | else | ||
| 68 | strcpy(s, reserved[token-FIRST_RESERVED]); | ||
| 69 | } | ||
| 70 | |||
| 71 | |||
| 72 | static void luaX_invalidchar (LexState *ls, int c) { | ||
| 73 | char buff[10]; | ||
| 74 | sprintf(buff, "0x%X", c); | ||
| 75 | luaX_syntaxerror(ls, "invalid control char", buff); | ||
| 47 | } | 76 | } |
| 48 | 77 | ||
| 49 | 78 | ||
| @@ -56,16 +85,15 @@ static void firstline (LexState *LS) | |||
| 56 | } | 85 | } |
| 57 | 86 | ||
| 58 | 87 | ||
| 59 | void luaX_setinput (ZIO *z) | 88 | void luaX_setinput (LexState *LS, ZIO *z) |
| 60 | { | 89 | { |
| 61 | LexState *LS = L->lexstate; | ||
| 62 | LS->current = '\n'; | 90 | LS->current = '\n'; |
| 63 | LS->linelasttoken = 0; | ||
| 64 | LS->linenumber = 0; | 91 | LS->linenumber = 0; |
| 65 | LS->iflevel = 0; | 92 | LS->iflevel = 0; |
| 66 | LS->ifstate[0].skip = 0; | 93 | LS->ifstate[0].skip = 0; |
| 67 | LS->ifstate[0].elsepart = 1; /* to avoid a free $else */ | 94 | LS->ifstate[0].elsepart = 1; /* to avoid a free $else */ |
| 68 | LS->lex_z = z; | 95 | LS->lex_z = z; |
| 96 | LS->fs = NULL; | ||
| 69 | firstline(LS); | 97 | firstline(LS); |
| 70 | luaL_resetbuffer(); | 98 | luaL_resetbuffer(); |
| 71 | } | 99 | } |
| @@ -87,7 +115,7 @@ static void skipspace (LexState *LS) | |||
| 87 | } | 115 | } |
| 88 | 116 | ||
| 89 | 117 | ||
| 90 | static int checkcond (char *buff) | 118 | static int checkcond (LexState *LS, char *buff) |
| 91 | { | 119 | { |
| 92 | static char *opts[] = {"nil", "1", NULL}; | 120 | static char *opts[] = {"nil", "1", NULL}; |
| 93 | int i = luaO_findstring(buff, opts); | 121 | int i = luaO_findstring(buff, opts); |
| @@ -95,7 +123,7 @@ static int checkcond (char *buff) | |||
| 95 | else if (isalpha((unsigned char)buff[0]) || buff[0] == '_') | 123 | else if (isalpha((unsigned char)buff[0]) || buff[0] == '_') |
| 96 | return luaS_globaldefined(buff); | 124 | return luaS_globaldefined(buff); |
| 97 | else { | 125 | else { |
| 98 | luaY_syntaxerror("invalid $if condition", buff); | 126 | luaX_syntaxerror(LS, "invalid $if condition", buff); |
| 99 | return 0; /* to avoid warnings */ | 127 | return 0; /* to avoid warnings */ |
| 100 | } | 128 | } |
| 101 | } | 129 | } |
| @@ -108,7 +136,7 @@ static void readname (LexState *LS, char *buff) | |||
| 108 | while (isalnum(LS->current) || LS->current == '_') { | 136 | while (isalnum(LS->current) || LS->current == '_') { |
| 109 | if (i >= PRAGMASIZE) { | 137 | if (i >= PRAGMASIZE) { |
| 110 | buff[PRAGMASIZE] = 0; | 138 | buff[PRAGMASIZE] = 0; |
| 111 | luaY_syntaxerror("pragma too long", buff); | 139 | luaX_syntaxerror(LS, "pragma too long", buff); |
| 112 | } | 140 | } |
| 113 | buff[i++] = LS->current; | 141 | buff[i++] = LS->current; |
| 114 | next(LS); | 142 | next(LS); |
| @@ -126,7 +154,7 @@ static void ifskip (LexState *LS) | |||
| 126 | if (LS->current == '\n') | 154 | if (LS->current == '\n') |
| 127 | inclinenumber(LS); | 155 | inclinenumber(LS); |
| 128 | else if (LS->current == EOZ) | 156 | else if (LS->current == EOZ) |
| 129 | luaY_error("input ends inside a $if"); | 157 | luaX_error(LS, "input ends inside a $if"); |
| 130 | else next(LS); | 158 | else next(LS); |
| 131 | } | 159 | } |
| 132 | } | 160 | } |
| @@ -159,35 +187,35 @@ static void inclinenumber (LexState *LS) | |||
| 159 | break; | 187 | break; |
| 160 | case 3: /* end */ | 188 | case 3: /* end */ |
| 161 | if (LS->iflevel-- == 0) | 189 | if (LS->iflevel-- == 0) |
| 162 | luaY_syntaxerror("unmatched $end", "$end"); | 190 | luaX_syntaxerror(LS, "unmatched $end", "$end"); |
| 163 | break; | 191 | break; |
| 164 | case 4: /* ifnot */ | 192 | case 4: /* ifnot */ |
| 165 | ifnot = 1; | 193 | ifnot = 1; |
| 166 | /* go through */ | 194 | /* go through */ |
| 167 | case 5: /* if */ | 195 | case 5: /* if */ |
| 168 | if (LS->iflevel == MAX_IFS-1) | 196 | if (LS->iflevel == MAX_IFS-1) |
| 169 | luaY_syntaxerror("too many nested $ifs", "$if"); | 197 | luaX_syntaxerror(LS, "too many nested $ifs", "$if"); |
| 170 | readname(LS, buff); | 198 | readname(LS, buff); |
| 171 | LS->iflevel++; | 199 | LS->iflevel++; |
| 172 | LS->ifstate[LS->iflevel].elsepart = 0; | 200 | LS->ifstate[LS->iflevel].elsepart = 0; |
| 173 | LS->ifstate[LS->iflevel].condition = checkcond(buff) ? !ifnot : ifnot; | 201 | LS->ifstate[LS->iflevel].condition = checkcond(LS, buff) ? !ifnot : ifnot; |
| 174 | LS->ifstate[LS->iflevel].skip = skip || !LS->ifstate[LS->iflevel].condition; | 202 | LS->ifstate[LS->iflevel].skip = skip || !LS->ifstate[LS->iflevel].condition; |
| 175 | break; | 203 | break; |
| 176 | case 6: /* else */ | 204 | case 6: /* else */ |
| 177 | if (LS->ifstate[LS->iflevel].elsepart) | 205 | if (LS->ifstate[LS->iflevel].elsepart) |
| 178 | luaY_syntaxerror("unmatched $else", "$else"); | 206 | luaX_syntaxerror(LS, "unmatched $else", "$else"); |
| 179 | LS->ifstate[LS->iflevel].elsepart = 1; | 207 | LS->ifstate[LS->iflevel].elsepart = 1; |
| 180 | LS->ifstate[LS->iflevel].skip = LS->ifstate[LS->iflevel-1].skip || | 208 | LS->ifstate[LS->iflevel].skip = LS->ifstate[LS->iflevel-1].skip || |
| 181 | LS->ifstate[LS->iflevel].condition; | 209 | LS->ifstate[LS->iflevel].condition; |
| 182 | break; | 210 | break; |
| 183 | default: | 211 | default: |
| 184 | luaY_syntaxerror("unknown pragma", buff); | 212 | luaX_syntaxerror(LS, "unknown pragma", buff); |
| 185 | } | 213 | } |
| 186 | skipspace(LS); | 214 | skipspace(LS); |
| 187 | if (LS->current == '\n') /* pragma must end with a '\n' ... */ | 215 | if (LS->current == '\n') /* pragma must end with a '\n' ... */ |
| 188 | inclinenumber(LS); | 216 | inclinenumber(LS); |
| 189 | else if (LS->current != EOZ) /* or eof */ | 217 | else if (LS->current != EOZ) /* or eof */ |
| 190 | luaY_syntaxerror("invalid pragma format", buff); | 218 | luaX_syntaxerror(LS, "invalid pragma format", buff); |
| 191 | ifskip(LS); | 219 | ifskip(LS); |
| 192 | } | 220 | } |
| 193 | } | 221 | } |
| @@ -201,25 +229,16 @@ static void inclinenumber (LexState *LS) | |||
| 201 | 229 | ||
| 202 | 230 | ||
| 203 | 231 | ||
| 204 | #define save(c) luaL_addchar(c) | ||
| 205 | #define save_and_next(LS) (save(LS->current), next(LS)) | ||
| 206 | |||
| 207 | |||
| 208 | char *luaX_lasttoken (void) | ||
| 209 | { | ||
| 210 | save(0); | ||
| 211 | return luaL_buffer(); | ||
| 212 | } | ||
| 213 | 232 | ||
| 214 | 233 | ||
| 215 | static int read_long_string (LexState *LS, YYSTYPE *l) | 234 | static int read_long_string (LexState *LS) |
| 216 | { | 235 | { |
| 217 | int cont = 0; | 236 | int cont = 0; |
| 218 | while (1) { | 237 | while (1) { |
| 219 | switch (LS->current) { | 238 | switch (LS->current) { |
| 220 | case EOZ: | 239 | case EOZ: |
| 221 | luaY_error("unfinished long string"); | 240 | luaX_error(LS, "unfinished long string"); |
| 222 | return 0; /* to avoid warnings */ | 241 | return EOS; /* to avoid warnings */ |
| 223 | case '[': | 242 | case '[': |
| 224 | save_and_next(LS); | 243 | save_and_next(LS); |
| 225 | if (LS->current == '[') { | 244 | if (LS->current == '[') { |
| @@ -244,25 +263,15 @@ static int read_long_string (LexState *LS, YYSTYPE *l) | |||
| 244 | } | 263 | } |
| 245 | } endloop: | 264 | } endloop: |
| 246 | save_and_next(LS); /* pass the second ']' */ | 265 | save_and_next(LS); /* pass the second ']' */ |
| 247 | l->pTStr = luaS_newlstr(L->Mbuffbase+2, | 266 | LS->seminfo.ts = luaS_newlstr(L->Mbuffbase+2, |
| 248 | L->Mbuffnext-(L->Mbuffbase-L->Mbuffer)-4); | 267 | L->Mbuffnext-(L->Mbuffbase-L->Mbuffer)-4); |
| 249 | return STRING; | 268 | return STRING; |
| 250 | } | 269 | } |
| 251 | 270 | ||
| 252 | 271 | ||
| 253 | /* to avoid warnings; this declaration cannot be public since YYSTYPE | 272 | int luaX_lex (LexState *LS) { |
| 254 | ** cannot be visible in llex.h (otherwise there is an error, since | ||
| 255 | ** the parser body redefines it!) | ||
| 256 | */ | ||
| 257 | int luaY_lex (YYSTYPE *l); | ||
| 258 | int luaY_lex (YYSTYPE *l) | ||
| 259 | { | ||
| 260 | LexState *LS = L->lexstate; | ||
| 261 | double a; | 273 | double a; |
| 262 | luaL_resetbuffer(); | 274 | luaL_resetbuffer(); |
| 263 | if (lua_debug) | ||
| 264 | luaY_codedebugline(LS->linelasttoken); | ||
| 265 | LS->linelasttoken = LS->linenumber; | ||
| 266 | while (1) { | 275 | while (1) { |
| 267 | switch (LS->current) { | 276 | switch (LS->current) { |
| 268 | 277 | ||
| @@ -272,7 +281,6 @@ int luaY_lex (YYSTYPE *l) | |||
| 272 | 281 | ||
| 273 | case '\n': | 282 | case '\n': |
| 274 | inclinenumber(LS); | 283 | inclinenumber(LS); |
| 275 | LS->linelasttoken = LS->linenumber; | ||
| 276 | continue; | 284 | continue; |
| 277 | 285 | ||
| 278 | case '-': | 286 | case '-': |
| @@ -287,7 +295,7 @@ int luaY_lex (YYSTYPE *l) | |||
| 287 | if (LS->current != '[') return '['; | 295 | if (LS->current != '[') return '['; |
| 288 | else { | 296 | else { |
| 289 | save_and_next(LS); /* pass the second '[' */ | 297 | save_and_next(LS); /* pass the second '[' */ |
| 290 | return read_long_string(LS, l); | 298 | return read_long_string(LS); |
| 291 | } | 299 | } |
| 292 | 300 | ||
| 293 | case '=': | 301 | case '=': |
| @@ -318,8 +326,8 @@ int luaY_lex (YYSTYPE *l) | |||
| 318 | switch (LS->current) { | 326 | switch (LS->current) { |
| 319 | case EOZ: | 327 | case EOZ: |
| 320 | case '\n': | 328 | case '\n': |
| 321 | luaY_error("unfinished string"); | 329 | luaX_error(LS, "unfinished string"); |
| 322 | return 0; /* to avoid warnings */ | 330 | return EOS; /* to avoid warnings */ |
| 323 | case '\\': | 331 | case '\\': |
| 324 | next(LS); /* do not save the '\' */ | 332 | next(LS); /* do not save the '\' */ |
| 325 | switch (LS->current) { | 333 | switch (LS->current) { |
| @@ -345,13 +353,13 @@ int luaY_lex (YYSTYPE *l) | |||
| 345 | next(LS); | 353 | next(LS); |
| 346 | } while (++i<3 && isdigit(LS->current)); | 354 | } while (++i<3 && isdigit(LS->current)); |
| 347 | if (c >= 256) | 355 | if (c >= 256) |
| 348 | luaY_error("escape sequence too large"); | 356 | luaX_error(LS, "escape sequence too large"); |
| 349 | save(c); | 357 | save(c); |
| 350 | } | 358 | } |
| 351 | else { | 359 | else { |
| 352 | save('\\'); | 360 | save('\\'); |
| 353 | save(LS->current); | 361 | save(LS->current); |
| 354 | luaY_error("invalid escape sequence"); | 362 | luaX_error(LS, "invalid escape sequence"); |
| 355 | } | 363 | } |
| 356 | break; | 364 | break; |
| 357 | } | 365 | } |
| @@ -362,7 +370,7 @@ int luaY_lex (YYSTYPE *l) | |||
| 362 | } | 370 | } |
| 363 | } | 371 | } |
| 364 | save_and_next(LS); /* skip delimiter */ | 372 | save_and_next(LS); /* skip delimiter */ |
| 365 | l->pTStr = luaS_newlstr(L->Mbuffbase+1, | 373 | LS->seminfo.ts = luaS_newlstr(L->Mbuffbase+1, |
| 366 | L->Mbuffnext-(L->Mbuffbase-L->Mbuffer)-2); | 374 | L->Mbuffnext-(L->Mbuffbase-L->Mbuffer)-2); |
| 367 | return STRING; | 375 | return STRING; |
| 368 | } | 376 | } |
| @@ -395,7 +403,7 @@ int luaY_lex (YYSTYPE *l) | |||
| 395 | save_and_next(LS); | 403 | save_and_next(LS); |
| 396 | if (LS->current == '.') { | 404 | if (LS->current == '.') { |
| 397 | save('.'); | 405 | save('.'); |
| 398 | luaY_error( | 406 | luaX_error(LS, |
| 399 | "ambiguous syntax (decimal point x string concatenation)"); | 407 | "ambiguous syntax (decimal point x string concatenation)"); |
| 400 | } | 408 | } |
| 401 | } | 409 | } |
| @@ -415,7 +423,7 @@ int luaY_lex (YYSTYPE *l) | |||
| 415 | neg = (LS->current=='-'); | 423 | neg = (LS->current=='-'); |
| 416 | if (LS->current == '+' || LS->current == '-') save_and_next(LS); | 424 | if (LS->current == '+' || LS->current == '-') save_and_next(LS); |
| 417 | if (!isdigit(LS->current)) | 425 | if (!isdigit(LS->current)) |
| 418 | luaY_error("invalid numeral format"); | 426 | luaX_error(LS, "invalid numeral format"); |
| 419 | do { | 427 | do { |
| 420 | e = 10.0*e + (LS->current-'0'); | 428 | e = 10.0*e + (LS->current-'0'); |
| 421 | save_and_next(LS); | 429 | save_and_next(LS); |
| @@ -426,18 +434,20 @@ int luaY_lex (YYSTYPE *l) | |||
| 426 | ea *= ea; | 434 | ea *= ea; |
| 427 | } | 435 | } |
| 428 | } | 436 | } |
| 429 | l->vReal = a; | 437 | LS->seminfo.r = a; |
| 430 | return NUMBER; | 438 | return NUMBER; |
| 431 | } | 439 | } |
| 432 | 440 | ||
| 433 | case EOZ: | 441 | case EOZ: |
| 434 | if (LS->iflevel > 0) | 442 | if (LS->iflevel > 0) |
| 435 | luaY_error("input ends inside a $if"); | 443 | luaX_error(LS, "input ends inside a $if"); |
| 436 | return 0; | 444 | return EOS; |
| 437 | 445 | ||
| 438 | default: | 446 | default: |
| 439 | if (LS->current != '_' && !isalpha(LS->current)) { | 447 | if (LS->current != '_' && !isalpha(LS->current)) { |
| 440 | int c = LS->current; | 448 | int c = LS->current; |
| 449 | if (iscntrl(c)) | ||
| 450 | luaX_invalidchar(LS, c); | ||
| 441 | save_and_next(LS); | 451 | save_and_next(LS); |
| 442 | return c; | 452 | return c; |
| 443 | } | 453 | } |
| @@ -448,9 +458,9 @@ int luaY_lex (YYSTYPE *l) | |||
| 448 | } while (isalnum(LS->current) || LS->current == '_'); | 458 | } while (isalnum(LS->current) || LS->current == '_'); |
| 449 | save(0); | 459 | save(0); |
| 450 | ts = luaS_new(L->Mbuffbase); | 460 | ts = luaS_new(L->Mbuffbase); |
| 451 | if (ts->head.marked > 255) | 461 | if (ts->head.marked >= 'A') |
| 452 | return ts->head.marked; /* reserved word */ | 462 | return ts->head.marked; /* reserved word */ |
| 453 | l->pTStr = ts; | 463 | LS->seminfo.ts = ts; |
| 454 | return NAME; | 464 | return NAME; |
| 455 | } | 465 | } |
| 456 | } | 466 | } |
