diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1997-09-16 16:25:59 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1997-09-16 16:25:59 -0300 |
| commit | 60cc473bcfce079d1525fcffcfdfbeb66e35afa2 (patch) | |
| tree | 368fec478cec8637af6346e69ec66722ab38a9cf /llex.c | |
| parent | 43a2ee6ea1b7825c1892de614cb38a3fe487a19f (diff) | |
| download | lua-60cc473bcfce079d1525fcffcfdfbeb66e35afa2.tar.gz lua-60cc473bcfce079d1525fcffcfdfbeb66e35afa2.tar.bz2 lua-60cc473bcfce079d1525fcffcfdfbeb66e35afa2.zip | |
Lexical Analyzer
Diffstat (limited to 'llex.c')
| -rw-r--r-- | llex.c | 462 |
1 files changed, 462 insertions, 0 deletions
| @@ -0,0 +1,462 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: $ | ||
| 3 | ** Lexical Analizer | ||
| 4 | ** See Copyright Notice in lua.h | ||
| 5 | */ | ||
| 6 | |||
| 7 | |||
| 8 | #include <ctype.h> | ||
| 9 | #include <string.h> | ||
| 10 | |||
| 11 | #include "lglobal.h" | ||
| 12 | #include "llex.h" | ||
| 13 | #include "lmem.h" | ||
| 14 | #include "lobject.h" | ||
| 15 | #include "lparser.h" | ||
| 16 | #include "lstring.h" | ||
| 17 | #include "ltokens.h" | ||
| 18 | #include "luadebug.h" | ||
| 19 | #include "lzio.h" | ||
| 20 | |||
| 21 | |||
/*
** Lexer state shared by every function in this file.
*/
static int current;  /* look ahead character */
static ZIO *lex_z;  /* input stream; installed by luaX_setinput */


/* current input line; reset by luaX_setinput, advanced by inclinenumber */
int luaX_linenumber;
/* debug flag: set by the $debug pragma, cleared by $nodebug; while it is
   non-zero luaY_lex calls luaY_codedebugline before reading each token */
int lua_debug=0;


/* reads the next character of lex_z into "current" (and yields it) */
#define next() (current = zgetc(lex_z))
| 31 | |||
| 32 | |||
| 33 | |||
| 34 | static void addReserved (void) | ||
| 35 | { | ||
| 36 | static struct { | ||
| 37 | char *name; | ||
| 38 | int token; | ||
| 39 | } reserved [] = { | ||
| 40 | {"and", AND}, {"do", DO}, {"else", ELSE}, {"elseif", ELSEIF}, | ||
| 41 | {"end", END}, {"function", FUNCTION}, {"if", IF}, {"local", LOCAL}, | ||
| 42 | {"nil", NIL}, {"not", NOT}, {"or", OR}, {"repeat", REPEAT}, | ||
| 43 | {"return", RETURN}, {"then", THEN}, {"until", UNTIL}, {"while", WHILE} | ||
| 44 | }; | ||
| 45 | static int firsttime = 1; | ||
| 46 | if (firsttime) { | ||
| 47 | int i; | ||
| 48 | firsttime = 0; | ||
| 49 | for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) { | ||
| 50 | TaggedString *ts = luaS_new(reserved[i].name); | ||
| 51 | ts->marked = reserved[i].token; /* reserved word (always > 255) */ | ||
| 52 | } | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | |||
| 57 | |||
/*
** Size of the "ifstate" stack. Slot 0 describes the text outside any $if
** (see luaX_setinput), and inclinenumber refuses a new $if once iflevel
** reaches MAX_IFS-1.
*/
#define MAX_IFS 5

/* "ifstate" keeps the state of each nested $if the lexer is dealing with. */

static struct {
  int elsepart;  /* true if it is in the $else part */
  int condition;  /* true if the $if condition is true */
  int skip;  /* true if this part must be skipped */
} ifstate[MAX_IFS];

static int iflevel;  /* level of nested $if's (index of the current ifstate) */


/*
** Buffer that accumulates the text of the token being read.
*/
static struct textbuff {
  char *text;  /* storage obtained from luaM_buffer */
  int tokensize;  /* number of characters saved so far */
  int buffsize;  /* size requested for "text" */
} textbuff;
| 76 | |||
| 77 | |||
| 78 | static void firstline (void) | ||
| 79 | { | ||
| 80 | int c = zgetc(lex_z); | ||
| 81 | if (c == '#') | ||
| 82 | while((c=zgetc(lex_z)) != '\n' && c != EOZ) /* skip first line */; | ||
| 83 | zungetc(lex_z); | ||
| 84 | } | ||
| 85 | |||
| 86 | |||
| 87 | void luaX_setinput (ZIO *z) | ||
| 88 | { | ||
| 89 | addReserved(); | ||
| 90 | current = '\n'; | ||
| 91 | luaX_linenumber = 0; | ||
| 92 | iflevel = 0; | ||
| 93 | ifstate[0].skip = 0; | ||
| 94 | ifstate[0].elsepart = 1; /* to avoid a free $else */ | ||
| 95 | lex_z = z; | ||
| 96 | firstline(); | ||
| 97 | textbuff.buffsize = 20; | ||
| 98 | textbuff.text = luaM_buffer(textbuff.buffsize); | ||
| 99 | } | ||
| 100 | |||
| 101 | |||
| 102 | |||
| 103 | /* | ||
| 104 | ** ======================================================= | ||
| 105 | ** PRAGMAS | ||
| 106 | ** ======================================================= | ||
| 107 | */ | ||
| 108 | |||
| 109 | #define PRAGMASIZE 20 | ||
| 110 | |||
| 111 | static void skipspace (void) | ||
| 112 | { | ||
| 113 | while (current == ' ' || current == '\t') next(); | ||
| 114 | } | ||
| 115 | |||
| 116 | |||
/*
** Evaluates the condition word of a $if / $ifnot pragma.
**   "nil" gives 0 and "1" gives 1 (their positions in "opts");
**   a name (starting with a letter or '_') gives the result of
**   luaG_globaldefined for that name;
**   anything else is reported as a syntax error.
*/
static int checkcond (char *buff)
{
  /* The list must end with NULL, like the pragma list handed to
     luaO_findstring in inclinenumber: the function receives no length,
     so without the sentinel any word other than "nil" or "1" would
     presumably be searched for beyond the end of the array. */
  static char *opts[] = {"nil", "1", NULL};
  int i = luaO_findstring(buff, opts);
  if (i >= 0) return i;
  else if (isalpha((unsigned char)buff[0]) || buff[0] == '_')
    return luaG_globaldefined(buff);
  else {
    luaY_syntaxerror("invalid $if condition", buff);
    return 0;  /* to avoid warnings */
  }
}
| 129 | |||
| 130 | |||
| 131 | static void readname (char *buff) | ||
| 132 | { | ||
| 133 | int i = 0; | ||
| 134 | skipspace(); | ||
| 135 | while (isalnum(current) || current == '_') { | ||
| 136 | if (i >= PRAGMASIZE) { | ||
| 137 | buff[PRAGMASIZE] = 0; | ||
| 138 | luaY_syntaxerror("pragma too long", buff); | ||
| 139 | } | ||
| 140 | buff[i++] = current; | ||
| 141 | next(); | ||
| 142 | } | ||
| 143 | buff[i] = 0; | ||
| 144 | } | ||
| 145 | |||
| 146 | |||
| 147 | static void inclinenumber (void); | ||
| 148 | |||
| 149 | |||
| 150 | static void ifskip (void) | ||
| 151 | { | ||
| 152 | while (ifstate[iflevel].skip) { | ||
| 153 | if (current == '\n') | ||
| 154 | inclinenumber(); | ||
| 155 | else if (current == EOZ) | ||
| 156 | luaY_syntaxerror("input ends inside a $if", ""); | ||
| 157 | else next(); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | |||
| 162 | static void inclinenumber (void) | ||
| 163 | { | ||
| 164 | static char *pragmas [] = | ||
| 165 | {"debug", "nodebug", "endinput", "end", "ifnot", "if", "else", NULL}; | ||
| 166 | next(); /* skip '\n' */ | ||
| 167 | ++luaX_linenumber; | ||
| 168 | if (current == '$') { /* is a pragma? */ | ||
| 169 | char buff[PRAGMASIZE+1]; | ||
| 170 | int ifnot = 0; | ||
| 171 | int skip = ifstate[iflevel].skip; | ||
| 172 | next(); /* skip $ */ | ||
| 173 | readname(buff); | ||
| 174 | switch (luaO_findstring(buff, pragmas)) { | ||
| 175 | case 0: /* debug */ | ||
| 176 | if (!skip) lua_debug = 1; | ||
| 177 | break; | ||
| 178 | case 1: /* nodebug */ | ||
| 179 | if (!skip) lua_debug = 0; | ||
| 180 | break; | ||
| 181 | case 2: /* endinput */ | ||
| 182 | if (!skip) { | ||
| 183 | current = EOZ; | ||
| 184 | iflevel = 0; /* to allow $endinput inside a $if */ | ||
| 185 | } | ||
| 186 | break; | ||
| 187 | case 3: /* end */ | ||
| 188 | if (iflevel-- == 0) | ||
| 189 | luaY_syntaxerror("unmatched $end", "$end"); | ||
| 190 | break; | ||
| 191 | case 4: /* ifnot */ | ||
| 192 | ifnot = 1; | ||
| 193 | /* go through */ | ||
| 194 | case 5: /* if */ | ||
| 195 | if (iflevel == MAX_IFS-1) | ||
| 196 | luaY_syntaxerror("too many nested `$ifs'", "$if"); | ||
| 197 | readname(buff); | ||
| 198 | iflevel++; | ||
| 199 | ifstate[iflevel].elsepart = 0; | ||
| 200 | ifstate[iflevel].condition = checkcond(buff) ? !ifnot : ifnot; | ||
| 201 | ifstate[iflevel].skip = skip || !ifstate[iflevel].condition; | ||
| 202 | break; | ||
| 203 | case 6: /* else */ | ||
| 204 | if (ifstate[iflevel].elsepart) | ||
| 205 | luaY_syntaxerror("unmatched $else", "$else"); | ||
| 206 | ifstate[iflevel].elsepart = 1; | ||
| 207 | ifstate[iflevel].skip = | ||
| 208 | ifstate[iflevel-1].skip || ifstate[iflevel].condition; | ||
| 209 | break; | ||
| 210 | default: | ||
| 211 | luaY_syntaxerror("invalid pragma", buff); | ||
| 212 | } | ||
| 213 | skipspace(); | ||
| 214 | if (current == '\n') /* pragma must end with a '\n' ... */ | ||
| 215 | inclinenumber(); | ||
| 216 | else if (current != EOZ) /* or eof */ | ||
| 217 | luaY_syntaxerror("invalid pragma format", buff); | ||
| 218 | ifskip(); | ||
| 219 | } | ||
| 220 | } | ||
| 221 | |||
| 222 | |||
| 223 | /* | ||
| 224 | ** ======================================================= | ||
| 225 | ** LEXICAL ANALIZER | ||
| 226 | ** ======================================================= | ||
| 227 | */ | ||
| 228 | |||
| 229 | |||
| 230 | |||
| 231 | static void save (int c) | ||
| 232 | { | ||
| 233 | if (textbuff.tokensize >= textbuff.buffsize) | ||
| 234 | textbuff.text = luaM_buffer(textbuff.buffsize *= 2); | ||
| 235 | textbuff.text[textbuff.tokensize++] = c; | ||
| 236 | } | ||
| 237 | |||
| 238 | |||
| 239 | char *luaX_lasttoken (void) | ||
| 240 | { | ||
| 241 | save(0); | ||
| 242 | return textbuff.text; | ||
| 243 | } | ||
| 244 | |||
| 245 | |||
| 246 | #define save_and_next() (save(current), next()) | ||
| 247 | |||
| 248 | |||
| 249 | static int read_long_string (void) | ||
| 250 | { | ||
| 251 | int cont = 0; | ||
| 252 | while (1) { | ||
| 253 | switch (current) { | ||
| 254 | case EOZ: | ||
| 255 | save(0); | ||
| 256 | return WRONGTOKEN; | ||
| 257 | case '[': | ||
| 258 | save_and_next(); | ||
| 259 | if (current == '[') { | ||
| 260 | cont++; | ||
| 261 | save_and_next(); | ||
| 262 | } | ||
| 263 | continue; | ||
| 264 | case ']': | ||
| 265 | save_and_next(); | ||
| 266 | if (current == ']') { | ||
| 267 | if (cont == 0) goto endloop; | ||
| 268 | cont--; | ||
| 269 | save_and_next(); | ||
| 270 | } | ||
| 271 | continue; | ||
| 272 | case '\n': | ||
| 273 | save('\n'); | ||
| 274 | inclinenumber(); | ||
| 275 | continue; | ||
| 276 | default: | ||
| 277 | save_and_next(); | ||
| 278 | } | ||
| 279 | } endloop: | ||
| 280 | save_and_next(); /* pass the second ']' */ | ||
| 281 | textbuff.text[textbuff.tokensize-2] = 0; /* erases ']]' */ | ||
| 282 | luaY_lval.pTStr = luaS_new(textbuff.text+2); | ||
| 283 | textbuff.text[textbuff.tokensize-2] = ']'; /* restores ']]' */ | ||
| 284 | return STRING; | ||
| 285 | } | ||
| 286 | |||
| 287 | |||
| 288 | int luaY_lex (void) | ||
| 289 | { | ||
| 290 | static int linelasttoken = 0; | ||
| 291 | double a; | ||
| 292 | textbuff.tokensize = 0; | ||
| 293 | if (lua_debug) | ||
| 294 | luaY_codedebugline(linelasttoken); | ||
| 295 | linelasttoken = luaX_linenumber; | ||
| 296 | while (1) { | ||
| 297 | switch (current) { | ||
| 298 | case '\n': | ||
| 299 | inclinenumber(); | ||
| 300 | linelasttoken = luaX_linenumber; | ||
| 301 | continue; | ||
| 302 | |||
| 303 | case ' ': case '\t': case '\r': /* CR: to avoid problems with DOS */ | ||
| 304 | next(); | ||
| 305 | continue; | ||
| 306 | |||
| 307 | case '-': | ||
| 308 | save_and_next(); | ||
| 309 | if (current != '-') return '-'; | ||
| 310 | do { next(); } while (current != '\n' && current != EOZ); | ||
| 311 | textbuff.tokensize = 0; | ||
| 312 | continue; | ||
| 313 | |||
| 314 | case '[': | ||
| 315 | save_and_next(); | ||
| 316 | if (current != '[') return '['; | ||
| 317 | else { | ||
| 318 | save_and_next(); /* pass the second '[' */ | ||
| 319 | return read_long_string(); | ||
| 320 | } | ||
| 321 | |||
| 322 | case '=': | ||
| 323 | save_and_next(); | ||
| 324 | if (current != '=') return '='; | ||
| 325 | else { save_and_next(); return EQ; } | ||
| 326 | |||
| 327 | case '<': | ||
| 328 | save_and_next(); | ||
| 329 | if (current != '=') return '<'; | ||
| 330 | else { save_and_next(); return LE; } | ||
| 331 | |||
| 332 | case '>': | ||
| 333 | save_and_next(); | ||
| 334 | if (current != '=') return '>'; | ||
| 335 | else { save_and_next(); return GE; } | ||
| 336 | |||
| 337 | case '~': | ||
| 338 | save_and_next(); | ||
| 339 | if (current != '=') return '~'; | ||
| 340 | else { save_and_next(); return NE; } | ||
| 341 | |||
| 342 | case '"': | ||
| 343 | case '\'': { | ||
| 344 | int del = current; | ||
| 345 | save_and_next(); | ||
| 346 | while (current != del) { | ||
| 347 | switch (current) { | ||
| 348 | case EOZ: | ||
| 349 | case '\n': | ||
| 350 | save(0); | ||
| 351 | return WRONGTOKEN; | ||
| 352 | case '\\': | ||
| 353 | next(); /* do not save the '\' */ | ||
| 354 | switch (current) { | ||
| 355 | case 'n': save('\n'); next(); break; | ||
| 356 | case 't': save('\t'); next(); break; | ||
| 357 | case 'r': save('\r'); next(); break; | ||
| 358 | case '\n': save('\n'); inclinenumber(); break; | ||
| 359 | default : save_and_next(); break; | ||
| 360 | } | ||
| 361 | break; | ||
| 362 | default: | ||
| 363 | save_and_next(); | ||
| 364 | } | ||
| 365 | } | ||
| 366 | next(); /* skip delimiter */ | ||
| 367 | save(0); | ||
| 368 | luaY_lval.pTStr = luaS_new(textbuff.text+1); | ||
| 369 | textbuff.text[textbuff.tokensize-1] = del; /* restore delimiter */ | ||
| 370 | return STRING; | ||
| 371 | } | ||
| 372 | |||
| 373 | case '.': | ||
| 374 | save_and_next(); | ||
| 375 | if (current == '.') | ||
| 376 | { | ||
| 377 | save_and_next(); | ||
| 378 | if (current == '.') | ||
| 379 | { | ||
| 380 | save_and_next(); | ||
| 381 | return DOTS; /* ... */ | ||
| 382 | } | ||
| 383 | else return CONC; /* .. */ | ||
| 384 | } | ||
| 385 | else if (!isdigit(current)) return '.'; | ||
| 386 | /* current is a digit: goes through to number */ | ||
| 387 | a=0.0; | ||
| 388 | goto fraction; | ||
| 389 | |||
| 390 | case '0': case '1': case '2': case '3': case '4': | ||
| 391 | case '5': case '6': case '7': case '8': case '9': | ||
| 392 | a=0.0; | ||
| 393 | do { | ||
| 394 | a=10.0*a+(current-'0'); | ||
| 395 | save_and_next(); | ||
| 396 | } while (isdigit(current)); | ||
| 397 | if (current == '.') { | ||
| 398 | save_and_next(); | ||
| 399 | if (current == '.') { | ||
| 400 | save(0); | ||
| 401 | luaY_error( | ||
| 402 | "ambiguous syntax (decimal point x string concatenation)"); | ||
| 403 | } | ||
| 404 | } | ||
| 405 | fraction: | ||
| 406 | { double da=0.1; | ||
| 407 | while (isdigit(current)) | ||
| 408 | { | ||
| 409 | a+=(current-'0')*da; | ||
| 410 | da/=10.0; | ||
| 411 | save_and_next(); | ||
| 412 | } | ||
| 413 | if (toupper(current) == 'E') { | ||
| 414 | int e=0; | ||
| 415 | int neg; | ||
| 416 | double ea; | ||
| 417 | save_and_next(); | ||
| 418 | neg=(current=='-'); | ||
| 419 | if (current == '+' || current == '-') save_and_next(); | ||
| 420 | if (!isdigit(current)) { | ||
| 421 | save(0); return WRONGTOKEN; } | ||
| 422 | do { | ||
| 423 | e=10.0*e+(current-'0'); | ||
| 424 | save_and_next(); | ||
| 425 | } while (isdigit(current)); | ||
| 426 | for (ea=neg?0.1:10.0; e>0; e>>=1) | ||
| 427 | { | ||
| 428 | if (e & 1) a*=ea; | ||
| 429 | ea*=ea; | ||
| 430 | } | ||
| 431 | } | ||
| 432 | luaY_lval.vReal = a; | ||
| 433 | return NUMBER; | ||
| 434 | } | ||
| 435 | |||
| 436 | case EOZ: | ||
| 437 | save(0); | ||
| 438 | if (iflevel > 0) | ||
| 439 | luaY_error("missing $endif"); | ||
| 440 | return 0; | ||
| 441 | |||
| 442 | default: | ||
| 443 | if (current != '_' && !isalpha(current)) { | ||
| 444 | save_and_next(); | ||
| 445 | return textbuff.text[0]; | ||
| 446 | } | ||
| 447 | else { /* identifier or reserved word */ | ||
| 448 | TaggedString *ts; | ||
| 449 | do { | ||
| 450 | save_and_next(); | ||
| 451 | } while (isalnum(current) || current == '_'); | ||
| 452 | save(0); | ||
| 453 | ts = luaS_new(textbuff.text); | ||
| 454 | if (ts->marked > 255) | ||
| 455 | return ts->marked; /* reserved word */ | ||
| 456 | luaY_lval.pTStr = ts; | ||
| 457 | return NAME; | ||
| 458 | } | ||
| 459 | } | ||
| 460 | } | ||
| 461 | } | ||
| 462 | |||
