diff options
Diffstat (limited to 'llex.c')
-rw-r--r-- | llex.c | 104 |
1 files changed, 62 insertions, 42 deletions
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: llex.c,v 2.35 2010/02/27 21:16:24 roberto Exp roberto $ | 2 | ** $Id: llex.c,v 2.36 2010/04/05 16:35:37 roberto Exp roberto $ |
3 | ** Lexical Analyzer | 3 | ** Lexical Analyzer |
4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
5 | */ | 5 | */ |
@@ -117,21 +117,30 @@ void luaX_syntaxerror (LexState *ls, const char *msg) { | |||
117 | } | 117 | } |
118 | 118 | ||
119 | 119 | ||
120 | /* | ||
121 | ** creates a new string and anchors it in function's table so that | ||
122 | ** it will not be collected until the end of the function's compilation | ||
123 | ** (by that time it should be anchored in function's prototype) | ||
124 | */ | ||
120 | TString *luaX_newstring (LexState *ls, const char *str, size_t l) { | 125 | TString *luaX_newstring (LexState *ls, const char *str, size_t l) { |
121 | lua_State *L = ls->L; | 126 | lua_State *L = ls->L; |
122 | TValue *o; /* entry for `str' */ | 127 | TValue *o; /* entry for `str' */ |
123 | TString *ts = luaS_newlstr(L, str, l); | 128 | TString *ts = luaS_newlstr(L, str, l); /* create new string */ |
124 | setsvalue2s(L, L->top++, ts); /* anchor string */ | 129 | setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */ |
125 | o = luaH_setstr(L, ls->fs->h, ts); | 130 | o = luaH_setstr(L, ls->fs->h, ts); |
126 | if (ttisnil(o)) { | 131 | if (ttisnil(o)) { |
127 | setbvalue(o, 1); /* make sure `str' will not be collected */ | 132 | setbvalue(o, 1); /* t[string] = true */ |
128 | luaC_checkGC(L); | 133 | luaC_checkGC(L); |
129 | } | 134 | } |
130 | L->top--; | 135 | L->top--; /* remove string from stack */ |
131 | return ts; | 136 | return ts; |
132 | } | 137 | } |
133 | 138 | ||
134 | 139 | ||
140 | /* | ||
141 | ** increment line number and skips newline sequence (any of | ||
142 | ** \n, \r, \n\r, or \r\n) | ||
143 | */ | ||
135 | static void inclinenumber (LexState *ls) { | 144 | static void inclinenumber (LexState *ls) { |
136 | int old = ls->current; | 145 | int old = ls->current; |
137 | lua_assert(currIsNewline(ls)); | 146 | lua_assert(currIsNewline(ls)); |
@@ -152,7 +161,7 @@ void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { | |||
152 | ls->linenumber = 1; | 161 | ls->linenumber = 1; |
153 | ls->lastline = 1; | 162 | ls->lastline = 1; |
154 | ls->source = source; | 163 | ls->source = source; |
155 | ls->envn = luaS_new(L, "_ENV"); | 164 | ls->envn = luaS_new(L, "_ENV"); /* create env name */ |
156 | luaS_fix(ls->envn); /* never collect this name */ | 165 | luaS_fix(ls->envn); /* never collect this name */ |
157 | luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ | 166 | luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ |
158 | next(ls); /* read first char */ | 167 | next(ls); /* read first char */ |
@@ -176,6 +185,9 @@ static int check_next (LexState *ls, const char *set) { | |||
176 | } | 185 | } |
177 | 186 | ||
178 | 187 | ||
188 | /* | ||
189 | ** change all characters 'from' in buffer to 'to' | ||
190 | */ | ||
179 | static void buffreplace (LexState *ls, char from, char to) { | 191 | static void buffreplace (LexState *ls, char from, char to) { |
180 | size_t n = luaZ_bufflen(ls->buff); | 192 | size_t n = luaZ_bufflen(ls->buff); |
181 | char *p = luaZ_buffer(ls->buff); | 193 | char *p = luaZ_buffer(ls->buff); |
@@ -188,11 +200,14 @@ static void buffreplace (LexState *ls, char from, char to) { | |||
188 | #define getlocaledecpoint() (localeconv()->decimal_point[0]) | 200 | #define getlocaledecpoint() (localeconv()->decimal_point[0]) |
189 | #endif | 201 | #endif |
190 | 202 | ||
203 | /* | ||
204 | ** in case of format error, try to change decimal point separator to | ||
205 | ** the one defined in the current locale and check again | ||
206 | */ | ||
191 | static void trydecpoint (LexState *ls, SemInfo *seminfo) { | 207 | static void trydecpoint (LexState *ls, SemInfo *seminfo) { |
192 | /* format error: try to update decimal point separator */ | ||
193 | char old = ls->decpoint; | 208 | char old = ls->decpoint; |
194 | ls->decpoint = getlocaledecpoint(); | 209 | ls->decpoint = getlocaledecpoint(); |
195 | buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ | 210 | buffreplace(ls, old, ls->decpoint); /* try new decimal separator */ |
196 | if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { | 211 | if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { |
197 | /* format error with correct decimal point: no more options */ | 212 | /* format error with correct decimal point: no more options */ |
198 | buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ | 213 | buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ |
@@ -218,6 +233,10 @@ static void read_numeral (LexState *ls, SemInfo *seminfo) { | |||
218 | } | 233 | } |
219 | 234 | ||
220 | 235 | ||
236 | /* | ||
237 | ** skip a sequence '[=*=[' or ']=*]' and return its number of '='s or | ||
238 | ** -1 if sequence is malformed | ||
239 | */ | ||
221 | static int skip_sep (LexState *ls) { | 240 | static int skip_sep (LexState *ls) { |
222 | int count = 0; | 241 | int count = 0; |
223 | int s = ls->current; | 242 | int s = ls->current; |
@@ -248,8 +267,7 @@ static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { | |||
248 | } | 267 | } |
249 | break; | 268 | break; |
250 | } | 269 | } |
251 | case '\n': | 270 | case '\n': case '\r': { |
252 | case '\r': { | ||
253 | save(ls, '\n'); | 271 | save(ls, '\n'); |
254 | inclinenumber(ls); | 272 | inclinenumber(ls); |
255 | if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ | 273 | if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ |
@@ -310,7 +328,7 @@ static int readdecesc (LexState *ls) { | |||
310 | 328 | ||
311 | 329 | ||
312 | static void read_string (LexState *ls, int del, SemInfo *seminfo) { | 330 | static void read_string (LexState *ls, int del, SemInfo *seminfo) { |
313 | save_and_next(ls); | 331 | save_and_next(ls); /* keep delimiter (for error messages) */ |
314 | while (ls->current != del) { | 332 | while (ls->current != del) { |
315 | switch (ls->current) { | 333 | switch (ls->current) { |
316 | case EOZ: | 334 | case EOZ: |
@@ -335,6 +353,14 @@ static void read_string (LexState *ls, int del, SemInfo *seminfo) { | |||
335 | case '\n': | 353 | case '\n': |
336 | case '\r': save(ls, '\n'); inclinenumber(ls); continue; | 354 | case '\r': save(ls, '\n'); inclinenumber(ls); continue; |
337 | case EOZ: continue; /* will raise an error next loop */ | 355 | case EOZ: continue; /* will raise an error next loop */ |
356 | case '*': { /* skip following span of spaces */ | ||
357 | next(ls); /* skip the '*' */ | ||
358 | while (lisspace(ls->current)) { | ||
359 | if (currIsNewline(ls)) inclinenumber(ls); | ||
360 | else next(ls); | ||
361 | } | ||
362 | continue; /* do not save 'c' */ | ||
363 | } | ||
338 | default: { | 364 | default: { |
339 | if (!lisdigit(ls->current)) | 365 | if (!lisdigit(ls->current)) |
340 | c = ls->current; /* handles \\, \", \', and \? */ | 366 | c = ls->current; /* handles \\, \", \', and \? */ |
@@ -361,31 +387,34 @@ static int llex (LexState *ls, SemInfo *seminfo) { | |||
361 | luaZ_resetbuffer(ls->buff); | 387 | luaZ_resetbuffer(ls->buff); |
362 | for (;;) { | 388 | for (;;) { |
363 | switch (ls->current) { | 389 | switch (ls->current) { |
364 | case '\n': | 390 | case '\n': case '\r': { /* line breaks */ |
365 | case '\r': { | ||
366 | inclinenumber(ls); | 391 | inclinenumber(ls); |
367 | break; | 392 | break; |
368 | } | 393 | } |
369 | case '-': { | 394 | case ' ': case '\f': case '\t': case '\v': { /* spaces */ |
395 | next(ls); | ||
396 | break; | ||
397 | } | ||
398 | case '-': { /* '-' or '--' (comment) */ | ||
370 | next(ls); | 399 | next(ls); |
371 | if (ls->current != '-') return '-'; | 400 | if (ls->current != '-') return '-'; |
372 | /* else is a comment */ | 401 | /* else is a comment */ |
373 | next(ls); | 402 | next(ls); |
374 | if (ls->current == '[') { | 403 | if (ls->current == '[') { /* long comment? */ |
375 | int sep = skip_sep(ls); | 404 | int sep = skip_sep(ls); |
376 | luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ | 405 | luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ |
377 | if (sep >= 0) { | 406 | if (sep >= 0) { |
378 | read_long_string(ls, NULL, sep); /* long comment */ | 407 | read_long_string(ls, NULL, sep); /* skip long comment */ |
379 | luaZ_resetbuffer(ls->buff); | 408 | luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */ |
380 | break; | 409 | break; |
381 | } | 410 | } |
382 | } | 411 | } |
383 | /* else short comment */ | 412 | /* else short comment */ |
384 | while (!currIsNewline(ls) && ls->current != EOZ) | 413 | while (!currIsNewline(ls) && ls->current != EOZ) |
385 | next(ls); | 414 | next(ls); /* skip until end of line (or end of file) */ |
386 | break; | 415 | break; |
387 | } | 416 | } |
388 | case '[': { | 417 | case '[': { /* long string or simply '[' */ |
389 | int sep = skip_sep(ls); | 418 | int sep = skip_sep(ls); |
390 | if (sep >= 0) { | 419 | if (sep >= 0) { |
391 | read_long_string(ls, seminfo, sep); | 420 | read_long_string(ls, seminfo, sep); |
@@ -414,39 +443,30 @@ static int llex (LexState *ls, SemInfo *seminfo) { | |||
414 | if (ls->current != '=') return '~'; | 443 | if (ls->current != '=') return '~'; |
415 | else { next(ls); return TK_NE; } | 444 | else { next(ls); return TK_NE; } |
416 | } | 445 | } |
417 | case '"': | 446 | case '"': case '\'': { /* short literal strings */ |
418 | case '\'': { | ||
419 | read_string(ls, ls->current, seminfo); | 447 | read_string(ls, ls->current, seminfo); |
420 | return TK_STRING; | 448 | return TK_STRING; |
421 | } | 449 | } |
422 | case '.': { | 450 | case '.': { /* '.', '..', '...', or number */ |
423 | save_and_next(ls); | 451 | save_and_next(ls); |
424 | if (check_next(ls, ".")) { | 452 | if (check_next(ls, ".")) { |
425 | if (check_next(ls, ".")) | 453 | if (check_next(ls, ".")) |
426 | return TK_DOTS; /* ... */ | 454 | return TK_DOTS; /* '...' */ |
427 | else return TK_CONCAT; /* .. */ | 455 | else return TK_CONCAT; /* '..' */ |
428 | } | 456 | } |
429 | else if (!lisdigit(ls->current)) return '.'; | 457 | else if (!lisdigit(ls->current)) return '.'; |
430 | else { | 458 | /* else go through */ |
431 | read_numeral(ls, seminfo); | 459 | } |
432 | return TK_NUMBER; | 460 | case '0': case '1': case '2': case '3': case '4': |
433 | } | 461 | case '5': case '6': case '7': case '8': case '9': { |
462 | read_numeral(ls, seminfo); | ||
463 | return TK_NUMBER; | ||
434 | } | 464 | } |
435 | case EOZ: { | 465 | case EOZ: { |
436 | return TK_EOS; | 466 | return TK_EOS; |
437 | } | 467 | } |
438 | default: { | 468 | default: { |
439 | if (lisspace(ls->current)) { | 469 | if (lislalpha(ls->current)) { /* identifier or reserved word? */ |
440 | lua_assert(!currIsNewline(ls)); | ||
441 | next(ls); | ||
442 | break; | ||
443 | } | ||
444 | else if (lisdigit(ls->current)) { | ||
445 | read_numeral(ls, seminfo); | ||
446 | return TK_NUMBER; | ||
447 | } | ||
448 | else if (lislalpha(ls->current)) { | ||
449 | /* identifier or reserved word */ | ||
450 | TString *ts; | 470 | TString *ts; |
451 | do { | 471 | do { |
452 | save_and_next(ls); | 472 | save_and_next(ls); |
@@ -460,10 +480,10 @@ static int llex (LexState *ls, SemInfo *seminfo) { | |||
460 | return TK_NAME; | 480 | return TK_NAME; |
461 | } | 481 | } |
462 | } | 482 | } |
463 | else { | 483 | else { /* single-char tokens (+ - / ...) */ |
464 | int c = ls->current; | 484 | int c = ls->current; |
465 | next(ls); | 485 | next(ls); |
466 | return c; /* single-char tokens (+ - / ...) */ | 486 | return c; |
467 | } | 487 | } |
468 | } | 488 | } |
469 | } | 489 | } |