about | summary | refs | log | tree | commit | diff
path: root/llex.c
diff options
context:
space:
mode:
author: Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2003-08-28 11:38:46 -0300
committer: Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2003-08-28 11:38:46 -0300
commit: b27664e0db93f513fe43fa582a9950b8ad701905 (patch)
tree: b08d94d00d284638528b6ae13212e37ba28ed930 /llex.c
parent: beb896b08223d7ae11868ee1ea7a4858626b5ee7 (diff)
download: lua-b27664e0db93f513fe43fa582a9950b8ad701905.tar.gz
lua-b27664e0db93f513fe43fa582a9950b8ad701905.tar.bz2
lua-b27664e0db93f513fe43fa582a9950b8ad701905.zip
simpler manipulation of buffer count in scanner
Diffstat (limited to 'llex.c')
-rw-r--r-- llex.c 349
1 file changed, 163 insertions, 186 deletions
diff --git a/llex.c b/llex.c
index e7b20a46..a6f88f2d 100644
--- a/llex.c
+++ b/llex.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: llex.c,v 1.121 2003/08/21 14:16:43 roberto Exp roberto $ 2** $Id: llex.c,v 1.122 2003/08/27 21:01:44 roberto Exp roberto $
3** Lexical Analyzer 3** Lexical Analyzer
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -22,10 +22,12 @@
22 22
23 23
24 24
25#define next(LS) (LS->current = zgetc(LS->z)) 25#define next(ls) (ls->current = zgetc(ls->z))
26 26
27#define save(ls,c) luaZ_save(ls->L,ls->buff,c)
27 28
28#define nextIsNewline(LS) (LS->current == '\n' || LS->current == '\r') 29
30#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
29 31
30 32
31/* ORDER RESERVED */ 33/* ORDER RESERVED */
@@ -83,6 +85,7 @@ void luaX_syntaxerror (LexState *ls, const char *msg) {
83 break; 85 break;
84 case TK_STRING: 86 case TK_STRING:
85 case TK_NUMBER: 87 case TK_NUMBER:
88 save(ls, '\0');
86 lasttoken = luaZ_buffer(ls->buff); 89 lasttoken = luaZ_buffer(ls->buff);
87 break; 90 break;
88 default: 91 default:
@@ -106,41 +109,43 @@ const char *luaX_token2str (LexState *ls, int token) {
106static void luaX_lexerror (LexState *ls, const char *s, int token) { 109static void luaX_lexerror (LexState *ls, const char *s, int token) {
107 if (token == TK_EOS) 110 if (token == TK_EOS)
108 luaX_error(ls, s, luaX_token2str(ls, token)); 111 luaX_error(ls, s, luaX_token2str(ls, token));
109 else 112 else {
113 save(ls, '\0');
110 luaX_error(ls, s, luaZ_buffer(ls->buff)); 114 luaX_error(ls, s, luaZ_buffer(ls->buff));
115 }
111} 116}
112 117
113 118
114TString *luaX_newstring (LexState *LS, const char *str, size_t l) { 119TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
115 lua_State *L = LS->L; 120 lua_State *L = ls->L;
116 TString *ts = luaS_newlstr(L, str, l); 121 TString *ts = luaS_newlstr(L, str, l);
117 TObject *o = luaH_setstr(L, LS->fs->h, ts); /* entry for `str' */ 122 TObject *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */
118 if (ttisnil(o)) 123 if (ttisnil(o))
119 setbvalue(o, 1); /* make sure `str' will not be collected */ 124 setbvalue(o, 1); /* make sure `str' will not be collected */
120 return ts; 125 return ts;
121} 126}
122 127
123 128
124static void inclinenumber (LexState *LS) { 129static void inclinenumber (LexState *ls) {
125 int old = LS->current; 130 int old = ls->current;
126 lua_assert(nextIsNewline(LS)); 131 lua_assert(currIsNewline(ls));
127 next(LS); /* skip `\n' or `\r' */ 132 next(ls); /* skip `\n' or `\r' */
128 if (nextIsNewline(LS) && LS->current != old) 133 if (currIsNewline(ls) && ls->current != old)
129 next(LS); /* skip `\n\r' or `\r\n' */ 134 next(ls); /* skip `\n\r' or `\r\n' */
130 ++LS->linenumber; 135 ++ls->linenumber;
131 luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk"); 136 luaX_checklimit(ls, ls->linenumber, MAX_INT, "lines in a chunk");
132} 137}
133 138
134 139
135void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) { 140void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
136 LS->L = L; 141 ls->L = L;
137 LS->lookahead.token = TK_EOS; /* no look-ahead token */ 142 ls->lookahead.token = TK_EOS; /* no look-ahead token */
138 LS->z = z; 143 ls->z = z;
139 LS->fs = NULL; 144 ls->fs = NULL;
140 LS->linenumber = 1; 145 ls->linenumber = 1;
141 LS->lastline = 1; 146 ls->lastline = 1;
142 LS->source = source; 147 ls->source = source;
143 next(LS); /* read first char */ 148 next(ls); /* read first char */
144} 149}
145 150
146 151
@@ -152,246 +157,214 @@ void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) {
152*/ 157*/
153 158
154 159
155/* use buffer to store names, literal strings and numbers */
156
157/* extra space to allocate when growing buffer */
158#define EXTRABUFF 32
159
160/* maximum number of chars that can be read without checking buffer size */
161#define MAXNOCHECK 5
162
163#define checkbuffer(LS, len) \
164 if (((len)+MAXNOCHECK)*sizeof(char) > luaZ_sizebuffer((LS)->buff)) \
165 luaZ_openspace((LS)->L, (LS)->buff, (len)+EXTRABUFF)
166
167#define save(LS, c, l) \
168 (luaZ_buffer((LS)->buff)[l++] = cast(char, c))
169#define save_and_next(LS, l) (save(LS, LS->current, l), next(LS))
170 160
171 161static void save_and_next (LexState *ls) {
172static size_t readname (LexState *LS) { 162 save(ls, ls->current);
173 size_t l = 0; 163 next(ls);
174 checkbuffer(LS, l);
175 do {
176 checkbuffer(LS, l);
177 save_and_next(LS, l);
178 } while (isalnum(LS->current) || LS->current == '_');
179 save(LS, '\0', l);
180 return l-1;
181} 164}
182 165
183 166
167
184/* LUA_NUMBER */ 168/* LUA_NUMBER */
185static void read_numeral (LexState *LS, int comma, SemInfo *seminfo) { 169static void read_numeral (LexState *ls, SemInfo *seminfo) {
186 size_t l = 0; 170 while (isdigit(ls->current)) {
187 checkbuffer(LS, l); 171 save_and_next(ls);
188 if (comma) save(LS, '.', l);
189 while (isdigit(LS->current)) {
190 checkbuffer(LS, l);
191 save_and_next(LS, l);
192 } 172 }
193 if (LS->current == '.') { 173 if (ls->current == '.') {
194 save_and_next(LS, l); 174 save_and_next(ls);
195 if (LS->current == '.') { 175 if (ls->current == '.') {
196 save_and_next(LS, l); 176 save_and_next(ls);
197 save(LS, '\0', l); 177 luaX_lexerror(ls,
198 luaX_lexerror(LS,
199 "ambiguous syntax (decimal point x string concatenation)", 178 "ambiguous syntax (decimal point x string concatenation)",
200 TK_NUMBER); 179 TK_NUMBER);
201 } 180 }
202 } 181 }
203 while (isdigit(LS->current)) { 182 while (isdigit(ls->current)) {
204 checkbuffer(LS, l); 183 save_and_next(ls);
205 save_and_next(LS, l);
206 } 184 }
207 if (LS->current == 'e' || LS->current == 'E') { 185 if (ls->current == 'e' || ls->current == 'E') {
208 save_and_next(LS, l); /* read `E' */ 186 save_and_next(ls); /* read `E' */
209 if (LS->current == '+' || LS->current == '-') 187 if (ls->current == '+' || ls->current == '-')
210 save_and_next(LS, l); /* optional exponent sign */ 188 save_and_next(ls); /* optional exponent sign */
211 while (isdigit(LS->current)) { 189 while (isdigit(ls->current)) {
212 checkbuffer(LS, l); 190 save_and_next(ls);
213 save_and_next(LS, l);
214 } 191 }
215 } 192 }
216 save(LS, '\0', l); 193 save(ls, '\0');
217 if (!luaO_str2d(luaZ_buffer(LS->buff), &seminfo->r)) 194 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))
218 luaX_lexerror(LS, "malformed number", TK_NUMBER); 195 luaX_lexerror(ls, "malformed number", TK_NUMBER);
219} 196}
220 197
221 198
222static void read_long_string (LexState *LS, SemInfo *seminfo) { 199static void read_long_string (LexState *ls, SemInfo *seminfo) {
223 int cont = 0; 200 int cont = 0;
224 size_t l = 0; 201 save_and_next(ls); /* pass the second `[' */
225 checkbuffer(LS, l); 202 if (currIsNewline(ls)) /* string starts with a newline? */
226 save(LS, '[', l); /* save first `[' */ 203 inclinenumber(ls); /* skip it */
227 save_and_next(LS, l); /* pass the second `[' */
228 if (nextIsNewline(LS)) /* string starts with a newline? */
229 inclinenumber(LS); /* skip it */
230 for (;;) { 204 for (;;) {
231 checkbuffer(LS, l); 205 switch (ls->current) {
232 switch (LS->current) {
233 case EOZ: 206 case EOZ:
234 save(LS, '\0', l); 207 luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
235 luaX_lexerror(LS, (seminfo) ? "unfinished long string" :
236 "unfinished long comment", TK_EOS); 208 "unfinished long comment", TK_EOS);
237 break; /* to avoid warnings */ 209 break; /* to avoid warnings */
238 case '[': 210 case '[':
239 save_and_next(LS, l); 211 save_and_next(ls);
240 if (LS->current == '[') { 212 if (ls->current == '[') {
241 cont++; 213 cont++;
242 save_and_next(LS, l); 214 save_and_next(ls);
243 } 215 }
244 continue; 216 continue;
245 case ']': 217 case ']':
246 save_and_next(LS, l); 218 save_and_next(ls);
247 if (LS->current == ']') { 219 if (ls->current == ']') {
248 if (cont == 0) goto endloop; 220 if (cont == 0) goto endloop;
249 cont--; 221 cont--;
250 save_and_next(LS, l); 222 save_and_next(ls);
251 } 223 }
252 continue; 224 continue;
253 case '\n': 225 case '\n':
254 case '\r': 226 case '\r':
255 save(LS, '\n', l); 227 save(ls, '\n');
256 inclinenumber(LS); 228 inclinenumber(ls);
257 if (!seminfo) l = 0; /* reset buffer to avoid wasting space */ 229 if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */
258 continue; 230 continue;
259 default: 231 default:
260 save_and_next(LS, l); 232 if (seminfo) save_and_next(ls);
233 else next(ls);
261 } 234 }
262 } endloop: 235 } endloop:
263 save_and_next(LS, l); /* skip the second `]' */ 236 save_and_next(ls); /* skip the second `]' */
264 save(LS, '\0', l);
265 if (seminfo) 237 if (seminfo)
266 seminfo->ts = luaX_newstring(LS, luaZ_buffer(LS->buff) + 2, l - 5); 238 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 2,
239 luaZ_bufflen(ls->buff) - 4);
267} 240}
268 241
269 242
270static void read_string (LexState *LS, int del, SemInfo *seminfo) { 243static void read_string (LexState *ls, int del, SemInfo *seminfo) {
271 size_t l = 0; 244 save_and_next(ls);
272 checkbuffer(LS, l); 245 while (ls->current != del) {
273 save_and_next(LS, l); 246 switch (ls->current) {
274 while (LS->current != del) {
275 checkbuffer(LS, l);
276 switch (LS->current) {
277 case EOZ: 247 case EOZ:
278 save(LS, '\0', l); 248 luaX_lexerror(ls, "unfinished string", TK_EOS);
279 luaX_lexerror(LS, "unfinished string", TK_EOS); 249 continue; /* to avoid warnings */
280 break; /* to avoid warnings */
281 case '\n': 250 case '\n':
282 case '\r': 251 case '\r':
283 save(LS, '\0', l); 252 luaX_lexerror(ls, "unfinished string", TK_STRING);
284 luaX_lexerror(LS, "unfinished string", TK_STRING); 253 continue; /* to avoid warnings */
285 break; /* to avoid warnings */ 254 case '\\': {
286 case '\\': 255 int c;
287 next(LS); /* do not save the `\' */ 256 next(ls); /* do not save the `\' */
288 switch (LS->current) { 257 switch (ls->current) {
289 case 'a': save(LS, '\a', l); next(LS); break; 258 case 'a': c = '\a'; break;
290 case 'b': save(LS, '\b', l); next(LS); break; 259 case 'b': c = '\b'; break;
291 case 'f': save(LS, '\f', l); next(LS); break; 260 case 'f': c = '\f'; break;
292 case 'n': save(LS, '\n', l); next(LS); break; 261 case 'n': c = '\n'; break;
293 case 'r': save(LS, '\r', l); next(LS); break; 262 case 'r': c = '\r'; break;
294 case 't': save(LS, '\t', l); next(LS); break; 263 case 't': c = '\t'; break;
295 case 'v': save(LS, '\v', l); next(LS); break; 264 case 'v': c = '\v'; break;
296 case '\n': /* go through */ 265 case '\n': /* go through */
297 case '\r': save(LS, '\n', l); inclinenumber(LS); break; 266 case '\r': save(ls, '\n'); inclinenumber(ls); continue;
298 case EOZ: break; /* will raise an error next loop */ 267 case EOZ: continue; /* will raise an error next loop */
299 default: { 268 default: {
300 if (!isdigit(LS->current)) 269 if (!isdigit(ls->current))
301 save_and_next(LS, l); /* handles \\, \", \', and \? */ 270 save_and_next(ls); /* handles \\, \", \', and \? */
302 else { /* \xxx */ 271 else { /* \xxx */
303 int c = 0;
304 int i = 0; 272 int i = 0;
273 c = 0;
305 do { 274 do {
306 c = 10*c + (LS->current-'0'); 275 c = 10*c + (ls->current-'0');
307 next(LS); 276 next(ls);
308 } while (++i<3 && isdigit(LS->current)); 277 } while (++i<3 && isdigit(ls->current));
309 if (c > UCHAR_MAX) { 278 if (c > UCHAR_MAX)
310 save(LS, '\0', l); 279 luaX_lexerror(ls, "escape sequence too large", TK_STRING);
311 luaX_lexerror(LS, "escape sequence too large", TK_STRING); 280 save(ls, c);
312 }
313 save(LS, c, l);
314 } 281 }
282 continue;
315 } 283 }
316 } 284 }
317 break; 285 save(ls, c);
286 next(ls);
287 continue;
288 }
318 default: 289 default:
319 save_and_next(LS, l); 290 save_and_next(ls);
320 } 291 }
321 } 292 }
322 save_and_next(LS, l); /* skip delimiter */ 293 save_and_next(ls); /* skip delimiter */
323 save(LS, '\0', l); 294 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
324 seminfo->ts = luaX_newstring(LS, luaZ_buffer(LS->buff) + 1, l - 3); 295 luaZ_bufflen(ls->buff) - 2);
325} 296}
326 297
327 298
328int luaX_lex (LexState *LS, SemInfo *seminfo) { 299int luaX_lex (LexState *ls, SemInfo *seminfo) {
300 luaZ_resetbuffer(ls->buff);
329 for (;;) { 301 for (;;) {
330 switch (LS->current) { 302 switch (ls->current) {
331
332 case '\n': 303 case '\n':
333 case '\r': { 304 case '\r': {
334 inclinenumber(LS); 305 inclinenumber(ls);
335 continue; 306 continue;
336 } 307 }
337 case '-': { 308 case '-': {
338 next(LS); 309 next(ls);
339 if (LS->current != '-') return '-'; 310 if (ls->current != '-') return '-';
340 /* else is a comment */ 311 /* else is a comment */
341 next(LS); 312 next(ls);
342 if (LS->current == '[' && (next(LS), LS->current == '[')) 313 if (ls->current == '[' && (next(ls), ls->current == '[')) {
343 read_long_string(LS, NULL); /* long comment */ 314 read_long_string(ls, NULL); /* long comment */
315 luaZ_resetbuffer(ls->buff);
316 }
344 else /* short comment */ 317 else /* short comment */
345 while (!nextIsNewline(LS) && LS->current != EOZ) 318 while (!currIsNewline(ls) && ls->current != EOZ)
346 next(LS); 319 next(ls);
347 continue; 320 continue;
348 } 321 }
349 case '[': { 322 case '[': {
350 next(LS); 323 save_and_next(ls);
351 if (LS->current != '[') return '['; 324 if (ls->current != '[') return '[';
352 else { 325 else {
353 read_long_string(LS, seminfo); 326 read_long_string(ls, seminfo);
354 return TK_STRING; 327 return TK_STRING;
355 } 328 }
356 } 329 }
357 case '=': { 330 case '=': {
358 next(LS); 331 next(ls);
359 if (LS->current != '=') return '='; 332 if (ls->current != '=') return '=';
360 else { next(LS); return TK_EQ; } 333 else { next(ls); return TK_EQ; }
361 } 334 }
362 case '<': { 335 case '<': {
363 next(LS); 336 next(ls);
364 if (LS->current != '=') return '<'; 337 if (ls->current != '=') return '<';
365 else { next(LS); return TK_LE; } 338 else { next(ls); return TK_LE; }
366 } 339 }
367 case '>': { 340 case '>': {
368 next(LS); 341 next(ls);
369 if (LS->current != '=') return '>'; 342 if (ls->current != '=') return '>';
370 else { next(LS); return TK_GE; } 343 else { next(ls); return TK_GE; }
371 } 344 }
372 case '~': { 345 case '~': {
373 next(LS); 346 next(ls);
374 if (LS->current != '=') return '~'; 347 if (ls->current != '=') return '~';
375 else { next(LS); return TK_NE; } 348 else { next(ls); return TK_NE; }
376 } 349 }
377 case '"': 350 case '"':
378 case '\'': { 351 case '\'': {
379 read_string(LS, LS->current, seminfo); 352 read_string(ls, ls->current, seminfo);
380 return TK_STRING; 353 return TK_STRING;
381 } 354 }
382 case '.': { 355 case '.': {
383 next(LS); 356 save_and_next(ls);
384 if (LS->current == '.') { 357 if (ls->current == '.') {
385 next(LS); 358 next(ls);
386 if (LS->current == '.') { 359 if (ls->current == '.') {
387 next(LS); 360 next(ls);
388 return TK_DOTS; /* ... */ 361 return TK_DOTS; /* ... */
389 } 362 }
390 else return TK_CONCAT; /* .. */ 363 else return TK_CONCAT; /* .. */
391 } 364 }
392 else if (!isdigit(LS->current)) return '.'; 365 else if (!isdigit(ls->current)) return '.';
393 else { 366 else {
394 read_numeral(LS, 1, seminfo); 367 read_numeral(ls, seminfo);
395 return TK_NUMBER; 368 return TK_NUMBER;
396 } 369 }
397 } 370 }
@@ -399,30 +372,34 @@ int luaX_lex (LexState *LS, SemInfo *seminfo) {
399 return TK_EOS; 372 return TK_EOS;
400 } 373 }
401 default: { 374 default: {
402 if (isspace(LS->current)) { 375 if (isspace(ls->current)) {
403 lua_assert(!nextIsNewline(LS)); 376 lua_assert(!currIsNewline(ls));
404 next(LS); 377 next(ls);
405 continue; 378 continue;
406 } 379 }
407 else if (isdigit(LS->current)) { 380 else if (isdigit(ls->current)) {
408 read_numeral(LS, 0, seminfo); 381 read_numeral(ls, seminfo);
409 return TK_NUMBER; 382 return TK_NUMBER;
410 } 383 }
411 else if (isalpha(LS->current) || LS->current == '_') { 384 else if (isalpha(ls->current) || ls->current == '_') {
412 /* identifier or reserved word */ 385 /* identifier or reserved word */
413 size_t l = readname(LS); 386 TString *ts;
414 TString *ts = luaX_newstring(LS, luaZ_buffer(LS->buff), l); 387 do {
388 save_and_next(ls);
389 } while (isalnum(ls->current) || ls->current == '_');
390 ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
391 luaZ_bufflen(ls->buff));
415 if (ts->tsv.reserved > 0) /* reserved word? */ 392 if (ts->tsv.reserved > 0) /* reserved word? */
416 return ts->tsv.reserved - 1 + FIRST_RESERVED; 393 return ts->tsv.reserved - 1 + FIRST_RESERVED;
417 seminfo->ts = ts; 394 seminfo->ts = ts;
418 return TK_NAME; 395 return TK_NAME;
419 } 396 }
420 else { 397 else {
421 int c = LS->current; 398 int c = ls->current;
422 if (iscntrl(c)) 399 if (iscntrl(c))
423 luaX_error(LS, "invalid control char", 400 luaX_error(ls, "invalid control char",
424 luaO_pushfstring(LS->L, "char(%d)", c)); 401 luaO_pushfstring(ls->L, "char(%d)", c));
425 next(LS); 402 next(ls);
426 return c; /* single-char tokens (+ - / ...) */ 403 return c; /* single-char tokens (+ - / ...) */
427 } 404 }
428 } 405 }