about | summary | refs | log | tree | commit | diff
path: root/llex.c
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2001-02-23 14:17:25 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2001-02-23 14:17:25 -0300
commit 39b79783297bee79db9853b63d199e120a009a8f (patch)
tree c738c621c4c28d8822c2f785400786301985273b /llex.c
parent d164e2294f73d8e69f00d95a66014514b2dd0ec0 (diff)
download lua-39b79783297bee79db9853b63d199e120a009a8f.tar.gz
lua-39b79783297bee79db9853b63d199e120a009a8f.tar.bz2
lua-39b79783297bee79db9853b63d199e120a009a8f.zip
first (big) step to support wide chars
Diffstat (limited to 'llex.c')
-rw-r--r-- llex.c 230
1 files changed, 118 insertions, 112 deletions
diff --git a/llex.c b/llex.c
index 386ea316..0bbc43be 100644
--- a/llex.c
+++ b/llex.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: llex.c,v 1.78 2001/02/22 17:15:18 roberto Exp roberto $ 2** $Id: llex.c,v 1.79 2001/02/22 18:59:59 roberto Exp roberto $
3** Lexical Analyzer 3** Lexical Analyzer
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -25,10 +25,13 @@
25 25
26 26
27/* ORDER RESERVED */ 27/* ORDER RESERVED */
28static const char *const token2string [] = { 28static const l_char *const token2string [] = {
29 "and", "break", "do", "else", "elseif", "end", "for", 29 l_s("and"), l_s("break"), l_s("do"), l_s("else"), l_s("elseif"),
30 "function", "if", "local", "nil", "not", "or", "repeat", "return", "then", 30 l_s("end"), l_s("for"), l_s("function"), l_s("if"), l_s("local"),
31 "until", "while", "", "..", "...", "==", ">=", "<=", "~=", "", "", "<eof>"}; 31 l_s("nil"), l_s("not"), l_s("or"), l_s("repeat"), l_s("return"),
32 l_s("then"), l_s("until"), l_s("while"), l_s(""), l_s(".."), l_s("..."),
33 l_s("=="), l_s(">="), l_s("<="), l_s("~="), l_s(""), l_s(""), l_s("<eof>")
34};
32 35
33 36
34void luaX_init (lua_State *L) { 37void luaX_init (lua_State *L) {
@@ -44,37 +47,38 @@ void luaX_init (lua_State *L) {
44#define MAXSRC 80 47#define MAXSRC 80
45 48
46 49
47void luaX_checklimit (LexState *ls, int val, int limit, const char *msg) { 50void luaX_checklimit (LexState *ls, int val, int limit, const l_char *msg) {
48 if (val > limit) { 51 if (val > limit) {
49 char buff[90]; 52 l_char buff[90];
50 sprintf(buff, "too many %.40s (limit=%d)", msg, limit); 53 sprintf(buff, l_s("too many %.40s (limit=%d)"), msg, limit);
51 luaX_error(ls, buff, ls->t.token); 54 luaX_error(ls, buff, ls->t.token);
52 } 55 }
53} 56}
54 57
55 58
56void luaX_syntaxerror (LexState *ls, const char *s, const char *token) { 59void luaX_syntaxerror (LexState *ls, const l_char *s, const l_char *token) {
57 char buff[MAXSRC]; 60 l_char buff[MAXSRC];
58 luaO_chunkid(buff, getstr(ls->source), sizeof(buff)); 61 luaO_chunkid(buff, getstr(ls->source), sizeof(buff));
59 luaO_verror(ls->L, "%.99s;\n last token read: `%.30s' at line %d in %.80s", 62 luaO_verror(ls->L,
63 l_s("%.99s;\n last token read: `%.30s' at line %d in %.80s"),
60 s, token, ls->linenumber, buff); 64 s, token, ls->linenumber, buff);
61} 65}
62 66
63 67
64void luaX_error (LexState *ls, const char *s, int token) { 68void luaX_error (LexState *ls, const l_char *s, int token) {
65 char buff[TOKEN_LEN]; 69 l_char buff[TOKEN_LEN];
66 luaX_token2str(token, buff); 70 luaX_token2str(token, buff);
67 if (buff[0] == '\0') 71 if (buff[0] == l_c('\0'))
68 luaX_syntaxerror(ls, s, G(ls->L)->Mbuffer); 72 luaX_syntaxerror(ls, s, G(ls->L)->Mbuffer);
69 else 73 else
70 luaX_syntaxerror(ls, s, buff); 74 luaX_syntaxerror(ls, s, buff);
71} 75}
72 76
73 77
74void luaX_token2str (int token, char *s) { 78void luaX_token2str (int token, l_char *s) {
75 if (token < 256) { 79 if (token < 256) {
76 s[0] = (char)token; 80 s[0] = (l_char)token;
77 s[1] = '\0'; 81 s[1] = l_c('\0');
78 } 82 }
79 else 83 else
80 strcpy(s, token2string[token-FIRST_RESERVED]); 84 strcpy(s, token2string[token-FIRST_RESERVED]);
@@ -82,16 +86,16 @@ void luaX_token2str (int token, char *s) {
82 86
83 87
84static void luaX_invalidchar (LexState *ls, int c) { 88static void luaX_invalidchar (LexState *ls, int c) {
85 char buff[8]; 89 l_char buff[8];
86 sprintf(buff, "0x%02X", c); 90 sprintf(buff, l_s("0x%02X"), c);
87 luaX_syntaxerror(ls, "invalid control char", buff); 91 luaX_syntaxerror(ls, l_s("invalid control l_char"), buff);
88} 92}
89 93
90 94
91static void inclinenumber (LexState *LS) { 95static void inclinenumber (LexState *LS) {
92 next(LS); /* skip `\n' */ 96 next(LS); /* skip `\n' */
93 ++LS->linenumber; 97 ++LS->linenumber;
94 luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk"); 98 luaX_checklimit(LS, LS->linenumber, MAX_INT, l_s("lines in a chunk"));
95} 99}
96 100
97 101
@@ -104,10 +108,10 @@ void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) {
104 LS->lastline = 1; 108 LS->lastline = 1;
105 LS->source = source; 109 LS->source = source;
106 next(LS); /* read first char */ 110 next(LS); /* read first char */
107 if (LS->current == '#') { 111 if (LS->current == l_c('#')) {
108 do { /* skip first line */ 112 do { /* skip first line */
109 next(LS); 113 next(LS);
110 } while (LS->current != '\n' && LS->current != EOZ); 114 } while (LS->current != l_c('\n') && LS->current != EOZ);
111 } 115 }
112} 116}
113 117
@@ -126,7 +130,7 @@ void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) {
126#define checkbuffer(L, n, len) if ((len)+(n) > G(L)->Mbuffsize) \ 130#define checkbuffer(L, n, len) if ((len)+(n) > G(L)->Mbuffsize) \
127 luaO_openspace(L, (len)+(n)+EXTRABUFF) 131 luaO_openspace(L, (len)+(n)+EXTRABUFF)
128 132
129#define save(L, c, l) (G(L)->Mbuffer[l++] = (char)c) 133#define save(L, c, l) (G(L)->Mbuffer[l++] = (l_char)c)
130#define save_and_next(L, LS, l) (save(L, LS->current, l), next(LS)) 134#define save_and_next(L, LS, l) (save(L, LS->current, l), next(LS))
131 135
132 136
@@ -137,8 +141,8 @@ static size_t readname (LexState *LS) {
137 do { 141 do {
138 checkbuffer(L, 10, l); 142 checkbuffer(L, 10, l);
139 save_and_next(L, LS, l); 143 save_and_next(L, LS, l);
140 } while (isalnum(LS->current) || LS->current == '_'); 144 } while (isalnum(LS->current) || LS->current == l_c('_'));
141 save(L, '\0', l); 145 save(L, l_c('\0'), l);
142 return l-1; 146 return l-1;
143} 147}
144 148
@@ -148,36 +152,37 @@ static void read_number (LexState *LS, int comma, SemInfo *seminfo) {
148 lua_State *L = LS->L; 152 lua_State *L = LS->L;
149 size_t l = 0; 153 size_t l = 0;
150 checkbuffer(L, 10, l); 154 checkbuffer(L, 10, l);
151 if (comma) save(L, '.', l); 155 if (comma) save(L, l_c('.'), l);
152 while (isdigit(LS->current)) { 156 while (isdigit(LS->current)) {
153 checkbuffer(L, 10, l); 157 checkbuffer(L, 10, l);
154 save_and_next(L, LS, l); 158 save_and_next(L, LS, l);
155 } 159 }
156 if (LS->current == '.') { 160 if (LS->current == l_c('.')) {
157 save_and_next(L, LS, l); 161 save_and_next(L, LS, l);
158 if (LS->current == '.') { 162 if (LS->current == l_c('.')) {
159 save_and_next(L, LS, l); 163 save_and_next(L, LS, l);
160 save(L, '\0', l); 164 save(L, l_c('\0'), l);
161 luaX_error(LS, "ambiguous syntax" 165 luaX_error(LS,
162 " (decimal point x string concatenation)", TK_NUMBER); 166 l_s("ambiguous syntax (decimal point x string concatenation)"),
167 TK_NUMBER);
163 } 168 }
164 } 169 }
165 while (isdigit(LS->current)) { 170 while (isdigit(LS->current)) {
166 checkbuffer(L, 10, l); 171 checkbuffer(L, 10, l);
167 save_and_next(L, LS, l); 172 save_and_next(L, LS, l);
168 } 173 }
169 if (LS->current == 'e' || LS->current == 'E') { 174 if (LS->current == l_c('e') || LS->current == l_c('E')) {
170 save_and_next(L, LS, l); /* read `E' */ 175 save_and_next(L, LS, l); /* read `E' */
171 if (LS->current == '+' || LS->current == '-') 176 if (LS->current == l_c('+') || LS->current == l_c('-'))
172 save_and_next(L, LS, l); /* optional exponent sign */ 177 save_and_next(L, LS, l); /* optional exponent sign */
173 while (isdigit(LS->current)) { 178 while (isdigit(LS->current)) {
174 checkbuffer(L, 10, l); 179 checkbuffer(L, 10, l);
175 save_and_next(L, LS, l); 180 save_and_next(L, LS, l);
176 } 181 }
177 } 182 }
178 save(L, '\0', l); 183 save(L, l_c('\0'), l);
179 if (!luaO_str2d(G(L)->Mbuffer, &seminfo->r)) 184 if (!luaO_str2d(G(L)->Mbuffer, &seminfo->r))
180 luaX_error(LS, "malformed number", TK_NUMBER); 185 luaX_error(LS, l_s("malformed number"), TK_NUMBER);
181} 186}
182 187
183 188
@@ -186,32 +191,32 @@ static void read_long_string (LexState *LS, SemInfo *seminfo) {
186 int cont = 0; 191 int cont = 0;
187 size_t l = 0; 192 size_t l = 0;
188 checkbuffer(L, 10, l); 193 checkbuffer(L, 10, l);
189 save(L, '[', l); /* save first `[' */ 194 save(L, l_c('['), l); /* save first `[' */
190 save_and_next(L, LS, l); /* pass the second `[' */ 195 save_and_next(L, LS, l); /* pass the second `[' */
191 for (;;) { 196 for (;;) {
192 checkbuffer(L, 10, l); 197 checkbuffer(L, 10, l);
193 switch (LS->current) { 198 switch (LS->current) {
194 case EOZ: 199 case EOZ:
195 save(L, '\0', l); 200 save(L, l_c('\0'), l);
196 luaX_error(LS, "unfinished long string", TK_STRING); 201 luaX_error(LS, l_s("unfinished long string"), TK_STRING);
197 break; /* to avoid warnings */ 202 break; /* to avoid warnings */
198 case '[': 203 case l_c('['):
199 save_and_next(L, LS, l); 204 save_and_next(L, LS, l);
200 if (LS->current == '[') { 205 if (LS->current == l_c('[')) {
201 cont++; 206 cont++;
202 save_and_next(L, LS, l); 207 save_and_next(L, LS, l);
203 } 208 }
204 continue; 209 continue;
205 case ']': 210 case l_c(']'):
206 save_and_next(L, LS, l); 211 save_and_next(L, LS, l);
207 if (LS->current == ']') { 212 if (LS->current == l_c(']')) {
208 if (cont == 0) goto endloop; 213 if (cont == 0) goto endloop;
209 cont--; 214 cont--;
210 save_and_next(L, LS, l); 215 save_and_next(L, LS, l);
211 } 216 }
212 continue; 217 continue;
213 case '\n': 218 case l_c('\n'):
214 save(L, '\n', l); 219 save(L, l_c('\n'), l);
215 inclinenumber(LS); 220 inclinenumber(LS);
216 continue; 221 continue;
217 default: 222 default:
@@ -219,7 +224,7 @@ static void read_long_string (LexState *LS, SemInfo *seminfo) {
219 } 224 }
220 } endloop: 225 } endloop:
221 save_and_next(L, LS, l); /* skip the second `]' */ 226 save_and_next(L, LS, l); /* skip the second `]' */
222 save(L, '\0', l); 227 save(L, l_c('\0'), l);
223 seminfo->ts = luaS_newlstr(L, G(L)->Mbuffer+2, l-5); 228 seminfo->ts = luaS_newlstr(L, G(L)->Mbuffer+2, l-5);
224} 229}
225 230
@@ -232,38 +237,38 @@ static void read_string (LexState *LS, int del, SemInfo *seminfo) {
232 while (LS->current != del) { 237 while (LS->current != del) {
233 checkbuffer(L, 10, l); 238 checkbuffer(L, 10, l);
234 switch (LS->current) { 239 switch (LS->current) {
235 case EOZ: case '\n': 240 case EOZ: case l_c('\n'):
236 save(L, '\0', l); 241 save(L, l_c('\0'), l);
237 luaX_error(LS, "unfinished string", TK_STRING); 242 luaX_error(LS, l_s("unfinished string"), TK_STRING);
238 break; /* to avoid warnings */ 243 break; /* to avoid warnings */
239 case '\\': 244 case l_c('\\'):
240 next(LS); /* do not save the `\' */ 245 next(LS); /* do not save the `\' */
241 switch (LS->current) { 246 switch (LS->current) {
242 case 'a': save(L, '\a', l); next(LS); break; 247 case l_c('a'): save(L, l_c('\a'), l); next(LS); break;
243 case 'b': save(L, '\b', l); next(LS); break; 248 case l_c('b'): save(L, l_c('\b'), l); next(LS); break;
244 case 'f': save(L, '\f', l); next(LS); break; 249 case l_c('f'): save(L, l_c('\f'), l); next(LS); break;
245 case 'n': save(L, '\n', l); next(LS); break; 250 case l_c('n'): save(L, l_c('\n'), l); next(LS); break;
246 case 'r': save(L, '\r', l); next(LS); break; 251 case l_c('r'): save(L, l_c('\r'), l); next(LS); break;
247 case 't': save(L, '\t', l); next(LS); break; 252 case l_c('t'): save(L, l_c('\t'), l); next(LS); break;
248 case 'v': save(L, '\v', l); next(LS); break; 253 case l_c('v'): save(L, l_c('\v'), l); next(LS); break;
249 case '\n': save(L, '\n', l); inclinenumber(LS); break; 254 case l_c('\n'): save(L, l_c('\n'), l); inclinenumber(LS); break;
250 case '0': case '1': case '2': case '3': case '4': 255 default: {
251 case '5': case '6': case '7': case '8': case '9': { 256 if (!isdigit(LS->current))
252 int c = 0; 257 save_and_next(L, LS, l); /* handles \\, \", \', and \? */
253 int i = 0; 258 else { /* \xxx */
254 do { 259 int c = 0;
255 c = 10*c + (LS->current-'0'); 260 int i = 0;
256 next(LS); 261 do {
257 } while (++i<3 && isdigit(LS->current)); 262 c = 10*c + (LS->current-l_c('0'));
258 if (c > UCHAR_MAX) { 263 next(LS);
259 save(L, '\0', l); 264 } while (++i<3 && isdigit(LS->current));
260 luaX_error(LS, "escape sequence too large", TK_STRING); 265 if (c > UCHAR_MAX) {
266 save(L, l_c('\0'), l);
267 luaX_error(LS, l_s("escape sequence too large"), TK_STRING);
268 }
269 save(L, c, l);
261 } 270 }
262 save(L, c, l);
263 break;
264 } 271 }
265 default: /* handles \\, \", \', and \? */
266 save_and_next(L, LS, l);
267 } 272 }
268 break; 273 break;
269 default: 274 default:
@@ -271,7 +276,7 @@ static void read_string (LexState *LS, int del, SemInfo *seminfo) {
271 } 276 }
272 } 277 }
273 save_and_next(L, LS, l); /* skip delimiter */ 278 save_and_next(L, LS, l); /* skip delimiter */
274 save(L, '\0', l); 279 save(L, l_c('\0'), l);
275 seminfo->ts = luaS_newlstr(L, G(L)->Mbuffer+1, l-3); 280 seminfo->ts = luaS_newlstr(L, G(L)->Mbuffer+1, l-3);
276} 281}
277 282
@@ -280,92 +285,85 @@ int luaX_lex (LexState *LS, SemInfo *seminfo) {
280 for (;;) { 285 for (;;) {
281 switch (LS->current) { 286 switch (LS->current) {
282 287
283 case ' ': case '\t': case '\r': /* `\r' to avoid problems with DOS */ 288 case l_c(' '): case l_c('\t'): case l_c('\r'): /* `\r' to avoid problems with DOS */
284 next(LS); 289 next(LS);
285 continue; 290 continue;
286 291
287 case '\n': 292 case l_c('\n'):
288 inclinenumber(LS); 293 inclinenumber(LS);
289 continue; 294 continue;
290 295
291 case '$': 296 case l_c('$'):
292 luaX_error(LS, "unexpected `$' (pragmas are no longer supported)", '$'); 297 luaX_error(LS,
298 l_s("unexpected `$' (pragmas are no longer supported)"),
299 LS->current);
293 break; 300 break;
294 301
295 case '-': 302 case l_c('-'):
296 next(LS); 303 next(LS);
297 if (LS->current != '-') return '-'; 304 if (LS->current != l_c('-')) return l_c('-');
298 do { next(LS); } while (LS->current != '\n' && LS->current != EOZ); 305 do { next(LS); } while (LS->current != l_c('\n') && LS->current != EOZ);
299 continue; 306 continue;
300 307
301 case '[': 308 case l_c('['):
302 next(LS); 309 next(LS);
303 if (LS->current != '[') return '['; 310 if (LS->current != l_c('[')) return l_c('[');
304 else { 311 else {
305 read_long_string(LS, seminfo); 312 read_long_string(LS, seminfo);
306 return TK_STRING; 313 return TK_STRING;
307 } 314 }
308 315
309 case '=': 316 case l_c('='):
310 next(LS); 317 next(LS);
311 if (LS->current != '=') return '='; 318 if (LS->current != l_c('=')) return l_c('=');
312 else { next(LS); return TK_EQ; } 319 else { next(LS); return TK_EQ; }
313 320
314 case '<': 321 case l_c('<'):
315 next(LS); 322 next(LS);
316 if (LS->current != '=') return '<'; 323 if (LS->current != l_c('=')) return l_c('<');
317 else { next(LS); return TK_LE; } 324 else { next(LS); return TK_LE; }
318 325
319 case '>': 326 case l_c('>'):
320 next(LS); 327 next(LS);
321 if (LS->current != '=') return '>'; 328 if (LS->current != l_c('=')) return l_c('>');
322 else { next(LS); return TK_GE; } 329 else { next(LS); return TK_GE; }
323 330
324 case '~': 331 case l_c('~'):
325 next(LS); 332 next(LS);
326 if (LS->current != '=') return '~'; 333 if (LS->current != l_c('=')) return l_c('~');
327 else { next(LS); return TK_NE; } 334 else { next(LS); return TK_NE; }
328 335
329 case '"': 336 case l_c('"'):
330 case '\'': 337 case l_c('\''):
331 read_string(LS, LS->current, seminfo); 338 read_string(LS, LS->current, seminfo);
332 return TK_STRING; 339 return TK_STRING;
333 340
334 case '.': 341 case l_c('.'):
335 next(LS); 342 next(LS);
336 if (LS->current == '.') { 343 if (LS->current == l_c('.')) {
337 next(LS); 344 next(LS);
338 if (LS->current == '.') { 345 if (LS->current == l_c('.')) {
339 next(LS); 346 next(LS);
340 return TK_DOTS; /* ... */ 347 return TK_DOTS; /* ... */
341 } 348 }
342 else return TK_CONCAT; /* .. */ 349 else return TK_CONCAT; /* .. */
343 } 350 }
344 else if (!isdigit(LS->current)) return '.'; 351 else if (!isdigit(LS->current)) return l_c('.');
345 else { 352 else {
346 read_number(LS, 1, seminfo); 353 read_number(LS, 1, seminfo);
347 return TK_NUMBER; 354 return TK_NUMBER;
348 } 355 }
349 356
350 case '0': case '1': case '2': case '3': case '4':
351 case '5': case '6': case '7': case '8': case '9':
352 read_number(LS, 0, seminfo);
353 return TK_NUMBER;
354
355 case EOZ: 357 case EOZ:
356 return TK_EOS; 358 return TK_EOS;
357 359
358 case '_': goto tname; 360 default: {
359 361 if (isdigit(LS->current)) {
360 default: 362 read_number(LS, 0, seminfo);
361 if (!isalpha(LS->current)) { 363 return TK_NUMBER;
362 int c = LS->current;
363 if (iscntrl(c))
364 luaX_invalidchar(LS, c);
365 next(LS);
366 return c;
367 } 364 }
368 tname: { /* identifier or reserved word */ 365 else if (isalpha(LS->current) || LS->current == l_c('_')) {
366 /* identifier or reserved word */
369 size_t l = readname(LS); 367 size_t l = readname(LS);
370 TString *ts = luaS_newlstr(LS->L, G(LS->L)->Mbuffer, l); 368 TString *ts = luaS_newlstr(LS->L, G(LS->L)->Mbuffer, l);
371 if (ts->marked >= RESERVEDMARK) /* reserved word? */ 369 if (ts->marked >= RESERVEDMARK) /* reserved word? */
@@ -373,6 +371,14 @@ int luaX_lex (LexState *LS, SemInfo *seminfo) {
373 seminfo->ts = ts; 371 seminfo->ts = ts;
374 return TK_NAME; 372 return TK_NAME;
375 } 373 }
374 else {
375 int c = LS->current;
376 if (iscntrl(c))
377 luaX_invalidchar(LS, c);
378 next(LS);
379 return c; /* single-char tokens (+ - / ...) */
380 }
381 }
376 } 382 }
377 } 383 }
378} 384}