diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2009-05-18 14:28:04 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2009-05-18 14:28:04 -0300 |
| commit | 889284ebd01dbc923403b0aa38b82dc80ed86af8 (patch) | |
| tree | 739b2ee0b7b76da05843a0b646137b142e3e8470 | |
| parent | 6956331093e3a8d21c795b3bb125594b8ed6617b (diff) | |
| download | lua-889284ebd01dbc923403b0aa38b82dc80ed86af8.tar.gz lua-889284ebd01dbc923403b0aa38b82dc80ed86af8.tar.bz2 lua-889284ebd01dbc923403b0aa38b82dc80ed86af8.zip | |
hexadecimal escape sequences in strings + better error messages for
bad decimal escape sequences
| -rw-r--r-- | llex.c | 77 |
1 file changed, 55 insertions, 22 deletions
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: llex.c,v 2.31 2009/02/19 17:18:25 roberto Exp roberto $ | 2 | ** $Id: llex.c,v 2.32 2009/03/11 13:27:32 roberto Exp roberto $ |
| 3 | ** Lexical Analyzer | 3 | ** Lexical Analyzer |
| 4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
| 5 | */ | 5 | */ |
| @@ -29,7 +29,6 @@ | |||
| 29 | 29 | ||
| 30 | 30 | ||
| 31 | 31 | ||
| 32 | |||
| 33 | #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') | 32 | #define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') |
| 34 | 33 | ||
| 35 | 34 | ||
| @@ -52,14 +51,14 @@ static void lexerror (LexState *ls, const char *msg, int token); | |||
| 52 | 51 | ||
| 53 | static void save (LexState *ls, int c) { | 52 | static void save (LexState *ls, int c) { |
| 54 | Mbuffer *b = ls->buff; | 53 | Mbuffer *b = ls->buff; |
| 55 | if (b->n + 1 > b->buffsize) { | 54 | if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) { |
| 56 | size_t newsize; | 55 | size_t newsize; |
| 57 | if (b->buffsize >= MAX_SIZET/2) | 56 | if (luaZ_sizebuffer(b) >= MAX_SIZET/2) |
| 58 | lexerror(ls, "lexical element too long", 0); | 57 | lexerror(ls, "lexical element too long", 0); |
| 59 | newsize = b->buffsize * 2; | 58 | newsize = luaZ_sizebuffer(b) * 2; |
| 60 | luaZ_resizebuffer(ls->L, b, newsize); | 59 | luaZ_resizebuffer(ls->L, b, newsize); |
| 61 | } | 60 | } |
| 62 | b->buffer[b->n++] = cast(char, c); | 61 | b->buffer[luaZ_bufflen(b)++] = cast(char, c); |
| 63 | } | 62 | } |
| 64 | 63 | ||
| 65 | 64 | ||
| @@ -264,6 +263,48 @@ static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { | |||
| 264 | } | 263 | } |
| 265 | 264 | ||
| 266 | 265 | ||
| 266 | static int hexavalue (int c) { | ||
| 267 | if (lisdigit(c)) return c - '0'; | ||
| 268 | else if (lisupper(c)) return c - 'A' + 10; | ||
| 269 | else return c - 'a' + 10; | ||
| 270 | } | ||
| 271 | |||
| 272 | |||
| 273 | static int readhexaesc (LexState *ls) { | ||
| 274 | int c1, c2 = EOZ; | ||
| 275 | if (!lisxdigit(c1 = next(ls)) || !lisxdigit(c2 = next(ls))) { | ||
| 276 | luaZ_resetbuffer(ls->buff); /* prepare error message */ | ||
| 277 | save(ls, '\\'); save(ls, 'x'); | ||
| 278 | if (c1 != EOZ) save(ls, c1); | ||
| 279 | if (c2 != EOZ) save(ls, c2); | ||
| 280 | lexerror(ls, "hexadecimal digit expected", TK_STRING); | ||
| 281 | } | ||
| 282 | return (hexavalue(c1) << 4) + hexavalue(c2); | ||
| 283 | } | ||
| 284 | |||
| 285 | |||
| 286 | static int readdecesc (LexState *ls) { | ||
| 287 | int c1 = ls->current, c2, c3; | ||
| 288 | int c = c1 - '0'; | ||
| 289 | if (lisdigit(c2 = next(ls))) { | ||
| 290 | c = 10*c + c2 - '0'; | ||
| 291 | if (lisdigit(c3 = next(ls))) { | ||
| 292 | c = 10*c + c3 - '0'; | ||
| 293 | if (c > UCHAR_MAX) { | ||
| 294 | luaZ_resetbuffer(ls->buff); /* prepare error message */ | ||
| 295 | save(ls, '\\'); | ||
| 296 | save(ls, c1); save(ls, c2); save(ls, c3); | ||
| 297 | lexerror(ls, "decimal escape too large", TK_STRING); | ||
| 298 | } | ||
| 299 | return c; | ||
| 300 | } | ||
| 301 | } | ||
| 302 | /* else, has read one character that was not a digit */ | ||
| 303 | zungetc(ls->z); /* return it to input stream */ | ||
| 304 | return c; | ||
| 305 | } | ||
| 306 | |||
| 307 | |||
| 267 | static void read_string (LexState *ls, int del, SemInfo *seminfo) { | 308 | static void read_string (LexState *ls, int del, SemInfo *seminfo) { |
| 268 | save_and_next(ls); | 309 | save_and_next(ls); |
| 269 | while (ls->current != del) { | 310 | while (ls->current != del) { |
| @@ -275,8 +316,8 @@ static void read_string (LexState *ls, int del, SemInfo *seminfo) { | |||
| 275 | case '\r': | 316 | case '\r': |
| 276 | lexerror(ls, "unfinished string", TK_STRING); | 317 | lexerror(ls, "unfinished string", TK_STRING); |
| 277 | continue; /* to avoid warnings */ | 318 | continue; /* to avoid warnings */ |
| 278 | case '\\': { | 319 | case '\\': { /* escape sequences */ |
| 279 | int c; | 320 | int c; /* final character to be saved */ |
| 280 | next(ls); /* do not save the `\' */ | 321 | next(ls); /* do not save the `\' */ |
| 281 | switch (ls->current) { | 322 | switch (ls->current) { |
| 282 | case 'a': c = '\a'; break; | 323 | case 'a': c = '\a'; break; |
| @@ -286,28 +327,20 @@ static void read_string (LexState *ls, int del, SemInfo *seminfo) { | |||
| 286 | case 'r': c = '\r'; break; | 327 | case 'r': c = '\r'; break; |
| 287 | case 't': c = '\t'; break; | 328 | case 't': c = '\t'; break; |
| 288 | case 'v': c = '\v'; break; | 329 | case 'v': c = '\v'; break; |
| 330 | case 'x': c = readhexaesc(ls); break; | ||
| 289 | case '\n': | 331 | case '\n': |
| 290 | case '\r': save(ls, '\n'); inclinenumber(ls); continue; | 332 | case '\r': save(ls, '\n'); inclinenumber(ls); continue; |
| 291 | case EOZ: continue; /* will raise an error next loop */ | 333 | case EOZ: continue; /* will raise an error next loop */ |
| 292 | default: { | 334 | default: { |
| 293 | if (!lisdigit(ls->current)) | 335 | if (!lisdigit(ls->current)) |
| 294 | save_and_next(ls); /* handles \\, \", \', and \? */ | 336 | c = ls->current; /* handles \\, \", \', and \? */ |
| 295 | else { /* \xxx */ | 337 | else /* digital escape \ddd */ |
| 296 | int i = 0; | 338 | c = readdecesc(ls); |
| 297 | c = 0; | 339 | break; |
| 298 | do { | ||
| 299 | c = 10*c + (ls->current-'0'); | ||
| 300 | next(ls); | ||
| 301 | } while (++i<3 && lisdigit(ls->current)); | ||
| 302 | if (c > UCHAR_MAX) | ||
| 303 | lexerror(ls, "escape sequence too large", TK_STRING); | ||
| 304 | save(ls, c); | ||
| 305 | } | ||
| 306 | continue; | ||
| 307 | } | 340 | } |
| 308 | } | 341 | } |
| 309 | save(ls, c); | ||
| 310 | next(ls); | 342 | next(ls); |
| 343 | save(ls, c); | ||
| 311 | continue; | 344 | continue; |
| 312 | } | 345 | } |
| 313 | default: | 346 | default: |
