diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2012-07-31 14:48:42 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2012-07-31 14:48:42 -0300 |
commit | 6625cbecd101bfc0bc1d1034a4b527bd9a17a5de (patch) | |
tree | a69a04b8ba006ede9eada251502c8381b72173be | |
parent | 4ac55997ecc70410c065a9494a694c2ad25c0104 (diff) | |
download | lua-6625cbecd101bfc0bc1d1034a4b527bd9a17a5de.tar.gz lua-6625cbecd101bfc0bc1d1034a4b527bd9a17a5de.tar.bz2 lua-6625cbecd101bfc0bc1d1034a4b527bd9a17a5de.zip |
Bug: Some patterns can overflow the C stack, due to recursion
(Took the opportunity to refactor function 'match')
-rw-r--r-- | lstrlib.c | 201 |
1 files changed, 124 insertions, 77 deletions
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lstrlib.c,v 1.175 2012/04/20 13:16:48 roberto Exp roberto $ | 2 | ** $Id: lstrlib.c,v 1.176 2012/05/23 15:37:09 roberto Exp roberto $ |
3 | ** Standard library for string operations and pattern-matching | 3 | ** Standard library for string operations and pattern-matching |
4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
5 | */ | 5 | */ |
@@ -194,7 +194,9 @@ static int str_dump (lua_State *L) { | |||
194 | #define CAP_UNFINISHED (-1) | 194 | #define CAP_UNFINISHED (-1) |
195 | #define CAP_POSITION (-2) | 195 | #define CAP_POSITION (-2) |
196 | 196 | ||
197 | |||
197 | typedef struct MatchState { | 198 | typedef struct MatchState { |
199 | int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ | ||
198 | const char *src_init; /* init of source string */ | 200 | const char *src_init; /* init of source string */ |
199 | const char *src_end; /* end ('\0') of source string */ | 201 | const char *src_end; /* end ('\0') of source string */ |
200 | const char *p_end; /* end ('\0') of pattern */ | 202 | const char *p_end; /* end ('\0') of pattern */ |
@@ -207,6 +209,16 @@ typedef struct MatchState { | |||
207 | } MatchState; | 209 | } MatchState; |
208 | 210 | ||
209 | 211 | ||
212 | /* recursive function */ | ||
213 | static const char *match (MatchState *ms, const char *s, const char *p); | ||
214 | |||
215 | |||
216 | /* maximum recursion depth for 'match' */ | ||
217 | #if !defined(MAXCCALLS) | ||
218 | #define MAXCCALLS 200 | ||
219 | #endif | ||
220 | |||
221 | |||
210 | #define L_ESC '%' | 222 | #define L_ESC '%' |
211 | #define SPECIALS "^$*+?.([%-" | 223 | #define SPECIALS "^$*+?.([%-" |
212 | 224 | ||
@@ -294,19 +306,22 @@ static int matchbracketclass (int c, const char *p, const char *ec) { | |||
294 | } | 306 | } |
295 | 307 | ||
296 | 308 | ||
297 | static int singlematch (int c, const char *p, const char *ep) { | 309 | static int singlematch (MatchState *ms, const char *s, const char *p, |
298 | switch (*p) { | 310 | const char *ep) { |
299 | case '.': return 1; /* matches any char */ | 311 | if (s >= ms->src_end) |
300 | case L_ESC: return match_class(c, uchar(*(p+1))); | 312 | return 0; |
301 | case '[': return matchbracketclass(c, p, ep-1); | 313 | else { |
302 | default: return (uchar(*p) == c); | 314 | int c = uchar(*s); |
315 | switch (*p) { | ||
316 | case '.': return 1; /* matches any char */ | ||
317 | case L_ESC: return match_class(c, uchar(*(p+1))); | ||
318 | case '[': return matchbracketclass(c, p, ep-1); | ||
319 | default: return (uchar(*p) == c); | ||
320 | } | ||
303 | } | 321 | } |
304 | } | 322 | } |
305 | 323 | ||
306 | 324 | ||
307 | static const char *match (MatchState *ms, const char *s, const char *p); | ||
308 | |||
309 | |||
310 | static const char *matchbalance (MatchState *ms, const char *s, | 325 | static const char *matchbalance (MatchState *ms, const char *s, |
311 | const char *p) { | 326 | const char *p) { |
312 | if (p >= ms->p_end - 1) | 327 | if (p >= ms->p_end - 1) |
@@ -331,7 +346,7 @@ static const char *matchbalance (MatchState *ms, const char *s, | |||
331 | static const char *max_expand (MatchState *ms, const char *s, | 346 | static const char *max_expand (MatchState *ms, const char *s, |
332 | const char *p, const char *ep) { | 347 | const char *p, const char *ep) { |
333 | ptrdiff_t i = 0; /* counts maximum expand for item */ | 348 | ptrdiff_t i = 0; /* counts maximum expand for item */ |
334 | while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep)) | 349 | while (singlematch(ms, s + i, p, ep)) |
335 | i++; | 350 | i++; |
336 | /* keeps trying to match with the maximum repetitions */ | 351 | /* keeps trying to match with the maximum repetitions */ |
337 | while (i>=0) { | 352 | while (i>=0) { |
@@ -349,7 +364,7 @@ static const char *min_expand (MatchState *ms, const char *s, | |||
349 | const char *res = match(ms, s, ep+1); | 364 | const char *res = match(ms, s, ep+1); |
350 | if (res != NULL) | 365 | if (res != NULL) |
351 | return res; | 366 | return res; |
352 | else if (s<ms->src_end && singlematch(uchar(*s), p, ep)) | 367 | else if (singlematch(ms, s, p, ep)) |
353 | s++; /* try with one more repetition */ | 368 | s++; /* try with one more repetition */ |
354 | else return NULL; | 369 | else return NULL; |
355 | } | 370 | } |
@@ -393,79 +408,105 @@ static const char *match_capture (MatchState *ms, const char *s, int l) { | |||
393 | 408 | ||
394 | 409 | ||
395 | static const char *match (MatchState *ms, const char *s, const char *p) { | 410 | static const char *match (MatchState *ms, const char *s, const char *p) { |
411 | if (ms->matchdepth-- == 0) | ||
412 | luaL_error(ms->L, "pattern too complex"); | ||
396 | init: /* using goto's to optimize tail recursion */ | 413 | init: /* using goto's to optimize tail recursion */ |
397 | if (p == ms->p_end) /* end of pattern? */ | 414 | if (p != ms->p_end) { /* end of pattern? */ |
398 | return s; /* match succeeded */ | 415 | switch (*p) { |
399 | switch (*p) { | 416 | case '(': { /* start capture */ |
400 | case '(': { /* start capture */ | 417 | if (*(p + 1) == ')') /* position capture? */ |
401 | if (*(p+1) == ')') /* position capture? */ | 418 | s = start_capture(ms, s, p + 2, CAP_POSITION); |
402 | return start_capture(ms, s, p+2, CAP_POSITION); | 419 | else |
403 | else | 420 | s = start_capture(ms, s, p + 1, CAP_UNFINISHED); |
404 | return start_capture(ms, s, p+1, CAP_UNFINISHED); | 421 | break; |
405 | } | ||
406 | case ')': { /* end capture */ | ||
407 | return end_capture(ms, s, p+1); | ||
408 | } | ||
409 | case '$': { | ||
410 | if ((p+1) == ms->p_end) /* is the `$' the last char in pattern? */ | ||
411 | return (s == ms->src_end) ? s : NULL; /* check end of string */ | ||
412 | else goto dflt; | ||
413 | } | ||
414 | case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ | ||
415 | switch (*(p+1)) { | ||
416 | case 'b': { /* balanced string? */ | ||
417 | s = matchbalance(ms, s, p+2); | ||
418 | if (s == NULL) return NULL; | ||
419 | p+=4; goto init; /* else return match(ms, s, p+4); */ | ||
420 | } | ||
421 | case 'f': { /* frontier? */ | ||
422 | const char *ep; char previous; | ||
423 | p += 2; | ||
424 | if (*p != '[') | ||
425 | luaL_error(ms->L, "missing " LUA_QL("[") " after " | ||
426 | LUA_QL("%%f") " in pattern"); | ||
427 | ep = classend(ms, p); /* points to what is next */ | ||
428 | previous = (s == ms->src_init) ? '\0' : *(s-1); | ||
429 | if (matchbracketclass(uchar(previous), p, ep-1) || | ||
430 | !matchbracketclass(uchar(*s), p, ep-1)) return NULL; | ||
431 | p=ep; goto init; /* else return match(ms, s, ep); */ | ||
432 | } | ||
433 | case '0': case '1': case '2': case '3': | ||
434 | case '4': case '5': case '6': case '7': | ||
435 | case '8': case '9': { /* capture results (%0-%9)? */ | ||
436 | s = match_capture(ms, s, uchar(*(p+1))); | ||
437 | if (s == NULL) return NULL; | ||
438 | p+=2; goto init; /* else return match(ms, s, p+2) */ | ||
439 | } | ||
440 | default: goto dflt; | ||
441 | } | 422 | } |
442 | } | 423 | case ')': { /* end capture */ |
443 | default: dflt: { /* pattern class plus optional suffix */ | 424 | s = end_capture(ms, s, p + 1); |
444 | const char *ep = classend(ms, p); /* points to what is next */ | 425 | break; |
445 | int m = s < ms->src_end && singlematch(uchar(*s), p, ep); | 426 | } |
446 | switch (*ep) { | 427 | case '$': { |
447 | case '?': { /* optional */ | 428 | if ((p + 1) != ms->p_end) /* is the `$' the last char in pattern? */ |
448 | const char *res; | 429 | goto dflt; /* no; go to default */ |
449 | if (m && ((res=match(ms, s+1, ep+1)) != NULL)) | 430 | s = (s == ms->src_end) ? s : NULL; /* check end of string */ |
450 | return res; | 431 | break; |
451 | p=ep+1; goto init; /* else return match(ms, s, ep+1); */ | 432 | } |
452 | } | 433 | case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ |
453 | case '*': { /* 0 or more repetitions */ | 434 | switch (*(p + 1)) { |
454 | return max_expand(ms, s, p, ep); | 435 | case 'b': { /* balanced string? */ |
455 | } | 436 | s = matchbalance(ms, s, p + 2); |
456 | case '+': { /* 1 or more repetitions */ | 437 | if (s != NULL) { |
457 | return (m ? max_expand(ms, s+1, p, ep) : NULL); | 438 | p += 4; goto init; /* return match(ms, s, p + 4); */ |
439 | } /* else fail (s == NULL) */ | ||
440 | break; | ||
441 | } | ||
442 | case 'f': { /* frontier? */ | ||
443 | const char *ep; char previous; | ||
444 | p += 2; | ||
445 | if (*p != '[') | ||
446 | luaL_error(ms->L, "missing " LUA_QL("[") " after " | ||
447 | LUA_QL("%%f") " in pattern"); | ||
448 | ep = classend(ms, p); /* points to what is next */ | ||
449 | previous = (s == ms->src_init) ? '\0' : *(s - 1); | ||
450 | if (!matchbracketclass(uchar(previous), p, ep - 1) && | ||
451 | matchbracketclass(uchar(*s), p, ep - 1)) { | ||
452 | p = ep; goto init; /* return match(ms, s, ep); */ | ||
453 | } | ||
454 | s = NULL; /* match failed */ | ||
455 | break; | ||
456 | } | ||
457 | case '0': case '1': case '2': case '3': | ||
458 | case '4': case '5': case '6': case '7': | ||
459 | case '8': case '9': { /* capture results (%0-%9)? */ | ||
460 | s = match_capture(ms, s, uchar(*(p + 1))); | ||
461 | if (s != NULL) { | ||
462 | p += 2; goto init; /* return match(ms, s, p + 2) */ | ||
463 | } | ||
464 | break; | ||
465 | } | ||
466 | default: goto dflt; | ||
458 | } | 467 | } |
459 | case '-': { /* 0 or more repetitions (minimum) */ | 468 | break; |
460 | return min_expand(ms, s, p, ep); | 469 | } |
470 | default: dflt: { /* pattern class plus optional suffix */ | ||
471 | const char *ep = classend(ms, p); /* points to optional suffix */ | ||
472 | /* does not match at least once? */ | ||
473 | if (!singlematch(ms, s, p, ep)) { | ||
474 | if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */ | ||
475 | p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ | ||
476 | } | ||
477 | else /* '+' or no suffix */ | ||
478 | s = NULL; /* fail */ | ||
461 | } | 479 | } |
462 | default: { | 480 | else { /* matched once */ |
463 | if (!m) return NULL; | 481 | switch (*ep) { /* handle optional suffix */ |
464 | s++; p=ep; goto init; /* else return match(ms, s+1, ep); */ | 482 | case '?': { /* optional */ |
483 | const char *res; | ||
484 | if ((res = match(ms, s + 1, ep + 1)) != NULL) | ||
485 | s = res; | ||
486 | else { | ||
487 | p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */ | ||
488 | } | ||
489 | break; | ||
490 | } | ||
491 | case '+': /* 1 or more repetitions */ | ||
492 | s++; /* 1 match already done */ | ||
493 | /* go through */ | ||
494 | case '*': /* 0 or more repetitions */ | ||
495 | s = max_expand(ms, s, p, ep); | ||
496 | break; | ||
497 | case '-': /* 0 or more repetitions (minimum) */ | ||
498 | s = min_expand(ms, s, p, ep); | ||
499 | break; | ||
500 | default: /* no suffix */ | ||
501 | s++; p = ep; goto init; /* return match(ms, s + 1, ep); */ | ||
502 | } | ||
465 | } | 503 | } |
504 | break; | ||
466 | } | 505 | } |
467 | } | 506 | } |
468 | } | 507 | } |
508 | ms->matchdepth++; | ||
509 | return s; | ||
469 | } | 510 | } |
470 | 511 | ||
471 | 512 | ||
@@ -561,12 +602,14 @@ static int str_find_aux (lua_State *L, int find) { | |||
561 | p++; lp--; /* skip anchor character */ | 602 | p++; lp--; /* skip anchor character */ |
562 | } | 603 | } |
563 | ms.L = L; | 604 | ms.L = L; |
605 | ms.matchdepth = MAXCCALLS; | ||
564 | ms.src_init = s; | 606 | ms.src_init = s; |
565 | ms.src_end = s + ls; | 607 | ms.src_end = s + ls; |
566 | ms.p_end = p + lp; | 608 | ms.p_end = p + lp; |
567 | do { | 609 | do { |
568 | const char *res; | 610 | const char *res; |
569 | ms.level = 0; | 611 | ms.level = 0; |
612 | lua_assert(ms.matchdepth == MAXCCALLS); | ||
570 | if ((res=match(&ms, s1, p)) != NULL) { | 613 | if ((res=match(&ms, s1, p)) != NULL) { |
571 | if (find) { | 614 | if (find) { |
572 | lua_pushinteger(L, s1 - s + 1); /* start */ | 615 | lua_pushinteger(L, s1 - s + 1); /* start */ |
@@ -600,6 +643,7 @@ static int gmatch_aux (lua_State *L) { | |||
600 | const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); | 643 | const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); |
601 | const char *src; | 644 | const char *src; |
602 | ms.L = L; | 645 | ms.L = L; |
646 | ms.matchdepth = MAXCCALLS; | ||
603 | ms.src_init = s; | 647 | ms.src_init = s; |
604 | ms.src_end = s+ls; | 648 | ms.src_end = s+ls; |
605 | ms.p_end = p + lp; | 649 | ms.p_end = p + lp; |
@@ -608,6 +652,7 @@ static int gmatch_aux (lua_State *L) { | |||
608 | src++) { | 652 | src++) { |
609 | const char *e; | 653 | const char *e; |
610 | ms.level = 0; | 654 | ms.level = 0; |
655 | lua_assert(ms.matchdepth == MAXCCALLS); | ||
611 | if ((e = match(&ms, src, p)) != NULL) { | 656 | if ((e = match(&ms, src, p)) != NULL) { |
612 | lua_Integer newstart = e-s; | 657 | lua_Integer newstart = e-s; |
613 | if (e == src) newstart++; /* empty match? go at least one position */ | 658 | if (e == src) newstart++; /* empty match? go at least one position */ |
@@ -705,12 +750,14 @@ static int str_gsub (lua_State *L) { | |||
705 | p++; lp--; /* skip anchor character */ | 750 | p++; lp--; /* skip anchor character */ |
706 | } | 751 | } |
707 | ms.L = L; | 752 | ms.L = L; |
753 | ms.matchdepth = MAXCCALLS; | ||
708 | ms.src_init = src; | 754 | ms.src_init = src; |
709 | ms.src_end = src+srcl; | 755 | ms.src_end = src+srcl; |
710 | ms.p_end = p + lp; | 756 | ms.p_end = p + lp; |
711 | while (n < max_s) { | 757 | while (n < max_s) { |
712 | const char *e; | 758 | const char *e; |
713 | ms.level = 0; | 759 | ms.level = 0; |
760 | lua_assert(ms.matchdepth == MAXCCALLS); | ||
714 | e = match(&ms, src, p); | 761 | e = match(&ms, src, p); |
715 | if (e) { | 762 | if (e) { |
716 | n++; | 763 | n++; |