aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2012-07-31 14:48:42 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2012-07-31 14:48:42 -0300
commit6625cbecd101bfc0bc1d1034a4b527bd9a17a5de (patch)
treea69a04b8ba006ede9eada251502c8381b72173be
parent4ac55997ecc70410c065a9494a694c2ad25c0104 (diff)
downloadlua-6625cbecd101bfc0bc1d1034a4b527bd9a17a5de.tar.gz
lua-6625cbecd101bfc0bc1d1034a4b527bd9a17a5de.tar.bz2
lua-6625cbecd101bfc0bc1d1034a4b527bd9a17a5de.zip
Bug: Some patterns can overflow the C stack, due to recursion
(Took the opportunity to refactor function 'match')
-rw-r--r--lstrlib.c201
1 files changed, 124 insertions, 77 deletions
diff --git a/lstrlib.c b/lstrlib.c
index 4579a9fd..e8f26142 100644
--- a/lstrlib.c
+++ b/lstrlib.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lstrlib.c,v 1.175 2012/04/20 13:16:48 roberto Exp roberto $ 2** $Id: lstrlib.c,v 1.176 2012/05/23 15:37:09 roberto Exp roberto $
3** Standard library for string operations and pattern-matching 3** Standard library for string operations and pattern-matching
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -194,7 +194,9 @@ static int str_dump (lua_State *L) {
194#define CAP_UNFINISHED (-1) 194#define CAP_UNFINISHED (-1)
195#define CAP_POSITION (-2) 195#define CAP_POSITION (-2)
196 196
197
197typedef struct MatchState { 198typedef struct MatchState {
199 int matchdepth; /* control for recursive depth (to avoid C stack overflow) */
198 const char *src_init; /* init of source string */ 200 const char *src_init; /* init of source string */
199 const char *src_end; /* end ('\0') of source string */ 201 const char *src_end; /* end ('\0') of source string */
200 const char *p_end; /* end ('\0') of pattern */ 202 const char *p_end; /* end ('\0') of pattern */
@@ -207,6 +209,16 @@ typedef struct MatchState {
207} MatchState; 209} MatchState;
208 210
209 211
212/* recursive function */
213static const char *match (MatchState *ms, const char *s, const char *p);
214
215
216/* maximum recursion depth for 'match' */
217#if !defined(MAXCCALLS)
218#define MAXCCALLS 200
219#endif
220
221
210#define L_ESC '%' 222#define L_ESC '%'
211#define SPECIALS "^$*+?.([%-" 223#define SPECIALS "^$*+?.([%-"
212 224
@@ -294,19 +306,22 @@ static int matchbracketclass (int c, const char *p, const char *ec) {
294} 306}
295 307
296 308
297static int singlematch (int c, const char *p, const char *ep) { 309static int singlematch (MatchState *ms, const char *s, const char *p,
298 switch (*p) { 310 const char *ep) {
299 case '.': return 1; /* matches any char */ 311 if (s >= ms->src_end)
300 case L_ESC: return match_class(c, uchar(*(p+1))); 312 return 0;
301 case '[': return matchbracketclass(c, p, ep-1); 313 else {
302 default: return (uchar(*p) == c); 314 int c = uchar(*s);
315 switch (*p) {
316 case '.': return 1; /* matches any char */
317 case L_ESC: return match_class(c, uchar(*(p+1)));
318 case '[': return matchbracketclass(c, p, ep-1);
319 default: return (uchar(*p) == c);
320 }
303 } 321 }
304} 322}
305 323
306 324
307static const char *match (MatchState *ms, const char *s, const char *p);
308
309
310static const char *matchbalance (MatchState *ms, const char *s, 325static const char *matchbalance (MatchState *ms, const char *s,
311 const char *p) { 326 const char *p) {
312 if (p >= ms->p_end - 1) 327 if (p >= ms->p_end - 1)
@@ -331,7 +346,7 @@ static const char *matchbalance (MatchState *ms, const char *s,
331static const char *max_expand (MatchState *ms, const char *s, 346static const char *max_expand (MatchState *ms, const char *s,
332 const char *p, const char *ep) { 347 const char *p, const char *ep) {
333 ptrdiff_t i = 0; /* counts maximum expand for item */ 348 ptrdiff_t i = 0; /* counts maximum expand for item */
334 while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep)) 349 while (singlematch(ms, s + i, p, ep))
335 i++; 350 i++;
336 /* keeps trying to match with the maximum repetitions */ 351 /* keeps trying to match with the maximum repetitions */
337 while (i>=0) { 352 while (i>=0) {
@@ -349,7 +364,7 @@ static const char *min_expand (MatchState *ms, const char *s,
349 const char *res = match(ms, s, ep+1); 364 const char *res = match(ms, s, ep+1);
350 if (res != NULL) 365 if (res != NULL)
351 return res; 366 return res;
352 else if (s<ms->src_end && singlematch(uchar(*s), p, ep)) 367 else if (singlematch(ms, s, p, ep))
353 s++; /* try with one more repetition */ 368 s++; /* try with one more repetition */
354 else return NULL; 369 else return NULL;
355 } 370 }
@@ -393,79 +408,105 @@ static const char *match_capture (MatchState *ms, const char *s, int l) {
393 408
394 409
395static const char *match (MatchState *ms, const char *s, const char *p) { 410static const char *match (MatchState *ms, const char *s, const char *p) {
411 if (ms->matchdepth-- == 0)
412 luaL_error(ms->L, "pattern too complex");
396 init: /* using goto's to optimize tail recursion */ 413 init: /* using goto's to optimize tail recursion */
397 if (p == ms->p_end) /* end of pattern? */ 414 if (p != ms->p_end) { /* end of pattern? */
398 return s; /* match succeeded */ 415 switch (*p) {
399 switch (*p) { 416 case '(': { /* start capture */
400 case '(': { /* start capture */ 417 if (*(p + 1) == ')') /* position capture? */
401 if (*(p+1) == ')') /* position capture? */ 418 s = start_capture(ms, s, p + 2, CAP_POSITION);
402 return start_capture(ms, s, p+2, CAP_POSITION); 419 else
403 else 420 s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
404 return start_capture(ms, s, p+1, CAP_UNFINISHED); 421 break;
405 }
406 case ')': { /* end capture */
407 return end_capture(ms, s, p+1);
408 }
409 case '$': {
410 if ((p+1) == ms->p_end) /* is the `$' the last char in pattern? */
411 return (s == ms->src_end) ? s : NULL; /* check end of string */
412 else goto dflt;
413 }
414 case L_ESC: { /* escaped sequences not in the format class[*+?-]? */
415 switch (*(p+1)) {
416 case 'b': { /* balanced string? */
417 s = matchbalance(ms, s, p+2);
418 if (s == NULL) return NULL;
419 p+=4; goto init; /* else return match(ms, s, p+4); */
420 }
421 case 'f': { /* frontier? */
422 const char *ep; char previous;
423 p += 2;
424 if (*p != '[')
425 luaL_error(ms->L, "missing " LUA_QL("[") " after "
426 LUA_QL("%%f") " in pattern");
427 ep = classend(ms, p); /* points to what is next */
428 previous = (s == ms->src_init) ? '\0' : *(s-1);
429 if (matchbracketclass(uchar(previous), p, ep-1) ||
430 !matchbracketclass(uchar(*s), p, ep-1)) return NULL;
431 p=ep; goto init; /* else return match(ms, s, ep); */
432 }
433 case '0': case '1': case '2': case '3':
434 case '4': case '5': case '6': case '7':
435 case '8': case '9': { /* capture results (%0-%9)? */
436 s = match_capture(ms, s, uchar(*(p+1)));
437 if (s == NULL) return NULL;
438 p+=2; goto init; /* else return match(ms, s, p+2) */
439 }
440 default: goto dflt;
441 } 422 }
442 } 423 case ')': { /* end capture */
443 default: dflt: { /* pattern class plus optional suffix */ 424 s = end_capture(ms, s, p + 1);
444 const char *ep = classend(ms, p); /* points to what is next */ 425 break;
445 int m = s < ms->src_end && singlematch(uchar(*s), p, ep); 426 }
446 switch (*ep) { 427 case '$': {
447 case '?': { /* optional */ 428 if ((p + 1) != ms->p_end) /* is the `$' the last char in pattern? */
448 const char *res; 429 goto dflt; /* no; go to default */
449 if (m && ((res=match(ms, s+1, ep+1)) != NULL)) 430 s = (s == ms->src_end) ? s : NULL; /* check end of string */
450 return res; 431 break;
451 p=ep+1; goto init; /* else return match(ms, s, ep+1); */ 432 }
452 } 433 case L_ESC: { /* escaped sequences not in the format class[*+?-]? */
453 case '*': { /* 0 or more repetitions */ 434 switch (*(p + 1)) {
454 return max_expand(ms, s, p, ep); 435 case 'b': { /* balanced string? */
455 } 436 s = matchbalance(ms, s, p + 2);
456 case '+': { /* 1 or more repetitions */ 437 if (s != NULL) {
457 return (m ? max_expand(ms, s+1, p, ep) : NULL); 438 p += 4; goto init; /* return match(ms, s, p + 4); */
439 } /* else fail (s == NULL) */
440 break;
441 }
442 case 'f': { /* frontier? */
443 const char *ep; char previous;
444 p += 2;
445 if (*p != '[')
446 luaL_error(ms->L, "missing " LUA_QL("[") " after "
447 LUA_QL("%%f") " in pattern");
448 ep = classend(ms, p); /* points to what is next */
449 previous = (s == ms->src_init) ? '\0' : *(s - 1);
450 if (!matchbracketclass(uchar(previous), p, ep - 1) &&
451 matchbracketclass(uchar(*s), p, ep - 1)) {
452 p = ep; goto init; /* return match(ms, s, ep); */
453 }
454 s = NULL; /* match failed */
455 break;
456 }
457 case '0': case '1': case '2': case '3':
458 case '4': case '5': case '6': case '7':
459 case '8': case '9': { /* capture results (%0-%9)? */
460 s = match_capture(ms, s, uchar(*(p + 1)));
461 if (s != NULL) {
462 p += 2; goto init; /* return match(ms, s, p + 2) */
463 }
464 break;
465 }
466 default: goto dflt;
458 } 467 }
459 case '-': { /* 0 or more repetitions (minimum) */ 468 break;
460 return min_expand(ms, s, p, ep); 469 }
470 default: dflt: { /* pattern class plus optional suffix */
471 const char *ep = classend(ms, p); /* points to optional suffix */
472 /* does not match at least once? */
473 if (!singlematch(ms, s, p, ep)) {
474 if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */
475 p = ep + 1; goto init; /* return match(ms, s, ep + 1); */
476 }
477 else /* '+' or no suffix */
478 s = NULL; /* fail */
461 } 479 }
462 default: { 480 else { /* matched once */
463 if (!m) return NULL; 481 switch (*ep) { /* handle optional suffix */
464 s++; p=ep; goto init; /* else return match(ms, s+1, ep); */ 482 case '?': { /* optional */
483 const char *res;
484 if ((res = match(ms, s + 1, ep + 1)) != NULL)
485 s = res;
486 else {
487 p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */
488 }
489 break;
490 }
491 case '+': /* 1 or more repetitions */
492 s++; /* 1 match already done */
493 /* go through */
494 case '*': /* 0 or more repetitions */
495 s = max_expand(ms, s, p, ep);
496 break;
497 case '-': /* 0 or more repetitions (minimum) */
498 s = min_expand(ms, s, p, ep);
499 break;
500 default: /* no suffix */
501 s++; p = ep; goto init; /* return match(ms, s + 1, ep); */
502 }
465 } 503 }
504 break;
466 } 505 }
467 } 506 }
468 } 507 }
508 ms->matchdepth++;
509 return s;
469} 510}
470 511
471 512
@@ -561,12 +602,14 @@ static int str_find_aux (lua_State *L, int find) {
561 p++; lp--; /* skip anchor character */ 602 p++; lp--; /* skip anchor character */
562 } 603 }
563 ms.L = L; 604 ms.L = L;
605 ms.matchdepth = MAXCCALLS;
564 ms.src_init = s; 606 ms.src_init = s;
565 ms.src_end = s + ls; 607 ms.src_end = s + ls;
566 ms.p_end = p + lp; 608 ms.p_end = p + lp;
567 do { 609 do {
568 const char *res; 610 const char *res;
569 ms.level = 0; 611 ms.level = 0;
612 lua_assert(ms.matchdepth == MAXCCALLS);
570 if ((res=match(&ms, s1, p)) != NULL) { 613 if ((res=match(&ms, s1, p)) != NULL) {
571 if (find) { 614 if (find) {
572 lua_pushinteger(L, s1 - s + 1); /* start */ 615 lua_pushinteger(L, s1 - s + 1); /* start */
@@ -600,6 +643,7 @@ static int gmatch_aux (lua_State *L) {
600 const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); 643 const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp);
601 const char *src; 644 const char *src;
602 ms.L = L; 645 ms.L = L;
646 ms.matchdepth = MAXCCALLS;
603 ms.src_init = s; 647 ms.src_init = s;
604 ms.src_end = s+ls; 648 ms.src_end = s+ls;
605 ms.p_end = p + lp; 649 ms.p_end = p + lp;
@@ -608,6 +652,7 @@ static int gmatch_aux (lua_State *L) {
608 src++) { 652 src++) {
609 const char *e; 653 const char *e;
610 ms.level = 0; 654 ms.level = 0;
655 lua_assert(ms.matchdepth == MAXCCALLS);
611 if ((e = match(&ms, src, p)) != NULL) { 656 if ((e = match(&ms, src, p)) != NULL) {
612 lua_Integer newstart = e-s; 657 lua_Integer newstart = e-s;
613 if (e == src) newstart++; /* empty match? go at least one position */ 658 if (e == src) newstart++; /* empty match? go at least one position */
@@ -705,12 +750,14 @@ static int str_gsub (lua_State *L) {
705 p++; lp--; /* skip anchor character */ 750 p++; lp--; /* skip anchor character */
706 } 751 }
707 ms.L = L; 752 ms.L = L;
753 ms.matchdepth = MAXCCALLS;
708 ms.src_init = src; 754 ms.src_init = src;
709 ms.src_end = src+srcl; 755 ms.src_end = src+srcl;
710 ms.p_end = p + lp; 756 ms.p_end = p + lp;
711 while (n < max_s) { 757 while (n < max_s) {
712 const char *e; 758 const char *e;
713 ms.level = 0; 759 ms.level = 0;
760 lua_assert(ms.matchdepth == MAXCCALLS);
714 e = match(&ms, src, p); 761 e = match(&ms, src, p);
715 if (e) { 762 if (e) {
716 n++; 763 n++;