diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2015-09-26 15:45:03 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2015-09-26 15:45:03 -0300 |
| commit | 8264dbc2bb42d4119ec54caa55e4bece2d6985d6 (patch) | |
| tree | 2900158197ee78e1254cc0860df06c6441bc6988 | |
| parent | 9fae7b6d3fca02b4661aaa7c16e9fbeec8964b9b (diff) | |
| download | lua-8264dbc2bb42d4119ec54caa55e4bece2d6985d6.tar.gz lua-8264dbc2bb42d4119ec54caa55e4bece2d6985d6.tar.bz2 lua-8264dbc2bb42d4119ec54caa55e4bece2d6985d6.zip | |
implemented counter to abort non-linear behavior in pattern matching
| -rw-r--r-- | lstrlib.c | 72 |
1 files changed, 46 insertions, 26 deletions
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lstrlib.c,v 1.231 2015/06/24 18:25:10 roberto Exp roberto $ | 2 | ** $Id: lstrlib.c,v 1.232 2015/07/20 16:30:22 roberto Exp roberto $ |
| 3 | ** Standard library for string operations and pattern-matching | 3 | ** Standard library for string operations and pattern-matching |
| 4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
| 5 | */ | 5 | */ |
| @@ -41,8 +41,10 @@ | |||
| 41 | ** Some sizes are better limited to fit in 'int', but must also fit in | 41 | ** Some sizes are better limited to fit in 'int', but must also fit in |
| 42 | ** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.) | 42 | ** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.) |
| 43 | */ | 43 | */ |
| 44 | #define MAX_SIZET ((size_t)(~(size_t)0)) | ||
| 45 | |||
| 44 | #define MAXSIZE \ | 46 | #define MAXSIZE \ |
| 45 | (sizeof(size_t) < sizeof(int) ? (~(size_t)0) : (size_t)(INT_MAX)) | 47 | (sizeof(size_t) < sizeof(int) ? MAX_SIZET : (size_t)(INT_MAX)) |
| 46 | 48 | ||
| 47 | 49 | ||
| 48 | 50 | ||
| @@ -208,11 +210,12 @@ static int str_dump (lua_State *L) { | |||
| 208 | 210 | ||
| 209 | 211 | ||
| 210 | typedef struct MatchState { | 212 | typedef struct MatchState { |
| 211 | int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ | ||
| 212 | const char *src_init; /* init of source string */ | 213 | const char *src_init; /* init of source string */ |
| 213 | const char *src_end; /* end ('\0') of source string */ | 214 | const char *src_end; /* end ('\0') of source string */ |
| 214 | const char *p_end; /* end ('\0') of pattern */ | 215 | const char *p_end; /* end ('\0') of pattern */ |
| 215 | lua_State *L; | 216 | lua_State *L; |
| 217 | size_t nrep; /* limit to avoid non-linear complexity */ | ||
| 218 | int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ | ||
| 216 | int level; /* total number of captures (finished or unfinished) */ | 219 | int level; /* total number of captures (finished or unfinished) */ |
| 217 | struct { | 220 | struct { |
| 218 | const char *init; | 221 | const char *init; |
| @@ -231,6 +234,17 @@ static const char *match (MatchState *ms, const char *s, const char *p); | |||
| 231 | #endif | 234 | #endif |
| 232 | 235 | ||
| 233 | 236 | ||
| 237 | /* | ||
| 238 | ** parameters to control the maximum number of operators handled in | ||
| 239 | ** a match (to avoid non-linear complexity). The maximum will be: | ||
| 240 | ** (subject length) * A_REPS + B_REPS | ||
| 241 | */ | ||
| 242 | #if !defined(A_REPS) | ||
| 243 | #define A_REPS 4 | ||
| 244 | #define B_REPS 100000 | ||
| 245 | #endif | ||
| 246 | |||
| 247 | |||
| 234 | #define L_ESC '%' | 248 | #define L_ESC '%' |
| 235 | #define SPECIALS "^$*+?.([%-" | 249 | #define SPECIALS "^$*+?.([%-" |
| 236 | 250 | ||
| @@ -488,6 +502,8 @@ static const char *match (MatchState *ms, const char *s, const char *p) { | |||
| 488 | s = NULL; /* fail */ | 502 | s = NULL; /* fail */ |
| 489 | } | 503 | } |
| 490 | else { /* matched once */ | 504 | else { /* matched once */ |
| 505 | if (ms->nrep-- == 0) | ||
| 506 | luaL_error(ms->L, "pattern too complex"); | ||
| 491 | switch (*ep) { /* handle optional suffix */ | 507 | switch (*ep) { /* handle optional suffix */ |
| 492 | case '?': { /* optional */ | 508 | case '?': { /* optional */ |
| 493 | const char *res; | 509 | const char *res; |
| @@ -584,6 +600,26 @@ static int nospecials (const char *p, size_t l) { | |||
| 584 | } | 600 | } |
| 585 | 601 | ||
| 586 | 602 | ||
| 603 | static void prepstate (MatchState *ms, lua_State *L, | ||
| 604 | const char *s, size_t ls, const char *p, size_t lp) { | ||
| 605 | ms->L = L; | ||
| 606 | ms->matchdepth = MAXCCALLS; | ||
| 607 | ms->src_init = s; | ||
| 608 | ms->src_end = s + ls; | ||
| 609 | ms->p_end = p + lp; | ||
| 610 | if (ls < (MAX_SIZET - B_REPS) / A_REPS) | ||
| 611 | ms->nrep = A_REPS * ls + B_REPS; | ||
| 612 | else /* overflow (very long subject) */ | ||
| 613 | ms->nrep = MAX_SIZET; /* no limit */ | ||
| 614 | } | ||
| 615 | |||
| 616 | |||
| 617 | static void reprepstate (MatchState *ms) { | ||
| 618 | ms->level = 0; | ||
| 619 | lua_assert(ms->matchdepth == MAXCCALLS); | ||
| 620 | } | ||
| 621 | |||
| 622 | |||
| 587 | static int str_find_aux (lua_State *L, int find) { | 623 | static int str_find_aux (lua_State *L, int find) { |
| 588 | size_t ls, lp; | 624 | size_t ls, lp; |
| 589 | const char *s = luaL_checklstring(L, 1, &ls); | 625 | const char *s = luaL_checklstring(L, 1, &ls); |
| @@ -611,15 +647,10 @@ static int str_find_aux (lua_State *L, int find) { | |||
| 611 | if (anchor) { | 647 | if (anchor) { |
| 612 | p++; lp--; /* skip anchor character */ | 648 | p++; lp--; /* skip anchor character */ |
| 613 | } | 649 | } |
| 614 | ms.L = L; | 650 | prepstate(&ms, L, s, ls, p, lp); |
| 615 | ms.matchdepth = MAXCCALLS; | ||
| 616 | ms.src_init = s; | ||
| 617 | ms.src_end = s + ls; | ||
| 618 | ms.p_end = p + lp; | ||
| 619 | do { | 651 | do { |
| 620 | const char *res; | 652 | const char *res; |
| 621 | ms.level = 0; | 653 | reprepstate(&ms); |
| 622 | lua_assert(ms.matchdepth == MAXCCALLS); | ||
| 623 | if ((res=match(&ms, s1, p)) != NULL) { | 654 | if ((res=match(&ms, s1, p)) != NULL) { |
| 624 | if (find) { | 655 | if (find) { |
| 625 | lua_pushinteger(L, (s1 - s) + 1); /* start */ | 656 | lua_pushinteger(L, (s1 - s) + 1); /* start */ |
| @@ -652,17 +683,12 @@ static int gmatch_aux (lua_State *L) { | |||
| 652 | const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); | 683 | const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); |
| 653 | const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); | 684 | const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); |
| 654 | const char *src; | 685 | const char *src; |
| 655 | ms.L = L; | 686 | prepstate(&ms, L, s, ls, p, lp); |
| 656 | ms.matchdepth = MAXCCALLS; | ||
| 657 | ms.src_init = s; | ||
| 658 | ms.src_end = s+ls; | ||
| 659 | ms.p_end = p + lp; | ||
| 660 | for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); | 687 | for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); |
| 661 | src <= ms.src_end; | 688 | src <= ms.src_end; |
| 662 | src++) { | 689 | src++) { |
| 663 | const char *e; | 690 | const char *e; |
| 664 | ms.level = 0; | 691 | reprepstate(&ms); |
| 665 | lua_assert(ms.matchdepth == MAXCCALLS); | ||
| 666 | if ((e = match(&ms, src, p)) != NULL) { | 692 | if ((e = match(&ms, src, p)) != NULL) { |
| 667 | lua_Integer newstart = e-s; | 693 | lua_Integer newstart = e-s; |
| 668 | if (e == src) newstart++; /* empty match? go at least one position */ | 694 | if (e == src) newstart++; /* empty match? go at least one position */ |
| @@ -761,17 +787,11 @@ static int str_gsub (lua_State *L) { | |||
| 761 | if (anchor) { | 787 | if (anchor) { |
| 762 | p++; lp--; /* skip anchor character */ | 788 | p++; lp--; /* skip anchor character */ |
| 763 | } | 789 | } |
| 764 | ms.L = L; | 790 | prepstate(&ms, L, src, srcl, p, lp); |
| 765 | ms.matchdepth = MAXCCALLS; | ||
| 766 | ms.src_init = src; | ||
| 767 | ms.src_end = src+srcl; | ||
| 768 | ms.p_end = p + lp; | ||
| 769 | while (n < max_s) { | 791 | while (n < max_s) { |
| 770 | const char *e; | 792 | const char *e; |
| 771 | ms.level = 0; | 793 | reprepstate(&ms); |
| 772 | lua_assert(ms.matchdepth == MAXCCALLS); | 794 | if ((e = match(&ms, src, p)) != NULL) { |
| 773 | e = match(&ms, src, p); | ||
| 774 | if (e) { | ||
| 775 | n++; | 795 | n++; |
| 776 | add_value(&ms, &b, src, e, tr); | 796 | add_value(&ms, &b, src, e, tr); |
| 777 | } | 797 | } |
