diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2015-09-26 15:45:03 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2015-09-26 15:45:03 -0300 |
commit | 8264dbc2bb42d4119ec54caa55e4bece2d6985d6 (patch) | |
tree | 2900158197ee78e1254cc0860df06c6441bc6988 /lstrlib.c | |
parent | 9fae7b6d3fca02b4661aaa7c16e9fbeec8964b9b (diff) | |
download | lua-8264dbc2bb42d4119ec54caa55e4bece2d6985d6.tar.gz lua-8264dbc2bb42d4119ec54caa55e4bece2d6985d6.tar.bz2 lua-8264dbc2bb42d4119ec54caa55e4bece2d6985d6.zip |
implemented counter to abort non-linear behavior in pattern matching
Diffstat (limited to 'lstrlib.c')
-rw-r--r-- | lstrlib.c | 72 |
1 files changed, 46 insertions, 26 deletions
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lstrlib.c,v 1.231 2015/06/24 18:25:10 roberto Exp roberto $ | 2 | ** $Id: lstrlib.c,v 1.232 2015/07/20 16:30:22 roberto Exp roberto $ |
3 | ** Standard library for string operations and pattern-matching | 3 | ** Standard library for string operations and pattern-matching |
4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
5 | */ | 5 | */ |
@@ -41,8 +41,10 @@ | |||
41 | ** Some sizes are better limited to fit in 'int', but must also fit in | 41 | ** Some sizes are better limited to fit in 'int', but must also fit in |
42 | ** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.) | 42 | ** 'size_t'. (We assume that 'lua_Integer' cannot be smaller than 'int'.) |
43 | */ | 43 | */ |
44 | #define MAX_SIZET ((size_t)(~(size_t)0)) | ||
45 | |||
44 | #define MAXSIZE \ | 46 | #define MAXSIZE \ |
45 | (sizeof(size_t) < sizeof(int) ? (~(size_t)0) : (size_t)(INT_MAX)) | 47 | (sizeof(size_t) < sizeof(int) ? MAX_SIZET : (size_t)(INT_MAX)) |
46 | 48 | ||
47 | 49 | ||
48 | 50 | ||
@@ -208,11 +210,12 @@ static int str_dump (lua_State *L) { | |||
208 | 210 | ||
209 | 211 | ||
210 | typedef struct MatchState { | 212 | typedef struct MatchState { |
211 | int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ | ||
212 | const char *src_init; /* init of source string */ | 213 | const char *src_init; /* init of source string */ |
213 | const char *src_end; /* end ('\0') of source string */ | 214 | const char *src_end; /* end ('\0') of source string */ |
214 | const char *p_end; /* end ('\0') of pattern */ | 215 | const char *p_end; /* end ('\0') of pattern */ |
215 | lua_State *L; | 216 | lua_State *L; |
217 | size_t nrep; /* limit to avoid non-linear complexity */ | ||
218 | int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ | ||
216 | int level; /* total number of captures (finished or unfinished) */ | 219 | int level; /* total number of captures (finished or unfinished) */ |
217 | struct { | 220 | struct { |
218 | const char *init; | 221 | const char *init; |
@@ -231,6 +234,17 @@ static const char *match (MatchState *ms, const char *s, const char *p); | |||
231 | #endif | 234 | #endif |
232 | 235 | ||
233 | 236 | ||
237 | /* | ||
238 | ** parameters to control the maximum number of operators handled in | ||
239 | ** a match (to avoid non-linear complexity). The maximum will be: | ||
240 | ** (subject length) * A_REPS + B_REPS | ||
241 | */ | ||
242 | #if !defined(A_REPS) | ||
243 | #define A_REPS 4 | ||
244 | #define B_REPS 100000 | ||
245 | #endif | ||
246 | |||
247 | |||
234 | #define L_ESC '%' | 248 | #define L_ESC '%' |
235 | #define SPECIALS "^$*+?.([%-" | 249 | #define SPECIALS "^$*+?.([%-" |
236 | 250 | ||
@@ -488,6 +502,8 @@ static const char *match (MatchState *ms, const char *s, const char *p) { | |||
488 | s = NULL; /* fail */ | 502 | s = NULL; /* fail */ |
489 | } | 503 | } |
490 | else { /* matched once */ | 504 | else { /* matched once */ |
505 | if (ms->nrep-- == 0) | ||
506 | luaL_error(ms->L, "pattern too complex"); | ||
491 | switch (*ep) { /* handle optional suffix */ | 507 | switch (*ep) { /* handle optional suffix */ |
492 | case '?': { /* optional */ | 508 | case '?': { /* optional */ |
493 | const char *res; | 509 | const char *res; |
@@ -584,6 +600,26 @@ static int nospecials (const char *p, size_t l) { | |||
584 | } | 600 | } |
585 | 601 | ||
586 | 602 | ||
603 | static void prepstate (MatchState *ms, lua_State *L, | ||
604 | const char *s, size_t ls, const char *p, size_t lp) { | ||
605 | ms->L = L; | ||
606 | ms->matchdepth = MAXCCALLS; | ||
607 | ms->src_init = s; | ||
608 | ms->src_end = s + ls; | ||
609 | ms->p_end = p + lp; | ||
610 | if (ls < (MAX_SIZET - B_REPS) / A_REPS) | ||
611 | ms->nrep = A_REPS * ls + B_REPS; | ||
612 | else /* overflow (very long subject) */ | ||
613 | ms->nrep = MAX_SIZET; /* no limit */ | ||
614 | } | ||
615 | |||
616 | |||
617 | static void reprepstate (MatchState *ms) { | ||
618 | ms->level = 0; | ||
619 | lua_assert(ms->matchdepth == MAXCCALLS); | ||
620 | } | ||
621 | |||
622 | |||
587 | static int str_find_aux (lua_State *L, int find) { | 623 | static int str_find_aux (lua_State *L, int find) { |
588 | size_t ls, lp; | 624 | size_t ls, lp; |
589 | const char *s = luaL_checklstring(L, 1, &ls); | 625 | const char *s = luaL_checklstring(L, 1, &ls); |
@@ -611,15 +647,10 @@ static int str_find_aux (lua_State *L, int find) { | |||
611 | if (anchor) { | 647 | if (anchor) { |
612 | p++; lp--; /* skip anchor character */ | 648 | p++; lp--; /* skip anchor character */ |
613 | } | 649 | } |
614 | ms.L = L; | 650 | prepstate(&ms, L, s, ls, p, lp); |
615 | ms.matchdepth = MAXCCALLS; | ||
616 | ms.src_init = s; | ||
617 | ms.src_end = s + ls; | ||
618 | ms.p_end = p + lp; | ||
619 | do { | 651 | do { |
620 | const char *res; | 652 | const char *res; |
621 | ms.level = 0; | 653 | reprepstate(&ms); |
622 | lua_assert(ms.matchdepth == MAXCCALLS); | ||
623 | if ((res=match(&ms, s1, p)) != NULL) { | 654 | if ((res=match(&ms, s1, p)) != NULL) { |
624 | if (find) { | 655 | if (find) { |
625 | lua_pushinteger(L, (s1 - s) + 1); /* start */ | 656 | lua_pushinteger(L, (s1 - s) + 1); /* start */ |
@@ -652,17 +683,12 @@ static int gmatch_aux (lua_State *L) { | |||
652 | const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); | 683 | const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); |
653 | const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); | 684 | const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); |
654 | const char *src; | 685 | const char *src; |
655 | ms.L = L; | 686 | prepstate(&ms, L, s, ls, p, lp); |
656 | ms.matchdepth = MAXCCALLS; | ||
657 | ms.src_init = s; | ||
658 | ms.src_end = s+ls; | ||
659 | ms.p_end = p + lp; | ||
660 | for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); | 687 | for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); |
661 | src <= ms.src_end; | 688 | src <= ms.src_end; |
662 | src++) { | 689 | src++) { |
663 | const char *e; | 690 | const char *e; |
664 | ms.level = 0; | 691 | reprepstate(&ms); |
665 | lua_assert(ms.matchdepth == MAXCCALLS); | ||
666 | if ((e = match(&ms, src, p)) != NULL) { | 692 | if ((e = match(&ms, src, p)) != NULL) { |
667 | lua_Integer newstart = e-s; | 693 | lua_Integer newstart = e-s; |
668 | if (e == src) newstart++; /* empty match? go at least one position */ | 694 | if (e == src) newstart++; /* empty match? go at least one position */ |
@@ -761,17 +787,11 @@ static int str_gsub (lua_State *L) { | |||
761 | if (anchor) { | 787 | if (anchor) { |
762 | p++; lp--; /* skip anchor character */ | 788 | p++; lp--; /* skip anchor character */ |
763 | } | 789 | } |
764 | ms.L = L; | 790 | prepstate(&ms, L, src, srcl, p, lp); |
765 | ms.matchdepth = MAXCCALLS; | ||
766 | ms.src_init = src; | ||
767 | ms.src_end = src+srcl; | ||
768 | ms.p_end = p + lp; | ||
769 | while (n < max_s) { | 791 | while (n < max_s) { |
770 | const char *e; | 792 | const char *e; |
771 | ms.level = 0; | 793 | reprepstate(&ms); |
772 | lua_assert(ms.matchdepth == MAXCCALLS); | 794 | if ((e = match(&ms, src, p)) != NULL) { |
773 | e = match(&ms, src, p); | ||
774 | if (e) { | ||
775 | n++; | 795 | n++; |
776 | add_value(&ms, &b, src, e, tr); | 796 | add_value(&ms, &b, src, e, tr); |
777 | } | 797 | } |