aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Pall <mike>2012-08-28 21:22:23 +0200
committerMike Pall <mike>2012-08-28 21:22:23 +0200
commitff00a78f3ab2597a5b4113d247ca59d26538e752 (patch)
treeb7705ad6fb04780adc6cd63ace074288f83a50b3 /src
parent751cd9d82180f1bd99a738acc29bc114995a42e4 (diff)
downloadluajit-ff00a78f3ab2597a5b4113d247ca59d26538e752.tar.gz
luajit-ff00a78f3ab2597a5b4113d247ca59d26538e752.tar.bz2
luajit-ff00a78f3ab2597a5b4113d247ca59d26538e752.zip
Limit recursion depth in string.match() et al.
Diffstat (limited to 'src')
-rw-r--r--src/lib_string.c66
-rw-r--r--src/lj_errmsg.h1
2 files changed, 41 insertions, 26 deletions
diff --git a/src/lib_string.c b/src/lib_string.c
index d4a2b0d1..6d0b0a04 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -148,6 +148,7 @@ typedef struct MatchState {
148 const char *src_end; /* end (`\0') of source string */ 148 const char *src_end; /* end (`\0') of source string */
149 lua_State *L; 149 lua_State *L;
150 int level; /* total number of captures (finished or unfinished) */ 150 int level; /* total number of captures (finished or unfinished) */
151 int depth;
151 struct { 152 struct {
152 const char *init; 153 const char *init;
153 ptrdiff_t len; 154 ptrdiff_t len;
@@ -339,22 +340,26 @@ static const char *match_capture(MatchState *ms, const char *s, int l)
339 340
340static const char *match(MatchState *ms, const char *s, const char *p) 341static const char *match(MatchState *ms, const char *s, const char *p)
341{ 342{
343 if (++ms->depth > LJ_MAX_XLEVEL)
344 lj_err_caller(ms->L, LJ_ERR_STRPATX);
342 init: /* using goto's to optimize tail recursion */ 345 init: /* using goto's to optimize tail recursion */
343 switch (*p) { 346 switch (*p) {
344 case '(': /* start capture */ 347 case '(': /* start capture */
345 if (*(p+1) == ')') /* position capture? */ 348 if (*(p+1) == ')') /* position capture? */
346 return start_capture(ms, s, p+2, CAP_POSITION); 349 s = start_capture(ms, s, p+2, CAP_POSITION);
347 else 350 else
348 return start_capture(ms, s, p+1, CAP_UNFINISHED); 351 s = start_capture(ms, s, p+1, CAP_UNFINISHED);
352 break;
349 case ')': /* end capture */ 353 case ')': /* end capture */
350 return end_capture(ms, s, p+1); 354 s = end_capture(ms, s, p+1);
355 break;
351 case L_ESC: 356 case L_ESC:
352 switch (*(p+1)) { 357 switch (*(p+1)) {
353 case 'b': /* balanced string? */ 358 case 'b': /* balanced string? */
354 s = matchbalance(ms, s, p+2); 359 s = matchbalance(ms, s, p+2);
355 if (s == NULL) return NULL; 360 if (s == NULL) break;
356 p+=4; 361 p+=4;
357 goto init; /* else return match(ms, s, p+4); */ 362 goto init; /* else s = match(ms, s, p+4); */
358 case 'f': { /* frontier? */ 363 case 'f': { /* frontier? */
359 const char *ep; char previous; 364 const char *ep; char previous;
360 p += 2; 365 p += 2;
@@ -363,50 +368,59 @@ static const char *match(MatchState *ms, const char *s, const char *p)
363 ep = classend(ms, p); /* points to what is next */ 368 ep = classend(ms, p); /* points to what is next */
364 previous = (s == ms->src_init) ? '\0' : *(s-1); 369 previous = (s == ms->src_init) ? '\0' : *(s-1);
365 if (matchbracketclass(uchar(previous), p, ep-1) || 370 if (matchbracketclass(uchar(previous), p, ep-1) ||
366 !matchbracketclass(uchar(*s), p, ep-1)) return NULL; 371 !matchbracketclass(uchar(*s), p, ep-1)) { s = NULL; break; }
367 p=ep; 372 p=ep;
368 goto init; /* else return match(ms, s, ep); */ 373 goto init; /* else s = match(ms, s, ep); */
369 } 374 }
370 default: 375 default:
371 if (lj_char_isdigit(uchar(*(p+1)))) { /* capture results (%0-%9)? */ 376 if (lj_char_isdigit(uchar(*(p+1)))) { /* capture results (%0-%9)? */
372 s = match_capture(ms, s, uchar(*(p+1))); 377 s = match_capture(ms, s, uchar(*(p+1)));
373 if (s == NULL) return NULL; 378 if (s == NULL) break;
374 p+=2; 379 p+=2;
375 goto init; /* else return match(ms, s, p+2) */ 380 goto init; /* else s = match(ms, s, p+2) */
376 } 381 }
377 goto dflt; /* case default */ 382 goto dflt; /* case default */
378 } 383 }
384 break;
379 case '\0': /* end of pattern */ 385 case '\0': /* end of pattern */
380 return s; /* match succeeded */ 386 break; /* match succeeded */
381 case '$': 387 case '$':
382 if (*(p+1) == '\0') /* is the `$' the last char in pattern? */ 388 /* is the `$' the last char in pattern? */
383 return (s == ms->src_end) ? s : NULL; /* check end of string */ 389 if (*(p+1) != '\0') goto dflt;
384 else 390 if (s != ms->src_end) s = NULL; /* check end of string */
385 goto dflt; 391 break;
386 default: dflt: { /* it is a pattern item */ 392 default: dflt: { /* it is a pattern item */
387 const char *ep = classend(ms, p); /* points to what is next */ 393 const char *ep = classend(ms, p); /* points to what is next */
388 int m = s<ms->src_end && singlematch(uchar(*s), p, ep); 394 int m = s<ms->src_end && singlematch(uchar(*s), p, ep);
389 switch (*ep) { 395 switch (*ep) {
390 case '?': { /* optional */ 396 case '?': { /* optional */
391 const char *res; 397 const char *res;
392 if (m && ((res=match(ms, s+1, ep+1)) != NULL)) 398 if (m && ((res=match(ms, s+1, ep+1)) != NULL)) {
393 return res; 399 s = res;
400 break;
401 }
394 p=ep+1; 402 p=ep+1;
395 goto init; /* else return match(ms, s, ep+1); */ 403 goto init; /* else s = match(ms, s, ep+1); */
396 } 404 }
397 case '*': /* 0 or more repetitions */ 405 case '*': /* 0 or more repetitions */
398 return max_expand(ms, s, p, ep); 406 s = max_expand(ms, s, p, ep);
407 break;
399 case '+': /* 1 or more repetitions */ 408 case '+': /* 1 or more repetitions */
400 return (m ? max_expand(ms, s+1, p, ep) : NULL); 409 s = (m ? max_expand(ms, s+1, p, ep) : NULL);
410 break;
401 case '-': /* 0 or more repetitions (minimum) */ 411 case '-': /* 0 or more repetitions (minimum) */
402 return min_expand(ms, s, p, ep); 412 s = min_expand(ms, s, p, ep);
413 break;
403 default: 414 default:
404 if (!m) return NULL; 415 if (m) { s++; p=ep; goto init; } /* else s = match(ms, s+1, ep); */
405 s++; p=ep; 416 s = NULL;
406 goto init; /* else return match(ms, s+1, ep); */ 417 break;
407 } 418 }
419 break;
408 } 420 }
409 } 421 }
422 ms->depth--;
423 return s;
410} 424}
411 425
412static const char *lmemfind(const char *s1, size_t l1, 426static const char *lmemfind(const char *s1, size_t l1,
@@ -495,7 +509,7 @@ static int str_find_aux(lua_State *L, int find)
495 ms.src_end = s+l1; 509 ms.src_end = s+l1;
496 do { 510 do {
497 const char *res; 511 const char *res;
498 ms.level = 0; 512 ms.level = ms.depth = 0;
499 if ((res=match(&ms, s1, p)) != NULL) { 513 if ((res=match(&ms, s1, p)) != NULL) {
500 if (find) { 514 if (find) {
501 lua_pushinteger(L, s1-s+1); /* start */ 515 lua_pushinteger(L, s1-s+1); /* start */
@@ -534,7 +548,7 @@ LJLIB_NOREG LJLIB_CF(string_gmatch_aux)
534 ms.src_end = s + str->len; 548 ms.src_end = s + str->len;
535 for (; src <= ms.src_end; src++) { 549 for (; src <= ms.src_end; src++) {
536 const char *e; 550 const char *e;
537 ms.level = 0; 551 ms.level = ms.depth = 0;
538 if ((e = match(&ms, src, p)) != NULL) { 552 if ((e = match(&ms, src, p)) != NULL) {
539 int32_t pos = (int32_t)(e - s); 553 int32_t pos = (int32_t)(e - s);
540 if (e == src) pos++; /* Ensure progress for empty match. */ 554 if (e == src) pos++; /* Ensure progress for empty match. */
@@ -628,7 +642,7 @@ LJLIB_CF(string_gsub)
628 ms.src_end = src+srcl; 642 ms.src_end = src+srcl;
629 while (n < max_s) { 643 while (n < max_s) {
630 const char *e; 644 const char *e;
631 ms.level = 0; 645 ms.level = ms.depth = 0;
632 e = match(&ms, src, p); 646 e = match(&ms, src, p);
633 if (e) { 647 if (e) {
634 n++; 648 n++;
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index d59d76a2..5b10dea8 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -91,6 +91,7 @@ ERRDEF(STRPATC, "invalid pattern capture")
91ERRDEF(STRPATE, "malformed pattern (ends with " LUA_QL("%") ")") 91ERRDEF(STRPATE, "malformed pattern (ends with " LUA_QL("%") ")")
92ERRDEF(STRPATM, "malformed pattern (missing " LUA_QL("]") ")") 92ERRDEF(STRPATM, "malformed pattern (missing " LUA_QL("]") ")")
93ERRDEF(STRPATU, "unbalanced pattern") 93ERRDEF(STRPATU, "unbalanced pattern")
94ERRDEF(STRPATX, "pattern too complex")
94ERRDEF(STRCAPI, "invalid capture index") 95ERRDEF(STRCAPI, "invalid capture index")
95ERRDEF(STRCAPN, "too many captures") 96ERRDEF(STRCAPN, "too many captures")
96ERRDEF(STRCAPU, "unfinished capture") 97ERRDEF(STRCAPU, "unfinished capture")