diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib_string.c | 93 | ||||
| -rw-r--r-- | src/lj_str.c | 36 | ||||
| -rw-r--r-- | src/lj_str.h | 7 |
3 files changed, 73 insertions, 63 deletions
diff --git a/src/lib_string.c b/src/lib_string.c index e460e834..ac21dda4 100644 --- a/src/lib_string.c +++ b/src/lib_string.c | |||
| @@ -155,7 +155,6 @@ typedef struct MatchState { | |||
| 155 | } MatchState; | 155 | } MatchState; |
| 156 | 156 | ||
| 157 | #define L_ESC '%' | 157 | #define L_ESC '%' |
| 158 | #define SPECIALS "^$*+?.([%-" | ||
| 159 | 158 | ||
| 160 | static int check_capture(MatchState *ms, int l) | 159 | static int check_capture(MatchState *ms, int l) |
| 161 | { | 160 | { |
| @@ -422,30 +421,6 @@ static const char *match(MatchState *ms, const char *s, const char *p) | |||
| 422 | return s; | 421 | return s; |
| 423 | } | 422 | } |
| 424 | 423 | ||
| 425 | static const char *lmemfind(const char *s1, size_t l1, | ||
| 426 | const char *s2, size_t l2) | ||
| 427 | { | ||
| 428 | if (l2 == 0) { | ||
| 429 | return s1; /* empty strings are everywhere */ | ||
| 430 | } else if (l2 > l1) { | ||
| 431 | return NULL; /* avoids a negative `l1' */ | ||
| 432 | } else { | ||
| 433 | const char *init; /* to search for a `*s2' inside `s1' */ | ||
| 434 | l2--; /* 1st char will be checked by `memchr' */ | ||
| 435 | l1 = l1-l2; /* `s2' cannot be found after that */ | ||
| 436 | while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { | ||
| 437 | init++; /* 1st char is already checked */ | ||
| 438 | if (memcmp(init, s2+1, l2) == 0) { | ||
| 439 | return init-1; | ||
| 440 | } else { /* correct `l1' and `s1' to try again */ | ||
| 441 | l1 -= (size_t)(init-s1); | ||
| 442 | s1 = init; | ||
| 443 | } | ||
| 444 | } | ||
| 445 | return NULL; /* not found */ | ||
| 446 | } | ||
| 447 | } | ||
| 448 | |||
| 449 | static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) | 424 | static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) |
| 450 | { | 425 | { |
| 451 | if (i >= ms->level) { | 426 | if (i >= ms->level) { |
| @@ -473,60 +448,56 @@ static int push_captures(MatchState *ms, const char *s, const char *e) | |||
| 473 | return nlevels; /* number of strings pushed */ | 448 | return nlevels; /* number of strings pushed */ |
| 474 | } | 449 | } |
| 475 | 450 | ||
| 476 | static ptrdiff_t posrelat(ptrdiff_t pos, size_t len) | ||
| 477 | { | ||
| 478 | /* relative string position: negative means back from end */ | ||
| 479 | if (pos < 0) pos += (ptrdiff_t)len + 1; | ||
| 480 | return (pos >= 0) ? pos : 0; | ||
| 481 | } | ||
| 482 | |||
| 483 | static int str_find_aux(lua_State *L, int find) | 451 | static int str_find_aux(lua_State *L, int find) |
| 484 | { | 452 | { |
| 485 | size_t l1, l2; | 453 | GCstr *s = lj_lib_checkstr(L, 1); |
| 486 | const char *s = luaL_checklstring(L, 1, &l1); | 454 | GCstr *p = lj_lib_checkstr(L, 2); |
| 487 | const char *p = luaL_checklstring(L, 2, &l2); | 455 | int32_t start = lj_lib_optint(L, 3, 1); |
| 488 | ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; | 456 | MSize st; |
| 489 | if (init < 0) { | 457 | if (start < 0) start += (int32_t)s->len; else start--; |
| 490 | init = 0; | 458 | if (start < 0) start = 0; |
| 491 | } else if ((size_t)(init) > l1) { | 459 | st = (MSize)start; |
| 460 | if (st > s->len) { | ||
| 492 | #if LJ_52 | 461 | #if LJ_52 |
| 493 | setnilV(L->top-1); | 462 | setnilV(L->top-1); |
| 494 | return 1; | 463 | return 1; |
| 495 | #else | 464 | #else |
| 496 | init = (ptrdiff_t)l1; | 465 | st = s->len; |
| 497 | #endif | 466 | #endif |
| 498 | } | 467 | } |
| 499 | if (find && (lua_toboolean(L, 4) || /* explicit request? */ | 468 | if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) || |
| 500 | strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ | 469 | !lj_str_haspattern(p))) { /* Search for fixed string. */ |
| 501 | /* do a plain search */ | 470 | const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len); |
| 502 | const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); | 471 | if (q) { |
| 503 | if (s2) { | 472 | setintV(L->top-2, (int32_t)(q-strdata(s)) + 1); |
| 504 | lua_pushinteger(L, s2-s+1); | 473 | setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len); |
| 505 | lua_pushinteger(L, s2-s+(ptrdiff_t)l2); | ||
| 506 | return 2; | 474 | return 2; |
| 507 | } | 475 | } |
| 508 | } else { | 476 | } else { /* Search for pattern. */ |
| 509 | MatchState ms; | 477 | MatchState ms; |
| 510 | int anchor = (*p == '^') ? (p++, 1) : 0; | 478 | const char *pstr = strdata(p); |
| 511 | const char *s1=s+init; | 479 | const char *sstr = strdata(s) + st; |
| 480 | int anchor = 0; | ||
| 481 | if (*pstr == '^') { pstr++; anchor = 1; } | ||
| 512 | ms.L = L; | 482 | ms.L = L; |
| 513 | ms.src_init = s; | 483 | ms.src_init = strdata(s); |
| 514 | ms.src_end = s+l1; | 484 | ms.src_end = strdata(s) + s->len; |
| 515 | do { | 485 | do { /* Loop through string and try to match the pattern. */ |
| 516 | const char *res; | 486 | const char *q; |
| 517 | ms.level = ms.depth = 0; | 487 | ms.level = ms.depth = 0; |
| 518 | if ((res=match(&ms, s1, p)) != NULL) { | 488 | q = match(&ms, sstr, pstr); |
| 489 | if (q) { | ||
| 519 | if (find) { | 490 | if (find) { |
| 520 | lua_pushinteger(L, s1-s+1); /* start */ | 491 | setintV(L->top++, (int32_t)(sstr-(strdata(s)-1))); |
| 521 | lua_pushinteger(L, res-s); /* end */ | 492 | setintV(L->top++, (int32_t)(q-strdata(s))); |
| 522 | return push_captures(&ms, NULL, 0) + 2; | 493 | return push_captures(&ms, NULL, NULL) + 2; |
| 523 | } else { | 494 | } else { |
| 524 | return push_captures(&ms, s1, res); | 495 | return push_captures(&ms, sstr, q); |
| 525 | } | 496 | } |
| 526 | } | 497 | } |
| 527 | } while (s1++ < ms.src_end && !anchor); | 498 | } while (sstr++ < ms.src_end && !anchor); |
| 528 | } | 499 | } |
| 529 | lua_pushnil(L); /* not found */ | 500 | setnilV(L->top-1); /* Not found. */ |
| 530 | return 1; | 501 | return 1; |
| 531 | } | 502 | } |
| 532 | 503 | ||
diff --git a/src/lj_str.c b/src/lj_str.c index 675213d7..f5bbae26 100644 --- a/src/lj_str.c +++ b/src/lj_str.c | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | #include "lj_state.h" | 16 | #include "lj_state.h" |
| 17 | #include "lj_char.h" | 17 | #include "lj_char.h" |
| 18 | 18 | ||
| 19 | /* -- String interning ---------------------------------------------------- */ | 19 | /* -- String helpers ------------------------------------------------------ */ |
| 20 | 20 | ||
| 21 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | 21 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ |
| 22 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) | 22 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) |
| @@ -62,6 +62,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) | |||
| 62 | return 0; | 62 | return 0; |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | /* Find fixed string p inside string s. */ | ||
| 66 | const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) | ||
| 67 | { | ||
| 68 | if (plen <= slen) { | ||
| 69 | if (plen == 0) { | ||
| 70 | return s; | ||
| 71 | } else { | ||
| 72 | int c = *(const uint8_t *)p++; | ||
| 73 | plen--; slen -= plen; | ||
| 74 | while (slen) { | ||
| 75 | const char *q = (const char *)memchr(s, c, slen); | ||
| 76 | if (!q) break; | ||
| 77 | if (memcmp(q+1, p, plen) == 0) return q; | ||
| 78 | q++; slen -= (MSize)(q-s); s = q; | ||
| 79 | } | ||
| 80 | } | ||
| 81 | } | ||
| 82 | return NULL; | ||
| 83 | } | ||
| 84 | |||
| 85 | /* Check whether a string has a pattern matching character. */ | ||
| 86 | int lj_str_haspattern(GCstr *s) | ||
| 87 | { | ||
| 88 | const char *p = strdata(s), *q = p + s->len; | ||
| 89 | while (p < q) { | ||
| 90 | int c = *(const uint8_t *)p++; | ||
| 91 | if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c)) | ||
| 92 | return 1; /* Found a pattern matching char. */ | ||
| 93 | } | ||
| 94 | return 0; /* No pattern matching chars found. */ | ||
| 95 | } | ||
| 96 | |||
| 97 | /* -- String interning ---------------------------------------------------- */ | ||
| 98 | |||
| 65 | /* Resize the string hash table (grow and shrink). */ | 99 | /* Resize the string hash table (grow and shrink). */ |
| 66 | void lj_str_resize(lua_State *L, MSize newmask) | 100 | void lj_str_resize(lua_State *L, MSize newmask) |
| 67 | { | 101 | { |
diff --git a/src/lj_str.h b/src/lj_str.h index bf508d3b..dd9b3d94 100644 --- a/src/lj_str.h +++ b/src/lj_str.h | |||
| @@ -10,8 +10,13 @@ | |||
| 10 | 10 | ||
| 11 | #include "lj_obj.h" | 11 | #include "lj_obj.h" |
| 12 | 12 | ||
| 13 | /* String interning. */ | 13 | /* String helpers. */ |
| 14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); | 14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); |
| 15 | LJ_FUNC const char *lj_str_find(const char *s, const char *f, | ||
| 16 | MSize slen, MSize flen); | ||
| 17 | LJ_FUNC int lj_str_haspattern(GCstr *s); | ||
| 18 | |||
| 19 | /* String interning. */ | ||
| 15 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); | 20 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); |
| 16 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); | 21 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); |
| 17 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | 22 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); |
