aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/lib_string.c93
-rw-r--r--src/lj_str.c36
-rw-r--r--src/lj_str.h7
3 files changed, 73 insertions, 63 deletions
diff --git a/src/lib_string.c b/src/lib_string.c
index e460e834..ac21dda4 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -155,7 +155,6 @@ typedef struct MatchState {
155} MatchState; 155} MatchState;
156 156
157#define L_ESC '%' 157#define L_ESC '%'
158#define SPECIALS "^$*+?.([%-"
159 158
160static int check_capture(MatchState *ms, int l) 159static int check_capture(MatchState *ms, int l)
161{ 160{
@@ -422,30 +421,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
422 return s; 421 return s;
423} 422}
424 423
425static const char *lmemfind(const char *s1, size_t l1,
426 const char *s2, size_t l2)
427{
428 if (l2 == 0) {
429 return s1; /* empty strings are everywhere */
430 } else if (l2 > l1) {
431 return NULL; /* avoids a negative `l1' */
432 } else {
433 const char *init; /* to search for a `*s2' inside `s1' */
434 l2--; /* 1st char will be checked by `memchr' */
435 l1 = l1-l2; /* `s2' cannot be found after that */
436 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
437 init++; /* 1st char is already checked */
438 if (memcmp(init, s2+1, l2) == 0) {
439 return init-1;
440 } else { /* correct `l1' and `s1' to try again */
441 l1 -= (size_t)(init-s1);
442 s1 = init;
443 }
444 }
445 return NULL; /* not found */
446 }
447}
448
449static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) 424static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
450{ 425{
451 if (i >= ms->level) { 426 if (i >= ms->level) {
@@ -473,60 +448,56 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
473 return nlevels; /* number of strings pushed */ 448 return nlevels; /* number of strings pushed */
474} 449}
475 450
476static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
477{
478 /* relative string position: negative means back from end */
479 if (pos < 0) pos += (ptrdiff_t)len + 1;
480 return (pos >= 0) ? pos : 0;
481}
482
483static int str_find_aux(lua_State *L, int find) 451static int str_find_aux(lua_State *L, int find)
484{ 452{
485 size_t l1, l2; 453 GCstr *s = lj_lib_checkstr(L, 1);
486 const char *s = luaL_checklstring(L, 1, &l1); 454 GCstr *p = lj_lib_checkstr(L, 2);
487 const char *p = luaL_checklstring(L, 2, &l2); 455 int32_t start = lj_lib_optint(L, 3, 1);
488 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; 456 MSize st;
489 if (init < 0) { 457 if (start < 0) start += (int32_t)s->len; else start--;
490 init = 0; 458 if (start < 0) start = 0;
491 } else if ((size_t)(init) > l1) { 459 st = (MSize)start;
460 if (st > s->len) {
492#if LJ_52 461#if LJ_52
493 setnilV(L->top-1); 462 setnilV(L->top-1);
494 return 1; 463 return 1;
495#else 464#else
496 init = (ptrdiff_t)l1; 465 st = s->len;
497#endif 466#endif
498 } 467 }
499 if (find && (lua_toboolean(L, 4) || /* explicit request? */ 468 if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
500 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ 469 !lj_str_haspattern(p))) { /* Search for fixed string. */
501 /* do a plain search */ 470 const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
502 const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); 471 if (q) {
503 if (s2) { 472 setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
504 lua_pushinteger(L, s2-s+1); 473 setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
505 lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
506 return 2; 474 return 2;
507 } 475 }
508 } else { 476 } else { /* Search for pattern. */
509 MatchState ms; 477 MatchState ms;
510 int anchor = (*p == '^') ? (p++, 1) : 0; 478 const char *pstr = strdata(p);
511 const char *s1=s+init; 479 const char *sstr = strdata(s) + st;
480 int anchor = 0;
481 if (*pstr == '^') { pstr++; anchor = 1; }
512 ms.L = L; 482 ms.L = L;
513 ms.src_init = s; 483 ms.src_init = strdata(s);
514 ms.src_end = s+l1; 484 ms.src_end = strdata(s) + s->len;
515 do { 485 do { /* Loop through string and try to match the pattern. */
516 const char *res; 486 const char *q;
517 ms.level = ms.depth = 0; 487 ms.level = ms.depth = 0;
518 if ((res=match(&ms, s1, p)) != NULL) { 488 q = match(&ms, sstr, pstr);
489 if (q) {
519 if (find) { 490 if (find) {
520 lua_pushinteger(L, s1-s+1); /* start */ 491 setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
521 lua_pushinteger(L, res-s); /* end */ 492 setintV(L->top++, (int32_t)(q-strdata(s)));
522 return push_captures(&ms, NULL, 0) + 2; 493 return push_captures(&ms, NULL, NULL) + 2;
523 } else { 494 } else {
524 return push_captures(&ms, s1, res); 495 return push_captures(&ms, sstr, q);
525 } 496 }
526 } 497 }
527 } while (s1++ < ms.src_end && !anchor); 498 } while (sstr++ < ms.src_end && !anchor);
528 } 499 }
529 lua_pushnil(L); /* not found */ 500 setnilV(L->top-1); /* Not found. */
530 return 1; 501 return 1;
531} 502}
532 503
diff --git a/src/lj_str.c b/src/lj_str.c
index 675213d7..f5bbae26 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -16,7 +16,7 @@
16#include "lj_state.h" 16#include "lj_state.h"
17#include "lj_char.h" 17#include "lj_char.h"
18 18
19/* -- String interning ---------------------------------------------------- */ 19/* -- String helpers ------------------------------------------------------ */
20 20
21/* Ordered compare of strings. Assumes string data is 4-byte aligned. */ 21/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
22int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) 22int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@@ -62,6 +62,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
62 return 0; 62 return 0;
63} 63}
64 64
65/* Find fixed string p inside string s. */
66const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
67{
68 if (plen <= slen) {
69 if (plen == 0) {
70 return s;
71 } else {
72 int c = *(const uint8_t *)p++;
73 plen--; slen -= plen;
74 while (slen) {
75 const char *q = (const char *)memchr(s, c, slen);
76 if (!q) break;
77 if (memcmp(q+1, p, plen) == 0) return q;
78 q++; slen -= (MSize)(q-s); s = q;
79 }
80 }
81 }
82 return NULL;
83}
84
85/* Check whether a string has a pattern matching character. */
86int lj_str_haspattern(GCstr *s)
87{
88 const char *p = strdata(s), *q = p + s->len;
89 while (p < q) {
90 int c = *(const uint8_t *)p++;
91 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
92 return 1; /* Found a pattern matching char. */
93 }
94 return 0; /* No pattern matching chars found. */
95}
96
97/* -- String interning ---------------------------------------------------- */
98
65/* Resize the string hash table (grow and shrink). */ 99/* Resize the string hash table (grow and shrink). */
66void lj_str_resize(lua_State *L, MSize newmask) 100void lj_str_resize(lua_State *L, MSize newmask)
67{ 101{
diff --git a/src/lj_str.h b/src/lj_str.h
index bf508d3b..dd9b3d94 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -10,8 +10,13 @@
10 10
11#include "lj_obj.h" 11#include "lj_obj.h"
12 12
13/* String interning. */ 13/* String helpers. */
14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); 14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
15LJ_FUNC const char *lj_str_find(const char *s, const char *f,
16 MSize slen, MSize flen);
17LJ_FUNC int lj_str_haspattern(GCstr *s);
18
19/* String interning. */
15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); 20LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); 21LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); 22LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);