diff options
author | Mike Pall <mike> | 2013-05-07 20:44:58 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2013-05-07 20:44:58 +0200 |
commit | 43de451d7876c383a747c08f42424520ddcb74fa (patch) | |
tree | 01f5c6d16192fa3a571f17f2531fc4d57c9f2808 | |
parent | 2be1c2658f3d4dd39194102bd502b01bd4fa883c (diff) | |
download | luajit-43de451d7876c383a747c08f42424520ddcb74fa.tar.gz luajit-43de451d7876c383a747c08f42424520ddcb74fa.tar.bz2 luajit-43de451d7876c383a747c08f42424520ddcb74fa.zip |
Partially refactor string.find().
-rw-r--r-- | src/lib_string.c | 93 | ||||
-rw-r--r-- | src/lj_str.c | 36 | ||||
-rw-r--r-- | src/lj_str.h | 7 |
3 files changed, 73 insertions, 63 deletions
diff --git a/src/lib_string.c b/src/lib_string.c index e460e834..ac21dda4 100644 --- a/src/lib_string.c +++ b/src/lib_string.c | |||
@@ -155,7 +155,6 @@ typedef struct MatchState { | |||
155 | } MatchState; | 155 | } MatchState; |
156 | 156 | ||
157 | #define L_ESC '%' | 157 | #define L_ESC '%' |
158 | #define SPECIALS "^$*+?.([%-" | ||
159 | 158 | ||
160 | static int check_capture(MatchState *ms, int l) | 159 | static int check_capture(MatchState *ms, int l) |
161 | { | 160 | { |
@@ -422,30 +421,6 @@ static const char *match(MatchState *ms, const char *s, const char *p) | |||
422 | return s; | 421 | return s; |
423 | } | 422 | } |
424 | 423 | ||
425 | static const char *lmemfind(const char *s1, size_t l1, | ||
426 | const char *s2, size_t l2) | ||
427 | { | ||
428 | if (l2 == 0) { | ||
429 | return s1; /* empty strings are everywhere */ | ||
430 | } else if (l2 > l1) { | ||
431 | return NULL; /* avoids a negative `l1' */ | ||
432 | } else { | ||
433 | const char *init; /* to search for a `*s2' inside `s1' */ | ||
434 | l2--; /* 1st char will be checked by `memchr' */ | ||
435 | l1 = l1-l2; /* `s2' cannot be found after that */ | ||
436 | while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { | ||
437 | init++; /* 1st char is already checked */ | ||
438 | if (memcmp(init, s2+1, l2) == 0) { | ||
439 | return init-1; | ||
440 | } else { /* correct `l1' and `s1' to try again */ | ||
441 | l1 -= (size_t)(init-s1); | ||
442 | s1 = init; | ||
443 | } | ||
444 | } | ||
445 | return NULL; /* not found */ | ||
446 | } | ||
447 | } | ||
448 | |||
449 | static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) | 424 | static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) |
450 | { | 425 | { |
451 | if (i >= ms->level) { | 426 | if (i >= ms->level) { |
@@ -473,60 +448,56 @@ static int push_captures(MatchState *ms, const char *s, const char *e) | |||
473 | return nlevels; /* number of strings pushed */ | 448 | return nlevels; /* number of strings pushed */ |
474 | } | 449 | } |
475 | 450 | ||
476 | static ptrdiff_t posrelat(ptrdiff_t pos, size_t len) | ||
477 | { | ||
478 | /* relative string position: negative means back from end */ | ||
479 | if (pos < 0) pos += (ptrdiff_t)len + 1; | ||
480 | return (pos >= 0) ? pos : 0; | ||
481 | } | ||
482 | |||
483 | static int str_find_aux(lua_State *L, int find) | 451 | static int str_find_aux(lua_State *L, int find) |
484 | { | 452 | { |
485 | size_t l1, l2; | 453 | GCstr *s = lj_lib_checkstr(L, 1); |
486 | const char *s = luaL_checklstring(L, 1, &l1); | 454 | GCstr *p = lj_lib_checkstr(L, 2); |
487 | const char *p = luaL_checklstring(L, 2, &l2); | 455 | int32_t start = lj_lib_optint(L, 3, 1); |
488 | ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; | 456 | MSize st; |
489 | if (init < 0) { | 457 | if (start < 0) start += (int32_t)s->len; else start--; |
490 | init = 0; | 458 | if (start < 0) start = 0; |
491 | } else if ((size_t)(init) > l1) { | 459 | st = (MSize)start; |
460 | if (st > s->len) { | ||
492 | #if LJ_52 | 461 | #if LJ_52 |
493 | setnilV(L->top-1); | 462 | setnilV(L->top-1); |
494 | return 1; | 463 | return 1; |
495 | #else | 464 | #else |
496 | init = (ptrdiff_t)l1; | 465 | st = s->len; |
497 | #endif | 466 | #endif |
498 | } | 467 | } |
499 | if (find && (lua_toboolean(L, 4) || /* explicit request? */ | 468 | if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) || |
500 | strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ | 469 | !lj_str_haspattern(p))) { /* Search for fixed string. */ |
501 | /* do a plain search */ | 470 | const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len); |
502 | const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); | 471 | if (q) { |
503 | if (s2) { | 472 | setintV(L->top-2, (int32_t)(q-strdata(s)) + 1); |
504 | lua_pushinteger(L, s2-s+1); | 473 | setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len); |
505 | lua_pushinteger(L, s2-s+(ptrdiff_t)l2); | ||
506 | return 2; | 474 | return 2; |
507 | } | 475 | } |
508 | } else { | 476 | } else { /* Search for pattern. */ |
509 | MatchState ms; | 477 | MatchState ms; |
510 | int anchor = (*p == '^') ? (p++, 1) : 0; | 478 | const char *pstr = strdata(p); |
511 | const char *s1=s+init; | 479 | const char *sstr = strdata(s) + st; |
480 | int anchor = 0; | ||
481 | if (*pstr == '^') { pstr++; anchor = 1; } | ||
512 | ms.L = L; | 482 | ms.L = L; |
513 | ms.src_init = s; | 483 | ms.src_init = strdata(s); |
514 | ms.src_end = s+l1; | 484 | ms.src_end = strdata(s) + s->len; |
515 | do { | 485 | do { /* Loop through string and try to match the pattern. */ |
516 | const char *res; | 486 | const char *q; |
517 | ms.level = ms.depth = 0; | 487 | ms.level = ms.depth = 0; |
518 | if ((res=match(&ms, s1, p)) != NULL) { | 488 | q = match(&ms, sstr, pstr); |
489 | if (q) { | ||
519 | if (find) { | 490 | if (find) { |
520 | lua_pushinteger(L, s1-s+1); /* start */ | 491 | setintV(L->top++, (int32_t)(sstr-(strdata(s)-1))); |
521 | lua_pushinteger(L, res-s); /* end */ | 492 | setintV(L->top++, (int32_t)(q-strdata(s))); |
522 | return push_captures(&ms, NULL, 0) + 2; | 493 | return push_captures(&ms, NULL, NULL) + 2; |
523 | } else { | 494 | } else { |
524 | return push_captures(&ms, s1, res); | 495 | return push_captures(&ms, sstr, q); |
525 | } | 496 | } |
526 | } | 497 | } |
527 | } while (s1++ < ms.src_end && !anchor); | 498 | } while (sstr++ < ms.src_end && !anchor); |
528 | } | 499 | } |
529 | lua_pushnil(L); /* not found */ | 500 | setnilV(L->top-1); /* Not found. */ |
530 | return 1; | 501 | return 1; |
531 | } | 502 | } |
532 | 503 | ||
diff --git a/src/lj_str.c b/src/lj_str.c index 675213d7..f5bbae26 100644 --- a/src/lj_str.c +++ b/src/lj_str.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include "lj_state.h" | 16 | #include "lj_state.h" |
17 | #include "lj_char.h" | 17 | #include "lj_char.h" |
18 | 18 | ||
19 | /* -- String interning ---------------------------------------------------- */ | 19 | /* -- String helpers ------------------------------------------------------ */ |
20 | 20 | ||
21 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | 21 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ |
22 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) | 22 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) |
@@ -62,6 +62,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) | |||
62 | return 0; | 62 | return 0; |
63 | } | 63 | } |
64 | 64 | ||
65 | /* Find fixed string p inside string s. */ | ||
66 | const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) | ||
67 | { | ||
68 | if (plen <= slen) { | ||
69 | if (plen == 0) { | ||
70 | return s; | ||
71 | } else { | ||
72 | int c = *(const uint8_t *)p++; | ||
73 | plen--; slen -= plen; | ||
74 | while (slen) { | ||
75 | const char *q = (const char *)memchr(s, c, slen); | ||
76 | if (!q) break; | ||
77 | if (memcmp(q+1, p, plen) == 0) return q; | ||
78 | q++; slen -= (MSize)(q-s); s = q; | ||
79 | } | ||
80 | } | ||
81 | } | ||
82 | return NULL; | ||
83 | } | ||
84 | |||
85 | /* Check whether a string has a pattern matching character. */ | ||
86 | int lj_str_haspattern(GCstr *s) | ||
87 | { | ||
88 | const char *p = strdata(s), *q = p + s->len; | ||
89 | while (p < q) { | ||
90 | int c = *(const uint8_t *)p++; | ||
91 | if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c)) | ||
92 | return 1; /* Found a pattern matching char. */ | ||
93 | } | ||
94 | return 0; /* No pattern matching chars found. */ | ||
95 | } | ||
96 | |||
97 | /* -- String interning ---------------------------------------------------- */ | ||
98 | |||
65 | /* Resize the string hash table (grow and shrink). */ | 99 | /* Resize the string hash table (grow and shrink). */ |
66 | void lj_str_resize(lua_State *L, MSize newmask) | 100 | void lj_str_resize(lua_State *L, MSize newmask) |
67 | { | 101 | { |
diff --git a/src/lj_str.h b/src/lj_str.h index bf508d3b..dd9b3d94 100644 --- a/src/lj_str.h +++ b/src/lj_str.h | |||
@@ -10,8 +10,13 @@ | |||
10 | 10 | ||
11 | #include "lj_obj.h" | 11 | #include "lj_obj.h" |
12 | 12 | ||
13 | /* String interning. */ | 13 | /* String helpers. */ |
14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); | 14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); |
15 | LJ_FUNC const char *lj_str_find(const char *s, const char *f, | ||
16 | MSize slen, MSize flen); | ||
17 | LJ_FUNC int lj_str_haspattern(GCstr *s); | ||
18 | |||
19 | /* String interning. */ | ||
15 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); | 20 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); |
16 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); | 21 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); |
17 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | 22 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); |