diff options
| -rw-r--r-- | lpcap.c | 26 | ||||
| -rw-r--r-- | lpcap.h | 14 | ||||
| -rw-r--r-- | lpprint.c | 27 | ||||
| -rw-r--r-- | lpprint.h | 2 | ||||
| -rw-r--r-- | lptree.c | 1 | ||||
| -rw-r--r-- | lpvm.c | 20 |
6 files changed, 60 insertions, 30 deletions
| @@ -10,7 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #define isclosecap(cap) (captype(cap) == Cclose) | 11 | #define isclosecap(cap) (captype(cap) == Cclose) |
| 12 | 12 | ||
| 13 | #define closeaddr(c) ((c)->s + (c)->siz - 1) | 13 | #define closeaddr(cs,c) ((cs)->s + (c)->index + (c)->siz - 1) |
| 14 | 14 | ||
| 15 | #define isfullcap(cap) ((cap)->siz != 0) | 15 | #define isfullcap(cap) ((cap)->siz != 0) |
| 16 | 16 | ||
| @@ -82,7 +82,8 @@ static void nextcap (CapState *cs) { | |||
| 82 | static int pushnestedvalues (CapState *cs, int addextra) { | 82 | static int pushnestedvalues (CapState *cs, int addextra) { |
| 83 | Capture *co = cs->cap; | 83 | Capture *co = cs->cap; |
| 84 | if (isfullcap(cs->cap++)) { /* no nested captures? */ | 84 | if (isfullcap(cs->cap++)) { /* no nested captures? */ |
| 85 | lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ | 85 | /* push whole match */ |
| 86 | lua_pushlstring(cs->L, cs->s + co->index, co->siz - 1); | ||
| 86 | return 1; /* that is it */ | 87 | return 1; /* that is it */ |
| 87 | } | 88 | } |
| 88 | else { | 89 | else { |
| @@ -90,7 +91,8 @@ static int pushnestedvalues (CapState *cs, int addextra) { | |||
| 90 | while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ | 91 | while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ |
| 91 | n += pushcapture(cs); | 92 | n += pushcapture(cs); |
| 92 | if (addextra || n == 0) { /* need extra? */ | 93 | if (addextra || n == 0) { /* need extra? */ |
| 93 | lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ | 94 | /* push whole match */ |
| 95 | lua_pushlstring(cs->L, cs->s + co->index, cs->cap->index - co->index); | ||
| 94 | n++; | 96 | n++; |
| 95 | } | 97 | } |
| 96 | cs->cap++; /* skip close entry */ | 98 | cs->cap++; /* skip close entry */ |
| @@ -295,7 +297,7 @@ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { | |||
| 295 | assert(captype(open) == Cgroup); | 297 | assert(captype(open) == Cgroup); |
| 296 | id = finddyncap(open, close); /* get first dynamic capture argument */ | 298 | id = finddyncap(open, close); /* get first dynamic capture argument */ |
| 297 | close->kind = Cclose; /* closes the group */ | 299 | close->kind = Cclose; /* closes the group */ |
| 298 | close->s = s; | 300 | close->index = s - cs->s; |
| 299 | cs->cap = open; cs->valuecached = 0; /* prepare capture state */ | 301 | cs->cap = open; cs->valuecached = 0; /* prepare capture state */ |
| 300 | luaL_checkstack(L, 4, "too many runtime captures"); | 302 | luaL_checkstack(L, 4, "too many runtime captures"); |
| 301 | pushluaval(cs); /* push function to be called */ | 303 | pushluaval(cs); /* push function to be called */ |
| @@ -342,7 +344,7 @@ typedef struct StrAux { | |||
| 342 | static int getstrcaps (CapState *cs, StrAux *cps, int n) { | 344 | static int getstrcaps (CapState *cs, StrAux *cps, int n) { |
| 343 | int k = n++; | 345 | int k = n++; |
| 344 | cps[k].isstring = 1; /* get string value */ | 346 | cps[k].isstring = 1; /* get string value */ |
| 345 | cps[k].u.s.s = cs->cap->s; /* starts here */ | 347 | cps[k].u.s.s = cs->s + cs->cap->index; /* starts here */ |
| 346 | if (!isfullcap(cs->cap++)) { /* nested captures? */ | 348 | if (!isfullcap(cs->cap++)) { /* nested captures? */ |
| 347 | while (!isclosecap(cs->cap)) { /* traverse them */ | 349 | while (!isclosecap(cs->cap)) { /* traverse them */ |
| 348 | if (n >= MAXSTRCAPS) /* too many captures? */ | 350 | if (n >= MAXSTRCAPS) /* too many captures? */ |
| @@ -358,7 +360,7 @@ static int getstrcaps (CapState *cs, StrAux *cps, int n) { | |||
| 358 | } | 360 | } |
| 359 | cs->cap++; /* skip close */ | 361 | cs->cap++; /* skip close */ |
| 360 | } | 362 | } |
| 361 | cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ | 363 | cps[k].u.s.e = closeaddr(cs, cs->cap - 1); /* ends here */ |
| 362 | return n; | 364 | return n; |
| 363 | } | 365 | } |
| 364 | 366 | ||
| @@ -407,20 +409,21 @@ static void stringcap (luaL_Buffer *b, CapState *cs) { | |||
| 407 | ** Substitution capture: add result to buffer 'b' | 409 | ** Substitution capture: add result to buffer 'b' |
| 408 | */ | 410 | */ |
| 409 | static void substcap (luaL_Buffer *b, CapState *cs) { | 411 | static void substcap (luaL_Buffer *b, CapState *cs) { |
| 410 | const char *curr = cs->cap->s; | 412 | const char *curr = cs->s + cs->cap->index; |
| 411 | if (isfullcap(cs->cap)) /* no nested captures? */ | 413 | if (isfullcap(cs->cap)) /* no nested captures? */ |
| 412 | luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ | 414 | luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ |
| 413 | else { | 415 | else { |
| 414 | cs->cap++; /* skip open entry */ | 416 | cs->cap++; /* skip open entry */ |
| 415 | while (!isclosecap(cs->cap)) { /* traverse nested captures */ | 417 | while (!isclosecap(cs->cap)) { /* traverse nested captures */ |
| 416 | const char *next = cs->cap->s; | 418 | const char *next = cs->s + cs->cap->index; |
| 417 | luaL_addlstring(b, curr, next - curr); /* add text up to capture */ | 419 | luaL_addlstring(b, curr, next - curr); /* add text up to capture */ |
| 418 | if (addonestring(b, cs, "replacement")) | 420 | if (addonestring(b, cs, "replacement")) |
| 419 | curr = closeaddr(cs->cap - 1); /* continue after match */ | 421 | curr = closeaddr(cs, cs->cap - 1); /* continue after match */ |
| 420 | else /* no capture value */ | 422 | else /* no capture value */ |
| 421 | curr = next; /* keep original text in final result */ | 423 | curr = next; /* keep original text in final result */ |
| 422 | } | 424 | } |
| 423 | luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ | 425 | /* add last piece of text */ |
| 426 | luaL_addlstring(b, curr, cs->s + cs->cap->index - curr); | ||
| 424 | } | 427 | } |
| 425 | cs->cap++; /* go to next capture */ | 428 | cs->cap++; /* go to next capture */ |
| 426 | } | 429 | } |
| @@ -473,7 +476,7 @@ static int pushcapture (CapState *cs) { | |||
| 473 | return luaL_error(L, "subcapture nesting too deep"); | 476 | return luaL_error(L, "subcapture nesting too deep"); |
| 474 | switch (captype(cs->cap)) { | 477 | switch (captype(cs->cap)) { |
| 475 | case Cposition: { | 478 | case Cposition: { |
| 476 | lua_pushinteger(L, cs->cap->s - cs->s + 1); | 479 | lua_pushinteger(L, cs->cap->index + 1); |
| 477 | cs->cap++; | 480 | cs->cap++; |
| 478 | res = 1; | 481 | res = 1; |
| 479 | break; | 482 | break; |
| @@ -553,6 +556,7 @@ static int pushcapture (CapState *cs) { | |||
| 553 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { | 556 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { |
| 554 | Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); | 557 | Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); |
| 555 | int n = 0; | 558 | int n = 0; |
| 559 | /* printcaplist(capture); */ | ||
| 556 | if (!isclosecap(capture)) { /* is there any capture? */ | 560 | if (!isclosecap(capture)) { /* is there any capture? */ |
| 557 | CapState cs; | 561 | CapState cs; |
| 558 | cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; | 562 | cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; |
| @@ -27,8 +27,20 @@ typedef enum CapKind { | |||
| 27 | } CapKind; | 27 | } CapKind; |
| 28 | 28 | ||
| 29 | 29 | ||
| 30 | /* | ||
| 31 | ** An unsigned integer large enough to index any subject entirely. | ||
| 32 | ** It can be size_t, but that will double the size of the array | ||
| 33 | ** of captures in a 64-bit machine. | ||
| 34 | */ | ||
| 35 | #if !defined(Index_t) | ||
| 36 | typedef uint Index_t; | ||
| 37 | #endif | ||
| 38 | |||
| 39 | #define MAXINDT (~(Index_t)0) | ||
| 40 | |||
| 41 | |||
| 30 | typedef struct Capture { | 42 | typedef struct Capture { |
| 31 | const char *s; /* subject position */ | 43 | Index_t index; /* subject position */ |
| 32 | unsigned short idx; /* extra info (group name, arg index, etc.) */ | 44 | unsigned short idx; /* extra info (group name, arg index, etc.) */ |
| 33 | byte kind; /* kind of capture */ | 45 | byte kind; /* kind of capture */ |
| 34 | byte siz; /* size of full capture + 1 (0 = not a full capture) */ | 46 | byte siz; /* size of full capture + 1 (0 = not a full capture) */ |
| @@ -60,7 +60,7 @@ static void printTcharset (TTree *tree) { | |||
| 60 | static const char *capkind (int kind) { | 60 | static const char *capkind (int kind) { |
| 61 | const char *const modes[] = { | 61 | const char *const modes[] = { |
| 62 | "close", "position", "constant", "backref", | 62 | "close", "position", "constant", "backref", |
| 63 | "argument", "simple", "table", "function", "replace", | 63 | "argument", "simple", "table", "function", "accumulator", |
| 64 | "query", "string", "num", "substitution", "fold", | 64 | "query", "string", "num", "substitution", "fold", |
| 65 | "runtime", "group"}; | 65 | "runtime", "group"}; |
| 66 | return modes[kind]; | 66 | return modes[kind]; |
| @@ -147,16 +147,29 @@ void printpatt (Instruction *p) { | |||
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | 149 | ||
| 150 | static void printcap (Capture *cap) { | 150 | static void printcap (Capture *cap, int ident) { |
| 151 | printf("%s (idx: %d - size: %d) -> %p\n", | 151 | while (ident--) printf(" "); |
| 152 | capkind(cap->kind), cap->idx, cap->siz, cap->s); | 152 | printf("%s (idx: %d - size: %d) -> %ld\n", |
| 153 | capkind(cap->kind), cap->idx, cap->siz, (long)cap->index); | ||
| 153 | } | 154 | } |
| 154 | 155 | ||
| 155 | 156 | ||
| 156 | void printcaplist (Capture *cap, Capture *limit) { | 157 | static Capture *printcap2close (Capture *cap, int ident) { |
| 158 | while (cap->kind != Cclose) { | ||
| 159 | printcap(cap, ident); | ||
| 160 | if (cap->siz == 0) { | ||
| 161 | cap = printcap2close(cap + 1, ident + 2); | ||
| 162 | printcap(cap, ident); /* print 'close' capture */ | ||
| 163 | } | ||
| 164 | cap++; | ||
| 165 | } | ||
| 166 | return cap; | ||
| 167 | } | ||
| 168 | |||
| 169 | |||
| 170 | void printcaplist (Capture *cap) { | ||
| 157 | printf(">======\n"); | 171 | printf(">======\n"); |
| 158 | for (; cap->s && (limit == NULL || cap < limit); cap++) | 172 | printcap2close(cap, 0); |
| 159 | printcap(cap); | ||
| 160 | printf("=======\n"); | 173 | printf("=======\n"); |
| 161 | } | 174 | } |
| 162 | 175 | ||
| @@ -13,7 +13,7 @@ void printpatt (Instruction *p); | |||
| 13 | void printtree (TTree *tree, int ident); | 13 | void printtree (TTree *tree, int ident); |
| 14 | void printktable (lua_State *L, int idx); | 14 | void printktable (lua_State *L, int idx); |
| 15 | void printcharset (const byte *st); | 15 | void printcharset (const byte *st); |
| 16 | void printcaplist (Capture *cap, Capture *limit); | 16 | void printcaplist (Capture *cap); |
| 17 | void printinst (const Instruction *op, const Instruction *p); | 17 | void printinst (const Instruction *op, const Instruction *p); |
| 18 | 18 | ||
| 19 | #else | 19 | #else |
| @@ -1253,6 +1253,7 @@ static int lp_match (lua_State *L) { | |||
| 1253 | const char *s = luaL_checklstring(L, SUBJIDX, &l); | 1253 | const char *s = luaL_checklstring(L, SUBJIDX, &l); |
| 1254 | size_t i = initposition(L, l); | 1254 | size_t i = initposition(L, l); |
| 1255 | int ptop = lua_gettop(L); | 1255 | int ptop = lua_gettop(L); |
| 1256 | luaL_argcheck(L, l < MAXINDT, SUBJIDX, "subject too long"); | ||
| 1256 | lua_pushnil(L); /* initialize subscache */ | 1257 | lua_pushnil(L); /* initialize subscache */ |
| 1257 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ | 1258 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ |
| 1258 | lua_getuservalue(L, 1); /* initialize ktableidx */ | 1259 | lua_getuservalue(L, 1); /* initialize ktableidx */ |
| @@ -169,7 +169,7 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | |||
| 169 | ** value, 'n' is the number of values (at least 1). The open group | 169 | ** value, 'n' is the number of values (at least 1). The open group |
| 170 | ** capture is already in 'capture', before the place for the new entries. | 170 | ** capture is already in 'capture', before the place for the new entries. |
| 171 | */ | 171 | */ |
| 172 | static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { | 172 | static void adddyncaptures (Index_t index, Capture *capture, int n, int fd) { |
| 173 | int i; | 173 | int i; |
| 174 | assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); | 174 | assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); |
| 175 | capture[-1].idx = 0; /* make group capture an anonymous group */ | 175 | capture[-1].idx = 0; /* make group capture an anonymous group */ |
| @@ -177,11 +177,11 @@ static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { | |||
| 177 | capture[i].kind = Cruntime; | 177 | capture[i].kind = Cruntime; |
| 178 | capture[i].siz = 1; /* mark it as closed */ | 178 | capture[i].siz = 1; /* mark it as closed */ |
| 179 | capture[i].idx = fd + i; /* stack index of capture value */ | 179 | capture[i].idx = fd + i; /* stack index of capture value */ |
| 180 | capture[i].s = s; | 180 | capture[i].index = index; |
| 181 | } | 181 | } |
| 182 | capture[n].kind = Cclose; /* close group */ | 182 | capture[n].kind = Cclose; /* close group */ |
| 183 | capture[n].siz = 1; | 183 | capture[n].siz = 1; |
| 184 | capture[n].s = s; | 184 | capture[n].index = index; |
| 185 | } | 185 | } |
| 186 | 186 | ||
| 187 | 187 | ||
| @@ -225,7 +225,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
| 225 | case IEnd: { | 225 | case IEnd: { |
| 226 | assert(stack == getstackbase(L, ptop) + 1); | 226 | assert(stack == getstackbase(L, ptop) + 1); |
| 227 | capture[captop].kind = Cclose; | 227 | capture[captop].kind = Cclose; |
| 228 | capture[captop].s = NULL; | 228 | capture[captop].index = MAXINDT; |
| 229 | return s; | 229 | return s; |
| 230 | } | 230 | } |
| 231 | case IGiveup: { | 231 | case IGiveup: { |
| @@ -383,7 +383,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
| 383 | luaL_error(L, "too many results in match-time capture"); | 383 | luaL_error(L, "too many results in match-time capture"); |
| 384 | /* add new captures + close group to 'capture' list */ | 384 | /* add new captures + close group to 'capture' list */ |
| 385 | capture = growcap(L, capture, &capsize, captop, n + 1, ptop); | 385 | capture = growcap(L, capture, &capsize, captop, n + 1, ptop); |
| 386 | adddyncaptures(s, capture + captop, n, fr); | 386 | adddyncaptures(s - o, capture + captop, n, fr); |
| 387 | captop += n + 1; /* new captures + close group */ | 387 | captop += n + 1; /* new captures + close group */ |
| 388 | } | 388 | } |
| 389 | p++; | 389 | p++; |
| @@ -394,24 +394,24 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
| 394 | assert(captop > 0); | 394 | assert(captop > 0); |
| 395 | /* if possible, turn capture into a full capture */ | 395 | /* if possible, turn capture into a full capture */ |
| 396 | if (capture[captop - 1].siz == 0 && | 396 | if (capture[captop - 1].siz == 0 && |
| 397 | s1 - capture[captop - 1].s < UCHAR_MAX) { | 397 | (s1 - o) - capture[captop - 1].index < UCHAR_MAX) { |
| 398 | capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; | 398 | capture[captop - 1].siz = (s1 - o) - capture[captop - 1].index + 1; |
| 399 | p++; | 399 | p++; |
| 400 | continue; | 400 | continue; |
| 401 | } | 401 | } |
| 402 | else { | 402 | else { |
| 403 | capture[captop].siz = 1; /* mark entry as closed */ | 403 | capture[captop].siz = 1; /* mark entry as closed */ |
| 404 | capture[captop].s = s; | 404 | capture[captop].index = s - o; |
| 405 | goto pushcapture; | 405 | goto pushcapture; |
| 406 | } | 406 | } |
| 407 | } | 407 | } |
| 408 | case IOpenCapture: | 408 | case IOpenCapture: |
| 409 | capture[captop].siz = 0; /* mark entry as open */ | 409 | capture[captop].siz = 0; /* mark entry as open */ |
| 410 | capture[captop].s = s; | 410 | capture[captop].index = s - o; |
| 411 | goto pushcapture; | 411 | goto pushcapture; |
| 412 | case IFullCapture: | 412 | case IFullCapture: |
| 413 | capture[captop].siz = getoff(p) + 1; /* save capture size */ | 413 | capture[captop].siz = getoff(p) + 1; /* save capture size */ |
| 414 | capture[captop].s = s - getoff(p); | 414 | capture[captop].index = s - o - getoff(p); |
| 415 | /* goto pushcapture; */ | 415 | /* goto pushcapture; */ |
| 416 | pushcapture: { | 416 | pushcapture: { |
| 417 | capture[captop].idx = p->i.aux2.key; | 417 | capture[captop].idx = p->i.aux2.key; |
