diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-06-15 11:28:33 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-06-15 11:28:33 -0300 |
commit | a561630f17e61548193666abf9a8b20f20462558 (patch) | |
tree | b1ffda7472f2c801bb7677239b42b0e09b1ceec9 | |
parent | cf1705c1d96b549ef5887a2bc3038dbc31912e50 (diff) | |
download | lpeg-a561630f17e61548193666abf9a8b20f20462558.tar.gz lpeg-a561630f17e61548193666abf9a8b20f20462558.tar.bz2 lpeg-a561630f17e61548193666abf9a8b20f20462558.zip |
Full captures can contain nested captures
Nested captures can be recognized because they start (and end) inside
the character range of the full capture. This optimization can remove
a lot of 'close' captures from the capture logs.
-rw-r--r-- | lpcap.c | 189 | ||||
-rw-r--r-- | lpcap.h | 12 | ||||
-rw-r--r-- | lpprint.c | 26 | ||||
-rw-r--r-- | lpprint.h | 2 | ||||
-rw-r--r-- | lpvm.c | 41 |
5 files changed, 173 insertions, 97 deletions
@@ -3,21 +3,37 @@ | |||
3 | #include "lauxlib.h" | 3 | #include "lauxlib.h" |
4 | 4 | ||
5 | #include "lpcap.h" | 5 | #include "lpcap.h" |
6 | #include "lpprint.h" | ||
6 | #include "lptypes.h" | 7 | #include "lptypes.h" |
7 | 8 | ||
8 | 9 | ||
9 | #define captype(cap) ((cap)->kind) | 10 | #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) |
10 | 11 | ||
11 | #define isclosecap(cap) (captype(cap) == Cclose) | 12 | #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) |
12 | 13 | ||
13 | #define closeaddr(cs,c) ((cs)->s + (c)->index + (c)->siz - 1) | ||
14 | 14 | ||
15 | #define isfullcap(cap) ((cap)->siz != 0) | ||
16 | 15 | ||
17 | #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) | 16 | #define skipclose(cs,head) \ |
17 | if (isopencap(head)) { assert(isclosecap(cs->cap)); cs->cap++; } | ||
18 | 18 | ||
19 | #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) | ||
20 | 19 | ||
20 | /* | ||
21 | ** Return the size of capture 'cap'. If it is an open capture, 'close' | ||
22 | ** must be its corresponding close. | ||
23 | */ | ||
24 | static Index_t capsize (Capture *cap, Capture *close) { | ||
25 | if (isopencap(cap)) { | ||
26 | assert(isclosecap(close)); | ||
27 | return close->index - cap->index; | ||
28 | } | ||
29 | else | ||
30 | return cap->siz - 1; | ||
31 | } | ||
32 | |||
33 | |||
34 | static Index_t closesize (CapState *cs, Capture *head) { | ||
35 | return capsize(head, cs->cap); | ||
36 | } | ||
21 | 37 | ||
22 | 38 | ||
23 | /* | 39 | /* |
@@ -40,35 +56,40 @@ static int pushcapture (CapState *cs); | |||
40 | 56 | ||
41 | /* | 57 | /* |
42 | ** Goes back in a list of captures looking for an open capture | 58 | ** Goes back in a list of captures looking for an open capture |
43 | ** corresponding to a close | 59 | ** corresponding to a close one. |
44 | */ | 60 | */ |
45 | static Capture *findopen (Capture *cap) { | 61 | static Capture *findopen (Capture *cap) { |
46 | int n = 0; /* number of closes waiting an open */ | 62 | int n = 0; /* number of closes waiting an open */ |
47 | for (;;) { | 63 | for (;;) { |
48 | cap--; | 64 | cap--; |
49 | if (isclosecap(cap)) n++; /* one more open to skip */ | 65 | if (isclosecap(cap)) n++; /* one more open to skip */ |
50 | else if (!isfullcap(cap)) | 66 | else if (isopencap(cap)) |
51 | if (n-- == 0) return cap; | 67 | if (n-- == 0) return cap; |
52 | } | 68 | } |
53 | } | 69 | } |
54 | 70 | ||
55 | 71 | ||
56 | /* | 72 | /* |
57 | ** Go to the next capture | 73 | ** Go to the next capture at the same level. |
58 | */ | 74 | */ |
59 | static void nextcap (CapState *cs) { | 75 | static void nextcap (CapState *cs) { |
60 | Capture *cap = cs->cap; | 76 | Capture *cap = cs->cap; |
61 | if (!isfullcap(cap)) { /* not a single capture? */ | 77 | if (isopencap(cap)) { /* must look for a close? */ |
62 | int n = 0; /* number of opens waiting a close */ | 78 | int n = 0; /* number of opens waiting a close */ |
63 | for (;;) { /* look for corresponding close */ | 79 | for (;;) { /* look for corresponding close */ |
64 | cap++; | 80 | cap++; |
65 | if (isclosecap(cap)) { | 81 | if (isopencap(cap)) n++; |
82 | else if (isclosecap(cap)) | ||
66 | if (n-- == 0) break; | 83 | if (n-- == 0) break; |
67 | } | ||
68 | else if (!isfullcap(cap)) n++; | ||
69 | } | 84 | } |
85 | cs->cap = cap + 1; /* + 1 to skip last close */ | ||
86 | } | ||
87 | else { | ||
88 | Capture *next; | ||
89 | for (next = cap + 1; capinside(cap, next); next++) | ||
90 | ; /* skip captures inside current one */ | ||
91 | cs->cap = next; | ||
70 | } | 92 | } |
71 | cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */ | ||
72 | } | 93 | } |
73 | 94 | ||
74 | 95 | ||
@@ -80,24 +101,17 @@ static void nextcap (CapState *cs) { | |||
80 | ** so the function never returns zero. | 101 | ** so the function never returns zero. |
81 | */ | 102 | */ |
82 | static int pushnestedvalues (CapState *cs, int addextra) { | 103 | static int pushnestedvalues (CapState *cs, int addextra) { |
83 | Capture *co = cs->cap; | 104 | Capture *head = cs->cap++; /* original capture */ |
84 | if (isfullcap(cs->cap++)) { /* no nested captures? */ | 105 | int n = 0; /* number of pushed subvalues */ |
85 | /* push whole match */ | 106 | /* repeat for all nested patterns */ |
86 | lua_pushlstring(cs->L, cs->s + co->index, co->siz - 1); | 107 | while (capinside(head, cs->cap)) |
87 | return 1; /* that is it */ | 108 | n += pushcapture(cs); |
88 | } | 109 | if (addextra || n == 0) { /* need extra? */ |
89 | else { | 110 | lua_pushlstring(cs->L, cs->s + head->index, closesize(cs, head)); |
90 | int n = 0; | 111 | n++; |
91 | while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ | ||
92 | n += pushcapture(cs); | ||
93 | if (addextra || n == 0) { /* need extra? */ | ||
94 | /* push whole match */ | ||
95 | lua_pushlstring(cs->L, cs->s + co->index, cs->cap->index - co->index); | ||
96 | n++; | ||
97 | } | ||
98 | cs->cap++; /* skip close entry */ | ||
99 | return n; | ||
100 | } | 112 | } |
113 | skipclose(cs, head); | ||
114 | return n; | ||
101 | } | 115 | } |
102 | 116 | ||
103 | 117 | ||
@@ -112,17 +126,35 @@ static void pushonenestedvalue (CapState *cs) { | |||
112 | 126 | ||
113 | 127 | ||
114 | /* | 128 | /* |
129 | ** Checks whether group 'grp' is visible to 'ref', that is, | ||
130 | ** 'grp' is not nested inside a capture that does not contain 'ref'. | ||
131 | ** To do that, must find minimum capture that contains 'grp'. | ||
132 | */ | ||
133 | static int capvisible (CapState *cs, Capture *grp, Capture *ref) { | ||
134 | Capture *cap = grp; | ||
135 | while (cap-- > cs->ocap) { /* repeat until end of list */ | ||
136 | if (isclosecap(cap)) | ||
137 | cap = findopen(cap); /* skip nested captures */ | ||
138 | else if (capinside(cap, grp)) /* is 'grp' inside cap? */ | ||
139 | return capinside(cap, ref); /* ok iff cap also contains 'ref' */ | ||
140 | } | ||
141 | return 1; /* 'grp' is not inside any capture */ | ||
142 | } | ||
143 | |||
144 | |||
145 | /* | ||
115 | ** Try to find a named group capture with the name given at the top of | 146 | ** Try to find a named group capture with the name given at the top of |
116 | ** the stack; goes backward from 'cap'. | 147 | ** the stack; goes backward from 'ref'. |
117 | */ | 148 | */ |
118 | static Capture *findback (CapState *cs, Capture *cap) { | 149 | static Capture *findback (CapState *cs, Capture *ref) { |
119 | lua_State *L = cs->L; | 150 | lua_State *L = cs->L; |
151 | Capture *cap = ref; | ||
120 | while (cap-- > cs->ocap) { /* repeat until end of list */ | 152 | while (cap-- > cs->ocap) { /* repeat until end of list */ |
121 | if (isclosecap(cap)) | 153 | if (isopencap(cap)) |
154 | continue; /* enclosing captures are not visible to 'ref' */ | ||
155 | else if (isclosecap(cap)) | ||
122 | cap = findopen(cap); /* skip nested captures */ | 156 | cap = findopen(cap); /* skip nested captures */ |
123 | else if (!isfullcap(cap)) | 157 | if (captype(cap) == Cgroup && capvisible(cs, cap, ref)) { |
124 | continue; /* opening an enclosing capture: skip and get previous */ | ||
125 | if (captype(cap) == Cgroup) { | ||
126 | getfromktable(cs, cap->idx); /* get group name */ | 158 | getfromktable(cs, cap->idx); /* get group name */ |
127 | if (lp_equal(L, -2, -1)) { /* right group? */ | 159 | if (lp_equal(L, -2, -1)) { /* right group? */ |
128 | lua_pop(L, 2); /* remove reference name and group name */ | 160 | lua_pop(L, 2); /* remove reference name and group name */ |
@@ -156,11 +188,10 @@ static int backrefcap (CapState *cs) { | |||
156 | */ | 188 | */ |
157 | static int tablecap (CapState *cs) { | 189 | static int tablecap (CapState *cs) { |
158 | lua_State *L = cs->L; | 190 | lua_State *L = cs->L; |
191 | Capture *head = cs->cap++; | ||
159 | int n = 0; | 192 | int n = 0; |
160 | lua_newtable(L); | 193 | lua_newtable(L); |
161 | if (isfullcap(cs->cap++)) | 194 | while (capinside(head, cs->cap)) { |
162 | return 1; /* table is empty */ | ||
163 | while (!isclosecap(cs->cap)) { | ||
164 | if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ | 195 | if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ |
165 | pushluaval(cs); /* push group name */ | 196 | pushluaval(cs); /* push group name */ |
166 | pushonenestedvalue(cs); | 197 | pushonenestedvalue(cs); |
@@ -174,7 +205,7 @@ static int tablecap (CapState *cs) { | |||
174 | n += k; | 205 | n += k; |
175 | } | 206 | } |
176 | } | 207 | } |
177 | cs->cap++; /* skip close entry */ | 208 | skipclose(cs, head); |
178 | return 1; /* number of values pushed (only the table) */ | 209 | return 1; /* number of values pushed (only the table) */ |
179 | } | 210 | } |
180 | 211 | ||
@@ -201,20 +232,20 @@ static int querycap (CapState *cs) { | |||
201 | static int foldcap (CapState *cs) { | 232 | static int foldcap (CapState *cs) { |
202 | int n; | 233 | int n; |
203 | lua_State *L = cs->L; | 234 | lua_State *L = cs->L; |
204 | int idx = cs->cap->idx; | 235 | Capture *head = cs->cap++; |
205 | if (isfullcap(cs->cap++) || /* no nested captures? */ | 236 | int idx = head->idx; |
206 | isclosecap(cs->cap) || /* no nested captures (large subject)? */ | 237 | if (isclosecap(cs->cap) || /* no nested captures (large subject)? */ |
207 | (n = pushcapture(cs)) == 0) /* nested captures with no values? */ | 238 | (n = pushcapture(cs)) == 0) /* nested captures with no values? */ |
208 | return luaL_error(L, "no initial value for fold capture"); | 239 | return luaL_error(L, "no initial value for fold capture"); |
209 | if (n > 1) | 240 | if (n > 1) |
210 | lua_pop(L, n - 1); /* leave only one result for accumulator */ | 241 | lua_pop(L, n - 1); /* leave only one result for accumulator */ |
211 | while (!isclosecap(cs->cap)) { | 242 | while (capinside(head, cs->cap)) { |
212 | lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ | 243 | lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ |
213 | lua_insert(L, -2); /* put it before accumulator */ | 244 | lua_insert(L, -2); /* put it before accumulator */ |
214 | n = pushcapture(cs); /* get next capture's values */ | 245 | n = pushcapture(cs); /* get next capture's values */ |
215 | lua_call(L, n + 1, 1); /* call folding function */ | 246 | lua_call(L, n + 1, 1); /* call folding function */ |
216 | } | 247 | } |
217 | cs->cap++; /* skip close entry */ | 248 | skipclose(cs, head); |
218 | return 1; /* only accumulator left on the stack */ | 249 | return 1; /* only accumulator left on the stack */ |
219 | } | 250 | } |
220 | 251 | ||
@@ -327,8 +358,8 @@ typedef struct StrAux { | |||
327 | union { | 358 | union { |
328 | Capture *cp; /* if not a string, respective capture */ | 359 | Capture *cp; /* if not a string, respective capture */ |
329 | struct { /* if it is a string... */ | 360 | struct { /* if it is a string... */ |
330 | const char *s; /* ... starts here */ | 361 | Index_t idx; /* starts here */ |
331 | const char *e; /* ... ends here */ | 362 | Index_t siz; /* with this size */ |
332 | } s; | 363 | } s; |
333 | } u; | 364 | } u; |
334 | } StrAux; | 365 | } StrAux; |
@@ -343,24 +374,23 @@ typedef struct StrAux { | |||
343 | */ | 374 | */ |
344 | static int getstrcaps (CapState *cs, StrAux *cps, int n) { | 375 | static int getstrcaps (CapState *cs, StrAux *cps, int n) { |
345 | int k = n++; | 376 | int k = n++; |
377 | Capture *head = cs->cap++; | ||
346 | cps[k].isstring = 1; /* get string value */ | 378 | cps[k].isstring = 1; /* get string value */ |
347 | cps[k].u.s.s = cs->s + cs->cap->index; /* starts here */ | 379 | cps[k].u.s.idx = head->index; /* starts here */ |
348 | if (!isfullcap(cs->cap++)) { /* nested captures? */ | 380 | while (capinside(head, cs->cap)) { |
349 | while (!isclosecap(cs->cap)) { /* traverse them */ | 381 | if (n >= MAXSTRCAPS) /* too many captures? */ |
350 | if (n >= MAXSTRCAPS) /* too many captures? */ | 382 | nextcap(cs); /* skip extra captures (will not need them) */ |
351 | nextcap(cs); /* skip extra captures (will not need them) */ | 383 | else if (captype(cs->cap) == Csimple) /* string? */ |
352 | else if (captype(cs->cap) == Csimple) /* string? */ | 384 | n = getstrcaps(cs, cps, n); /* put info. into array */ |
353 | n = getstrcaps(cs, cps, n); /* put info. into array */ | 385 | else { |
354 | else { | 386 | cps[n].isstring = 0; /* not a string */ |
355 | cps[n].isstring = 0; /* not a string */ | 387 | cps[n].u.cp = cs->cap; /* keep original capture */ |
356 | cps[n].u.cp = cs->cap; /* keep original capture */ | 388 | nextcap(cs); |
357 | nextcap(cs); | 389 | n++; |
358 | n++; | ||
359 | } | ||
360 | } | 390 | } |
361 | cs->cap++; /* skip close */ | ||
362 | } | 391 | } |
363 | cps[k].u.s.e = closeaddr(cs, cs->cap - 1); /* ends here */ | 392 | cps[k].u.s.siz = closesize(cs, head); |
393 | skipclose(cs, head); | ||
364 | return n; | 394 | return n; |
365 | } | 395 | } |
366 | 396 | ||
@@ -382,7 +412,7 @@ static void stringcap (luaL_Buffer *b, CapState *cs) { | |||
382 | const char *fmt; /* format string */ | 412 | const char *fmt; /* format string */ |
383 | fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); | 413 | fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); |
384 | n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */ | 414 | n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */ |
385 | for (i = 0; i < len; i++) { /* traverse them */ | 415 | for (i = 0; i < len; i++) { /* traverse format string */ |
386 | if (fmt[i] != '%') /* not an escape? */ | 416 | if (fmt[i] != '%') /* not an escape? */ |
387 | luaL_addchar(b, fmt[i]); /* add it to buffer */ | 417 | luaL_addchar(b, fmt[i]); /* add it to buffer */ |
388 | else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */ | 418 | else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */ |
@@ -392,7 +422,7 @@ static void stringcap (luaL_Buffer *b, CapState *cs) { | |||
392 | if (l > n) | 422 | if (l > n) |
393 | luaL_error(cs->L, "invalid capture index (%d)", l); | 423 | luaL_error(cs->L, "invalid capture index (%d)", l); |
394 | else if (cps[l].isstring) | 424 | else if (cps[l].isstring) |
395 | luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); | 425 | luaL_addlstring(b, cs->s + cps[l].u.s.idx, cps[l].u.s.siz); |
396 | else { | 426 | else { |
397 | Capture *curr = cs->cap; | 427 | Capture *curr = cs->cap; |
398 | cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ | 428 | cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ |
@@ -410,22 +440,19 @@ static void stringcap (luaL_Buffer *b, CapState *cs) { | |||
410 | */ | 440 | */ |
411 | static void substcap (luaL_Buffer *b, CapState *cs) { | 441 | static void substcap (luaL_Buffer *b, CapState *cs) { |
412 | const char *curr = cs->s + cs->cap->index; | 442 | const char *curr = cs->s + cs->cap->index; |
413 | if (isfullcap(cs->cap)) /* no nested captures? */ | 443 | Capture *head = cs->cap++; |
414 | luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ | 444 | while (capinside(head, cs->cap)) { |
415 | else { | 445 | Capture *cap = cs->cap; |
416 | cs->cap++; /* skip open entry */ | 446 | const char *caps = cs->s + cap->index; |
417 | while (!isclosecap(cs->cap)) { /* traverse nested captures */ | 447 | luaL_addlstring(b, curr, caps - curr); /* add text up to capture */ |
418 | const char *next = cs->s + cs->cap->index; | 448 | if (addonestring(b, cs, "replacement")) |
419 | luaL_addlstring(b, curr, next - curr); /* add text up to capture */ | 449 | curr = caps + capsize(cap, cs->cap - 1); /* continue after match */ |
420 | if (addonestring(b, cs, "replacement")) | 450 | else /* no capture value */ |
421 | curr = closeaddr(cs, cs->cap - 1); /* continue after match */ | 451 | curr = caps; /* keep original text in final result */ |
422 | else /* no capture value */ | ||
423 | curr = next; /* keep original text in final result */ | ||
424 | } | ||
425 | /* add last piece of text */ | ||
426 | luaL_addlstring(b, curr, cs->s + cs->cap->index - curr); | ||
427 | } | 452 | } |
428 | cs->cap++; /* go to next capture */ | 453 | /* add last piece of text */ |
454 | luaL_addlstring(b, curr, cs->s + head->index + closesize(cs, head) - curr); | ||
455 | skipclose(cs, head); | ||
429 | } | 456 | } |
430 | 457 | ||
431 | 458 | ||
@@ -548,7 +575,7 @@ static int pushcapture (CapState *cs) { | |||
548 | /* | 575 | /* |
549 | ** Prepare a CapState structure and traverse the entire list of | 576 | ** Prepare a CapState structure and traverse the entire list of |
550 | ** captures in the stack pushing its results. 's' is the subject | 577 | ** captures in the stack pushing its results. 's' is the subject |
551 | ** string, 'r' is the final position of the match, and 'ptop' | 578 | ** string, 'r' is the final position of the match, and 'ptop' |
552 | ** the index in the stack where some useful values were pushed. | 579 | ** the index in the stack where some useful values were pushed. |
553 | ** Returns the number of results pushed. (If the list produces no | 580 | ** Returns the number of results pushed. (If the list produces no |
554 | ** results, push the final position of the match.) | 581 | ** results, push the final position of the match.) |
@@ -59,6 +59,18 @@ typedef struct CapState { | |||
59 | } CapState; | 59 | } CapState; |
60 | 60 | ||
61 | 61 | ||
62 | #define captype(cap) ((cap)->kind) | ||
63 | |||
64 | #define isclosecap(cap) (captype(cap) == Cclose) | ||
65 | #define isopencap(cap) ((cap)->siz == 0) | ||
66 | |||
67 | /* true if c2 is (any number of levels) inside c1 */ | ||
68 | #define capinside(c1,c2) \ | ||
69 | (isopencap(c1) ? !isclosecap(c2) \ | ||
70 | : (c2)->index < (c1)->index + (c1)->siz - 1) | ||
71 | |||
72 | |||
73 | |||
62 | int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); | 74 | int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); |
63 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop); | 75 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop); |
64 | int finddyncap (Capture *cap, Capture *last); | 76 | int finddyncap (Capture *cap, Capture *last); |
@@ -149,27 +149,35 @@ void printpatt (Instruction *p) { | |||
149 | 149 | ||
150 | static void printcap (Capture *cap, int ident) { | 150 | static void printcap (Capture *cap, int ident) { |
151 | while (ident--) printf(" "); | 151 | while (ident--) printf(" "); |
152 | printf("%s (idx: %d - size: %d) -> %ld\n", | 152 | printf("%s (idx: %d - size: %d) -> %lu\n", |
153 | capkind(cap->kind), cap->idx, cap->siz, (long)cap->index); | 153 | capkind(cap->kind), cap->idx, cap->siz, (long)cap->index); |
154 | } | 154 | } |
155 | 155 | ||
156 | 156 | ||
157 | /* | ||
158 | ** Print a capture and its nested captures | ||
159 | */ | ||
157 | static Capture *printcap2close (Capture *cap, int ident) { | 160 | static Capture *printcap2close (Capture *cap, int ident) { |
158 | while (cap->kind != Cclose) { | 161 | Capture *head = cap++; |
159 | printcap(cap, ident); | 162 | printcap(head, ident); /* print head capture */ |
160 | if (cap->siz == 0) { | 163 | while (capinside(head, cap)) |
161 | cap = printcap2close(cap + 1, ident + 2); | 164 | cap = printcap2close(cap, ident + 2); /* print nested captures */ |
162 | printcap(cap, ident); /* print 'close' capture */ | 165 | if (isopencap(head)) { |
163 | } | 166 | assert(isclosecap(cap)); |
164 | cap++; | 167 | printcap(cap++, ident); /* print and skip close capture */ |
165 | } | 168 | } |
166 | return cap; | 169 | return cap; |
167 | } | 170 | } |
168 | 171 | ||
169 | 172 | ||
170 | void printcaplist (Capture *cap) { | 173 | void printcaplist (Capture *cap) { |
174 | { /* for debugging, print first a raw list of captures */ | ||
175 | Capture *c = cap; | ||
176 | while (c->index != MAXINDT) { printcap(c, 0); c++; } | ||
177 | } | ||
171 | printf(">======\n"); | 178 | printf(">======\n"); |
172 | printcap2close(cap, 0); | 179 | while (!isclosecap(cap)) |
180 | cap = printcap2close(cap, 0); | ||
173 | printf("=======\n"); | 181 | printf("=======\n"); |
174 | } | 182 | } |
175 | 183 | ||
@@ -22,7 +22,7 @@ void printinst (const Instruction *op, const Instruction *p); | |||
22 | luaL_error(L, "function only implemented in debug mode") | 22 | luaL_error(L, "function only implemented in debug mode") |
23 | #define printtree(tree,i) \ | 23 | #define printtree(tree,i) \ |
24 | luaL_error(L, "function only implemented in debug mode") | 24 | luaL_error(L, "function only implemented in debug mode") |
25 | #define printpatt(p,n) \ | 25 | #define printpatt(p) \ |
26 | luaL_error(L, "function only implemented in debug mode") | 26 | luaL_error(L, "function only implemented in debug mode") |
27 | 27 | ||
28 | #endif | 28 | #endif |
@@ -198,6 +198,37 @@ static int removedyncap (lua_State *L, Capture *capture, | |||
198 | } | 198 | } |
199 | 199 | ||
200 | 200 | ||
201 | /** | ||
202 | ** Maximum number of captures to visit when looking for an 'open'. | ||
203 | */ | ||
204 | #define MAXLOP 20 | ||
205 | |||
206 | /* | ||
207 | ** Find the corresponding 'open' capture before 'cap', when that capture | ||
208 | ** can become a full capture. If a full capture c1 is followed by an | ||
209 | ** empty capture c2, there is no way to know whether c2 is inside | ||
210 | ** c1. So, full captures can enclose only captures that start *before* | ||
211 | ** its end. | ||
212 | */ | ||
213 | static Capture *findopen (Capture *cap, Index_t currindex) { | ||
214 | int i; | ||
215 | cap--; /* check last capture */ | ||
216 | /* Must it be inside current one, but starts where current one ends? */ | ||
217 | if (!isopencap(cap) && cap->index == currindex) | ||
218 | return NULL; /* current one cannot be a full capture */ | ||
219 | /* else, look for an 'open' capture */ | ||
220 | for (i = 0; i < MAXLOP; i++, cap--) { | ||
221 | if (currindex - cap->index >= UCHAR_MAX) | ||
222 | return NULL; /* capture too long for a full capture */ | ||
223 | else if (isopencap(cap)) /* open capture? */ | ||
224 | return cap; /* that's the one to be closed */ | ||
225 | else if (cap->kind == Cclose) | ||
226 | return NULL; /* a full capture should not nest a non-full one */ | ||
227 | } | ||
228 | return NULL; /* not found within allowed search limit */ | ||
229 | } | ||
230 | |||
231 | |||
201 | /* | 232 | /* |
202 | ** Opcode interpreter | 233 | ** Opcode interpreter |
203 | */ | 234 | */ |
@@ -390,16 +421,14 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
390 | continue; | 421 | continue; |
391 | } | 422 | } |
392 | case ICloseCapture: { | 423 | case ICloseCapture: { |
393 | const char *s1 = s; | 424 | Capture *open = findopen(capture + captop, s - o); |
394 | assert(captop > 0); | 425 | assert(captop > 0); |
395 | /* if possible, turn capture into a full capture */ | 426 | if (open) { /* if possible, turn capture into a full capture */ |
396 | if (capture[captop - 1].siz == 0 && | 427 | open->siz = (s - o) - open->index + 1; |
397 | (s1 - o) - capture[captop - 1].index < UCHAR_MAX) { | ||
398 | capture[captop - 1].siz = (s1 - o) - capture[captop - 1].index + 1; | ||
399 | p++; | 428 | p++; |
400 | continue; | 429 | continue; |
401 | } | 430 | } |
402 | else { | 431 | else { /* must create a close capture */ |
403 | capture[captop].siz = 1; /* mark entry as closed */ | 432 | capture[captop].siz = 1; /* mark entry as closed */ |
404 | capture[captop].index = s - o; | 433 | capture[captop].index = s - o; |
405 | goto pushcapture; | 434 | goto pushcapture; |