diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-06-15 11:28:33 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-06-15 11:28:33 -0300 |
commit | a561630f17e61548193666abf9a8b20f20462558 (patch) | |
tree | b1ffda7472f2c801bb7677239b42b0e09b1ceec9 /lpvm.c | |
parent | cf1705c1d96b549ef5887a2bc3038dbc31912e50 (diff) | |
download | lpeg-a561630f17e61548193666abf9a8b20f20462558.tar.gz lpeg-a561630f17e61548193666abf9a8b20f20462558.tar.bz2 lpeg-a561630f17e61548193666abf9a8b20f20462558.zip |
Full captures can contain nested captures
Nested captures can be recognized because they start (and end) inside
the character range of the full capture. This optimization can remove
a lot of 'close' captures from the capture logs.
Diffstat (limited to '')
-rw-r--r-- | lpvm.c | 41 |
1 files changed, 35 insertions, 6 deletions
@@ -198,6 +198,37 @@ static int removedyncap (lua_State *L, Capture *capture, | |||
198 | } | 198 | } |
199 | 199 | ||
200 | 200 | ||
201 | /** | ||
202 | ** Maximum number of captures to visit when looking for an 'open'. | ||
203 | */ | ||
204 | #define MAXLOP 20 | ||
205 | |||
206 | /* | ||
207 | ** Find the corresponding 'open' capture before 'cap', when that capture | ||
208 | ** can become a full capture. If a full capture c1 is followed by an | ||
209 | ** empty capture c2, there is no way to know whether c2 is inside | ||
210 | ** c1. So, full captures can enclose only captures that start *before* | ||
211 | ** its end. | ||
212 | */ | ||
213 | static Capture *findopen (Capture *cap, Index_t currindex) { | ||
214 | int i; | ||
215 | cap--; /* check last capture */ | ||
216 | /* Must it be inside current one, but starts where current one ends? */ | ||
217 | if (!isopencap(cap) && cap->index == currindex) | ||
218 | return NULL; /* current one cannot be a full capture */ | ||
219 | /* else, look for an 'open' capture */ | ||
220 | for (i = 0; i < MAXLOP; i++, cap--) { | ||
221 | if (currindex - cap->index >= UCHAR_MAX) | ||
222 | return NULL; /* capture too long for a full capture */ | ||
223 | else if (isopencap(cap)) /* open capture? */ | ||
224 | return cap; /* that's the one to be closed */ | ||
225 | else if (cap->kind == Cclose) | ||
226 | return NULL; /* a full capture should not nest a non-full one */ | ||
227 | } | ||
228 | return NULL; /* not found within allowed search limit */ | ||
229 | } | ||
230 | |||
231 | |||
201 | /* | 232 | /* |
202 | ** Opcode interpreter | 233 | ** Opcode interpreter |
203 | */ | 234 | */ |
@@ -390,16 +421,14 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
390 | continue; | 421 | continue; |
391 | } | 422 | } |
392 | case ICloseCapture: { | 423 | case ICloseCapture: { |
393 | const char *s1 = s; | 424 | Capture *open = findopen(capture + captop, s - o); |
394 | assert(captop > 0); | 425 | assert(captop > 0); |
395 | /* if possible, turn capture into a full capture */ | 426 | if (open) { /* if possible, turn capture into a full capture */ |
396 | if (capture[captop - 1].siz == 0 && | 427 | open->siz = (s - o) - open->index + 1; |
397 | (s1 - o) - capture[captop - 1].index < UCHAR_MAX) { | ||
398 | capture[captop - 1].siz = (s1 - o) - capture[captop - 1].index + 1; | ||
399 | p++; | 428 | p++; |
400 | continue; | 429 | continue; |
401 | } | 430 | } |
402 | else { | 431 | else { /* must create a close capture */ |
403 | capture[captop].siz = 1; /* mark entry as closed */ | 432 | capture[captop].siz = 1; /* mark entry as closed */ |
404 | capture[captop].index = s - o; | 433 | capture[captop].index = s - o; |
405 | goto pushcapture; | 434 | goto pushcapture; |