aboutsummaryrefslogtreecommitdiff
path: root/lpvm.c
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-06-15 11:28:33 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-06-15 11:28:33 -0300
commita561630f17e61548193666abf9a8b20f20462558 (patch)
treeb1ffda7472f2c801bb7677239b42b0e09b1ceec9 /lpvm.c
parentcf1705c1d96b549ef5887a2bc3038dbc31912e50 (diff)
downloadlpeg-a561630f17e61548193666abf9a8b20f20462558.tar.gz
lpeg-a561630f17e61548193666abf9a8b20f20462558.tar.bz2
lpeg-a561630f17e61548193666abf9a8b20f20462558.zip
Full captures can contain nested captures
Nested captures can be recognized because they start (and end) inside the character range of the full capture. This optimization can remove a lot of 'close' captures from the capture logs.
Diffstat (limited to '')
-rw-r--r--lpvm.c41
1 files changed, 35 insertions, 6 deletions
diff --git a/lpvm.c b/lpvm.c
index a65a6f6..5a30679 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -198,6 +198,37 @@ static int removedyncap (lua_State *L, Capture *capture,
198} 198}
199 199
200 200
201/**
202** Maximum number of captures to visit when looking for an 'open'.
203*/
204#define MAXLOP 20
205
206/*
207** Find the corresponding 'open' capture before 'cap', when that capture
208** can become a full capture. If a full capture c1 is followed by an
209** empty capture c2, there is no way to know whether c2 is inside
210** c1. So, full captures can enclose only captures that start *before*
211** its end.
212*/
213static Capture *findopen (Capture *cap, Index_t currindex) {
214 int i;
215 cap--; /* check last capture */
216 /* Must it be inside current one, but starts where current one ends? */
217 if (!isopencap(cap) && cap->index == currindex)
218 return NULL; /* current one cannot be a full capture */
219 /* else, look for an 'open' capture */
220 for (i = 0; i < MAXLOP; i++, cap--) {
221 if (currindex - cap->index >= UCHAR_MAX)
222 return NULL; /* capture too long for a full capture */
223 else if (isopencap(cap)) /* open capture? */
224 return cap; /* that's the one to be closed */
225 else if (cap->kind == Cclose)
226 return NULL; /* a full capture should not nest a non-full one */
227 }
228 return NULL; /* not found within allowed search limit */
229}
230
231
201/* 232/*
202** Opcode interpreter 233** Opcode interpreter
203*/ 234*/
@@ -390,16 +421,14 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
390 continue; 421 continue;
391 } 422 }
392 case ICloseCapture: { 423 case ICloseCapture: {
393 const char *s1 = s; 424 Capture *open = findopen(capture + captop, s - o);
394 assert(captop > 0); 425 assert(captop > 0);
395 /* if possible, turn capture into a full capture */ 426 if (open) { /* if possible, turn capture into a full capture */
396 if (capture[captop - 1].siz == 0 && 427 open->siz = (s - o) - open->index + 1;
397 (s1 - o) - capture[captop - 1].index < UCHAR_MAX) {
398 capture[captop - 1].siz = (s1 - o) - capture[captop - 1].index + 1;
399 p++; 428 p++;
400 continue; 429 continue;
401 } 430 }
402 else { 431 else { /* must create a close capture */
403 capture[captop].siz = 1; /* mark entry as closed */ 432 capture[captop].siz = 1; /* mark entry as closed */
404 capture[captop].index = s - o; 433 capture[captop].index = s - o;
405 goto pushcapture; 434 goto pushcapture;