aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-06-07 15:41:01 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-06-07 15:41:01 -0300
commitcf1705c1d96b549ef5887a2bc3038dbc31912e50 (patch)
tree98d37b61c4f095b089fa40e55032e0479932be6c
parente31e13f59ef1a4df1698b15ff1fe0198553cc3c2 (diff)
downloadlpeg-cf1705c1d96b549ef5887a2bc3038dbc31912e50.tar.gz
lpeg-cf1705c1d96b549ef5887a2bc3038dbc31912e50.tar.bz2
lpeg-cf1705c1d96b549ef5887a2bc3038dbc31912e50.zip
Captures point to string positions using indices
That uses 4 bytes (uint) instead of 8 (char*); the size of the structure 'Capture' reduces from 16 to 8 bytes in 64-bit machines.
-rw-r--r--lpcap.c26
-rw-r--r--lpcap.h14
-rw-r--r--lpprint.c27
-rw-r--r--lpprint.h2
-rw-r--r--lptree.c1
-rw-r--r--lpvm.c20
6 files changed, 60 insertions, 30 deletions
diff --git a/lpcap.c b/lpcap.c
index 4750f01..d17a47a 100644
--- a/lpcap.c
+++ b/lpcap.c
@@ -10,7 +10,7 @@
10 10
11#define isclosecap(cap) (captype(cap) == Cclose) 11#define isclosecap(cap) (captype(cap) == Cclose)
12 12
13#define closeaddr(c) ((c)->s + (c)->siz - 1) 13#define closeaddr(cs,c) ((cs)->s + (c)->index + (c)->siz - 1)
14 14
15#define isfullcap(cap) ((cap)->siz != 0) 15#define isfullcap(cap) ((cap)->siz != 0)
16 16
@@ -82,7 +82,8 @@ static void nextcap (CapState *cs) {
82static int pushnestedvalues (CapState *cs, int addextra) { 82static int pushnestedvalues (CapState *cs, int addextra) {
83 Capture *co = cs->cap; 83 Capture *co = cs->cap;
84 if (isfullcap(cs->cap++)) { /* no nested captures? */ 84 if (isfullcap(cs->cap++)) { /* no nested captures? */
85 lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ 85 /* push whole match */
86 lua_pushlstring(cs->L, cs->s + co->index, co->siz - 1);
86 return 1; /* that is it */ 87 return 1; /* that is it */
87 } 88 }
88 else { 89 else {
@@ -90,7 +91,8 @@ static int pushnestedvalues (CapState *cs, int addextra) {
90 while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ 91 while (!isclosecap(cs->cap)) /* repeat for all nested patterns */
91 n += pushcapture(cs); 92 n += pushcapture(cs);
92 if (addextra || n == 0) { /* need extra? */ 93 if (addextra || n == 0) { /* need extra? */
93 lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ 94 /* push whole match */
95 lua_pushlstring(cs->L, cs->s + co->index, cs->cap->index - co->index);
94 n++; 96 n++;
95 } 97 }
96 cs->cap++; /* skip close entry */ 98 cs->cap++; /* skip close entry */
@@ -295,7 +297,7 @@ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
295 assert(captype(open) == Cgroup); 297 assert(captype(open) == Cgroup);
296 id = finddyncap(open, close); /* get first dynamic capture argument */ 298 id = finddyncap(open, close); /* get first dynamic capture argument */
297 close->kind = Cclose; /* closes the group */ 299 close->kind = Cclose; /* closes the group */
298 close->s = s; 300 close->index = s - cs->s;
299 cs->cap = open; cs->valuecached = 0; /* prepare capture state */ 301 cs->cap = open; cs->valuecached = 0; /* prepare capture state */
300 luaL_checkstack(L, 4, "too many runtime captures"); 302 luaL_checkstack(L, 4, "too many runtime captures");
301 pushluaval(cs); /* push function to be called */ 303 pushluaval(cs); /* push function to be called */
@@ -342,7 +344,7 @@ typedef struct StrAux {
342static int getstrcaps (CapState *cs, StrAux *cps, int n) { 344static int getstrcaps (CapState *cs, StrAux *cps, int n) {
343 int k = n++; 345 int k = n++;
344 cps[k].isstring = 1; /* get string value */ 346 cps[k].isstring = 1; /* get string value */
345 cps[k].u.s.s = cs->cap->s; /* starts here */ 347 cps[k].u.s.s = cs->s + cs->cap->index; /* starts here */
346 if (!isfullcap(cs->cap++)) { /* nested captures? */ 348 if (!isfullcap(cs->cap++)) { /* nested captures? */
347 while (!isclosecap(cs->cap)) { /* traverse them */ 349 while (!isclosecap(cs->cap)) { /* traverse them */
348 if (n >= MAXSTRCAPS) /* too many captures? */ 350 if (n >= MAXSTRCAPS) /* too many captures? */
@@ -358,7 +360,7 @@ static int getstrcaps (CapState *cs, StrAux *cps, int n) {
358 } 360 }
359 cs->cap++; /* skip close */ 361 cs->cap++; /* skip close */
360 } 362 }
361 cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ 363 cps[k].u.s.e = closeaddr(cs, cs->cap - 1); /* ends here */
362 return n; 364 return n;
363} 365}
364 366
@@ -407,20 +409,21 @@ static void stringcap (luaL_Buffer *b, CapState *cs) {
407** Substitution capture: add result to buffer 'b' 409** Substitution capture: add result to buffer 'b'
408*/ 410*/
409static void substcap (luaL_Buffer *b, CapState *cs) { 411static void substcap (luaL_Buffer *b, CapState *cs) {
410 const char *curr = cs->cap->s; 412 const char *curr = cs->s + cs->cap->index;
411 if (isfullcap(cs->cap)) /* no nested captures? */ 413 if (isfullcap(cs->cap)) /* no nested captures? */
412 luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ 414 luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
413 else { 415 else {
414 cs->cap++; /* skip open entry */ 416 cs->cap++; /* skip open entry */
415 while (!isclosecap(cs->cap)) { /* traverse nested captures */ 417 while (!isclosecap(cs->cap)) { /* traverse nested captures */
416 const char *next = cs->cap->s; 418 const char *next = cs->s + cs->cap->index;
417 luaL_addlstring(b, curr, next - curr); /* add text up to capture */ 419 luaL_addlstring(b, curr, next - curr); /* add text up to capture */
418 if (addonestring(b, cs, "replacement")) 420 if (addonestring(b, cs, "replacement"))
419 curr = closeaddr(cs->cap - 1); /* continue after match */ 421 curr = closeaddr(cs, cs->cap - 1); /* continue after match */
420 else /* no capture value */ 422 else /* no capture value */
421 curr = next; /* keep original text in final result */ 423 curr = next; /* keep original text in final result */
422 } 424 }
423 luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ 425 /* add last piece of text */
426 luaL_addlstring(b, curr, cs->s + cs->cap->index - curr);
424 } 427 }
425 cs->cap++; /* go to next capture */ 428 cs->cap++; /* go to next capture */
426} 429}
@@ -473,7 +476,7 @@ static int pushcapture (CapState *cs) {
473 return luaL_error(L, "subcapture nesting too deep"); 476 return luaL_error(L, "subcapture nesting too deep");
474 switch (captype(cs->cap)) { 477 switch (captype(cs->cap)) {
475 case Cposition: { 478 case Cposition: {
476 lua_pushinteger(L, cs->cap->s - cs->s + 1); 479 lua_pushinteger(L, cs->cap->index + 1);
477 cs->cap++; 480 cs->cap++;
478 res = 1; 481 res = 1;
479 break; 482 break;
@@ -553,6 +556,7 @@ static int pushcapture (CapState *cs) {
553int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { 556int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
554 Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); 557 Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
555 int n = 0; 558 int n = 0;
559 /* printcaplist(capture); */
556 if (!isclosecap(capture)) { /* is there any capture? */ 560 if (!isclosecap(capture)) { /* is there any capture? */
557 CapState cs; 561 CapState cs;
558 cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; 562 cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
diff --git a/lpcap.h b/lpcap.h
index e72d913..7cf8c24 100644
--- a/lpcap.h
+++ b/lpcap.h
@@ -27,8 +27,20 @@ typedef enum CapKind {
27} CapKind; 27} CapKind;
28 28
29 29
30/*
31** An unsigned integer large enough to index any subject entirely.
32** It can be size_t, but that will double the size of the array
33** of captures in a 64-bit machine.
34*/
35#if !defined(Index_t)
36typedef uint Index_t;
37#endif
38
39#define MAXINDT (~(Index_t)0)
40
41
30typedef struct Capture { 42typedef struct Capture {
31 const char *s; /* subject position */ 43 Index_t index; /* subject position */
32 unsigned short idx; /* extra info (group name, arg index, etc.) */ 44 unsigned short idx; /* extra info (group name, arg index, etc.) */
33 byte kind; /* kind of capture */ 45 byte kind; /* kind of capture */
34 byte siz; /* size of full capture + 1 (0 = not a full capture) */ 46 byte siz; /* size of full capture + 1 (0 = not a full capture) */
diff --git a/lpprint.c b/lpprint.c
index 1142376..bdb85b8 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -60,7 +60,7 @@ static void printTcharset (TTree *tree) {
60static const char *capkind (int kind) { 60static const char *capkind (int kind) {
61 const char *const modes[] = { 61 const char *const modes[] = {
62 "close", "position", "constant", "backref", 62 "close", "position", "constant", "backref",
63 "argument", "simple", "table", "function", "replace", 63 "argument", "simple", "table", "function", "accumulator",
64 "query", "string", "num", "substitution", "fold", 64 "query", "string", "num", "substitution", "fold",
65 "runtime", "group"}; 65 "runtime", "group"};
66 return modes[kind]; 66 return modes[kind];
@@ -147,16 +147,29 @@ void printpatt (Instruction *p) {
147} 147}
148 148
149 149
150static void printcap (Capture *cap) { 150static void printcap (Capture *cap, int ident) {
151 printf("%s (idx: %d - size: %d) -> %p\n", 151 while (ident--) printf(" ");
152 capkind(cap->kind), cap->idx, cap->siz, cap->s); 152 printf("%s (idx: %d - size: %d) -> %ld\n",
153 capkind(cap->kind), cap->idx, cap->siz, (long)cap->index);
153} 154}
154 155
155 156
156void printcaplist (Capture *cap, Capture *limit) { 157static Capture *printcap2close (Capture *cap, int ident) {
158 while (cap->kind != Cclose) {
159 printcap(cap, ident);
160 if (cap->siz == 0) {
161 cap = printcap2close(cap + 1, ident + 2);
162 printcap(cap, ident); /* print 'close' capture */
163 }
164 cap++;
165 }
166 return cap;
167}
168
169
170void printcaplist (Capture *cap) {
157 printf(">======\n"); 171 printf(">======\n");
158 for (; cap->s && (limit == NULL || cap < limit); cap++) 172 printcap2close(cap, 0);
159 printcap(cap);
160 printf("=======\n"); 173 printf("=======\n");
161} 174}
162 175
diff --git a/lpprint.h b/lpprint.h
index e97f8c0..aafdafc 100644
--- a/lpprint.h
+++ b/lpprint.h
@@ -13,7 +13,7 @@ void printpatt (Instruction *p);
13void printtree (TTree *tree, int ident); 13void printtree (TTree *tree, int ident);
14void printktable (lua_State *L, int idx); 14void printktable (lua_State *L, int idx);
15void printcharset (const byte *st); 15void printcharset (const byte *st);
16void printcaplist (Capture *cap, Capture *limit); 16void printcaplist (Capture *cap);
17void printinst (const Instruction *op, const Instruction *p); 17void printinst (const Instruction *op, const Instruction *p);
18 18
19#else 19#else
diff --git a/lptree.c b/lptree.c
index 330471a..475b0c3 100644
--- a/lptree.c
+++ b/lptree.c
@@ -1253,6 +1253,7 @@ static int lp_match (lua_State *L) {
1253 const char *s = luaL_checklstring(L, SUBJIDX, &l); 1253 const char *s = luaL_checklstring(L, SUBJIDX, &l);
1254 size_t i = initposition(L, l); 1254 size_t i = initposition(L, l);
1255 int ptop = lua_gettop(L); 1255 int ptop = lua_gettop(L);
1256 luaL_argcheck(L, l < MAXINDT, SUBJIDX, "subject too long");
1256 lua_pushnil(L); /* initialize subscache */ 1257 lua_pushnil(L); /* initialize subscache */
1257 lua_pushlightuserdata(L, capture); /* initialize caplistidx */ 1258 lua_pushlightuserdata(L, capture); /* initialize caplistidx */
1258 lua_getuservalue(L, 1); /* initialize ktableidx */ 1259 lua_getuservalue(L, 1); /* initialize ktableidx */
diff --git a/lpvm.c b/lpvm.c
index e6f7dac..a65a6f6 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -169,7 +169,7 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
169** value, 'n' is the number of values (at least 1). The open group 169** value, 'n' is the number of values (at least 1). The open group
170** capture is already in 'capture', before the place for the new entries. 170** capture is already in 'capture', before the place for the new entries.
171*/ 171*/
172static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { 172static void adddyncaptures (Index_t index, Capture *capture, int n, int fd) {
173 int i; 173 int i;
174 assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); 174 assert(capture[-1].kind == Cgroup && capture[-1].siz == 0);
175 capture[-1].idx = 0; /* make group capture an anonymous group */ 175 capture[-1].idx = 0; /* make group capture an anonymous group */
@@ -177,11 +177,11 @@ static void adddyncaptures (const char *s, Capture *capture, int n, int fd) {
177 capture[i].kind = Cruntime; 177 capture[i].kind = Cruntime;
178 capture[i].siz = 1; /* mark it as closed */ 178 capture[i].siz = 1; /* mark it as closed */
179 capture[i].idx = fd + i; /* stack index of capture value */ 179 capture[i].idx = fd + i; /* stack index of capture value */
180 capture[i].s = s; 180 capture[i].index = index;
181 } 181 }
182 capture[n].kind = Cclose; /* close group */ 182 capture[n].kind = Cclose; /* close group */
183 capture[n].siz = 1; 183 capture[n].siz = 1;
184 capture[n].s = s; 184 capture[n].index = index;
185} 185}
186 186
187 187
@@ -225,7 +225,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
225 case IEnd: { 225 case IEnd: {
226 assert(stack == getstackbase(L, ptop) + 1); 226 assert(stack == getstackbase(L, ptop) + 1);
227 capture[captop].kind = Cclose; 227 capture[captop].kind = Cclose;
228 capture[captop].s = NULL; 228 capture[captop].index = MAXINDT;
229 return s; 229 return s;
230 } 230 }
231 case IGiveup: { 231 case IGiveup: {
@@ -383,7 +383,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
383 luaL_error(L, "too many results in match-time capture"); 383 luaL_error(L, "too many results in match-time capture");
384 /* add new captures + close group to 'capture' list */ 384 /* add new captures + close group to 'capture' list */
385 capture = growcap(L, capture, &capsize, captop, n + 1, ptop); 385 capture = growcap(L, capture, &capsize, captop, n + 1, ptop);
386 adddyncaptures(s, capture + captop, n, fr); 386 adddyncaptures(s - o, capture + captop, n, fr);
387 captop += n + 1; /* new captures + close group */ 387 captop += n + 1; /* new captures + close group */
388 } 388 }
389 p++; 389 p++;
@@ -394,24 +394,24 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
394 assert(captop > 0); 394 assert(captop > 0);
395 /* if possible, turn capture into a full capture */ 395 /* if possible, turn capture into a full capture */
396 if (capture[captop - 1].siz == 0 && 396 if (capture[captop - 1].siz == 0 &&
397 s1 - capture[captop - 1].s < UCHAR_MAX) { 397 (s1 - o) - capture[captop - 1].index < UCHAR_MAX) {
398 capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; 398 capture[captop - 1].siz = (s1 - o) - capture[captop - 1].index + 1;
399 p++; 399 p++;
400 continue; 400 continue;
401 } 401 }
402 else { 402 else {
403 capture[captop].siz = 1; /* mark entry as closed */ 403 capture[captop].siz = 1; /* mark entry as closed */
404 capture[captop].s = s; 404 capture[captop].index = s - o;
405 goto pushcapture; 405 goto pushcapture;
406 } 406 }
407 } 407 }
408 case IOpenCapture: 408 case IOpenCapture:
409 capture[captop].siz = 0; /* mark entry as open */ 409 capture[captop].siz = 0; /* mark entry as open */
410 capture[captop].s = s; 410 capture[captop].index = s - o;
411 goto pushcapture; 411 goto pushcapture;
412 case IFullCapture: 412 case IFullCapture:
413 capture[captop].siz = getoff(p) + 1; /* save capture size */ 413 capture[captop].siz = getoff(p) + 1; /* save capture size */
414 capture[captop].s = s - getoff(p); 414 capture[captop].index = s - o - getoff(p);
415 /* goto pushcapture; */ 415 /* goto pushcapture; */
416 pushcapture: { 416 pushcapture: {
417 capture[captop].idx = p->i.aux2.key; 417 capture[captop].idx = p->i.aux2.key;