diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-06-07 15:41:01 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-06-07 15:41:01 -0300 |
commit | cf1705c1d96b549ef5887a2bc3038dbc31912e50 (patch) | |
tree | 98d37b61c4f095b089fa40e55032e0479932be6c | |
parent | e31e13f59ef1a4df1698b15ff1fe0198553cc3c2 (diff) | |
download | lpeg-cf1705c1d96b549ef5887a2bc3038dbc31912e50.tar.gz lpeg-cf1705c1d96b549ef5887a2bc3038dbc31912e50.tar.bz2 lpeg-cf1705c1d96b549ef5887a2bc3038dbc31912e50.zip |
Captures point to string positions using indices
That uses 4 bytes (uint) instead of 8 (char*); the size of the
structure 'Capture' reduces from 16 to 8 bytes in 64-bit machines.
-rw-r--r-- | lpcap.c | 26 | ||||
-rw-r--r-- | lpcap.h | 14 | ||||
-rw-r--r-- | lpprint.c | 27 | ||||
-rw-r--r-- | lpprint.h | 2 | ||||
-rw-r--r-- | lptree.c | 1 | ||||
-rw-r--r-- | lpvm.c | 20 |
6 files changed, 60 insertions, 30 deletions
@@ -10,7 +10,7 @@ | |||
10 | 10 | ||
11 | #define isclosecap(cap) (captype(cap) == Cclose) | 11 | #define isclosecap(cap) (captype(cap) == Cclose) |
12 | 12 | ||
13 | #define closeaddr(c) ((c)->s + (c)->siz - 1) | 13 | #define closeaddr(cs,c) ((cs)->s + (c)->index + (c)->siz - 1) |
14 | 14 | ||
15 | #define isfullcap(cap) ((cap)->siz != 0) | 15 | #define isfullcap(cap) ((cap)->siz != 0) |
16 | 16 | ||
@@ -82,7 +82,8 @@ static void nextcap (CapState *cs) { | |||
82 | static int pushnestedvalues (CapState *cs, int addextra) { | 82 | static int pushnestedvalues (CapState *cs, int addextra) { |
83 | Capture *co = cs->cap; | 83 | Capture *co = cs->cap; |
84 | if (isfullcap(cs->cap++)) { /* no nested captures? */ | 84 | if (isfullcap(cs->cap++)) { /* no nested captures? */ |
85 | lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ | 85 | /* push whole match */ |
86 | lua_pushlstring(cs->L, cs->s + co->index, co->siz - 1); | ||
86 | return 1; /* that is it */ | 87 | return 1; /* that is it */ |
87 | } | 88 | } |
88 | else { | 89 | else { |
@@ -90,7 +91,8 @@ static int pushnestedvalues (CapState *cs, int addextra) { | |||
90 | while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ | 91 | while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ |
91 | n += pushcapture(cs); | 92 | n += pushcapture(cs); |
92 | if (addextra || n == 0) { /* need extra? */ | 93 | if (addextra || n == 0) { /* need extra? */ |
93 | lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ | 94 | /* push whole match */ |
95 | lua_pushlstring(cs->L, cs->s + co->index, cs->cap->index - co->index); | ||
94 | n++; | 96 | n++; |
95 | } | 97 | } |
96 | cs->cap++; /* skip close entry */ | 98 | cs->cap++; /* skip close entry */ |
@@ -295,7 +297,7 @@ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { | |||
295 | assert(captype(open) == Cgroup); | 297 | assert(captype(open) == Cgroup); |
296 | id = finddyncap(open, close); /* get first dynamic capture argument */ | 298 | id = finddyncap(open, close); /* get first dynamic capture argument */ |
297 | close->kind = Cclose; /* closes the group */ | 299 | close->kind = Cclose; /* closes the group */ |
298 | close->s = s; | 300 | close->index = s - cs->s; |
299 | cs->cap = open; cs->valuecached = 0; /* prepare capture state */ | 301 | cs->cap = open; cs->valuecached = 0; /* prepare capture state */ |
300 | luaL_checkstack(L, 4, "too many runtime captures"); | 302 | luaL_checkstack(L, 4, "too many runtime captures"); |
301 | pushluaval(cs); /* push function to be called */ | 303 | pushluaval(cs); /* push function to be called */ |
@@ -342,7 +344,7 @@ typedef struct StrAux { | |||
342 | static int getstrcaps (CapState *cs, StrAux *cps, int n) { | 344 | static int getstrcaps (CapState *cs, StrAux *cps, int n) { |
343 | int k = n++; | 345 | int k = n++; |
344 | cps[k].isstring = 1; /* get string value */ | 346 | cps[k].isstring = 1; /* get string value */ |
345 | cps[k].u.s.s = cs->cap->s; /* starts here */ | 347 | cps[k].u.s.s = cs->s + cs->cap->index; /* starts here */ |
346 | if (!isfullcap(cs->cap++)) { /* nested captures? */ | 348 | if (!isfullcap(cs->cap++)) { /* nested captures? */ |
347 | while (!isclosecap(cs->cap)) { /* traverse them */ | 349 | while (!isclosecap(cs->cap)) { /* traverse them */ |
348 | if (n >= MAXSTRCAPS) /* too many captures? */ | 350 | if (n >= MAXSTRCAPS) /* too many captures? */ |
@@ -358,7 +360,7 @@ static int getstrcaps (CapState *cs, StrAux *cps, int n) { | |||
358 | } | 360 | } |
359 | cs->cap++; /* skip close */ | 361 | cs->cap++; /* skip close */ |
360 | } | 362 | } |
361 | cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ | 363 | cps[k].u.s.e = closeaddr(cs, cs->cap - 1); /* ends here */ |
362 | return n; | 364 | return n; |
363 | } | 365 | } |
364 | 366 | ||
@@ -407,20 +409,21 @@ static void stringcap (luaL_Buffer *b, CapState *cs) { | |||
407 | ** Substitution capture: add result to buffer 'b' | 409 | ** Substitution capture: add result to buffer 'b' |
408 | */ | 410 | */ |
409 | static void substcap (luaL_Buffer *b, CapState *cs) { | 411 | static void substcap (luaL_Buffer *b, CapState *cs) { |
410 | const char *curr = cs->cap->s; | 412 | const char *curr = cs->s + cs->cap->index; |
411 | if (isfullcap(cs->cap)) /* no nested captures? */ | 413 | if (isfullcap(cs->cap)) /* no nested captures? */ |
412 | luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ | 414 | luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ |
413 | else { | 415 | else { |
414 | cs->cap++; /* skip open entry */ | 416 | cs->cap++; /* skip open entry */ |
415 | while (!isclosecap(cs->cap)) { /* traverse nested captures */ | 417 | while (!isclosecap(cs->cap)) { /* traverse nested captures */ |
416 | const char *next = cs->cap->s; | 418 | const char *next = cs->s + cs->cap->index; |
417 | luaL_addlstring(b, curr, next - curr); /* add text up to capture */ | 419 | luaL_addlstring(b, curr, next - curr); /* add text up to capture */ |
418 | if (addonestring(b, cs, "replacement")) | 420 | if (addonestring(b, cs, "replacement")) |
419 | curr = closeaddr(cs->cap - 1); /* continue after match */ | 421 | curr = closeaddr(cs, cs->cap - 1); /* continue after match */ |
420 | else /* no capture value */ | 422 | else /* no capture value */ |
421 | curr = next; /* keep original text in final result */ | 423 | curr = next; /* keep original text in final result */ |
422 | } | 424 | } |
423 | luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ | 425 | /* add last piece of text */ |
426 | luaL_addlstring(b, curr, cs->s + cs->cap->index - curr); | ||
424 | } | 427 | } |
425 | cs->cap++; /* go to next capture */ | 428 | cs->cap++; /* go to next capture */ |
426 | } | 429 | } |
@@ -473,7 +476,7 @@ static int pushcapture (CapState *cs) { | |||
473 | return luaL_error(L, "subcapture nesting too deep"); | 476 | return luaL_error(L, "subcapture nesting too deep"); |
474 | switch (captype(cs->cap)) { | 477 | switch (captype(cs->cap)) { |
475 | case Cposition: { | 478 | case Cposition: { |
476 | lua_pushinteger(L, cs->cap->s - cs->s + 1); | 479 | lua_pushinteger(L, cs->cap->index + 1); |
477 | cs->cap++; | 480 | cs->cap++; |
478 | res = 1; | 481 | res = 1; |
479 | break; | 482 | break; |
@@ -553,6 +556,7 @@ static int pushcapture (CapState *cs) { | |||
553 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { | 556 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { |
554 | Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); | 557 | Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); |
555 | int n = 0; | 558 | int n = 0; |
559 | /* printcaplist(capture); */ | ||
556 | if (!isclosecap(capture)) { /* is there any capture? */ | 560 | if (!isclosecap(capture)) { /* is there any capture? */ |
557 | CapState cs; | 561 | CapState cs; |
558 | cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; | 562 | cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; |
@@ -27,8 +27,20 @@ typedef enum CapKind { | |||
27 | } CapKind; | 27 | } CapKind; |
28 | 28 | ||
29 | 29 | ||
30 | /* | ||
31 | ** An unsigned integer large enough to index any subject entirely. | ||
32 | ** It can be size_t, but that will double the size of the array | ||
33 | ** of captures in a 64-bit machine. | ||
34 | */ | ||
35 | #if !defined(Index_t) | ||
36 | typedef uint Index_t; | ||
37 | #endif | ||
38 | |||
39 | #define MAXINDT (~(Index_t)0) | ||
40 | |||
41 | |||
30 | typedef struct Capture { | 42 | typedef struct Capture { |
31 | const char *s; /* subject position */ | 43 | Index_t index; /* subject position */ |
32 | unsigned short idx; /* extra info (group name, arg index, etc.) */ | 44 | unsigned short idx; /* extra info (group name, arg index, etc.) */ |
33 | byte kind; /* kind of capture */ | 45 | byte kind; /* kind of capture */ |
34 | byte siz; /* size of full capture + 1 (0 = not a full capture) */ | 46 | byte siz; /* size of full capture + 1 (0 = not a full capture) */ |
@@ -60,7 +60,7 @@ static void printTcharset (TTree *tree) { | |||
60 | static const char *capkind (int kind) { | 60 | static const char *capkind (int kind) { |
61 | const char *const modes[] = { | 61 | const char *const modes[] = { |
62 | "close", "position", "constant", "backref", | 62 | "close", "position", "constant", "backref", |
63 | "argument", "simple", "table", "function", "replace", | 63 | "argument", "simple", "table", "function", "accumulator", |
64 | "query", "string", "num", "substitution", "fold", | 64 | "query", "string", "num", "substitution", "fold", |
65 | "runtime", "group"}; | 65 | "runtime", "group"}; |
66 | return modes[kind]; | 66 | return modes[kind]; |
@@ -147,16 +147,29 @@ void printpatt (Instruction *p) { | |||
147 | } | 147 | } |
148 | 148 | ||
149 | 149 | ||
150 | static void printcap (Capture *cap) { | 150 | static void printcap (Capture *cap, int ident) { |
151 | printf("%s (idx: %d - size: %d) -> %p\n", | 151 | while (ident--) printf(" "); |
152 | capkind(cap->kind), cap->idx, cap->siz, cap->s); | 152 | printf("%s (idx: %d - size: %d) -> %ld\n", |
153 | capkind(cap->kind), cap->idx, cap->siz, (long)cap->index); | ||
153 | } | 154 | } |
154 | 155 | ||
155 | 156 | ||
156 | void printcaplist (Capture *cap, Capture *limit) { | 157 | static Capture *printcap2close (Capture *cap, int ident) { |
158 | while (cap->kind != Cclose) { | ||
159 | printcap(cap, ident); | ||
160 | if (cap->siz == 0) { | ||
161 | cap = printcap2close(cap + 1, ident + 2); | ||
162 | printcap(cap, ident); /* print 'close' capture */ | ||
163 | } | ||
164 | cap++; | ||
165 | } | ||
166 | return cap; | ||
167 | } | ||
168 | |||
169 | |||
170 | void printcaplist (Capture *cap) { | ||
157 | printf(">======\n"); | 171 | printf(">======\n"); |
158 | for (; cap->s && (limit == NULL || cap < limit); cap++) | 172 | printcap2close(cap, 0); |
159 | printcap(cap); | ||
160 | printf("=======\n"); | 173 | printf("=======\n"); |
161 | } | 174 | } |
162 | 175 | ||
@@ -13,7 +13,7 @@ void printpatt (Instruction *p); | |||
13 | void printtree (TTree *tree, int ident); | 13 | void printtree (TTree *tree, int ident); |
14 | void printktable (lua_State *L, int idx); | 14 | void printktable (lua_State *L, int idx); |
15 | void printcharset (const byte *st); | 15 | void printcharset (const byte *st); |
16 | void printcaplist (Capture *cap, Capture *limit); | 16 | void printcaplist (Capture *cap); |
17 | void printinst (const Instruction *op, const Instruction *p); | 17 | void printinst (const Instruction *op, const Instruction *p); |
18 | 18 | ||
19 | #else | 19 | #else |
@@ -1253,6 +1253,7 @@ static int lp_match (lua_State *L) { | |||
1253 | const char *s = luaL_checklstring(L, SUBJIDX, &l); | 1253 | const char *s = luaL_checklstring(L, SUBJIDX, &l); |
1254 | size_t i = initposition(L, l); | 1254 | size_t i = initposition(L, l); |
1255 | int ptop = lua_gettop(L); | 1255 | int ptop = lua_gettop(L); |
1256 | luaL_argcheck(L, l < MAXINDT, SUBJIDX, "subject too long"); | ||
1256 | lua_pushnil(L); /* initialize subscache */ | 1257 | lua_pushnil(L); /* initialize subscache */ |
1257 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ | 1258 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ |
1258 | lua_getuservalue(L, 1); /* initialize ktableidx */ | 1259 | lua_getuservalue(L, 1); /* initialize ktableidx */ |
@@ -169,7 +169,7 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | |||
169 | ** value, 'n' is the number of values (at least 1). The open group | 169 | ** value, 'n' is the number of values (at least 1). The open group |
170 | ** capture is already in 'capture', before the place for the new entries. | 170 | ** capture is already in 'capture', before the place for the new entries. |
171 | */ | 171 | */ |
172 | static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { | 172 | static void adddyncaptures (Index_t index, Capture *capture, int n, int fd) { |
173 | int i; | 173 | int i; |
174 | assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); | 174 | assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); |
175 | capture[-1].idx = 0; /* make group capture an anonymous group */ | 175 | capture[-1].idx = 0; /* make group capture an anonymous group */ |
@@ -177,11 +177,11 @@ static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { | |||
177 | capture[i].kind = Cruntime; | 177 | capture[i].kind = Cruntime; |
178 | capture[i].siz = 1; /* mark it as closed */ | 178 | capture[i].siz = 1; /* mark it as closed */ |
179 | capture[i].idx = fd + i; /* stack index of capture value */ | 179 | capture[i].idx = fd + i; /* stack index of capture value */ |
180 | capture[i].s = s; | 180 | capture[i].index = index; |
181 | } | 181 | } |
182 | capture[n].kind = Cclose; /* close group */ | 182 | capture[n].kind = Cclose; /* close group */ |
183 | capture[n].siz = 1; | 183 | capture[n].siz = 1; |
184 | capture[n].s = s; | 184 | capture[n].index = index; |
185 | } | 185 | } |
186 | 186 | ||
187 | 187 | ||
@@ -225,7 +225,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
225 | case IEnd: { | 225 | case IEnd: { |
226 | assert(stack == getstackbase(L, ptop) + 1); | 226 | assert(stack == getstackbase(L, ptop) + 1); |
227 | capture[captop].kind = Cclose; | 227 | capture[captop].kind = Cclose; |
228 | capture[captop].s = NULL; | 228 | capture[captop].index = MAXINDT; |
229 | return s; | 229 | return s; |
230 | } | 230 | } |
231 | case IGiveup: { | 231 | case IGiveup: { |
@@ -383,7 +383,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
383 | luaL_error(L, "too many results in match-time capture"); | 383 | luaL_error(L, "too many results in match-time capture"); |
384 | /* add new captures + close group to 'capture' list */ | 384 | /* add new captures + close group to 'capture' list */ |
385 | capture = growcap(L, capture, &capsize, captop, n + 1, ptop); | 385 | capture = growcap(L, capture, &capsize, captop, n + 1, ptop); |
386 | adddyncaptures(s, capture + captop, n, fr); | 386 | adddyncaptures(s - o, capture + captop, n, fr); |
387 | captop += n + 1; /* new captures + close group */ | 387 | captop += n + 1; /* new captures + close group */ |
388 | } | 388 | } |
389 | p++; | 389 | p++; |
@@ -394,24 +394,24 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
394 | assert(captop > 0); | 394 | assert(captop > 0); |
395 | /* if possible, turn capture into a full capture */ | 395 | /* if possible, turn capture into a full capture */ |
396 | if (capture[captop - 1].siz == 0 && | 396 | if (capture[captop - 1].siz == 0 && |
397 | s1 - capture[captop - 1].s < UCHAR_MAX) { | 397 | (s1 - o) - capture[captop - 1].index < UCHAR_MAX) { |
398 | capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; | 398 | capture[captop - 1].siz = (s1 - o) - capture[captop - 1].index + 1; |
399 | p++; | 399 | p++; |
400 | continue; | 400 | continue; |
401 | } | 401 | } |
402 | else { | 402 | else { |
403 | capture[captop].siz = 1; /* mark entry as closed */ | 403 | capture[captop].siz = 1; /* mark entry as closed */ |
404 | capture[captop].s = s; | 404 | capture[captop].index = s - o; |
405 | goto pushcapture; | 405 | goto pushcapture; |
406 | } | 406 | } |
407 | } | 407 | } |
408 | case IOpenCapture: | 408 | case IOpenCapture: |
409 | capture[captop].siz = 0; /* mark entry as open */ | 409 | capture[captop].siz = 0; /* mark entry as open */ |
410 | capture[captop].s = s; | 410 | capture[captop].index = s - o; |
411 | goto pushcapture; | 411 | goto pushcapture; |
412 | case IFullCapture: | 412 | case IFullCapture: |
413 | capture[captop].siz = getoff(p) + 1; /* save capture size */ | 413 | capture[captop].siz = getoff(p) + 1; /* save capture size */ |
414 | capture[captop].s = s - getoff(p); | 414 | capture[captop].index = s - o - getoff(p); |
415 | /* goto pushcapture; */ | 415 | /* goto pushcapture; */ |
416 | pushcapture: { | 416 | pushcapture: { |
417 | capture[captop].idx = p->i.aux2.key; | 417 | capture[captop].idx = p->i.aux2.key; |