aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-05-29 09:39:03 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-05-29 09:39:03 -0300
commit44fab2a44d06a956c3121ceba2b39ca7b00dc428 (patch)
tree9f492ba2ebaa4fb3cf6554fcb63f3fe026d43a5d
parent460a35cbcb33fbc56f5a658b96a793b9bb8963e9 (diff)
downloadlpeg-44fab2a44d06a956c3121ceba2b39ca7b00dc428.tar.gz
lpeg-44fab2a44d06a956c3121ceba2b39ca7b00dc428.tar.bz2
lpeg-44fab2a44d06a956c3121ceba2b39ca7b00dc428.zip
Code size stored in code itself
Most patterns do not have code, as they are not directly used for a match; they are created only to compose larger patterns. So, we shouldn't waste space to store the size of their code, as a NULL pointer already indicates that the size is zero.
-rw-r--r--lpcode.c67
-rw-r--r--lpcode.h4
-rw-r--r--lpprint.c3
-rw-r--r--lpprint.h2
-rw-r--r--lptree.c9
-rw-r--r--lptree.h1
-rw-r--r--lpvm.h8
7 files changed, 66 insertions, 28 deletions
diff --git a/lpcode.c b/lpcode.c
index 9d7d0eb..f3b8ae3 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -392,29 +392,63 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
392 const Charset *fl); 392 const Charset *fl);
393 393
394 394
395void realloccode (lua_State *L, Pattern *p, int nsize) { 395static void finishrelcode (lua_State *L, Pattern *p, Instruction *block,
396 int size) {
397 if (block == NULL)
398 luaL_error(L, "not enough memory");
399 block->codesize = size;
400 p->code = (Instruction *)block + 1;
401}
402
403
404/*
405** Initialize array 'p->code'
406*/
407static void newcode (lua_State *L, Pattern *p, int size) {
396 void *ud; 408 void *ud;
409 Instruction *block;
397 lua_Alloc f = lua_getallocf(L, &ud); 410 lua_Alloc f = lua_getallocf(L, &ud);
398 void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), 411 size++; /* slot for 'codesize' */
399 nsize * sizeof(Instruction)); 412 block = (Instruction*) f(ud, NULL, 0, size * sizeof(Instruction));
400 if (newblock == NULL && nsize > 0) 413 finishrelcode(L, p, block, size);
401 luaL_error(L, "not enough memory"); 414}
402 p->code = (Instruction *)newblock; 415
403 p->codesize = nsize; 416
417void freecode (lua_State *L, Pattern *p) {
418 if (p->code != NULL) {
419 void *ud;
420 lua_Alloc f = lua_getallocf(L, &ud);
421 uint osize = p->code[-1].codesize;
422 f(ud, p->code - 1, osize * sizeof(Instruction), 0); /* free block */
423 }
424}
425
426
427/*
428** Assume that 'nsize' is not zero and that 'p->code' already exists.
429*/
430static void realloccode (lua_State *L, Pattern *p, int nsize) {
431 void *ud;
432 lua_Alloc f = lua_getallocf(L, &ud);
433 Instruction *block = p->code - 1;
434 uint osize = block->codesize;
435 nsize++; /* add the 'codesize' slot to size */
436 block = (Instruction*) f(ud, block, osize * sizeof(Instruction),
437 nsize * sizeof(Instruction));
438 finishrelcode(L, p, block, nsize);
404} 439}
405 440
406 441
407/* 442/*
408** Add space for 'n' more instructions and return the index of 443** Add space for an instruction with 'n' slots and return its index.
409** the first one.
410*/ 444*/
411static int nextinstruction (CompileState *compst, int n) { 445static int nextinstruction (CompileState *compst, int n) {
412 int size = compst->p->codesize; 446 int size = compst->p->code[-1].codesize - 1;
413 int ncode = compst->ncode; 447 int ncode = compst->ncode;
414 if (ncode >= size - n) { 448 if (ncode > size - n) {
415 uint nsize = size + (size >> 1) + n; 449 uint nsize = size + (size >> 1) + n;
416 if (nsize > INT_MAX) 450 if (nsize >= INT_MAX)
417 luaL_error(compst->L, "code too large"); 451 luaL_error(compst->L, "pattern code too large");
418 realloccode(compst->L, compst->p, nsize); 452 realloccode(compst->L, compst->p, nsize);
419 } 453 }
420 compst->ncode = ncode + n; 454 compst->ncode = ncode + n;
@@ -998,12 +1032,13 @@ static void peephole (CompileState *compst) {
998 1032
999 1033
1000/* 1034/*
1001** Compile a pattern 1035** Compile a pattern. 'size' is the size of the pattern's tree,
1036** which gives a hint for the size of the final code.
1002*/ 1037*/
1003Instruction *compile (lua_State *L, Pattern *p) { 1038Instruction *compile (lua_State *L, Pattern *p, uint size) {
1004 CompileState compst; 1039 CompileState compst;
1005 compst.p = p; compst.ncode = 0; compst.L = L; 1040 compst.p = p; compst.ncode = 0; compst.L = L;
1006 realloccode(L, p, 4); /* minimum initial size */ 1041 newcode(L, p, size/2u + 2); /* set initial size */
1007 codegen(&compst, p->tree, 0, NOINST, fullset); 1042 codegen(&compst, p->tree, 0, NOINST, fullset);
1008 addinstruction(&compst, IEnd, 0); 1043 addinstruction(&compst, IEnd, 0);
1009 realloccode(L, p, compst.ncode); /* set final size */ 1044 realloccode(L, p, compst.ncode); /* set final size */
diff --git a/lpcode.h b/lpcode.h
index 3c71451..10c2ced 100644
--- a/lpcode.h
+++ b/lpcode.h
@@ -12,8 +12,8 @@ int checkaux (TTree *tree, int pred);
12int fixedlen (TTree *tree); 12int fixedlen (TTree *tree);
13int hascaptures (TTree *tree); 13int hascaptures (TTree *tree);
14int lp_gc (lua_State *L); 14int lp_gc (lua_State *L);
15Instruction *compile (lua_State *L, Pattern *p); 15Instruction *compile (lua_State *L, Pattern *p, uint size);
16void realloccode (lua_State *L, Pattern *p, int nsize); 16void freecode (lua_State *L, Pattern *p);
17int sizei (const Instruction *i); 17int sizei (const Instruction *i);
18 18
19 19
diff --git a/lpprint.c b/lpprint.c
index 3e7e7f2..54a3da7 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -137,8 +137,9 @@ void printinst (const Instruction *op, const Instruction *p) {
137} 137}
138 138
139 139
140void printpatt (Instruction *p, int n) { 140void printpatt (Instruction *p) {
141 Instruction *op = p; 141 Instruction *op = p;
142 uint n = op[-1].codesize - 1;
142 while (p < op + n) { 143 while (p < op + n) {
143 printinst(op, p); 144 printinst(op, p);
144 p += sizei(p); 145 p += sizei(p);
diff --git a/lpprint.h b/lpprint.h
index 42d7f98..e97f8c0 100644
--- a/lpprint.h
+++ b/lpprint.h
@@ -9,7 +9,7 @@
9 9
10#if defined(LPEG_DEBUG) 10#if defined(LPEG_DEBUG)
11 11
12void printpatt (Instruction *p, int n); 12void printpatt (Instruction *p);
13void printtree (TTree *tree, int ident); 13void printtree (TTree *tree, int ident);
14void printktable (lua_State *L, int idx); 14void printktable (lua_State *L, int idx);
15void printcharset (const byte *st); 15void printcharset (const byte *st);
diff --git a/lptree.c b/lptree.c
index dcee27e..0b2d824 100644
--- a/lptree.c
+++ b/lptree.c
@@ -359,7 +359,7 @@ static TTree *newtree (lua_State *L, int len) {
359 lua_pushvalue(L, -1); 359 lua_pushvalue(L, -1);
360 lua_setuservalue(L, -3); 360 lua_setuservalue(L, -3);
361 lua_setmetatable(L, -2); 361 lua_setmetatable(L, -2);
362 p->code = NULL; p->codesize = 0; 362 p->code = NULL;
363 return p->tree; 363 return p->tree;
364} 364}
365 365
@@ -1189,7 +1189,7 @@ static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) {
1189 lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ 1189 lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */
1190 finalfix(L, 0, NULL, p->tree); 1190 finalfix(L, 0, NULL, p->tree);
1191 lua_pop(L, 1); /* remove 'ktable' */ 1191 lua_pop(L, 1); /* remove 'ktable' */
1192 return compile(L, p); 1192 return compile(L, p, getsize(L, idx));
1193} 1193}
1194 1194
1195 1195
@@ -1212,7 +1212,7 @@ static int lp_printcode (lua_State *L) {
1212 printktable(L, 1); 1212 printktable(L, 1);
1213 if (p->code == NULL) /* not compiled yet? */ 1213 if (p->code == NULL) /* not compiled yet? */
1214 prepcompile(L, p, 1); 1214 prepcompile(L, p, 1);
1215 printpatt(p->code, p->codesize); 1215 printpatt(p->code);
1216 return 0; 1216 return 0;
1217} 1217}
1218 1218
@@ -1290,7 +1290,7 @@ static int lp_type (lua_State *L) {
1290 1290
1291int lp_gc (lua_State *L) { 1291int lp_gc (lua_State *L) {
1292 Pattern *p = getpattern(L, 1); 1292 Pattern *p = getpattern(L, 1);
1293 realloccode(L, p, 0); /* delete code block */ 1293 freecode(L, p); /* delete code block */
1294 return 0; 1294 return 0;
1295} 1295}
1296 1296
@@ -1377,7 +1377,6 @@ static struct luaL_Reg metareg[] = {
1377 1377
1378int luaopen_lpeg (lua_State *L); 1378int luaopen_lpeg (lua_State *L);
1379int luaopen_lpeg (lua_State *L) { 1379int luaopen_lpeg (lua_State *L) {
1380printf("%ld\n", sizeof(TTree));
1381 luaL_newmetatable(L, PATTERN_T); 1380 luaL_newmetatable(L, PATTERN_T);
1382 lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ 1381 lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */
1383 lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); 1382 lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
diff --git a/lptree.h b/lptree.h
index 7dab362..c788741 100644
--- a/lptree.h
+++ b/lptree.h
@@ -72,7 +72,6 @@ typedef struct TTree {
72*/ 72*/
73typedef struct Pattern { 73typedef struct Pattern {
74 union Instruction *code; 74 union Instruction *code;
75 int codesize;
76 TTree tree[1]; 75 TTree tree[1];
77} Pattern; 76} Pattern;
78 77
diff --git a/lpvm.h b/lpvm.h
index a64d577..684f0c9 100644
--- a/lpvm.h
+++ b/lpvm.h
@@ -43,7 +43,11 @@ typedef enum Opcode {
43} Opcode; 43} Opcode;
44 44
45 45
46 46/*
47** All array of instructions has a 'codesize' as its first element
48** and is referred by a pointer to its second element, which is the
49** first actual opcode.
50*/
47typedef union Instruction { 51typedef union Instruction {
48 struct Inst { 52 struct Inst {
49 byte code; 53 byte code;
@@ -57,6 +61,7 @@ typedef union Instruction {
57 } aux2; 61 } aux2;
58 } i; 62 } i;
59 int offset; 63 int offset;
64 uint codesize;
60 byte buff[1]; 65 byte buff[1];
61} Instruction; 66} Instruction;
62 67
@@ -66,7 +71,6 @@ typedef union Instruction {
66 71
67 72
68int charinset (const Instruction *i, const byte *buff, uint c); 73int charinset (const Instruction *i, const byte *buff, uint c);
69void printpatt (Instruction *p, int n);
70const char *match (lua_State *L, const char *o, const char *s, const char *e, 74const char *match (lua_State *L, const char *o, const char *s, const char *e,
71 Instruction *op, Capture *capture, int ptop); 75 Instruction *op, Capture *capture, int ptop);
72 76