diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-05-29 09:39:03 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-05-29 09:39:03 -0300 |
commit | 44fab2a44d06a956c3121ceba2b39ca7b00dc428 (patch) | |
tree | 9f492ba2ebaa4fb3cf6554fcb63f3fe026d43a5d | |
parent | 460a35cbcb33fbc56f5a658b96a793b9bb8963e9 (diff) | |
download | lpeg-44fab2a44d06a956c3121ceba2b39ca7b00dc428.tar.gz lpeg-44fab2a44d06a956c3121ceba2b39ca7b00dc428.tar.bz2 lpeg-44fab2a44d06a956c3121ceba2b39ca7b00dc428.zip |
Code size stored in code itself
Most patterns do not have code, as they are not directly used for
a match; they are created only to compose larger patterns. So, we
shouldn't waste space to store the size of their code, as a NULL
pointer already indicates that the size is zero.
-rw-r--r-- | lpcode.c | 67 | ||||
-rw-r--r-- | lpcode.h | 4 | ||||
-rw-r--r-- | lpprint.c | 3 | ||||
-rw-r--r-- | lpprint.h | 2 | ||||
-rw-r--r-- | lptree.c | 9 | ||||
-rw-r--r-- | lptree.h | 1 | ||||
-rw-r--r-- | lpvm.h | 8 |
7 files changed, 66 insertions, 28 deletions
@@ -392,29 +392,63 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | |||
392 | const Charset *fl); | 392 | const Charset *fl); |
393 | 393 | ||
394 | 394 | ||
395 | void realloccode (lua_State *L, Pattern *p, int nsize) { | 395 | static void finishrelcode (lua_State *L, Pattern *p, Instruction *block, |
396 | int size) { | ||
397 | if (block == NULL) | ||
398 | luaL_error(L, "not enough memory"); | ||
399 | block->codesize = size; | ||
400 | p->code = (Instruction *)block + 1; | ||
401 | } | ||
402 | |||
403 | |||
404 | /* | ||
405 | ** Initialize array 'p->code' | ||
406 | */ | ||
407 | static void newcode (lua_State *L, Pattern *p, int size) { | ||
396 | void *ud; | 408 | void *ud; |
409 | Instruction *block; | ||
397 | lua_Alloc f = lua_getallocf(L, &ud); | 410 | lua_Alloc f = lua_getallocf(L, &ud); |
398 | void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), | 411 | size++; /* slot for 'codesize' */ |
399 | nsize * sizeof(Instruction)); | 412 | block = (Instruction*) f(ud, NULL, 0, size * sizeof(Instruction)); |
400 | if (newblock == NULL && nsize > 0) | 413 | finishrelcode(L, p, block, size); |
401 | luaL_error(L, "not enough memory"); | 414 | } |
402 | p->code = (Instruction *)newblock; | 415 | |
403 | p->codesize = nsize; | 416 | |
417 | void freecode (lua_State *L, Pattern *p) { | ||
418 | if (p->code != NULL) { | ||
419 | void *ud; | ||
420 | lua_Alloc f = lua_getallocf(L, &ud); | ||
421 | uint osize = p->code[-1].codesize; | ||
422 | f(ud, p->code - 1, osize * sizeof(Instruction), 0); /* free block */ | ||
423 | } | ||
424 | } | ||
425 | |||
426 | |||
427 | /* | ||
428 | ** Assume that 'nsize' is not zero and that 'p->code' already exists. | ||
429 | */ | ||
430 | static void realloccode (lua_State *L, Pattern *p, int nsize) { | ||
431 | void *ud; | ||
432 | lua_Alloc f = lua_getallocf(L, &ud); | ||
433 | Instruction *block = p->code - 1; | ||
434 | uint osize = block->codesize; | ||
435 | nsize++; /* add the 'codesize' slot to size */ | ||
436 | block = (Instruction*) f(ud, block, osize * sizeof(Instruction), | ||
437 | nsize * sizeof(Instruction)); | ||
438 | finishrelcode(L, p, block, nsize); | ||
404 | } | 439 | } |
405 | 440 | ||
406 | 441 | ||
407 | /* | 442 | /* |
408 | ** Add space for 'n' more instructions and return the index of | 443 | ** Add space for an instruction with 'n' slots and return its index. |
409 | ** the first one. | ||
410 | */ | 444 | */ |
411 | static int nextinstruction (CompileState *compst, int n) { | 445 | static int nextinstruction (CompileState *compst, int n) { |
412 | int size = compst->p->codesize; | 446 | int size = compst->p->code[-1].codesize - 1; |
413 | int ncode = compst->ncode; | 447 | int ncode = compst->ncode; |
414 | if (ncode >= size - n) { | 448 | if (ncode > size - n) { |
415 | uint nsize = size + (size >> 1) + n; | 449 | uint nsize = size + (size >> 1) + n; |
416 | if (nsize > INT_MAX) | 450 | if (nsize >= INT_MAX) |
417 | luaL_error(compst->L, "code too large"); | 451 | luaL_error(compst->L, "pattern code too large"); |
418 | realloccode(compst->L, compst->p, nsize); | 452 | realloccode(compst->L, compst->p, nsize); |
419 | } | 453 | } |
420 | compst->ncode = ncode + n; | 454 | compst->ncode = ncode + n; |
@@ -998,12 +1032,13 @@ static void peephole (CompileState *compst) { | |||
998 | 1032 | ||
999 | 1033 | ||
1000 | /* | 1034 | /* |
1001 | ** Compile a pattern | 1035 | ** Compile a pattern. 'size' is the size of the pattern's tree, |
1036 | ** which gives a hint for the size of the final code. | ||
1002 | */ | 1037 | */ |
1003 | Instruction *compile (lua_State *L, Pattern *p) { | 1038 | Instruction *compile (lua_State *L, Pattern *p, uint size) { |
1004 | CompileState compst; | 1039 | CompileState compst; |
1005 | compst.p = p; compst.ncode = 0; compst.L = L; | 1040 | compst.p = p; compst.ncode = 0; compst.L = L; |
1006 | realloccode(L, p, 4); /* minimum initial size */ | 1041 | newcode(L, p, size/2u + 2); /* set initial size */ |
1007 | codegen(&compst, p->tree, 0, NOINST, fullset); | 1042 | codegen(&compst, p->tree, 0, NOINST, fullset); |
1008 | addinstruction(&compst, IEnd, 0); | 1043 | addinstruction(&compst, IEnd, 0); |
1009 | realloccode(L, p, compst.ncode); /* set final size */ | 1044 | realloccode(L, p, compst.ncode); /* set final size */ |
@@ -12,8 +12,8 @@ int checkaux (TTree *tree, int pred); | |||
12 | int fixedlen (TTree *tree); | 12 | int fixedlen (TTree *tree); |
13 | int hascaptures (TTree *tree); | 13 | int hascaptures (TTree *tree); |
14 | int lp_gc (lua_State *L); | 14 | int lp_gc (lua_State *L); |
15 | Instruction *compile (lua_State *L, Pattern *p); | 15 | Instruction *compile (lua_State *L, Pattern *p, uint size); |
16 | void realloccode (lua_State *L, Pattern *p, int nsize); | 16 | void freecode (lua_State *L, Pattern *p); |
17 | int sizei (const Instruction *i); | 17 | int sizei (const Instruction *i); |
18 | 18 | ||
19 | 19 | ||
@@ -137,8 +137,9 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
137 | } | 137 | } |
138 | 138 | ||
139 | 139 | ||
140 | void printpatt (Instruction *p, int n) { | 140 | void printpatt (Instruction *p) { |
141 | Instruction *op = p; | 141 | Instruction *op = p; |
142 | uint n = op[-1].codesize - 1; | ||
142 | while (p < op + n) { | 143 | while (p < op + n) { |
143 | printinst(op, p); | 144 | printinst(op, p); |
144 | p += sizei(p); | 145 | p += sizei(p); |
@@ -9,7 +9,7 @@ | |||
9 | 9 | ||
10 | #if defined(LPEG_DEBUG) | 10 | #if defined(LPEG_DEBUG) |
11 | 11 | ||
12 | void printpatt (Instruction *p, int n); | 12 | void printpatt (Instruction *p); |
13 | void printtree (TTree *tree, int ident); | 13 | void printtree (TTree *tree, int ident); |
14 | void printktable (lua_State *L, int idx); | 14 | void printktable (lua_State *L, int idx); |
15 | void printcharset (const byte *st); | 15 | void printcharset (const byte *st); |
@@ -359,7 +359,7 @@ static TTree *newtree (lua_State *L, int len) { | |||
359 | lua_pushvalue(L, -1); | 359 | lua_pushvalue(L, -1); |
360 | lua_setuservalue(L, -3); | 360 | lua_setuservalue(L, -3); |
361 | lua_setmetatable(L, -2); | 361 | lua_setmetatable(L, -2); |
362 | p->code = NULL; p->codesize = 0; | 362 | p->code = NULL; |
363 | return p->tree; | 363 | return p->tree; |
364 | } | 364 | } |
365 | 365 | ||
@@ -1189,7 +1189,7 @@ static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { | |||
1189 | lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ | 1189 | lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ |
1190 | finalfix(L, 0, NULL, p->tree); | 1190 | finalfix(L, 0, NULL, p->tree); |
1191 | lua_pop(L, 1); /* remove 'ktable' */ | 1191 | lua_pop(L, 1); /* remove 'ktable' */ |
1192 | return compile(L, p); | 1192 | return compile(L, p, getsize(L, idx)); |
1193 | } | 1193 | } |
1194 | 1194 | ||
1195 | 1195 | ||
@@ -1212,7 +1212,7 @@ static int lp_printcode (lua_State *L) { | |||
1212 | printktable(L, 1); | 1212 | printktable(L, 1); |
1213 | if (p->code == NULL) /* not compiled yet? */ | 1213 | if (p->code == NULL) /* not compiled yet? */ |
1214 | prepcompile(L, p, 1); | 1214 | prepcompile(L, p, 1); |
1215 | printpatt(p->code, p->codesize); | 1215 | printpatt(p->code); |
1216 | return 0; | 1216 | return 0; |
1217 | } | 1217 | } |
1218 | 1218 | ||
@@ -1290,7 +1290,7 @@ static int lp_type (lua_State *L) { | |||
1290 | 1290 | ||
1291 | int lp_gc (lua_State *L) { | 1291 | int lp_gc (lua_State *L) { |
1292 | Pattern *p = getpattern(L, 1); | 1292 | Pattern *p = getpattern(L, 1); |
1293 | realloccode(L, p, 0); /* delete code block */ | 1293 | freecode(L, p); /* delete code block */ |
1294 | return 0; | 1294 | return 0; |
1295 | } | 1295 | } |
1296 | 1296 | ||
@@ -1377,7 +1377,6 @@ static struct luaL_Reg metareg[] = { | |||
1377 | 1377 | ||
1378 | int luaopen_lpeg (lua_State *L); | 1378 | int luaopen_lpeg (lua_State *L); |
1379 | int luaopen_lpeg (lua_State *L) { | 1379 | int luaopen_lpeg (lua_State *L) { |
1380 | printf("%ld\n", sizeof(TTree)); | ||
1381 | luaL_newmetatable(L, PATTERN_T); | 1380 | luaL_newmetatable(L, PATTERN_T); |
1382 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ | 1381 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ |
1383 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | 1382 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); |
@@ -72,7 +72,6 @@ typedef struct TTree { | |||
72 | */ | 72 | */ |
73 | typedef struct Pattern { | 73 | typedef struct Pattern { |
74 | union Instruction *code; | 74 | union Instruction *code; |
75 | int codesize; | ||
76 | TTree tree[1]; | 75 | TTree tree[1]; |
77 | } Pattern; | 76 | } Pattern; |
78 | 77 | ||
@@ -43,7 +43,11 @@ typedef enum Opcode { | |||
43 | } Opcode; | 43 | } Opcode; |
44 | 44 | ||
45 | 45 | ||
46 | 46 | /* | |
47 | ** All array of instructions has a 'codesize' as its first element | ||
48 | ** and is referred by a pointer to its second element, which is the | ||
49 | ** first actual opcode. | ||
50 | */ | ||
47 | typedef union Instruction { | 51 | typedef union Instruction { |
48 | struct Inst { | 52 | struct Inst { |
49 | byte code; | 53 | byte code; |
@@ -57,6 +61,7 @@ typedef union Instruction { | |||
57 | } aux2; | 61 | } aux2; |
58 | } i; | 62 | } i; |
59 | int offset; | 63 | int offset; |
64 | uint codesize; | ||
60 | byte buff[1]; | 65 | byte buff[1]; |
61 | } Instruction; | 66 | } Instruction; |
62 | 67 | ||
@@ -66,7 +71,6 @@ typedef union Instruction { | |||
66 | 71 | ||
67 | 72 | ||
68 | int charinset (const Instruction *i, const byte *buff, uint c); | 73 | int charinset (const Instruction *i, const byte *buff, uint c); |
69 | void printpatt (Instruction *p, int n); | ||
70 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | 74 | const char *match (lua_State *L, const char *o, const char *s, const char *e, |
71 | Instruction *op, Capture *capture, int ptop); | 75 | Instruction *op, Capture *capture, int ptop); |
72 | 76 | ||