diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-05-29 09:39:03 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-05-29 09:39:03 -0300 |
| commit | 44fab2a44d06a956c3121ceba2b39ca7b00dc428 (patch) | |
| tree | 9f492ba2ebaa4fb3cf6554fcb63f3fe026d43a5d | |
| parent | 460a35cbcb33fbc56f5a658b96a793b9bb8963e9 (diff) | |
| download | lpeg-44fab2a44d06a956c3121ceba2b39ca7b00dc428.tar.gz lpeg-44fab2a44d06a956c3121ceba2b39ca7b00dc428.tar.bz2 lpeg-44fab2a44d06a956c3121ceba2b39ca7b00dc428.zip | |
Code size stored in code itself
Most patterns do not have code, as they are not directly used for
a match; they are created only to compose larger patterns. So, we
shouldn't waste space to store the size of their code, as a NULL
pointer already indicates that the size is zero.
| -rw-r--r-- | lpcode.c | 67 | ||||
| -rw-r--r-- | lpcode.h | 4 | ||||
| -rw-r--r-- | lpprint.c | 3 | ||||
| -rw-r--r-- | lpprint.h | 2 | ||||
| -rw-r--r-- | lptree.c | 9 | ||||
| -rw-r--r-- | lptree.h | 1 | ||||
| -rw-r--r-- | lpvm.h | 8 |
7 files changed, 66 insertions, 28 deletions
| @@ -392,29 +392,63 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | |||
| 392 | const Charset *fl); | 392 | const Charset *fl); |
| 393 | 393 | ||
| 394 | 394 | ||
| 395 | void realloccode (lua_State *L, Pattern *p, int nsize) { | 395 | static void finishrelcode (lua_State *L, Pattern *p, Instruction *block, |
| 396 | int size) { | ||
| 397 | if (block == NULL) | ||
| 398 | luaL_error(L, "not enough memory"); | ||
| 399 | block->codesize = size; | ||
| 400 | p->code = (Instruction *)block + 1; | ||
| 401 | } | ||
| 402 | |||
| 403 | |||
| 404 | /* | ||
| 405 | ** Initialize array 'p->code' | ||
| 406 | */ | ||
| 407 | static void newcode (lua_State *L, Pattern *p, int size) { | ||
| 396 | void *ud; | 408 | void *ud; |
| 409 | Instruction *block; | ||
| 397 | lua_Alloc f = lua_getallocf(L, &ud); | 410 | lua_Alloc f = lua_getallocf(L, &ud); |
| 398 | void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), | 411 | size++; /* slot for 'codesize' */ |
| 399 | nsize * sizeof(Instruction)); | 412 | block = (Instruction*) f(ud, NULL, 0, size * sizeof(Instruction)); |
| 400 | if (newblock == NULL && nsize > 0) | 413 | finishrelcode(L, p, block, size); |
| 401 | luaL_error(L, "not enough memory"); | 414 | } |
| 402 | p->code = (Instruction *)newblock; | 415 | |
| 403 | p->codesize = nsize; | 416 | |
| 417 | void freecode (lua_State *L, Pattern *p) { | ||
| 418 | if (p->code != NULL) { | ||
| 419 | void *ud; | ||
| 420 | lua_Alloc f = lua_getallocf(L, &ud); | ||
| 421 | uint osize = p->code[-1].codesize; | ||
| 422 | f(ud, p->code - 1, osize * sizeof(Instruction), 0); /* free block */ | ||
| 423 | } | ||
| 424 | } | ||
| 425 | |||
| 426 | |||
| 427 | /* | ||
| 428 | ** Assume that 'nsize' is not zero and that 'p->code' already exists. | ||
| 429 | */ | ||
| 430 | static void realloccode (lua_State *L, Pattern *p, int nsize) { | ||
| 431 | void *ud; | ||
| 432 | lua_Alloc f = lua_getallocf(L, &ud); | ||
| 433 | Instruction *block = p->code - 1; | ||
| 434 | uint osize = block->codesize; | ||
| 435 | nsize++; /* add the 'codesize' slot to size */ | ||
| 436 | block = (Instruction*) f(ud, block, osize * sizeof(Instruction), | ||
| 437 | nsize * sizeof(Instruction)); | ||
| 438 | finishrelcode(L, p, block, nsize); | ||
| 404 | } | 439 | } |
| 405 | 440 | ||
| 406 | 441 | ||
| 407 | /* | 442 | /* |
| 408 | ** Add space for 'n' more instructions and return the index of | 443 | ** Add space for an instruction with 'n' slots and return its index. |
| 409 | ** the first one. | ||
| 410 | */ | 444 | */ |
| 411 | static int nextinstruction (CompileState *compst, int n) { | 445 | static int nextinstruction (CompileState *compst, int n) { |
| 412 | int size = compst->p->codesize; | 446 | int size = compst->p->code[-1].codesize - 1; |
| 413 | int ncode = compst->ncode; | 447 | int ncode = compst->ncode; |
| 414 | if (ncode >= size - n) { | 448 | if (ncode > size - n) { |
| 415 | uint nsize = size + (size >> 1) + n; | 449 | uint nsize = size + (size >> 1) + n; |
| 416 | if (nsize > INT_MAX) | 450 | if (nsize >= INT_MAX) |
| 417 | luaL_error(compst->L, "code too large"); | 451 | luaL_error(compst->L, "pattern code too large"); |
| 418 | realloccode(compst->L, compst->p, nsize); | 452 | realloccode(compst->L, compst->p, nsize); |
| 419 | } | 453 | } |
| 420 | compst->ncode = ncode + n; | 454 | compst->ncode = ncode + n; |
| @@ -998,12 +1032,13 @@ static void peephole (CompileState *compst) { | |||
| 998 | 1032 | ||
| 999 | 1033 | ||
| 1000 | /* | 1034 | /* |
| 1001 | ** Compile a pattern | 1035 | ** Compile a pattern. 'size' is the size of the pattern's tree, |
| 1036 | ** which gives a hint for the size of the final code. | ||
| 1002 | */ | 1037 | */ |
| 1003 | Instruction *compile (lua_State *L, Pattern *p) { | 1038 | Instruction *compile (lua_State *L, Pattern *p, uint size) { |
| 1004 | CompileState compst; | 1039 | CompileState compst; |
| 1005 | compst.p = p; compst.ncode = 0; compst.L = L; | 1040 | compst.p = p; compst.ncode = 0; compst.L = L; |
| 1006 | realloccode(L, p, 4); /* minimum initial size */ | 1041 | newcode(L, p, size/2u + 2); /* set initial size */ |
| 1007 | codegen(&compst, p->tree, 0, NOINST, fullset); | 1042 | codegen(&compst, p->tree, 0, NOINST, fullset); |
| 1008 | addinstruction(&compst, IEnd, 0); | 1043 | addinstruction(&compst, IEnd, 0); |
| 1009 | realloccode(L, p, compst.ncode); /* set final size */ | 1044 | realloccode(L, p, compst.ncode); /* set final size */ |
| @@ -12,8 +12,8 @@ int checkaux (TTree *tree, int pred); | |||
| 12 | int fixedlen (TTree *tree); | 12 | int fixedlen (TTree *tree); |
| 13 | int hascaptures (TTree *tree); | 13 | int hascaptures (TTree *tree); |
| 14 | int lp_gc (lua_State *L); | 14 | int lp_gc (lua_State *L); |
| 15 | Instruction *compile (lua_State *L, Pattern *p); | 15 | Instruction *compile (lua_State *L, Pattern *p, uint size); |
| 16 | void realloccode (lua_State *L, Pattern *p, int nsize); | 16 | void freecode (lua_State *L, Pattern *p); |
| 17 | int sizei (const Instruction *i); | 17 | int sizei (const Instruction *i); |
| 18 | 18 | ||
| 19 | 19 | ||
| @@ -137,8 +137,9 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | 139 | ||
| 140 | void printpatt (Instruction *p, int n) { | 140 | void printpatt (Instruction *p) { |
| 141 | Instruction *op = p; | 141 | Instruction *op = p; |
| 142 | uint n = op[-1].codesize - 1; | ||
| 142 | while (p < op + n) { | 143 | while (p < op + n) { |
| 143 | printinst(op, p); | 144 | printinst(op, p); |
| 144 | p += sizei(p); | 145 | p += sizei(p); |
| @@ -9,7 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | #if defined(LPEG_DEBUG) | 10 | #if defined(LPEG_DEBUG) |
| 11 | 11 | ||
| 12 | void printpatt (Instruction *p, int n); | 12 | void printpatt (Instruction *p); |
| 13 | void printtree (TTree *tree, int ident); | 13 | void printtree (TTree *tree, int ident); |
| 14 | void printktable (lua_State *L, int idx); | 14 | void printktable (lua_State *L, int idx); |
| 15 | void printcharset (const byte *st); | 15 | void printcharset (const byte *st); |
| @@ -359,7 +359,7 @@ static TTree *newtree (lua_State *L, int len) { | |||
| 359 | lua_pushvalue(L, -1); | 359 | lua_pushvalue(L, -1); |
| 360 | lua_setuservalue(L, -3); | 360 | lua_setuservalue(L, -3); |
| 361 | lua_setmetatable(L, -2); | 361 | lua_setmetatable(L, -2); |
| 362 | p->code = NULL; p->codesize = 0; | 362 | p->code = NULL; |
| 363 | return p->tree; | 363 | return p->tree; |
| 364 | } | 364 | } |
| 365 | 365 | ||
| @@ -1189,7 +1189,7 @@ static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { | |||
| 1189 | lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ | 1189 | lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ |
| 1190 | finalfix(L, 0, NULL, p->tree); | 1190 | finalfix(L, 0, NULL, p->tree); |
| 1191 | lua_pop(L, 1); /* remove 'ktable' */ | 1191 | lua_pop(L, 1); /* remove 'ktable' */ |
| 1192 | return compile(L, p); | 1192 | return compile(L, p, getsize(L, idx)); |
| 1193 | } | 1193 | } |
| 1194 | 1194 | ||
| 1195 | 1195 | ||
| @@ -1212,7 +1212,7 @@ static int lp_printcode (lua_State *L) { | |||
| 1212 | printktable(L, 1); | 1212 | printktable(L, 1); |
| 1213 | if (p->code == NULL) /* not compiled yet? */ | 1213 | if (p->code == NULL) /* not compiled yet? */ |
| 1214 | prepcompile(L, p, 1); | 1214 | prepcompile(L, p, 1); |
| 1215 | printpatt(p->code, p->codesize); | 1215 | printpatt(p->code); |
| 1216 | return 0; | 1216 | return 0; |
| 1217 | } | 1217 | } |
| 1218 | 1218 | ||
| @@ -1290,7 +1290,7 @@ static int lp_type (lua_State *L) { | |||
| 1290 | 1290 | ||
| 1291 | int lp_gc (lua_State *L) { | 1291 | int lp_gc (lua_State *L) { |
| 1292 | Pattern *p = getpattern(L, 1); | 1292 | Pattern *p = getpattern(L, 1); |
| 1293 | realloccode(L, p, 0); /* delete code block */ | 1293 | freecode(L, p); /* delete code block */ |
| 1294 | return 0; | 1294 | return 0; |
| 1295 | } | 1295 | } |
| 1296 | 1296 | ||
| @@ -1377,7 +1377,6 @@ static struct luaL_Reg metareg[] = { | |||
| 1377 | 1377 | ||
| 1378 | int luaopen_lpeg (lua_State *L); | 1378 | int luaopen_lpeg (lua_State *L); |
| 1379 | int luaopen_lpeg (lua_State *L) { | 1379 | int luaopen_lpeg (lua_State *L) { |
| 1380 | printf("%ld\n", sizeof(TTree)); | ||
| 1381 | luaL_newmetatable(L, PATTERN_T); | 1380 | luaL_newmetatable(L, PATTERN_T); |
| 1382 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ | 1381 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ |
| 1383 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | 1382 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); |
| @@ -72,7 +72,6 @@ typedef struct TTree { | |||
| 72 | */ | 72 | */ |
| 73 | typedef struct Pattern { | 73 | typedef struct Pattern { |
| 74 | union Instruction *code; | 74 | union Instruction *code; |
| 75 | int codesize; | ||
| 76 | TTree tree[1]; | 75 | TTree tree[1]; |
| 77 | } Pattern; | 76 | } Pattern; |
| 78 | 77 | ||
| @@ -43,7 +43,11 @@ typedef enum Opcode { | |||
| 43 | } Opcode; | 43 | } Opcode; |
| 44 | 44 | ||
| 45 | 45 | ||
| 46 | 46 | /* | |
| 47 | ** All array of instructions has a 'codesize' as its first element | ||
| 48 | ** and is referred by a pointer to its second element, which is the | ||
| 49 | ** first actual opcode. | ||
| 50 | */ | ||
| 47 | typedef union Instruction { | 51 | typedef union Instruction { |
| 48 | struct Inst { | 52 | struct Inst { |
| 49 | byte code; | 53 | byte code; |
| @@ -57,6 +61,7 @@ typedef union Instruction { | |||
| 57 | } aux2; | 61 | } aux2; |
| 58 | } i; | 62 | } i; |
| 59 | int offset; | 63 | int offset; |
| 64 | uint codesize; | ||
| 60 | byte buff[1]; | 65 | byte buff[1]; |
| 61 | } Instruction; | 66 | } Instruction; |
| 62 | 67 | ||
| @@ -66,7 +71,6 @@ typedef union Instruction { | |||
| 66 | 71 | ||
| 67 | 72 | ||
| 68 | int charinset (const Instruction *i, const byte *buff, uint c); | 73 | int charinset (const Instruction *i, const byte *buff, uint c); |
| 69 | void printpatt (Instruction *p, int n); | ||
| 70 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | 74 | const char *match (lua_State *L, const char *o, const char *s, const char *e, |
| 71 | Instruction *op, Capture *capture, int ptop); | 75 | Instruction *op, Capture *capture, int ptop); |
| 72 | 76 | ||
