diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-27 15:22:06 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-27 15:22:06 -0300 |
commit | 503126fec29117e31d633b81203f887b99040c4a (patch) | |
tree | a5b287e10955c12e82b0dfb2a81fdc579e3f0b36 | |
parent | 97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125 (diff) | |
download | lpeg-503126fec29117e31d633b81203f887b99040c4a.tar.gz lpeg-503126fec29117e31d633b81203f887b99040c4a.tar.bz2 lpeg-503126fec29117e31d633b81203f887b99040c4a.zip |
Small optimization in size of charset trees
Got a byte that was wasted for padding to be used in the bitmap.
-rw-r--r-- | lptree.c | 54 | ||||
-rw-r--r-- | lptree.h | 5 | ||||
-rw-r--r-- | lptypes.h | 5 |
3 files changed, 35 insertions, 29 deletions
@@ -335,7 +335,7 @@ static Pattern *getpattern (lua_State *L, int idx) { | |||
335 | 335 | ||
336 | 336 | ||
337 | static int getsize (lua_State *L, int idx) { | 337 | static int getsize (lua_State *L, int idx) { |
338 | return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; | 338 | return (lua_rawlen(L, idx) - offsetof(Pattern, tree)) / sizeof(TTree); |
339 | } | 339 | } |
340 | 340 | ||
341 | 341 | ||
@@ -348,12 +348,12 @@ static TTree *gettree (lua_State *L, int idx, int *len) { | |||
348 | 348 | ||
349 | 349 | ||
350 | /* | 350 | /* |
351 | ** create a pattern. Set its uservalue (the 'ktable') equal to its | 351 | ** create a pattern followed by a tree with 'len' nodes. Set its |
352 | ** metatable. (It could be any empty sequence; the metatable is at | 352 | ** uservalue (the 'ktable') equal to its metatable. (It could be any |
353 | ** hand here, so we use it.) | 353 | ** empty sequence; the metatable is at hand here, so we use it.) |
354 | */ | 354 | */ |
355 | static TTree *newtree (lua_State *L, int len) { | 355 | static TTree *newtree (lua_State *L, int len) { |
356 | size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); | 356 | size_t size = offsetof(Pattern, tree) + len * sizeof(TTree); |
357 | Pattern *p = (Pattern *)lua_newuserdata(L, size); | 357 | Pattern *p = (Pattern *)lua_newuserdata(L, size); |
358 | luaL_getmetatable(L, PATTERN_T); | 358 | luaL_getmetatable(L, PATTERN_T); |
359 | lua_pushvalue(L, -1); | 359 | lua_pushvalue(L, -1); |
@@ -371,40 +371,44 @@ static TTree *newleaf (lua_State *L, int tag) { | |||
371 | } | 371 | } |
372 | 372 | ||
373 | 373 | ||
374 | static TTree *newccharset (lua_State *L, byte *cs, charsetinfo *info) { | 374 | /* |
375 | int i; | 375 | ** Create a tree for a charset, optimizing for special cases: empty set, |
376 | TTree *tree = newtree(L, bytes2slots(info->size) + 1); | 376 | ** full set, and singleton set. |
377 | tree->tag = TSet; | 377 | */ |
378 | tree->u.set.offset = info->offset; | ||
379 | tree->u.set.size = info->size; | ||
380 | tree->u.set.deflt = info->deflt; | ||
381 | for (i = 0; i < info->size; i++) | ||
382 | treebuffer(tree)[i] = cs[info->offset + i]; | ||
383 | return tree; | ||
384 | } | ||
385 | |||
386 | |||
387 | static TTree *newcharset (lua_State *L, byte *cs) { | 378 | static TTree *newcharset (lua_State *L, byte *cs) { |
388 | charsetinfo info; | 379 | charsetinfo info; |
389 | Opcode op = charsettype(cs, &info); | 380 | Opcode op = charsettype(cs, &info); |
390 | switch (op) { | 381 | switch (op) { |
391 | case IFail: return newleaf(L, TFalse); | 382 | case IFail: return newleaf(L, TFalse); /* empty set */ |
392 | case IAny: return newleaf(L, TAny); | 383 | case IAny: return newleaf(L, TAny); /* full set */ |
393 | case IChar: { | 384 | case IChar: { /* singleton set */ |
394 | TTree *tree =newleaf(L, TChar); | 385 | TTree *tree =newleaf(L, TChar); |
395 | tree->u.n = info.offset; | 386 | tree->u.n = info.offset; |
396 | return tree; | 387 | return tree; |
397 | } | 388 | } |
398 | default: | 389 | default: { /* regular set */ |
390 | int i; | ||
391 | int bsize = /* tree size in bytes */ | ||
392 | (int)offsetof(TTree, u.set.bitmap) + info.size; | ||
393 | TTree *tree = newtree(L, bytes2slots(bsize)); | ||
399 | assert(op == ISet); | 394 | assert(op == ISet); |
400 | return newccharset(L, cs, &info); | 395 | tree->tag = TSet; |
396 | tree->u.set.offset = info.offset; | ||
397 | tree->u.set.size = info.size; | ||
398 | tree->u.set.deflt = info.deflt; | ||
399 | for (i = 0; i < info.size; i++) { | ||
400 | assert(&treebuffer(tree)[i] < (byte*)tree + bsize); | ||
401 | treebuffer(tree)[i] = cs[info.offset + i]; | ||
402 | } | ||
403 | return tree; | ||
404 | } | ||
401 | } | 405 | } |
402 | } | 406 | } |
403 | 407 | ||
404 | 408 | ||
405 | /* | 409 | /* |
406 | ** add to tree a sequence where first sibling is 'sib' (with size | 410 | ** Add to tree a sequence where first sibling is 'sib' (with size |
407 | ** 'sibsize'); returns position for second sibling | 411 | ** 'sibsize'); return position for second sibling. |
408 | */ | 412 | */ |
409 | static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { | 413 | static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { |
410 | tree->tag = TSeq; tree->u.ps = sibsize + 1; | 414 | tree->tag = TSeq; tree->u.ps = sibsize + 1; |
@@ -56,11 +56,16 @@ typedef struct TTree { | |||
56 | byte offset; /* compact set offset (in bytes) */ | 56 | byte offset; /* compact set offset (in bytes) */ |
57 | byte size; /* compact set size (in bytes) */ | 57 | byte size; /* compact set size (in bytes) */ |
58 | byte deflt; /* default value */ | 58 | byte deflt; /* default value */ |
59 | byte bitmap[1]; /* bitmap (open array) */ | ||
59 | } set; /* for compact sets */ | 60 | } set; /* for compact sets */ |
60 | } u; | 61 | } u; |
61 | } TTree; | 62 | } TTree; |
62 | 63 | ||
63 | 64 | ||
65 | /* access to charset */ | ||
66 | #define treebuffer(t) ((t)->u.set.bitmap) | ||
67 | |||
68 | |||
64 | /* | 69 | /* |
65 | ** A complete pattern has its tree plus, if already compiled, | 70 | ** A complete pattern has its tree plus, if already compiled, |
66 | ** its corresponding code | 71 | ** its corresponding code |
@@ -101,11 +101,8 @@ typedef struct Charset { | |||
101 | #define fillset(s,c) memset(s,c,CHARSETSIZE) | 101 | #define fillset(s,c) memset(s,c,CHARSETSIZE) |
102 | #define clearset(s) fillset(s,0) | 102 | #define clearset(s) fillset(s,0) |
103 | 103 | ||
104 | /* access to charset */ | ||
105 | #define treebuffer(t) ((byte *)((t) + 1)) | ||
106 | |||
107 | /* number of slots needed for 'n' bytes */ | 104 | /* number of slots needed for 'n' bytes */ |
108 | #define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1) | 105 | #define bytes2slots(n) (((n) - 1u) / (unsigned int)sizeof(TTree) + 1u) |
109 | 106 | ||
110 | /* set 'b' bit in charset 'cs' */ | 107 | /* set 'b' bit in charset 'cs' */ |
111 | #define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) | 108 | #define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) |