aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-04-27 15:22:06 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-04-27 15:22:06 -0300
commit503126fec29117e31d633b81203f887b99040c4a (patch)
treea5b287e10955c12e82b0dfb2a81fdc579e3f0b36
parent97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125 (diff)
downloadlpeg-503126fec29117e31d633b81203f887b99040c4a.tar.gz
lpeg-503126fec29117e31d633b81203f887b99040c4a.tar.bz2
lpeg-503126fec29117e31d633b81203f887b99040c4a.zip
Small optimization in size of charset trees
Got a byte that was wasted for padding to be used in the bitmap.
-rw-r--r--lptree.c54
-rw-r--r--lptree.h5
-rw-r--r--lptypes.h5
3 files changed, 35 insertions, 29 deletions
diff --git a/lptree.c b/lptree.c
index c61a5db..ff5cb9d 100644
--- a/lptree.c
+++ b/lptree.c
@@ -335,7 +335,7 @@ static Pattern *getpattern (lua_State *L, int idx) {
335 335
336 336
337static int getsize (lua_State *L, int idx) { 337static int getsize (lua_State *L, int idx) {
338 return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; 338 return (lua_rawlen(L, idx) - offsetof(Pattern, tree)) / sizeof(TTree);
339} 339}
340 340
341 341
@@ -348,12 +348,12 @@ static TTree *gettree (lua_State *L, int idx, int *len) {
348 348
349 349
350/* 350/*
351** create a pattern. Set its uservalue (the 'ktable') equal to its 351** create a pattern followed by a tree with 'len' nodes. Set its
352** metatable. (It could be any empty sequence; the metatable is at 352** uservalue (the 'ktable') equal to its metatable. (It could be any
353** hand here, so we use it.) 353** empty sequence; the metatable is at hand here, so we use it.)
354*/ 354*/
355static TTree *newtree (lua_State *L, int len) { 355static TTree *newtree (lua_State *L, int len) {
356 size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); 356 size_t size = offsetof(Pattern, tree) + len * sizeof(TTree);
357 Pattern *p = (Pattern *)lua_newuserdata(L, size); 357 Pattern *p = (Pattern *)lua_newuserdata(L, size);
358 luaL_getmetatable(L, PATTERN_T); 358 luaL_getmetatable(L, PATTERN_T);
359 lua_pushvalue(L, -1); 359 lua_pushvalue(L, -1);
@@ -371,40 +371,44 @@ static TTree *newleaf (lua_State *L, int tag) {
371} 371}
372 372
373 373
374static TTree *newccharset (lua_State *L, byte *cs, charsetinfo *info) { 374/*
375 int i; 375** Create a tree for a charset, optimizing for special cases: empty set,
376 TTree *tree = newtree(L, bytes2slots(info->size) + 1); 376** full set, and singleton set.
377 tree->tag = TSet; 377*/
378 tree->u.set.offset = info->offset;
379 tree->u.set.size = info->size;
380 tree->u.set.deflt = info->deflt;
381 for (i = 0; i < info->size; i++)
382 treebuffer(tree)[i] = cs[info->offset + i];
383 return tree;
384}
385
386
387static TTree *newcharset (lua_State *L, byte *cs) { 378static TTree *newcharset (lua_State *L, byte *cs) {
388 charsetinfo info; 379 charsetinfo info;
389 Opcode op = charsettype(cs, &info); 380 Opcode op = charsettype(cs, &info);
390 switch (op) { 381 switch (op) {
391 case IFail: return newleaf(L, TFalse); 382 case IFail: return newleaf(L, TFalse); /* empty set */
392 case IAny: return newleaf(L, TAny); 383 case IAny: return newleaf(L, TAny); /* full set */
393 case IChar: { 384 case IChar: { /* singleton set */
394 TTree *tree =newleaf(L, TChar); 385 TTree *tree =newleaf(L, TChar);
395 tree->u.n = info.offset; 386 tree->u.n = info.offset;
396 return tree; 387 return tree;
397 } 388 }
398 default: 389 default: { /* regular set */
390 int i;
391 int bsize = /* tree size in bytes */
392 (int)offsetof(TTree, u.set.bitmap) + info.size;
393 TTree *tree = newtree(L, bytes2slots(bsize));
399 assert(op == ISet); 394 assert(op == ISet);
400 return newccharset(L, cs, &info); 395 tree->tag = TSet;
396 tree->u.set.offset = info.offset;
397 tree->u.set.size = info.size;
398 tree->u.set.deflt = info.deflt;
399 for (i = 0; i < info.size; i++) {
400 assert(&treebuffer(tree)[i] < (byte*)tree + bsize);
401 treebuffer(tree)[i] = cs[info.offset + i];
402 }
403 return tree;
404 }
401 } 405 }
402} 406}
403 407
404 408
405/* 409/*
406** add to tree a sequence where first sibling is 'sib' (with size 410** Add to tree a sequence where first sibling is 'sib' (with size
407** 'sibsize'); returns position for second sibling 411** 'sibsize'); return position for second sibling.
408*/ 412*/
409static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { 413static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) {
410 tree->tag = TSeq; tree->u.ps = sibsize + 1; 414 tree->tag = TSeq; tree->u.ps = sibsize + 1;
diff --git a/lptree.h b/lptree.h
index b76c235..7dab362 100644
--- a/lptree.h
+++ b/lptree.h
@@ -56,11 +56,16 @@ typedef struct TTree {
56 byte offset; /* compact set offset (in bytes) */ 56 byte offset; /* compact set offset (in bytes) */
57 byte size; /* compact set size (in bytes) */ 57 byte size; /* compact set size (in bytes) */
58 byte deflt; /* default value */ 58 byte deflt; /* default value */
59 byte bitmap[1]; /* bitmap (open array) */
59 } set; /* for compact sets */ 60 } set; /* for compact sets */
60 } u; 61 } u;
61} TTree; 62} TTree;
62 63
63 64
65/* access to charset */
66#define treebuffer(t) ((t)->u.set.bitmap)
67
68
64/* 69/*
65** A complete pattern has its tree plus, if already compiled, 70** A complete pattern has its tree plus, if already compiled,
66** its corresponding code 71** its corresponding code
diff --git a/lptypes.h b/lptypes.h
index 17d406e..e10d88b 100644
--- a/lptypes.h
+++ b/lptypes.h
@@ -101,11 +101,8 @@ typedef struct Charset {
101#define fillset(s,c) memset(s,c,CHARSETSIZE) 101#define fillset(s,c) memset(s,c,CHARSETSIZE)
102#define clearset(s) fillset(s,0) 102#define clearset(s) fillset(s,0)
103 103
104/* access to charset */
105#define treebuffer(t) ((byte *)((t) + 1))
106
107/* number of slots needed for 'n' bytes */ 104/* number of slots needed for 'n' bytes */
108#define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1) 105#define bytes2slots(n) (((n) - 1u) / (unsigned int)sizeof(TTree) + 1u)
109 106
110/* set 'b' bit in charset 'cs' */ 107/* set 'b' bit in charset 'cs' */
111#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) 108#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7)))