aboutsummaryrefslogtreecommitdiff
path: root/lpcset.c
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-27 10:32:39 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-27 10:32:39 -0300
commit 012cf9c86cf91cb8354e229bde335592d41b84b2 (patch)
tree 353f17797b1952eaec231c8e4fd5c21e02daf875 /lpcset.c
parent 3403b0c7256435560b63f828da92026c5d4c898b (diff)
downloadlpeg-012cf9c86cf91cb8354e229bde335592d41b84b2.tar.gz
lpeg-012cf9c86cf91cb8354e229bde335592d41b84b2.tar.bz2
lpeg-012cf9c86cf91cb8354e229bde335592d41b84b2.zip
Compact charsets used in trees, too.
Diffstat (limited to 'lpcset.c')
-rw-r--r-- lpcset.c 55
1 files changed, 48 insertions, 7 deletions
diff --git a/lpcset.c b/lpcset.c
index 9ecf475..2e62d94 100644
--- a/lpcset.c
+++ b/lpcset.c
@@ -16,7 +16,7 @@ static int onlybit (int c, int b) {
16 16
17/* 17/*
18** Check whether a charset is empty (returns IFail), singleton (IChar), 18** Check whether a charset is empty (returns IFail), singleton (IChar),
19** full (IAny), or none of those (ISet). When singleton, 'info.aux1' 19** full (IAny), or none of those (ISet). When singleton, 'info.offset'
20** returns which character it is. When generic set, 'info' returns 20** returns which character it is. When generic set, 'info' returns
21** information about its range. 21** information about its range.
22*/ 22*/
@@ -31,7 +31,7 @@ Opcode charsettype (const byte *cs, charsetinfo *info) {
31 if (low1 == high1) { /* only one byte with 1-bits? */ 31 if (low1 == high1) { /* only one byte with 1-bits? */
32 int b = cs[low1]; 32 int b = cs[low1];
33 if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */ 33 if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */
34 info->aux1 = onlybit(low1 * BITSPERCHAR, b); /* get that bit */ 34 info->offset = onlybit(low1 * BITSPERCHAR, b); /* get that bit */
35 return IChar; /* single character */ 35 return IChar; /* single character */
36 } 36 }
37 } 37 }
@@ -42,15 +42,16 @@ Opcode charsettype (const byte *cs, charsetinfo *info) {
42 for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--) 42 for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--)
43 /* find highest byte with a 0-bit; low0 is a sentinel */; 43 /* find highest byte with a 0-bit; low0 is a sentinel */;
44 if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */ 44 if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */
45 info->aux1 = low1; 45 info->offset = low1;
46 info->size = high1 - low1 + 1; 46 info->size = high1 - low1 + 1;
47 info->deflt = 0; /* all discharged bits were 0 */ 47 info->deflt = 0; /* all discharged bits were 0 */
48 } 48 }
49 else { 49 else {
50 info->aux1 = low0; 50 info->offset = low0;
51 info->size = high0 - low0 + 1; 51 info->size = high0 - low0 + 1;
52 info->deflt = 0xFF; /* all discharged bits were 1 */ 52 info->deflt = 0xFF; /* all discharged bits were 1 */
53 } 53 }
54 info->cs = cs + info->offset;
54 return ISet; 55 return ISet;
55} 56}
56 57
@@ -60,10 +61,50 @@ Opcode charsettype (const byte *cs, charsetinfo *info) {
60** range, get the byte from the supporting charset (correcting it 61** range, get the byte from the supporting charset (correcting it
61** by the offset). Otherwise, return the default for the set. 62** by the offset). Otherwise, return the default for the set.
62*/ 63*/
63byte getbytefromcharset (const byte *cs, const charsetinfo *info, 64byte getbytefromcharset (const charsetinfo *info, int index) {
64 int index) {
65 if (index < info->size) 65 if (index < info->size)
66 return cs[info->aux1 + index]; 66 return info->cs[index];
67 else return info->deflt; 67 else return info->deflt;
68} 68}
69 69
70
71/*
72** If 'tree' is a 'char' pattern (TSet, TChar, TAny, TFalse), convert it
73** into a charset and return 1; else return 0.
74*/
75int tocharset (TTree *tree, Charset *cs) {
76 switch (tree->tag) {
77 case TChar: { /* only one char */
78 assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
79 loopset(i, cs->cs[i] = 0); /* erase all chars */
80 setchar(cs->cs, tree->u.n); /* add that one */
81 return 1;
82 }
83 case TAny: {
84 loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */
85 return 1;
86 }
87 case TFalse: {
88 loopset(i, cs->cs[i] = 0); /* empty set */
89 return 1;
90 }
91 case TSet: { /* fill set */
92 int i;
93 loopset(j, cs->cs[j] = tree->u.set.deflt);
94 for (i = 0; i < tree->u.set.size; i++)
95 cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i];
96 return 1;
97 }
98 default: return 0;
99 }
100}
101
102
103void tree2cset (TTree *tree, charsetinfo *info) {
104 assert(tree->tag == TSet);
105 info->offset = tree->u.set.offset;
106 info->size = tree->u.set.size;
107 info->deflt = tree->u.set.deflt;
108 info->cs = treebuffer(tree);
109}
110