From 012cf9c86cf91cb8354e229bde335592d41b84b2 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 27 Apr 2023 10:32:39 -0300 Subject: Compact charsets used in trees, too. --- lpcset.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 7 deletions(-) (limited to 'lpcset.c') diff --git a/lpcset.c b/lpcset.c index 9ecf475..2e62d94 100644 --- a/lpcset.c +++ b/lpcset.c @@ -16,7 +16,7 @@ static int onlybit (int c, int b) { /* ** Check whether a charset is empty (returns IFail), singleton (IChar), -** full (IAny), or none of those (ISet). When singleton, 'info.aux1' +** full (IAny), or none of those (ISet). When singleton, 'info.offset' ** returns which character it is. When generic set, 'info' returns ** information about its range. */ @@ -31,7 +31,7 @@ Opcode charsettype (const byte *cs, charsetinfo *info) { if (low1 == high1) { /* only one byte with 1-bits? */ int b = cs[low1]; if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */ - info->aux1 = onlybit(low1 * BITSPERCHAR, b); /* get that bit */ + info->offset = onlybit(low1 * BITSPERCHAR, b); /* get that bit */ return IChar; /* single character */ } } @@ -42,15 +42,16 @@ Opcode charsettype (const byte *cs, charsetinfo *info) { for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--) /* find highest byte with a 0-bit; low0 is a sentinel */; if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */ - info->aux1 = low1; + info->offset = low1; info->size = high1 - low1 + 1; info->deflt = 0; /* all discharged bits were 0 */ } else { - info->aux1 = low0; + info->offset = low0; info->size = high0 - low0 + 1; info->deflt = 0xFF; /* all discharged bits were 1 */ } + info->cs = cs + info->offset; return ISet; } @@ -60,10 +61,50 @@ Opcode charsettype (const byte *cs, charsetinfo *info) { ** range, get the byte from the supporting charset (correcting it ** by the offset). Otherwise, return the default for the set. 
*/
-byte getbytefromcharset (const byte *cs, const charsetinfo *info,
-                         int index) {
+byte getbytefromcharset (const charsetinfo *info, int index) {
   if (index < info->size)
-    return cs[info->aux1 + index];
+    return info->cs[index];  /* 'info->cs' is indexed from 0; no offset added */
   else return info->deflt;
 }
 
+
+/*
+** If 'tree' is a 'char' pattern (TSet, TChar, TAny, TFalse), convert it
+** into a charset and return 1; else return 0 (nothing is written to 'cs').
+*/
+int tocharset (TTree *tree, Charset *cs) {
+  switch (tree->tag) {
+    case TChar: {  /* only one char */
+      assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
+      loopset(i, cs->cs[i] = 0);  /* erase all chars */
+      setchar(cs->cs, tree->u.n);  /* add that one */
+      return 1;
+    }
+    case TAny: {  /* every char */
+      loopset(i, cs->cs[i] = 0xFF);  /* add all characters to the set */
+      return 1;
+    }
+    case TFalse: {  /* no char */
+      loopset(i, cs->cs[i] = 0);  /* empty set */
+      return 1;
+    }
+    case TSet: {  /* fill set */
+      int i;
+      loopset(j, cs->cs[j] = tree->u.set.deflt);  /* default value first */
+      for (i = 0; i < tree->u.set.size; i++)  /* then the 'size' stored bytes */
+        cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i];
+      return 1;
+    }
+    default: return 0;  /* any other tag: 'cs' is left as it was */
+  }
+}
+
+/* Copy the TSet fields of 'tree' into 'info'; bytes themselves are not copied. */
+void tree2cset (TTree *tree, charsetinfo *info) {
+  assert(tree->tag == TSet);  /* caller must pass a TSet node */
+  info->offset = tree->u.set.offset;
+  info->size = tree->u.set.size;
+  info->deflt = tree->u.set.deflt;
+  info->cs = treebuffer(tree);  /* used as is; no offset added */
+}
+
-- 
cgit v1.2.3-55-g6feb