aboutsummaryrefslogtreecommitdiff
path: root/lpcset.c
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-04-26 13:36:34 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-04-26 13:36:34 -0300
commit3403b0c7256435560b63f828da92026c5d4c898b (patch)
treeca6d5753f55fb2d7b6c85cedfe332e03033190a7 /lpcset.c
parentdef10e7c009f71f99d6a11171d84fc27568f9b81 (diff)
downloadlpeg-3403b0c7256435560b63f828da92026c5d4c898b.tar.gz
lpeg-3403b0c7256435560b63f828da92026c5d4c898b.tar.bz2
lpeg-3403b0c7256435560b63f828da92026c5d4c898b.zip
New module 'lpcset'
For code related to compact sets.
Diffstat (limited to 'lpcset.c')
-rw-r--r--lpcset.c69
1 files changed, 69 insertions, 0 deletions
diff --git a/lpcset.c b/lpcset.c
new file mode 100644
index 0000000..9ecf475
--- /dev/null
+++ b/lpcset.c
@@ -0,0 +1,69 @@
1
2#include "lptypes.h"
3#include "lpcset.h"
4
5
/*
** Given a byte 'b' that has exactly one bit set, return 'c' plus the
** position of that bit (0 for the least significant bit, 7 for the
** most significant one). The position is found by a three-step binary
** search: upper/lower nibble, then upper/lower pair, then upper/lower
** bit.
*/
static int onlybit (int c, int b) {
  int pos = 0;  /* bit position accumulated so far */
  if (b & 0xF0) {  /* bit lives in the upper nibble? */
    pos += 4;
    b >>= 4;  /* bring that nibble down */
  }
  if (b & 0x0C) {  /* bit lives in the upper pair of the nibble? */
    pos += 2;
    b >>= 2;  /* bring that pair down */
  }
  if (b & 0x02)  /* bit is the upper one of the pair? */
    pos += 1;
  return c + pos;
}
15
16
17/*
18** Check whether a charset is empty (returns IFail), singleton (IChar),
19** full (IAny), or none of those (ISet). When singleton, 'info.aux1'
20** returns which character it is. When generic set, 'info' returns
21** information about its range.
22*/
23Opcode charsettype (const byte *cs, charsetinfo *info) {
24 int low0, low1, high0, high1;
25 for (low1 = 0; low1 < CHARSETSIZE && cs[low1] == 0; low1++)
26 /* find lowest byte with a 1-bit */;
27 if (low1 == CHARSETSIZE)
28 return IFail; /* no characters in set */
29 for (high1 = CHARSETSIZE - 1; cs[high1] == 0; high1--)
30 /* find highest byte with a 1-bit; low1 is a sentinel */;
31 if (low1 == high1) { /* only one byte with 1-bits? */
32 int b = cs[low1];
33 if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */
34 info->aux1 = onlybit(low1 * BITSPERCHAR, b); /* get that bit */
35 return IChar; /* single character */
36 }
37 }
38 for (low0 = 0; low0 < CHARSETSIZE && cs[low0] == 0xFF; low0++)
39 /* find lowest byte with a 0-bit */;
40 if (low0 == CHARSETSIZE)
41 return IAny; /* set has all bits set */
42 for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--)
43 /* find highest byte with a 0-bit; low0 is a sentinel */;
44 if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */
45 info->aux1 = low1;
46 info->size = high1 - low1 + 1;
47 info->deflt = 0; /* all discharged bits were 0 */
48 }
49 else {
50 info->aux1 = low0;
51 info->size = high0 - low0 + 1;
52 info->deflt = 0xFF; /* all discharged bits were 1 */
53 }
54 return ISet;
55}
56
57
58/*
59** Get a byte from a compact charset. If index is inside the charset
60** range, get the byte from the supporting charset (correcting it
61** by the offset). Otherwise, return the default for the set.
62*/
63byte getbytefromcharset (const byte *cs, const charsetinfo *info,
64 int index) {
65 if (index < info->size)
66 return cs[info->aux1 + index];
67 else return info->deflt;
68}
69