diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-26 13:36:34 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-26 13:36:34 -0300 |
commit | 3403b0c7256435560b63f828da92026c5d4c898b (patch) | |
tree | ca6d5753f55fb2d7b6c85cedfe332e03033190a7 /lpcset.c | |
parent | def10e7c009f71f99d6a11171d84fc27568f9b81 (diff) | |
download | lpeg-3403b0c7256435560b63f828da92026c5d4c898b.tar.gz lpeg-3403b0c7256435560b63f828da92026c5d4c898b.tar.bz2 lpeg-3403b0c7256435560b63f828da92026c5d4c898b.zip |
New module 'lpcset'
For code related to compact sets.
Diffstat (limited to 'lpcset.c')
-rw-r--r-- | lpcset.c | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/lpcset.c b/lpcset.c new file mode 100644 index 0000000..9ecf475 --- /dev/null +++ b/lpcset.c | |||
@@ -0,0 +1,69 @@ | |||
1 | |||
2 | #include "lptypes.h" | ||
3 | #include "lpcset.h" | ||
4 | |||
5 | |||
/*
** Return 'c' plus the position (0-7) of the single bit set in byte 'b'.
** The position is found by bisection: first decide between the high
** and low nibble, then between the two pairs of that nibble, and
** finally between the two bits of that pair.
*/
static int onlybit (int c, int b) {
  if (b & 0xF0) {  /* bit is in the upper nibble? */
    b >>= 4;
    c += 4;
  }
  if (b & 0x0C) {  /* bit is in the upper pair of the nibble? */
    b >>= 2;
    c += 2;
  }
  return (b & 0x02) ? c + 1 : c;  /* upper bit of the remaining pair? */
}
15 | |||
16 | |||
17 | /* | ||
18 | ** Check whether a charset is empty (returns IFail), singleton (IChar), | ||
19 | ** full (IAny), or none of those (ISet). When singleton, 'info.aux1' | ||
20 | ** returns which character it is. When generic set, 'info' returns | ||
21 | ** information about its range. | ||
22 | */ | ||
23 | Opcode charsettype (const byte *cs, charsetinfo *info) { | ||
24 | int low0, low1, high0, high1; | ||
25 | for (low1 = 0; low1 < CHARSETSIZE && cs[low1] == 0; low1++) | ||
26 | /* find lowest byte with a 1-bit */; | ||
27 | if (low1 == CHARSETSIZE) | ||
28 | return IFail; /* no characters in set */ | ||
29 | for (high1 = CHARSETSIZE - 1; cs[high1] == 0; high1--) | ||
30 | /* find highest byte with a 1-bit; low1 is a sentinel */; | ||
31 | if (low1 == high1) { /* only one byte with 1-bits? */ | ||
32 | int b = cs[low1]; | ||
33 | if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */ | ||
34 | info->aux1 = onlybit(low1 * BITSPERCHAR, b); /* get that bit */ | ||
35 | return IChar; /* single character */ | ||
36 | } | ||
37 | } | ||
38 | for (low0 = 0; low0 < CHARSETSIZE && cs[low0] == 0xFF; low0++) | ||
39 | /* find lowest byte with a 0-bit */; | ||
40 | if (low0 == CHARSETSIZE) | ||
41 | return IAny; /* set has all bits set */ | ||
42 | for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--) | ||
43 | /* find highest byte with a 0-bit; low0 is a sentinel */; | ||
44 | if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */ | ||
45 | info->aux1 = low1; | ||
46 | info->size = high1 - low1 + 1; | ||
47 | info->deflt = 0; /* all discharged bits were 0 */ | ||
48 | } | ||
49 | else { | ||
50 | info->aux1 = low0; | ||
51 | info->size = high0 - low0 + 1; | ||
52 | info->deflt = 0xFF; /* all discharged bits were 1 */ | ||
53 | } | ||
54 | return ISet; | ||
55 | } | ||
56 | |||
57 | |||
58 | /* | ||
59 | ** Get a byte from a compact charset. If index is inside the charset | ||
60 | ** range, get the byte from the supporting charset (correcting it | ||
61 | ** by the offset). Otherwise, return the default for the set. | ||
62 | */ | ||
63 | byte getbytefromcharset (const byte *cs, const charsetinfo *info, | ||
64 | int index) { | ||
65 | if (index < info->size) | ||
66 | return cs[info->aux1 + index]; | ||
67 | else return info->deflt; | ||
68 | } | ||
69 | |||