aboutsummaryrefslogtreecommitdiff
path: root/lpcode.c
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-26 13:36:34 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-26 13:36:34 -0300
commit 3403b0c7256435560b63f828da92026c5d4c898b (patch)
tree ca6d5753f55fb2d7b6c85cedfe332e03033190a7 /lpcode.c
parent def10e7c009f71f99d6a11171d84fc27568f9b81 (diff)
download lpeg-3403b0c7256435560b63f828da92026c5d4c898b.tar.gz
lpeg-3403b0c7256435560b63f828da92026c5d4c898b.tar.bz2
lpeg-3403b0c7256435560b63f828da92026c5d4c898b.zip
New module 'lpcset'
For code related to compact sets.
Diffstat (limited to 'lpcode.c')
-rw-r--r-- lpcode.c 81
1 files changed, 6 insertions, 75 deletions
diff --git a/lpcode.c b/lpcode.c
index ba6942f..66d2f3f 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -7,6 +7,7 @@
7 7
8#include "lptypes.h" 8#include "lptypes.h"
9#include "lpcode.h" 9#include "lpcode.h"
10#include "lpcset.h"
10 11
11 12
12/* signals a "no-instruction" */ 13/* signals a "no-instruction" */
@@ -31,70 +32,6 @@ static const Charset *fullset = &fullset_;
31 32
32 33
33/* 34/*
34** Add to 'c' the index of the (only) bit set in byte 'b'
35*/
36static int onlybit (int c, int b) {
37 if ((b & 0xF0) != 0) { c += 4; b >>= 4; }
38 if ((b & 0x0C) != 0) { c += 2; b >>= 2; }
39 if ((b & 0x02) != 0) { c += 1; }
40 return c;
41}
42
43
44/*
45** Extra information for the result of 'charsettype'. When result is
46** IChar, 'aux1' is the character. When result is ISet, 'aux1' is the
47** offset (in bytes), 'size' is the size (in bytes), and
48** 'deflt' is the default value for bytes outside the set.
49*/
50typedef struct {
51 int aux1;
52 int size;
53 int deflt;
54} charsetinfo;
55
56/*
57** Check whether a charset is empty (returns IFail), singleton (IChar),
58** full (IAny), or none of those (ISet). When singleton, 'info.aux1'
59** returns which character it is. When generic set, 'info' returns
60** information about its range.
61*/
62static Opcode charsettype (const byte *cs, charsetinfo *info) {
63 int low0, low1, high0, high1;
64 for (low1 = 0; low1 < CHARSETSIZE && cs[low1] == 0; low1++)
65 /* find lowest byte with a 1-bit */;
66 if (low1 == CHARSETSIZE)
67 return IFail; /* no characters in set */
68 for (high1 = CHARSETSIZE - 1; cs[high1] == 0; high1--)
69 /* find highest byte with a 1-bit; low1 is a sentinel */;
70 if (low1 == high1) { /* only one byte with 1-bits? */
71 int b = cs[low1];
72 if ((b & (b - 1)) == 0) { /* does byte have only one 1-bit? */
73 info->aux1 = onlybit(low1 * BITSPERCHAR, b); /* get that bit */
74 return IChar; /* single character */
75 }
76 }
77 for (low0 = 0; low0 < CHARSETSIZE && cs[low0] == 0xFF; low0++)
78 /* find lowest byte with a 0-bit */;
79 if (low0 == CHARSETSIZE)
80 return IAny; /* set has all bits set */
81 for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--)
82 /* find highest byte with a 0-bit; low0 is a sentinel */;
83 if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */
84 info->aux1 = low1;
85 info->size = high1 - low1 + 1;
86 info->deflt = 0; /* all discharged bits were 0 */
87 }
88 else {
89 info->aux1 = low0;
90 info->size = high0 - low0 + 1;
91 info->deflt = 0xFF; /* all discharged bits were 1 */
92 }
93 return ISet;
94}
95
96
97/*
98** A few basic operations on Charsets 35** A few basic operations on Charsets
99*/ 36*/
100static void cs_complement (Charset *cs) { 37static void cs_complement (Charset *cs) {
@@ -617,11 +554,9 @@ static void addcharset (CompileState *compst, int inst, const byte *cs,
617 I->i.aux2.set.size = isize; 554 I->i.aux2.set.size = isize;
618 I->i.aux1 = info->deflt; 555 I->i.aux1 = info->deflt;
619 p = nextinstruction(compst, isize); /* space for charset */ 556 p = nextinstruction(compst, isize); /* space for charset */
620 charset = getinstr(compst, p).buff; /* previous loop may reallocate things */ 557 charset = getinstr(compst, p).buff; /* charset buffer */
621 for (i = 0; i < info->size; i++) 558 for (i = 0; i < isize * (int)sizeof(Instruction); i++)
622 charset[i] = cs[i + info->aux1]; /* fill buffer with charset */ 559 charset[i] = getbytefromcharset(cs, info, i); /* fill the buffer */
623 for (; i < isize * (int)sizeof(Instruction); i++)
624 charset[i] = info->deflt; /* complete the buffer */
625} 560}
626 561
627 562
@@ -637,12 +572,8 @@ static int cs_equal (Instruction *p, const byte *cs, charsetinfo *info) {
637 return 0; 572 return 0;
638 else { 573 else {
639 int i; 574 int i;
640 for (i = 0; i < info->size; i++) { 575 for (i = 0; i < instsize(info->size) * (int)sizeof(Instruction); i++) {
641 if ((p + 2)->buff[i] != cs[i + info->aux1]) 576 if ((p + 2)->buff[i] != getbytefromcharset(cs, info, i))
642 return 0;
643 }
644 for (; i < instsize(info->size) * (int)sizeof(Instruction); i++) {
645 if ((p + 2)->buff[i] != info->deflt)
646 return 0; 577 return 0;
647 } 578 }
648 } 579 }