aboutsummaryrefslogtreecommitdiff
path: root/lpcode.c
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-25 15:57:30 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-25 15:57:30 -0300
commit 37ff352e833ae3a543acb07b2bee56fee7bd1491 (patch)
tree 8ca4cf55cd6c8ea0beab6b1bd5dc9ca1116efe34 /lpcode.c
parent 0476d60007ec6693fd9643f6c92aa3adb9fde8d7 (diff)
download lpeg-37ff352e833ae3a543acb07b2bee56fee7bd1491.tar.gz
lpeg-37ff352e833ae3a543acb07b2bee56fee7bd1491.tar.bz2
lpeg-37ff352e833ae3a543acb07b2bee56fee7bd1491.zip
Fixing several bugs left in previous commit
Diffstat (limited to 'lpcode.c')
-rw-r--r-- lpcode.c 61
1 files changed, 42 insertions, 19 deletions
diff --git a/lpcode.c b/lpcode.c
index 1ef7e2d..54bf586 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -42,13 +42,15 @@ static int onlybit (int c, int b) {
42 42
43 43
44/* 44/*
45** Extra information for the result of 'charsettype'. 45** Extra information for the result of 'charsettype'. When result is
46** IChar, 'aux1' is the character. When result is ISet, 'aux1' is the
47** offset (in bytes), 'size' is the size (in bytes), and
48** 'deflt' is the default value for bytes outside the set.
46*/ 49*/
47typedef struct { 50typedef struct {
48 /* unique character for result IChar, offset (in bytes) for result ISet */
49 int aux1; 51 int aux1;
50 int size; /* size (in instructions) for result ISet */ 52 int size;
51 int deflt; /* default value for bits outside that set */ 53 int deflt;
52} charsetinfo; 54} charsetinfo;
53 55
54/* 56/*
@@ -80,13 +82,13 @@ static Opcode charsettype (const byte *cs, charsetinfo *info) {
80 /* find highest byte with a 0-bit; low0 is a sentinel */; 82 /* find highest byte with a 0-bit; low0 is a sentinel */;
81 if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */ 83 if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */
82 info->aux1 = low1; 84 info->aux1 = low1;
83 info->size = instsize(high1 - low1 + 1); 85 info->size = high1 - low1 + 1;
84 info->deflt = 0; /* all discharged bits were 0 */ 86 info->deflt = 0; /* all discharged bits were 0 */
85 } 87 }
86 else { 88 else {
87 info->aux1 = low0; 89 info->aux1 = low0;
88 info->size = instsize(high0 - low0 + 1); 90 info->size = high0 - low0 + 1;
89 info->deflt = 1; /* all discharged bits were 1 */ 91 info->deflt = 0xFF; /* all discharged bits were 1 */
90 } 92 }
91 return ISet; 93 return ISet;
92} 94}
@@ -99,11 +101,6 @@ static void cs_complement (Charset *cs) {
99 loopset(i, cs->cs[i] = ~cs->cs[i]); 101 loopset(i, cs->cs[i] = ~cs->cs[i]);
100} 102}
101 103
102static int cs_equal (const byte *cs1, const byte *cs2) {
103 loopset(i, if (cs1[i] != cs2[i]) return 0);
104 return 1;
105}
106
107static int cs_disjoint (const Charset *cs1, const Charset *cs2) { 104static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
108 loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) 105 loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
109 return 1; 106 return 1;
@@ -604,16 +601,43 @@ static void addcharset (CompileState *compst, int inst, const byte *cs,
604 int p = gethere(compst); 601 int p = gethere(compst);
605 Instruction *I = &getinstr(compst, inst); 602 Instruction *I = &getinstr(compst, inst);
606 byte *charset; 603 byte *charset;
604 int isize = instsize(info->size); /* size in instructions */
607 int i; 605 int i;
608 I->i.aux2.set.offset = info->aux1 * 8; /* offset in bits */ 606 I->i.aux2.set.offset = info->aux1 * 8; /* offset in bits */
609 I->i.aux2.set.size = info->size; /* size in instructions */ 607 I->i.aux2.set.size = isize;
610 I->i.aux1 = info->deflt; 608 I->i.aux1 = info->deflt;
611 for (i = 0; i < info->size; i++) 609 for (i = 0; i < isize; i++)
612 nextinstruction(compst); /* space for charset */ 610 nextinstruction(compst); /* space for charset */
613 charset = getinstr(compst, p).buff; /* previous loop may reallocate things */ 611 charset = getinstr(compst, p).buff; /* previous loop may reallocate things */
614 /* fill buffer with charset */ 612 for (i = 0; i < info->size; i++)
615 for (i = 0; i < info->size * (int)sizeof(Instruction); i++) 613 charset[i] = cs[i + info->aux1]; /* fill buffer with charset */
616 charset[i] = cs[i + info->aux1]; 614 for (; i < isize * (int)sizeof(Instruction); i++)
615 charset[i] = info->deflt; /* complete the buffer */
616}
617
618
619/*
620** Check whether compact charset cs is dominated by instruction 'p'
621*/
622static int cs_equal (Instruction *p, const byte *cs, charsetinfo *info) {
623 if (p->i.code != ITestSet)
624 return 0;
625 else if (p->i.aux2.set.offset != info->aux1 * 8 ||
626 p->i.aux2.set.size != instsize(info->size) ||
627 p->i.aux1 != info->deflt)
628 return 0;
629 else {
630 int i;
631 for (i = 0; i < info->size; i++) {
632 if ((p + 2)->buff[i] != cs[i + info->aux1])
633 return 0;
634 }
635 for (; i < instsize(info->size) * (int)sizeof(Instruction); i++) {
636 if ((p + 2)->buff[i] != info->deflt)
637 return 0;
638 }
639 }
640 return 1;
617} 641}
618 642
619 643
@@ -628,8 +652,7 @@ static void codecharset (CompileState *compst, const byte *cs, int tt) {
628 switch (op) { 652 switch (op) {
629 case IChar: codechar(compst, info.aux1, tt); break; 653 case IChar: codechar(compst, info.aux1, tt); break;
630 case ISet: { /* non-trivial set? */ 654 case ISet: { /* non-trivial set? */
631 if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet && 655 if (tt >= 0 && cs_equal(&getinstr(compst, tt), cs, &info))
632 cs_equal(cs, getinstr(compst, tt + 2).buff))
633 addinstruction(compst, IAny, 0); 656 addinstruction(compst, IAny, 0);
634 else { 657 else {
635 int i = addinstruction(compst, ISet, 0); 658 int i = addinstruction(compst, ISet, 0);