diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-25 15:57:30 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-25 15:57:30 -0300 |
commit | 37ff352e833ae3a543acb07b2bee56fee7bd1491 (patch) | |
tree | 8ca4cf55cd6c8ea0beab6b1bd5dc9ca1116efe34 /lpcode.c | |
parent | 0476d60007ec6693fd9643f6c92aa3adb9fde8d7 (diff) | |
download | lpeg-37ff352e833ae3a543acb07b2bee56fee7bd1491.tar.gz lpeg-37ff352e833ae3a543acb07b2bee56fee7bd1491.tar.bz2 lpeg-37ff352e833ae3a543acb07b2bee56fee7bd1491.zip |
Fixing several bugs left in previous commit
Diffstat (limited to 'lpcode.c')
-rw-r--r-- | lpcode.c | 61 |
1 files changed, 42 insertions, 19 deletions
@@ -42,13 +42,15 @@ static int onlybit (int c, int b) { | |||
42 | 42 | ||
43 | 43 | ||
44 | /* | 44 | /* |
45 | ** Extra information for the result of 'charsettype'. | 45 | ** Extra information for the result of 'charsettype'. When result is |
46 | ** IChar, 'aux1' is the character. When result is ISet, 'aux1' is the | ||
47 | ** offset (in bytes), 'size' is the size (in bytes), and | ||
48 | ** 'deflt' is the default value for bytes outside the set. | ||
46 | */ | 49 | */ |
47 | typedef struct { | 50 | typedef struct { |
48 | /* unique character for result IChar, offset (in bytes) for result ISet */ | ||
49 | int aux1; | 51 | int aux1; |
50 | int size; /* size (in instructions) for result ISet */ | 52 | int size; |
51 | int deflt; /* default value for bits outside that set */ | 53 | int deflt; |
52 | } charsetinfo; | 54 | } charsetinfo; |
53 | 55 | ||
54 | /* | 56 | /* |
@@ -80,13 +82,13 @@ static Opcode charsettype (const byte *cs, charsetinfo *info) { | |||
80 | /* find highest byte with a 0-bit; low0 is a sentinel */; | 82 | /* find highest byte with a 0-bit; low0 is a sentinel */; |
81 | if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */ | 83 | if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */ |
82 | info->aux1 = low1; | 84 | info->aux1 = low1; |
83 | info->size = instsize(high1 - low1 + 1); | 85 | info->size = high1 - low1 + 1; |
84 | info->deflt = 0; /* all discharged bits were 0 */ | 86 | info->deflt = 0; /* all discharged bits were 0 */ |
85 | } | 87 | } |
86 | else { | 88 | else { |
87 | info->aux1 = low0; | 89 | info->aux1 = low0; |
88 | info->size = instsize(high0 - low0 + 1); | 90 | info->size = high0 - low0 + 1; |
89 | info->deflt = 1; /* all discharged bits were 1 */ | 91 | info->deflt = 0xFF; /* all discharged bits were 1 */ |
90 | } | 92 | } |
91 | return ISet; | 93 | return ISet; |
92 | } | 94 | } |
@@ -99,11 +101,6 @@ static void cs_complement (Charset *cs) { | |||
99 | loopset(i, cs->cs[i] = ~cs->cs[i]); | 101 | loopset(i, cs->cs[i] = ~cs->cs[i]); |
100 | } | 102 | } |
101 | 103 | ||
102 | static int cs_equal (const byte *cs1, const byte *cs2) { | ||
103 | loopset(i, if (cs1[i] != cs2[i]) return 0); | ||
104 | return 1; | ||
105 | } | ||
106 | |||
107 | static int cs_disjoint (const Charset *cs1, const Charset *cs2) { | 104 | static int cs_disjoint (const Charset *cs1, const Charset *cs2) { |
108 | loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) | 105 | loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) |
109 | return 1; | 106 | return 1; |
@@ -604,16 +601,43 @@ static void addcharset (CompileState *compst, int inst, const byte *cs, | |||
604 | int p = gethere(compst); | 601 | int p = gethere(compst); |
605 | Instruction *I = &getinstr(compst, inst); | 602 | Instruction *I = &getinstr(compst, inst); |
606 | byte *charset; | 603 | byte *charset; |
604 | int isize = instsize(info->size); /* size in instructions */ | ||
607 | int i; | 605 | int i; |
608 | I->i.aux2.set.offset = info->aux1 * 8; /* offset in bits */ | 606 | I->i.aux2.set.offset = info->aux1 * 8; /* offset in bits */ |
609 | I->i.aux2.set.size = info->size; /* size in instructions */ | 607 | I->i.aux2.set.size = isize; |
610 | I->i.aux1 = info->deflt; | 608 | I->i.aux1 = info->deflt; |
611 | for (i = 0; i < info->size; i++) | 609 | for (i = 0; i < isize; i++) |
612 | nextinstruction(compst); /* space for charset */ | 610 | nextinstruction(compst); /* space for charset */ |
613 | charset = getinstr(compst, p).buff; /* previous loop may reallocate things */ | 611 | charset = getinstr(compst, p).buff; /* previous loop may reallocate things */ |
614 | /* fill buffer with charset */ | 612 | for (i = 0; i < info->size; i++) |
615 | for (i = 0; i < info->size * (int)sizeof(Instruction); i++) | 613 | charset[i] = cs[i + info->aux1]; /* fill buffer with charset */ |
616 | charset[i] = cs[i + info->aux1]; | 614 | for (; i < isize * (int)sizeof(Instruction); i++) |
615 | charset[i] = info->deflt; /* complete the buffer */ | ||
616 | } | ||
617 | |||
618 | |||
619 | /* | ||
620 | ** Check whether compact charset 'cs' (described by 'info') is equal to the set tested by ITestSet instruction 'p' | ||
621 | */ | ||
622 | static int cs_equal (Instruction *p, const byte *cs, charsetinfo *info) { | ||
623 | if (p->i.code != ITestSet) | ||
624 | return 0; | ||
625 | else if (p->i.aux2.set.offset != info->aux1 * 8 || | ||
626 | p->i.aux2.set.size != instsize(info->size) || | ||
627 | p->i.aux1 != info->deflt) | ||
628 | return 0; | ||
629 | else { | ||
630 | int i; | ||
631 | for (i = 0; i < info->size; i++) { | ||
632 | if ((p + 2)->buff[i] != cs[i + info->aux1]) | ||
633 | return 0; | ||
634 | } | ||
635 | for (; i < instsize(info->size) * (int)sizeof(Instruction); i++) { | ||
636 | if ((p + 2)->buff[i] != info->deflt) | ||
637 | return 0; | ||
638 | } | ||
639 | } | ||
640 | return 1; | ||
617 | } | 641 | } |
618 | 642 | ||
619 | 643 | ||
@@ -628,8 +652,7 @@ static void codecharset (CompileState *compst, const byte *cs, int tt) { | |||
628 | switch (op) { | 652 | switch (op) { |
629 | case IChar: codechar(compst, info.aux1, tt); break; | 653 | case IChar: codechar(compst, info.aux1, tt); break; |
630 | case ISet: { /* non-trivial set? */ | 654 | case ISet: { /* non-trivial set? */ |
631 | if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet && | 655 | if (tt >= 0 && cs_equal(&getinstr(compst, tt), cs, &info)) |
632 | cs_equal(cs, getinstr(compst, tt + 2).buff)) | ||
633 | addinstruction(compst, IAny, 0); | 656 | addinstruction(compst, IAny, 0); |
634 | else { | 657 | else { |
635 | int i = addinstruction(compst, ISet, 0); | 658 | int i = addinstruction(compst, ISet, 0); |