diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-25 15:57:30 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-25 15:57:30 -0300 |
| commit | 37ff352e833ae3a543acb07b2bee56fee7bd1491 (patch) | |
| tree | 8ca4cf55cd6c8ea0beab6b1bd5dc9ca1116efe34 | |
| parent | 0476d60007ec6693fd9643f6c92aa3adb9fde8d7 (diff) | |
| download | lpeg-37ff352e833ae3a543acb07b2bee56fee7bd1491.tar.gz lpeg-37ff352e833ae3a543acb07b2bee56fee7bd1491.tar.bz2 lpeg-37ff352e833ae3a543acb07b2bee56fee7bd1491.zip | |
Fixing several bugs left in previous commit
| -rw-r--r-- | lpcode.c | 61 | ||||
| -rw-r--r-- | lpprint.c | 5 | ||||
| -rw-r--r-- | lptypes.h | 3 |
3 files changed, 47 insertions, 22 deletions
| @@ -42,13 +42,15 @@ static int onlybit (int c, int b) { | |||
| 42 | 42 | ||
| 43 | 43 | ||
| 44 | /* | 44 | /* |
| 45 | ** Extra information for the result of 'charsettype'. | 45 | ** Extra information for the result of 'charsettype'. When result is |
| 46 | ** IChar, 'aux1' is the character. When result is ISet, 'aux1' is the | ||
| 47 | ** offset (in bytes), 'size' is the size (in bytes), and | ||
| 48 | ** 'deflt' is the default value for bytes outside the set. | ||
| 46 | */ | 49 | */ |
| 47 | typedef struct { | 50 | typedef struct { |
| 48 | /* unique character for result IChar, offset (in bytes) for result ISet */ | ||
| 49 | int aux1; | 51 | int aux1; |
| 50 | int size; /* size (in instructions) for result ISet */ | 52 | int size; |
| 51 | int deflt; /* default value for bits outside that set */ | 53 | int deflt; |
| 52 | } charsetinfo; | 54 | } charsetinfo; |
| 53 | 55 | ||
| 54 | /* | 56 | /* |
| @@ -80,13 +82,13 @@ static Opcode charsettype (const byte *cs, charsetinfo *info) { | |||
| 80 | /* find highest byte with a 0-bit; low0 is a sentinel */; | 82 | /* find highest byte with a 0-bit; low0 is a sentinel */; |
| 81 | if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */ | 83 | if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */ |
| 82 | info->aux1 = low1; | 84 | info->aux1 = low1; |
| 83 | info->size = instsize(high1 - low1 + 1); | 85 | info->size = high1 - low1 + 1; |
| 84 | info->deflt = 0; /* all discharged bits were 0 */ | 86 | info->deflt = 0; /* all discharged bits were 0 */ |
| 85 | } | 87 | } |
| 86 | else { | 88 | else { |
| 87 | info->aux1 = low0; | 89 | info->aux1 = low0; |
| 88 | info->size = instsize(high0 - low0 + 1); | 90 | info->size = high0 - low0 + 1; |
| 89 | info->deflt = 1; /* all discharged bits were 1 */ | 91 | info->deflt = 0xFF; /* all discharged bits were 1 */ |
| 90 | } | 92 | } |
| 91 | return ISet; | 93 | return ISet; |
| 92 | } | 94 | } |
| @@ -99,11 +101,6 @@ static void cs_complement (Charset *cs) { | |||
| 99 | loopset(i, cs->cs[i] = ~cs->cs[i]); | 101 | loopset(i, cs->cs[i] = ~cs->cs[i]); |
| 100 | } | 102 | } |
| 101 | 103 | ||
| 102 | static int cs_equal (const byte *cs1, const byte *cs2) { | ||
| 103 | loopset(i, if (cs1[i] != cs2[i]) return 0); | ||
| 104 | return 1; | ||
| 105 | } | ||
| 106 | |||
| 107 | static int cs_disjoint (const Charset *cs1, const Charset *cs2) { | 104 | static int cs_disjoint (const Charset *cs1, const Charset *cs2) { |
| 108 | loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) | 105 | loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) |
| 109 | return 1; | 106 | return 1; |
| @@ -604,16 +601,43 @@ static void addcharset (CompileState *compst, int inst, const byte *cs, | |||
| 604 | int p = gethere(compst); | 601 | int p = gethere(compst); |
| 605 | Instruction *I = &getinstr(compst, inst); | 602 | Instruction *I = &getinstr(compst, inst); |
| 606 | byte *charset; | 603 | byte *charset; |
| 604 | int isize = instsize(info->size); /* size in instructions */ | ||
| 607 | int i; | 605 | int i; |
| 608 | I->i.aux2.set.offset = info->aux1 * 8; /* offset in bits */ | 606 | I->i.aux2.set.offset = info->aux1 * 8; /* offset in bits */ |
| 609 | I->i.aux2.set.size = info->size; /* size in instructions */ | 607 | I->i.aux2.set.size = isize; |
| 610 | I->i.aux1 = info->deflt; | 608 | I->i.aux1 = info->deflt; |
| 611 | for (i = 0; i < info->size; i++) | 609 | for (i = 0; i < isize; i++) |
| 612 | nextinstruction(compst); /* space for charset */ | 610 | nextinstruction(compst); /* space for charset */ |
| 613 | charset = getinstr(compst, p).buff; /* previous loop may reallocate things */ | 611 | charset = getinstr(compst, p).buff; /* previous loop may reallocate things */ |
| 614 | /* fill buffer with charset */ | 612 | for (i = 0; i < info->size; i++) |
| 615 | for (i = 0; i < info->size * (int)sizeof(Instruction); i++) | 613 | charset[i] = cs[i + info->aux1]; /* fill buffer with charset */ |
| 616 | charset[i] = cs[i + info->aux1]; | 614 | for (; i < isize * (int)sizeof(Instruction); i++) |
| 615 | charset[i] = info->deflt; /* complete the buffer */ | ||
| 616 | } | ||
| 617 | |||
| 618 | |||
| 619 | /* | ||
| 620 | ** Check whether compact charset cs is dominated by instruction 'p' | ||
| 621 | */ | ||
| 622 | static int cs_equal (Instruction *p, const byte *cs, charsetinfo *info) { | ||
| 623 | if (p->i.code != ITestSet) | ||
| 624 | return 0; | ||
| 625 | else if (p->i.aux2.set.offset != info->aux1 * 8 || | ||
| 626 | p->i.aux2.set.size != instsize(info->size) || | ||
| 627 | p->i.aux1 != info->deflt) | ||
| 628 | return 0; | ||
| 629 | else { | ||
| 630 | int i; | ||
| 631 | for (i = 0; i < info->size; i++) { | ||
| 632 | if ((p + 2)->buff[i] != cs[i + info->aux1]) | ||
| 633 | return 0; | ||
| 634 | } | ||
| 635 | for (; i < instsize(info->size) * (int)sizeof(Instruction); i++) { | ||
| 636 | if ((p + 2)->buff[i] != info->deflt) | ||
| 637 | return 0; | ||
| 638 | } | ||
| 639 | } | ||
| 640 | return 1; | ||
| 617 | } | 641 | } |
| 618 | 642 | ||
| 619 | 643 | ||
| @@ -628,8 +652,7 @@ static void codecharset (CompileState *compst, const byte *cs, int tt) { | |||
| 628 | switch (op) { | 652 | switch (op) { |
| 629 | case IChar: codechar(compst, info.aux1, tt); break; | 653 | case IChar: codechar(compst, info.aux1, tt); break; |
| 630 | case ISet: { /* non-trivial set? */ | 654 | case ISet: { /* non-trivial set? */ |
| 631 | if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet && | 655 | if (tt >= 0 && cs_equal(&getinstr(compst, tt), cs, &info)) |
| 632 | cs_equal(cs, getinstr(compst, tt + 2).buff)) | ||
| 633 | addinstruction(compst, IAny, 0); | 656 | addinstruction(compst, IAny, 0); |
| 634 | else { | 657 | else { |
| 635 | int i = addinstruction(compst, ISet, 0); | 658 | int i = addinstruction(compst, ISet, 0); |
| @@ -23,7 +23,7 @@ void printcharset (const byte *st) { | |||
| 23 | printf("["); | 23 | printf("["); |
| 24 | for (i = 0; i <= UCHAR_MAX; i++) { | 24 | for (i = 0; i <= UCHAR_MAX; i++) { |
| 25 | int first = i; | 25 | int first = i; |
| 26 | while (testchar(st, i) && i <= UCHAR_MAX) i++; | 26 | while (i <= UCHAR_MAX && testchar(st, i)) i++; |
| 27 | if (i - 1 == first) /* unary range? */ | 27 | if (i - 1 == first) /* unary range? */ |
| 28 | printf("(%02x)", first); | 28 | printf("(%02x)", first); |
| 29 | else if (i - 1 > first) /* non-empty range? */ | 29 | else if (i - 1 > first) /* non-empty range? */ |
| @@ -36,8 +36,9 @@ void printcharset (const byte *st) { | |||
| 36 | static void printIcharset (const Instruction *inst, const byte *buff) { | 36 | static void printIcharset (const Instruction *inst, const byte *buff) { |
| 37 | byte cs[CHARSETSIZE]; | 37 | byte cs[CHARSETSIZE]; |
| 38 | int i; | 38 | int i; |
| 39 | printf("(%02x-%d) ", inst->i.aux2.set.offset, inst->i.aux2.set.size); | ||
| 39 | loopset(j, cs[j] = 0); | 40 | loopset(j, cs[j] = 0); |
| 40 | for (i = 0; i < CHARSETSIZE << 3; i++) { | 41 | for (i = 0; i < CHARSETSIZE * 8; i++) { |
| 41 | if (charinset(inst, buff, i)) | 42 | if (charinset(inst, buff, i)) |
| 42 | setchar(cs, i); | 43 | setchar(cs, i); |
| 43 | } | 44 | } |
| @@ -128,7 +128,8 @@ typedef struct Charset { | |||
| 128 | 128 | ||
| 129 | 129 | ||
| 130 | /* size (in instructions) for l bytes (l > 0) */ | 130 | /* size (in instructions) for l bytes (l > 0) */ |
| 131 | #define instsize(l) (((l) - 1)/sizeof(Instruction) + 1) | 131 | #define instsize(l) ((int)(((l) + (unsigned int)sizeof(Instruction) - 1u) \ |
| 132 | / (unsigned int)sizeof(Instruction))) | ||
| 132 | 133 | ||
| 133 | 134 | ||
| 134 | /* size (in elements) for a ISet instruction */ | 135 | /* size (in elements) for a ISet instruction */ |
