aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-04-26 13:36:34 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-04-26 13:36:34 -0300
commit3403b0c7256435560b63f828da92026c5d4c898b (patch)
treeca6d5753f55fb2d7b6c85cedfe332e03033190a7
parentdef10e7c009f71f99d6a11171d84fc27568f9b81 (diff)
downloadlpeg-3403b0c7256435560b63f828da92026c5d4c898b.tar.gz
lpeg-3403b0c7256435560b63f828da92026c5d4c898b.tar.bz2
lpeg-3403b0c7256435560b63f828da92026c5d4c898b.zip
New module 'lpcset'
For code related to compact sets.
-rw-r--r--lpcode.c81
-rw-r--r--lpcset.c69
-rw-r--r--lpcset.h25
-rw-r--r--makefile8
4 files changed, 104 insertions, 79 deletions
diff --git a/lpcode.c b/lpcode.c
index ba6942f..66d2f3f 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -7,6 +7,7 @@
7 7
8#include "lptypes.h" 8#include "lptypes.h"
9#include "lpcode.h" 9#include "lpcode.h"
10#include "lpcset.h"
10 11
11 12
12/* signals a "no-instruction */ 13/* signals a "no-instruction */
@@ -31,70 +32,6 @@ static const Charset *fullset = &fullset_;
31 32
32 33
33/* 34/*
34** Add to 'c' the index of the (only) bit set in byte 'b'
35*/
36static int onlybit (int c, int b) {
37 if ((b & 0xF0) != 0) { c += 4; b >>= 4; }
38 if ((b & 0x0C) != 0) { c += 2; b >>= 2; }
39 if ((b & 0x02) != 0) { c += 1; }
40 return c;
41}
42
43
44/*
45** Extra information for the result of 'charsettype'. When result is
46** IChar, 'aux1' is the character. When result is ISet, 'aux1' is the
47** offset (in bytes), 'size' is the size (in bytes), and
48** 'delt' is the default value for bytes outside the set.
49*/
50typedef struct {
51 int aux1;
52 int size;
53 int deflt;
54} charsetinfo;
55
56/*
57** Check whether a charset is empty (returns IFail), singleton (IChar),
58** full (IAny), or none of those (ISet). When singleton, 'info.aux1'
59** returns which character it is. When generic set, 'info' returns
60** information about its range.
61*/
62static Opcode charsettype (const byte *cs, charsetinfo *info) {
63 int low0, low1, high0, high1;
64 for (low1 = 0; low1 < CHARSETSIZE && cs[low1] == 0; low1++)
65 /* find lowest byte with a 1-bit */;
66 if (low1 == CHARSETSIZE)
67 return IFail; /* no characters in set */
68 for (high1 = CHARSETSIZE - 1; cs[high1] == 0; high1--)
69 /* find highest byte with a 1-bit; low1 is a sentinel */;
70 if (low1 == high1) { /* only one byte with 1-bits? */
71 int b = cs[low1];
72 if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */
73 info->aux1 = onlybit(low1 * BITSPERCHAR, b); /* get that bit */
74 return IChar; /* single character */
75 }
76 }
77 for (low0 = 0; low0 < CHARSETSIZE && cs[low0] == 0xFF; low0++)
78 /* find lowest byte with a 0-bit */;
79 if (low0 == CHARSETSIZE)
80 return IAny; /* set has all bits set */
81 for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--)
82 /* find highest byte with a 0-bit; low0 is a sentinel */;
83 if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */
84 info->aux1 = low1;
85 info->size = high1 - low1 + 1;
86 info->deflt = 0; /* all discharged bits were 0 */
87 }
88 else {
89 info->aux1 = low0;
90 info->size = high0 - low0 + 1;
91 info->deflt = 0xFF; /* all discharged bits were 1 */
92 }
93 return ISet;
94}
95
96
97/*
98** A few basic operations on Charsets 35** A few basic operations on Charsets
99*/ 36*/
100static void cs_complement (Charset *cs) { 37static void cs_complement (Charset *cs) {
@@ -617,11 +554,9 @@ static void addcharset (CompileState *compst, int inst, const byte *cs,
617 I->i.aux2.set.size = isize; 554 I->i.aux2.set.size = isize;
618 I->i.aux1 = info->deflt; 555 I->i.aux1 = info->deflt;
619 p = nextinstruction(compst, isize); /* space for charset */ 556 p = nextinstruction(compst, isize); /* space for charset */
620 charset = getinstr(compst, p).buff; /* previous loop may reallocate things */ 557 charset = getinstr(compst, p).buff; /* charset buffer */
621 for (i = 0; i < info->size; i++) 558 for (i = 0; i < isize * (int)sizeof(Instruction); i++)
622 charset[i] = cs[i + info->aux1]; /* fill buffer with charset */ 559 charset[i] = getbytefromcharset(cs, info, i); /* fill the buffer */
623 for (; i < isize * (int)sizeof(Instruction); i++)
624 charset[i] = info->deflt; /* complete the buffer */
625} 560}
626 561
627 562
@@ -637,12 +572,8 @@ static int cs_equal (Instruction *p, const byte *cs, charsetinfo *info) {
637 return 0; 572 return 0;
638 else { 573 else {
639 int i; 574 int i;
640 for (i = 0; i < info->size; i++) { 575 for (i = 0; i < instsize(info->size) * (int)sizeof(Instruction); i++) {
641 if ((p + 2)->buff[i] != cs[i + info->aux1]) 576 if ((p + 2)->buff[i] != getbytefromcharset(cs, info, i))
642 return 0;
643 }
644 for (; i < instsize(info->size) * (int)sizeof(Instruction); i++) {
645 if ((p + 2)->buff[i] != info->deflt)
646 return 0; 577 return 0;
647 } 578 }
648 } 579 }
diff --git a/lpcset.c b/lpcset.c
new file mode 100644
index 0000000..9ecf475
--- /dev/null
+++ b/lpcset.c
@@ -0,0 +1,69 @@
1
2#include "lptypes.h"
3#include "lpcset.h"
4
5
6/*
7** Add to 'c' the index of the (only) bit set in byte 'b'
8*/
9static int onlybit (int c, int b) {
10 if ((b & 0xF0) != 0) { c += 4; b >>= 4; }
11 if ((b & 0x0C) != 0) { c += 2; b >>= 2; }
12 if ((b & 0x02) != 0) { c += 1; }
13 return c;
14}
15
16
17/*
18** Check whether a charset is empty (returns IFail), singleton (IChar),
19** full (IAny), or none of those (ISet). When singleton, 'info.aux1'
20** returns which character it is. When generic set, 'info' returns
21** information about its range.
22*/
23Opcode charsettype (const byte *cs, charsetinfo *info) {
24 int low0, low1, high0, high1;
25 for (low1 = 0; low1 < CHARSETSIZE && cs[low1] == 0; low1++)
26 /* find lowest byte with a 1-bit */;
27 if (low1 == CHARSETSIZE)
28 return IFail; /* no characters in set */
29 for (high1 = CHARSETSIZE - 1; cs[high1] == 0; high1--)
30 /* find highest byte with a 1-bit; low1 is a sentinel */;
31 if (low1 == high1) { /* only one byte with 1-bits? */
32 int b = cs[low1];
33 if ((b & (b - 1)) == 0) { /* does byte has only one 1-bit? */
34 info->aux1 = onlybit(low1 * BITSPERCHAR, b); /* get that bit */
35 return IChar; /* single character */
36 }
37 }
38 for (low0 = 0; low0 < CHARSETSIZE && cs[low0] == 0xFF; low0++)
39 /* find lowest byte with a 0-bit */;
40 if (low0 == CHARSETSIZE)
41 return IAny; /* set has all bits set */
42 for (high0 = CHARSETSIZE - 1; cs[high0] == 0xFF; high0--)
43 /* find highest byte with a 0-bit; low0 is a sentinel */;
44 if (high1 - low1 <= high0 - low0) { /* range of 1s smaller than of 0s? */
45 info->aux1 = low1;
46 info->size = high1 - low1 + 1;
47 info->deflt = 0; /* all discharged bits were 0 */
48 }
49 else {
50 info->aux1 = low0;
51 info->size = high0 - low0 + 1;
52 info->deflt = 0xFF; /* all discharged bits were 1 */
53 }
54 return ISet;
55}
56
57
58/*
59** Get a byte from a compact charset. If index is inside the charset
60** range, get the byte from the supporting charset (correcting it
61** by the offset). Otherwise, return the default for the set.
62*/
63byte getbytefromcharset (const byte *cs, const charsetinfo *info,
64 int index) {
65 if (index < info->size)
66 return cs[info->aux1 + index];
67 else return info->deflt;
68}
69
diff --git a/lpcset.h b/lpcset.h
new file mode 100644
index 0000000..e5152c4
--- /dev/null
+++ b/lpcset.h
@@ -0,0 +1,25 @@
1
2#if !defined(lpset_h)
3#define lpset_h
4
5#include "lpcset.h"
6#include "lpcode.h"
7
8
9/*
10** Extra information for the result of 'charsettype'. When result is
11** IChar, 'aux1' is the character. When result is ISet, 'aux1' is the
12** offset (in bytes), 'size' is the size (in bytes), and
13** 'delt' is the default value for bytes outside the set.
14*/
15typedef struct {
16 int aux1;
17 int size;
18 int deflt;
19} charsetinfo;
20
21
22Opcode charsettype (const byte *cs, charsetinfo *info);
23byte getbytefromcharset (const byte *cs, const charsetinfo *info, int index);
24
25#endif
diff --git a/makefile b/makefile
index a85f902..9493c6e 100644
--- a/makefile
+++ b/makefile
@@ -1,7 +1,7 @@
1LIBNAME = lpeg 1LIBNAME = lpeg
2LUADIR = ../lua/ 2LUADIR = ./lua/
3 3
4# COPT = -O2 -DNDEBUG 4COPT = -O2 -DNDEBUG
5# COPT = -g 5# COPT = -g
6# COPT = -DLPEG_DEBUG 6# COPT = -DLPEG_DEBUG
7 7
@@ -26,7 +26,7 @@ CWARNS = -Wall -Wextra -pedantic \
26CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC 26CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
27CC = gcc 27CC = gcc
28 28
29FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o 29FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o lpcset.o
30 30
31# For Linux 31# For Linux
32linux: 32linux:
@@ -53,4 +53,4 @@ lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h
53lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h 53lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h
54lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h 54lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h
55lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h 55lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h
56 56lpcset.o: lpcset.c lpcset.h lptypes.h