diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-27 11:03:30 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-27 11:03:30 -0300 |
commit | 97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125 (patch) | |
tree | 3ec0442b2f1ce9f3a081da5cf14f281b2d2811d5 | |
parent | 012cf9c86cf91cb8354e229bde335592d41b84b2 (diff) | |
download | lpeg-97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125.tar.gz lpeg-97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125.tar.bz2 lpeg-97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125.zip |
New macros 'fillset' and 'clearset'
-rw-r--r-- | lpcode.c | 2 | ||||
-rw-r--r-- | lpcset.c | 8 | ||||
-rw-r--r-- | lpprint.c | 4 | ||||
-rw-r--r-- | lptree.c | 9 | ||||
-rw-r--r-- | lptypes.h | 4 | ||||
-rwxr-xr-x | test.lua | 6 |
6 files changed, 22 insertions, 11 deletions
@@ -221,7 +221,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
221 | } | 221 | } |
222 | case TUTFR: { | 222 | case TUTFR: { |
223 | int c; | 223 | int c; |
224 | loopset(i, firstset->cs[i] = 0); /* erase all chars */ | 224 | clearset(firstset->cs); /* erase all chars */ |
225 | for (c = tree->key; c <= sib1(tree)->key; c++) | 225 | for (c = tree->key; c <= sib1(tree)->key; c++) |
226 | setchar(firstset->cs, c); | 226 | setchar(firstset->cs, c); |
227 | return 0; | 227 | return 0; |
@@ -76,21 +76,21 @@ int tocharset (TTree *tree, Charset *cs) { | |||
76 | switch (tree->tag) { | 76 | switch (tree->tag) { |
77 | case TChar: { /* only one char */ | 77 | case TChar: { /* only one char */ |
78 | assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); | 78 | assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); |
79 | loopset(i, cs->cs[i] = 0); /* erase all chars */ | 79 | clearset(cs->cs); /* erase all chars */ |
80 | setchar(cs->cs, tree->u.n); /* add that one */ | 80 | setchar(cs->cs, tree->u.n); /* add that one */ |
81 | return 1; | 81 | return 1; |
82 | } | 82 | } |
83 | case TAny: { | 83 | case TAny: { |
84 | loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */ | 84 | fillset(cs->cs, 0xFF); /* add all characters to the set */ |
85 | return 1; | 85 | return 1; |
86 | } | 86 | } |
87 | case TFalse: { | 87 | case TFalse: { |
88 | loopset(i, cs->cs[i] = 0); /* empty set */ | 88 | clearset(cs->cs); /* empty set */ |
89 | return 1; | 89 | return 1; |
90 | } | 90 | } |
91 | case TSet: { /* fill set */ | 91 | case TSet: { /* fill set */ |
92 | int i; | 92 | int i; |
93 | loopset(j, cs->cs[j] = tree->u.set.deflt); | 93 | fillset(cs->cs, tree->u.set.deflt); |
94 | for (i = 0; i < tree->u.set.size; i++) | 94 | for (i = 0; i < tree->u.set.size; i++) |
95 | cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i]; | 95 | cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i]; |
96 | return 1; | 96 | return 1; |
@@ -37,7 +37,7 @@ static void printIcharset (const Instruction *inst, const byte *buff) { | |||
37 | byte cs[CHARSETSIZE]; | 37 | byte cs[CHARSETSIZE]; |
38 | int i; | 38 | int i; |
39 | printf("(%02x-%d) ", inst->i.aux2.set.offset, inst->i.aux2.set.size); | 39 | printf("(%02x-%d) ", inst->i.aux2.set.offset, inst->i.aux2.set.size); |
40 | loopset(j, cs[j] = 0); | 40 | clearset(cs); |
41 | for (i = 0; i < CHARSETSIZE * 8; i++) { | 41 | for (i = 0; i < CHARSETSIZE * 8; i++) { |
42 | if (charinset(inst, buff, i)) | 42 | if (charinset(inst, buff, i)) |
43 | setchar(cs, i); | 43 | setchar(cs, i); |
@@ -50,7 +50,7 @@ static void printTcharset (TTree *tree) { | |||
50 | byte cs[CHARSETSIZE]; | 50 | byte cs[CHARSETSIZE]; |
51 | int i; | 51 | int i; |
52 | printf("(%02x-%d) ", tree->u.set.offset, tree->u.set.size); | 52 | printf("(%02x-%d) ", tree->u.set.offset, tree->u.set.size); |
53 | loopset(j, cs[j] = tree->u.set.deflt); | 53 | fillset(cs, tree->u.set.deflt); |
54 | for (i = 0; i < tree->u.set.size; i++) | 54 | for (i = 0; i < tree->u.set.size; i++) |
55 | cs[tree->u.set.offset + i] = treebuffer(tree)[i]; | 55 | cs[tree->u.set.offset + i] = treebuffer(tree)[i]; |
56 | printcharset(cs); | 56 | printcharset(cs); |
@@ -670,7 +670,7 @@ static int lp_set (lua_State *L) { | |||
670 | size_t l; | 670 | size_t l; |
671 | const char *s = luaL_checklstring(L, 1, &l); | 671 | const char *s = luaL_checklstring(L, 1, &l); |
672 | byte buff[CHARSETSIZE]; | 672 | byte buff[CHARSETSIZE]; |
673 | loopset(i, buff[i] = 0); | 673 | clearset(buff); |
674 | while (l--) { | 674 | while (l--) { |
675 | setchar(buff, (byte)(*s)); | 675 | setchar(buff, (byte)(*s)); |
676 | s++; | 676 | s++; |
@@ -684,7 +684,7 @@ static int lp_range (lua_State *L) { | |||
684 | int arg; | 684 | int arg; |
685 | int top = lua_gettop(L); | 685 | int top = lua_gettop(L); |
686 | byte buff[CHARSETSIZE]; | 686 | byte buff[CHARSETSIZE]; |
687 | loopset(i, buff[i] = 0); | 687 | clearset(buff); |
688 | for (arg = 1; arg <= top; arg++) { | 688 | for (arg = 1; arg <= top; arg++) { |
689 | int c; | 689 | int c; |
690 | size_t l; | 690 | size_t l; |
@@ -734,7 +734,7 @@ static int lp_utfr (lua_State *L) { | |||
734 | if (to <= 0x7f) { /* ascii range? */ | 734 | if (to <= 0x7f) { /* ascii range? */ |
735 | unsigned int f; | 735 | unsigned int f; |
736 | byte buff[CHARSETSIZE]; /* code it as a regular charset */ | 736 | byte buff[CHARSETSIZE]; /* code it as a regular charset */ |
737 | loopset(i, buff[i] = 0); | 737 | clearset(buff); |
738 | for (f = (int)from; f <= to; f++) | 738 | for (f = (int)from; f <= to; f++) |
739 | setchar(buff, f); | 739 | setchar(buff, f); |
740 | newcharset(L, buff); | 740 | newcharset(L, buff); |
@@ -1298,7 +1298,7 @@ int lp_gc (lua_State *L) { | |||
1298 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { | 1298 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { |
1299 | int c; | 1299 | int c; |
1300 | byte buff[CHARSETSIZE]; | 1300 | byte buff[CHARSETSIZE]; |
1301 | loopset(i, buff[i] = 0); | 1301 | clearset(buff); |
1302 | for (c = 0; c <= UCHAR_MAX; c++) | 1302 | for (c = 0; c <= UCHAR_MAX; c++) |
1303 | if (catf(c)) setchar(buff, c); | 1303 | if (catf(c)) setchar(buff, c); |
1304 | newcharset(L, buff); | 1304 | newcharset(L, buff); |
@@ -1373,6 +1373,7 @@ static struct luaL_Reg metareg[] = { | |||
1373 | 1373 | ||
1374 | int luaopen_lpeg (lua_State *L); | 1374 | int luaopen_lpeg (lua_State *L); |
1375 | int luaopen_lpeg (lua_State *L) { | 1375 | int luaopen_lpeg (lua_State *L) { |
1376 | printf("%ld\n", sizeof(TTree)); | ||
1376 | luaL_newmetatable(L, PATTERN_T); | 1377 | luaL_newmetatable(L, PATTERN_T); |
1377 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ | 1378 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ |
1378 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | 1379 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); |
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <assert.h> | 11 | #include <assert.h> |
12 | #include <limits.h> | 12 | #include <limits.h> |
13 | #include <string.h> | ||
13 | 14 | ||
14 | #include "lua.h" | 15 | #include "lua.h" |
15 | 16 | ||
@@ -97,6 +98,9 @@ typedef struct Charset { | |||
97 | 98 | ||
98 | #define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } | 99 | #define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } |
99 | 100 | ||
101 | #define fillset(s,c) memset(s,c,CHARSETSIZE) | ||
102 | #define clearset(s) fillset(s,0) | ||
103 | |||
100 | /* access to charset */ | 104 | /* access to charset */ |
101 | #define treebuffer(t) ((byte *)((t) + 1)) | 105 | #define treebuffer(t) ((byte *)((t) + 1)) |
102 | 106 | ||
@@ -1227,6 +1227,12 @@ do -- a few typical UTF-8 ranges | |||
1227 | t[5] == "cjk: " .. cjk and t[6] == "other: —" and | 1227 | t[5] == "cjk: " .. cjk and t[6] == "other: —" and |
1228 | t[7] == "ascii: " .. ascii and t[8] == "other: " .. last and | 1228 | t[7] == "ascii: " .. ascii and t[8] == "other: " .. last and |
1229 | t[9] == nil) | 1229 | t[9] == nil) |
1230 | |||
1231 | -- failing UTF-8 matches and borders | ||
1232 | assert(not m.match(m.utfR(10, 0x2000), "\9")) | ||
1233 | assert(not m.match(m.utfR(10, 0x2000), "\226\128\129")) | ||
1234 | assert(m.match(m.utfR(10, 0x2000), "\10") == 2) | ||
1235 | assert(m.match(m.utfR(10, 0x2000), "\226\128\128") == 4) | ||
1230 | end | 1236 | end |
1231 | 1237 | ||
1232 | 1238 | ||