diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-27 11:03:30 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-27 11:03:30 -0300 |
| commit | 97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125 (patch) | |
| tree | 3ec0442b2f1ce9f3a081da5cf14f281b2d2811d5 | |
| parent | 012cf9c86cf91cb8354e229bde335592d41b84b2 (diff) | |
| download | lpeg-97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125.tar.gz lpeg-97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125.tar.bz2 lpeg-97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125.zip | |
New macros 'fillset' and 'clearset'
| -rw-r--r-- | lpcode.c | 2 | ||||
| -rw-r--r-- | lpcset.c | 8 | ||||
| -rw-r--r-- | lpprint.c | 4 | ||||
| -rw-r--r-- | lptree.c | 9 | ||||
| -rw-r--r-- | lptypes.h | 4 | ||||
| -rwxr-xr-x | test.lua | 6 |
6 files changed, 22 insertions, 11 deletions
| @@ -221,7 +221,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
| 221 | } | 221 | } |
| 222 | case TUTFR: { | 222 | case TUTFR: { |
| 223 | int c; | 223 | int c; |
| 224 | loopset(i, firstset->cs[i] = 0); /* erase all chars */ | 224 | clearset(firstset->cs); /* erase all chars */ |
| 225 | for (c = tree->key; c <= sib1(tree)->key; c++) | 225 | for (c = tree->key; c <= sib1(tree)->key; c++) |
| 226 | setchar(firstset->cs, c); | 226 | setchar(firstset->cs, c); |
| 227 | return 0; | 227 | return 0; |
| @@ -76,21 +76,21 @@ int tocharset (TTree *tree, Charset *cs) { | |||
| 76 | switch (tree->tag) { | 76 | switch (tree->tag) { |
| 77 | case TChar: { /* only one char */ | 77 | case TChar: { /* only one char */ |
| 78 | assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); | 78 | assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); |
| 79 | loopset(i, cs->cs[i] = 0); /* erase all chars */ | 79 | clearset(cs->cs); /* erase all chars */ |
| 80 | setchar(cs->cs, tree->u.n); /* add that one */ | 80 | setchar(cs->cs, tree->u.n); /* add that one */ |
| 81 | return 1; | 81 | return 1; |
| 82 | } | 82 | } |
| 83 | case TAny: { | 83 | case TAny: { |
| 84 | loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */ | 84 | fillset(cs->cs, 0xFF); /* add all characters to the set */ |
| 85 | return 1; | 85 | return 1; |
| 86 | } | 86 | } |
| 87 | case TFalse: { | 87 | case TFalse: { |
| 88 | loopset(i, cs->cs[i] = 0); /* empty set */ | 88 | clearset(cs->cs); /* empty set */ |
| 89 | return 1; | 89 | return 1; |
| 90 | } | 90 | } |
| 91 | case TSet: { /* fill set */ | 91 | case TSet: { /* fill set */ |
| 92 | int i; | 92 | int i; |
| 93 | loopset(j, cs->cs[j] = tree->u.set.deflt); | 93 | fillset(cs->cs, tree->u.set.deflt); |
| 94 | for (i = 0; i < tree->u.set.size; i++) | 94 | for (i = 0; i < tree->u.set.size; i++) |
| 95 | cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i]; | 95 | cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i]; |
| 96 | return 1; | 96 | return 1; |
| @@ -37,7 +37,7 @@ static void printIcharset (const Instruction *inst, const byte *buff) { | |||
| 37 | byte cs[CHARSETSIZE]; | 37 | byte cs[CHARSETSIZE]; |
| 38 | int i; | 38 | int i; |
| 39 | printf("(%02x-%d) ", inst->i.aux2.set.offset, inst->i.aux2.set.size); | 39 | printf("(%02x-%d) ", inst->i.aux2.set.offset, inst->i.aux2.set.size); |
| 40 | loopset(j, cs[j] = 0); | 40 | clearset(cs); |
| 41 | for (i = 0; i < CHARSETSIZE * 8; i++) { | 41 | for (i = 0; i < CHARSETSIZE * 8; i++) { |
| 42 | if (charinset(inst, buff, i)) | 42 | if (charinset(inst, buff, i)) |
| 43 | setchar(cs, i); | 43 | setchar(cs, i); |
| @@ -50,7 +50,7 @@ static void printTcharset (TTree *tree) { | |||
| 50 | byte cs[CHARSETSIZE]; | 50 | byte cs[CHARSETSIZE]; |
| 51 | int i; | 51 | int i; |
| 52 | printf("(%02x-%d) ", tree->u.set.offset, tree->u.set.size); | 52 | printf("(%02x-%d) ", tree->u.set.offset, tree->u.set.size); |
| 53 | loopset(j, cs[j] = tree->u.set.deflt); | 53 | fillset(cs, tree->u.set.deflt); |
| 54 | for (i = 0; i < tree->u.set.size; i++) | 54 | for (i = 0; i < tree->u.set.size; i++) |
| 55 | cs[tree->u.set.offset + i] = treebuffer(tree)[i]; | 55 | cs[tree->u.set.offset + i] = treebuffer(tree)[i]; |
| 56 | printcharset(cs); | 56 | printcharset(cs); |
| @@ -670,7 +670,7 @@ static int lp_set (lua_State *L) { | |||
| 670 | size_t l; | 670 | size_t l; |
| 671 | const char *s = luaL_checklstring(L, 1, &l); | 671 | const char *s = luaL_checklstring(L, 1, &l); |
| 672 | byte buff[CHARSETSIZE]; | 672 | byte buff[CHARSETSIZE]; |
| 673 | loopset(i, buff[i] = 0); | 673 | clearset(buff); |
| 674 | while (l--) { | 674 | while (l--) { |
| 675 | setchar(buff, (byte)(*s)); | 675 | setchar(buff, (byte)(*s)); |
| 676 | s++; | 676 | s++; |
| @@ -684,7 +684,7 @@ static int lp_range (lua_State *L) { | |||
| 684 | int arg; | 684 | int arg; |
| 685 | int top = lua_gettop(L); | 685 | int top = lua_gettop(L); |
| 686 | byte buff[CHARSETSIZE]; | 686 | byte buff[CHARSETSIZE]; |
| 687 | loopset(i, buff[i] = 0); | 687 | clearset(buff); |
| 688 | for (arg = 1; arg <= top; arg++) { | 688 | for (arg = 1; arg <= top; arg++) { |
| 689 | int c; | 689 | int c; |
| 690 | size_t l; | 690 | size_t l; |
| @@ -734,7 +734,7 @@ static int lp_utfr (lua_State *L) { | |||
| 734 | if (to <= 0x7f) { /* ascii range? */ | 734 | if (to <= 0x7f) { /* ascii range? */ |
| 735 | unsigned int f; | 735 | unsigned int f; |
| 736 | byte buff[CHARSETSIZE]; /* code it as a regular charset */ | 736 | byte buff[CHARSETSIZE]; /* code it as a regular charset */ |
| 737 | loopset(i, buff[i] = 0); | 737 | clearset(buff); |
| 738 | for (f = (int)from; f <= to; f++) | 738 | for (f = (int)from; f <= to; f++) |
| 739 | setchar(buff, f); | 739 | setchar(buff, f); |
| 740 | newcharset(L, buff); | 740 | newcharset(L, buff); |
| @@ -1298,7 +1298,7 @@ int lp_gc (lua_State *L) { | |||
| 1298 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { | 1298 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { |
| 1299 | int c; | 1299 | int c; |
| 1300 | byte buff[CHARSETSIZE]; | 1300 | byte buff[CHARSETSIZE]; |
| 1301 | loopset(i, buff[i] = 0); | 1301 | clearset(buff); |
| 1302 | for (c = 0; c <= UCHAR_MAX; c++) | 1302 | for (c = 0; c <= UCHAR_MAX; c++) |
| 1303 | if (catf(c)) setchar(buff, c); | 1303 | if (catf(c)) setchar(buff, c); |
| 1304 | newcharset(L, buff); | 1304 | newcharset(L, buff); |
| @@ -1373,6 +1373,7 @@ static struct luaL_Reg metareg[] = { | |||
| 1373 | 1373 | ||
| 1374 | int luaopen_lpeg (lua_State *L); | 1374 | int luaopen_lpeg (lua_State *L); |
| 1375 | int luaopen_lpeg (lua_State *L) { | 1375 | int luaopen_lpeg (lua_State *L) { |
| 1376 | printf("%ld\n", sizeof(TTree)); | ||
| 1376 | luaL_newmetatable(L, PATTERN_T); | 1377 | luaL_newmetatable(L, PATTERN_T); |
| 1377 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ | 1378 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ |
| 1378 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | 1379 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); |
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include <assert.h> | 11 | #include <assert.h> |
| 12 | #include <limits.h> | 12 | #include <limits.h> |
| 13 | #include <string.h> | ||
| 13 | 14 | ||
| 14 | #include "lua.h" | 15 | #include "lua.h" |
| 15 | 16 | ||
| @@ -97,6 +98,9 @@ typedef struct Charset { | |||
| 97 | 98 | ||
| 98 | #define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } | 99 | #define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } |
| 99 | 100 | ||
| 101 | #define fillset(s,c) memset(s,c,CHARSETSIZE) | ||
| 102 | #define clearset(s) fillset(s,0) | ||
| 103 | |||
| 100 | /* access to charset */ | 104 | /* access to charset */ |
| 101 | #define treebuffer(t) ((byte *)((t) + 1)) | 105 | #define treebuffer(t) ((byte *)((t) + 1)) |
| 102 | 106 | ||
| @@ -1227,6 +1227,12 @@ do -- a few typical UTF-8 ranges | |||
| 1227 | t[5] == "cjk: " .. cjk and t[6] == "other: —" and | 1227 | t[5] == "cjk: " .. cjk and t[6] == "other: —" and |
| 1228 | t[7] == "ascii: " .. ascii and t[8] == "other: " .. last and | 1228 | t[7] == "ascii: " .. ascii and t[8] == "other: " .. last and |
| 1229 | t[9] == nil) | 1229 | t[9] == nil) |
| 1230 | |||
| 1231 | -- failing UTF-8 matches and borders | ||
| 1232 | assert(not m.match(m.utfR(10, 0x2000), "\9")) | ||
| 1233 | assert(not m.match(m.utfR(10, 0x2000), "\226\128\129")) | ||
| 1234 | assert(m.match(m.utfR(10, 0x2000), "\10") == 2) | ||
| 1235 | assert(m.match(m.utfR(10, 0x2000), "\226\128\128") == 4) | ||
| 1230 | end | 1236 | end |
| 1231 | 1237 | ||
| 1232 | 1238 | ||
