From 97a4ca3b4078f581cdc8cebc4fa4cf39d5ff8125 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 27 Apr 2023 11:03:30 -0300 Subject: New macros 'fillset' and 'clearset' --- lpcode.c | 2 +- lpcset.c | 8 ++++---- lpprint.c | 4 ++-- lptree.c | 9 +++++---- lptypes.h | 4 ++++ test.lua | 6 ++++++ 6 files changed, 22 insertions(+), 11 deletions(-) diff --git a/lpcode.c b/lpcode.c index 9289bd3..b68f32d 100644 --- a/lpcode.c +++ b/lpcode.c @@ -221,7 +221,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { } case TUTFR: { int c; - loopset(i, firstset->cs[i] = 0); /* erase all chars */ + clearset(firstset->cs); /* erase all chars */ for (c = tree->key; c <= sib1(tree)->key; c++) setchar(firstset->cs, c); return 0; diff --git a/lpcset.c b/lpcset.c index 2e62d94..2dcffd9 100644 --- a/lpcset.c +++ b/lpcset.c @@ -76,21 +76,21 @@ int tocharset (TTree *tree, Charset *cs) { switch (tree->tag) { case TChar: { /* only one char */ assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); - loopset(i, cs->cs[i] = 0); /* erase all chars */ + clearset(cs->cs); /* erase all chars */ setchar(cs->cs, tree->u.n); /* add that one */ return 1; } case TAny: { - loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */ + fillset(cs->cs, 0xFF); /* add all characters to the set */ return 1; } case TFalse: { - loopset(i, cs->cs[i] = 0); /* empty set */ + clearset(cs->cs); /* empty set */ return 1; } case TSet: { /* fill set */ int i; - loopset(j, cs->cs[j] = tree->u.set.deflt); + fillset(cs->cs, tree->u.set.deflt); for (i = 0; i < tree->u.set.size; i++) cs->cs[tree->u.set.offset + i] = treebuffer(tree)[i]; return 1; diff --git a/lpprint.c b/lpprint.c index a432263..3e7e7f2 100644 --- a/lpprint.c +++ b/lpprint.c @@ -37,7 +37,7 @@ static void printIcharset (const Instruction *inst, const byte *buff) { byte cs[CHARSETSIZE]; int i; printf("(%02x-%d) ", inst->i.aux2.set.offset, inst->i.aux2.set.size); - loopset(j, cs[j] = 0); + clearset(cs); for (i = 0; i < 
CHARSETSIZE * 8; i++) { if (charinset(inst, buff, i)) setchar(cs, i); @@ -50,7 +50,7 @@ static void printTcharset (TTree *tree) { byte cs[CHARSETSIZE]; int i; printf("(%02x-%d) ", tree->u.set.offset, tree->u.set.size); - loopset(j, cs[j] = tree->u.set.deflt); + fillset(cs, tree->u.set.deflt); for (i = 0; i < tree->u.set.size; i++) cs[tree->u.set.offset + i] = treebuffer(tree)[i]; printcharset(cs); diff --git a/lptree.c b/lptree.c index f9e170b..c61a5db 100644 --- a/lptree.c +++ b/lptree.c @@ -670,7 +670,7 @@ static int lp_set (lua_State *L) { size_t l; const char *s = luaL_checklstring(L, 1, &l); byte buff[CHARSETSIZE]; - loopset(i, buff[i] = 0); + clearset(buff); while (l--) { setchar(buff, (byte)(*s)); s++; @@ -684,7 +684,7 @@ static int lp_range (lua_State *L) { int arg; int top = lua_gettop(L); byte buff[CHARSETSIZE]; - loopset(i, buff[i] = 0); + clearset(buff); for (arg = 1; arg <= top; arg++) { int c; size_t l; @@ -734,7 +734,7 @@ static int lp_utfr (lua_State *L) { if (to <= 0x7f) { /* ascii range? 
*/ unsigned int f; byte buff[CHARSETSIZE]; /* code it as a regular charset */ - loopset(i, buff[i] = 0); + clearset(buff); for (f = (int)from; f <= to; f++) setchar(buff, f); newcharset(L, buff); @@ -1298,7 +1298,7 @@ int lp_gc (lua_State *L) { static void createcat (lua_State *L, const char *catname, int (catf) (int)) { int c; byte buff[CHARSETSIZE]; - loopset(i, buff[i] = 0); + clearset(buff); for (c = 0; c <= UCHAR_MAX; c++) if (catf(c)) setchar(buff, c); newcharset(L, buff); @@ -1373,6 +1373,7 @@ static struct luaL_Reg metareg[] = { int luaopen_lpeg (lua_State *L); int luaopen_lpeg (lua_State *L) { +printf("%ld\n", sizeof(TTree)); luaL_newmetatable(L, PATTERN_T); lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); diff --git a/lptypes.h b/lptypes.h index 7793e76..17d406e 100644 --- a/lptypes.h +++ b/lptypes.h @@ -10,6 +10,7 @@ #include <assert.h> #include <limits.h> +#include <string.h> #include "lua.h" @@ -97,6 +98,9 @@ typedef struct Charset { #define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } +#define fillset(s,c) memset(s,c,CHARSETSIZE) +#define clearset(s) fillset(s,0) + /* access to charset */ #define treebuffer(t) ((byte *)((t) + 1)) diff --git a/test.lua b/test.lua index 9f8d226..d31a69f 100755 --- a/test.lua +++ b/test.lua @@ -1227,6 +1227,12 @@ do -- a few typical UTF-8 ranges t[5] == "cjk: " .. cjk and t[6] == "other: —" and t[7] == "ascii: " .. ascii and t[8] == "other: " .. last and t[9] == nil) + + -- failing UTF-8 matches and borders + assert(not m.match(m.utfR(10, 0x2000), "\9")) + assert(not m.match(m.utfR(10, 0x2000), "\226\128\129")) + assert(m.match(m.utfR(10, 0x2000), "\10") == 2) + assert(m.match(m.utfR(10, 0x2000), "\226\128\128") == 4) end -- cgit v1.2.3-55-g6feb