From fd28f9d9e54f33bf7ae3a5e12dc71478f9c91aea Mon Sep 17 00:00:00 2001 From: Sergio Queiroz Date: Thu, 10 Nov 2016 16:26:11 -0300 Subject: Removing labeled choice, updating testlabel, and disabling an optmization related to Throw --- lpcode.c | 39 ++--- lpprint.c | 4 +- lptree.c | 23 +-- lptree.h | 2 +- lpvm.c | 11 -- lpvm.h | 1 - relabel.lua | 457 --------------------------------------------------------- relabelrec.lua | 396 +++++++++++++++++++++++++++++++++++++++++++++++++ test.lua | 2 +- testlabel.lua | 453 ++++++++++++++++++++++++++++++++++++++------------------ 10 files changed, 724 insertions(+), 664 deletions(-) delete mode 100644 relabel.lua create mode 100644 relabelrec.lua diff --git a/lpcode.c b/lpcode.c index d5f8d68..b2dbba2 100644 --- a/lpcode.c +++ b/lpcode.c @@ -196,7 +196,7 @@ int checkaux (TTree *tree, int pred) { if (checkaux(sib2(tree), pred)) return 1; /* else return checkaux(sib1(tree), pred); */ tree = sib1(tree); goto tailcall; - case TLabChoice: case TRecov: /* labeled failure */ + case TRecov: /* labeled failure */ /* we do not know whether sib2 will be evaluated */ tree = sib1(tree); goto tailcall; case TCapture: case TGrammar: case TRule: @@ -218,10 +218,10 @@ int fixedlenx (TTree *tree, int count, int len) { switch (tree->tag) { case TChar: case TSet: case TAny: return len + 1; - case TFalse: case TTrue: case TNot: case TAnd: case TBehind: - case TThrow: /* labeled failure */ + case TFalse: case TTrue: case TNot: case TAnd: case TBehind: return len; case TRep: case TRunTime: case TOpenCall: + case TThrow: /* labeled failure */ return -1; case TCapture: case TRule: case TGrammar: /* return fixedlenx(sib1(tree), count); */ @@ -237,7 +237,7 @@ int fixedlenx (TTree *tree, int count, int len) { /* else return fixedlenx(sib2(tree), count, len); */ tree = sib2(tree); goto tailcall; } - case TChoice: case TLabChoice: { /* labeled failure */ + case TChoice: { int n1, n2; n1 = fixedlenx(sib1(tree), count, len); if (n1 < 0) return -1; @@ -287,7 
+287,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */ return 1; } - case TChoice: case TLabChoice: { /*(?) labeled failure */ + case TChoice: { Charset csaux; int e1 = getfirst(sib1(tree), follow, firstset); int e2 = getfirst(sib2(tree), follow, &csaux); @@ -378,7 +378,7 @@ static int headfail (TTree *tree) { if (!nofail(sib2(tree))) return 0; /* else return headfail(sib1(tree)); */ tree = sib1(tree); goto tailcall; - case TChoice: case TLabChoice: case TRecov: /* labeled failure */ + case TChoice: case TRecov: /* labeled failure */ if (!headfail(sib1(tree))) return 0; /* else return headfail(sib2(tree)); */ tree = sib2(tree); goto tailcall; @@ -398,7 +398,7 @@ static int needfollow (TTree *tree) { case TChar: case TSet: case TAny: case TFalse: case TTrue: case TAnd: case TNot: case TRunTime: case TGrammar: case TCall: case TBehind: - case TThrow: case TLabChoice: case TRecov: /* (?)labeled failure */ + case TThrow: case TRecov: /* (?)labeled failure */ return 0; case TChoice: case TRep: return 1; @@ -433,7 +433,7 @@ int sizei (const Instruction *i) { return 2; case IThrow: /* labeled failure */ return 1; - case ILabChoice: case IRecov: + case IRecov: return (CHARSETINSTSIZE - 1) + 2; /* labeled failure */ default: return 1; } @@ -499,7 +499,7 @@ static int addoffsetinst (CompileState *compst, Opcode op) { int i = addinstruction(compst, op, 0); /* instruction */ addinstruction(compst, (Opcode)0, 0); /* open space for offset */ assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2 || - op == IRecov || op == ILabChoice); /* labeled failure */ + op == IRecov); /* labeled failure */ return i; } @@ -707,21 +707,6 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, /* labeled failure begin */ -static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt, - const Charset *fl, const byte *cs) { - int emptyp2 = (p2->tag 
== TTrue); - int pcommit; - int test = NOINST; - int pchoice = addoffsetinst(compst, ILabChoice); - addcharset(compst, cs); - codegen(compst, p1, emptyp2, test, fullset); - pcommit = addoffsetinst(compst, ICommit); - jumptohere(compst, pchoice); - jumptohere(compst, test); - codegen(compst, p2, opt, NOINST, fl); - jumptohere(compst, pcommit); -} - static void coderecovery (CompileState *compst, TTree *p1, TTree *p2, int opt, const Charset *fl, const byte *cs) { int emptyp2 = (p2->tag == TTrue); @@ -970,10 +955,6 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, addinstruction(compst, IThrow, (byte) tree->u.label); break; } - case TLabChoice: { /* labeled failure */ - codelabchoice(compst, sib1(tree), sib2(tree), opt, fl, treelabelset(tree)); - break; - } case TRecov: { /* labeled failure */ coderecovery(compst, sib1(tree), sib2(tree), opt, fl, treelabelset(tree)); break; @@ -1000,7 +981,7 @@ static void peephole (CompileState *compst) { switch (code[i].i.code) { case IChoice: case ICall: case ICommit: case IPartialCommit: case IBackCommit: case ITestChar: case ITestSet: - case ILabChoice: case IRecov: /* labeled failure */ + case IRecov: /* labeled failure */ case ITestAny: { /* instructions with labels */ jumptothere(compst, i, finallabel(code, i)); /* optimize label */ break; diff --git a/lpprint.c b/lpprint.c index 122d2e5..ff69a6a 100644 --- a/lpprint.c +++ b/lpprint.c @@ -112,7 +112,7 @@ void printinst (const Instruction *op, const Instruction *p) { printf("%d", p->i.aux); break; } - case ILabChoice: case IRecov: { /* labeled failure */ + case IRecov: { /* labeled failure */ printjmp(op, p); printcharset((p+2)->buff); break; @@ -223,7 +223,7 @@ void printtree (TTree *tree, int ident) { default: { int sibs = numsiblings[tree->tag]; printf("\n"); - if (tree->tag == TLabChoice || tree->tag == TRecov) { /* labeled failure */ + if (tree->tag == TRecov) { /* labeled failure */ printcharset(treelabelset(tree)); printf("\n"); } diff --git 
a/lptree.c b/lptree.c index 700398b..9861cfe 100644 --- a/lptree.c +++ b/lptree.c @@ -723,37 +723,25 @@ static int lp_behind (lua_State *L) { */ static int lp_throw (lua_State *L) { int label = luaL_checkinteger(L, -1); - luaL_argcheck(L, label >= 0 && label < MAXLABELS, -1, "the number of a label must be between 0 and 255"); + luaL_argcheck(L, label >= 1 && label < MAXLABELS, -1, "the number of a label must be between 1 and 255"); newthrowleaf(L, label); return 1; } /* -** labeled choice function +** labeled recovery function */ -static int lp_labchoice (lua_State *L) { - int n = lua_gettop(L); - TTree *tree = newrootlab2sib(L, TLabChoice); - int i; - for (i = 3; i <= n; i++) { - int d = luaL_checkinteger(L, i); - luaL_argcheck(L, d >= 0 && d < MAXLABELS, i, "the number of a label must be between 0 and 255"); - setlabel(treelabelset(tree), (byte)d); - } - return 1; -} - - static int lp_recovery (lua_State *L) { int n = lua_gettop(L); TTree *tree = newrootlab2sib(L, TRecov); + luaL_argcheck(L, n >= 3, 3, "non-nil value expected"); if (n == 2) { /* catches fail as default */ /*setlabel(treelabelset(tree), LFAIL); recovery does not catch regular fail */ } else { int i; for (i = 3; i <= n; i++) { int d = luaL_checkinteger(L, i); - luaL_argcheck(L, d >= 0 && d < MAXLABELS, i, "the number of a label must be between 0 and 255"); + luaL_argcheck(L, d >= 1 && d < MAXLABELS, i, "the number of a label must be between 1 and 255"); setlabel(treelabelset(tree), (byte)d); } } @@ -1089,7 +1077,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, return nb; /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ tree = sib2(tree); goto tailcall; - case TChoice: case TLabChoice: case TRecov: /* must check both children */ /* labeled failure */ + case TChoice: case TRecov: /* must check both children */ /* labeled failure */ nb = verifyrule(L, sib1(tree), passed, npassed, nb); /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ tree = 
sib2(tree); goto tailcall; @@ -1342,7 +1330,6 @@ static struct luaL_Reg pattreg[] = { {"setmaxstack", lp_setmax}, {"type", lp_type}, {"T", lp_throw}, /* labeled failure throw */ - {"Lc", lp_labchoice}, /* labeled failure choice */ {"Rec", lp_recovery}, /* labeled failure choice */ {NULL, NULL} }; diff --git a/lptree.h b/lptree.h index cca346e..b75f323 100644 --- a/lptree.h +++ b/lptree.h @@ -25,7 +25,7 @@ typedef enum TTag { TBehind, /* match behind */ TCapture, /* regular capture */ TRunTime, /* run-time capture */ - TThrow, TLabChoice, TRecov /* labeled failure */ + TThrow, TRecov /* labeled failure */ } TTag; /* number of siblings for each tree */ diff --git a/lpvm.c b/lpvm.c index b200dce..122c8f4 100644 --- a/lpvm.c +++ b/lpvm.c @@ -273,17 +273,6 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, stack++; p += 2; continue; - } - case ILabChoice: { /* labeled failure */ - if (stack == stacklimit) - stack = doublestack(L, &stacklimit, ptop); - stack->p = p + getoffset(p); - stack->s = s; - stack->ls = (const Labelset *) ((p + 2)->buff); - stack->caplevel = captop; - stack++; - p += (CHARSETINSTSIZE - 1) + 2; - continue; } case IRecov: { /* labeled failure */ if (stack == stacklimit) diff --git a/lpvm.h b/lpvm.h index 3c27027..bcfc22f 100644 --- a/lpvm.h +++ b/lpvm.h @@ -35,7 +35,6 @@ typedef enum Opcode { ICloseCapture, ICloseRunTime, IThrow, /* "fails" with a specific label labeled failure */ - ILabChoice, /* labeled choice */ IRecov /* stack a recovery; next fail with label 'f' will jump to 'offset' */ } Opcode; diff --git a/relabel.lua b/relabel.lua deleted file mode 100644 index 0e7195e..0000000 --- a/relabel.lua +++ /dev/null @@ -1,457 +0,0 @@ --- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ - --- imported functions and modules -local tonumber, type, print, error, ipairs = tonumber, type, print, error, ipairs -local pcall = pcall -local setmetatable = setmetatable -local unpack, tinsert, concat = table.unpack or unpack, 
table.insert, table.concat -local rep = string.rep -local m = require"lpeglabelrec" - --- 'm' will be used to parse expressions, and 'mm' will be used to --- create expressions; that is, 're' runs on 'm', creating patterns --- on 'mm' -local mm = m - --- pattern's metatable -local mt = getmetatable(mm.P(0)) - - - --- No more global accesses after this point -local version = _VERSION -if version == "Lua 5.2" then _ENV = nil end - - -local any = m.P(1) -local dummy = mm.P(false) - - -local errinfo = { - {"NoPatt", "no pattern found"}, - {"ExtraChars", "unexpected characters after the pattern"}, - - {"ExpPatt1", "expected a pattern after '/' or the label(s)"}, - - {"ExpPatt2", "expected a pattern after '&'"}, - {"ExpPatt3", "expected a pattern after '!'"}, - - {"ExpPatt4", "expected a pattern after '('"}, - {"ExpPatt5", "expected a pattern after ':'"}, - {"ExpPatt6", "expected a pattern after '{~'"}, - {"ExpPatt7", "expected a pattern after '{|'"}, - - {"ExpPatt8", "expected a pattern after '<-'"}, - - {"ExpPattOrClose", "expected a pattern or closing '}' after '{'"}, - - {"ExpNum", "expected a number after '^', '+' or '-' (no space)"}, - {"ExpCap", "expected a string, number, '{}' or name after '->'"}, - - {"ExpName1", "expected the name of a rule after '=>'"}, - {"ExpName2", "expected the name of a rule after '=' (no space)"}, - {"ExpName3", "expected the name of a rule after '<' (no space)"}, - - {"ExpLab1", "expected at least one label after '{'"}, - {"ExpLab2", "expected a label after the comma"}, - - {"ExpNameOrLab", "expected a name or label after '%' (no space)"}, - - {"ExpItem", "expected at least one item after '[' or '^'"}, - - {"MisClose1", "missing closing ')'"}, - {"MisClose2", "missing closing ':}'"}, - {"MisClose3", "missing closing '~}'"}, - {"MisClose4", "missing closing '|}'"}, - {"MisClose5", "missing closing '}'"}, -- for the captures - - {"MisClose6", "missing closing '>'"}, - {"MisClose7", "missing closing '}'"}, -- for the labels - - 
{"MisClose8", "missing closing ']'"}, - - {"MisTerm1", "missing terminating single quote"}, - {"MisTerm2", "missing terminating double quote"}, -} - -local errmsgs = {} -local labels = {} - -for i, err in ipairs(errinfo) do - errmsgs[i] = err[2] - labels[err[1]] = i -end - -local syntaxerrs = {} - -local function expect (pattern, labelname) - local label = labels[labelname] - local record = function (input, pos) - tinsert(syntaxerrs, { label = label, pos = pos }) - return true - end - return pattern + m.Cmt("", record) * m.T(label) -end - - --- Pre-defined names -local Predef = { nl = m.P"\n" } -local tlabels = {} - - -local mem -local fmem -local gmem - - -local function updatelocale () - mm.locale(Predef) - Predef.a = Predef.alpha - Predef.c = Predef.cntrl - Predef.d = Predef.digit - Predef.g = Predef.graph - Predef.l = Predef.lower - Predef.p = Predef.punct - Predef.s = Predef.space - Predef.u = Predef.upper - Predef.w = Predef.alnum - Predef.x = Predef.xdigit - Predef.A = any - Predef.a - Predef.C = any - Predef.c - Predef.D = any - Predef.d - Predef.G = any - Predef.g - Predef.L = any - Predef.l - Predef.P = any - Predef.p - Predef.S = any - Predef.s - Predef.U = any - Predef.u - Predef.W = any - Predef.w - Predef.X = any - Predef.x - mem = {} -- restart memoization - fmem = {} - gmem = {} - local mt = {__mode = "v"} - setmetatable(mem, mt) - setmetatable(fmem, mt) - setmetatable(gmem, mt) -end - - -updatelocale() - - - -local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) - - -local function getdef (id, defs) - local c = defs and defs[id] - if not c then - error("undefined name: " .. 
id) - end - return c -end - - -local function mult (p, n) - local np = mm.P(true) - while n >= 1 do - if n%2 >= 1 then np = np * p end - p = p * p - n = n/2 - end - return np -end - -local function equalcap (s, i, c) - if type(c) ~= "string" then return nil end - local e = #c + i - if s:sub(i, e - 1) == c then return e else return nil end -end - - -local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 - -local name = m.C(m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0) - -local arrow = S * "<-" - --- a defined name only have meaning in a given environment -local Def = name * m.Carg(1) - -local num = m.C(m.R"09"^1) * S / tonumber - -local String = "'" * m.C((any - "'" - m.P"\n")^0) * expect("'", "MisTerm1") - + '"' * m.C((any - '"' - m.P"\n")^0) * expect('"', "MisTerm2") - - -local defined = "%" * Def / function (c,Defs) - local cat = Defs and Defs[c] or Predef[c] - if not cat then - error("name '" .. c .. "' undefined") - end - return cat -end - -local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R - -local item = defined + Range + m.C(any - m.P"\n") - -local Class = - "[" - * (m.C(m.P"^"^-1)) -- optional complement symbol - * m.Cf(expect(item, "ExpItem") * (item - "]")^0, mt.__add) - / function (c, p) return c == "^" and any - p or p end - * expect("]", "MisClose8") - -local function adddef (t, k, exp) - if t[k] then - error("'"..k.."' already defined as a rule") - else - t[k] = exp - end - return t -end - -local function firstdef (n, r) return adddef({n}, n, r) end - - -local function NT (n, b) - if not b then - error("rule '"..n.."' used outside a grammar") - else return mm.V(n) - end -end - -local function labchoice (...) - local t = { ... 
} - local n = #t - local p = t[1] - local i = 2 - while i + 1 <= n do - -- t[i] == nil when there are no labels - p = t[i] and mm.Lc(p, t[i+1], unpack(t[i])) or mt.__add(p, t[i+1]) - i = i + 2 - end - - return p -end - --- error recovery -local skip = m.P { "Skip", - Skip = (-m.P"/" * -m.P(name * arrow) * m.V"Ignored")^0 * m.Cc(dummy); - Ignored = m.V"Group" + any; - Group = "(" * (-m.P")" * m.V"Ignored")^0 * ")" - + "{" * (-m.P"}" * m.V"Ignored")^0 * "}" - + "[" * (-m.P"]" * m.V"Ignored")^0 * "]" - + "'" * (-m.P"'" * m.V"Ignored")^0 * "'" - + '"' * (-m.P'"' * m.V"Ignored")^0 * '"'; -} - -local ignore = m.Cmt(any, function (input, pos) - return syntaxerrs[#syntaxerrs].pos, dummy -end) - -local pointAtStart = m.Cmt(any, function (input, pos) - -- like ignore but makes the last syntax error point at the start - local ret = syntaxerrs[#syntaxerrs].pos - syntaxerrs[#syntaxerrs].pos = pos-1 - return ret, dummy -end) - - -local function labify (labelnames) - for i, l in ipairs(labelnames) do - labelnames[i] = labels[l] - end - return labelnames -end - -local labelset1 = labify { - "ExpPatt2", "ExpPatt3", - "ExpPatt4", "ExpPatt5", "ExpPatt6", "ExpPatt7", - "ExpPatt8", "ExpPattOrClose", - "ExpNum", "ExpCap", - "ExpName1", "ExpName2", "ExpName3", - "ExpNameOrLab", "ExpItem", - "MisClose6", "MisClose7" -} - -local labelset2 = labify { - "MisClose1", "MisClose2", "MisClose3", "MisClose4", "MisClose5" -} - -local labelset3 = labify { - "ExpPatt1", "ExpLab1", "ExpLab2", "MisClose7" -} - -local exp = m.P{ "Exp", - Exp = S * ( m.V"Grammar" - + (m.V"RecovSeq" * (S * "/" * m.Lc((m.Ct(m.V"Labels") + m.Cc(nil)) - * expect(S * m.V"RecovSeq", - "ExpPatt1"), - m.Cc(nil) * skip, - unpack(labelset3)) - )^0 - ) / labchoice); - Labels = m.P"{" * expect(S * m.V"Label", "ExpLab1") - * (S * "," * expect(S * m.V"Label", "ExpLab2"))^0 - * expect(S * "}", "MisClose7"); - RecovSeq = m.Lc(m.V"Seq", skip, unpack(labelset1)); - Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix" * (S * m.V"Prefix")^0, mt.__mul); - 
Prefix = "&" * expect(S * m.V"Prefix", "ExpPatt2") / mt.__len - + "!" * expect(S * m.V"Prefix", "ExpPatt3") / mt.__unm - + m.V"Suffix"; - Suffix = m.Cf(m.V"RecovPrimary" * - ( S * ( m.P"+" * m.Cc(1, mt.__pow) - + m.P"*" * m.Cc(0, mt.__pow) - + m.P"?" * m.Cc(-1, mt.__pow) - + "^" * expect( m.Cg(num * m.Cc(mult)) - + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow) - ), - "ExpNum") - + "->" * expect(S * ( m.Cg((String + num) * m.Cc(mt.__div)) - + m.P"{}" * m.Cc(nil, m.Ct) - + m.Cg(Def / getdef * m.Cc(mt.__div)) - ), - "ExpCap") - + "=>" * expect(S * m.Cg(Def / getdef * m.Cc(m.Cmt)), - "ExpName1") - ) - )^0, function (a,b,f) return f(a,b) end ); - RecovPrimary = m.Lc(m.V"Primary", ignore, unpack(labelset2)); - Primary = "(" * expect(m.V"Exp", "ExpPatt4") * expect(S * ")", "MisClose1") - + m.Lc(String / mm.P, pointAtStart, - labels["MisTerm1"], labels["MisTerm2"]) - + m.Lc(Class, pointAtStart, labels["MisClose8"]) - + defined - + "%" * expect(m.V"Labels", "ExpNameOrLab") / mm.T - + "{:" * (name * ":" + m.Cc(nil)) * expect(m.V"Exp", "ExpPatt5") - * expect(S * ":}", "MisClose2") - / function (n, p) return mm.Cg(p, n) end - + "=" * expect(name, "ExpName2") - / function (n) return mm.Cmt(mm.Cb(n), equalcap) end - + m.P"{}" / mm.Cp - + "{~" * expect(m.V"Exp", "ExpPatt6") - * expect(S * "~}", "MisClose3") / mm.Cs - + "{|" * expect(m.V"Exp", "ExpPatt7") - * expect(S * "|}", "MisClose4") / mm.Ct - + "{" * expect(m.V"Exp", "ExpPattOrClose") - * expect(S * "}", "MisClose5") / mm.C - + m.P"." 
* m.Cc(any) - + (name * -arrow + "<" * expect(name, "ExpName3") - * expect(">", "MisClose6")) * m.Cb("G") / NT; - Label = num + name / function (f) return tlabels[f] end; - Definition = name * arrow * expect(m.V"Exp", "ExpPatt8"); - Grammar = m.Cg(m.Cc(true), "G") - * m.Cf(m.V"Definition" / firstdef * (S * m.Cg(m.V"Definition"))^0, - adddef) / mm.P; -} - -local pattern = S * m.Cg(m.Cc(false), "G") * expect(exp, "NoPatt") / mm.P - * S * expect(-any, "ExtraChars") - -local function lineno (s, i) - if i == 1 then return 1, 1 end - local adjustment = 0 - -- report the current line if at end of line, not the next - if s:sub(i,i) == '\n' then - i = i-1 - adjustment = 1 - end - local rest, num = s:sub(1,i):gsub("[^\n]*\n", "") - local r = #rest - return 1 + num, (r ~= 0 and r or 1) + adjustment -end - -local function splitlines(str) - local t = {} - local function helper(line) tinsert(t, line) return "" end - helper((str:gsub("(.-)\r?\n", helper))) - return t -end - -local function compile (p, defs) - if mm.type(p) == "pattern" then return p end -- already compiled - p = p .. " " -- for better reporting of column numbers in errors when at EOF - local ok, cp, label, suffix = pcall(function() return pattern:match(p, 1, defs) end) - if not ok and #syntaxerrs == 0 then - if type(cp) == "string" then - cp = cp:gsub("^[^:]+:[^:]+: ", "") - end - error(cp, 3) - end - if #syntaxerrs > 0 then - local lines = splitlines(p) - local errors = {} - for i, err in ipairs(syntaxerrs) do - local line, col = lineno(p, err.pos) - tinsert(errors, "L" .. line .. ":C" .. col .. ": " .. errmsgs[err.label]) - tinsert(errors, lines[line]) - tinsert(errors, rep(" ", col-1) .. "^") - end - syntaxerrs = {} - error("syntax error(s) in pattern\n" .. 
concat(errors, "\n"), 3) - end - return cp -end - -local function match (s, p, i) - local cp = mem[p] - if not cp then - cp = compile(p) - mem[p] = cp - end - return cp:match(s, i or 1) -end - -local function find (s, p, i) - local cp = fmem[p] - if not cp then - cp = compile(p) / 0 - cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) } - fmem[p] = cp - end - local i, e = cp:match(s, i or 1) - if i then return i, e - 1 - else return i - end -end - -local function gsub (s, p, rep) - local g = gmem[p] or {} -- ensure gmem[p] is not collected while here - gmem[p] = g - local cp = g[rep] - if not cp then - cp = compile(p) - cp = mm.Cs((cp / rep + 1)^0) - g[rep] = cp - end - return cp:match(s) -end - -local function setlabels (t) - tlabels = t -end - -local function calcline (s, i) - if i == 1 then return 1, 1 end - local rest, line = s:sub(1,i):gsub("[^\n]*\n", "") - local col = #rest - return 1 + line, col ~= 0 and col or 1 -end - - --- exported names -local re = { - compile = compile, - match = match, - find = find, - gsub = gsub, - updatelocale = updatelocale, - setlabels = setlabels, - calcline = calcline -} - -if version == "Lua 5.1" then _G.re = re end - -return re diff --git a/relabelrec.lua b/relabelrec.lua new file mode 100644 index 0000000..16ca7f0 --- /dev/null +++ b/relabelrec.lua @@ -0,0 +1,396 @@ +-- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ + +-- imported functions and modules +local tonumber, type, print, error, ipairs = tonumber, type, print, error, ipairs +local pcall = pcall +local setmetatable = setmetatable +local unpack, tinsert, concat = table.unpack or unpack, table.insert, table.concat +local rep = string.rep +local m = require"lpeglabelrec" + +-- 'm' will be used to parse expressions, and 'mm' will be used to +-- create expressions; that is, 're' runs on 'm', creating patterns +-- on 'mm' +local mm = m + +-- pattern's metatable +local mt = getmetatable(mm.P(0)) + + + +-- No more global accesses after this point +local version = 
_VERSION +if version == "Lua 5.2" then _ENV = nil end + + +local any = m.P(1) +local dummy = mm.P(false) + + +local errinfo = { + {"NoPatt", "no pattern found"}, + {"ExtraChars", "unexpected characters after the pattern"}, + + {"ExpPatt1", "expected a pattern after '/' or the label(s)"}, + + {"ExpPatt2", "expected a pattern after '&'"}, + {"ExpPatt3", "expected a pattern after '!'"}, + + {"ExpPatt4", "expected a pattern after '('"}, + {"ExpPatt5", "expected a pattern after ':'"}, + {"ExpPatt6", "expected a pattern after '{~'"}, + {"ExpPatt7", "expected a pattern after '{|'"}, + + {"ExpPatt8", "expected a pattern after '<-'"}, + + {"ExpPattOrClose", "expected a pattern or closing '}' after '{'"}, + + {"ExpNum", "expected a number after '^', '+' or '-' (no space)"}, + {"ExpCap", "expected a string, number, '{}' or name after '->'"}, + + {"ExpName1", "expected the name of a rule after '=>'"}, + {"ExpName2", "expected the name of a rule after '=' (no space)"}, + {"ExpName3", "expected the name of a rule after '<' (no space)"}, + + {"ExpLab1", "expected at least one label after '{'"}, + {"ExpLab2", "expected a label after the comma"}, + + {"ExpNameOrLab", "expected a name or label after '%' (no space)"}, + + {"ExpItem", "expected at least one item after '[' or '^'"}, + + {"MisClose1", "missing closing ')'"}, + {"MisClose2", "missing closing ':}'"}, + {"MisClose3", "missing closing '~}'"}, + {"MisClose4", "missing closing '|}'"}, + {"MisClose5", "missing closing '}'"}, -- for the captures + + {"MisClose6", "missing closing '>'"}, + {"MisClose7", "missing closing '}'"}, -- for the labels + + {"MisClose8", "missing closing ']'"}, + + {"MisTerm1", "missing terminating single quote"}, + {"MisTerm2", "missing terminating double quote"}, +} + +local errmsgs = {} +local labels = {} + +for i, err in ipairs(errinfo) do + errmsgs[i] = err[2] + labels[err[1]] = i +end + +local function expect (pattern, labelname) + local label = labels[labelname] + return pattern + m.T(label) +end 
+ + +-- Pre-defined names +local Predef = { nl = m.P"\n" } +local tlabels = {} + + +local mem +local fmem +local gmem + + +local function updatelocale () + mm.locale(Predef) + Predef.a = Predef.alpha + Predef.c = Predef.cntrl + Predef.d = Predef.digit + Predef.g = Predef.graph + Predef.l = Predef.lower + Predef.p = Predef.punct + Predef.s = Predef.space + Predef.u = Predef.upper + Predef.w = Predef.alnum + Predef.x = Predef.xdigit + Predef.A = any - Predef.a + Predef.C = any - Predef.c + Predef.D = any - Predef.d + Predef.G = any - Predef.g + Predef.L = any - Predef.l + Predef.P = any - Predef.p + Predef.S = any - Predef.s + Predef.U = any - Predef.u + Predef.W = any - Predef.w + Predef.X = any - Predef.x + mem = {} -- restart memoization + fmem = {} + gmem = {} + local mt = {__mode = "v"} + setmetatable(mem, mt) + setmetatable(fmem, mt) + setmetatable(gmem, mt) +end + + +updatelocale() + + + +local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) + + +local function getdef (id, defs) + local c = defs and defs[id] + if not c then + error("undefined name: " .. 
id) + end + return c +end + + +local function mult (p, n) + local np = mm.P(true) + while n >= 1 do + if n%2 >= 1 then np = np * p end + p = p * p + n = n/2 + end + return np +end + +local function equalcap (s, i, c) + if type(c) ~= "string" then return nil end + local e = #c + i + if s:sub(i, e - 1) == c then return e else return nil end +end + + +local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 + +local name = m.C(m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0) + +local arrow = S * "<-" + +-- a defined name only have meaning in a given environment +local Def = name * m.Carg(1) + +local num = m.C(m.R"09"^1) * S / tonumber + +local String = "'" * m.C((any - "'" - m.P"\n")^0) * expect("'", "MisTerm1") + + '"' * m.C((any - '"' - m.P"\n")^0) * expect('"', "MisTerm2") + + +local defined = "%" * Def / function (c,Defs) + local cat = Defs and Defs[c] or Predef[c] + if not cat then + error("name '" .. c .. "' undefined") + end + return cat +end + +local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R + +local item = defined + Range + m.C(any - m.P"\n") + +local Class = + "[" + * (m.C(m.P"^"^-1)) -- optional complement symbol + * m.Cf(expect(item, "ExpItem") * (item - "]")^0, mt.__add) + / function (c, p) return c == "^" and any - p or p end + * expect("]", "MisClose8") + +local function adddef (t, k, exp) + if t[k] then + error("'"..k.."' already defined as a rule") + else + t[k] = exp + end + return t +end + +local function firstdef (n, r) return adddef({n}, n, r) end + + +local function NT (n, b) + if not b then + error("rule '"..n.."' used outside a grammar") + else return mm.V(n) + end +end + +local function choicerec (...) + local t = { ... 
} + local n = #t + local p = t[1] + local i = 2 + while i + 1 <= n do + -- t[i] == nil when there are no labels + p = t[i] and mm.Rec(p, t[i+1], unpack(t[i])) or mt.__add(p, t[i+1]) + i = i + 2 + end + + return p +end + +local exp = m.P{ "Exp", + Exp = S * ( m.V"Grammar" + + (m.V"Seq" * (S * (("//" * m.Ct(m.V"Labels")) + ("/" * m.Cc(nil))) + * expect(S * m.V"Seq", "ExpPatt1") + )^0 + ) / choicerec); + Labels = m.P"{" * expect(S * m.V"Label", "ExpLab1") + * (S * "," * expect(S * m.V"Label", "ExpLab2"))^0 + * expect(S * "}", "MisClose7"); + Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix" * (S * m.V"Prefix")^0, mt.__mul); + Prefix = "&" * expect(S * m.V"Prefix", "ExpPatt2") / mt.__len + + "!" * expect(S * m.V"Prefix", "ExpPatt3") / mt.__unm + + m.V"Suffix"; + Suffix = m.Cf(m.V"Primary" * + ( S * ( m.P"+" * m.Cc(1, mt.__pow) + + m.P"*" * m.Cc(0, mt.__pow) + + m.P"?" * m.Cc(-1, mt.__pow) + + "^" * expect( m.Cg(num * m.Cc(mult)) + + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow) + ), + "ExpNum") + + "->" * expect(S * ( m.Cg((String + num) * m.Cc(mt.__div)) + + m.P"{}" * m.Cc(nil, m.Ct) + + m.Cg(Def / getdef * m.Cc(mt.__div)) + ), + "ExpCap") + + "=>" * expect(S * m.Cg(Def / getdef * m.Cc(m.Cmt)), + "ExpName1") + ) + )^0, function (a,b,f) return f(a,b) end ); + Primary = "(" * expect(m.V"Exp", "ExpPatt4") * expect(S * ")", "MisClose1") + + String / mm.P + + Class + + defined + + "%" * expect(m.P"{", "ExpNameOrLab") + * expect(S * m.V"Label", "ExpLab1") + * expect(S * "}", "MisClose7") / mm.T + + "{:" * (name * ":" + m.Cc(nil)) * expect(m.V"Exp", "ExpPatt5") + * expect(S * ":}", "MisClose2") + / function (n, p) return mm.Cg(p, n) end + + "=" * expect(name, "ExpName2") + / function (n) return mm.Cmt(mm.Cb(n), equalcap) end + + m.P"{}" / mm.Cp + + "{~" * expect(m.V"Exp", "ExpPatt6") + * expect(S * "~}", "MisClose3") / mm.Cs + + "{|" * expect(m.V"Exp", "ExpPatt7") + * expect(S * "|}", "MisClose4") / mm.Ct + + "{" * expect(m.V"Exp", "ExpPattOrClose") + * expect(S * "}", "MisClose5") / 
mm.C + + m.P"." * m.Cc(any) + + (name * -arrow + "<" * expect(name, "ExpName3") + * expect(">", "MisClose6")) * m.Cb("G") / NT; + Label = num + name / function (f) return tlabels[f] end; + Definition = name * arrow * expect(m.V"Exp", "ExpPatt8"); + Grammar = m.Cg(m.Cc(true), "G") + * m.Cf(m.V"Definition" / firstdef * (S * m.Cg(m.V"Definition"))^0, + adddef) / mm.P; +} + +local pattern = S * m.Cg(m.Cc(false), "G") * expect(exp, "NoPatt") / mm.P + * S * expect(-any, "ExtraChars") + +local function lineno (s, i) + if i == 1 then return 1, 1 end + local adjustment = 0 + -- report the current line if at end of line, not the next + if s:sub(i,i) == '\n' then + i = i-1 + adjustment = 1 + end + local rest, num = s:sub(1,i):gsub("[^\n]*\n", "") + local r = #rest + return 1 + num, (r ~= 0 and r or 1) + adjustment +end + +local function splitlines(str) + local t = {} + local function helper(line) tinsert(t, line) return "" end + helper((str:gsub("(.-)\r?\n", helper))) + return t +end + +local function compile (p, defs) + if mm.type(p) == "pattern" then return p end -- already compiled + p = p .. " " -- for better reporting of column numbers in errors when at EOF + local ok, cp, label, suffix = pcall(function() return pattern:match(p, 1, defs) end) + if not ok and cp then + if type(cp) == "string" then + cp = cp:gsub("^[^:]+:[^:]+: ", "") + end + error(cp, 3) + end + if not cp then + local lines = splitlines(p) + local line, col = lineno(p, #p - #suffix + 1) + local err = {} + tinsert(err, "L" .. line .. ":C" .. col .. ": " .. errmsgs[label]) + tinsert(err, lines[line]) + tinsert(err, rep(" ", col-1) .. "^") + error("syntax error(s) in pattern\n" .. 
concat(err, "\n"), 3) + end + return cp +end + +local function match (s, p, i) + local cp = mem[p] + if not cp then + cp = compile(p) + mem[p] = cp + end + return cp:match(s, i or 1) +end + +local function find (s, p, i) + local cp = fmem[p] + if not cp then + cp = compile(p) / 0 + cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) } + fmem[p] = cp + end + local i, e = cp:match(s, i or 1) + if i then return i, e - 1 + else return i + end +end + +local function gsub (s, p, rep) + local g = gmem[p] or {} -- ensure gmem[p] is not collected while here + gmem[p] = g + local cp = g[rep] + if not cp then + cp = compile(p) + cp = mm.Cs((cp / rep + 1)^0) + g[rep] = cp + end + return cp:match(s) +end + +local function setlabels (t) + tlabels = t +end + +local function calcline (s, i) + if i == 1 then return 1, 1 end + local rest, line = s:sub(1,i):gsub("[^\n]*\n", "") + local col = #rest + return 1 + line, col ~= 0 and col or 1 +end + + +-- exported names +local re = { + compile = compile, + match = match, + find = find, + gsub = gsub, + updatelocale = updatelocale, + setlabels = setlabels, + calcline = calcline +} + +if version == "Lua 5.1" then _G.re = re end + +return re diff --git a/test.lua b/test.lua index fc2b607..18ab20f 100755 --- a/test.lua +++ b/test.lua @@ -1110,7 +1110,7 @@ checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'}) -- Tests for 're' module ------------------------------------------------------------------- -local re = require "relabel" +local re = require "relabelrec" local match, compile = re.match, re.compile diff --git a/testlabel.lua b/testlabel.lua index 25036b2..cb92657 100644 --- a/testlabel.lua +++ b/testlabel.lua @@ -11,6 +11,15 @@ local function checkeqlab (x, ...) 
end end +local function checkeq (x, y, p) +if p then print(x,y) end + if type(x) ~= "table" then assert(x == y) + else + for k,v in pairs(x) do checkeq(v, y[k], p) end + for k,v in pairs(y) do checkeq(v, x[k], p) end + end +end + -- throws a label p = m.T(1) s = "abc" @@ -32,48 +41,97 @@ local g = m.P{ r, l, serror = g:match(s) assert(r == nil and l == 1 and serror == "abc") ---[==[ TODO: labeled choice does not work anymore --- throws a label that is not caught by labeled choice -p = m.Lc(m.T(2), m.P"a", 1, 3) + +-- throws a label that is not caught by the recovery operator +p = m.Rec(m.T(2), m.P"a", 1, 3) r, l, serror = p:match(s) assert(r == nil and l == 2 and serror == "abc") --- modifies previous pattern --- adds another labeled choice to catch label "2" -p = m.Lc(p, m.P"a", 2) +-- wraps the previous pattern with a recovery that catches label "2" +p = m.Rec(p, m.P"a", 2) assert(p:match(s) == 2) --- throws a label that is caught by labeled choice -p = m.Lc(m.T(25), m.P"a", 25) +-- throws a label that is caught by recovery +p = m.Rec(m.T(25), m.P"a", 25) assert(p:match(s) == 2) -- "fail" is label "0" --- throws the "fail" label that is not caught by the labeled choice +-- throws the "fail" label after the recovery s = "bola" r, l, serror = p:match("bola") assert(r == nil and l == 0 and serror == "bola") --- labeled choice does not catch "fail" by default -p = m.Lc(m.P"b", m.P"a", 1) +-- Recovery does not catch "fail" by default +p = m.Rec(m.P"b", m.P"a", 1) r, l, serror = p:match("abc") assert(r == nil and l == 0 and serror == "abc") assert(p:match("bola") == 2) --- labeled choice can catch "fail" -p = m.Lc(m.P"b", m.P"a", 0) + +-- recovery operator catches "1" or "3" +p = m.Rec((m.P"a" + m.T(1)) * m.T(3), (m.P"a" + m.P"b"), 1, 3) +assert(p:match("aac") == 3) +assert(p:match("abc") == 3) +r, l, serror = p:match("acc") +assert(r == nil and l == 0 and serror == "cc") + +--throws 1, recovery pattern matches 'b', throw 3, and rec pat matches 'a' 
+assert(p:match("bac") == 3) + +r, l, serror = p:match("cab") +assert(r == nil and l == 0 and serror == "cab") + + +-- associativity +-- (p1 / %1) //{1} (p2 / %2) //{2} p3 +-- left-associativity +-- ("a" //{1} "b") //{2} "c" +p = m.Rec(m.Rec(m.P"a" + m.T(1), m.P"b" + m.T(2), 1), m.P"c", 2) assert(p:match("abc") == 2) -assert(p:match("bola") == 2) +assert(p:match("bac") == 2) +assert(p:match("cab") == 2) +r, l, serror = p:match("dab") +assert(r == nil and l == 0 and serror == "dab") --- "fail" is label "0" --- labeled choice catches "fail" or "3" -p = m.Lc(m.P"a" * m.T(3), (m.P"a" + m.P"b"), 0, 3) + +-- right-associativity +-- "a" //{1} ("b" //{2} "c") +p = m.Rec(m.P"a" + m.T(1), m.Rec(m.P"b" + m.T(2), m.P"c", 2), 1) assert(p:match("abc") == 2) assert(p:match("bac") == 2) +assert(p:match("cab") == 2) +r, l, serror = p:match("dab") +assert(r == nil and l == 0 and serror == "dab") + + +-- associativity -> in this case the error thrown by p1 is only +-- recovered when we have a left-associative operator +-- (p1 / %2) //{1} (p2 / %2) //{2} p3 +-- left-associativity +-- ("a" //{1} "b") //{2} "c" +p = m.Rec(m.Rec(m.P"a" + m.T(2), m.P"b" + m.T(2), 1), m.P"c", 2) +assert(p:match("abc") == 2) +r, l, serror = p:match("bac") +assert(r == nil and l == 0 and serror == "bac") +assert(p:match("cab") == 2) +r, l, serror = p:match("dab") +assert(r == nil and l == 0 and serror == "dab") + +-- right-associativity +-- "a" //{1} ("b" //{2} "c") +p = m.Rec(m.P"a" + m.T(2), m.Rec(m.P"b" + m.T(2), m.P"c", 2), 1) +assert(p:match("abc") == 2) +r, l, serror = p:match("bac") +assert(r == nil and l == 2 and serror == "bac") r, l, serror = p:match("cab") -assert(r == nil and l == 0 and serror == "cab") +assert(r == nil and l == 2 and serror == "cab") +r, l, serror = p:match("dab") +assert(r == nil and l == 2 and serror == "dab") + + + -- tests related to predicates p = #m.T(1) + m.P"a" @@ -84,74 +142,66 @@ p = ##m.T(1) + m.P"a" r, l, serror = p:match("abc") assert(r == nil and l == 1 and serror == 
"abc") -p = #m.T(0) * m.P"a" -assert(p:match("abc") == fail) - -p = #m.T(0) + m.P"a" -assert(p:match("abc") == 2) - p = -m.T(1) * m.P"a" r, l, serror = p:match("abc") assert(r == nil and l == 1 and serror == "abc") +p = -m.T(1) * m.P"a" +r, l, serror = p:match("bbc") +assert(r == nil and l == 1 and serror == "bbc") + p = -(-m.T(1)) * m.P"a" r, l, serror = p:match("abc") assert(r == nil and l == 1 and serror == "abc") -p = -m.T(0) * m.P"a" -assert(p:match("abc") == 2) +p = m.Rec(-m.T(22), m.P"a", 22) +r, l, serror = p:match("abc") +assert(r == nil and l == 0 and serror == "bc") -p = -m.T(0) + m.P"a" -assert(p:match("abc") == 1) +assert(p:match("bbc") == 1) -p = -(-m.T(0)) + m.P"a" -assert(p:match("abc") == 2) +p = m.Rec(#m.T(22), m.P"a", 22) +assert(p:match("abc") == 1) -p = m.Lc(-m.T(22), m.P"a", 22) -assert(p:match("abc") == 2) +p = #m.Rec(m.T(22), m.P"a", 22) +assert(p:match("abc") == 1) -p = m.Lc(-m.T(0), m.P"a", 0) +p = m.Rec(m.T(22), #m.P"a", 22) assert(p:match("abc") == 1) -p = m.Lc(#m.T(22), m.P"a", 22) -assert(p:match("abc") == 2) +p = m.Rec(#m.T(22), m.P"a", 22) +r, l, serror = p:match("bbc") +assert(r == nil and l == 0 and serror == "bbc") -p = m.Lc(#m.T(0), m.P"a", 0) -assert(p:match("abc") == 2) -- tests related to repetition p = m.T(1)^0 r, l, serror = p:match("ab") assert(r == nil and l == 1 and serror == "ab") -p = m.T(0)^0 -assert(p:match("ab") == 1) - p = (m.P"a" + m.T(1))^0 r, l, serror = p:match("aa") assert(r == nil and l == 1 and serror == "") -p = (m.P"a" + m.T(0))^0 -assert(p:match("aa") == 3) -- Bug reported by Matthew Allen -- some optmizations performed by LPeg should not be -- applied in case of labeled choices -p = m.Lc(m.P"A", m.P(true), 1) + m.P("B") +p = m.Rec(m.P"A", m.P(true), 1) + m.P("B") assert(p:match("B") == 2) -p = m.Lc(m.P"A", m.P(false), 1) + m.P("B") +p = m.Rec(m.P"A", m.P(false), 1) + m.P("B") assert(p:match("B") == 2) --[[ -S -> A /{1} 'a' +S -> A //{1} 'a' A -> B B -> %1 ]] g = m.P{ "S", - S = m.Lc(m.V"A", m.P"a", 1), + 
S = m.Rec(m.V"A", m.P"a", 1), A = m.V"B", B = m.T(1), } @@ -177,66 +227,96 @@ r, l, serror = g:match("a;a") assert(r == nil and l == 1 and serror == "") --- %1 /{1,3} %2 /{2} 'a' -p = m.Lc(m.Lc(m.T(1), m.T(2), 1, 3), m.P"a", 2) +-- %1 //{1,3} %2 //{2} 'a' +p = m.Rec(m.Rec(m.T(1), m.T(2), 1, 3), m.P"a", 2) assert(p:match("abc") == 2) r, l, serror = p:match("") assert(r == nil and l == 0 and serror == "") -p = m.Lc(m.T(1), m.Lc(m.T(2), m.P"a", 2), 1, 3) +p = m.Rec(m.T(1), m.Rec(m.T(2), m.P"a", 2), 1, 3) assert(p:match("abc") == 2) r, l, serror = p:match("") assert(r == nil and l == 0 and serror == "") -]==] + + +-- Infinite Loop TODO: check the semantics +-- %1 //{1} %1 +p = m.Rec(m.T(1), m.T(1), 1) +--r, l, serror = p:match("ab") +--assert(r == nil and l == 1 and serror == "ab") + +-- %1 //{1} 'a' (!. / %1) +p = m.Rec(m.T(1), m.P"a" * (-m.P(1) + m.T(1)), 1) +r, l, serror = p:match("ab") +assert(r == nil and l == 0 and serror == "b") + +r, l, serror = p:match("cd") +assert(r == nil and l == 0 and serror == "cd") + +-- %1 //{1} . (!. 
/ %1) +p = m.Rec(m.T(1), m.P(1) * (-m.P(1) + m.T(1)), 1) +assert(p:match("abc") == 4) + -- testing the limit of labels -p = m.T(0) -s = "abc" -r, l, serror = p:match(s) -assert(r == nil and l == 0 and serror == "abc") +-- can only throw labels between 1 and 255 +local r = pcall(m.Rec, m.P"b", m.P"a", 0) +assert(r == false) -p = m.T(255) -s = "abc" -r, l, serror = p:match(s) -assert(r == nil and l == 255 and serror == "abc") +local r = pcall(m.Rec, m.P"b", m.P"a", 256) +assert(r == false) -local r = pcall(m.T, -1) +local r = pcall(m.Rec, m.P"b", m.P"a", -1) +assert(r == false) + +local r = pcall(m.T, 0) assert(r == false) local r = pcall(m.T, 256) assert(r == false) +local r = pcall(m.T, -1) +assert(r == false) + + +local r = m.Rec(m.P"b", m.P"a", 255) +assert(p:match("a") == 2) + +p = m.T(255) +s = "abc" +r, l, serror = p:match(s) +assert(r == nil and l == 255 and serror == "abc") + + print("+") ---[==[ TODO: labeled choice does not work anymore --[[ grammar based on Figure 8 of paper submitted to SCP -S -> S0 /{1} ID /{2} ID '=' Exp /{3} 'unsigned'* 'int' ID /{4} 'unsigned'* ID ID / %error -S0 -> ID S1 / 'unsigned' S2 / 'int' %3 -S1 -> '=' %2 / !. %1 / ID %4 -S2 -> 'unsigned' S2 / ID %4 / 'int' %3 +S -> S0 //{1} ID //{2} ID '=' Exp //{3} 'unsigned'* 'int' ID //{4} 'unsigned'* ID ID / %error +S0 -> S1 / S2 / &'int' %3 +S1 -> &(ID '=') %2 / &(ID !.) 
%1 / &ID %4 +S2 -> &('unsigned'+ ID) %4 / & ('unsigned'+ 'int') %3 ]] - local sp = m.S" \t\n"^0 local eq = sp * m.P"=" g = m.P{ "S", - S = m.Lc( - m.Lc( - m.Lc( - m.Lc(m.V"S0", m.V"ID" * (m.P(1) + ""), 1), + S = m.Rec( + m.Rec( + m.Rec( + m.Rec(m.V"S0", m.V"ID", 1), m.V"ID" * eq * m.V"Exp", 2 ), m.V"U"^0 * m.V"I" * m.V"ID", 3 ), m.V"U"^0 * m.V"ID" * m.V"ID", 4) + m.T(5), -- error - S0 = m.V"ID" * m.V"S1" + m.V"U" * m.V"S2" + m.V"I" * m.T(3), - S1 = eq * m.T(2) + sp * -m.P(1) * m.T(1) + m.V"ID" * m.T(4), - S2 = m.V"U" * m.V"S2" + m.V"ID" * m.T(4) + m.V"I" * m.T(3), + S0 = m.V"S1" + m.V"S2" + #m.V"I" * m.T(3), + S1 = #(m.V"ID" * eq) * m.T(2) + sp * #(m.V"ID" * -m.P(1)) * m.T(1) + #m.V"ID" * m.T(4), + S2 = #(m.V"U"^1 * m.V"ID") * m.T(4) + #(m.V"U"^1 * m.V"I") * m.T(3), ID = sp * m.P"a", U = sp * m.P"unsigned", I = sp * m.P"int", @@ -273,53 +353,59 @@ assert(r == nil and l == 5 and serror == s) print("+") -local re = require 'relabel' -g = re.compile[['a' /{4,9} [a-z] +local re = require 'relabelrec' + +g = re.compile[['a' //{4,9} [a-z] ]] assert(g:match("a") == 2) r, l, serror = g:match("b") assert(r == nil and l == 0 and serror == "b") -g = re.compile[['a' /{4,9} [a-f] /{5, 7} [a-z] +g = re.compile[['a' //{4,9} [a-f] //{5, 7} [a-z] ]] assert(g:match("a") == 2) r, l, serror = g:match("b") assert(r == nil and l == 0 and serror == "b") -g = re.compile[[%{1} /{4,9} [a-z] +g = re.compile[[%{1} //{4,9} [a-z] ]] r, l, serror = g:match("a") assert(r == nil and l == 1 and serror == "a") -g = re.compile[[%{1} /{4,1} [a-f] +g = re.compile[[%{1} //{4,1} [a-f] ]] assert(g:match("a") == 2) r, l, serror = g:match("h") assert(r == nil and l == 0 and serror == "h") -g = re.compile[[[a-f]%{9} /{4,9} [a-c]%{7} /{5, 7} [a-z] ]] -assert(g:match("a") == 2) -assert(g:match("c") == 2) -r, l, serror = g:match("d") +g = re.compile[[[a-f]%{9} //{4,9} [a-c]%{7} //{5, 7} [a-z] ]] +r, l, serror = g:match("a") +assert(r == nil and l == 0 and serror == "") +r, l, serror = g:match("aa") +assert(r == 
nil and l == 0 and serror == "") +assert(g:match("aaa") == 4) + +r, l, serror = g:match("ad") assert(r == nil and l == 0 and serror == "d") + r, l, serror = g:match("g") assert(r == nil and l == 0 and serror == "g") + --[[ grammar based on Figure 8 of paper submitted to SCP -S -> S0 /{1} ID /{2} ID '=' Exp /{3} 'unsigned'* 'int' ID /{4} 'unsigned'* ID ID / %error -S0 -> ID S1 / 'unsigned' S2 / 'int' %3 -S1 -> '=' %2 / !. %1 / ID %4 -S2 -> 'unsigned' S2 / ID %4 / 'int' %3 +S -> S0 //{1} ID //{2} ID '=' Exp //{3} 'unsigned'* 'int' ID //{4} 'unsigned'* ID ID / %error +S0 -> S1 / S2 / &'int' %3 +S1 -> &(ID '=') %2 / &(ID !.) %1 / &ID %4 +S2 -> &('unsigned'+ ID) %4 / & ('unsigned'+ 'int') %3 ]] - g = re.compile([[ - S <- S0 /{1} ID /{2} ID %s* '=' Exp /{3} U* Int ID /{4} U ID ID /{0} %{5} - S0 <- ID S1 / U S2 / Int %{3} - S1 <- %s* '=' %{2} / !. %{1} / ID %{4} - S2 <- U S2 / ID %{4} / Int %{3} + S <- S0 //{1} ID //{2} ID %s* '=' Exp //{3} U* Int ID //{4} U ID ID / %{5} + S0 <- S1 / S2 / &Int %{3} + S1 <- &(ID %s* '=') %{2} / &(ID !.) 
%{1} / &ID %{4} + S2 <- &(U+ ID) %{4} / &(U+ Int) %{3} ID <- %s* 'a' U <- %s* 'unsigned' Int <- %s* 'int' @@ -349,6 +435,8 @@ s = "unsigned int" r, l, serror = g:match(s) assert(r == nil and l == 5 and serror == s) + + local terror = { ['cmdSeq'] = "Missing ';' in CmdSeq", ['ifExp'] = "Error in expresion of 'if'", ['ifThen'] = "Error matching 'then' keyword", @@ -370,12 +458,12 @@ local terror = { ['cmdSeq'] = "Missing ';' in CmdSeq", ['undefined'] = "Undefined Error"} g = re.compile([[ - Tiny <- CmdSeq /{1} '' -> cmdSeq /{2} '' -> ifExp /{3} '' -> ifThen /{4} '' -> ifThenCmdSeq - /{5} '' -> ifElseCmdSeq /{6} '' -> ifEnd /{7} '' -> repeatCmdSeq - /{8} '' -> repeatUntil /{9} '' -> repeatExp /{10} '' -> assignOp - /{11} '' -> assignExp /{12} '' -> readName /{13} '' -> writeExp - /{14} '' -> simpleExp /{15} '' -> term /{16} '' -> factor - /{17} '' -> openParExp /{18} '' -> closePar /{0} '' -> undefined + Tiny <- CmdSeq //{1} '' -> cmdSeq //{2} '' -> ifExp //{3} '' -> ifThen //{4} '' -> ifThenCmdSeq + //{5} '' -> ifElseCmdSeq //{6} '' -> ifEnd //{7} '' -> repeatCmdSeq + //{8} '' -> repeatUntil //{9} '' -> repeatExp //{10} '' -> assignOp + //{11} '' -> assignExp //{12} '' -> readName //{13} '' -> writeExp + //{14} '' -> simpleExp //{15} '' -> term //{16} '' -> factor + //{17} '' -> openParExp //{18} '' -> closePar / '' -> undefined CmdSeq <- (Cmd (SEMICOLON / %{1})) (Cmd (SEMICOLON / %{1}))* Cmd <- IfCmd / RepeatCmd / ReadCmd / WriteCmd / AssignCmd IfCmd <- IF (Exp / %{2}) (THEN / %{3}) (CmdSeq / %{4}) (ELSE (CmdSeq / %{5}) / '') (END / %{6}) @@ -551,21 +639,14 @@ assert(g:match(s) == terror['undefined']) print("+") -]==] --- test recovery operator -p = m.Rec("a", "b") -assert(p:match("a") == 2) ---assert(p:match("b") == 2) -checkeqlab({nil, 0, "b"}, p:match("b")) -checkeqlab({nil, 0, "c"}, p:match("c")) p = m.Rec("a", "b", 3) assert(p:match("a") == 2) checkeqlab({nil, 0, "b"}, p:match("b")) checkeqlab({nil, 0, "c"}, p:match("c")) -p = m.Rec(m.T(3), "b") +p = 
m.Rec(m.T(3), "b", 1) checkeqlab({nil, 3, "a"}, p:match("a")) checkeqlab({nil, 3, "b"}, p:match("b")) @@ -574,56 +655,26 @@ checkeqlab({nil, 0, "a"}, p:match("a")) assert(p:match("b") == 2) --[[ -S -> (A //{fail} (!c .)*) C -A -> a*b -C -> c+ -]] -g = m.P{ - "S", - S = m.Rec(m.V"A", (-m.P"c" * m.P(1))^0) * m.V"C", - A = m.P"a"^0 * "b", - C = m.P"c"^1, -} - -assert(g:match("abc") == 4) -assert(g:match("aabc") == 5) ---assert(g:match("aadc") == 5) --old semantics -checkeqlab({nil, 0, "dc"}, g:match("aadc")) --new semantics -assert(g:match("bc") == 3) -- new semantics -checkeqlab({nil, 0, "bc"}, g:match("bbc")) ---assert(g:match("xxc") == 4) old semantics -checkeqlab({nil, 0, "xxc"}, g:match("xxc")) --new semantics ---assert(g:match("c") == 2) --old semantics -checkeqlab({nil, 0, "c"}, g:match("c")) --new semantics ---checkeqlab({nil, 0, ""}, g:match("fail")) --old semantics -checkeqlab({nil, 0, "fail"}, g:match("fail")) --new semantics ---checkeqlab({nil, 0, ""}, g:match("aaxx")) --old semantics -checkeqlab({nil, 0, "xx"}, g:match("aaxx")) --new semantics - - ---[[ -S -> (A //{0} (!c .)*) C -A -> a*b / ^{0} +S -> (A //{128} (!c .)*) C +A -> a*b / %128 C -> c+ ]] g = m.P{ "S", - S = m.Rec(m.V"A", (-m.P"c" * m.P(1))^0, 0) * m.V"C", --explicitly put 0 in Rec - A = m.P"a"^0 * m.P"b" + m.T(0), + S = m.Rec(m.V"A", (-m.P"c" * m.P(1))^0, 128) * m.V"C", + A = m.P"a"^0 * "b" + m.T(128), C = m.P"c"^1, } assert(g:match("abc") == 4) assert(g:match("aabc") == 5) -assert(g:match("aadc") == 5) --updated -assert(g:match("bc") == 3) -- new semantics +assert(g:match("aadc") == 5) +assert(g:match("dc") == 3) checkeqlab({nil, 0, "bc"}, g:match("bbc")) assert(g:match("xxc") == 4) -assert(g:match("c") == 2) --old semantics updated -checkeqlab({nil, 0, ""}, g:match("fail")) --old semantics updated -checkeqlab({nil, 0, ""}, g:match("aaxx")) --old semantics updated - - +assert(g:match("c") == 2) +checkeqlab({nil, 0, ""}, g:match("fail")) +checkeqlab({nil, 0, ""}, g:match("aaxx")) --[[ @@ 
-810,6 +861,120 @@ checkeqlab({nil, 4, "dc"}, g3:match("dc")) checkeqlab({nil, 4, "d"}, g3:match(".d")) +-- testing more captures +local g = re.compile[[ + S <- ( %s* &. {A} )* + A <- [0-9]+ / %{5} +]] -print("OK") +checkeq({"523", "624", "346", "888"} , {g:match("523 624 346\n888")}) +checkeq({nil, 5, "a 123"}, {g:match("44 a 123")}) + +local g2 = m.Rec(g, ((-m.R("09") * m.P(1))^0) / "58", 5) + +checkeq({"523", "624", "346", "888"} , {g2:match("523 624 346\n888")}) +checkeq({"44", "a ", "58", "123"}, {g2:match("44 a 123")}) + + +local g = re.compile[[ + S <- ( %s* &. A )* + A <- {[0-9]+} / %{5} +]] + +checkeq({"523", "624", "346", "888"} , {g:match("523 624 346\n888")}) +checkeq({nil, 5, "a 123"}, {g:match("44 a 123")}) + +local g2 = m.Rec(g, ((-m.R("09") * m.P(1))^0) / "58", 5) + +checkeq({"523", "624", "346", "888"} , {g2:match("523 624 346\n888")}) +checkeq({"44", "58", "123"}, {g2:match("44 a 123")}) + + +local R, S, P, V = lpeg.R, lpeg.S, lpeg.P, lpeg.V +local C, Cc, Ct, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cmt +local T, Lc, Rec = lpeg.T, lpeg.Lc, lpeg.Rec + +local labels = { + {"NoExp", "no expression found"}, + {"Extra", "extra characters found after the expression"}, + {"ExpTerm", "expected a term after the operator"}, + {"ExpExp", "expected an expression after the parenthesis"}, + {"MisClose", "missing a closing ')' after the expression"}, +} + +local function labelindex(labname) + for i, elem in ipairs(labels) do + if elem[1] == labname then + return i + end + end + error("could not find label: " .. 
labname) +end + +local errors = {} + +local function expect(patt, labname, recpatt) + local i = labelindex(labname) + function recorderror(input, pos) + table.insert(errors, {i, pos}) + return true + end + if not recpatt then recpatt = P"" end + --return Rec(patt, Cmt("", recorderror) * recpatt) + return patt + T(i) +end + +local num = R("09")^1 / tonumber +local op = S("+-*/") + +local function compute(tokens) + local result = tokens[1] + for i = 2, #tokens, 2 do + if tokens[i] == '+' then + result = result + tokens[i+1] + elseif tokens[i] == '-' then + result = result - tokens[i+1] + elseif tokens[i] == '*' then + result = result * tokens[i+1] + elseif tokens[i] == '/' then + result = result / tokens[i+1] + else + error('unknown operation: ' .. tokens[i]) + end + end + return result +end + +local g = P { +"Exp", +Exp = Ct(V"Term" * (C(op) * V"Operand")^0) / compute, +Operand = expect(V"Term", "ExpTerm"), +Term = num, +} +local rg = Rec(g, Cc(3), labelindex("ExpTerm")) + +local function eval(input) + local result, label, suffix = rg:match(input) + if #errors == 0 then + return result + else + local out = {} + for i, err in ipairs(errors) do + local pos = err[2] + local msg = labels[err[1]][2] + table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")") + end + errors = {} + return nil, table.concat(out, "\n") + end +end + +assert(eval("98-76*54/32") == 37.125) +--> 37.125 + +assert(eval("1+") == 4) +--> syntax error: expected a term after the operator (at index 3) + + +print("OK") -- cgit v1.2.3-55-g6feb