From 0671e873f50ba568074d1683e068d5dce88dd43b Mon Sep 17 00:00:00 2001 From: Sergio Date: Fri, 21 Jun 2019 18:07:07 -0300 Subject: Updating LPegLabel to the codebase of LPeg 1.0.2 --- lpcap.c | 64 ++++++++++++++++++++++++++++---------------- lpcap.h | 3 ++- lpcode.c | 2 +- lpcode.h | 2 +- lpprint.c | 2 +- lpprint.h | 2 +- lptree.c | 18 +++++++------ lptree.h | 2 +- lptypes.h | 10 +++---- lpvm.c | 88 ++++++++++++++++++++++++++++++++++--------------------------- lpvm.h | 2 +- relabel.lua | 34 ++++++++++++++---------- test.lua | 22 +++++++++++++++- 13 files changed, 152 insertions(+), 99 deletions(-) diff --git a/lpcap.c b/lpcap.c index c9085de..b332fde 100644 --- a/lpcap.c +++ b/lpcap.c @@ -1,5 +1,5 @@ /* -** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $ +** $Id: lpcap.c $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -271,15 +271,15 @@ int finddyncap (Capture *cap, Capture *last) { /* -** Calls a runtime capture. Returns number of captures removed by -** the call, including the initial Cgroup. (Captures to be added are -** on the Lua stack.) +** Calls a runtime capture. Returns number of captures "removed" by the +** call, that is, those inside the group capture. Captures to be added +** are on the Lua stack. */ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { int n, id; lua_State *L = cs->L; int otop = lua_gettop(L); - Capture *open = findopen(close); + Capture *open = findopen(close); /* get open group capture */ assert(captype(open) == Cgroup); id = finddyncap(open, close); /* get first dynamic capture argument */ close->kind = Cclose; /* closes the group */ @@ -299,7 +299,7 @@ int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { } else *rem = 0; /* no dynamic captures removed */ - return close - open; /* number of captures of all kinds removed */ + return close - open - 1; /* number of captures to be removed */ } @@ -441,70 +441,88 @@ static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { } +#if !defined(MAXRECLEVEL) +#define MAXRECLEVEL 200 +#endif + + /* ** Push all values of the current capture into the stack; returns ** number of values pushed */ static int pushcapture (CapState *cs) { lua_State *L = cs->L; + int res; luaL_checkstack(L, 4, "too many captures"); + if (cs->reclevel++ > MAXRECLEVEL) + return luaL_error(L, "subcapture nesting too deep"); switch (captype(cs->cap)) { case Cposition: { lua_pushinteger(L, cs->cap->s - cs->s + 1); cs->cap++; - return 1; + res = 1; + break; } case Cconst: { pushluaval(cs); cs->cap++; - return 1; + res = 1; + break; } case Carg: { int arg = (cs->cap++)->idx; if (arg + FIXEDARGS > cs->ptop) return luaL_error(L, "reference to absent extra argument #%d", arg); lua_pushvalue(L, arg + FIXEDARGS); - return 1; + res = 1; + break; } case Csimple: { int k = pushnestedvalues(cs, 1); lua_insert(L, -k); /* make whole match be first result */ - return k; + res = k; + break; } case Cruntime: { lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ - return 1; + res = 1; + break; } case Cstring: { luaL_Buffer b; luaL_buffinit(L, &b); stringcap(&b, cs); luaL_pushresult(&b); - return 1; + res = 1; + break; } case Csubst: { luaL_Buffer b; luaL_buffinit(L, &b); substcap(&b, cs); luaL_pushresult(&b); - return 1; + res = 1; + break; } case Cgroup: { if (cs->cap->idx == 0) /* anonymous group? */ - return pushnestedvalues(cs, 0); /* add all nested values */ + res = pushnestedvalues(cs, 0); /* add all nested values */ else { /* named group: add no values */ nextcap(cs); /* skip capture */ - return 0; + res = 0; } + break; } - case Cbackref: return backrefcap(cs); - case Ctable: return tablecap(cs); - case Cfunction: return functioncap(cs); - case Cnum: return numcap(cs); - case Cquery: return querycap(cs); - case Cfold: return foldcap(cs); - default: assert(0); return 0; + case Cbackref: res = backrefcap(cs); break; + case Ctable: res = tablecap(cs); break; + case Cfunction: res = functioncap(cs); break; + case Cnum: res = numcap(cs); break; + case Cquery: res = querycap(cs); break; + case Cfold: res = foldcap(cs); break; + default: assert(0); res = 0; } + cs->reclevel--; + return res; } @@ -521,7 +539,7 @@ int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { int n = 0; if (!isclosecap(capture)) { /* is there any capture? */ CapState cs; - cs.ocap = cs.cap = capture; cs.L = L; + cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; cs.s = s; cs.valuecached = 0; cs.ptop = ptop; do { /* collect their values */ n += pushcapture(&cs); diff --git a/lpcap.h b/lpcap.h index 6133df2..dc10d69 100644 --- a/lpcap.h +++ b/lpcap.h @@ -1,5 +1,5 @@ /* -** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $ +** $Id: lpcap.h $ */ #if !defined(lpcap_h) @@ -44,6 +44,7 @@ typedef struct CapState { int ptop; /* index of last argument to 'match' */ const char *s; /* original string */ int valuecached; /* value stored in cache slot */ + int reclevel; /* recursion level */ } CapState; diff --git a/lpcode.c b/lpcode.c index 6cf2933..ecc6fa3 100644 --- a/lpcode.c +++ b/lpcode.c @@ -1,5 +1,5 @@ /* -** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $ +** $Id: lpcode.c $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ diff --git a/lpcode.h b/lpcode.h index 2a5861e..34ee276 100644 --- a/lpcode.h +++ b/lpcode.h @@ -1,5 +1,5 @@ /* -** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $ +** $Id: lpcode.h $ */ #if !defined(lpcode_h) diff --git a/lpprint.c b/lpprint.c index 3c6a8f6..76a7007 100644 --- a/lpprint.c +++ b/lpprint.c @@ -1,5 +1,5 @@ /* -** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $ +** $Id: lpprint.c $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ diff --git a/lpprint.h b/lpprint.h index 6329760..15ef121 100644 --- a/lpprint.h +++ b/lpprint.h @@ -1,5 +1,5 @@ /* -** $Id: lpprint.h,v 1.2 2015/06/12 18:18:08 roberto Exp $ +** $Id: lpprint.h $ */ diff --git a/lptree.c b/lptree.c index 33ae79d..b1a32c4 100644 --- a/lptree.c +++ b/lptree.c @@ -1,5 +1,5 @@ /* -** $Id: lptree.c,v 1.22 2016/09/13 18:10:22 roberto Exp $ +** $Id: lptree.c $ ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -750,6 +750,7 @@ static int capture_aux (lua_State *L, int cap, int labelidx) { /* ** Fill a tree with an empty capture, using an empty (TTrue) sibling. +** (The 'key' field must be filled by the caller to finish the tree.) */ static TTree *auxemptycap (TTree *tree, int cap) { tree->tag = TCapture; @@ -760,15 +761,17 @@ static TTree *auxemptycap (TTree *tree, int cap) { /* -** Create a tree for an empty capture +** Create a tree for an empty capture. */ -static TTree *newemptycap (lua_State *L, int cap) { - return auxemptycap(newtree(L, 2), cap); +static TTree *newemptycap (lua_State *L, int cap, int key) { + TTree *tree = auxemptycap(newtree(L, 2), cap); + tree->key = key; + return tree; } /* -** Create a tree for an empty capture with an associated Lua value +** Create a tree for an empty capture with an associated Lua value. */ static TTree *newemptycapkey (lua_State *L, int cap, int idx) { TTree *tree = auxemptycap(newtree(L, 2), cap); @@ -829,16 +832,15 @@ static int lp_simplecapture (lua_State *L) { static int lp_poscapture (lua_State *L) { - newemptycap(L, Cposition); + newemptycap(L, Cposition, 0); return 1; } static int lp_argcapture (lua_State *L) { int n = (int)luaL_checkinteger(L, 1); - TTree *tree = newemptycap(L, Carg); - tree->key = n; luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); + newemptycap(L, Carg, n); return 1; } diff --git a/lptree.h b/lptree.h index 24a9ac7..0cf160a 100644 --- a/lptree.h +++ b/lptree.h @@ -1,5 +1,5 @@ /* -** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $ +** $Id: lptree.h $ */ #if !defined(lptree_h) diff --git a/lptypes.h b/lptypes.h index 8503223..3261428 100644 --- a/lptypes.h +++ b/lptypes.h @@ -1,7 +1,7 @@ /* -** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $ +** $Id: lptypes.h $ ** LPeg - PEG pattern matching for Lua -** Copyright 2007-2017, Lua.org & PUC-Rio (see 'lpeg.html' for license) +** Copyright 2007-2019, Lua.org & PUC-Rio (see 'lpeg.html' for license) ** written by Roberto Ierusalimschy */ @@ -9,17 +9,13 @@ #define lptypes_h -#if !defined(LPEG_DEBUG) -#define NDEBUG -#endif - #include #include #include "lua.h" -#define VERSION "1.5.1" +#define VERSION "1.5.2" #define PATTERN_T "lpeg-pattern" diff --git a/lpvm.c b/lpvm.c index 0c70766..a791c44 100644 --- a/lpvm.c +++ b/lpvm.c @@ -1,5 +1,5 @@ /* -** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $ +** $Id: lpvm.c $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -47,18 +47,29 @@ typedef struct Stack { /* -** Make the size of the array of captures 'cap' twice as large as needed -** (which is 'captop'). ('n' is the number of new elements.) +** Ensures the size of array 'capture' (with size '*capsize' and +** 'captop' elements being used) is enough to accomodate 'n' extra +** elements plus one. (Because several opcodes add stuff to the capture +** array, it is simpler to ensure the array always has at least one free +** slot upfront and check its size later.) */ -static Capture *doublecap (lua_State *L, Capture *cap, int captop, - int n, int ptop) { - Capture *newc; - if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) - luaL_error(L, "too many captures"); - newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); - memcpy(newc, cap, (captop - n) * sizeof(Capture)); - lua_replace(L, caplistidx(ptop)); - return newc; +static Capture *growcap (lua_State *L, Capture *capture, int *capsize, + int captop, int n, int ptop) { + if (*capsize - captop > n) + return capture; /* no need to grow array */ + else { /* must grow */ + Capture *newc; + int newsize = captop + n + 1; /* minimum size needed */ + if (newsize < INT_MAX/((int)sizeof(Capture) * 2)) + newsize *= 2; /* twice that size, if not too big */ + else if (newsize >= INT_MAX/((int)sizeof(Capture))) + luaL_error(L, "too many captures"); + newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture)); + memcpy(newc, capture, captop * sizeof(Capture)); + *capsize = newsize; + lua_replace(L, caplistidx(ptop)); + return newc; + } } @@ -111,24 +122,24 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { /* -** Add capture values returned by a dynamic capture to the capture list -** 'base', nested inside a group capture. 'fd' indexes the first capture -** value, 'n' is the number of values (at least 1). +** Add capture values returned by a dynamic capture to the list +** 'capture', nested inside a group. 'fd' indexes the first capture +** value, 'n' is the number of values (at least 1). The open group +** capture is already in 'capture', before the place for the new entries. */ -static void adddyncaptures (const char *s, Capture *base, int n, int fd) { +static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { int i; - base[0].kind = Cgroup; /* create group capture */ - base[0].siz = 0; - base[0].idx = 0; /* make it an anonymous group */ - for (i = 1; i <= n; i++) { /* add runtime captures */ - base[i].kind = Cruntime; - base[i].siz = 1; /* mark it as closed */ - base[i].idx = fd + i - 1; /* stack index of capture value */ - base[i].s = s; + assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); + capture[-1].idx = 0; /* make group capture an anonymous group */ + for (i = 0; i < n; i++) { /* add runtime captures */ + capture[i].kind = Cruntime; + capture[i].siz = 1; /* mark it as closed */ + capture[i].idx = fd + i; /* stack index of capture value */ + capture[i].s = s; } - base[i].kind = Cclose; /* close group */ - base[i].siz = 1; - base[i].s = s; + capture[n].kind = Cclose; /* close group */ + capture[n].siz = 1; + capture[n].s = s; } @@ -374,7 +385,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, CapState cs; int rem, res, n; int fr = lua_gettop(L) + 1; /* stack index of first result */ - cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; + cs.reclevel = 0; cs.L = L; + cs.s = o; cs.ocap = capture; cs.ptop = ptop; n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ captop -= n; /* remove nested captures */ ndyncap -= rem; /* update number of dynamic captures */ @@ -388,15 +400,15 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, s = o + res; /* else update current position */ n = lua_gettop(L) - fr + 1; /* number of new captures */ ndyncap += n; /* update number of dynamic captures */ - if (n > 0) { /* any new capture? */ + if (n == 0) /* no new captures? */ + captop--; /* remove open group */ + else { /* new captures; keep original open group */ if (fr + n >= SHRT_MAX) luaL_error(L, "too many results in match-time capture"); - if ((captop += n + 2) >= capsize) { - capture = doublecap(L, capture, captop, n + 2, ptop); - capsize = 2 * captop; - } - /* add new captures to 'capture' list */ - adddyncaptures(s, capture + captop - n - 2, n, fr); + /* add new captures + close group to 'capture' list */ + capture = growcap(L, capture, &capsize, captop, n + 1, ptop); + adddyncaptures(s, capture + captop, n, fr); + captop += n + 1; /* new captures + close group */ } p++; continue; @@ -428,10 +440,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, pushcapture: { capture[captop].idx = p->i.key; capture[captop].kind = getkind(p); - if (++captop >= capsize) { - capture = doublecap(L, capture, captop, 0, ptop); - capsize = 2 * captop; - } + captop++; + capture = growcap(L, capture, &capsize, captop, 0, ptop); p++; continue; } diff --git a/lpvm.h b/lpvm.h index c5457c4..6633c4b 100644 --- a/lpvm.h +++ b/lpvm.h @@ -1,5 +1,5 @@ /* -** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $ +** $Id: lpvm.h $ */ #if !defined(lpvm_h) diff --git a/relabel.lua b/relabel.lua index f668086..eb71dd0 100644 --- a/relabel.lua +++ b/relabel.lua @@ -1,4 +1,4 @@ --- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ +-- $Id: re.lua $ -- imported functions and modules local tonumber, type, print, error = tonumber, type, print, error @@ -126,15 +126,6 @@ updatelocale() local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) -local function getdef (id, defs) - local c = defs and defs[id] - if not c then - error("undefined name: " .. id) - end - return c -end - - local function mult (p, n) local np = mm.P(true) while n >= 1 do @@ -161,6 +152,20 @@ local arrow = S * "<-" -- a defined name only have meaning in a given environment local Def = name * m.Carg(1) + +local function getdef (id, defs) + local c = defs and defs[id] + if not c then error("undefined name: " .. id) end + return c +end + +-- match a name and return a group of its corresponding definition +-- and 'f' (to be folded in 'Suffix') +local function defwithfunc (f) + return m.Cg(Def / getdef * m.Cc(f)) +end + + local num = m.C(m.R"09"^1) * S / tonumber local String = "'" * m.C((any - "'" - m.P"\n")^0) * expect("'", "MisTerm1") @@ -177,7 +182,7 @@ end local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R -local item = defined + Range + m.C(any - m.P"\n") +local item = (defined + Range + m.C(any - m.P"\n")) / m.P local Class = "[" @@ -224,12 +229,13 @@ local exp = m.P{ "Exp", "ExpNumName") + "->" * expect(S * ( m.Cg((String + num) * m.Cc(mt.__div)) + m.P"{}" * m.Cc(nil, m.Ct) - + m.Cg(Def / getdef * m.Cc(mt.__div)) + + defwithfunc(mt.__div) ), "ExpCap") - + "=>" * expect(S * m.Cg(Def / getdef * m.Cc(m.Cmt)), + + "=>" * expect(S * defwithfunc(m.Cmt), "ExpName1") - ) + + "~>" * S * defwithfunc(m.Cf) + ) --* S )^0, function (a,b,f) if f == "lab" then return a + mm.T(b) else return f(a,b) end end ); Primary = "(" * expect(m.V"Exp", "ExpPatt4") * expect(S * ")", "MisClose1") + String / mm.P diff --git a/test.lua b/test.lua index a3b86bf..2c05dd0 100755 --- a/test.lua +++ b/test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env lua --- $Id: test.lua,v 1.112 2017/01/14 18:55:22 roberto Exp $ +-- $Id: test.lua $ -- require"strict" -- just to be pedantic @@ -424,6 +424,16 @@ do end +do + -- nesting of captures too deep + local p = m.C(1) + for i = 1, 300 do + p = m.Ct(p) + end + checkerr("too deep", p.match, p, "x") +end + + -- tests for non-pattern as arguments to pattern functions p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 } @@ -1186,6 +1196,9 @@ assert(not match("abbcde", " [b-z] + ")) assert(match("abb\"de", '"abb"["]"de"') == 7) assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee") assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8) + +assert(re.match("aaand", "[a]^2") == 3) + local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")} checkeq(t, {4, 5, 7}) local t = {match("abceefe", "((&&'e' {})? .)*")} @@ -1360,6 +1373,13 @@ checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but', {'totheend'}}) +-- test for folding captures +c = re.compile([[ + S <- (number (%s+ number)*) ~> add + number <- %d+ -> tonumber +]], {tonumber = tonumber, add = function (a,b) return a + b end}) +assert(c:match("3 401 50") == 3 + 401 + 50) + -- tests for look-ahead captures x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")} checkeq(x, {"", "alo", ""}) -- cgit v1.2.3-55-g6feb