From 8ee42c29131e1c7de48575d6d8a9b24ea6977cbd Mon Sep 17 00:00:00 2001 From: Sergio Queiroz Date: Thu, 6 Jul 2017 11:21:56 -0300 Subject: Updating lpeglabel to the codebase of LPeg 1.0.1 --- lpcap.h | 19 +++++++++++--- lpcode.c | 87 ++++++++++++++++++++++++++++++++++++++++++--------------------- lpcode.h | 6 ++--- lpprint.c | 22 ++++++++-------- lptree.c | 19 +++++++++----- lptree.h | 57 +++++++++++++++++++++++------------------ lptypes.h | 10 ++++---- lpvm.c | 38 +++++++++++++++++----------- makefile | 4 +-- test.lua | 57 ++++++++++++++++++++++++++++++++++++++++- 10 files changed, 217 insertions(+), 102 deletions(-) diff --git a/lpcap.h b/lpcap.h index d762fdc..6133df2 100644 --- a/lpcap.h +++ b/lpcap.h @@ -1,5 +1,5 @@ /* -** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ +** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $ */ #if !defined(lpcap_h) @@ -11,8 +11,21 @@ /* kinds of captures */ typedef enum CapKind { - Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, - Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup + Cclose, /* not used in trees */ + Cposition, + Cconst, /* ktable[key] is Lua constant */ + Cbackref, /* ktable[key] is "name" of group to get capture */ + Carg, /* 'key' is arg's number */ + Csimple, /* next node is pattern */ + Ctable, /* next node is pattern */ + Cfunction, /* ktable[key] is function; next node is pattern */ + Cquery, /* ktable[key] is table; next node is pattern */ + Cstring, /* ktable[key] is string; next node is pattern */ + Cnum, /* numbered capture; 'key' is number of value to return */ + Csubst, /* substitution capture; next node is pattern */ + Cfold, /* ktable[key] is function; next node is pattern */ + Cruntime, /* not used in trees (is uses another type for tree) */ + Cgroup /* ktable[key] is group's "name" */ } CapKind; diff --git a/lpcode.c b/lpcode.c index b2dbba2..a5cf2e2 100644 --- a/lpcode.c +++ b/lpcode.c @@ -1,5 +1,5 @@ /* -** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $ +** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -125,6 +125,27 @@ int tocharset (TTree *tree, Charset *cs) { } +/* +** Visit a TCall node taking care to stop recursion. If node not yet +** visited, return 'f(sib2(tree))', otherwise return 'def' (default +** value) +*/ +static int callrecursive (TTree *tree, int f (TTree *t), int def) { + int key = tree->key; + assert(tree->tag == TCall); + assert(sib2(tree)->tag == TRule); + if (key == 0) /* node already visited? */ + return def; /* return default value */ + else { /* first visit */ + int result; + tree->key = 0; /* mark call as already visited */ + result = f(sib2(tree)); /* go to called rule */ + tree->key = key; /* restore tree */ + return result; + } +} + + /* ** Check whether a pattern tree has captures */ @@ -134,14 +155,17 @@ int hascaptures (TTree *tree) { case TCapture: case TRunTime: return 1; case TCall: - tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ + return callrecursive(tree, hascaptures, 0); + case TRule: /* do not follow siblings */ + tree = sib1(tree); goto tailcall; case TOpenCall: assert(0); default: { switch (numsiblings[tree->tag]) { case 1: /* return hascaptures(sib1(tree)); */ tree = sib1(tree); goto tailcall; case 2: - if (hascaptures(sib1(tree))) return 1; + if (hascaptures(sib1(tree))) + return 1; /* else return hascaptures(sib2(tree)); */ tree = sib2(tree); goto tailcall; default: assert(numsiblings[tree->tag] == 0); return 0; @@ -211,39 +235,42 @@ int checkaux (TTree *tree, int pred) { /* ** number of characters to match a pattern (or -1 if variable) -** ('count' avoids infinite loops for grammars) */ -int fixedlenx (TTree *tree, int count, int len) { +int fixedlen (TTree *tree) { + int len = 0; /* to accumulate in tail calls */ tailcall: switch (tree->tag) { case TChar: case TSet: case TAny: return len + 1; - case TFalse: case TTrue: case TNot: case TAnd: case TBehind: + case TFalse: case TTrue: case TNot: case TAnd: case TBehind: return len; case TRep: case TRunTime: case TOpenCall: case TThrow: /* labeled failure */ return -1; case TCapture: case TRule: case TGrammar: - /* return fixedlenx(sib1(tree), count); */ + /* return fixedlen(sib1(tree)); */ tree = sib1(tree); goto tailcall; - case TCall: - if (count++ >= MAXRULES) - return -1; /* may be a loop */ - /* else return fixedlenx(sib2(tree), count); */ - tree = sib2(tree); goto tailcall; + case TCall: { + int n1 = callrecursive(tree, fixedlen, -1); + if (n1 < 0) + return -1; + else + return len + n1; + } case TSeq: case TRecov: { /* labeled failure */ - len = fixedlenx(sib1(tree), count, len); - if (len < 0) return -1; - /* else return fixedlenx(sib2(tree), count, len); */ - tree = sib2(tree); goto tailcall; + int n1 = fixedlen(sib1(tree)); + if (n1 < 0) + return -1; + /* else return fixedlen(sib2(tree)) + len; */ + len += n1; tree = sib2(tree); goto tailcall; } - case TChoice: { - int n1, n2; - n1 = fixedlenx(sib1(tree), count, len); - if (n1 < 0) return -1; - n2 = fixedlenx(sib2(tree), count, len); - if (n1 == n2) return n1; - else return -1; + case TChoice: { + int n1 = fixedlen(sib1(tree)); + int n2 = fixedlen(sib2(tree)); + if (n1 != n2 || n1 < 0) + return -1; + else + return len + n1; } default: assert(0); return 0; }; @@ -287,7 +314,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */ return 1; } - case TChoice: { + case TChoice: { Charset csaux; int e1 = getfirst(sib1(tree), follow, firstset); int e2 = getfirst(sib2(tree), follow, &csaux); @@ -378,7 +405,7 @@ static int headfail (TTree *tree) { if (!nofail(sib2(tree))) return 0; /* else return headfail(sib1(tree)); */ tree = sib1(tree); goto tailcall; - case TChoice: case TRecov: /* labeled failure */ + case TChoice: case TRecov: /* labeled failure */ if (!headfail(sib1(tree))) return 0; /* else return headfail(sib2(tree)); */ tree = sib2(tree); goto tailcall; @@ -433,8 +460,9 @@ int sizei (const Instruction *i) { return 2; case IThrow: /* labeled failure */ return 1; - case IRecov: + case IRecov: return (CHARSETINSTSIZE - 1) + 2; /* labeled failure */ + default: return 1; } } @@ -750,9 +778,10 @@ static void codeand (CompileState *compst, TTree *tree, int tt) { /* -** Captures: if pattern has fixed (and not too big) length, use -** a single IFullCapture instruction after the match; otherwise, -** enclose the pattern with OpenCapture - CloseCapture. +** Captures: if pattern has fixed (and not too big) length, and it +** has no nested captures, use a single IFullCapture instruction +** after the match; otherwise, enclose the pattern with OpenCapture - +** CloseCapture. */ static void codecapture (CompileState *compst, TTree *tree, int tt, const Charset *fl) { diff --git a/lpcode.h b/lpcode.h index 896d3c7..2a5861e 100644 --- a/lpcode.h +++ b/lpcode.h @@ -1,5 +1,5 @@ /* -** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $ +** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $ */ #if !defined(lpcode_h) @@ -13,7 +13,7 @@ int tocharset (TTree *tree, Charset *cs); int checkaux (TTree *tree, int pred); -int fixedlenx (TTree *tree, int count, int len); +int fixedlen (TTree *tree); int hascaptures (TTree *tree); int lp_gc (lua_State *L); Instruction *compile (lua_State *L, Pattern *p); @@ -35,8 +35,6 @@ int sizei (const Instruction *i); */ #define nullable(t) checkaux(t, PEnullable) -#define fixedlen(t) fixedlenx(t, 0, 0) - #endif diff --git a/lpprint.c b/lpprint.c index 8c488ea..ecaa7f1 100644 --- a/lpprint.c +++ b/lpprint.c @@ -1,5 +1,5 @@ /* -** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $ +** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -37,13 +37,13 @@ void printcharset (const byte *st) { } -static void printcapkind (int kind) { +static const char *capkind (int kind) { const char *const modes[] = { "close", "position", "constant", "backref", "argument", "simple", "table", "function", "query", "string", "num", "substitution", "fold", "runtime", "group"}; - printf("%s", modes[kind]); + return modes[kind]; } @@ -74,13 +74,12 @@ void printinst (const Instruction *op, const Instruction *p) { break; } case IFullCapture: { - printcapkind(getkind(p)); - printf(" (size = %d) (idx = %d)", getoff(p), p->i.key); + printf("%s (size = %d) (idx = %d)", + capkind(getkind(p)), getoff(p), p->i.key); break; } case IOpenCapture: { - printcapkind(getkind(p)); - printf(" (idx = %d)", p->i.key); + printf("%s (idx = %d)", capkind(getkind(p)), p->i.key); break; } case ISet: { @@ -134,8 +133,8 @@ void printpatt (Instruction *p, int n) { #if defined(LPEG_DEBUG) static void printcap (Capture *cap) { - printcapkind(cap->kind); - printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s); + printf("%s (idx: %d - size: %d) -> %p\n", + capkind(cap->kind), cap->idx, cap->siz, cap->s); } @@ -188,7 +187,8 @@ void printtree (TTree *tree, int ident) { break; } case TOpenCall: case TCall: { - printf(" key: %d\n", tree->key); + assert(sib2(tree)->tag == TRule); + printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap); break; } case TBehind: { @@ -197,7 +197,7 @@ void printtree (TTree *tree, int ident) { break; } case TCapture: { - printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n); + printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); printtree(sib1(tree), ident + 2); break; } diff --git a/lptree.c b/lptree.c index 6633035..fcf5ff9 100644 --- a/lptree.c +++ b/lptree.c @@ -1,5 +1,5 @@ /* -** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $ +** $Id: lptree.c,v 1.22 2016/09/13 18:10:22 roberto Exp $ ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -65,7 +65,7 @@ static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) { t->tag = TCall; t->u.s.ps = n - (t - g); /* position relative to node */ assert(sib2(t)->tag == TRule); - sib2(t)->key = t->key; + sib2(t)->key = t->key; /* fix rule's key */ } @@ -542,7 +542,6 @@ static TTree *newrootlab2sib (lua_State *L, int tag) { /* labeled failure end */ - static int lp_P (lua_State *L) { luaL_checkany(L, 1); getpatt(L, 1, NULL); @@ -728,6 +727,7 @@ static int lp_throw (lua_State *L) { return 1; } + /* ** labeled recovery function */ @@ -747,7 +747,6 @@ static int lp_recovery (lua_State *L) { } return 1; } - /* labeled failure end */ @@ -996,7 +995,7 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { int rulesize; TTree *rn = gettree(L, ridx, &rulesize); nd->tag = TRule; - nd->key = 0; + nd->key = 0; /* will be fixed when rule is used */ nd->cap = i; /* rule number */ nd->u.s.ps = rulesize + 1; /* point to next rule */ memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ @@ -1030,6 +1029,11 @@ static int checkloops (TTree *tree) { } +/* +** Give appropriate error message for 'verifyrule'. If a rule appears +** twice in 'passed', there is path from it back to itself without +** advancing the subject. +*/ static int verifyerror (lua_State *L, int *passed, int npassed) { int i, j; for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ @@ -1051,6 +1055,8 @@ static int verifyerror (lua_State *L, int *passed, int npassed) { ** is only relevant if the first is nullable. ** Parameter 'nb' works as an accumulator, to allow tail calls in ** choices. ('nb' true makes function returns true.) +** Parameter 'passed' is a list of already visited rules, 'npassed' +** counts the elements in 'passed'. ** Assume ktable at the top of the stack. */ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, @@ -1330,7 +1336,7 @@ static struct luaL_Reg pattreg[] = { {"setmaxstack", lp_setmax}, {"type", lp_type}, {"T", lp_throw}, /* labeled failure throw */ - {"Rec", lp_recovery}, /* labeled failure choice */ + {"Rec", lp_recovery}, /* labeled failure recovery */ {NULL, NULL} }; @@ -1347,7 +1353,6 @@ static struct luaL_Reg metareg[] = { {NULL, NULL} }; - int luaopen_lpeglabel (lua_State *L); /* labeled failure */ int luaopen_lpeglabel (lua_State *L) { /* labeled failure */ luaL_newmetatable(L, PATTERN_T); diff --git a/lptree.h b/lptree.h index b75f323..e4e8901 100644 --- a/lptree.h +++ b/lptree.h @@ -1,5 +1,5 @@ /* -** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ +** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $ */ #if !defined(lptree_h) @@ -13,32 +13,39 @@ ** types of trees */ typedef enum TTag { - TChar = 0, TSet, TAny, /* standard PEG elements */ - TTrue, TFalse, - TRep, - TSeq, TChoice, - TNot, TAnd, - TCall, - TOpenCall, - TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */ - TGrammar, /* sib1 is initial (and first) rule */ - TBehind, /* match behind */ - TCapture, /* regular capture */ - TRunTime, /* run-time capture */ - TThrow, TRecov /* labeled failure */ + TChar = 0, /* 'n' = char */ + TSet, /* the set is stored in next CHARSETSIZE bytes */ + TAny, + TTrue, + TFalse, + TRep, /* 'sib1'* */ + TSeq, /* 'sib1' 'sib2' */ + TChoice, /* 'sib1' / 'sib2' */ + TNot, /* !'sib1' */ + TAnd, /* &'sib1' */ + TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ + TOpenCall, /* ktable[key] is rule's key */ + TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); + 'sib1' is rule's pattern; + 'sib2' is next rule; 'cap' is rule's sequential number */ + TGrammar, /* 'sib1' is initial (and first) rule */ + TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ + TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); + ktable[key] is Lua value associated with capture; + 'sib1' is capture body */ + TRunTime, /* run-time capture: 'key' is Lua function; + 'sib1' is capture body */ + TThrow, /* labeled failure: 'label' = l */ + TRecov /* labed failure: 'sib1' // 'sib2' */ + /* the set of labels is stored in next CHARSETSIZE bytes */ } TTag; -/* number of siblings for each tree */ -extern const byte numsiblings[]; - /* ** Tree trees -** The first sibling of a tree (if there is one) is immediately after -** the tree. A reference to a second sibling (ps) is its position -** relative to the position of the tree itself. A key in ktable -** uses the (unique) address of the original tree that created that -** entry. NULL means no data. +** The first child of a tree (if there is one) is immediately after +** the tree. A reference to a second child (ps) is its position +** relative to the position of the tree itself. */ typedef struct TTree { byte tag; @@ -48,7 +55,7 @@ typedef struct TTree { int n; /* occasional counter */ int label; /* labeled failure */ struct { /* labeled failure */ - int ps; /* occasional second sibling */ + int ps; /* occasional second child */ int plab; /* occasional label set */ } s; /* labeled failure */ } u; @@ -66,10 +73,10 @@ typedef struct Pattern { } Pattern; -/* number of siblings for each tree */ +/* number of children for each tree */ extern const byte numsiblings[]; -/* access to siblings */ +/* access to children */ #define sib1(t) ((t) + 1) #define sib2(t) ((t) + (t)->u.s.ps) diff --git a/lptypes.h b/lptypes.h index 81f0fdc..0fe250b 100644 --- a/lptypes.h +++ b/lptypes.h @@ -1,7 +1,7 @@ /* -** $Id: lptypes.h,v 1.14 2015/09/28 17:17:41 roberto Exp $ +** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $ ** LPeg - PEG pattern matching for Lua -** Copyright 2007-2015, Lua.org & PUC-Rio (see 'lpeg.html' for license) +** Copyright 2007-2017, Lua.org & PUC-Rio (see 'lpeg.html' for license) ** written by Roberto Ierusalimschy */ @@ -19,7 +19,7 @@ #include "lua.h" -#define VERSION "1.0.0" +#define VERSION "1.0.1" #define PATTERN_T "lpeg-pattern" @@ -55,9 +55,9 @@ #endif -/* maximum number of rules in a grammar */ +/* maximum number of rules in a grammar (limited by 'unsigned char') */ #if !defined(MAXRULES) -#define MAXRULES 1000 +#define MAXRULES 250 #endif diff --git a/lpvm.c b/lpvm.c index c256083..911b4c5 100644 --- a/lpvm.c +++ b/lpvm.c @@ -1,5 +1,5 @@ /* -** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $ +** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $ ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) */ @@ -33,6 +33,7 @@ static void setlabelfail(Labelset *ls) { } /* labeled failure end */ + /* ** {====================================================== ** Virtual Machine @@ -52,14 +53,16 @@ typedef struct Stack { /* -** Double the size of the array of captures +** Make the size of the array of captures 'cap' twice as large as needed +** (which is 'captop'). ('n' is the number of new elements.) */ -static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { +static Capture *doublecap (lua_State *L, Capture *cap, int captop, + int n, int ptop) { Capture *newc; if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) luaL_error(L, "too many captures"); newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); - memcpy(newc, cap, captop * sizeof(Capture)); + memcpy(newc, cap, (captop - n) * sizeof(Capture)); lua_replace(L, caplistidx(ptop)); return newc; } @@ -120,8 +123,8 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { */ static void adddyncaptures (const char *s, Capture *base, int n, int fd) { int i; - /* Cgroup capture is already there */ - assert(base[0].kind == Cgroup && base[0].siz == 0); + base[0].kind = Cgroup; /* create group capture */ + base[0].siz = 0; base[0].idx = 0; /* make it an anonymous group */ for (i = 1; i <= n; i++) { /* add runtime captures */ base[i].kind = Cruntime; @@ -148,8 +151,6 @@ static int removedyncap (lua_State *L, Capture *capture, } - - /* ** Opcode interpreter */ @@ -170,10 +171,11 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, lua_pushlightuserdata(L, stackbase); for (;;) { #if defined(DEBUG) - printinst(op, p); - printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", - s, stack - getstackbase(L, ptop), ndyncap, captop); + printf("-------------------------------------\n"); printcaplist(capture, capture + captop); + printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", + s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); + printinst(op, p); #endif assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); switch ((Opcode)p->i.code) { @@ -275,7 +277,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, p += 2; continue; } - case IRecov: { /* labeled failure */ + case IRecov: { /* labeled failure */ if (stack == stacklimit) stack = doublestack(L, &stacklimit, ptop); stack->p = p + getoffset(p); @@ -354,6 +356,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, stack++; } p = pstack->p; +#if defined(DEBUG) + printf("**FAIL**\n"); +#endif continue; } case ICloseRunTime: { @@ -363,6 +368,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ captop -= n; /* remove nested captures */ + ndyncap -= rem; /* update number of dynamic captures */ fr -= rem; /* 'rem' items were popped from Lua stack */ res = resdyncaptures(L, fr, s - o, e - o); /* get result */ if (res == -1) { /* fail? */ @@ -373,10 +379,12 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, } s = o + res; /* else update current position */ n = lua_gettop(L) - fr + 1; /* number of new captures */ - ndyncap += n - rem; /* update number of dynamic captures */ + ndyncap += n; /* update number of dynamic captures */ if (n > 0) { /* any new capture? */ + if (fr + n >= SHRT_MAX) + luaL_error(L, "too many results in match-time capture"); if ((captop += n + 2) >= capsize) { - capture = doublecap(L, capture, captop, ptop); + capture = doublecap(L, capture, captop, n + 2, ptop); capsize = 2 * captop; } /* add new captures to 'capture' list */ @@ -413,7 +421,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, capture[captop].idx = p->i.key; capture[captop].kind = getkind(p); if (++captop >= capsize) { - capture = doublecap(L, capture, captop, ptop); + capture = doublecap(L, capture, captop, 0, ptop); capsize = 2 * captop; } p++; diff --git a/makefile b/makefile index 5d74b96..c06ab86 100644 --- a/makefile +++ b/makefile @@ -45,10 +45,10 @@ lpeglabel.dll: $(FILES) $(FILES): makefile -test: test.lua testlabel.lua testerrors.lua relabel.lua lpeglabel.so +test: test.lua testlabel.lua testrelabelparser.lua relabel.lua lpeglabel.so lua test.lua lua testlabel.lua - lua testerrors.lua + lua testrelabelparser.lua clean: rm -f $(FILES) lpeglabel.so diff --git a/test.lua b/test.lua index d5922ac..a3b86bf 100755 --- a/test.lua +++ b/test.lua @@ -1,6 +1,6 @@ #!/usr/bin/env lua --- $Id: test.lua,v 1.109 2015/09/28 17:01:25 roberto Exp $ +-- $Id: test.lua,v 1.112 2017/01/14 18:55:22 roberto Exp $ -- require"strict" -- just to be pedantic @@ -202,6 +202,14 @@ do end +-- bug: loop in 'hascaptures' +do + local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'}) + assert(p:match("xxx") == "") +end + + + -- test for small capture boundary for i = 250,260 do assert(#m.match(m.C(i), string.rep('a', i)) == i) @@ -517,6 +525,27 @@ assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") + +-- fixed length +do + -- 'and' predicate using fixed length + local p = m.C(#("a" * (m.P("bd") + "cd")) * 2) + assert(p:match("acd") == "ac") + + p = #m.P{ "a" * m.V(2), m.P"b" } * 2 + assert(p:match("abc") == 3) + + p = #(m.P"abc" * m.B"c") + assert(p:match("abc") == 1 and not p:match("ab")) + + p = m.P{ "a" * m.V(2), m.P"b"^1 } + checkerr("pattern may not have fixed length", m.B, p) + + p = "abc" * (m.P"b"^1 + m.P"a"^0) + checkerr("pattern may not have fixed length", m.B, p) +end + + p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) @@ -1098,6 +1127,32 @@ do assert(c == 11) end + +-- Return a match-time capture that returns 'n' captures +local function manyCmt (n) + return m.Cmt("a", function () + local a = {}; for i = 1, n do a[i] = n - i end + return true, unpack(a) + end) +end + +-- bug in 1.0: failed match-time that used previous match-time results +do + local x + local function aux (...) x = #{...}; return false end + local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")} + assert(#res == 10 and res[1] == 9 and res[10] == 0) +end + + +-- bug in 1.0: problems with math-times returning too many captures +do + local lim = 2^11 - 10 + local res = {m.match(manyCmt(lim), "a")} + assert(#res == lim and res[1] == lim - 1 and res[lim] == 0) + checkerr("too many", m.match, manyCmt(2^15), "a") +end + p = (m.P(function () return true, "a" end) * 'a' + m.P(function (s, i) return i, "aa", 20 end) * 'b' + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 -- cgit v1.2.3-55-g6feb