aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lpcap.h19
-rw-r--r--lpcode.c87
-rw-r--r--lpcode.h6
-rw-r--r--lpprint.c22
-rw-r--r--lptree.c19
-rw-r--r--lptree.h57
-rw-r--r--lptypes.h10
-rw-r--r--lpvm.c38
-rw-r--r--makefile4
-rwxr-xr-xtest.lua57
10 files changed, 217 insertions, 102 deletions
diff --git a/lpcap.h b/lpcap.h
index d762fdc..6133df2 100644
--- a/lpcap.h
+++ b/lpcap.h
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ 2** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $
3*/ 3*/
4 4
5#if !defined(lpcap_h) 5#if !defined(lpcap_h)
@@ -11,8 +11,21 @@
11 11
12/* kinds of captures */ 12/* kinds of captures */
13typedef enum CapKind { 13typedef enum CapKind {
14 Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, 14 Cclose, /* not used in trees */
15 Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup 15 Cposition,
16 Cconst, /* ktable[key] is Lua constant */
17 Cbackref, /* ktable[key] is "name" of group to get capture */
18 Carg, /* 'key' is arg's number */
19 Csimple, /* next node is pattern */
20 Ctable, /* next node is pattern */
21 Cfunction, /* ktable[key] is function; next node is pattern */
22 Cquery, /* ktable[key] is table; next node is pattern */
23 Cstring, /* ktable[key] is string; next node is pattern */
24 Cnum, /* numbered capture; 'key' is number of value to return */
25 Csubst, /* substitution capture; next node is pattern */
26 Cfold, /* ktable[key] is function; next node is pattern */
27 Cruntime, /* not used in trees (is uses another type for tree) */
28 Cgroup /* ktable[key] is group's "name" */
16} CapKind; 29} CapKind;
17 30
18 31
diff --git a/lpcode.c b/lpcode.c
index b2dbba2..a5cf2e2 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $ 2** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) 3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/ 4*/
5 5
@@ -126,6 +126,27 @@ int tocharset (TTree *tree, Charset *cs) {
126 126
127 127
128/* 128/*
129** Visit a TCall node taking care to stop recursion. If node not yet
130** visited, return 'f(sib2(tree))', otherwise return 'def' (default
131** value)
132*/
133static int callrecursive (TTree *tree, int f (TTree *t), int def) {
134 int key = tree->key;
135 assert(tree->tag == TCall);
136 assert(sib2(tree)->tag == TRule);
137 if (key == 0) /* node already visited? */
138 return def; /* return default value */
139 else { /* first visit */
140 int result;
141 tree->key = 0; /* mark call as already visited */
142 result = f(sib2(tree)); /* go to called rule */
143 tree->key = key; /* restore tree */
144 return result;
145 }
146}
147
148
149/*
129** Check whether a pattern tree has captures 150** Check whether a pattern tree has captures
130*/ 151*/
131int hascaptures (TTree *tree) { 152int hascaptures (TTree *tree) {
@@ -134,14 +155,17 @@ int hascaptures (TTree *tree) {
134 case TCapture: case TRunTime: 155 case TCapture: case TRunTime:
135 return 1; 156 return 1;
136 case TCall: 157 case TCall:
137 tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ 158 return callrecursive(tree, hascaptures, 0);
159 case TRule: /* do not follow siblings */
160 tree = sib1(tree); goto tailcall;
138 case TOpenCall: assert(0); 161 case TOpenCall: assert(0);
139 default: { 162 default: {
140 switch (numsiblings[tree->tag]) { 163 switch (numsiblings[tree->tag]) {
141 case 1: /* return hascaptures(sib1(tree)); */ 164 case 1: /* return hascaptures(sib1(tree)); */
142 tree = sib1(tree); goto tailcall; 165 tree = sib1(tree); goto tailcall;
143 case 2: 166 case 2:
144 if (hascaptures(sib1(tree))) return 1; 167 if (hascaptures(sib1(tree)))
168 return 1;
145 /* else return hascaptures(sib2(tree)); */ 169 /* else return hascaptures(sib2(tree)); */
146 tree = sib2(tree); goto tailcall; 170 tree = sib2(tree); goto tailcall;
147 default: assert(numsiblings[tree->tag] == 0); return 0; 171 default: assert(numsiblings[tree->tag] == 0); return 0;
@@ -211,39 +235,42 @@ int checkaux (TTree *tree, int pred) {
211 235
212/* 236/*
213** number of characters to match a pattern (or -1 if variable) 237** number of characters to match a pattern (or -1 if variable)
214** ('count' avoids infinite loops for grammars)
215*/ 238*/
216int fixedlenx (TTree *tree, int count, int len) { 239int fixedlen (TTree *tree) {
240 int len = 0; /* to accumulate in tail calls */
217 tailcall: 241 tailcall:
218 switch (tree->tag) { 242 switch (tree->tag) {
219 case TChar: case TSet: case TAny: 243 case TChar: case TSet: case TAny:
220 return len + 1; 244 return len + 1;
221 case TFalse: case TTrue: case TNot: case TAnd: case TBehind: 245 case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
222 return len; 246 return len;
223 case TRep: case TRunTime: case TOpenCall: 247 case TRep: case TRunTime: case TOpenCall:
224 case TThrow: /* labeled failure */ 248 case TThrow: /* labeled failure */
225 return -1; 249 return -1;
226 case TCapture: case TRule: case TGrammar: 250 case TCapture: case TRule: case TGrammar:
227 /* return fixedlenx(sib1(tree), count); */ 251 /* return fixedlen(sib1(tree)); */
228 tree = sib1(tree); goto tailcall; 252 tree = sib1(tree); goto tailcall;
229 case TCall: 253 case TCall: {
230 if (count++ >= MAXRULES) 254 int n1 = callrecursive(tree, fixedlen, -1);
231 return -1; /* may be a loop */ 255 if (n1 < 0)
232 /* else return fixedlenx(sib2(tree), count); */ 256 return -1;
233 tree = sib2(tree); goto tailcall; 257 else
258 return len + n1;
259 }
234 case TSeq: case TRecov: { /* labeled failure */ 260 case TSeq: case TRecov: { /* labeled failure */
235 len = fixedlenx(sib1(tree), count, len); 261 int n1 = fixedlen(sib1(tree));
236 if (len < 0) return -1; 262 if (n1 < 0)
237 /* else return fixedlenx(sib2(tree), count, len); */ 263 return -1;
238 tree = sib2(tree); goto tailcall; 264 /* else return fixedlen(sib2(tree)) + len; */
265 len += n1; tree = sib2(tree); goto tailcall;
239 } 266 }
240 case TChoice: { 267 case TChoice: {
241 int n1, n2; 268 int n1 = fixedlen(sib1(tree));
242 n1 = fixedlenx(sib1(tree), count, len); 269 int n2 = fixedlen(sib2(tree));
243 if (n1 < 0) return -1; 270 if (n1 != n2 || n1 < 0)
244 n2 = fixedlenx(sib2(tree), count, len); 271 return -1;
245 if (n1 == n2) return n1; 272 else
246 else return -1; 273 return len + n1;
247 } 274 }
248 default: assert(0); return 0; 275 default: assert(0); return 0;
249 }; 276 };
@@ -287,7 +314,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
287 loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */ 314 loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */
288 return 1; 315 return 1;
289 } 316 }
290 case TChoice: { 317 case TChoice: {
291 Charset csaux; 318 Charset csaux;
292 int e1 = getfirst(sib1(tree), follow, firstset); 319 int e1 = getfirst(sib1(tree), follow, firstset);
293 int e2 = getfirst(sib2(tree), follow, &csaux); 320 int e2 = getfirst(sib2(tree), follow, &csaux);
@@ -378,7 +405,7 @@ static int headfail (TTree *tree) {
378 if (!nofail(sib2(tree))) return 0; 405 if (!nofail(sib2(tree))) return 0;
379 /* else return headfail(sib1(tree)); */ 406 /* else return headfail(sib1(tree)); */
380 tree = sib1(tree); goto tailcall; 407 tree = sib1(tree); goto tailcall;
381 case TChoice: case TRecov: /* labeled failure */ 408 case TChoice: case TRecov: /* labeled failure */
382 if (!headfail(sib1(tree))) return 0; 409 if (!headfail(sib1(tree))) return 0;
383 /* else return headfail(sib2(tree)); */ 410 /* else return headfail(sib2(tree)); */
384 tree = sib2(tree); goto tailcall; 411 tree = sib2(tree); goto tailcall;
@@ -433,8 +460,9 @@ int sizei (const Instruction *i) {
433 return 2; 460 return 2;
434 case IThrow: /* labeled failure */ 461 case IThrow: /* labeled failure */
435 return 1; 462 return 1;
436 case IRecov: 463 case IRecov:
437 return (CHARSETINSTSIZE - 1) + 2; /* labeled failure */ 464 return (CHARSETINSTSIZE - 1) + 2; /* labeled failure */
465
438 default: return 1; 466 default: return 1;
439 } 467 }
440} 468}
@@ -750,9 +778,10 @@ static void codeand (CompileState *compst, TTree *tree, int tt) {
750 778
751 779
752/* 780/*
753** Captures: if pattern has fixed (and not too big) length, use 781** Captures: if pattern has fixed (and not too big) length, and it
754** a single IFullCapture instruction after the match; otherwise, 782** has no nested captures, use a single IFullCapture instruction
755** enclose the pattern with OpenCapture - CloseCapture. 783** after the match; otherwise, enclose the pattern with OpenCapture -
784** CloseCapture.
756*/ 785*/
757static void codecapture (CompileState *compst, TTree *tree, int tt, 786static void codecapture (CompileState *compst, TTree *tree, int tt,
758 const Charset *fl) { 787 const Charset *fl) {
diff --git a/lpcode.h b/lpcode.h
index 896d3c7..2a5861e 100644
--- a/lpcode.h
+++ b/lpcode.h
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $ 2** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $
3*/ 3*/
4 4
5#if !defined(lpcode_h) 5#if !defined(lpcode_h)
@@ -13,7 +13,7 @@
13 13
14int tocharset (TTree *tree, Charset *cs); 14int tocharset (TTree *tree, Charset *cs);
15int checkaux (TTree *tree, int pred); 15int checkaux (TTree *tree, int pred);
16int fixedlenx (TTree *tree, int count, int len); 16int fixedlen (TTree *tree);
17int hascaptures (TTree *tree); 17int hascaptures (TTree *tree);
18int lp_gc (lua_State *L); 18int lp_gc (lua_State *L);
19Instruction *compile (lua_State *L, Pattern *p); 19Instruction *compile (lua_State *L, Pattern *p);
@@ -35,8 +35,6 @@ int sizei (const Instruction *i);
35*/ 35*/
36#define nullable(t) checkaux(t, PEnullable) 36#define nullable(t) checkaux(t, PEnullable)
37 37
38#define fixedlen(t) fixedlenx(t, 0, 0)
39
40 38
41 39
42#endif 40#endif
diff --git a/lpprint.c b/lpprint.c
index 8c488ea..ecaa7f1 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $ 2** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) 3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/ 4*/
5 5
@@ -37,13 +37,13 @@ void printcharset (const byte *st) {
37} 37}
38 38
39 39
40static void printcapkind (int kind) { 40static const char *capkind (int kind) {
41 const char *const modes[] = { 41 const char *const modes[] = {
42 "close", "position", "constant", "backref", 42 "close", "position", "constant", "backref",
43 "argument", "simple", "table", "function", 43 "argument", "simple", "table", "function",
44 "query", "string", "num", "substitution", "fold", 44 "query", "string", "num", "substitution", "fold",
45 "runtime", "group"}; 45 "runtime", "group"};
46 printf("%s", modes[kind]); 46 return modes[kind];
47} 47}
48 48
49 49
@@ -74,13 +74,12 @@ void printinst (const Instruction *op, const Instruction *p) {
74 break; 74 break;
75 } 75 }
76 case IFullCapture: { 76 case IFullCapture: {
77 printcapkind(getkind(p)); 77 printf("%s (size = %d) (idx = %d)",
78 printf(" (size = %d) (idx = %d)", getoff(p), p->i.key); 78 capkind(getkind(p)), getoff(p), p->i.key);
79 break; 79 break;
80 } 80 }
81 case IOpenCapture: { 81 case IOpenCapture: {
82 printcapkind(getkind(p)); 82 printf("%s (idx = %d)", capkind(getkind(p)), p->i.key);
83 printf(" (idx = %d)", p->i.key);
84 break; 83 break;
85 } 84 }
86 case ISet: { 85 case ISet: {
@@ -134,8 +133,8 @@ void printpatt (Instruction *p, int n) {
134 133
135#if defined(LPEG_DEBUG) 134#if defined(LPEG_DEBUG)
136static void printcap (Capture *cap) { 135static void printcap (Capture *cap) {
137 printcapkind(cap->kind); 136 printf("%s (idx: %d - size: %d) -> %p\n",
138 printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s); 137 capkind(cap->kind), cap->idx, cap->siz, cap->s);
139} 138}
140 139
141 140
@@ -188,7 +187,8 @@ void printtree (TTree *tree, int ident) {
188 break; 187 break;
189 } 188 }
190 case TOpenCall: case TCall: { 189 case TOpenCall: case TCall: {
191 printf(" key: %d\n", tree->key); 190 assert(sib2(tree)->tag == TRule);
191 printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap);
192 break; 192 break;
193 } 193 }
194 case TBehind: { 194 case TBehind: {
@@ -197,7 +197,7 @@ void printtree (TTree *tree, int ident) {
197 break; 197 break;
198 } 198 }
199 case TCapture: { 199 case TCapture: {
200 printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n); 200 printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key);
201 printtree(sib1(tree), ident + 2); 201 printtree(sib1(tree), ident + 2);
202 break; 202 break;
203 } 203 }
diff --git a/lptree.c b/lptree.c
index 6633035..fcf5ff9 100644
--- a/lptree.c
+++ b/lptree.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $ 2** $Id: lptree.c,v 1.22 2016/09/13 18:10:22 roberto Exp $
3** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) 3** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/ 4*/
5 5
@@ -65,7 +65,7 @@ static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) {
65 t->tag = TCall; 65 t->tag = TCall;
66 t->u.s.ps = n - (t - g); /* position relative to node */ 66 t->u.s.ps = n - (t - g); /* position relative to node */
67 assert(sib2(t)->tag == TRule); 67 assert(sib2(t)->tag == TRule);
68 sib2(t)->key = t->key; 68 sib2(t)->key = t->key; /* fix rule's key */
69} 69}
70 70
71 71
@@ -542,7 +542,6 @@ static TTree *newrootlab2sib (lua_State *L, int tag) {
542/* labeled failure end */ 542/* labeled failure end */
543 543
544 544
545
546static int lp_P (lua_State *L) { 545static int lp_P (lua_State *L) {
547 luaL_checkany(L, 1); 546 luaL_checkany(L, 1);
548 getpatt(L, 1, NULL); 547 getpatt(L, 1, NULL);
@@ -728,6 +727,7 @@ static int lp_throw (lua_State *L) {
728 return 1; 727 return 1;
729} 728}
730 729
730
731/* 731/*
732** labeled recovery function 732** labeled recovery function
733*/ 733*/
@@ -747,7 +747,6 @@ static int lp_recovery (lua_State *L) {
747 } 747 }
748 return 1; 748 return 1;
749} 749}
750
751/* labeled failure end */ 750/* labeled failure end */
752 751
753 752
@@ -996,7 +995,7 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
996 int rulesize; 995 int rulesize;
997 TTree *rn = gettree(L, ridx, &rulesize); 996 TTree *rn = gettree(L, ridx, &rulesize);
998 nd->tag = TRule; 997 nd->tag = TRule;
999 nd->key = 0; 998 nd->key = 0; /* will be fixed when rule is used */
1000 nd->cap = i; /* rule number */ 999 nd->cap = i; /* rule number */
1001 nd->u.s.ps = rulesize + 1; /* point to next rule */ 1000 nd->u.s.ps = rulesize + 1; /* point to next rule */
1002 memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ 1001 memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */
@@ -1030,6 +1029,11 @@ static int checkloops (TTree *tree) {
1030} 1029}
1031 1030
1032 1031
1032/*
1033** Give appropriate error message for 'verifyrule'. If a rule appears
1034** twice in 'passed', there is path from it back to itself without
1035** advancing the subject.
1036*/
1033static int verifyerror (lua_State *L, int *passed, int npassed) { 1037static int verifyerror (lua_State *L, int *passed, int npassed) {
1034 int i, j; 1038 int i, j;
1035 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ 1039 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */
@@ -1051,6 +1055,8 @@ static int verifyerror (lua_State *L, int *passed, int npassed) {
1051** is only relevant if the first is nullable. 1055** is only relevant if the first is nullable.
1052** Parameter 'nb' works as an accumulator, to allow tail calls in 1056** Parameter 'nb' works as an accumulator, to allow tail calls in
1053** choices. ('nb' true makes function returns true.) 1057** choices. ('nb' true makes function returns true.)
1058** Parameter 'passed' is a list of already visited rules, 'npassed'
1059** counts the elements in 'passed'.
1054** Assume ktable at the top of the stack. 1060** Assume ktable at the top of the stack.
1055*/ 1061*/
1056static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, 1062static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
@@ -1330,7 +1336,7 @@ static struct luaL_Reg pattreg[] = {
1330 {"setmaxstack", lp_setmax}, 1336 {"setmaxstack", lp_setmax},
1331 {"type", lp_type}, 1337 {"type", lp_type},
1332 {"T", lp_throw}, /* labeled failure throw */ 1338 {"T", lp_throw}, /* labeled failure throw */
1333 {"Rec", lp_recovery}, /* labeled failure choice */ 1339 {"Rec", lp_recovery}, /* labeled failure recovery */
1334 {NULL, NULL} 1340 {NULL, NULL}
1335}; 1341};
1336 1342
@@ -1347,7 +1353,6 @@ static struct luaL_Reg metareg[] = {
1347 {NULL, NULL} 1353 {NULL, NULL}
1348}; 1354};
1349 1355
1350
1351int luaopen_lpeglabel (lua_State *L); /* labeled failure */ 1356int luaopen_lpeglabel (lua_State *L); /* labeled failure */
1352int luaopen_lpeglabel (lua_State *L) { /* labeled failure */ 1357int luaopen_lpeglabel (lua_State *L) { /* labeled failure */
1353 luaL_newmetatable(L, PATTERN_T); 1358 luaL_newmetatable(L, PATTERN_T);
diff --git a/lptree.h b/lptree.h
index b75f323..e4e8901 100644
--- a/lptree.h
+++ b/lptree.h
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ 2** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $
3*/ 3*/
4 4
5#if !defined(lptree_h) 5#if !defined(lptree_h)
@@ -13,32 +13,39 @@
13** types of trees 13** types of trees
14*/ 14*/
15typedef enum TTag { 15typedef enum TTag {
16 TChar = 0, TSet, TAny, /* standard PEG elements */ 16 TChar = 0, /* 'n' = char */
17 TTrue, TFalse, 17 TSet, /* the set is stored in next CHARSETSIZE bytes */
18 TRep, 18 TAny,
19 TSeq, TChoice, 19 TTrue,
20 TNot, TAnd, 20 TFalse,
21 TCall, 21 TRep, /* 'sib1'* */
22 TOpenCall, 22 TSeq, /* 'sib1' 'sib2' */
23 TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */ 23 TChoice, /* 'sib1' / 'sib2' */
24 TGrammar, /* sib1 is initial (and first) rule */ 24 TNot, /* !'sib1' */
25 TBehind, /* match behind */ 25 TAnd, /* &'sib1' */
26 TCapture, /* regular capture */ 26 TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */
27 TRunTime, /* run-time capture */ 27 TOpenCall, /* ktable[key] is rule's key */
28 TThrow, TRecov /* labeled failure */ 28 TRule, /* ktable[key] is rule's key (but key == 0 for unused rules);
29 'sib1' is rule's pattern;
30 'sib2' is next rule; 'cap' is rule's sequential number */
31 TGrammar, /* 'sib1' is initial (and first) rule */
32 TBehind, /* 'sib1' is pattern, 'n' is how much to go back */
33 TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind');
34 ktable[key] is Lua value associated with capture;
35 'sib1' is capture body */
36 TRunTime, /* run-time capture: 'key' is Lua function;
37 'sib1' is capture body */
38 TThrow, /* labeled failure: 'label' = l */
39 TRecov /* labed failure: 'sib1' // 'sib2' */
40 /* the set of labels is stored in next CHARSETSIZE bytes */
29} TTag; 41} TTag;
30 42
31/* number of siblings for each tree */
32extern const byte numsiblings[];
33
34 43
35/* 44/*
36** Tree trees 45** Tree trees
37** The first sibling of a tree (if there is one) is immediately after 46** The first child of a tree (if there is one) is immediately after
38** the tree. A reference to a second sibling (ps) is its position 47** the tree. A reference to a second child (ps) is its position
39** relative to the position of the tree itself. A key in ktable 48** relative to the position of the tree itself.
40** uses the (unique) address of the original tree that created that
41** entry. NULL means no data.
42*/ 49*/
43typedef struct TTree { 50typedef struct TTree {
44 byte tag; 51 byte tag;
@@ -48,7 +55,7 @@ typedef struct TTree {
48 int n; /* occasional counter */ 55 int n; /* occasional counter */
49 int label; /* labeled failure */ 56 int label; /* labeled failure */
50 struct { /* labeled failure */ 57 struct { /* labeled failure */
51 int ps; /* occasional second sibling */ 58 int ps; /* occasional second child */
52 int plab; /* occasional label set */ 59 int plab; /* occasional label set */
53 } s; /* labeled failure */ 60 } s; /* labeled failure */
54 } u; 61 } u;
@@ -66,10 +73,10 @@ typedef struct Pattern {
66} Pattern; 73} Pattern;
67 74
68 75
69/* number of siblings for each tree */ 76/* number of children for each tree */
70extern const byte numsiblings[]; 77extern const byte numsiblings[];
71 78
72/* access to siblings */ 79/* access to children */
73#define sib1(t) ((t) + 1) 80#define sib1(t) ((t) + 1)
74#define sib2(t) ((t) + (t)->u.s.ps) 81#define sib2(t) ((t) + (t)->u.s.ps)
75 82
diff --git a/lptypes.h b/lptypes.h
index 81f0fdc..0fe250b 100644
--- a/lptypes.h
+++ b/lptypes.h
@@ -1,7 +1,7 @@
1/* 1/*
2** $Id: lptypes.h,v 1.14 2015/09/28 17:17:41 roberto Exp $ 2** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $
3** LPeg - PEG pattern matching for Lua 3** LPeg - PEG pattern matching for Lua
4** Copyright 2007-2015, Lua.org & PUC-Rio (see 'lpeg.html' for license) 4** Copyright 2007-2017, Lua.org & PUC-Rio (see 'lpeg.html' for license)
5** written by Roberto Ierusalimschy 5** written by Roberto Ierusalimschy
6*/ 6*/
7 7
@@ -19,7 +19,7 @@
19#include "lua.h" 19#include "lua.h"
20 20
21 21
22#define VERSION "1.0.0" 22#define VERSION "1.0.1"
23 23
24 24
25#define PATTERN_T "lpeg-pattern" 25#define PATTERN_T "lpeg-pattern"
@@ -55,9 +55,9 @@
55#endif 55#endif
56 56
57 57
58/* maximum number of rules in a grammar */ 58/* maximum number of rules in a grammar (limited by 'unsigned char') */
59#if !defined(MAXRULES) 59#if !defined(MAXRULES)
60#define MAXRULES 1000 60#define MAXRULES 250
61#endif 61#endif
62 62
63 63
diff --git a/lpvm.c b/lpvm.c
index c256083..911b4c5 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $ 2** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) 3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/ 4*/
5 5
@@ -33,6 +33,7 @@ static void setlabelfail(Labelset *ls) {
33} 33}
34/* labeled failure end */ 34/* labeled failure end */
35 35
36
36/* 37/*
37** {====================================================== 38** {======================================================
38** Virtual Machine 39** Virtual Machine
@@ -52,14 +53,16 @@ typedef struct Stack {
52 53
53 54
54/* 55/*
55** Double the size of the array of captures 56** Make the size of the array of captures 'cap' twice as large as needed
57** (which is 'captop'). ('n' is the number of new elements.)
56*/ 58*/
57static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { 59static Capture *doublecap (lua_State *L, Capture *cap, int captop,
60 int n, int ptop) {
58 Capture *newc; 61 Capture *newc;
59 if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) 62 if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
60 luaL_error(L, "too many captures"); 63 luaL_error(L, "too many captures");
61 newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); 64 newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
62 memcpy(newc, cap, captop * sizeof(Capture)); 65 memcpy(newc, cap, (captop - n) * sizeof(Capture));
63 lua_replace(L, caplistidx(ptop)); 66 lua_replace(L, caplistidx(ptop));
64 return newc; 67 return newc;
65} 68}
@@ -120,8 +123,8 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
120*/ 123*/
121static void adddyncaptures (const char *s, Capture *base, int n, int fd) { 124static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
122 int i; 125 int i;
123 /* Cgroup capture is already there */ 126 base[0].kind = Cgroup; /* create group capture */
124 assert(base[0].kind == Cgroup && base[0].siz == 0); 127 base[0].siz = 0;
125 base[0].idx = 0; /* make it an anonymous group */ 128 base[0].idx = 0; /* make it an anonymous group */
126 for (i = 1; i <= n; i++) { /* add runtime captures */ 129 for (i = 1; i <= n; i++) { /* add runtime captures */
127 base[i].kind = Cruntime; 130 base[i].kind = Cruntime;
@@ -148,8 +151,6 @@ static int removedyncap (lua_State *L, Capture *capture,
148} 151}
149 152
150 153
151
152
153/* 154/*
154** Opcode interpreter 155** Opcode interpreter
155*/ 156*/
@@ -170,10 +171,11 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
170 lua_pushlightuserdata(L, stackbase); 171 lua_pushlightuserdata(L, stackbase);
171 for (;;) { 172 for (;;) {
172#if defined(DEBUG) 173#if defined(DEBUG)
173 printinst(op, p); 174 printf("-------------------------------------\n");
174 printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
175 s, stack - getstackbase(L, ptop), ndyncap, captop);
176 printcaplist(capture, capture + captop); 175 printcaplist(capture, capture + captop);
176 printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
177 s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
178 printinst(op, p);
177#endif 179#endif
178 assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); 180 assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
179 switch ((Opcode)p->i.code) { 181 switch ((Opcode)p->i.code) {
@@ -275,7 +277,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
275 p += 2; 277 p += 2;
276 continue; 278 continue;
277 } 279 }
278 case IRecov: { /* labeled failure */ 280 case IRecov: { /* labeled failure */
279 if (stack == stacklimit) 281 if (stack == stacklimit)
280 stack = doublestack(L, &stacklimit, ptop); 282 stack = doublestack(L, &stacklimit, ptop);
281 stack->p = p + getoffset(p); 283 stack->p = p + getoffset(p);
@@ -354,6 +356,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
354 stack++; 356 stack++;
355 } 357 }
356 p = pstack->p; 358 p = pstack->p;
359#if defined(DEBUG)
360 printf("**FAIL**\n");
361#endif
357 continue; 362 continue;
358 } 363 }
359 case ICloseRunTime: { 364 case ICloseRunTime: {
@@ -363,6 +368,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
363 cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; 368 cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
364 n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ 369 n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
365 captop -= n; /* remove nested captures */ 370 captop -= n; /* remove nested captures */
371 ndyncap -= rem; /* update number of dynamic captures */
366 fr -= rem; /* 'rem' items were popped from Lua stack */ 372 fr -= rem; /* 'rem' items were popped from Lua stack */
367 res = resdyncaptures(L, fr, s - o, e - o); /* get result */ 373 res = resdyncaptures(L, fr, s - o, e - o); /* get result */
368 if (res == -1) { /* fail? */ 374 if (res == -1) { /* fail? */
@@ -373,10 +379,12 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
373 } 379 }
374 s = o + res; /* else update current position */ 380 s = o + res; /* else update current position */
375 n = lua_gettop(L) - fr + 1; /* number of new captures */ 381 n = lua_gettop(L) - fr + 1; /* number of new captures */
376 ndyncap += n - rem; /* update number of dynamic captures */ 382 ndyncap += n; /* update number of dynamic captures */
377 if (n > 0) { /* any new capture? */ 383 if (n > 0) { /* any new capture? */
384 if (fr + n >= SHRT_MAX)
385 luaL_error(L, "too many results in match-time capture");
378 if ((captop += n + 2) >= capsize) { 386 if ((captop += n + 2) >= capsize) {
379 capture = doublecap(L, capture, captop, ptop); 387 capture = doublecap(L, capture, captop, n + 2, ptop);
380 capsize = 2 * captop; 388 capsize = 2 * captop;
381 } 389 }
382 /* add new captures to 'capture' list */ 390 /* add new captures to 'capture' list */
@@ -413,7 +421,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
413 capture[captop].idx = p->i.key; 421 capture[captop].idx = p->i.key;
414 capture[captop].kind = getkind(p); 422 capture[captop].kind = getkind(p);
415 if (++captop >= capsize) { 423 if (++captop >= capsize) {
416 capture = doublecap(L, capture, captop, ptop); 424 capture = doublecap(L, capture, captop, 0, ptop);
417 capsize = 2 * captop; 425 capsize = 2 * captop;
418 } 426 }
419 p++; 427 p++;
diff --git a/makefile b/makefile
index 5d74b96..c06ab86 100644
--- a/makefile
+++ b/makefile
@@ -45,10 +45,10 @@ lpeglabel.dll: $(FILES)
45 45
46$(FILES): makefile 46$(FILES): makefile
47 47
48test: test.lua testlabel.lua testerrors.lua relabel.lua lpeglabel.so 48test: test.lua testlabel.lua testrelabelparser.lua relabel.lua lpeglabel.so
49 lua test.lua 49 lua test.lua
50 lua testlabel.lua 50 lua testlabel.lua
51 lua testerrors.lua 51 lua testrelabelparser.lua
52 52
53clean: 53clean:
54 rm -f $(FILES) lpeglabel.so 54 rm -f $(FILES) lpeglabel.so
diff --git a/test.lua b/test.lua
index d5922ac..a3b86bf 100755
--- a/test.lua
+++ b/test.lua
@@ -1,6 +1,6 @@
1#!/usr/bin/env lua 1#!/usr/bin/env lua
2 2
3-- $Id: test.lua,v 1.109 2015/09/28 17:01:25 roberto Exp $ 3-- $Id: test.lua,v 1.112 2017/01/14 18:55:22 roberto Exp $
4 4
5-- require"strict" -- just to be pedantic 5-- require"strict" -- just to be pedantic
6 6
@@ -202,6 +202,14 @@ do
202end 202end
203 203
204 204
205-- bug: loop in 'hascaptures'
206do
207 local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'})
208 assert(p:match("xxx") == "")
209end
210
211
212
205-- test for small capture boundary 213-- test for small capture boundary
206for i = 250,260 do 214for i = 250,260 do
207 assert(#m.match(m.C(i), string.rep('a', i)) == i) 215 assert(#m.match(m.C(i), string.rep('a', i)) == i)
@@ -517,6 +525,27 @@ assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
517assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") 525assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
518assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") 526assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
519 527
528
529-- fixed length
530do
531 -- 'and' predicate using fixed length
532 local p = m.C(#("a" * (m.P("bd") + "cd")) * 2)
533 assert(p:match("acd") == "ac")
534
535 p = #m.P{ "a" * m.V(2), m.P"b" } * 2
536 assert(p:match("abc") == 3)
537
538 p = #(m.P"abc" * m.B"c")
539 assert(p:match("abc") == 1 and not p:match("ab"))
540
541 p = m.P{ "a" * m.V(2), m.P"b"^1 }
542 checkerr("pattern may not have fixed length", m.B, p)
543
544 p = "abc" * (m.P"b"^1 + m.P"a"^0)
545 checkerr("pattern may not have fixed length", m.B, p)
546end
547
548
520p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) 549p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
521assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) 550assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
522 551
@@ -1098,6 +1127,32 @@ do
1098 assert(c == 11) 1127 assert(c == 11)
1099end 1128end
1100 1129
1130
1131-- Return a match-time capture that returns 'n' captures
1132local function manyCmt (n)
1133 return m.Cmt("a", function ()
1134 local a = {}; for i = 1, n do a[i] = n - i end
1135 return true, unpack(a)
1136 end)
1137end
1138
1139-- bug in 1.0: failed match-time that used previous match-time results
1140do
1141 local x
1142 local function aux (...) x = #{...}; return false end
1143 local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")}
1144 assert(#res == 10 and res[1] == 9 and res[10] == 0)
1145end
1146
1147
1148-- bug in 1.0: problems with math-times returning too many captures
1149do
1150 local lim = 2^11 - 10
1151 local res = {m.match(manyCmt(lim), "a")}
1152 assert(#res == lim and res[1] == lim - 1 and res[lim] == 0)
1153 checkerr("too many", m.match, manyCmt(2^15), "a")
1154end
1155
1101p = (m.P(function () return true, "a" end) * 'a' 1156p = (m.P(function () return true, "a" end) * 'a'
1102 + m.P(function (s, i) return i, "aa", 20 end) * 'b' 1157 + m.P(function (s, i) return i, "aa", 20 end) * 'b'
1103 + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 1158 + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0