aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergio Queiroz <sqmedeiros@gmail.com>2019-01-10 10:06:33 -0300
committerSergio Queiroz <sqmedeiros@gmail.com>2019-01-10 10:06:33 -0300
commit4fb816d55f47c48c34cc4478e584b1567a75863b (patch)
tree54ec6965c9d3983b193cf000a521a06cb3d73b8a
parent9be59fb8f4b176a16643e707c74051b243202296 (diff)
downloadlpeglabel-lpeg-1.0.0.tar.gz
lpeglabel-lpeg-1.0.0.tar.bz2
lpeglabel-lpeg-1.0.0.zip
Adapting lpeglabel-1.5 to the codebase of lpeg-1.0.0lpeg-1.0.0
-rw-r--r--lpcap.h19
-rw-r--r--lpcode.c84
-rw-r--r--lpcode.h6
-rw-r--r--lpprint.c24
-rw-r--r--lptree.c13
-rw-r--r--lptree.h53
-rw-r--r--lptypes.h10
-rw-r--r--lpvm.c30
-rwxr-xr-xtest.lua57
9 files changed, 91 insertions, 205 deletions
diff --git a/lpcap.h b/lpcap.h
index 6133df2..d762fdc 100644
--- a/lpcap.h
+++ b/lpcap.h
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $ 2** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $
3*/ 3*/
4 4
5#if !defined(lpcap_h) 5#if !defined(lpcap_h)
@@ -11,21 +11,8 @@
11 11
12/* kinds of captures */ 12/* kinds of captures */
13typedef enum CapKind { 13typedef enum CapKind {
14 Cclose, /* not used in trees */ 14 Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction,
15 Cposition, 15 Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup
16 Cconst, /* ktable[key] is Lua constant */
17 Cbackref, /* ktable[key] is "name" of group to get capture */
18 Carg, /* 'key' is arg's number */
19 Csimple, /* next node is pattern */
20 Ctable, /* next node is pattern */
21 Cfunction, /* ktable[key] is function; next node is pattern */
22 Cquery, /* ktable[key] is table; next node is pattern */
23 Cstring, /* ktable[key] is string; next node is pattern */
24 Cnum, /* numbered capture; 'key' is number of value to return */
25 Csubst, /* substitution capture; next node is pattern */
26 Cfold, /* ktable[key] is function; next node is pattern */
27 Cruntime, /* not used in trees (is uses another type for tree) */
28 Cgroup /* ktable[key] is group's "name" */
29} CapKind; 16} CapKind;
30 17
31 18
diff --git a/lpcode.c b/lpcode.c
index 6cf2933..08c0bf0 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $ 2** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) 3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/ 4*/
5 5
@@ -126,27 +126,6 @@ int tocharset (TTree *tree, Charset *cs) {
126 126
127 127
128/* 128/*
129** Visit a TCall node taking care to stop recursion. If node not yet
130** visited, return 'f(sib2(tree))', otherwise return 'def' (default
131** value)
132*/
133static int callrecursive (TTree *tree, int f (TTree *t), int def) {
134 int key = tree->key;
135 assert(tree->tag == TCall);
136 assert(sib2(tree)->tag == TRule);
137 if (key == 0) /* node already visited? */
138 return def; /* return default value */
139 else { /* first visit */
140 int result;
141 tree->key = 0; /* mark call as already visited */
142 result = f(sib2(tree)); /* go to called rule */
143 tree->key = key; /* restore tree */
144 return result;
145 }
146}
147
148
149/*
150** Check whether a pattern tree has captures 129** Check whether a pattern tree has captures
151*/ 130*/
152int hascaptures (TTree *tree) { 131int hascaptures (TTree *tree) {
@@ -155,17 +134,14 @@ int hascaptures (TTree *tree) {
155 case TCapture: case TRunTime: 134 case TCapture: case TRunTime:
156 return 1; 135 return 1;
157 case TCall: 136 case TCall:
158 return callrecursive(tree, hascaptures, 0); 137 tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */
159 case TRule: /* do not follow siblings */
160 tree = sib1(tree); goto tailcall;
161 case TOpenCall: assert(0); 138 case TOpenCall: assert(0);
162 default: { 139 default: {
163 switch (numsiblings[tree->tag]) { 140 switch (numsiblings[tree->tag]) {
164 case 1: /* return hascaptures(sib1(tree)); */ 141 case 1: /* return hascaptures(sib1(tree)); */
165 tree = sib1(tree); goto tailcall; 142 tree = sib1(tree); goto tailcall;
166 case 2: 143 case 2:
167 if (hascaptures(sib1(tree))) 144 if (hascaptures(sib1(tree))) return 1;
168 return 1;
169 /* else return hascaptures(sib2(tree)); */ 145 /* else return hascaptures(sib2(tree)); */
170 tree = sib2(tree); goto tailcall; 146 tree = sib2(tree); goto tailcall;
171 default: assert(numsiblings[tree->tag] == 0); return 0; 147 default: assert(numsiblings[tree->tag] == 0); return 0;
@@ -232,9 +208,9 @@ int checkaux (TTree *tree, int pred) {
232 208
233/* 209/*
234** number of characters to match a pattern (or -1 if variable) 210** number of characters to match a pattern (or -1 if variable)
211** ('count' avoids infinite loops for grammars)
235*/ 212*/
236int fixedlen (TTree *tree) { 213int fixedlenx (TTree *tree, int count, int len) {
237 int len = 0; /* to accumulate in tail calls */
238 tailcall: 214 tailcall:
239 switch (tree->tag) { 215 switch (tree->tag) {
240 case TChar: case TSet: case TAny: 216 case TChar: case TSet: case TAny:
@@ -245,29 +221,26 @@ int fixedlen (TTree *tree) {
245 case TThrow: /* labeled failure */ 221 case TThrow: /* labeled failure */
246 return -1; 222 return -1;
247 case TCapture: case TRule: case TGrammar: 223 case TCapture: case TRule: case TGrammar:
248 /* return fixedlen(sib1(tree)); */ 224 /* return fixedlenx(sib1(tree), count); */
249 tree = sib1(tree); goto tailcall; 225 tree = sib1(tree); goto tailcall;
250 case TCall: { 226 case TCall:
251 int n1 = callrecursive(tree, fixedlen, -1); 227 if (count++ >= MAXRULES)
252 if (n1 < 0) 228 return -1; /* may be a loop */
253 return -1; 229 /* else return fixedlenx(sib2(tree), count); */
254 else 230 tree = sib2(tree); goto tailcall;
255 return len + n1; 231 case TSeq: {
256 } 232 len = fixedlenx(sib1(tree), count, len);
257 case TSeq: { 233 if (len < 0) return -1;
258 int n1 = fixedlen(sib1(tree)); 234 /* else return fixedlenx(sib2(tree), count, len); */
259 if (n1 < 0) 235 tree = sib2(tree); goto tailcall;
260 return -1;
261 /* else return fixedlen(sib2(tree)) + len; */
262 len += n1; tree = sib2(tree); goto tailcall;
263 } 236 }
264 case TChoice: { 237 case TChoice: {
265 int n1 = fixedlen(sib1(tree)); 238 int n1, n2;
266 int n2 = fixedlen(sib2(tree)); 239 n1 = fixedlenx(sib1(tree), count, len);
267 if (n1 != n2 || n1 < 0) 240 if (n1 < 0) return -1;
268 return -1; 241 n2 = fixedlenx(sib2(tree), count, len);
269 else 242 if (n1 == n2) return n1;
270 return len + n1; 243 else return -1;
271 } 244 }
272 default: assert(0); return 0; 245 default: assert(0); return 0;
273 }; 246 };
@@ -311,7 +284,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
311 loopset(i, firstset->cs[i] = fullset->cs[i]); 284 loopset(i, firstset->cs[i] = fullset->cs[i]);
312 return 1; 285 return 1;
313 } 286 }
314 case TChoice: { 287 case TChoice: {
315 Charset csaux; 288 Charset csaux;
316 int e1 = getfirst(sib1(tree), follow, firstset); 289 int e1 = getfirst(sib1(tree), follow, firstset);
317 int e2 = getfirst(sib2(tree), follow, &csaux); 290 int e2 = getfirst(sib2(tree), follow, &csaux);
@@ -396,7 +369,7 @@ static int headfail (TTree *tree) {
396 if (!nofail(sib2(tree))) return 0; 369 if (!nofail(sib2(tree))) return 0;
397 /* else return headfail(sib1(tree)); */ 370 /* else return headfail(sib1(tree)); */
398 tree = sib1(tree); goto tailcall; 371 tree = sib1(tree); goto tailcall;
399 case TChoice: 372 case TChoice:
400 if (!headfail(sib1(tree))) return 0; 373 if (!headfail(sib1(tree))) return 0;
401 /* else return headfail(sib2(tree)); */ 374 /* else return headfail(sib2(tree)); */
402 tree = sib2(tree); goto tailcall; 375 tree = sib2(tree); goto tailcall;
@@ -769,10 +742,9 @@ static void codeand (CompileState *compst, TTree *tree, int tt) {
769 742
770 743
771/* 744/*
772** Captures: if pattern has fixed (and not too big) length, and it 745** Captures: if pattern has fixed (and not too big) length, use
773** has no nested captures, use a single IFullCapture instruction 746** a single IFullCapture instruction after the match; otherwise,
774** after the match; otherwise, enclose the pattern with OpenCapture - 747** enclose the pattern with OpenCapture - CloseCapture.
775** CloseCapture.
776*/ 748*/
777static void codecapture (CompileState *compst, TTree *tree, int tt, 749static void codecapture (CompileState *compst, TTree *tree, int tt,
778 const Charset *fl) { 750 const Charset *fl) {
diff --git a/lpcode.h b/lpcode.h
index 2a5861e..896d3c7 100644
--- a/lpcode.h
+++ b/lpcode.h
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $ 2** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $
3*/ 3*/
4 4
5#if !defined(lpcode_h) 5#if !defined(lpcode_h)
@@ -13,7 +13,7 @@
13 13
14int tocharset (TTree *tree, Charset *cs); 14int tocharset (TTree *tree, Charset *cs);
15int checkaux (TTree *tree, int pred); 15int checkaux (TTree *tree, int pred);
16int fixedlen (TTree *tree); 16int fixedlenx (TTree *tree, int count, int len);
17int hascaptures (TTree *tree); 17int hascaptures (TTree *tree);
18int lp_gc (lua_State *L); 18int lp_gc (lua_State *L);
19Instruction *compile (lua_State *L, Pattern *p); 19Instruction *compile (lua_State *L, Pattern *p);
@@ -35,6 +35,8 @@ int sizei (const Instruction *i);
35*/ 35*/
36#define nullable(t) checkaux(t, PEnullable) 36#define nullable(t) checkaux(t, PEnullable)
37 37
38#define fixedlen(t) fixedlenx(t, 0, 0)
39
38 40
39 41
40#endif 42#endif
diff --git a/lpprint.c b/lpprint.c
index 3c6a8f6..393fc86 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $ 2** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) 3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/ 4*/
5 5
@@ -37,13 +37,13 @@ void printcharset (const byte *st) {
37} 37}
38 38
39 39
40static const char *capkind (int kind) { 40static void printcapkind (int kind) {
41 const char *const modes[] = { 41 const char *const modes[] = {
42 "close", "position", "constant", "backref", 42 "close", "position", "constant", "backref",
43 "argument", "simple", "table", "function", 43 "argument", "simple", "table", "function",
44 "query", "string", "num", "substitution", "fold", 44 "query", "string", "num", "substitution", "fold",
45 "runtime", "group"}; 45 "runtime", "group"};
46 return modes[kind]; 46 printf("%s", modes[kind]);
47} 47}
48 48
49 49
@@ -60,7 +60,7 @@ void printinst (const Instruction *op, const Instruction *p) {
60 "ret", "end", 60 "ret", "end",
61 "choice", "pred_choice", "jmp", "call", "open_call", /* labeled failure */ 61 "choice", "pred_choice", "jmp", "call", "open_call", /* labeled failure */
62 "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", 62 "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
63 "fullcapture", "opencapture", "closecapture", "closeruntime", 63 "fullcapture", "opencapture", "closecapture", "closeruntime",
64 "throw", "throw_rec", /* labeled failure */ 64 "throw", "throw_rec", /* labeled failure */
65 }; 65 };
66 printf("%02ld: %s ", (long)(p - op), names[p->i.code]); 66 printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
@@ -74,12 +74,13 @@ void printinst (const Instruction *op, const Instruction *p) {
74 break; 74 break;
75 } 75 }
76 case IFullCapture: { 76 case IFullCapture: {
77 printf("%s (size = %d) (idx = %d)", 77 printcapkind(getkind(p));
78 capkind(getkind(p)), getoff(p), p->i.key); 78 printf(" (size = %d) (idx = %d)", getoff(p), p->i.key);
79 break; 79 break;
80 } 80 }
81 case IOpenCapture: { 81 case IOpenCapture: {
82 printf("%s (idx = %d)", capkind(getkind(p)), p->i.key); 82 printcapkind(getkind(p));
83 printf(" (idx = %d)", p->i.key);
83 break; 84 break;
84 } 85 }
85 case ISet: { 86 case ISet: {
@@ -133,8 +134,8 @@ void printpatt (Instruction *p, int n) {
133 134
134#if defined(LPEG_DEBUG) 135#if defined(LPEG_DEBUG)
135static void printcap (Capture *cap) { 136static void printcap (Capture *cap) {
136 printf("%s (idx: %d - size: %d) -> %p\n", 137 printcapkind(cap->kind);
137 capkind(cap->kind), cap->idx, cap->siz, cap->s); 138 printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s);
138} 139}
139 140
140 141
@@ -187,8 +188,7 @@ void printtree (TTree *tree, int ident) {
187 break; 188 break;
188 } 189 }
189 case TOpenCall: case TCall: { 190 case TOpenCall: case TCall: {
190 assert(sib2(tree)->tag == TRule); 191 printf(" key: %d\n", tree->key);
191 printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap);
192 break; 192 break;
193 } 193 }
194 case TBehind: { 194 case TBehind: {
@@ -197,7 +197,7 @@ void printtree (TTree *tree, int ident) {
197 break; 197 break;
198 } 198 }
199 case TCapture: { 199 case TCapture: {
200 printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); 200 printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n);
201 printtree(sib1(tree), ident + 2); 201 printtree(sib1(tree), ident + 2);
202 break; 202 break;
203 } 203 }
diff --git a/lptree.c b/lptree.c
index 33ae79d..96d3079 100644
--- a/lptree.c
+++ b/lptree.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lptree.c,v 1.22 2016/09/13 18:10:22 roberto Exp $ 2** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $
3** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) 3** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/ 4*/
5 5
@@ -66,7 +66,7 @@ static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t, byte tag
66 t->tag = TCall; 66 t->tag = TCall;
67 t->u.ps = n - (t - g); /* position relative to node */ 67 t->u.ps = n - (t - g); /* position relative to node */
68 assert(sib2(t)->tag == TRule); 68 assert(sib2(t)->tag == TRule);
69 sib2(t)->key = t->key; /* fix rule's key */ 69 sib2(t)->key = t->key;
70 } else if (n != 0) { /* labeled failure */ 70 } else if (n != 0) { /* labeled failure */
71 t->u.ps = n - (t - g); /* position relative to node */ 71 t->u.ps = n - (t - g); /* position relative to node */
72 } 72 }
@@ -969,7 +969,7 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
969 int rulesize; 969 int rulesize;
970 TTree *rn = gettree(L, ridx, &rulesize); 970 TTree *rn = gettree(L, ridx, &rulesize);
971 nd->tag = TRule; 971 nd->tag = TRule;
972 nd->key = 0; /* will be fixed when rule is used */ 972 nd->key = 0;
973 nd->cap = i; /* rule number */ 973 nd->cap = i; /* rule number */
974 nd->u.ps = rulesize + 1; /* point to next rule */ 974 nd->u.ps = rulesize + 1; /* point to next rule */
975 memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ 975 memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */
@@ -1003,11 +1003,6 @@ static int checkloops (TTree *tree) {
1003} 1003}
1004 1004
1005 1005
1006/*
1007** Give appropriate error message for 'verifyrule'. If a rule appears
1008** twice in 'passed', there is path from it back to itself without
1009** advancing the subject.
1010*/
1011static int verifyerror (lua_State *L, int *passed, int npassed) { 1006static int verifyerror (lua_State *L, int *passed, int npassed) {
1012 int i, j; 1007 int i, j;
1013 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ 1008 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */
@@ -1029,8 +1024,6 @@ static int verifyerror (lua_State *L, int *passed, int npassed) {
1029** is only relevant if the first is nullable. 1024** is only relevant if the first is nullable.
1030** Parameter 'nb' works as an accumulator, to allow tail calls in 1025** Parameter 'nb' works as an accumulator, to allow tail calls in
1031** choices. ('nb' true makes function returns true.) 1026** choices. ('nb' true makes function returns true.)
1032** Parameter 'passed' is a list of already visited rules, 'npassed'
1033** counts the elements in 'passed'.
1034** Assume ktable at the top of the stack. 1027** Assume ktable at the top of the stack.
1035*/ 1028*/
1036static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, 1029static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
diff --git a/lptree.h b/lptree.h
index 24a9ac7..70e9c61 100644
--- a/lptree.h
+++ b/lptree.h
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $ 2** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $
3*/ 3*/
4 4
5#if !defined(lptree_h) 5#if !defined(lptree_h)
@@ -13,45 +13,40 @@
13** types of trees 13** types of trees
14*/ 14*/
15typedef enum TTag { 15typedef enum TTag {
16 TChar = 0, /* 'n' = char */ 16 TChar = 0, TSet, TAny, /* standard PEG elements */
17 TSet, /* the set is stored in next CHARSETSIZE bytes */ 17 TTrue, TFalse,
18 TAny, 18 TRep,
19 TTrue, 19 TSeq, TChoice,
20 TFalse, 20 TNot, TAnd,
21 TRep, /* 'sib1'* */ 21 TCall,
22 TSeq, /* 'sib1' 'sib2' */ 22 TOpenCall,
23 TChoice, /* 'sib1' / 'sib2' */ 23 TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */
24 TNot, /* !'sib1' */ 24 TGrammar, /* sib1 is initial (and first) rule */
25 TAnd, /* &'sib1' */ 25 TBehind, /* match behind */
26 TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ 26 TCapture, /* regular capture */
27 TOpenCall, /* ktable[key] is rule's key */ 27 TRunTime, /* run-time capture */
28 TRule, /* ktable[key] is rule's key (but key == 0 for unused rules);
29 'sib1' is rule's pattern;
30 'sib2' is next rule; 'cap' is rule's sequential number */
31 TGrammar, /* 'sib1' is initial (and first) rule */
32 TBehind, /* 'sib1' is pattern, 'n' is how much to go back */
33 TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind');
34 ktable[key] is Lua value associated with capture;
35 'sib1' is capture body */
36 TRunTime, /* run-time capture: 'key' is Lua function;
37 'sib1' is capture body */
38 TThrow, /* labeled failure: ktable[key] is label's name */ 28 TThrow, /* labeled failure: ktable[key] is label's name */
39} TTag; 29} TTag;
40 30
31/* number of siblings for each tree */
32extern const byte numsiblings[];
33
41 34
42/* 35/*
43** Tree trees 36** Tree trees
44** The first child of a tree (if there is one) is immediately after 37** The first sibling of a tree (if there is one) is immediately after
45** the tree. A reference to a second child (ps) is its position 38** the tree. A reference to a second sibling (ps) is its position
46** relative to the position of the tree itself. 39** relative to the position of the tree itself. A key in ktable
40** uses the (unique) address of the original tree that created that
41** entry. NULL means no data.
47*/ 42*/
48typedef struct TTree { 43typedef struct TTree {
49 byte tag; 44 byte tag;
50 byte cap; /* kind of capture (if it is a capture) */ 45 byte cap; /* kind of capture (if it is a capture) */
51 unsigned short key; /* key in ktable for Lua data (0 if no key) */ 46 unsigned short key; /* key in ktable for Lua data (0 if no key) */
52 union { 47 union {
48 int ps; /* occasional second sibling */
53 int n; /* occasional counter */ 49 int n; /* occasional counter */
54 int ps; /* occasional second child */
55 } u; 50 } u;
56} TTree; 51} TTree;
57 52
@@ -67,10 +62,10 @@ typedef struct Pattern {
67} Pattern; 62} Pattern;
68 63
69 64
70/* number of children for each tree */ 65/* number of siblings for each tree */
71extern const byte numsiblings[]; 66extern const byte numsiblings[];
72 67
73/* access to children */ 68/* access to siblings */
74#define sib1(t) ((t) + 1) 69#define sib1(t) ((t) + 1)
75#define sib2(t) ((t) + (t)->u.ps) 70#define sib2(t) ((t) + (t)->u.ps)
76 71
diff --git a/lptypes.h b/lptypes.h
index 72a33f3..a0655d2 100644
--- a/lptypes.h
+++ b/lptypes.h
@@ -1,7 +1,7 @@
1/* 1/*
2** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $ 2** $Id: lptypes.h,v 1.14 2015/09/28 17:17:41 roberto Exp $
3** LPeg - PEG pattern matching for Lua 3** LPeg - PEG pattern matching for Lua
4** Copyright 2007-2017, Lua.org & PUC-Rio (see 'lpeg.html' for license) 4** Copyright 2007-2015, Lua.org & PUC-Rio (see 'lpeg.html' for license)
5** written by Roberto Ierusalimschy 5** written by Roberto Ierusalimschy
6*/ 6*/
7 7
@@ -19,7 +19,7 @@
19#include "lua.h" 19#include "lua.h"
20 20
21 21
22#define VERSION "1.5.1" 22#define VERSION "1.5.0"
23 23
24 24
25#define PATTERN_T "lpeg-pattern" 25#define PATTERN_T "lpeg-pattern"
@@ -55,9 +55,9 @@
55#endif 55#endif
56 56
57 57
58/* maximum number of rules in a grammar (limited by 'unsigned char') */ 58/* maximum number of rules in a grammar */
59#if !defined(MAXRULES) 59#if !defined(MAXRULES)
60#define MAXRULES 250 60#define MAXRULES 1000
61#endif 61#endif
62 62
63 63
diff --git a/lpvm.c b/lpvm.c
index 0c70766..5989a68 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $ 2** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) 3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/ 4*/
5 5
@@ -47,16 +47,14 @@ typedef struct Stack {
47 47
48 48
49/* 49/*
50** Make the size of the array of captures 'cap' twice as large as needed 50** Double the size of the array of captures
51** (which is 'captop'). ('n' is the number of new elements.)
52*/ 51*/
53static Capture *doublecap (lua_State *L, Capture *cap, int captop, 52static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
54 int n, int ptop) {
55 Capture *newc; 53 Capture *newc;
56 if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) 54 if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
57 luaL_error(L, "too many captures"); 55 luaL_error(L, "too many captures");
58 newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); 56 newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
59 memcpy(newc, cap, (captop - n) * sizeof(Capture)); 57 memcpy(newc, cap, captop * sizeof(Capture));
60 lua_replace(L, caplistidx(ptop)); 58 lua_replace(L, caplistidx(ptop));
61 return newc; 59 return newc;
62} 60}
@@ -117,8 +115,8 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
117*/ 115*/
118static void adddyncaptures (const char *s, Capture *base, int n, int fd) { 116static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
119 int i; 117 int i;
120 base[0].kind = Cgroup; /* create group capture */ 118 /* Cgroup capture is already there */
121 base[0].siz = 0; 119 assert(base[0].kind == Cgroup && base[0].siz == 0);
122 base[0].idx = 0; /* make it an anonymous group */ 120 base[0].idx = 0; /* make it an anonymous group */
123 for (i = 1; i <= n; i++) { /* add runtime captures */ 121 for (i = 1; i <= n; i++) { /* add runtime captures */
124 base[i].kind = Cruntime; 122 base[i].kind = Cruntime;
@@ -163,11 +161,10 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
163 lua_pushlightuserdata(L, stackbase); 161 lua_pushlightuserdata(L, stackbase);
164 for (;;) { 162 for (;;) {
165#if defined(DEBUG) 163#if defined(DEBUG)
166 printf("-------------------------------------\n");
167 printcaplist(capture, capture + captop);
168 printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", 164 printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
169 s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); 165 s, stack - getstackbase(L, ptop), ndyncap, captop);
170 printinst(op, p); 166 printinst(op, p);
167 printcaplist(capture, capture + captop);
171#endif 168#endif
172 assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); 169 assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
173 assert(insidepred == INPRED || insidepred == OUTPRED); 170 assert(insidepred == INPRED || insidepred == OUTPRED);
@@ -365,9 +362,6 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
365 assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv); 362 assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv);
366 insidepred = stack->labenv; /* labeled failure */ 363 insidepred = stack->labenv; /* labeled failure */
367 p = stack->p; 364 p = stack->p;
368#if defined(DEBUG)
369 printf("**FAIL**\n");
370#endif
371 continue; 365 continue;
372 } 366 }
373 case ICloseRunTime: { 367 case ICloseRunTime: {
@@ -377,7 +371,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
377 cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; 371 cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
378 n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ 372 n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
379 captop -= n; /* remove nested captures */ 373 captop -= n; /* remove nested captures */
380 ndyncap -= rem; /* update number of dynamic captures */ 374 ndyncap += n - rem; /* update number of dynamic captures */
381 fr -= rem; /* 'rem' items were popped from Lua stack */ 375 fr -= rem; /* 'rem' items were popped from Lua stack */
382 res = resdyncaptures(L, fr, s - o, e - o); /* get result */ 376 res = resdyncaptures(L, fr, s - o, e - o); /* get result */
383 if (res == -1) { /* fail? */ 377 if (res == -1) { /* fail? */
@@ -389,10 +383,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
389 n = lua_gettop(L) - fr + 1; /* number of new captures */ 383 n = lua_gettop(L) - fr + 1; /* number of new captures */
390 ndyncap += n; /* update number of dynamic captures */ 384 ndyncap += n; /* update number of dynamic captures */
391 if (n > 0) { /* any new capture? */ 385 if (n > 0) { /* any new capture? */
392 if (fr + n >= SHRT_MAX)
393 luaL_error(L, "too many results in match-time capture");
394 if ((captop += n + 2) >= capsize) { 386 if ((captop += n + 2) >= capsize) {
395 capture = doublecap(L, capture, captop, n + 2, ptop); 387 capture = doublecap(L, capture, captop, ptop);
396 capsize = 2 * captop; 388 capsize = 2 * captop;
397 } 389 }
398 /* add new captures to 'capture' list */ 390 /* add new captures to 'capture' list */
@@ -429,7 +421,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
429 capture[captop].idx = p->i.key; 421 capture[captop].idx = p->i.key;
430 capture[captop].kind = getkind(p); 422 capture[captop].kind = getkind(p);
431 if (++captop >= capsize) { 423 if (++captop >= capsize) {
432 capture = doublecap(L, capture, captop, 0, ptop); 424 capture = doublecap(L, capture, captop, ptop);
433 capsize = 2 * captop; 425 capsize = 2 * captop;
434 } 426 }
435 p++; 427 p++;
diff --git a/test.lua b/test.lua
index a3b86bf..d5922ac 100755
--- a/test.lua
+++ b/test.lua
@@ -1,6 +1,6 @@
1#!/usr/bin/env lua 1#!/usr/bin/env lua
2 2
3-- $Id: test.lua,v 1.112 2017/01/14 18:55:22 roberto Exp $ 3-- $Id: test.lua,v 1.109 2015/09/28 17:01:25 roberto Exp $
4 4
5-- require"strict" -- just to be pedantic 5-- require"strict" -- just to be pedantic
6 6
@@ -202,14 +202,6 @@ do
202end 202end
203 203
204 204
205-- bug: loop in 'hascaptures'
206do
207 local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'})
208 assert(p:match("xxx") == "")
209end
210
211
212
213-- test for small capture boundary 205-- test for small capture boundary
214for i = 250,260 do 206for i = 250,260 do
215 assert(#m.match(m.C(i), string.rep('a', i)) == i) 207 assert(#m.match(m.C(i), string.rep('a', i)) == i)
@@ -525,27 +517,6 @@ assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
525assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") 517assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
526assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") 518assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
527 519
528
529-- fixed length
530do
531 -- 'and' predicate using fixed length
532 local p = m.C(#("a" * (m.P("bd") + "cd")) * 2)
533 assert(p:match("acd") == "ac")
534
535 p = #m.P{ "a" * m.V(2), m.P"b" } * 2
536 assert(p:match("abc") == 3)
537
538 p = #(m.P"abc" * m.B"c")
539 assert(p:match("abc") == 1 and not p:match("ab"))
540
541 p = m.P{ "a" * m.V(2), m.P"b"^1 }
542 checkerr("pattern may not have fixed length", m.B, p)
543
544 p = "abc" * (m.P"b"^1 + m.P"a"^0)
545 checkerr("pattern may not have fixed length", m.B, p)
546end
547
548
549p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) 520p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
550assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) 521assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
551 522
@@ -1127,32 +1098,6 @@ do
1127 assert(c == 11) 1098 assert(c == 11)
1128end 1099end
1129 1100
1130
1131-- Return a match-time capture that returns 'n' captures
1132local function manyCmt (n)
1133 return m.Cmt("a", function ()
1134 local a = {}; for i = 1, n do a[i] = n - i end
1135 return true, unpack(a)
1136 end)
1137end
1138
1139-- bug in 1.0: failed match-time that used previous match-time results
1140do
1141 local x
1142 local function aux (...) x = #{...}; return false end
1143 local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")}
1144 assert(#res == 10 and res[1] == 9 and res[10] == 0)
1145end
1146
1147
1148-- bug in 1.0: problems with math-times returning too many captures
1149do
1150 local lim = 2^11 - 10
1151 local res = {m.match(manyCmt(lim), "a")}
1152 assert(#res == lim and res[1] == lim - 1 and res[lim] == 0)
1153 checkerr("too many", m.match, manyCmt(2^15), "a")
1154end
1155
1156p = (m.P(function () return true, "a" end) * 'a' 1101p = (m.P(function () return true, "a" end) * 'a'
1157 + m.P(function (s, i) return i, "aa", 20 end) * 'b' 1102 + m.P(function (s, i) return i, "aa", 20 end) * 'b'
1158 + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 1103 + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0