diff options
-rw-r--r-- | lpcap.h | 19 | ||||
-rw-r--r-- | lpcode.c | 84 | ||||
-rw-r--r-- | lpcode.h | 6 | ||||
-rw-r--r-- | lpprint.c | 24 | ||||
-rw-r--r-- | lptree.c | 13 | ||||
-rw-r--r-- | lptree.h | 53 | ||||
-rw-r--r-- | lptypes.h | 10 | ||||
-rw-r--r-- | lpvm.c | 30 | ||||
-rwxr-xr-x | test.lua | 57 |
9 files changed, 91 insertions, 205 deletions
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $ | 2 | ** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #if !defined(lpcap_h) | 5 | #if !defined(lpcap_h) |
@@ -11,21 +11,8 @@ | |||
11 | 11 | ||
12 | /* kinds of captures */ | 12 | /* kinds of captures */ |
13 | typedef enum CapKind { | 13 | typedef enum CapKind { |
14 | Cclose, /* not used in trees */ | 14 | Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, |
15 | Cposition, | 15 | Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup |
16 | Cconst, /* ktable[key] is Lua constant */ | ||
17 | Cbackref, /* ktable[key] is "name" of group to get capture */ | ||
18 | Carg, /* 'key' is arg's number */ | ||
19 | Csimple, /* next node is pattern */ | ||
20 | Ctable, /* next node is pattern */ | ||
21 | Cfunction, /* ktable[key] is function; next node is pattern */ | ||
22 | Cquery, /* ktable[key] is table; next node is pattern */ | ||
23 | Cstring, /* ktable[key] is string; next node is pattern */ | ||
24 | Cnum, /* numbered capture; 'key' is number of value to return */ | ||
25 | Csubst, /* substitution capture; next node is pattern */ | ||
26 | Cfold, /* ktable[key] is function; next node is pattern */ | ||
27 | Cruntime, /* not used in trees (is uses another type for tree) */ | ||
28 | Cgroup /* ktable[key] is group's "name" */ | ||
29 | } CapKind; | 16 | } CapKind; |
30 | 17 | ||
31 | 18 | ||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $ | 2 | ** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $ |
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -126,27 +126,6 @@ int tocharset (TTree *tree, Charset *cs) { | |||
126 | 126 | ||
127 | 127 | ||
128 | /* | 128 | /* |
129 | ** Visit a TCall node taking care to stop recursion. If node not yet | ||
130 | ** visited, return 'f(sib2(tree))', otherwise return 'def' (default | ||
131 | ** value) | ||
132 | */ | ||
133 | static int callrecursive (TTree *tree, int f (TTree *t), int def) { | ||
134 | int key = tree->key; | ||
135 | assert(tree->tag == TCall); | ||
136 | assert(sib2(tree)->tag == TRule); | ||
137 | if (key == 0) /* node already visited? */ | ||
138 | return def; /* return default value */ | ||
139 | else { /* first visit */ | ||
140 | int result; | ||
141 | tree->key = 0; /* mark call as already visited */ | ||
142 | result = f(sib2(tree)); /* go to called rule */ | ||
143 | tree->key = key; /* restore tree */ | ||
144 | return result; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | |||
149 | /* | ||
150 | ** Check whether a pattern tree has captures | 129 | ** Check whether a pattern tree has captures |
151 | */ | 130 | */ |
152 | int hascaptures (TTree *tree) { | 131 | int hascaptures (TTree *tree) { |
@@ -155,17 +134,14 @@ int hascaptures (TTree *tree) { | |||
155 | case TCapture: case TRunTime: | 134 | case TCapture: case TRunTime: |
156 | return 1; | 135 | return 1; |
157 | case TCall: | 136 | case TCall: |
158 | return callrecursive(tree, hascaptures, 0); | 137 | tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ |
159 | case TRule: /* do not follow siblings */ | ||
160 | tree = sib1(tree); goto tailcall; | ||
161 | case TOpenCall: assert(0); | 138 | case TOpenCall: assert(0); |
162 | default: { | 139 | default: { |
163 | switch (numsiblings[tree->tag]) { | 140 | switch (numsiblings[tree->tag]) { |
164 | case 1: /* return hascaptures(sib1(tree)); */ | 141 | case 1: /* return hascaptures(sib1(tree)); */ |
165 | tree = sib1(tree); goto tailcall; | 142 | tree = sib1(tree); goto tailcall; |
166 | case 2: | 143 | case 2: |
167 | if (hascaptures(sib1(tree))) | 144 | if (hascaptures(sib1(tree))) return 1; |
168 | return 1; | ||
169 | /* else return hascaptures(sib2(tree)); */ | 145 | /* else return hascaptures(sib2(tree)); */ |
170 | tree = sib2(tree); goto tailcall; | 146 | tree = sib2(tree); goto tailcall; |
171 | default: assert(numsiblings[tree->tag] == 0); return 0; | 147 | default: assert(numsiblings[tree->tag] == 0); return 0; |
@@ -232,9 +208,9 @@ int checkaux (TTree *tree, int pred) { | |||
232 | 208 | ||
233 | /* | 209 | /* |
234 | ** number of characters to match a pattern (or -1 if variable) | 210 | ** number of characters to match a pattern (or -1 if variable) |
211 | ** ('count' avoids infinite loops for grammars) | ||
235 | */ | 212 | */ |
236 | int fixedlen (TTree *tree) { | 213 | int fixedlenx (TTree *tree, int count, int len) { |
237 | int len = 0; /* to accumulate in tail calls */ | ||
238 | tailcall: | 214 | tailcall: |
239 | switch (tree->tag) { | 215 | switch (tree->tag) { |
240 | case TChar: case TSet: case TAny: | 216 | case TChar: case TSet: case TAny: |
@@ -245,29 +221,26 @@ int fixedlen (TTree *tree) { | |||
245 | case TThrow: /* labeled failure */ | 221 | case TThrow: /* labeled failure */ |
246 | return -1; | 222 | return -1; |
247 | case TCapture: case TRule: case TGrammar: | 223 | case TCapture: case TRule: case TGrammar: |
248 | /* return fixedlen(sib1(tree)); */ | 224 | /* return fixedlenx(sib1(tree), count); */ |
249 | tree = sib1(tree); goto tailcall; | 225 | tree = sib1(tree); goto tailcall; |
250 | case TCall: { | 226 | case TCall: |
251 | int n1 = callrecursive(tree, fixedlen, -1); | 227 | if (count++ >= MAXRULES) |
252 | if (n1 < 0) | 228 | return -1; /* may be a loop */ |
253 | return -1; | 229 | /* else return fixedlenx(sib2(tree), count); */ |
254 | else | 230 | tree = sib2(tree); goto tailcall; |
255 | return len + n1; | 231 | case TSeq: { |
256 | } | 232 | len = fixedlenx(sib1(tree), count, len); |
257 | case TSeq: { | 233 | if (len < 0) return -1; |
258 | int n1 = fixedlen(sib1(tree)); | 234 | /* else return fixedlenx(sib2(tree), count, len); */ |
259 | if (n1 < 0) | 235 | tree = sib2(tree); goto tailcall; |
260 | return -1; | ||
261 | /* else return fixedlen(sib2(tree)) + len; */ | ||
262 | len += n1; tree = sib2(tree); goto tailcall; | ||
263 | } | 236 | } |
264 | case TChoice: { | 237 | case TChoice: { |
265 | int n1 = fixedlen(sib1(tree)); | 238 | int n1, n2; |
266 | int n2 = fixedlen(sib2(tree)); | 239 | n1 = fixedlenx(sib1(tree), count, len); |
267 | if (n1 != n2 || n1 < 0) | 240 | if (n1 < 0) return -1; |
268 | return -1; | 241 | n2 = fixedlenx(sib2(tree), count, len); |
269 | else | 242 | if (n1 == n2) return n1; |
270 | return len + n1; | 243 | else return -1; |
271 | } | 244 | } |
272 | default: assert(0); return 0; | 245 | default: assert(0); return 0; |
273 | }; | 246 | }; |
@@ -311,7 +284,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
311 | loopset(i, firstset->cs[i] = fullset->cs[i]); | 284 | loopset(i, firstset->cs[i] = fullset->cs[i]); |
312 | return 1; | 285 | return 1; |
313 | } | 286 | } |
314 | case TChoice: { | 287 | case TChoice: { |
315 | Charset csaux; | 288 | Charset csaux; |
316 | int e1 = getfirst(sib1(tree), follow, firstset); | 289 | int e1 = getfirst(sib1(tree), follow, firstset); |
317 | int e2 = getfirst(sib2(tree), follow, &csaux); | 290 | int e2 = getfirst(sib2(tree), follow, &csaux); |
@@ -396,7 +369,7 @@ static int headfail (TTree *tree) { | |||
396 | if (!nofail(sib2(tree))) return 0; | 369 | if (!nofail(sib2(tree))) return 0; |
397 | /* else return headfail(sib1(tree)); */ | 370 | /* else return headfail(sib1(tree)); */ |
398 | tree = sib1(tree); goto tailcall; | 371 | tree = sib1(tree); goto tailcall; |
399 | case TChoice: | 372 | case TChoice: |
400 | if (!headfail(sib1(tree))) return 0; | 373 | if (!headfail(sib1(tree))) return 0; |
401 | /* else return headfail(sib2(tree)); */ | 374 | /* else return headfail(sib2(tree)); */ |
402 | tree = sib2(tree); goto tailcall; | 375 | tree = sib2(tree); goto tailcall; |
@@ -769,10 +742,9 @@ static void codeand (CompileState *compst, TTree *tree, int tt) { | |||
769 | 742 | ||
770 | 743 | ||
771 | /* | 744 | /* |
772 | ** Captures: if pattern has fixed (and not too big) length, and it | 745 | ** Captures: if pattern has fixed (and not too big) length, use |
773 | ** has no nested captures, use a single IFullCapture instruction | 746 | ** a single IFullCapture instruction after the match; otherwise, |
774 | ** after the match; otherwise, enclose the pattern with OpenCapture - | 747 | ** enclose the pattern with OpenCapture - CloseCapture. |
775 | ** CloseCapture. | ||
776 | */ | 748 | */ |
777 | static void codecapture (CompileState *compst, TTree *tree, int tt, | 749 | static void codecapture (CompileState *compst, TTree *tree, int tt, |
778 | const Charset *fl) { | 750 | const Charset *fl) { |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $ | 2 | ** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $ |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #if !defined(lpcode_h) | 5 | #if !defined(lpcode_h) |
@@ -13,7 +13,7 @@ | |||
13 | 13 | ||
14 | int tocharset (TTree *tree, Charset *cs); | 14 | int tocharset (TTree *tree, Charset *cs); |
15 | int checkaux (TTree *tree, int pred); | 15 | int checkaux (TTree *tree, int pred); |
16 | int fixedlen (TTree *tree); | 16 | int fixedlenx (TTree *tree, int count, int len); |
17 | int hascaptures (TTree *tree); | 17 | int hascaptures (TTree *tree); |
18 | int lp_gc (lua_State *L); | 18 | int lp_gc (lua_State *L); |
19 | Instruction *compile (lua_State *L, Pattern *p); | 19 | Instruction *compile (lua_State *L, Pattern *p); |
@@ -35,6 +35,8 @@ int sizei (const Instruction *i); | |||
35 | */ | 35 | */ |
36 | #define nullable(t) checkaux(t, PEnullable) | 36 | #define nullable(t) checkaux(t, PEnullable) |
37 | 37 | ||
38 | #define fixedlen(t) fixedlenx(t, 0, 0) | ||
39 | |||
38 | 40 | ||
39 | 41 | ||
40 | #endif | 42 | #endif |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $ | 2 | ** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $ |
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -37,13 +37,13 @@ void printcharset (const byte *st) { | |||
37 | } | 37 | } |
38 | 38 | ||
39 | 39 | ||
40 | static const char *capkind (int kind) { | 40 | static void printcapkind (int kind) { |
41 | const char *const modes[] = { | 41 | const char *const modes[] = { |
42 | "close", "position", "constant", "backref", | 42 | "close", "position", "constant", "backref", |
43 | "argument", "simple", "table", "function", | 43 | "argument", "simple", "table", "function", |
44 | "query", "string", "num", "substitution", "fold", | 44 | "query", "string", "num", "substitution", "fold", |
45 | "runtime", "group"}; | 45 | "runtime", "group"}; |
46 | return modes[kind]; | 46 | printf("%s", modes[kind]); |
47 | } | 47 | } |
48 | 48 | ||
49 | 49 | ||
@@ -60,7 +60,7 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
60 | "ret", "end", | 60 | "ret", "end", |
61 | "choice", "pred_choice", "jmp", "call", "open_call", /* labeled failure */ | 61 | "choice", "pred_choice", "jmp", "call", "open_call", /* labeled failure */ |
62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", | 62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", |
63 | "fullcapture", "opencapture", "closecapture", "closeruntime", | 63 | "fullcapture", "opencapture", "closecapture", "closeruntime", |
64 | "throw", "throw_rec", /* labeled failure */ | 64 | "throw", "throw_rec", /* labeled failure */ |
65 | }; | 65 | }; |
66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); | 66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); |
@@ -74,12 +74,13 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
74 | break; | 74 | break; |
75 | } | 75 | } |
76 | case IFullCapture: { | 76 | case IFullCapture: { |
77 | printf("%s (size = %d) (idx = %d)", | 77 | printcapkind(getkind(p)); |
78 | capkind(getkind(p)), getoff(p), p->i.key); | 78 | printf(" (size = %d) (idx = %d)", getoff(p), p->i.key); |
79 | break; | 79 | break; |
80 | } | 80 | } |
81 | case IOpenCapture: { | 81 | case IOpenCapture: { |
82 | printf("%s (idx = %d)", capkind(getkind(p)), p->i.key); | 82 | printcapkind(getkind(p)); |
83 | printf(" (idx = %d)", p->i.key); | ||
83 | break; | 84 | break; |
84 | } | 85 | } |
85 | case ISet: { | 86 | case ISet: { |
@@ -133,8 +134,8 @@ void printpatt (Instruction *p, int n) { | |||
133 | 134 | ||
134 | #if defined(LPEG_DEBUG) | 135 | #if defined(LPEG_DEBUG) |
135 | static void printcap (Capture *cap) { | 136 | static void printcap (Capture *cap) { |
136 | printf("%s (idx: %d - size: %d) -> %p\n", | 137 | printcapkind(cap->kind); |
137 | capkind(cap->kind), cap->idx, cap->siz, cap->s); | 138 | printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s); |
138 | } | 139 | } |
139 | 140 | ||
140 | 141 | ||
@@ -187,8 +188,7 @@ void printtree (TTree *tree, int ident) { | |||
187 | break; | 188 | break; |
188 | } | 189 | } |
189 | case TOpenCall: case TCall: { | 190 | case TOpenCall: case TCall: { |
190 | assert(sib2(tree)->tag == TRule); | 191 | printf(" key: %d\n", tree->key); |
191 | printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap); | ||
192 | break; | 192 | break; |
193 | } | 193 | } |
194 | case TBehind: { | 194 | case TBehind: { |
@@ -197,7 +197,7 @@ void printtree (TTree *tree, int ident) { | |||
197 | break; | 197 | break; |
198 | } | 198 | } |
199 | case TCapture: { | 199 | case TCapture: { |
200 | printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); | 200 | printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n); |
201 | printtree(sib1(tree), ident + 2); | 201 | printtree(sib1(tree), ident + 2); |
202 | break; | 202 | break; |
203 | } | 203 | } |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lptree.c,v 1.22 2016/09/13 18:10:22 roberto Exp $ | 2 | ** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $ |
3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -66,7 +66,7 @@ static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t, byte tag | |||
66 | t->tag = TCall; | 66 | t->tag = TCall; |
67 | t->u.ps = n - (t - g); /* position relative to node */ | 67 | t->u.ps = n - (t - g); /* position relative to node */ |
68 | assert(sib2(t)->tag == TRule); | 68 | assert(sib2(t)->tag == TRule); |
69 | sib2(t)->key = t->key; /* fix rule's key */ | 69 | sib2(t)->key = t->key; |
70 | } else if (n != 0) { /* labeled failure */ | 70 | } else if (n != 0) { /* labeled failure */ |
71 | t->u.ps = n - (t - g); /* position relative to node */ | 71 | t->u.ps = n - (t - g); /* position relative to node */ |
72 | } | 72 | } |
@@ -969,7 +969,7 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | |||
969 | int rulesize; | 969 | int rulesize; |
970 | TTree *rn = gettree(L, ridx, &rulesize); | 970 | TTree *rn = gettree(L, ridx, &rulesize); |
971 | nd->tag = TRule; | 971 | nd->tag = TRule; |
972 | nd->key = 0; /* will be fixed when rule is used */ | 972 | nd->key = 0; |
973 | nd->cap = i; /* rule number */ | 973 | nd->cap = i; /* rule number */ |
974 | nd->u.ps = rulesize + 1; /* point to next rule */ | 974 | nd->u.ps = rulesize + 1; /* point to next rule */ |
975 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ | 975 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ |
@@ -1003,11 +1003,6 @@ static int checkloops (TTree *tree) { | |||
1003 | } | 1003 | } |
1004 | 1004 | ||
1005 | 1005 | ||
1006 | /* | ||
1007 | ** Give appropriate error message for 'verifyrule'. If a rule appears | ||
1008 | ** twice in 'passed', there is path from it back to itself without | ||
1009 | ** advancing the subject. | ||
1010 | */ | ||
1011 | static int verifyerror (lua_State *L, int *passed, int npassed) { | 1006 | static int verifyerror (lua_State *L, int *passed, int npassed) { |
1012 | int i, j; | 1007 | int i, j; |
1013 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | 1008 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ |
@@ -1029,8 +1024,6 @@ static int verifyerror (lua_State *L, int *passed, int npassed) { | |||
1029 | ** is only relevant if the first is nullable. | 1024 | ** is only relevant if the first is nullable. |
1030 | ** Parameter 'nb' works as an accumulator, to allow tail calls in | 1025 | ** Parameter 'nb' works as an accumulator, to allow tail calls in |
1031 | ** choices. ('nb' true makes function returns true.) | 1026 | ** choices. ('nb' true makes function returns true.) |
1032 | ** Parameter 'passed' is a list of already visited rules, 'npassed' | ||
1033 | ** counts the elements in 'passed'. | ||
1034 | ** Assume ktable at the top of the stack. | 1027 | ** Assume ktable at the top of the stack. |
1035 | */ | 1028 | */ |
1036 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | 1029 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $ | 2 | ** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #if !defined(lptree_h) | 5 | #if !defined(lptree_h) |
@@ -13,45 +13,40 @@ | |||
13 | ** types of trees | 13 | ** types of trees |
14 | */ | 14 | */ |
15 | typedef enum TTag { | 15 | typedef enum TTag { |
16 | TChar = 0, /* 'n' = char */ | 16 | TChar = 0, TSet, TAny, /* standard PEG elements */ |
17 | TSet, /* the set is stored in next CHARSETSIZE bytes */ | 17 | TTrue, TFalse, |
18 | TAny, | 18 | TRep, |
19 | TTrue, | 19 | TSeq, TChoice, |
20 | TFalse, | 20 | TNot, TAnd, |
21 | TRep, /* 'sib1'* */ | 21 | TCall, |
22 | TSeq, /* 'sib1' 'sib2' */ | 22 | TOpenCall, |
23 | TChoice, /* 'sib1' / 'sib2' */ | 23 | TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */ |
24 | TNot, /* !'sib1' */ | 24 | TGrammar, /* sib1 is initial (and first) rule */ |
25 | TAnd, /* &'sib1' */ | 25 | TBehind, /* match behind */ |
26 | TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ | 26 | TCapture, /* regular capture */ |
27 | TOpenCall, /* ktable[key] is rule's key */ | 27 | TRunTime, /* run-time capture */ |
28 | TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); | ||
29 | 'sib1' is rule's pattern; | ||
30 | 'sib2' is next rule; 'cap' is rule's sequential number */ | ||
31 | TGrammar, /* 'sib1' is initial (and first) rule */ | ||
32 | TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ | ||
33 | TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); | ||
34 | ktable[key] is Lua value associated with capture; | ||
35 | 'sib1' is capture body */ | ||
36 | TRunTime, /* run-time capture: 'key' is Lua function; | ||
37 | 'sib1' is capture body */ | ||
38 | TThrow, /* labeled failure: ktable[key] is label's name */ | 28 | TThrow, /* labeled failure: ktable[key] is label's name */ |
39 | } TTag; | 29 | } TTag; |
40 | 30 | ||
31 | /* number of siblings for each tree */ | ||
32 | extern const byte numsiblings[]; | ||
33 | |||
41 | 34 | ||
42 | /* | 35 | /* |
43 | ** Tree trees | 36 | ** Tree trees |
44 | ** The first child of a tree (if there is one) is immediately after | 37 | ** The first sibling of a tree (if there is one) is immediately after |
45 | ** the tree. A reference to a second child (ps) is its position | 38 | ** the tree. A reference to a second sibling (ps) is its position |
46 | ** relative to the position of the tree itself. | 39 | ** relative to the position of the tree itself. A key in ktable |
40 | ** uses the (unique) address of the original tree that created that | ||
41 | ** entry. NULL means no data. | ||
47 | */ | 42 | */ |
48 | typedef struct TTree { | 43 | typedef struct TTree { |
49 | byte tag; | 44 | byte tag; |
50 | byte cap; /* kind of capture (if it is a capture) */ | 45 | byte cap; /* kind of capture (if it is a capture) */ |
51 | unsigned short key; /* key in ktable for Lua data (0 if no key) */ | 46 | unsigned short key; /* key in ktable for Lua data (0 if no key) */ |
52 | union { | 47 | union { |
48 | int ps; /* occasional second sibling */ | ||
53 | int n; /* occasional counter */ | 49 | int n; /* occasional counter */ |
54 | int ps; /* occasional second child */ | ||
55 | } u; | 50 | } u; |
56 | } TTree; | 51 | } TTree; |
57 | 52 | ||
@@ -67,10 +62,10 @@ typedef struct Pattern { | |||
67 | } Pattern; | 62 | } Pattern; |
68 | 63 | ||
69 | 64 | ||
70 | /* number of children for each tree */ | 65 | /* number of siblings for each tree */ |
71 | extern const byte numsiblings[]; | 66 | extern const byte numsiblings[]; |
72 | 67 | ||
73 | /* access to children */ | 68 | /* access to siblings */ |
74 | #define sib1(t) ((t) + 1) | 69 | #define sib1(t) ((t) + 1) |
75 | #define sib2(t) ((t) + (t)->u.ps) | 70 | #define sib2(t) ((t) + (t)->u.ps) |
76 | 71 | ||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $ | 2 | ** $Id: lptypes.h,v 1.14 2015/09/28 17:17:41 roberto Exp $ |
3 | ** LPeg - PEG pattern matching for Lua | 3 | ** LPeg - PEG pattern matching for Lua |
4 | ** Copyright 2007-2017, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 4 | ** Copyright 2007-2015, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
5 | ** written by Roberto Ierusalimschy | 5 | ** written by Roberto Ierusalimschy |
6 | */ | 6 | */ |
7 | 7 | ||
@@ -19,7 +19,7 @@ | |||
19 | #include "lua.h" | 19 | #include "lua.h" |
20 | 20 | ||
21 | 21 | ||
22 | #define VERSION "1.5.1" | 22 | #define VERSION "1.5.0" |
23 | 23 | ||
24 | 24 | ||
25 | #define PATTERN_T "lpeg-pattern" | 25 | #define PATTERN_T "lpeg-pattern" |
@@ -55,9 +55,9 @@ | |||
55 | #endif | 55 | #endif |
56 | 56 | ||
57 | 57 | ||
58 | /* maximum number of rules in a grammar (limited by 'unsigned char') */ | 58 | /* maximum number of rules in a grammar */ |
59 | #if !defined(MAXRULES) | 59 | #if !defined(MAXRULES) |
60 | #define MAXRULES 250 | 60 | #define MAXRULES 1000 |
61 | #endif | 61 | #endif |
62 | 62 | ||
63 | 63 | ||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $ | 2 | ** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $ |
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -47,16 +47,14 @@ typedef struct Stack { | |||
47 | 47 | ||
48 | 48 | ||
49 | /* | 49 | /* |
50 | ** Make the size of the array of captures 'cap' twice as large as needed | 50 | ** Double the size of the array of captures |
51 | ** (which is 'captop'). ('n' is the number of new elements.) | ||
52 | */ | 51 | */ |
53 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, | 52 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { |
54 | int n, int ptop) { | ||
55 | Capture *newc; | 53 | Capture *newc; |
56 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) | 54 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) |
57 | luaL_error(L, "too many captures"); | 55 | luaL_error(L, "too many captures"); |
58 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); | 56 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); |
59 | memcpy(newc, cap, (captop - n) * sizeof(Capture)); | 57 | memcpy(newc, cap, captop * sizeof(Capture)); |
60 | lua_replace(L, caplistidx(ptop)); | 58 | lua_replace(L, caplistidx(ptop)); |
61 | return newc; | 59 | return newc; |
62 | } | 60 | } |
@@ -117,8 +115,8 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | |||
117 | */ | 115 | */ |
118 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { | 116 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { |
119 | int i; | 117 | int i; |
120 | base[0].kind = Cgroup; /* create group capture */ | 118 | /* Cgroup capture is already there */ |
121 | base[0].siz = 0; | 119 | assert(base[0].kind == Cgroup && base[0].siz == 0); |
122 | base[0].idx = 0; /* make it an anonymous group */ | 120 | base[0].idx = 0; /* make it an anonymous group */ |
123 | for (i = 1; i <= n; i++) { /* add runtime captures */ | 121 | for (i = 1; i <= n; i++) { /* add runtime captures */ |
124 | base[i].kind = Cruntime; | 122 | base[i].kind = Cruntime; |
@@ -163,11 +161,10 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
163 | lua_pushlightuserdata(L, stackbase); | 161 | lua_pushlightuserdata(L, stackbase); |
164 | for (;;) { | 162 | for (;;) { |
165 | #if defined(DEBUG) | 163 | #if defined(DEBUG) |
166 | printf("-------------------------------------\n"); | ||
167 | printcaplist(capture, capture + captop); | ||
168 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", | 164 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", |
169 | s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); | 165 | s, stack - getstackbase(L, ptop), ndyncap, captop); |
170 | printinst(op, p); | 166 | printinst(op, p); |
167 | printcaplist(capture, capture + captop); | ||
171 | #endif | 168 | #endif |
172 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); | 169 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); |
173 | assert(insidepred == INPRED || insidepred == OUTPRED); | 170 | assert(insidepred == INPRED || insidepred == OUTPRED); |
@@ -365,9 +362,6 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
365 | assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv); | 362 | assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv); |
366 | insidepred = stack->labenv; /* labeled failure */ | 363 | insidepred = stack->labenv; /* labeled failure */ |
367 | p = stack->p; | 364 | p = stack->p; |
368 | #if defined(DEBUG) | ||
369 | printf("**FAIL**\n"); | ||
370 | #endif | ||
371 | continue; | 365 | continue; |
372 | } | 366 | } |
373 | case ICloseRunTime: { | 367 | case ICloseRunTime: { |
@@ -377,7 +371,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
377 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; | 371 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; |
378 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ | 372 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ |
379 | captop -= n; /* remove nested captures */ | 373 | captop -= n; /* remove nested captures */ |
380 | ndyncap -= rem; /* update number of dynamic captures */ | 374 | ndyncap += n - rem; /* update number of dynamic captures */ |
381 | fr -= rem; /* 'rem' items were popped from Lua stack */ | 375 | fr -= rem; /* 'rem' items were popped from Lua stack */ |
382 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ | 376 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ |
383 | if (res == -1) { /* fail? */ | 377 | if (res == -1) { /* fail? */ |
@@ -389,10 +383,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
389 | n = lua_gettop(L) - fr + 1; /* number of new captures */ | 383 | n = lua_gettop(L) - fr + 1; /* number of new captures */ |
390 | ndyncap += n; /* update number of dynamic captures */ | 384 | ndyncap += n; /* update number of dynamic captures */ |
391 | if (n > 0) { /* any new capture? */ | 385 | if (n > 0) { /* any new capture? */ |
392 | if (fr + n >= SHRT_MAX) | ||
393 | luaL_error(L, "too many results in match-time capture"); | ||
394 | if ((captop += n + 2) >= capsize) { | 386 | if ((captop += n + 2) >= capsize) { |
395 | capture = doublecap(L, capture, captop, n + 2, ptop); | 387 | capture = doublecap(L, capture, captop, ptop); |
396 | capsize = 2 * captop; | 388 | capsize = 2 * captop; |
397 | } | 389 | } |
398 | /* add new captures to 'capture' list */ | 390 | /* add new captures to 'capture' list */ |
@@ -429,7 +421,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
429 | capture[captop].idx = p->i.key; | 421 | capture[captop].idx = p->i.key; |
430 | capture[captop].kind = getkind(p); | 422 | capture[captop].kind = getkind(p); |
431 | if (++captop >= capsize) { | 423 | if (++captop >= capsize) { |
432 | capture = doublecap(L, capture, captop, 0, ptop); | 424 | capture = doublecap(L, capture, captop, ptop); |
433 | capsize = 2 * captop; | 425 | capsize = 2 * captop; |
434 | } | 426 | } |
435 | p++; | 427 | p++; |
@@ -1,6 +1,6 @@ | |||
1 | #!/usr/bin/env lua | 1 | #!/usr/bin/env lua |
2 | 2 | ||
3 | -- $Id: test.lua,v 1.112 2017/01/14 18:55:22 roberto Exp $ | 3 | -- $Id: test.lua,v 1.109 2015/09/28 17:01:25 roberto Exp $ |
4 | 4 | ||
5 | -- require"strict" -- just to be pedantic | 5 | -- require"strict" -- just to be pedantic |
6 | 6 | ||
@@ -202,14 +202,6 @@ do | |||
202 | end | 202 | end |
203 | 203 | ||
204 | 204 | ||
205 | -- bug: loop in 'hascaptures' | ||
206 | do | ||
207 | local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'}) | ||
208 | assert(p:match("xxx") == "") | ||
209 | end | ||
210 | |||
211 | |||
212 | |||
213 | -- test for small capture boundary | 205 | -- test for small capture boundary |
214 | for i = 250,260 do | 206 | for i = 250,260 do |
215 | assert(#m.match(m.C(i), string.rep('a', i)) == i) | 207 | assert(#m.match(m.C(i), string.rep('a', i)) == i) |
@@ -525,27 +517,6 @@ assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | |||
525 | assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | 517 | assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") |
526 | assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | 518 | assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") |
527 | 519 | ||
528 | |||
529 | -- fixed length | ||
530 | do | ||
531 | -- 'and' predicate using fixed length | ||
532 | local p = m.C(#("a" * (m.P("bd") + "cd")) * 2) | ||
533 | assert(p:match("acd") == "ac") | ||
534 | |||
535 | p = #m.P{ "a" * m.V(2), m.P"b" } * 2 | ||
536 | assert(p:match("abc") == 3) | ||
537 | |||
538 | p = #(m.P"abc" * m.B"c") | ||
539 | assert(p:match("abc") == 1 and not p:match("ab")) | ||
540 | |||
541 | p = m.P{ "a" * m.V(2), m.P"b"^1 } | ||
542 | checkerr("pattern may not have fixed length", m.B, p) | ||
543 | |||
544 | p = "abc" * (m.P"b"^1 + m.P"a"^0) | ||
545 | checkerr("pattern may not have fixed length", m.B, p) | ||
546 | end | ||
547 | |||
548 | |||
549 | p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) | 520 | p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) |
550 | assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) | 521 | assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) |
551 | 522 | ||
@@ -1127,32 +1098,6 @@ do | |||
1127 | assert(c == 11) | 1098 | assert(c == 11) |
1128 | end | 1099 | end |
1129 | 1100 | ||
1130 | |||
1131 | -- Return a match-time capture that returns 'n' captures | ||
1132 | local function manyCmt (n) | ||
1133 | return m.Cmt("a", function () | ||
1134 | local a = {}; for i = 1, n do a[i] = n - i end | ||
1135 | return true, unpack(a) | ||
1136 | end) | ||
1137 | end | ||
1138 | |||
1139 | -- bug in 1.0: failed match-time that used previous match-time results | ||
1140 | do | ||
1141 | local x | ||
1142 | local function aux (...) x = #{...}; return false end | ||
1143 | local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")} | ||
1144 | assert(#res == 10 and res[1] == 9 and res[10] == 0) | ||
1145 | end | ||
1146 | |||
1147 | |||
1148 | -- bug in 1.0: problems with math-times returning too many captures | ||
1149 | do | ||
1150 | local lim = 2^11 - 10 | ||
1151 | local res = {m.match(manyCmt(lim), "a")} | ||
1152 | assert(#res == lim and res[1] == lim - 1 and res[lim] == 0) | ||
1153 | checkerr("too many", m.match, manyCmt(2^15), "a") | ||
1154 | end | ||
1155 | |||
1156 | p = (m.P(function () return true, "a" end) * 'a' | 1101 | p = (m.P(function () return true, "a" end) * 'a' |
1157 | + m.P(function (s, i) return i, "aa", 20 end) * 'b' | 1102 | + m.P(function (s, i) return i, "aa", 20 end) * 'b' |
1158 | + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 | 1103 | + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 |