diff options
author | Sergio Queiroz <sqmedeiros@gmail.com> | 2017-07-06 11:21:56 -0300 |
---|---|---|
committer | Sergio Queiroz <sqmedeiros@gmail.com> | 2017-07-06 11:21:56 -0300 |
commit | 8ee42c29131e1c7de48575d6d8a9b24ea6977cbd (patch) | |
tree | 4267e0cf0609a909be1f2eaea17d11b0efc12c98 | |
parent | aad9b29e755d67d6cb4e4b861111df0d173819ce (diff) | |
download | lpeglabel-8ee42c29131e1c7de48575d6d8a9b24ea6977cbd.tar.gz lpeglabel-8ee42c29131e1c7de48575d6d8a9b24ea6977cbd.tar.bz2 lpeglabel-8ee42c29131e1c7de48575d6d8a9b24ea6977cbd.zip |
Updating lpeglabel to the codebase of LPeg 1.0.1
-rw-r--r-- | lpcap.h | 19 | ||||
-rw-r--r-- | lpcode.c | 87 | ||||
-rw-r--r-- | lpcode.h | 6 | ||||
-rw-r--r-- | lpprint.c | 22 | ||||
-rw-r--r-- | lptree.c | 19 | ||||
-rw-r--r-- | lptree.h | 57 | ||||
-rw-r--r-- | lptypes.h | 10 | ||||
-rw-r--r-- | lpvm.c | 38 | ||||
-rw-r--r-- | makefile | 4 | ||||
-rwxr-xr-x | test.lua | 57 |
10 files changed, 217 insertions, 102 deletions
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ | 2 | ** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $ |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #if !defined(lpcap_h) | 5 | #if !defined(lpcap_h) |
@@ -11,8 +11,21 @@ | |||
11 | 11 | ||
12 | /* kinds of captures */ | 12 | /* kinds of captures */ |
13 | typedef enum CapKind { | 13 | typedef enum CapKind { |
14 | Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, | 14 | Cclose, /* not used in trees */ |
15 | Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup | 15 | Cposition, |
16 | Cconst, /* ktable[key] is Lua constant */ | ||
17 | Cbackref, /* ktable[key] is "name" of group to get capture */ | ||
18 | Carg, /* 'key' is arg's number */ | ||
19 | Csimple, /* next node is pattern */ | ||
20 | Ctable, /* next node is pattern */ | ||
21 | Cfunction, /* ktable[key] is function; next node is pattern */ | ||
22 | Cquery, /* ktable[key] is table; next node is pattern */ | ||
23 | Cstring, /* ktable[key] is string; next node is pattern */ | ||
24 | Cnum, /* numbered capture; 'key' is number of value to return */ | ||
25 | Csubst, /* substitution capture; next node is pattern */ | ||
26 | Cfold, /* ktable[key] is function; next node is pattern */ | ||
27 | Cruntime, /* not used in trees (is uses another type for tree) */ | ||
28 | Cgroup /* ktable[key] is group's "name" */ | ||
16 | } CapKind; | 29 | } CapKind; |
17 | 30 | ||
18 | 31 | ||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $ | 2 | ** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $ |
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -126,6 +126,27 @@ int tocharset (TTree *tree, Charset *cs) { | |||
126 | 126 | ||
127 | 127 | ||
128 | /* | 128 | /* |
129 | ** Visit a TCall node taking care to stop recursion. If node not yet | ||
130 | ** visited, return 'f(sib2(tree))', otherwise return 'def' (default | ||
131 | ** value) | ||
132 | */ | ||
133 | static int callrecursive (TTree *tree, int f (TTree *t), int def) { | ||
134 | int key = tree->key; | ||
135 | assert(tree->tag == TCall); | ||
136 | assert(sib2(tree)->tag == TRule); | ||
137 | if (key == 0) /* node already visited? */ | ||
138 | return def; /* return default value */ | ||
139 | else { /* first visit */ | ||
140 | int result; | ||
141 | tree->key = 0; /* mark call as already visited */ | ||
142 | result = f(sib2(tree)); /* go to called rule */ | ||
143 | tree->key = key; /* restore tree */ | ||
144 | return result; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | |||
149 | /* | ||
129 | ** Check whether a pattern tree has captures | 150 | ** Check whether a pattern tree has captures |
130 | */ | 151 | */ |
131 | int hascaptures (TTree *tree) { | 152 | int hascaptures (TTree *tree) { |
@@ -134,14 +155,17 @@ int hascaptures (TTree *tree) { | |||
134 | case TCapture: case TRunTime: | 155 | case TCapture: case TRunTime: |
135 | return 1; | 156 | return 1; |
136 | case TCall: | 157 | case TCall: |
137 | tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ | 158 | return callrecursive(tree, hascaptures, 0); |
159 | case TRule: /* do not follow siblings */ | ||
160 | tree = sib1(tree); goto tailcall; | ||
138 | case TOpenCall: assert(0); | 161 | case TOpenCall: assert(0); |
139 | default: { | 162 | default: { |
140 | switch (numsiblings[tree->tag]) { | 163 | switch (numsiblings[tree->tag]) { |
141 | case 1: /* return hascaptures(sib1(tree)); */ | 164 | case 1: /* return hascaptures(sib1(tree)); */ |
142 | tree = sib1(tree); goto tailcall; | 165 | tree = sib1(tree); goto tailcall; |
143 | case 2: | 166 | case 2: |
144 | if (hascaptures(sib1(tree))) return 1; | 167 | if (hascaptures(sib1(tree))) |
168 | return 1; | ||
145 | /* else return hascaptures(sib2(tree)); */ | 169 | /* else return hascaptures(sib2(tree)); */ |
146 | tree = sib2(tree); goto tailcall; | 170 | tree = sib2(tree); goto tailcall; |
147 | default: assert(numsiblings[tree->tag] == 0); return 0; | 171 | default: assert(numsiblings[tree->tag] == 0); return 0; |
@@ -211,39 +235,42 @@ int checkaux (TTree *tree, int pred) { | |||
211 | 235 | ||
212 | /* | 236 | /* |
213 | ** number of characters to match a pattern (or -1 if variable) | 237 | ** number of characters to match a pattern (or -1 if variable) |
214 | ** ('count' avoids infinite loops for grammars) | ||
215 | */ | 238 | */ |
216 | int fixedlenx (TTree *tree, int count, int len) { | 239 | int fixedlen (TTree *tree) { |
240 | int len = 0; /* to accumulate in tail calls */ | ||
217 | tailcall: | 241 | tailcall: |
218 | switch (tree->tag) { | 242 | switch (tree->tag) { |
219 | case TChar: case TSet: case TAny: | 243 | case TChar: case TSet: case TAny: |
220 | return len + 1; | 244 | return len + 1; |
221 | case TFalse: case TTrue: case TNot: case TAnd: case TBehind: | 245 | case TFalse: case TTrue: case TNot: case TAnd: case TBehind: |
222 | return len; | 246 | return len; |
223 | case TRep: case TRunTime: case TOpenCall: | 247 | case TRep: case TRunTime: case TOpenCall: |
224 | case TThrow: /* labeled failure */ | 248 | case TThrow: /* labeled failure */ |
225 | return -1; | 249 | return -1; |
226 | case TCapture: case TRule: case TGrammar: | 250 | case TCapture: case TRule: case TGrammar: |
227 | /* return fixedlenx(sib1(tree), count); */ | 251 | /* return fixedlen(sib1(tree)); */ |
228 | tree = sib1(tree); goto tailcall; | 252 | tree = sib1(tree); goto tailcall; |
229 | case TCall: | 253 | case TCall: { |
230 | if (count++ >= MAXRULES) | 254 | int n1 = callrecursive(tree, fixedlen, -1); |
231 | return -1; /* may be a loop */ | 255 | if (n1 < 0) |
232 | /* else return fixedlenx(sib2(tree), count); */ | 256 | return -1; |
233 | tree = sib2(tree); goto tailcall; | 257 | else |
258 | return len + n1; | ||
259 | } | ||
234 | case TSeq: case TRecov: { /* labeled failure */ | 260 | case TSeq: case TRecov: { /* labeled failure */ |
235 | len = fixedlenx(sib1(tree), count, len); | 261 | int n1 = fixedlen(sib1(tree)); |
236 | if (len < 0) return -1; | 262 | if (n1 < 0) |
237 | /* else return fixedlenx(sib2(tree), count, len); */ | 263 | return -1; |
238 | tree = sib2(tree); goto tailcall; | 264 | /* else return fixedlen(sib2(tree)) + len; */ |
265 | len += n1; tree = sib2(tree); goto tailcall; | ||
239 | } | 266 | } |
240 | case TChoice: { | 267 | case TChoice: { |
241 | int n1, n2; | 268 | int n1 = fixedlen(sib1(tree)); |
242 | n1 = fixedlenx(sib1(tree), count, len); | 269 | int n2 = fixedlen(sib2(tree)); |
243 | if (n1 < 0) return -1; | 270 | if (n1 != n2 || n1 < 0) |
244 | n2 = fixedlenx(sib2(tree), count, len); | 271 | return -1; |
245 | if (n1 == n2) return n1; | 272 | else |
246 | else return -1; | 273 | return len + n1; |
247 | } | 274 | } |
248 | default: assert(0); return 0; | 275 | default: assert(0); return 0; |
249 | }; | 276 | }; |
@@ -287,7 +314,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
287 | loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */ | 314 | loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */ |
288 | return 1; | 315 | return 1; |
289 | } | 316 | } |
290 | case TChoice: { | 317 | case TChoice: { |
291 | Charset csaux; | 318 | Charset csaux; |
292 | int e1 = getfirst(sib1(tree), follow, firstset); | 319 | int e1 = getfirst(sib1(tree), follow, firstset); |
293 | int e2 = getfirst(sib2(tree), follow, &csaux); | 320 | int e2 = getfirst(sib2(tree), follow, &csaux); |
@@ -378,7 +405,7 @@ static int headfail (TTree *tree) { | |||
378 | if (!nofail(sib2(tree))) return 0; | 405 | if (!nofail(sib2(tree))) return 0; |
379 | /* else return headfail(sib1(tree)); */ | 406 | /* else return headfail(sib1(tree)); */ |
380 | tree = sib1(tree); goto tailcall; | 407 | tree = sib1(tree); goto tailcall; |
381 | case TChoice: case TRecov: /* labeled failure */ | 408 | case TChoice: case TRecov: /* labeled failure */ |
382 | if (!headfail(sib1(tree))) return 0; | 409 | if (!headfail(sib1(tree))) return 0; |
383 | /* else return headfail(sib2(tree)); */ | 410 | /* else return headfail(sib2(tree)); */ |
384 | tree = sib2(tree); goto tailcall; | 411 | tree = sib2(tree); goto tailcall; |
@@ -433,8 +460,9 @@ int sizei (const Instruction *i) { | |||
433 | return 2; | 460 | return 2; |
434 | case IThrow: /* labeled failure */ | 461 | case IThrow: /* labeled failure */ |
435 | return 1; | 462 | return 1; |
436 | case IRecov: | 463 | case IRecov: |
437 | return (CHARSETINSTSIZE - 1) + 2; /* labeled failure */ | 464 | return (CHARSETINSTSIZE - 1) + 2; /* labeled failure */ |
465 | |||
438 | default: return 1; | 466 | default: return 1; |
439 | } | 467 | } |
440 | } | 468 | } |
@@ -750,9 +778,10 @@ static void codeand (CompileState *compst, TTree *tree, int tt) { | |||
750 | 778 | ||
751 | 779 | ||
752 | /* | 780 | /* |
753 | ** Captures: if pattern has fixed (and not too big) length, use | 781 | ** Captures: if pattern has fixed (and not too big) length, and it |
754 | ** a single IFullCapture instruction after the match; otherwise, | 782 | ** has no nested captures, use a single IFullCapture instruction |
755 | ** enclose the pattern with OpenCapture - CloseCapture. | 783 | ** after the match; otherwise, enclose the pattern with OpenCapture - |
784 | ** CloseCapture. | ||
756 | */ | 785 | */ |
757 | static void codecapture (CompileState *compst, TTree *tree, int tt, | 786 | static void codecapture (CompileState *compst, TTree *tree, int tt, |
758 | const Charset *fl) { | 787 | const Charset *fl) { |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $ | 2 | ** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $ |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #if !defined(lpcode_h) | 5 | #if !defined(lpcode_h) |
@@ -13,7 +13,7 @@ | |||
13 | 13 | ||
14 | int tocharset (TTree *tree, Charset *cs); | 14 | int tocharset (TTree *tree, Charset *cs); |
15 | int checkaux (TTree *tree, int pred); | 15 | int checkaux (TTree *tree, int pred); |
16 | int fixedlenx (TTree *tree, int count, int len); | 16 | int fixedlen (TTree *tree); |
17 | int hascaptures (TTree *tree); | 17 | int hascaptures (TTree *tree); |
18 | int lp_gc (lua_State *L); | 18 | int lp_gc (lua_State *L); |
19 | Instruction *compile (lua_State *L, Pattern *p); | 19 | Instruction *compile (lua_State *L, Pattern *p); |
@@ -35,8 +35,6 @@ int sizei (const Instruction *i); | |||
35 | */ | 35 | */ |
36 | #define nullable(t) checkaux(t, PEnullable) | 36 | #define nullable(t) checkaux(t, PEnullable) |
37 | 37 | ||
38 | #define fixedlen(t) fixedlenx(t, 0, 0) | ||
39 | |||
40 | 38 | ||
41 | 39 | ||
42 | #endif | 40 | #endif |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $ | 2 | ** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $ |
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -37,13 +37,13 @@ void printcharset (const byte *st) { | |||
37 | } | 37 | } |
38 | 38 | ||
39 | 39 | ||
40 | static void printcapkind (int kind) { | 40 | static const char *capkind (int kind) { |
41 | const char *const modes[] = { | 41 | const char *const modes[] = { |
42 | "close", "position", "constant", "backref", | 42 | "close", "position", "constant", "backref", |
43 | "argument", "simple", "table", "function", | 43 | "argument", "simple", "table", "function", |
44 | "query", "string", "num", "substitution", "fold", | 44 | "query", "string", "num", "substitution", "fold", |
45 | "runtime", "group"}; | 45 | "runtime", "group"}; |
46 | printf("%s", modes[kind]); | 46 | return modes[kind]; |
47 | } | 47 | } |
48 | 48 | ||
49 | 49 | ||
@@ -74,13 +74,12 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
74 | break; | 74 | break; |
75 | } | 75 | } |
76 | case IFullCapture: { | 76 | case IFullCapture: { |
77 | printcapkind(getkind(p)); | 77 | printf("%s (size = %d) (idx = %d)", |
78 | printf(" (size = %d) (idx = %d)", getoff(p), p->i.key); | 78 | capkind(getkind(p)), getoff(p), p->i.key); |
79 | break; | 79 | break; |
80 | } | 80 | } |
81 | case IOpenCapture: { | 81 | case IOpenCapture: { |
82 | printcapkind(getkind(p)); | 82 | printf("%s (idx = %d)", capkind(getkind(p)), p->i.key); |
83 | printf(" (idx = %d)", p->i.key); | ||
84 | break; | 83 | break; |
85 | } | 84 | } |
86 | case ISet: { | 85 | case ISet: { |
@@ -134,8 +133,8 @@ void printpatt (Instruction *p, int n) { | |||
134 | 133 | ||
135 | #if defined(LPEG_DEBUG) | 134 | #if defined(LPEG_DEBUG) |
136 | static void printcap (Capture *cap) { | 135 | static void printcap (Capture *cap) { |
137 | printcapkind(cap->kind); | 136 | printf("%s (idx: %d - size: %d) -> %p\n", |
138 | printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s); | 137 | capkind(cap->kind), cap->idx, cap->siz, cap->s); |
139 | } | 138 | } |
140 | 139 | ||
141 | 140 | ||
@@ -188,7 +187,8 @@ void printtree (TTree *tree, int ident) { | |||
188 | break; | 187 | break; |
189 | } | 188 | } |
190 | case TOpenCall: case TCall: { | 189 | case TOpenCall: case TCall: { |
191 | printf(" key: %d\n", tree->key); | 190 | assert(sib2(tree)->tag == TRule); |
191 | printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap); | ||
192 | break; | 192 | break; |
193 | } | 193 | } |
194 | case TBehind: { | 194 | case TBehind: { |
@@ -197,7 +197,7 @@ void printtree (TTree *tree, int ident) { | |||
197 | break; | 197 | break; |
198 | } | 198 | } |
199 | case TCapture: { | 199 | case TCapture: { |
200 | printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n); | 200 | printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); |
201 | printtree(sib1(tree), ident + 2); | 201 | printtree(sib1(tree), ident + 2); |
202 | break; | 202 | break; |
203 | } | 203 | } |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $ | 2 | ** $Id: lptree.c,v 1.22 2016/09/13 18:10:22 roberto Exp $ |
3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -65,7 +65,7 @@ static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) { | |||
65 | t->tag = TCall; | 65 | t->tag = TCall; |
66 | t->u.s.ps = n - (t - g); /* position relative to node */ | 66 | t->u.s.ps = n - (t - g); /* position relative to node */ |
67 | assert(sib2(t)->tag == TRule); | 67 | assert(sib2(t)->tag == TRule); |
68 | sib2(t)->key = t->key; | 68 | sib2(t)->key = t->key; /* fix rule's key */ |
69 | } | 69 | } |
70 | 70 | ||
71 | 71 | ||
@@ -542,7 +542,6 @@ static TTree *newrootlab2sib (lua_State *L, int tag) { | |||
542 | /* labeled failure end */ | 542 | /* labeled failure end */ |
543 | 543 | ||
544 | 544 | ||
545 | |||
546 | static int lp_P (lua_State *L) { | 545 | static int lp_P (lua_State *L) { |
547 | luaL_checkany(L, 1); | 546 | luaL_checkany(L, 1); |
548 | getpatt(L, 1, NULL); | 547 | getpatt(L, 1, NULL); |
@@ -728,6 +727,7 @@ static int lp_throw (lua_State *L) { | |||
728 | return 1; | 727 | return 1; |
729 | } | 728 | } |
730 | 729 | ||
730 | |||
731 | /* | 731 | /* |
732 | ** labeled recovery function | 732 | ** labeled recovery function |
733 | */ | 733 | */ |
@@ -747,7 +747,6 @@ static int lp_recovery (lua_State *L) { | |||
747 | } | 747 | } |
748 | return 1; | 748 | return 1; |
749 | } | 749 | } |
750 | |||
751 | /* labeled failure end */ | 750 | /* labeled failure end */ |
752 | 751 | ||
753 | 752 | ||
@@ -996,7 +995,7 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | |||
996 | int rulesize; | 995 | int rulesize; |
997 | TTree *rn = gettree(L, ridx, &rulesize); | 996 | TTree *rn = gettree(L, ridx, &rulesize); |
998 | nd->tag = TRule; | 997 | nd->tag = TRule; |
999 | nd->key = 0; | 998 | nd->key = 0; /* will be fixed when rule is used */ |
1000 | nd->cap = i; /* rule number */ | 999 | nd->cap = i; /* rule number */ |
1001 | nd->u.s.ps = rulesize + 1; /* point to next rule */ | 1000 | nd->u.s.ps = rulesize + 1; /* point to next rule */ |
1002 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ | 1001 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ |
@@ -1030,6 +1029,11 @@ static int checkloops (TTree *tree) { | |||
1030 | } | 1029 | } |
1031 | 1030 | ||
1032 | 1031 | ||
1032 | /* | ||
1033 | ** Give appropriate error message for 'verifyrule'. If a rule appears | ||
1034 | ** twice in 'passed', there is path from it back to itself without | ||
1035 | ** advancing the subject. | ||
1036 | */ | ||
1033 | static int verifyerror (lua_State *L, int *passed, int npassed) { | 1037 | static int verifyerror (lua_State *L, int *passed, int npassed) { |
1034 | int i, j; | 1038 | int i, j; |
1035 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | 1039 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ |
@@ -1051,6 +1055,8 @@ static int verifyerror (lua_State *L, int *passed, int npassed) { | |||
1051 | ** is only relevant if the first is nullable. | 1055 | ** is only relevant if the first is nullable. |
1052 | ** Parameter 'nb' works as an accumulator, to allow tail calls in | 1056 | ** Parameter 'nb' works as an accumulator, to allow tail calls in |
1053 | ** choices. ('nb' true makes function returns true.) | 1057 | ** choices. ('nb' true makes function returns true.) |
1058 | ** Parameter 'passed' is a list of already visited rules, 'npassed' | ||
1059 | ** counts the elements in 'passed'. | ||
1054 | ** Assume ktable at the top of the stack. | 1060 | ** Assume ktable at the top of the stack. |
1055 | */ | 1061 | */ |
1056 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | 1062 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, |
@@ -1330,7 +1336,7 @@ static struct luaL_Reg pattreg[] = { | |||
1330 | {"setmaxstack", lp_setmax}, | 1336 | {"setmaxstack", lp_setmax}, |
1331 | {"type", lp_type}, | 1337 | {"type", lp_type}, |
1332 | {"T", lp_throw}, /* labeled failure throw */ | 1338 | {"T", lp_throw}, /* labeled failure throw */ |
1333 | {"Rec", lp_recovery}, /* labeled failure choice */ | 1339 | {"Rec", lp_recovery}, /* labeled failure recovery */ |
1334 | {NULL, NULL} | 1340 | {NULL, NULL} |
1335 | }; | 1341 | }; |
1336 | 1342 | ||
@@ -1347,7 +1353,6 @@ static struct luaL_Reg metareg[] = { | |||
1347 | {NULL, NULL} | 1353 | {NULL, NULL} |
1348 | }; | 1354 | }; |
1349 | 1355 | ||
1350 | |||
1351 | int luaopen_lpeglabel (lua_State *L); /* labeled failure */ | 1356 | int luaopen_lpeglabel (lua_State *L); /* labeled failure */ |
1352 | int luaopen_lpeglabel (lua_State *L) { /* labeled failure */ | 1357 | int luaopen_lpeglabel (lua_State *L) { /* labeled failure */ |
1353 | luaL_newmetatable(L, PATTERN_T); | 1358 | luaL_newmetatable(L, PATTERN_T); |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ | 2 | ** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $ |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #if !defined(lptree_h) | 5 | #if !defined(lptree_h) |
@@ -13,32 +13,39 @@ | |||
13 | ** types of trees | 13 | ** types of trees |
14 | */ | 14 | */ |
15 | typedef enum TTag { | 15 | typedef enum TTag { |
16 | TChar = 0, TSet, TAny, /* standard PEG elements */ | 16 | TChar = 0, /* 'n' = char */ |
17 | TTrue, TFalse, | 17 | TSet, /* the set is stored in next CHARSETSIZE bytes */ |
18 | TRep, | 18 | TAny, |
19 | TSeq, TChoice, | 19 | TTrue, |
20 | TNot, TAnd, | 20 | TFalse, |
21 | TCall, | 21 | TRep, /* 'sib1'* */ |
22 | TOpenCall, | 22 | TSeq, /* 'sib1' 'sib2' */ |
23 | TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */ | 23 | TChoice, /* 'sib1' / 'sib2' */ |
24 | TGrammar, /* sib1 is initial (and first) rule */ | 24 | TNot, /* !'sib1' */ |
25 | TBehind, /* match behind */ | 25 | TAnd, /* &'sib1' */ |
26 | TCapture, /* regular capture */ | 26 | TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ |
27 | TRunTime, /* run-time capture */ | 27 | TOpenCall, /* ktable[key] is rule's key */ |
28 | TThrow, TRecov /* labeled failure */ | 28 | TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); |
29 | 'sib1' is rule's pattern; | ||
30 | 'sib2' is next rule; 'cap' is rule's sequential number */ | ||
31 | TGrammar, /* 'sib1' is initial (and first) rule */ | ||
32 | TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ | ||
33 | TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); | ||
34 | ktable[key] is Lua value associated with capture; | ||
35 | 'sib1' is capture body */ | ||
36 | TRunTime, /* run-time capture: 'key' is Lua function; | ||
37 | 'sib1' is capture body */ | ||
38 | TThrow, /* labeled failure: 'label' = l */ | ||
39 | TRecov /* labed failure: 'sib1' // 'sib2' */ | ||
40 | /* the set of labels is stored in next CHARSETSIZE bytes */ | ||
29 | } TTag; | 41 | } TTag; |
30 | 42 | ||
31 | /* number of siblings for each tree */ | ||
32 | extern const byte numsiblings[]; | ||
33 | |||
34 | 43 | ||
35 | /* | 44 | /* |
36 | ** Tree trees | 45 | ** Tree trees |
37 | ** The first sibling of a tree (if there is one) is immediately after | 46 | ** The first child of a tree (if there is one) is immediately after |
38 | ** the tree. A reference to a second sibling (ps) is its position | 47 | ** the tree. A reference to a second child (ps) is its position |
39 | ** relative to the position of the tree itself. A key in ktable | 48 | ** relative to the position of the tree itself. |
40 | ** uses the (unique) address of the original tree that created that | ||
41 | ** entry. NULL means no data. | ||
42 | */ | 49 | */ |
43 | typedef struct TTree { | 50 | typedef struct TTree { |
44 | byte tag; | 51 | byte tag; |
@@ -48,7 +55,7 @@ typedef struct TTree { | |||
48 | int n; /* occasional counter */ | 55 | int n; /* occasional counter */ |
49 | int label; /* labeled failure */ | 56 | int label; /* labeled failure */ |
50 | struct { /* labeled failure */ | 57 | struct { /* labeled failure */ |
51 | int ps; /* occasional second sibling */ | 58 | int ps; /* occasional second child */ |
52 | int plab; /* occasional label set */ | 59 | int plab; /* occasional label set */ |
53 | } s; /* labeled failure */ | 60 | } s; /* labeled failure */ |
54 | } u; | 61 | } u; |
@@ -66,10 +73,10 @@ typedef struct Pattern { | |||
66 | } Pattern; | 73 | } Pattern; |
67 | 74 | ||
68 | 75 | ||
69 | /* number of siblings for each tree */ | 76 | /* number of children for each tree */ |
70 | extern const byte numsiblings[]; | 77 | extern const byte numsiblings[]; |
71 | 78 | ||
72 | /* access to siblings */ | 79 | /* access to children */ |
73 | #define sib1(t) ((t) + 1) | 80 | #define sib1(t) ((t) + 1) |
74 | #define sib2(t) ((t) + (t)->u.s.ps) | 81 | #define sib2(t) ((t) + (t)->u.s.ps) |
75 | 82 | ||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lptypes.h,v 1.14 2015/09/28 17:17:41 roberto Exp $ | 2 | ** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $ |
3 | ** LPeg - PEG pattern matching for Lua | 3 | ** LPeg - PEG pattern matching for Lua |
4 | ** Copyright 2007-2015, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 4 | ** Copyright 2007-2017, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
5 | ** written by Roberto Ierusalimschy | 5 | ** written by Roberto Ierusalimschy |
6 | */ | 6 | */ |
7 | 7 | ||
@@ -19,7 +19,7 @@ | |||
19 | #include "lua.h" | 19 | #include "lua.h" |
20 | 20 | ||
21 | 21 | ||
22 | #define VERSION "1.0.0" | 22 | #define VERSION "1.0.1" |
23 | 23 | ||
24 | 24 | ||
25 | #define PATTERN_T "lpeg-pattern" | 25 | #define PATTERN_T "lpeg-pattern" |
@@ -55,9 +55,9 @@ | |||
55 | #endif | 55 | #endif |
56 | 56 | ||
57 | 57 | ||
58 | /* maximum number of rules in a grammar */ | 58 | /* maximum number of rules in a grammar (limited by 'unsigned char') */ |
59 | #if !defined(MAXRULES) | 59 | #if !defined(MAXRULES) |
60 | #define MAXRULES 1000 | 60 | #define MAXRULES 250 |
61 | #endif | 61 | #endif |
62 | 62 | ||
63 | 63 | ||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $ | 2 | ** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $ |
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -33,6 +33,7 @@ static void setlabelfail(Labelset *ls) { | |||
33 | } | 33 | } |
34 | /* labeled failure end */ | 34 | /* labeled failure end */ |
35 | 35 | ||
36 | |||
36 | /* | 37 | /* |
37 | ** {====================================================== | 38 | ** {====================================================== |
38 | ** Virtual Machine | 39 | ** Virtual Machine |
@@ -52,14 +53,16 @@ typedef struct Stack { | |||
52 | 53 | ||
53 | 54 | ||
54 | /* | 55 | /* |
55 | ** Double the size of the array of captures | 56 | ** Make the size of the array of captures 'cap' twice as large as needed |
57 | ** (which is 'captop'). ('n' is the number of new elements.) | ||
56 | */ | 58 | */ |
57 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { | 59 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, |
60 | int n, int ptop) { | ||
58 | Capture *newc; | 61 | Capture *newc; |
59 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) | 62 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) |
60 | luaL_error(L, "too many captures"); | 63 | luaL_error(L, "too many captures"); |
61 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); | 64 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); |
62 | memcpy(newc, cap, captop * sizeof(Capture)); | 65 | memcpy(newc, cap, (captop - n) * sizeof(Capture)); |
63 | lua_replace(L, caplistidx(ptop)); | 66 | lua_replace(L, caplistidx(ptop)); |
64 | return newc; | 67 | return newc; |
65 | } | 68 | } |
@@ -120,8 +123,8 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | |||
120 | */ | 123 | */ |
121 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { | 124 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { |
122 | int i; | 125 | int i; |
123 | /* Cgroup capture is already there */ | 126 | base[0].kind = Cgroup; /* create group capture */ |
124 | assert(base[0].kind == Cgroup && base[0].siz == 0); | 127 | base[0].siz = 0; |
125 | base[0].idx = 0; /* make it an anonymous group */ | 128 | base[0].idx = 0; /* make it an anonymous group */ |
126 | for (i = 1; i <= n; i++) { /* add runtime captures */ | 129 | for (i = 1; i <= n; i++) { /* add runtime captures */ |
127 | base[i].kind = Cruntime; | 130 | base[i].kind = Cruntime; |
@@ -148,8 +151,6 @@ static int removedyncap (lua_State *L, Capture *capture, | |||
148 | } | 151 | } |
149 | 152 | ||
150 | 153 | ||
151 | |||
152 | |||
153 | /* | 154 | /* |
154 | ** Opcode interpreter | 155 | ** Opcode interpreter |
155 | */ | 156 | */ |
@@ -170,10 +171,11 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
170 | lua_pushlightuserdata(L, stackbase); | 171 | lua_pushlightuserdata(L, stackbase); |
171 | for (;;) { | 172 | for (;;) { |
172 | #if defined(DEBUG) | 173 | #if defined(DEBUG) |
173 | printinst(op, p); | 174 | printf("-------------------------------------\n"); |
174 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", | ||
175 | s, stack - getstackbase(L, ptop), ndyncap, captop); | ||
176 | printcaplist(capture, capture + captop); | 175 | printcaplist(capture, capture + captop); |
176 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", | ||
177 | s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); | ||
178 | printinst(op, p); | ||
177 | #endif | 179 | #endif |
178 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); | 180 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); |
179 | switch ((Opcode)p->i.code) { | 181 | switch ((Opcode)p->i.code) { |
@@ -275,7 +277,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
275 | p += 2; | 277 | p += 2; |
276 | continue; | 278 | continue; |
277 | } | 279 | } |
278 | case IRecov: { /* labeled failure */ | 280 | case IRecov: { /* labeled failure */ |
279 | if (stack == stacklimit) | 281 | if (stack == stacklimit) |
280 | stack = doublestack(L, &stacklimit, ptop); | 282 | stack = doublestack(L, &stacklimit, ptop); |
281 | stack->p = p + getoffset(p); | 283 | stack->p = p + getoffset(p); |
@@ -354,6 +356,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
354 | stack++; | 356 | stack++; |
355 | } | 357 | } |
356 | p = pstack->p; | 358 | p = pstack->p; |
359 | #if defined(DEBUG) | ||
360 | printf("**FAIL**\n"); | ||
361 | #endif | ||
357 | continue; | 362 | continue; |
358 | } | 363 | } |
359 | case ICloseRunTime: { | 364 | case ICloseRunTime: { |
@@ -363,6 +368,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
363 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; | 368 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; |
364 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ | 369 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ |
365 | captop -= n; /* remove nested captures */ | 370 | captop -= n; /* remove nested captures */ |
371 | ndyncap -= rem; /* update number of dynamic captures */ | ||
366 | fr -= rem; /* 'rem' items were popped from Lua stack */ | 372 | fr -= rem; /* 'rem' items were popped from Lua stack */ |
367 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ | 373 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ |
368 | if (res == -1) { /* fail? */ | 374 | if (res == -1) { /* fail? */ |
@@ -373,10 +379,12 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
373 | } | 379 | } |
374 | s = o + res; /* else update current position */ | 380 | s = o + res; /* else update current position */ |
375 | n = lua_gettop(L) - fr + 1; /* number of new captures */ | 381 | n = lua_gettop(L) - fr + 1; /* number of new captures */ |
376 | ndyncap += n - rem; /* update number of dynamic captures */ | 382 | ndyncap += n; /* update number of dynamic captures */ |
377 | if (n > 0) { /* any new capture? */ | 383 | if (n > 0) { /* any new capture? */ |
384 | if (fr + n >= SHRT_MAX) | ||
385 | luaL_error(L, "too many results in match-time capture"); | ||
378 | if ((captop += n + 2) >= capsize) { | 386 | if ((captop += n + 2) >= capsize) { |
379 | capture = doublecap(L, capture, captop, ptop); | 387 | capture = doublecap(L, capture, captop, n + 2, ptop); |
380 | capsize = 2 * captop; | 388 | capsize = 2 * captop; |
381 | } | 389 | } |
382 | /* add new captures to 'capture' list */ | 390 | /* add new captures to 'capture' list */ |
@@ -413,7 +421,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
413 | capture[captop].idx = p->i.key; | 421 | capture[captop].idx = p->i.key; |
414 | capture[captop].kind = getkind(p); | 422 | capture[captop].kind = getkind(p); |
415 | if (++captop >= capsize) { | 423 | if (++captop >= capsize) { |
416 | capture = doublecap(L, capture, captop, ptop); | 424 | capture = doublecap(L, capture, captop, 0, ptop); |
417 | capsize = 2 * captop; | 425 | capsize = 2 * captop; |
418 | } | 426 | } |
419 | p++; | 427 | p++; |
@@ -45,10 +45,10 @@ lpeglabel.dll: $(FILES) | |||
45 | 45 | ||
46 | $(FILES): makefile | 46 | $(FILES): makefile |
47 | 47 | ||
48 | test: test.lua testlabel.lua testerrors.lua relabel.lua lpeglabel.so | 48 | test: test.lua testlabel.lua testrelabelparser.lua relabel.lua lpeglabel.so |
49 | lua test.lua | 49 | lua test.lua |
50 | lua testlabel.lua | 50 | lua testlabel.lua |
51 | lua testerrors.lua | 51 | lua testrelabelparser.lua |
52 | 52 | ||
53 | clean: | 53 | clean: |
54 | rm -f $(FILES) lpeglabel.so | 54 | rm -f $(FILES) lpeglabel.so |
@@ -1,6 +1,6 @@ | |||
1 | #!/usr/bin/env lua | 1 | #!/usr/bin/env lua |
2 | 2 | ||
3 | -- $Id: test.lua,v 1.109 2015/09/28 17:01:25 roberto Exp $ | 3 | -- $Id: test.lua,v 1.112 2017/01/14 18:55:22 roberto Exp $ |
4 | 4 | ||
5 | -- require"strict" -- just to be pedantic | 5 | -- require"strict" -- just to be pedantic |
6 | 6 | ||
@@ -202,6 +202,14 @@ do | |||
202 | end | 202 | end |
203 | 203 | ||
204 | 204 | ||
205 | -- bug: loop in 'hascaptures' | ||
206 | do | ||
207 | local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'}) | ||
208 | assert(p:match("xxx") == "") | ||
209 | end | ||
210 | |||
211 | |||
212 | |||
205 | -- test for small capture boundary | 213 | -- test for small capture boundary |
206 | for i = 250,260 do | 214 | for i = 250,260 do |
207 | assert(#m.match(m.C(i), string.rep('a', i)) == i) | 215 | assert(#m.match(m.C(i), string.rep('a', i)) == i) |
@@ -517,6 +525,27 @@ assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | |||
517 | assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | 525 | assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") |
518 | assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | 526 | assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") |
519 | 527 | ||
528 | |||
529 | -- fixed length | ||
530 | do | ||
531 | -- 'and' predicate using fixed length | ||
532 | local p = m.C(#("a" * (m.P("bd") + "cd")) * 2) | ||
533 | assert(p:match("acd") == "ac") | ||
534 | |||
535 | p = #m.P{ "a" * m.V(2), m.P"b" } * 2 | ||
536 | assert(p:match("abc") == 3) | ||
537 | |||
538 | p = #(m.P"abc" * m.B"c") | ||
539 | assert(p:match("abc") == 1 and not p:match("ab")) | ||
540 | |||
541 | p = m.P{ "a" * m.V(2), m.P"b"^1 } | ||
542 | checkerr("pattern may not have fixed length", m.B, p) | ||
543 | |||
544 | p = "abc" * (m.P"b"^1 + m.P"a"^0) | ||
545 | checkerr("pattern may not have fixed length", m.B, p) | ||
546 | end | ||
547 | |||
548 | |||
520 | p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) | 549 | p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) |
521 | assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) | 550 | assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) |
522 | 551 | ||
@@ -1098,6 +1127,32 @@ do | |||
1098 | assert(c == 11) | 1127 | assert(c == 11) |
1099 | end | 1128 | end |
1100 | 1129 | ||
1130 | |||
1131 | -- Return a match-time capture that returns 'n' captures | ||
1132 | local function manyCmt (n) | ||
1133 | return m.Cmt("a", function () | ||
1134 | local a = {}; for i = 1, n do a[i] = n - i end | ||
1135 | return true, unpack(a) | ||
1136 | end) | ||
1137 | end | ||
1138 | |||
1139 | -- bug in 1.0: failed match-time that used previous match-time results | ||
1140 | do | ||
1141 | local x | ||
1142 | local function aux (...) x = #{...}; return false end | ||
1143 | local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")} | ||
1144 | assert(#res == 10 and res[1] == 9 and res[10] == 0) | ||
1145 | end | ||
1146 | |||
1147 | |||
1148 | -- bug in 1.0: problems with math-times returning too many captures | ||
1149 | do | ||
1150 | local lim = 2^11 - 10 | ||
1151 | local res = {m.match(manyCmt(lim), "a")} | ||
1152 | assert(#res == lim and res[1] == lim - 1 and res[lim] == 0) | ||
1153 | checkerr("too many", m.match, manyCmt(2^15), "a") | ||
1154 | end | ||
1155 | |||
1101 | p = (m.P(function () return true, "a" end) * 'a' | 1156 | p = (m.P(function () return true, "a" end) * 'a' |
1102 | + m.P(function (s, i) return i, "aa", 20 end) * 'b' | 1157 | + m.P(function (s, i) return i, "aa", 20 end) * 'b' |
1103 | + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 | 1158 | + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 |