diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-02-20 10:13:46 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-02-20 10:13:46 -0300 |
commit | e08e5df853560de6482d84066a7accc6a18de545 (patch) | |
tree | ee19686bb35da90709a32ed24bf7855de1a3946a /lptree.c | |
download | lpeg-e08e5df853560de6482d84066a7accc6a18de545.tar.gz lpeg-e08e5df853560de6482d84066a7accc6a18de545.tar.bz2 lpeg-e08e5df853560de6482d84066a7accc6a18de545.zip |
Fist version of LPeg on GIT
LPeg repository is being moved to git. Past versions won't be moved;
they are still available in RCS.
Diffstat (limited to 'lptree.c')
-rw-r--r-- | lptree.c | 1305 |
1 files changed, 1305 insertions, 0 deletions
diff --git a/lptree.c b/lptree.c new file mode 100644 index 0000000..37bfaf0 --- /dev/null +++ b/lptree.c | |||
@@ -0,0 +1,1305 @@ | |||
1 | /* | ||
2 | ** $Id: lptree.c,v 1.23 2017/12/14 15:30:04 roberto Exp $ | ||
3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | |||
6 | #include <ctype.h> | ||
7 | #include <limits.h> | ||
8 | #include <string.h> | ||
9 | |||
10 | |||
11 | #include "lua.h" | ||
12 | #include "lauxlib.h" | ||
13 | |||
14 | #include "lptypes.h" | ||
15 | #include "lpcap.h" | ||
16 | #include "lpcode.h" | ||
17 | #include "lpprint.h" | ||
18 | #include "lptree.h" | ||
19 | |||
20 | |||
21 | /* number of siblings for each tree */ | ||
22 | const byte numsiblings[] = { | ||
23 | 0, 0, 0, /* char, set, any */ | ||
24 | 0, 0, /* true, false */ | ||
25 | 1, /* rep */ | ||
26 | 2, 2, /* seq, choice */ | ||
27 | 1, 1, /* not, and */ | ||
28 | 0, 0, 2, 1, /* call, opencall, rule, grammar */ | ||
29 | 1, /* behind */ | ||
30 | 1, 1 /* capture, runtime capture */ | ||
31 | }; | ||
32 | |||
33 | |||
34 | static TTree *newgrammar (lua_State *L, int arg); | ||
35 | |||
36 | |||
37 | /* | ||
38 | ** returns a reasonable name for value at index 'idx' on the stack | ||
39 | */ | ||
40 | static const char *val2str (lua_State *L, int idx) { | ||
41 | const char *k = lua_tostring(L, idx); | ||
42 | if (k != NULL) | ||
43 | return lua_pushfstring(L, "%s", k); | ||
44 | else | ||
45 | return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx)); | ||
46 | } | ||
47 | |||
48 | |||
49 | /* | ||
50 | ** Fix a TOpenCall into a TCall node, using table 'postable' to | ||
51 | ** translate a key to its rule address in the tree. Raises an | ||
52 | ** error if key does not exist. | ||
53 | */ | ||
54 | static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) { | ||
55 | int n; | ||
56 | lua_rawgeti(L, -1, t->key); /* get rule's name */ | ||
57 | lua_gettable(L, postable); /* query name in position table */ | ||
58 | n = lua_tonumber(L, -1); /* get (absolute) position */ | ||
59 | lua_pop(L, 1); /* remove position */ | ||
60 | if (n == 0) { /* no position? */ | ||
61 | lua_rawgeti(L, -1, t->key); /* get rule's name again */ | ||
62 | luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); | ||
63 | } | ||
64 | t->tag = TCall; | ||
65 | t->u.ps = n - (t - g); /* position relative to node */ | ||
66 | assert(sib2(t)->tag == TRule); | ||
67 | sib2(t)->key = t->key; /* fix rule's key */ | ||
68 | } | ||
69 | |||
70 | |||
71 | /* | ||
72 | ** Transform left associative constructions into right | ||
73 | ** associative ones, for sequence and choice; that is: | ||
74 | ** (t11 + t12) + t2 => t11 + (t12 + t2) | ||
75 | ** (t11 * t12) * t2 => t11 * (t12 * t2) | ||
76 | ** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) | ||
77 | */ | ||
78 | static void correctassociativity (TTree *tree) { | ||
79 | TTree *t1 = sib1(tree); | ||
80 | assert(tree->tag == TChoice || tree->tag == TSeq); | ||
81 | while (t1->tag == tree->tag) { | ||
82 | int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */ | ||
83 | int n11size = t1->u.ps - 1; | ||
84 | int n12size = n1size - n11size - 1; | ||
85 | memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */ | ||
86 | tree->u.ps = n11size + 1; | ||
87 | sib2(tree)->tag = tree->tag; | ||
88 | sib2(tree)->u.ps = n12size + 1; | ||
89 | } | ||
90 | } | ||
91 | |||
92 | |||
93 | /* | ||
94 | ** Make final adjustments in a tree. Fix open calls in tree 't', | ||
95 | ** making them refer to their respective rules or raising appropriate | ||
96 | ** errors (if not inside a grammar). Correct associativity of associative | ||
97 | ** constructions (making them right associative). Assume that tree's | ||
98 | ** ktable is at the top of the stack (for error messages). | ||
99 | */ | ||
100 | static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { | ||
101 | tailcall: | ||
102 | switch (t->tag) { | ||
103 | case TGrammar: /* subgrammars were already fixed */ | ||
104 | return; | ||
105 | case TOpenCall: { | ||
106 | if (g != NULL) /* inside a grammar? */ | ||
107 | fixonecall(L, postable, g, t); | ||
108 | else { /* open call outside grammar */ | ||
109 | lua_rawgeti(L, -1, t->key); | ||
110 | luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); | ||
111 | } | ||
112 | break; | ||
113 | } | ||
114 | case TSeq: case TChoice: | ||
115 | correctassociativity(t); | ||
116 | break; | ||
117 | } | ||
118 | switch (numsiblings[t->tag]) { | ||
119 | case 1: /* finalfix(L, postable, g, sib1(t)); */ | ||
120 | t = sib1(t); goto tailcall; | ||
121 | case 2: | ||
122 | finalfix(L, postable, g, sib1(t)); | ||
123 | t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */ | ||
124 | default: assert(numsiblings[t->tag] == 0); break; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | |||
129 | |||
130 | /* | ||
131 | ** {=================================================================== | ||
132 | ** KTable manipulation | ||
133 | ** | ||
134 | ** - The ktable of a pattern 'p' can be shared by other patterns that | ||
135 | ** contain 'p' and no other constants. Because of this sharing, we | ||
136 | ** should not add elements to a 'ktable' unless it was freshly created | ||
137 | ** for the new pattern. | ||
138 | ** | ||
139 | ** - The maximum index in a ktable is USHRT_MAX, because trees and | ||
140 | ** patterns use unsigned shorts to store those indices. | ||
141 | ** ==================================================================== | ||
142 | */ | ||
143 | |||
144 | /* | ||
145 | ** Create a new 'ktable' to the pattern at the top of the stack. | ||
146 | */ | ||
147 | static void newktable (lua_State *L, int n) { | ||
148 | lua_createtable(L, n, 0); /* create a fresh table */ | ||
149 | lua_setuservalue(L, -2); /* set it as 'ktable' for pattern */ | ||
150 | } | ||
151 | |||
152 | |||
153 | /* | ||
154 | ** Add element 'idx' to 'ktable' of pattern at the top of the stack; | ||
155 | ** Return index of new element. | ||
156 | ** If new element is nil, does not add it to table (as it would be | ||
157 | ** useless) and returns 0, as ktable[0] is always nil. | ||
158 | */ | ||
159 | static int addtoktable (lua_State *L, int idx) { | ||
160 | if (lua_isnil(L, idx)) /* nil value? */ | ||
161 | return 0; | ||
162 | else { | ||
163 | int n; | ||
164 | lua_getuservalue(L, -1); /* get ktable from pattern */ | ||
165 | n = lua_rawlen(L, -1); | ||
166 | if (n >= USHRT_MAX) | ||
167 | luaL_error(L, "too many Lua values in pattern"); | ||
168 | lua_pushvalue(L, idx); /* element to be added */ | ||
169 | lua_rawseti(L, -2, ++n); | ||
170 | lua_pop(L, 1); /* remove 'ktable' */ | ||
171 | return n; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | |||
176 | /* | ||
177 | ** Return the number of elements in the ktable at 'idx'. | ||
178 | ** In Lua 5.2/5.3, default "environment" for patterns is nil, not | ||
179 | ** a table. Treat it as an empty table. In Lua 5.1, assumes that | ||
180 | ** the environment has no numeric indices (len == 0) | ||
181 | */ | ||
182 | static int ktablelen (lua_State *L, int idx) { | ||
183 | if (!lua_istable(L, idx)) return 0; | ||
184 | else return lua_rawlen(L, idx); | ||
185 | } | ||
186 | |||
187 | |||
188 | /* | ||
189 | ** Concatentate the contents of table 'idx1' into table 'idx2'. | ||
190 | ** (Assume that both indices are negative.) | ||
191 | ** Return the original length of table 'idx2' (or 0, if no | ||
192 | ** element was added, as there is no need to correct any index). | ||
193 | */ | ||
194 | static int concattable (lua_State *L, int idx1, int idx2) { | ||
195 | int i; | ||
196 | int n1 = ktablelen(L, idx1); | ||
197 | int n2 = ktablelen(L, idx2); | ||
198 | if (n1 + n2 > USHRT_MAX) | ||
199 | luaL_error(L, "too many Lua values in pattern"); | ||
200 | if (n1 == 0) return 0; /* nothing to correct */ | ||
201 | for (i = 1; i <= n1; i++) { | ||
202 | lua_rawgeti(L, idx1, i); | ||
203 | lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ | ||
204 | } | ||
205 | return n2; | ||
206 | } | ||
207 | |||
208 | |||
209 | /* | ||
210 | ** When joining 'ktables', constants from one of the subpatterns must | ||
211 | ** be renumbered; 'correctkeys' corrects their indices (adding 'n' | ||
212 | ** to each of them) | ||
213 | */ | ||
214 | static void correctkeys (TTree *tree, int n) { | ||
215 | if (n == 0) return; /* no correction? */ | ||
216 | tailcall: | ||
217 | switch (tree->tag) { | ||
218 | case TOpenCall: case TCall: case TRunTime: case TRule: { | ||
219 | if (tree->key > 0) | ||
220 | tree->key += n; | ||
221 | break; | ||
222 | } | ||
223 | case TCapture: { | ||
224 | if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) | ||
225 | tree->key += n; | ||
226 | break; | ||
227 | } | ||
228 | default: break; | ||
229 | } | ||
230 | switch (numsiblings[tree->tag]) { | ||
231 | case 1: /* correctkeys(sib1(tree), n); */ | ||
232 | tree = sib1(tree); goto tailcall; | ||
233 | case 2: | ||
234 | correctkeys(sib1(tree), n); | ||
235 | tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ | ||
236 | default: assert(numsiblings[tree->tag] == 0); break; | ||
237 | } | ||
238 | } | ||
239 | |||
240 | |||
241 | /* | ||
242 | ** Join the ktables from p1 and p2 the ktable for the new pattern at the | ||
243 | ** top of the stack, reusing them when possible. | ||
244 | */ | ||
245 | static void joinktables (lua_State *L, int p1, TTree *t2, int p2) { | ||
246 | int n1, n2; | ||
247 | lua_getuservalue(L, p1); /* get ktables */ | ||
248 | lua_getuservalue(L, p2); | ||
249 | n1 = ktablelen(L, -2); | ||
250 | n2 = ktablelen(L, -1); | ||
251 | if (n1 == 0 && n2 == 0) /* are both tables empty? */ | ||
252 | lua_pop(L, 2); /* nothing to be done; pop tables */ | ||
253 | else if (n2 == 0 || lp_equal(L, -2, -1)) { /* 2nd table empty or equal? */ | ||
254 | lua_pop(L, 1); /* pop 2nd table */ | ||
255 | lua_setuservalue(L, -2); /* set 1st ktable into new pattern */ | ||
256 | } | ||
257 | else if (n1 == 0) { /* first table is empty? */ | ||
258 | lua_setuservalue(L, -3); /* set 2nd table into new pattern */ | ||
259 | lua_pop(L, 1); /* pop 1st table */ | ||
260 | } | ||
261 | else { | ||
262 | lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ | ||
263 | /* stack: new p; ktable p1; ktable p2; new ktable */ | ||
264 | concattable(L, -3, -1); /* from p1 into new ktable */ | ||
265 | concattable(L, -2, -1); /* from p2 into new ktable */ | ||
266 | lua_setuservalue(L, -4); /* new ktable becomes 'p' environment */ | ||
267 | lua_pop(L, 2); /* pop other ktables */ | ||
268 | correctkeys(t2, n1); /* correction for indices from p2 */ | ||
269 | } | ||
270 | } | ||
271 | |||
272 | |||
273 | /* | ||
274 | ** copy 'ktable' of element 'idx' to new tree (on top of stack) | ||
275 | */ | ||
276 | static void copyktable (lua_State *L, int idx) { | ||
277 | lua_getuservalue(L, idx); | ||
278 | lua_setuservalue(L, -2); | ||
279 | } | ||
280 | |||
281 | |||
282 | /* | ||
283 | ** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable' | ||
284 | ** from tree at the top of the stack, and correct corresponding | ||
285 | ** tree. | ||
286 | */ | ||
287 | static void mergektable (lua_State *L, int idx, TTree *stree) { | ||
288 | int n; | ||
289 | lua_getuservalue(L, -1); /* get ktables */ | ||
290 | lua_getuservalue(L, idx); | ||
291 | n = concattable(L, -1, -2); | ||
292 | lua_pop(L, 2); /* remove both ktables */ | ||
293 | correctkeys(stree, n); | ||
294 | } | ||
295 | |||
296 | |||
297 | /* | ||
298 | ** Create a new 'ktable' to the pattern at the top of the stack, adding | ||
299 | ** all elements from pattern 'p' (if not 0) plus element 'idx' to it. | ||
300 | ** Return index of new element. | ||
301 | */ | ||
302 | static int addtonewktable (lua_State *L, int p, int idx) { | ||
303 | newktable(L, 1); | ||
304 | if (p) | ||
305 | mergektable(L, p, NULL); | ||
306 | return addtoktable(L, idx); | ||
307 | } | ||
308 | |||
309 | /* }====================================================== */ | ||
310 | |||
311 | |||
312 | /* | ||
313 | ** {====================================================== | ||
314 | ** Tree generation | ||
315 | ** ======================================================= | ||
316 | */ | ||
317 | |||
318 | /* | ||
319 | ** In 5.2, could use 'luaL_testudata'... | ||
320 | */ | ||
321 | static int testpattern (lua_State *L, int idx) { | ||
322 | if (lua_touserdata(L, idx)) { /* value is a userdata? */ | ||
323 | if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ | ||
324 | luaL_getmetatable(L, PATTERN_T); | ||
325 | if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */ | ||
326 | lua_pop(L, 2); /* remove both metatables */ | ||
327 | return 1; | ||
328 | } | ||
329 | } | ||
330 | } | ||
331 | return 0; | ||
332 | } | ||
333 | |||
334 | |||
335 | static Pattern *getpattern (lua_State *L, int idx) { | ||
336 | return (Pattern *)luaL_checkudata(L, idx, PATTERN_T); | ||
337 | } | ||
338 | |||
339 | |||
340 | static int getsize (lua_State *L, int idx) { | ||
341 | return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; | ||
342 | } | ||
343 | |||
344 | |||
345 | static TTree *gettree (lua_State *L, int idx, int *len) { | ||
346 | Pattern *p = getpattern(L, idx); | ||
347 | if (len) | ||
348 | *len = getsize(L, idx); | ||
349 | return p->tree; | ||
350 | } | ||
351 | |||
352 | |||
353 | /* | ||
354 | ** create a pattern. Set its uservalue (the 'ktable') equal to its | ||
355 | ** metatable. (It could be any empty sequence; the metatable is at | ||
356 | ** hand here, so we use it.) | ||
357 | */ | ||
358 | static TTree *newtree (lua_State *L, int len) { | ||
359 | size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); | ||
360 | Pattern *p = (Pattern *)lua_newuserdata(L, size); | ||
361 | luaL_getmetatable(L, PATTERN_T); | ||
362 | lua_pushvalue(L, -1); | ||
363 | lua_setuservalue(L, -3); | ||
364 | lua_setmetatable(L, -2); | ||
365 | p->code = NULL; p->codesize = 0; | ||
366 | return p->tree; | ||
367 | } | ||
368 | |||
369 | |||
370 | static TTree *newleaf (lua_State *L, int tag) { | ||
371 | TTree *tree = newtree(L, 1); | ||
372 | tree->tag = tag; | ||
373 | return tree; | ||
374 | } | ||
375 | |||
376 | |||
377 | static TTree *newcharset (lua_State *L) { | ||
378 | TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); | ||
379 | tree->tag = TSet; | ||
380 | loopset(i, treebuffer(tree)[i] = 0); | ||
381 | return tree; | ||
382 | } | ||
383 | |||
384 | |||
385 | /* | ||
386 | ** add to tree a sequence where first sibling is 'sib' (with size | ||
387 | ** 'sibsize'); returns position for second sibling | ||
388 | */ | ||
389 | static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { | ||
390 | tree->tag = TSeq; tree->u.ps = sibsize + 1; | ||
391 | memcpy(sib1(tree), sib, sibsize * sizeof(TTree)); | ||
392 | return sib2(tree); | ||
393 | } | ||
394 | |||
395 | |||
396 | /* | ||
397 | ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got | ||
398 | ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it | ||
399 | ** must build a sequence of sequence of sequence...) | ||
400 | */ | ||
401 | static void fillseq (TTree *tree, int tag, int n, const char *s) { | ||
402 | int i; | ||
403 | for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */ | ||
404 | tree->tag = TSeq; tree->u.ps = 2; | ||
405 | sib1(tree)->tag = tag; | ||
406 | sib1(tree)->u.n = s ? (byte)s[i] : 0; | ||
407 | tree = sib2(tree); | ||
408 | } | ||
409 | tree->tag = tag; /* last one does not need TSeq */ | ||
410 | tree->u.n = s ? (byte)s[i] : 0; | ||
411 | } | ||
412 | |||
413 | |||
414 | /* | ||
415 | ** Numbers as patterns: | ||
416 | ** 0 == true (always match); n == TAny repeated 'n' times; | ||
417 | ** -n == not (TAny repeated 'n' times) | ||
418 | */ | ||
419 | static TTree *numtree (lua_State *L, int n) { | ||
420 | if (n == 0) | ||
421 | return newleaf(L, TTrue); | ||
422 | else { | ||
423 | TTree *tree, *nd; | ||
424 | if (n > 0) | ||
425 | tree = nd = newtree(L, 2 * n - 1); | ||
426 | else { /* negative: code it as !(-n) */ | ||
427 | n = -n; | ||
428 | tree = newtree(L, 2 * n); | ||
429 | tree->tag = TNot; | ||
430 | nd = sib1(tree); | ||
431 | } | ||
432 | fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */ | ||
433 | return tree; | ||
434 | } | ||
435 | } | ||
436 | |||
437 | |||
438 | /* | ||
439 | ** Convert value at index 'idx' to a pattern | ||
440 | */ | ||
441 | static TTree *getpatt (lua_State *L, int idx, int *len) { | ||
442 | TTree *tree; | ||
443 | switch (lua_type(L, idx)) { | ||
444 | case LUA_TSTRING: { | ||
445 | size_t slen; | ||
446 | const char *s = lua_tolstring(L, idx, &slen); /* get string */ | ||
447 | if (slen == 0) /* empty? */ | ||
448 | tree = newleaf(L, TTrue); /* always match */ | ||
449 | else { | ||
450 | tree = newtree(L, 2 * (slen - 1) + 1); | ||
451 | fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */ | ||
452 | } | ||
453 | break; | ||
454 | } | ||
455 | case LUA_TNUMBER: { | ||
456 | int n = lua_tointeger(L, idx); | ||
457 | tree = numtree(L, n); | ||
458 | break; | ||
459 | } | ||
460 | case LUA_TBOOLEAN: { | ||
461 | tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse)); | ||
462 | break; | ||
463 | } | ||
464 | case LUA_TTABLE: { | ||
465 | tree = newgrammar(L, idx); | ||
466 | break; | ||
467 | } | ||
468 | case LUA_TFUNCTION: { | ||
469 | tree = newtree(L, 2); | ||
470 | tree->tag = TRunTime; | ||
471 | tree->key = addtonewktable(L, 0, idx); | ||
472 | sib1(tree)->tag = TTrue; | ||
473 | break; | ||
474 | } | ||
475 | default: { | ||
476 | return gettree(L, idx, len); | ||
477 | } | ||
478 | } | ||
479 | lua_replace(L, idx); /* put new tree into 'idx' slot */ | ||
480 | if (len) | ||
481 | *len = getsize(L, idx); | ||
482 | return tree; | ||
483 | } | ||
484 | |||
485 | |||
486 | /* | ||
487 | ** create a new tree, whith a new root and one sibling. | ||
488 | ** Sibling must be on the Lua stack, at index 1. | ||
489 | */ | ||
490 | static TTree *newroot1sib (lua_State *L, int tag) { | ||
491 | int s1; | ||
492 | TTree *tree1 = getpatt(L, 1, &s1); | ||
493 | TTree *tree = newtree(L, 1 + s1); /* create new tree */ | ||
494 | tree->tag = tag; | ||
495 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); | ||
496 | copyktable(L, 1); | ||
497 | return tree; | ||
498 | } | ||
499 | |||
500 | |||
501 | /* | ||
502 | ** create a new tree, whith a new root and 2 siblings. | ||
503 | ** Siblings must be on the Lua stack, first one at index 1. | ||
504 | */ | ||
505 | static TTree *newroot2sib (lua_State *L, int tag) { | ||
506 | int s1, s2; | ||
507 | TTree *tree1 = getpatt(L, 1, &s1); | ||
508 | TTree *tree2 = getpatt(L, 2, &s2); | ||
509 | TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */ | ||
510 | tree->tag = tag; | ||
511 | tree->u.ps = 1 + s1; | ||
512 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); | ||
513 | memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); | ||
514 | joinktables(L, 1, sib2(tree), 2); | ||
515 | return tree; | ||
516 | } | ||
517 | |||
518 | |||
519 | static int lp_P (lua_State *L) { | ||
520 | luaL_checkany(L, 1); | ||
521 | getpatt(L, 1, NULL); | ||
522 | lua_settop(L, 1); | ||
523 | return 1; | ||
524 | } | ||
525 | |||
526 | |||
527 | /* | ||
528 | ** sequence operator; optimizations: | ||
529 | ** false x => false, x true => x, true x => x | ||
530 | ** (cannot do x . false => false because x may have runtime captures) | ||
531 | */ | ||
532 | static int lp_seq (lua_State *L) { | ||
533 | TTree *tree1 = getpatt(L, 1, NULL); | ||
534 | TTree *tree2 = getpatt(L, 2, NULL); | ||
535 | if (tree1->tag == TFalse || tree2->tag == TTrue) | ||
536 | lua_pushvalue(L, 1); /* false . x == false, x . true = x */ | ||
537 | else if (tree1->tag == TTrue) | ||
538 | lua_pushvalue(L, 2); /* true . x = x */ | ||
539 | else | ||
540 | newroot2sib(L, TSeq); | ||
541 | return 1; | ||
542 | } | ||
543 | |||
544 | |||
545 | /* | ||
546 | ** choice operator; optimizations: | ||
547 | ** charset / charset => charset | ||
548 | ** true / x => true, x / false => x, false / x => x | ||
549 | ** (x / true is not equivalent to true) | ||
550 | */ | ||
551 | static int lp_choice (lua_State *L) { | ||
552 | Charset st1, st2; | ||
553 | TTree *t1 = getpatt(L, 1, NULL); | ||
554 | TTree *t2 = getpatt(L, 2, NULL); | ||
555 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | ||
556 | TTree *t = newcharset(L); | ||
557 | loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); | ||
558 | } | ||
559 | else if (nofail(t1) || t2->tag == TFalse) | ||
560 | lua_pushvalue(L, 1); /* true / x => true, x / false => x */ | ||
561 | else if (t1->tag == TFalse) | ||
562 | lua_pushvalue(L, 2); /* false / x => x */ | ||
563 | else | ||
564 | newroot2sib(L, TChoice); | ||
565 | return 1; | ||
566 | } | ||
567 | |||
568 | |||
569 | /* | ||
570 | ** p^n | ||
571 | */ | ||
572 | static int lp_star (lua_State *L) { | ||
573 | int size1; | ||
574 | int n = (int)luaL_checkinteger(L, 2); | ||
575 | TTree *tree1 = getpatt(L, 1, &size1); | ||
576 | if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ | ||
577 | TTree *tree = newtree(L, (n + 1) * (size1 + 1)); | ||
578 | if (nullable(tree1)) | ||
579 | luaL_error(L, "loop body may accept empty string"); | ||
580 | while (n--) /* repeat 'n' times */ | ||
581 | tree = seqaux(tree, tree1, size1); | ||
582 | tree->tag = TRep; | ||
583 | memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); | ||
584 | } | ||
585 | else { /* choice (seq tree1 ... choice tree1 true ...) true */ | ||
586 | TTree *tree; | ||
587 | n = -n; | ||
588 | /* size = (choice + seq + tree1 + true) * n, but the last has no seq */ | ||
589 | tree = newtree(L, n * (size1 + 3) - 1); | ||
590 | for (; n > 1; n--) { /* repeat (n - 1) times */ | ||
591 | tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2; | ||
592 | sib2(tree)->tag = TTrue; | ||
593 | tree = sib1(tree); | ||
594 | tree = seqaux(tree, tree1, size1); | ||
595 | } | ||
596 | tree->tag = TChoice; tree->u.ps = size1 + 1; | ||
597 | sib2(tree)->tag = TTrue; | ||
598 | memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); | ||
599 | } | ||
600 | copyktable(L, 1); | ||
601 | return 1; | ||
602 | } | ||
603 | |||
604 | |||
605 | /* | ||
606 | ** #p == &p | ||
607 | */ | ||
608 | static int lp_and (lua_State *L) { | ||
609 | newroot1sib(L, TAnd); | ||
610 | return 1; | ||
611 | } | ||
612 | |||
613 | |||
614 | /* | ||
615 | ** -p == !p | ||
616 | */ | ||
617 | static int lp_not (lua_State *L) { | ||
618 | newroot1sib(L, TNot); | ||
619 | return 1; | ||
620 | } | ||
621 | |||
622 | |||
623 | /* | ||
624 | ** [t1 - t2] == Seq (Not t2) t1 | ||
625 | ** If t1 and t2 are charsets, make their difference. | ||
626 | */ | ||
627 | static int lp_sub (lua_State *L) { | ||
628 | Charset st1, st2; | ||
629 | int s1, s2; | ||
630 | TTree *t1 = getpatt(L, 1, &s1); | ||
631 | TTree *t2 = getpatt(L, 2, &s2); | ||
632 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | ||
633 | TTree *t = newcharset(L); | ||
634 | loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); | ||
635 | } | ||
636 | else { | ||
637 | TTree *tree = newtree(L, 2 + s1 + s2); | ||
638 | tree->tag = TSeq; /* sequence of... */ | ||
639 | tree->u.ps = 2 + s2; | ||
640 | sib1(tree)->tag = TNot; /* ...not... */ | ||
641 | memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ | ||
642 | memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ | ||
643 | joinktables(L, 1, sib1(tree), 2); | ||
644 | } | ||
645 | return 1; | ||
646 | } | ||
647 | |||
648 | |||
649 | static int lp_set (lua_State *L) { | ||
650 | size_t l; | ||
651 | const char *s = luaL_checklstring(L, 1, &l); | ||
652 | TTree *tree = newcharset(L); | ||
653 | while (l--) { | ||
654 | setchar(treebuffer(tree), (byte)(*s)); | ||
655 | s++; | ||
656 | } | ||
657 | return 1; | ||
658 | } | ||
659 | |||
660 | |||
661 | static int lp_range (lua_State *L) { | ||
662 | int arg; | ||
663 | int top = lua_gettop(L); | ||
664 | TTree *tree = newcharset(L); | ||
665 | for (arg = 1; arg <= top; arg++) { | ||
666 | int c; | ||
667 | size_t l; | ||
668 | const char *r = luaL_checklstring(L, arg, &l); | ||
669 | luaL_argcheck(L, l == 2, arg, "range must have two characters"); | ||
670 | for (c = (byte)r[0]; c <= (byte)r[1]; c++) | ||
671 | setchar(treebuffer(tree), c); | ||
672 | } | ||
673 | return 1; | ||
674 | } | ||
675 | |||
676 | |||
677 | /* | ||
678 | ** Look-behind predicate | ||
679 | */ | ||
680 | static int lp_behind (lua_State *L) { | ||
681 | TTree *tree; | ||
682 | TTree *tree1 = getpatt(L, 1, NULL); | ||
683 | int n = fixedlen(tree1); | ||
684 | luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length"); | ||
685 | luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); | ||
686 | luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); | ||
687 | tree = newroot1sib(L, TBehind); | ||
688 | tree->u.n = n; | ||
689 | return 1; | ||
690 | } | ||
691 | |||
692 | |||
693 | /* | ||
694 | ** Create a non-terminal | ||
695 | */ | ||
696 | static int lp_V (lua_State *L) { | ||
697 | TTree *tree = newleaf(L, TOpenCall); | ||
698 | luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); | ||
699 | tree->key = addtonewktable(L, 0, 1); | ||
700 | return 1; | ||
701 | } | ||
702 | |||
703 | |||
704 | /* | ||
705 | ** Create a tree for a non-empty capture, with a body and | ||
706 | ** optionally with an associated Lua value (at index 'labelidx' in the | ||
707 | ** stack) | ||
708 | */ | ||
709 | static int capture_aux (lua_State *L, int cap, int labelidx) { | ||
710 | TTree *tree = newroot1sib(L, TCapture); | ||
711 | tree->cap = cap; | ||
712 | tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx); | ||
713 | return 1; | ||
714 | } | ||
715 | |||
716 | |||
717 | /* | ||
718 | ** Fill a tree with an empty capture, using an empty (TTrue) sibling. | ||
719 | ** (The 'key' field must be filled by the caller to finish the tree.) | ||
720 | */ | ||
721 | static TTree *auxemptycap (TTree *tree, int cap) { | ||
722 | tree->tag = TCapture; | ||
723 | tree->cap = cap; | ||
724 | sib1(tree)->tag = TTrue; | ||
725 | return tree; | ||
726 | } | ||
727 | |||
728 | |||
729 | /* | ||
730 | ** Create a tree for an empty capture. | ||
731 | */ | ||
732 | static TTree *newemptycap (lua_State *L, int cap, int key) { | ||
733 | TTree *tree = auxemptycap(newtree(L, 2), cap); | ||
734 | tree->key = key; | ||
735 | return tree; | ||
736 | } | ||
737 | |||
738 | |||
739 | /* | ||
740 | ** Create a tree for an empty capture with an associated Lua value. | ||
741 | */ | ||
742 | static TTree *newemptycapkey (lua_State *L, int cap, int idx) { | ||
743 | TTree *tree = auxemptycap(newtree(L, 2), cap); | ||
744 | tree->key = addtonewktable(L, 0, idx); | ||
745 | return tree; | ||
746 | } | ||
747 | |||
748 | |||
749 | /* | ||
750 | ** Captures with syntax p / v | ||
751 | ** (function capture, query capture, string capture, or number capture) | ||
752 | */ | ||
753 | static int lp_divcapture (lua_State *L) { | ||
754 | switch (lua_type(L, 2)) { | ||
755 | case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2); | ||
756 | case LUA_TTABLE: return capture_aux(L, Cquery, 2); | ||
757 | case LUA_TSTRING: return capture_aux(L, Cstring, 2); | ||
758 | case LUA_TNUMBER: { | ||
759 | int n = lua_tointeger(L, 2); | ||
760 | TTree *tree = newroot1sib(L, TCapture); | ||
761 | luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number"); | ||
762 | tree->cap = Cnum; | ||
763 | tree->key = n; | ||
764 | return 1; | ||
765 | } | ||
766 | default: return luaL_argerror(L, 2, "invalid replacement value"); | ||
767 | } | ||
768 | } | ||
769 | |||
770 | |||
771 | static int lp_substcapture (lua_State *L) { | ||
772 | return capture_aux(L, Csubst, 0); | ||
773 | } | ||
774 | |||
775 | |||
776 | static int lp_tablecapture (lua_State *L) { | ||
777 | return capture_aux(L, Ctable, 0); | ||
778 | } | ||
779 | |||
780 | |||
781 | static int lp_groupcapture (lua_State *L) { | ||
782 | if (lua_isnoneornil(L, 2)) | ||
783 | return capture_aux(L, Cgroup, 0); | ||
784 | else | ||
785 | return capture_aux(L, Cgroup, 2); | ||
786 | } | ||
787 | |||
788 | |||
789 | static int lp_foldcapture (lua_State *L) { | ||
790 | luaL_checktype(L, 2, LUA_TFUNCTION); | ||
791 | return capture_aux(L, Cfold, 2); | ||
792 | } | ||
793 | |||
794 | |||
795 | static int lp_simplecapture (lua_State *L) { | ||
796 | return capture_aux(L, Csimple, 0); | ||
797 | } | ||
798 | |||
799 | |||
800 | static int lp_poscapture (lua_State *L) { | ||
801 | newemptycap(L, Cposition, 0); | ||
802 | return 1; | ||
803 | } | ||
804 | |||
805 | |||
806 | static int lp_argcapture (lua_State *L) { | ||
807 | int n = (int)luaL_checkinteger(L, 1); | ||
808 | luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); | ||
809 | newemptycap(L, Carg, n); | ||
810 | return 1; | ||
811 | } | ||
812 | |||
813 | |||
814 | static int lp_backref (lua_State *L) { | ||
815 | luaL_checkany(L, 1); | ||
816 | newemptycapkey(L, Cbackref, 1); | ||
817 | return 1; | ||
818 | } | ||
819 | |||
820 | |||
821 | /* | ||
822 | ** Constant capture | ||
823 | */ | ||
824 | static int lp_constcapture (lua_State *L) { | ||
825 | int i; | ||
826 | int n = lua_gettop(L); /* number of values */ | ||
827 | if (n == 0) /* no values? */ | ||
828 | newleaf(L, TTrue); /* no capture */ | ||
829 | else if (n == 1) | ||
830 | newemptycapkey(L, Cconst, 1); /* single constant capture */ | ||
831 | else { /* create a group capture with all values */ | ||
832 | TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); | ||
833 | newktable(L, n); /* create a 'ktable' for new tree */ | ||
834 | tree->tag = TCapture; | ||
835 | tree->cap = Cgroup; | ||
836 | tree->key = 0; | ||
837 | tree = sib1(tree); | ||
838 | for (i = 1; i <= n - 1; i++) { | ||
839 | tree->tag = TSeq; | ||
840 | tree->u.ps = 3; /* skip TCapture and its sibling */ | ||
841 | auxemptycap(sib1(tree), Cconst); | ||
842 | sib1(tree)->key = addtoktable(L, i); | ||
843 | tree = sib2(tree); | ||
844 | } | ||
845 | auxemptycap(tree, Cconst); | ||
846 | tree->key = addtoktable(L, i); | ||
847 | } | ||
848 | return 1; | ||
849 | } | ||
850 | |||
851 | |||
852 | static int lp_matchtime (lua_State *L) { | ||
853 | TTree *tree; | ||
854 | luaL_checktype(L, 2, LUA_TFUNCTION); | ||
855 | tree = newroot1sib(L, TRunTime); | ||
856 | tree->key = addtonewktable(L, 1, 2); | ||
857 | return 1; | ||
858 | } | ||
859 | |||
860 | /* }====================================================== */ | ||
861 | |||
862 | |||
863 | /* | ||
864 | ** {====================================================== | ||
865 | ** Grammar - Tree generation | ||
866 | ** ======================================================= | ||
867 | */ | ||
868 | |||
869 | /* | ||
870 | ** push on the stack the index and the pattern for the | ||
871 | ** initial rule of grammar at index 'arg' in the stack; | ||
872 | ** also add that index into position table. | ||
873 | */ | ||
874 | static void getfirstrule (lua_State *L, int arg, int postab) { | ||
875 | lua_rawgeti(L, arg, 1); /* access first element */ | ||
876 | if (lua_isstring(L, -1)) { /* is it the name of initial rule? */ | ||
877 | lua_pushvalue(L, -1); /* duplicate it to use as key */ | ||
878 | lua_gettable(L, arg); /* get associated rule */ | ||
879 | } | ||
880 | else { | ||
881 | lua_pushinteger(L, 1); /* key for initial rule */ | ||
882 | lua_insert(L, -2); /* put it before rule */ | ||
883 | } | ||
884 | if (!testpattern(L, -1)) { /* initial rule not a pattern? */ | ||
885 | if (lua_isnil(L, -1)) | ||
886 | luaL_error(L, "grammar has no initial rule"); | ||
887 | else | ||
888 | luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2)); | ||
889 | } | ||
890 | lua_pushvalue(L, -2); /* push key */ | ||
891 | lua_pushinteger(L, 1); /* push rule position (after TGrammar) */ | ||
892 | lua_settable(L, postab); /* insert pair at position table */ | ||
893 | } | ||
894 | |||
895 | /* | ||
896 | ** traverse grammar at index 'arg', pushing all its keys and patterns | ||
897 | ** into the stack. Create a new table (before all pairs key-pattern) to | ||
898 | ** collect all keys and their associated positions in the final tree | ||
899 | ** (the "position table"). | ||
900 | ** Return the number of rules and (in 'totalsize') the total size | ||
901 | ** for the new tree. | ||
902 | */ | ||
903 | static int collectrules (lua_State *L, int arg, int *totalsize) { | ||
904 | int n = 1; /* to count number of rules */ | ||
905 | int postab = lua_gettop(L) + 1; /* index of position table */ | ||
906 | int size; /* accumulator for total size */ | ||
907 | lua_newtable(L); /* create position table */ | ||
908 | getfirstrule(L, arg, postab); | ||
909 | size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */ | ||
910 | lua_pushnil(L); /* prepare to traverse grammar table */ | ||
911 | while (lua_next(L, arg) != 0) { | ||
912 | if (lua_tonumber(L, -2) == 1 || | ||
913 | lp_equal(L, -2, postab + 1)) { /* initial rule? */ | ||
914 | lua_pop(L, 1); /* remove value (keep key for lua_next) */ | ||
915 | continue; | ||
916 | } | ||
917 | if (!testpattern(L, -1)) /* value is not a pattern? */ | ||
918 | luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2)); | ||
919 | luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); | ||
920 | lua_pushvalue(L, -2); /* push key (to insert into position table) */ | ||
921 | lua_pushinteger(L, size); | ||
922 | lua_settable(L, postab); | ||
923 | size += 1 + getsize(L, -1); /* update size */ | ||
924 | lua_pushvalue(L, -2); /* push key (for next lua_next) */ | ||
925 | n++; | ||
926 | } | ||
927 | *totalsize = size + 1; /* TTrue to finish list of rules */ | ||
928 | return n; | ||
929 | } | ||
930 | |||
931 | |||
932 | static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | ||
933 | int i; | ||
934 | TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ | ||
935 | for (i = 0; i < n; i++) { /* add each rule into new tree */ | ||
936 | int ridx = frule + 2*i + 1; /* index of i-th rule */ | ||
937 | int rulesize; | ||
938 | TTree *rn = gettree(L, ridx, &rulesize); | ||
939 | nd->tag = TRule; | ||
940 | nd->key = 0; /* will be fixed when rule is used */ | ||
941 | nd->cap = i; /* rule number */ | ||
942 | nd->u.ps = rulesize + 1; /* point to next rule */ | ||
943 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ | ||
944 | mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ | ||
945 | nd = sib2(nd); /* move to next rule */ | ||
946 | } | ||
947 | nd->tag = TTrue; /* finish list of rules */ | ||
948 | } | ||
949 | |||
950 | |||
951 | /* | ||
952 | ** Check whether a tree has potential infinite loops | ||
953 | */ | ||
954 | static int checkloops (TTree *tree) { | ||
955 | tailcall: | ||
956 | if (tree->tag == TRep && nullable(sib1(tree))) | ||
957 | return 1; | ||
958 | else if (tree->tag == TGrammar) | ||
959 | return 0; /* sub-grammars already checked */ | ||
960 | else { | ||
961 | switch (numsiblings[tree->tag]) { | ||
962 | case 1: /* return checkloops(sib1(tree)); */ | ||
963 | tree = sib1(tree); goto tailcall; | ||
964 | case 2: | ||
965 | if (checkloops(sib1(tree))) return 1; | ||
966 | /* else return checkloops(sib2(tree)); */ | ||
967 | tree = sib2(tree); goto tailcall; | ||
968 | default: assert(numsiblings[tree->tag] == 0); return 0; | ||
969 | } | ||
970 | } | ||
971 | } | ||
972 | |||
973 | |||
974 | /* | ||
975 | ** Give appropriate error message for 'verifyrule'. If a rule appears | ||
976 | ** twice in 'passed', there is path from it back to itself without | ||
977 | ** advancing the subject. | ||
978 | */ | ||
979 | static int verifyerror (lua_State *L, int *passed, int npassed) { | ||
980 | int i, j; | ||
981 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | ||
982 | for (j = i - 1; j >= 0; j--) { | ||
983 | if (passed[i] == passed[j]) { | ||
984 | lua_rawgeti(L, -1, passed[i]); /* get rule's key */ | ||
985 | return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); | ||
986 | } | ||
987 | } | ||
988 | } | ||
989 | return luaL_error(L, "too many left calls in grammar"); | ||
990 | } | ||
991 | |||
992 | |||
993 | /* | ||
994 | ** Check whether a rule can be left recursive; raise an error in that | ||
995 | ** case; otherwise return 1 iff pattern is nullable. | ||
996 | ** The return value is used to check sequences, where the second pattern | ||
997 | ** is only relevant if the first is nullable. | ||
998 | ** Parameter 'nb' works as an accumulator, to allow tail calls in | ||
999 | ** choices. ('nb' true makes function returns true.) | ||
1000 | ** Parameter 'passed' is a list of already visited rules, 'npassed' | ||
1001 | ** counts the elements in 'passed'. | ||
1002 | ** Assume ktable at the top of the stack. | ||
1003 | */ | ||
1004 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | ||
1005 | int nb) { | ||
1006 | tailcall: | ||
1007 | switch (tree->tag) { | ||
1008 | case TChar: case TSet: case TAny: | ||
1009 | case TFalse: | ||
1010 | return nb; /* cannot pass from here */ | ||
1011 | case TTrue: | ||
1012 | case TBehind: /* look-behind cannot have calls */ | ||
1013 | return 1; | ||
1014 | case TNot: case TAnd: case TRep: | ||
1015 | /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ | ||
1016 | tree = sib1(tree); nb = 1; goto tailcall; | ||
1017 | case TCapture: case TRunTime: | ||
1018 | /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ | ||
1019 | tree = sib1(tree); goto tailcall; | ||
1020 | case TCall: | ||
1021 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ | ||
1022 | tree = sib2(tree); goto tailcall; | ||
1023 | case TSeq: /* only check 2nd child if first is nb */ | ||
1024 | if (!verifyrule(L, sib1(tree), passed, npassed, 0)) | ||
1025 | return nb; | ||
1026 | /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ | ||
1027 | tree = sib2(tree); goto tailcall; | ||
1028 | case TChoice: /* must check both children */ | ||
1029 | nb = verifyrule(L, sib1(tree), passed, npassed, nb); | ||
1030 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ | ||
1031 | tree = sib2(tree); goto tailcall; | ||
1032 | case TRule: | ||
1033 | if (npassed >= MAXRULES) | ||
1034 | return verifyerror(L, passed, npassed); | ||
1035 | else { | ||
1036 | passed[npassed++] = tree->key; | ||
1037 | /* return verifyrule(L, sib1(tree), passed, npassed); */ | ||
1038 | tree = sib1(tree); goto tailcall; | ||
1039 | } | ||
1040 | case TGrammar: | ||
1041 | return nullable(tree); /* sub-grammar cannot be left recursive */ | ||
1042 | default: assert(0); return 0; | ||
1043 | } | ||
1044 | } | ||
1045 | |||
1046 | |||
1047 | static void verifygrammar (lua_State *L, TTree *grammar) { | ||
1048 | int passed[MAXRULES]; | ||
1049 | TTree *rule; | ||
1050 | /* check left-recursive rules */ | ||
1051 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | ||
1052 | if (rule->key == 0) continue; /* unused rule */ | ||
1053 | verifyrule(L, sib1(rule), passed, 0, 0); | ||
1054 | } | ||
1055 | assert(rule->tag == TTrue); | ||
1056 | /* check infinite loops inside rules */ | ||
1057 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | ||
1058 | if (rule->key == 0) continue; /* unused rule */ | ||
1059 | if (checkloops(sib1(rule))) { | ||
1060 | lua_rawgeti(L, -1, rule->key); /* get rule's key */ | ||
1061 | luaL_error(L, "empty loop in rule '%s'", val2str(L, -1)); | ||
1062 | } | ||
1063 | } | ||
1064 | assert(rule->tag == TTrue); | ||
1065 | } | ||
1066 | |||
1067 | |||
1068 | /* | ||
1069 | ** Give a name for the initial rule if it is not referenced | ||
1070 | */ | ||
1071 | static void initialrulename (lua_State *L, TTree *grammar, int frule) { | ||
1072 | if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */ | ||
1073 | int n = lua_rawlen(L, -1) + 1; /* index for name */ | ||
1074 | lua_pushvalue(L, frule); /* rule's name */ | ||
1075 | lua_rawseti(L, -2, n); /* ktable was on the top of the stack */ | ||
1076 | sib1(grammar)->key = n; | ||
1077 | } | ||
1078 | } | ||
1079 | |||
1080 | |||
1081 | static TTree *newgrammar (lua_State *L, int arg) { | ||
1082 | int treesize; | ||
1083 | int frule = lua_gettop(L) + 2; /* position of first rule's key */ | ||
1084 | int n = collectrules(L, arg, &treesize); | ||
1085 | TTree *g = newtree(L, treesize); | ||
1086 | luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules"); | ||
1087 | g->tag = TGrammar; g->u.n = n; | ||
1088 | lua_newtable(L); /* create 'ktable' */ | ||
1089 | lua_setuservalue(L, -2); | ||
1090 | buildgrammar(L, g, frule, n); | ||
1091 | lua_getuservalue(L, -1); /* get 'ktable' for new tree */ | ||
1092 | finalfix(L, frule - 1, g, sib1(g)); | ||
1093 | initialrulename(L, g, frule); | ||
1094 | verifygrammar(L, g); | ||
1095 | lua_pop(L, 1); /* remove 'ktable' */ | ||
1096 | lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */ | ||
1097 | lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */ | ||
1098 | return g; /* new table at the top of the stack */ | ||
1099 | } | ||
1100 | |||
1101 | /* }====================================================== */ | ||
1102 | |||
1103 | |||
1104 | static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { | ||
1105 | lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ | ||
1106 | finalfix(L, 0, NULL, p->tree); | ||
1107 | lua_pop(L, 1); /* remove 'ktable' */ | ||
1108 | return compile(L, p); | ||
1109 | } | ||
1110 | |||
1111 | |||
1112 | static int lp_printtree (lua_State *L) { | ||
1113 | TTree *tree = getpatt(L, 1, NULL); | ||
1114 | int c = lua_toboolean(L, 2); | ||
1115 | if (c) { | ||
1116 | lua_getuservalue(L, 1); /* push 'ktable' (may be used by 'finalfix') */ | ||
1117 | finalfix(L, 0, NULL, tree); | ||
1118 | lua_pop(L, 1); /* remove 'ktable' */ | ||
1119 | } | ||
1120 | printktable(L, 1); | ||
1121 | printtree(tree, 0); | ||
1122 | return 0; | ||
1123 | } | ||
1124 | |||
1125 | |||
1126 | static int lp_printcode (lua_State *L) { | ||
1127 | Pattern *p = getpattern(L, 1); | ||
1128 | printktable(L, 1); | ||
1129 | if (p->code == NULL) /* not compiled yet? */ | ||
1130 | prepcompile(L, p, 1); | ||
1131 | printpatt(p->code, p->codesize); | ||
1132 | return 0; | ||
1133 | } | ||
1134 | |||
1135 | |||
1136 | /* | ||
1137 | ** Get the initial position for the match, interpreting negative | ||
1138 | ** values from the end of the subject | ||
1139 | */ | ||
1140 | static size_t initposition (lua_State *L, size_t len) { | ||
1141 | lua_Integer ii = luaL_optinteger(L, 3, 1); | ||
1142 | if (ii > 0) { /* positive index? */ | ||
1143 | if ((size_t)ii <= len) /* inside the string? */ | ||
1144 | return (size_t)ii - 1; /* return it (corrected to 0-base) */ | ||
1145 | else return len; /* crop at the end */ | ||
1146 | } | ||
1147 | else { /* negative index */ | ||
1148 | if ((size_t)(-ii) <= len) /* inside the string? */ | ||
1149 | return len - ((size_t)(-ii)); /* return position from the end */ | ||
1150 | else return 0; /* crop at the beginning */ | ||
1151 | } | ||
1152 | } | ||
1153 | |||
1154 | |||
1155 | /* | ||
1156 | ** Main match function | ||
1157 | */ | ||
1158 | static int lp_match (lua_State *L) { | ||
1159 | Capture capture[INITCAPSIZE]; | ||
1160 | const char *r; | ||
1161 | size_t l; | ||
1162 | Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1)); | ||
1163 | Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1); | ||
1164 | const char *s = luaL_checklstring(L, SUBJIDX, &l); | ||
1165 | size_t i = initposition(L, l); | ||
1166 | int ptop = lua_gettop(L); | ||
1167 | lua_pushnil(L); /* initialize subscache */ | ||
1168 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ | ||
1169 | lua_getuservalue(L, 1); /* initialize penvidx */ | ||
1170 | r = match(L, s, s + i, s + l, code, capture, ptop); | ||
1171 | if (r == NULL) { | ||
1172 | lua_pushnil(L); | ||
1173 | return 1; | ||
1174 | } | ||
1175 | return getcaptures(L, s, r, ptop); | ||
1176 | } | ||
1177 | |||
1178 | |||
1179 | |||
1180 | /* | ||
1181 | ** {====================================================== | ||
1182 | ** Library creation and functions not related to matching | ||
1183 | ** ======================================================= | ||
1184 | */ | ||
1185 | |||
1186 | /* maximum limit for stack size */ | ||
1187 | #define MAXLIM (INT_MAX / 100) | ||
1188 | |||
1189 | static int lp_setmax (lua_State *L) { | ||
1190 | lua_Integer lim = luaL_checkinteger(L, 1); | ||
1191 | luaL_argcheck(L, 0 < lim && lim <= MAXLIM, 1, "out of range"); | ||
1192 | lua_settop(L, 1); | ||
1193 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
1194 | return 0; | ||
1195 | } | ||
1196 | |||
1197 | |||
1198 | static int lp_version (lua_State *L) { | ||
1199 | lua_pushstring(L, VERSION); | ||
1200 | return 1; | ||
1201 | } | ||
1202 | |||
1203 | |||
1204 | static int lp_type (lua_State *L) { | ||
1205 | if (testpattern(L, 1)) | ||
1206 | lua_pushliteral(L, "pattern"); | ||
1207 | else | ||
1208 | lua_pushnil(L); | ||
1209 | return 1; | ||
1210 | } | ||
1211 | |||
1212 | |||
1213 | int lp_gc (lua_State *L) { | ||
1214 | Pattern *p = getpattern(L, 1); | ||
1215 | realloccode(L, p, 0); /* delete code block */ | ||
1216 | return 0; | ||
1217 | } | ||
1218 | |||
1219 | |||
1220 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { | ||
1221 | TTree *t = newcharset(L); | ||
1222 | int i; | ||
1223 | for (i = 0; i <= UCHAR_MAX; i++) | ||
1224 | if (catf(i)) setchar(treebuffer(t), i); | ||
1225 | lua_setfield(L, -2, catname); | ||
1226 | } | ||
1227 | |||
1228 | |||
1229 | static int lp_locale (lua_State *L) { | ||
1230 | if (lua_isnoneornil(L, 1)) { | ||
1231 | lua_settop(L, 0); | ||
1232 | lua_createtable(L, 0, 12); | ||
1233 | } | ||
1234 | else { | ||
1235 | luaL_checktype(L, 1, LUA_TTABLE); | ||
1236 | lua_settop(L, 1); | ||
1237 | } | ||
1238 | createcat(L, "alnum", isalnum); | ||
1239 | createcat(L, "alpha", isalpha); | ||
1240 | createcat(L, "cntrl", iscntrl); | ||
1241 | createcat(L, "digit", isdigit); | ||
1242 | createcat(L, "graph", isgraph); | ||
1243 | createcat(L, "lower", islower); | ||
1244 | createcat(L, "print", isprint); | ||
1245 | createcat(L, "punct", ispunct); | ||
1246 | createcat(L, "space", isspace); | ||
1247 | createcat(L, "upper", isupper); | ||
1248 | createcat(L, "xdigit", isxdigit); | ||
1249 | return 1; | ||
1250 | } | ||
1251 | |||
1252 | |||
1253 | static struct luaL_Reg pattreg[] = { | ||
1254 | {"ptree", lp_printtree}, | ||
1255 | {"pcode", lp_printcode}, | ||
1256 | {"match", lp_match}, | ||
1257 | {"B", lp_behind}, | ||
1258 | {"V", lp_V}, | ||
1259 | {"C", lp_simplecapture}, | ||
1260 | {"Cc", lp_constcapture}, | ||
1261 | {"Cmt", lp_matchtime}, | ||
1262 | {"Cb", lp_backref}, | ||
1263 | {"Carg", lp_argcapture}, | ||
1264 | {"Cp", lp_poscapture}, | ||
1265 | {"Cs", lp_substcapture}, | ||
1266 | {"Ct", lp_tablecapture}, | ||
1267 | {"Cf", lp_foldcapture}, | ||
1268 | {"Cg", lp_groupcapture}, | ||
1269 | {"P", lp_P}, | ||
1270 | {"S", lp_set}, | ||
1271 | {"R", lp_range}, | ||
1272 | {"locale", lp_locale}, | ||
1273 | {"version", lp_version}, | ||
1274 | {"setmaxstack", lp_setmax}, | ||
1275 | {"type", lp_type}, | ||
1276 | {NULL, NULL} | ||
1277 | }; | ||
1278 | |||
1279 | |||
1280 | static struct luaL_Reg metareg[] = { | ||
1281 | {"__mul", lp_seq}, | ||
1282 | {"__add", lp_choice}, | ||
1283 | {"__pow", lp_star}, | ||
1284 | {"__gc", lp_gc}, | ||
1285 | {"__len", lp_and}, | ||
1286 | {"__div", lp_divcapture}, | ||
1287 | {"__unm", lp_not}, | ||
1288 | {"__sub", lp_sub}, | ||
1289 | {NULL, NULL} | ||
1290 | }; | ||
1291 | |||
1292 | |||
1293 | int luaopen_lpeg (lua_State *L); | ||
1294 | int luaopen_lpeg (lua_State *L) { | ||
1295 | luaL_newmetatable(L, PATTERN_T); | ||
1296 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ | ||
1297 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
1298 | luaL_setfuncs(L, metareg, 0); | ||
1299 | luaL_newlib(L, pattreg); | ||
1300 | lua_pushvalue(L, -1); | ||
1301 | lua_setfield(L, -3, "__index"); | ||
1302 | return 1; | ||
1303 | } | ||
1304 | |||
1305 | /* }====================================================== */ | ||