diff options
Diffstat (limited to 'lptree.c')
-rw-r--r-- | lptree.c | 1282 |
1 files changed, 1282 insertions, 0 deletions
diff --git a/lptree.c b/lptree.c new file mode 100644 index 0000000..5d2933d --- /dev/null +++ b/lptree.c | |||
@@ -0,0 +1,1282 @@ | |||
1 | /* | ||
2 | ** $Id: lptree.c,v 1.10 2013/04/12 16:30:33 roberto Exp $ | ||
3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | |||
6 | #include <ctype.h> | ||
7 | #include <limits.h> | ||
8 | #include <string.h> | ||
9 | |||
10 | |||
11 | #include "lua.h" | ||
12 | #include "lauxlib.h" | ||
13 | |||
14 | #include "lptypes.h" | ||
15 | #include "lpcap.h" | ||
16 | #include "lpcode.h" | ||
17 | #include "lpprint.h" | ||
18 | #include "lptree.h" | ||
19 | |||
20 | |||
21 | /* number of siblings for each tree */ | ||
22 | const byte numsiblings[] = { | ||
23 | 0, 0, 0, /* char, set, any */ | ||
24 | 0, 0, /* true, false */ | ||
25 | 1, /* rep */ | ||
26 | 2, 2, /* seq, choice */ | ||
27 | 1, 1, /* not, and */ | ||
28 | 0, 0, 2, 1, /* call, opencall, rule, grammar */ | ||
29 | 1, /* behind */ | ||
30 | 1, 1, /* capture, runtime capture */ | ||
31 | 0, 2 /* labeled failure throw, labeled choice */ | ||
32 | }; | ||
33 | |||
34 | |||
35 | static TTree *newgrammar (lua_State *L, int arg); | ||
36 | |||
37 | |||
38 | /* | ||
39 | ** returns a reasonable name for value at index 'idx' on the stack | ||
40 | */ | ||
41 | static const char *val2str (lua_State *L, int idx) { | ||
42 | const char *k = lua_tostring(L, idx); | ||
43 | if (k != NULL) | ||
44 | return lua_pushfstring(L, "%s", k); | ||
45 | else | ||
46 | return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx)); | ||
47 | } | ||
48 | |||
49 | |||
50 | /* | ||
51 | ** Fix a TOpenCall into a TCall node, using table 'postable' to | ||
52 | ** translate a key to its rule address in the tree. Raises an | ||
53 | ** error if key does not exist. | ||
54 | */ | ||
55 | static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) { | ||
56 | int n; | ||
57 | lua_rawgeti(L, -1, t->key); /* get rule's name */ | ||
58 | lua_gettable(L, postable); /* query name in position table */ | ||
59 | n = lua_tonumber(L, -1); /* get (absolute) position */ | ||
60 | lua_pop(L, 1); /* remove position */ | ||
61 | if (n == 0) { /* no position? */ | ||
62 | lua_rawgeti(L, -1, t->key); /* get rule's name again */ | ||
63 | luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); | ||
64 | } | ||
65 | t->tag = TCall; | ||
66 | t->u.ps = n - (t - g); /* position relative to node */ | ||
67 | assert(sib2(t)->tag == TRule); | ||
68 | sib2(t)->key = t->key; | ||
69 | } | ||
70 | |||
71 | |||
72 | /* | ||
73 | ** Transform left associative constructions into right | ||
74 | ** associative ones, for sequence and choice; that is: | ||
75 | ** (t11 + t12) + t2 => t11 + (t12 + t2) | ||
76 | ** (t11 * t12) * t2 => t11 * (t12 * t2) | ||
77 | ** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) | ||
78 | */ | ||
79 | static void correctassociativity (TTree *tree) { | ||
80 | TTree *t1 = sib1(tree); | ||
81 | assert(tree->tag == TChoice || tree->tag == TSeq); | ||
82 | while (t1->tag == tree->tag) { | ||
83 | int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */ | ||
84 | int n11size = t1->u.ps - 1; | ||
85 | int n12size = n1size - n11size - 1; | ||
86 | memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */ | ||
87 | tree->u.ps = n11size + 1; | ||
88 | sib2(tree)->tag = tree->tag; | ||
89 | sib2(tree)->u.ps = n12size + 1; | ||
90 | } | ||
91 | } | ||
92 | |||
93 | |||
94 | /* | ||
95 | ** Make final adjustments in a tree. Fix open calls in tree 't', | ||
96 | ** making them refer to their respective rules or raising appropriate | ||
97 | ** errors (if not inside a grammar). Correct associativity of associative | ||
98 | ** constructions (making them right associative). Assume that tree's | ||
99 | ** ktable is at the top of the stack (for error messages). | ||
100 | */ | ||
101 | static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { | ||
102 | tailcall: | ||
103 | switch (t->tag) { | ||
104 | case TGrammar: /* subgrammars were already fixed */ | ||
105 | return; | ||
106 | case TOpenCall: { | ||
107 | if (g != NULL) /* inside a grammar? */ | ||
108 | fixonecall(L, postable, g, t); | ||
109 | else { /* open call outside grammar */ | ||
110 | lua_rawgeti(L, -1, t->key); | ||
111 | luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); | ||
112 | } | ||
113 | break; | ||
114 | } | ||
115 | case TSeq: case TChoice: | ||
116 | correctassociativity(t); | ||
117 | break; | ||
118 | } | ||
119 | switch (numsiblings[t->tag]) { | ||
120 | case 1: /* finalfix(L, postable, g, sib1(t)); */ | ||
121 | t = sib1(t); goto tailcall; | ||
122 | case 2: | ||
123 | finalfix(L, postable, g, sib1(t)); | ||
124 | t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */ | ||
125 | default: assert(numsiblings[t->tag] == 0); break; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | |||
130 | /* | ||
131 | ** {====================================================== | ||
132 | ** Tree generation | ||
133 | ** ======================================================= | ||
134 | */ | ||
135 | |||
136 | /* | ||
137 | ** In 5.2, could use 'luaL_testudata'... | ||
138 | */ | ||
139 | static int testpattern (lua_State *L, int idx) { | ||
140 | if (lua_touserdata(L, idx)) { /* value is a userdata? */ | ||
141 | if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ | ||
142 | luaL_getmetatable(L, PATTERN_T); | ||
143 | if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */ | ||
144 | lua_pop(L, 2); /* remove both metatables */ | ||
145 | return 1; | ||
146 | } | ||
147 | } | ||
148 | } | ||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | |||
153 | static Pattern *getpattern (lua_State *L, int idx) { | ||
154 | return (Pattern *)luaL_checkudata(L, idx, PATTERN_T); | ||
155 | } | ||
156 | |||
157 | |||
158 | static int getsize (lua_State *L, int idx) { | ||
159 | return (lua_objlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; | ||
160 | } | ||
161 | |||
162 | |||
163 | static TTree *gettree (lua_State *L, int idx, int *len) { | ||
164 | Pattern *p = getpattern(L, idx); | ||
165 | if (len) | ||
166 | *len = getsize(L, idx); | ||
167 | return p->tree; | ||
168 | } | ||
169 | |||
170 | |||
171 | /* | ||
172 | ** create a pattern | ||
173 | */ | ||
174 | static TTree *newtree (lua_State *L, int len) { | ||
175 | size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); | ||
176 | Pattern *p = (Pattern *)lua_newuserdata(L, size); | ||
177 | luaL_getmetatable(L, PATTERN_T); | ||
178 | lua_setmetatable(L, -2); | ||
179 | p->code = NULL; p->codesize = 0; | ||
180 | return p->tree; | ||
181 | } | ||
182 | |||
183 | |||
184 | static TTree *newleaf (lua_State *L, int tag) { | ||
185 | TTree *tree = newtree(L, 1); | ||
186 | tree->tag = tag; | ||
187 | return tree; | ||
188 | } | ||
189 | |||
190 | |||
191 | /* labeled failure begin */ | ||
192 | static TTree *newlabelleaf (lua_State *L, Labelset ls) { | ||
193 | TTree *tree = newtree(L, 1); | ||
194 | tree->tag = TThrow; | ||
195 | tree->labels = ls; | ||
196 | return tree; | ||
197 | } | ||
198 | /* labeled failure end */ | ||
199 | |||
200 | |||
201 | static TTree *newcharset (lua_State *L) { | ||
202 | TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); | ||
203 | tree->tag = TSet; | ||
204 | loopset(i, treebuffer(tree)[i] = 0); | ||
205 | return tree; | ||
206 | } | ||
207 | |||
208 | |||
209 | /* | ||
210 | ** add to tree a sequence where first sibling is 'sib' (with size | ||
211 | ** 'sibsize'); returns position for second sibling | ||
212 | */ | ||
213 | static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { | ||
214 | tree->tag = TSeq; tree->u.ps = sibsize + 1; | ||
215 | memcpy(sib1(tree), sib, sibsize * sizeof(TTree)); | ||
216 | return sib2(tree); | ||
217 | } | ||
218 | |||
219 | |||
220 | /* | ||
221 | ** Add element 'idx' to 'ktable' of pattern at the top of the stack; | ||
222 | ** create new 'ktable' if necessary. Return index of new element. | ||
223 | */ | ||
224 | static int addtoktable (lua_State *L, int idx) { | ||
225 | if (idx == 0 || lua_isnil(L, idx)) /* no actual value to insert? */ | ||
226 | return 0; | ||
227 | else { | ||
228 | int n; | ||
229 | lua_getfenv(L, -1); /* get ktable from pattern */ | ||
230 | n = lua_objlen(L, -1); | ||
231 | if (n == 0) { /* is it empty/non-existent? */ | ||
232 | lua_pop(L, 1); /* remove it */ | ||
233 | lua_createtable(L, 1, 0); /* create a fresh table */ | ||
234 | } | ||
235 | lua_pushvalue(L, idx); /* element to be added */ | ||
236 | lua_rawseti(L, -2, n + 1); | ||
237 | lua_setfenv(L, -2); /* set it as ktable for pattern */ | ||
238 | return n + 1; | ||
239 | } | ||
240 | } | ||
241 | |||
242 | |||
243 | /* | ||
244 | ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got | ||
245 | ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it | ||
246 | ** must build a sequence of sequence of sequence...) | ||
247 | */ | ||
248 | static void fillseq (TTree *tree, int tag, int n, const char *s) { | ||
249 | int i; | ||
250 | for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */ | ||
251 | tree->tag = TSeq; tree->u.ps = 2; | ||
252 | sib1(tree)->tag = tag; | ||
253 | sib1(tree)->u.n = s ? (byte)s[i] : 0; | ||
254 | tree = sib2(tree); | ||
255 | } | ||
256 | tree->tag = tag; /* last one does not need TSeq */ | ||
257 | tree->u.n = s ? (byte)s[i] : 0; | ||
258 | } | ||
259 | |||
260 | |||
261 | /* | ||
262 | ** Numbers as patterns: | ||
263 | ** 0 == true (always match); n == TAny repeated 'n' times; | ||
264 | ** -n == not (TAny repeated 'n' times) | ||
265 | */ | ||
266 | static TTree *numtree (lua_State *L, int n) { | ||
267 | if (n == 0) | ||
268 | return newleaf(L, TTrue); | ||
269 | else { | ||
270 | TTree *tree, *nd; | ||
271 | if (n > 0) | ||
272 | tree = nd = newtree(L, 2 * n - 1); | ||
273 | else { /* negative: code it as !(-n) */ | ||
274 | n = -n; | ||
275 | tree = newtree(L, 2 * n); | ||
276 | tree->tag = TNot; | ||
277 | nd = sib1(tree); | ||
278 | } | ||
279 | fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */ | ||
280 | return tree; | ||
281 | } | ||
282 | } | ||
283 | |||
284 | |||
285 | /* | ||
286 | ** Convert value at index 'idx' to a pattern | ||
287 | */ | ||
288 | static TTree *getpatt (lua_State *L, int idx, int *len) { | ||
289 | TTree *tree; | ||
290 | switch (lua_type(L, idx)) { | ||
291 | case LUA_TSTRING: { | ||
292 | size_t slen; | ||
293 | const char *s = lua_tolstring(L, idx, &slen); /* get string */ | ||
294 | if (slen == 0) /* empty? */ | ||
295 | tree = newleaf(L, TTrue); /* always match */ | ||
296 | else { | ||
297 | tree = newtree(L, 2 * (slen - 1) + 1); | ||
298 | fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */ | ||
299 | } | ||
300 | break; | ||
301 | } | ||
302 | case LUA_TNUMBER: { | ||
303 | int n = lua_tointeger(L, idx); | ||
304 | tree = numtree(L, n); | ||
305 | break; | ||
306 | } | ||
307 | case LUA_TBOOLEAN: { | ||
308 | tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse)); | ||
309 | break; | ||
310 | } | ||
311 | case LUA_TTABLE: { | ||
312 | tree = newgrammar(L, idx); | ||
313 | break; | ||
314 | } | ||
315 | case LUA_TFUNCTION: { | ||
316 | tree = newtree(L, 2); | ||
317 | tree->tag = TRunTime; | ||
318 | tree->key = addtoktable(L, idx); | ||
319 | sib1(tree)->tag = TTrue; | ||
320 | break; | ||
321 | } | ||
322 | default: { | ||
323 | return gettree(L, idx, len); | ||
324 | } | ||
325 | } | ||
326 | lua_replace(L, idx); /* put new tree into 'idx' slot */ | ||
327 | if (len) | ||
328 | *len = getsize(L, idx); | ||
329 | return tree; | ||
330 | } | ||
331 | |||
332 | |||
333 | /* | ||
334 | ** Return the number of elements in the ktable of pattern at 'idx'. | ||
335 | ** In Lua 5.2, default "environment" for patterns is nil, not | ||
336 | ** a table. Treat it as an empty table. In Lua 5.1, assumes that | ||
337 | ** the environment has no numeric indices (len == 0) | ||
338 | */ | ||
339 | static int ktablelen (lua_State *L, int idx) { | ||
340 | if (!lua_istable(L, idx)) return 0; | ||
341 | else return lua_objlen(L, idx); | ||
342 | } | ||
343 | |||
344 | |||
345 | /* | ||
346 | ** Concatentate the contents of table 'idx1' into table 'idx2'. | ||
347 | ** (Assume that both indices are negative.) | ||
348 | ** Return the original length of table 'idx2' | ||
349 | */ | ||
350 | static int concattable (lua_State *L, int idx1, int idx2) { | ||
351 | int i; | ||
352 | int n1 = ktablelen(L, idx1); | ||
353 | int n2 = ktablelen(L, idx2); | ||
354 | if (n1 == 0) return 0; /* nothing to correct */ | ||
355 | for (i = 1; i <= n1; i++) { | ||
356 | lua_rawgeti(L, idx1, i); | ||
357 | lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ | ||
358 | } | ||
359 | return n2; | ||
360 | } | ||
361 | |||
362 | |||
363 | /* | ||
364 | ** Make a merge of ktables from p1 and p2 the ktable for the new | ||
365 | ** pattern at the top of the stack. | ||
366 | */ | ||
367 | static int joinktables (lua_State *L, int p1, int p2) { | ||
368 | int n1, n2; | ||
369 | lua_getfenv(L, p1); /* get ktables */ | ||
370 | lua_getfenv(L, p2); | ||
371 | n1 = ktablelen(L, -2); | ||
372 | n2 = ktablelen(L, -1); | ||
373 | if (n1 == 0 && n2 == 0) { /* are both tables empty? */ | ||
374 | lua_pop(L, 2); /* nothing to be done; pop tables */ | ||
375 | return 0; /* nothing to correct */ | ||
376 | } | ||
377 | if (n2 == 0 || lua_equal(L, -2, -1)) { /* second table is empty or equal? */ | ||
378 | lua_pop(L, 1); /* pop 2nd table */ | ||
379 | lua_setfenv(L, -2); /* set 1st ktable into new pattern */ | ||
380 | return 0; /* nothing to correct */ | ||
381 | } | ||
382 | if (n1 == 0) { /* first table is empty? */ | ||
383 | lua_setfenv(L, -3); /* set 2nd table into new pattern */ | ||
384 | lua_pop(L, 1); /* pop 1st table */ | ||
385 | return 0; /* nothing to correct */ | ||
386 | } | ||
387 | else { | ||
388 | lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ | ||
389 | /* stack: new p; ktable p1; ktable p2; new ktable */ | ||
390 | concattable(L, -3, -1); /* from p1 into new ktable */ | ||
391 | concattable(L, -2, -1); /* from p2 into new ktable */ | ||
392 | lua_setfenv(L, -4); /* new ktable becomes p env */ | ||
393 | lua_pop(L, 2); /* pop other ktables */ | ||
394 | return n1; /* correction for indices from p2 */ | ||
395 | } | ||
396 | } | ||
397 | |||
398 | |||
399 | static void correctkeys (TTree *tree, int n) { | ||
400 | if (n == 0) return; /* no correction? */ | ||
401 | tailcall: | ||
402 | switch (tree->tag) { | ||
403 | case TOpenCall: case TCall: case TRunTime: case TRule: { | ||
404 | if (tree->key > 0) | ||
405 | tree->key += n; | ||
406 | break; | ||
407 | } | ||
408 | case TCapture: { | ||
409 | if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) | ||
410 | tree->key += n; | ||
411 | break; | ||
412 | } | ||
413 | default: break; | ||
414 | } | ||
415 | switch (numsiblings[tree->tag]) { | ||
416 | case 1: /* correctkeys(sib1(tree), n); */ | ||
417 | tree = sib1(tree); goto tailcall; | ||
418 | case 2: | ||
419 | correctkeys(sib1(tree), n); | ||
420 | tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ | ||
421 | default: assert(numsiblings[tree->tag] == 0); break; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | |||
426 | /* | ||
427 | ** copy 'ktable' of element 'idx' to new tree (on top of stack) | ||
428 | */ | ||
429 | static void copyktable (lua_State *L, int idx) { | ||
430 | lua_getfenv(L, idx); | ||
431 | lua_setfenv(L, -2); | ||
432 | } | ||
433 | |||
434 | |||
435 | /* | ||
436 | ** merge 'ktable' from rule at stack index 'idx' into 'ktable' | ||
437 | ** from tree at the top of the stack, and correct corresponding | ||
438 | ** tree. | ||
439 | */ | ||
440 | static void mergektable (lua_State *L, int idx, TTree *rule) { | ||
441 | int n; | ||
442 | lua_getfenv(L, -1); /* get ktables */ | ||
443 | lua_getfenv(L, idx); | ||
444 | n = concattable(L, -1, -2); | ||
445 | lua_pop(L, 2); /* remove both ktables */ | ||
446 | correctkeys(rule, n); | ||
447 | } | ||
448 | |||
449 | |||
450 | /* | ||
451 | ** create a new tree, whith a new root and one sibling. | ||
452 | ** Sibling must be on the Lua stack, at index 1. | ||
453 | */ | ||
454 | static TTree *newroot1sib (lua_State *L, int tag) { | ||
455 | int s1; | ||
456 | TTree *tree1 = getpatt(L, 1, &s1); | ||
457 | TTree *tree = newtree(L, 1 + s1); /* create new tree */ | ||
458 | tree->tag = tag; | ||
459 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); | ||
460 | copyktable(L, 1); | ||
461 | return tree; | ||
462 | } | ||
463 | |||
464 | |||
465 | /* | ||
466 | ** create a new tree, whith a new root and 2 siblings. | ||
467 | ** Siblings must be on the Lua stack, first one at index 1. | ||
468 | */ | ||
469 | static TTree *newroot2sib (lua_State *L, int tag) { | ||
470 | int s1, s2; | ||
471 | TTree *tree1 = getpatt(L, 1, &s1); | ||
472 | TTree *tree2 = getpatt(L, 2, &s2); | ||
473 | TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */ | ||
474 | tree->tag = tag; | ||
475 | tree->u.ps = 1 + s1; | ||
476 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); | ||
477 | memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); | ||
478 | correctkeys(sib2(tree), joinktables(L, 1, 2)); | ||
479 | return tree; | ||
480 | } | ||
481 | |||
482 | |||
483 | static int lp_P (lua_State *L) { | ||
484 | luaL_checkany(L, 1); | ||
485 | getpatt(L, 1, NULL); | ||
486 | lua_settop(L, 1); | ||
487 | return 1; | ||
488 | } | ||
489 | |||
490 | |||
491 | /* | ||
492 | ** sequence operator; optimizations: | ||
493 | ** false x => false, x true => x, true x => x | ||
494 | ** (cannot do x . false => false because x may have runtime captures) | ||
495 | */ | ||
496 | static int lp_seq (lua_State *L) { | ||
497 | TTree *tree1 = getpatt(L, 1, NULL); | ||
498 | TTree *tree2 = getpatt(L, 2, NULL); | ||
499 | if (tree1->tag == TFalse || tree2->tag == TTrue) | ||
500 | lua_pushvalue(L, 1); /* false . x == false, x . true = x */ | ||
501 | else if (tree1->tag == TTrue) | ||
502 | lua_pushvalue(L, 2); /* true . x = x */ | ||
503 | else | ||
504 | newroot2sib(L, TSeq); | ||
505 | return 1; | ||
506 | } | ||
507 | |||
508 | |||
509 | /* | ||
510 | ** choice operator; optimizations: | ||
511 | ** charset / charset => charset | ||
512 | ** true / x => true, x / false => x, false / x => x | ||
513 | ** (x / true is not equivalent to true) | ||
514 | */ | ||
515 | static int lp_choice (lua_State *L) { | ||
516 | Charset st1, st2; | ||
517 | TTree *t1 = getpatt(L, 1, NULL); | ||
518 | TTree *t2 = getpatt(L, 2, NULL); | ||
519 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | ||
520 | TTree *t = newcharset(L); | ||
521 | loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); | ||
522 | } | ||
523 | else if (nofail(t1) || t2->tag == TFalse) | ||
524 | lua_pushvalue(L, 1); /* true / x => true, x / false => x */ | ||
525 | else if (t1->tag == TFalse) | ||
526 | lua_pushvalue(L, 2); /* false / x => x */ | ||
527 | else | ||
528 | newroot2sib(L, TChoice); | ||
529 | return 1; | ||
530 | } | ||
531 | |||
532 | |||
533 | /* | ||
534 | ** p^n | ||
535 | */ | ||
536 | static int lp_star (lua_State *L) { | ||
537 | int size1; | ||
538 | int n = luaL_checkint(L, 2); | ||
539 | TTree *tree1 = gettree(L, 1, &size1); | ||
540 | if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ | ||
541 | TTree *tree = newtree(L, (n + 1) * (size1 + 1)); | ||
542 | if (nullable(tree1)) | ||
543 | luaL_error(L, "loop body may accept empty string"); | ||
544 | while (n--) /* repeat 'n' times */ | ||
545 | tree = seqaux(tree, tree1, size1); | ||
546 | tree->tag = TRep; | ||
547 | memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); | ||
548 | } | ||
549 | else { /* choice (seq tree1 ... choice tree1 true ...) true */ | ||
550 | TTree *tree; | ||
551 | n = -n; | ||
552 | /* size = (choice + seq + tree1 + true) * n, but the last has no seq */ | ||
553 | tree = newtree(L, n * (size1 + 3) - 1); | ||
554 | for (; n > 1; n--) { /* repeat (n - 1) times */ | ||
555 | tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2; | ||
556 | sib2(tree)->tag = TTrue; | ||
557 | tree = sib1(tree); | ||
558 | tree = seqaux(tree, tree1, size1); | ||
559 | } | ||
560 | tree->tag = TChoice; tree->u.ps = size1 + 1; | ||
561 | sib2(tree)->tag = TTrue; | ||
562 | memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); | ||
563 | } | ||
564 | copyktable(L, 1); | ||
565 | return 1; | ||
566 | } | ||
567 | |||
568 | |||
569 | /* | ||
570 | ** #p == &p | ||
571 | */ | ||
572 | static int lp_and (lua_State *L) { | ||
573 | newroot1sib(L, TAnd); | ||
574 | return 1; | ||
575 | } | ||
576 | |||
577 | |||
578 | /* | ||
579 | ** -p == !p | ||
580 | */ | ||
581 | static int lp_not (lua_State *L) { | ||
582 | newroot1sib(L, TNot); | ||
583 | return 1; | ||
584 | } | ||
585 | |||
586 | |||
587 | /* | ||
588 | ** [t1 - t2] == Seq (Not t2) t1 | ||
589 | ** If t1 and t2 are charsets, make their difference. | ||
590 | */ | ||
591 | static int lp_sub (lua_State *L) { | ||
592 | Charset st1, st2; | ||
593 | int s1, s2; | ||
594 | TTree *t1 = getpatt(L, 1, &s1); | ||
595 | TTree *t2 = getpatt(L, 2, &s2); | ||
596 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | ||
597 | TTree *t = newcharset(L); | ||
598 | loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); | ||
599 | } | ||
600 | else { | ||
601 | TTree *tree = newtree(L, 2 + s1 + s2); | ||
602 | tree->tag = TSeq; /* sequence of... */ | ||
603 | tree->u.ps = 2 + s2; | ||
604 | sib1(tree)->tag = TNot; /* ...not... */ | ||
605 | memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ | ||
606 | memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ | ||
607 | correctkeys(sib1(tree), joinktables(L, 1, 2)); | ||
608 | } | ||
609 | return 1; | ||
610 | } | ||
611 | |||
612 | |||
613 | static int lp_set (lua_State *L) { | ||
614 | size_t l; | ||
615 | const char *s = luaL_checklstring(L, 1, &l); | ||
616 | TTree *tree = newcharset(L); | ||
617 | while (l--) { | ||
618 | setchar(treebuffer(tree), (byte)(*s)); | ||
619 | s++; | ||
620 | } | ||
621 | return 1; | ||
622 | } | ||
623 | |||
624 | |||
625 | static int lp_range (lua_State *L) { | ||
626 | int arg; | ||
627 | int top = lua_gettop(L); | ||
628 | TTree *tree = newcharset(L); | ||
629 | for (arg = 1; arg <= top; arg++) { | ||
630 | int c; | ||
631 | size_t l; | ||
632 | const char *r = luaL_checklstring(L, arg, &l); | ||
633 | luaL_argcheck(L, l == 2, arg, "range must have two characters"); | ||
634 | for (c = (byte)r[0]; c <= (byte)r[1]; c++) | ||
635 | setchar(treebuffer(tree), c); | ||
636 | } | ||
637 | return 1; | ||
638 | } | ||
639 | |||
640 | |||
641 | /* | ||
642 | ** Look-behind predicate | ||
643 | */ | ||
644 | static int lp_behind (lua_State *L) { | ||
645 | TTree *tree; | ||
646 | TTree *tree1 = getpatt(L, 1, NULL); | ||
647 | int n = fixedlen(tree1); | ||
648 | luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); | ||
649 | luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length"); | ||
650 | luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); | ||
651 | tree = newroot1sib(L, TBehind); | ||
652 | tree->u.n = n; | ||
653 | return 1; | ||
654 | } | ||
655 | |||
656 | |||
657 | /* labeled failure begin */ | ||
658 | /* | ||
659 | ** Throws a label or a set of labels | ||
660 | */ | ||
661 | static int lp_throw (lua_State *L) { | ||
662 | int n = lua_gettop(L); | ||
663 | Labelset ls = 0; | ||
664 | int i; | ||
665 | for (i = 1; i <= n; i++) { | ||
666 | int d = luaL_checkint(L, i); | ||
667 | luaL_argcheck(L, d >= 0 && d < (int)MAXLABELS, i, "invalid label index"); | ||
668 | setlabel(ls, d); | ||
669 | } | ||
670 | newlabelleaf(L, ls); | ||
671 | return 1; | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | ** labeled choice function | ||
676 | */ | ||
677 | static int lp_labchoice (lua_State *L) { | ||
678 | TTree *tree; | ||
679 | int n = lua_gettop(L); | ||
680 | int i; | ||
681 | Labelset ls = 0; | ||
682 | for (i = 3; i <= n; i++) { | ||
683 | int d = luaL_checkint(L, i); | ||
684 | luaL_argcheck(L, d >= 0 && d < (int)MAXLABELS, i, "invalid label index"); | ||
685 | setlabel(ls, d); | ||
686 | } | ||
687 | tree = newroot2sib(L, TLabChoice); | ||
688 | tree->labels = ls; | ||
689 | return 1; | ||
690 | } | ||
691 | /* labeled failure end */ | ||
692 | |||
693 | |||
694 | /* | ||
695 | ** Create a non-terminal | ||
696 | */ | ||
697 | static int lp_V (lua_State *L) { | ||
698 | TTree *tree = newleaf(L, TOpenCall); | ||
699 | luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); | ||
700 | tree->key = addtoktable(L, 1); | ||
701 | return 1; | ||
702 | } | ||
703 | |||
704 | |||
705 | /* | ||
706 | ** Create a tree for a non-empty capture, with a body and | ||
707 | ** optionally with an associated Lua value (at index 'labelidx' in the | ||
708 | ** stack) | ||
709 | */ | ||
710 | static int capture_aux (lua_State *L, int cap, int labelidx) { | ||
711 | TTree *tree = newroot1sib(L, TCapture); | ||
712 | tree->cap = cap; | ||
713 | tree->key = addtoktable(L, labelidx); | ||
714 | return 1; | ||
715 | } | ||
716 | |||
717 | |||
718 | /* | ||
719 | ** Fill a tree with an empty capture, using an empty (TTrue) sibling. | ||
720 | */ | ||
721 | static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) { | ||
722 | tree->tag = TCapture; | ||
723 | tree->cap = cap; | ||
724 | tree->key = addtoktable(L, idx); | ||
725 | sib1(tree)->tag = TTrue; | ||
726 | return tree; | ||
727 | } | ||
728 | |||
729 | |||
730 | /* | ||
731 | ** Create a tree for an empty capture | ||
732 | */ | ||
733 | static TTree *newemptycap (lua_State *L, int cap, int idx) { | ||
734 | return auxemptycap(L, newtree(L, 2), cap, idx); | ||
735 | } | ||
736 | |||
737 | |||
738 | /* | ||
739 | ** Captures with syntax p / v | ||
740 | ** (function capture, query capture, string capture, or number capture) | ||
741 | */ | ||
742 | static int lp_divcapture (lua_State *L) { | ||
743 | switch (lua_type(L, 2)) { | ||
744 | case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2); | ||
745 | case LUA_TTABLE: return capture_aux(L, Cquery, 2); | ||
746 | case LUA_TSTRING: return capture_aux(L, Cstring, 2); | ||
747 | case LUA_TNUMBER: { | ||
748 | int n = lua_tointeger(L, 2); | ||
749 | TTree *tree = newroot1sib(L, TCapture); | ||
750 | luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number"); | ||
751 | tree->cap = Cnum; | ||
752 | tree->key = n; | ||
753 | return 1; | ||
754 | } | ||
755 | default: return luaL_argerror(L, 2, "invalid replacement value"); | ||
756 | } | ||
757 | } | ||
758 | |||
759 | |||
760 | static int lp_substcapture (lua_State *L) { | ||
761 | return capture_aux(L, Csubst, 0); | ||
762 | } | ||
763 | |||
764 | |||
765 | static int lp_tablecapture (lua_State *L) { | ||
766 | return capture_aux(L, Ctable, 0); | ||
767 | } | ||
768 | |||
769 | |||
770 | static int lp_groupcapture (lua_State *L) { | ||
771 | if (lua_isnoneornil(L, 2)) | ||
772 | return capture_aux(L, Cgroup, 0); | ||
773 | else { | ||
774 | luaL_checkstring(L, 2); | ||
775 | return capture_aux(L, Cgroup, 2); | ||
776 | } | ||
777 | } | ||
778 | |||
779 | |||
780 | static int lp_foldcapture (lua_State *L) { | ||
781 | luaL_checktype(L, 2, LUA_TFUNCTION); | ||
782 | return capture_aux(L, Cfold, 2); | ||
783 | } | ||
784 | |||
785 | |||
786 | static int lp_simplecapture (lua_State *L) { | ||
787 | return capture_aux(L, Csimple, 0); | ||
788 | } | ||
789 | |||
790 | |||
791 | static int lp_poscapture (lua_State *L) { | ||
792 | newemptycap(L, Cposition, 0); | ||
793 | return 1; | ||
794 | } | ||
795 | |||
796 | |||
797 | static int lp_argcapture (lua_State *L) { | ||
798 | int n = luaL_checkint(L, 1); | ||
799 | TTree *tree = newemptycap(L, Carg, 0); | ||
800 | tree->key = n; | ||
801 | luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); | ||
802 | return 1; | ||
803 | } | ||
804 | |||
805 | |||
806 | static int lp_backref (lua_State *L) { | ||
807 | luaL_checkstring(L, 1); | ||
808 | newemptycap(L, Cbackref, 1); | ||
809 | return 1; | ||
810 | } | ||
811 | |||
812 | |||
813 | /* | ||
814 | ** Constant capture | ||
815 | */ | ||
816 | static int lp_constcapture (lua_State *L) { | ||
817 | int i; | ||
818 | int n = lua_gettop(L); /* number of values */ | ||
819 | if (n == 0) /* no values? */ | ||
820 | newleaf(L, TTrue); /* no capture */ | ||
821 | else if (n == 1) | ||
822 | newemptycap(L, Cconst, 1); /* single constant capture */ | ||
823 | else { /* create a group capture with all values */ | ||
824 | TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); | ||
825 | tree->tag = TCapture; | ||
826 | tree->cap = Cgroup; | ||
827 | tree->key = 0; | ||
828 | tree = sib1(tree); | ||
829 | for (i = 1; i <= n - 1; i++) { | ||
830 | tree->tag = TSeq; | ||
831 | tree->u.ps = 3; /* skip TCapture and its sibling */ | ||
832 | auxemptycap(L, sib1(tree), Cconst, i); | ||
833 | tree = sib2(tree); | ||
834 | } | ||
835 | auxemptycap(L, tree, Cconst, i); | ||
836 | } | ||
837 | return 1; | ||
838 | } | ||
839 | |||
840 | |||
841 | static int lp_matchtime (lua_State *L) { | ||
842 | TTree *tree; | ||
843 | luaL_checktype(L, 2, LUA_TFUNCTION); | ||
844 | tree = newroot1sib(L, TRunTime); | ||
845 | tree->key = addtoktable(L, 2); | ||
846 | return 1; | ||
847 | } | ||
848 | |||
849 | /* }====================================================== */ | ||
850 | |||
851 | |||
852 | /* | ||
853 | ** {====================================================== | ||
854 | ** Grammar - Tree generation | ||
855 | ** ======================================================= | ||
856 | */ | ||
857 | |||
858 | /* | ||
859 | ** push on the stack the index and the pattern for the | ||
860 | ** initial rule of grammar at index 'arg' in the stack; | ||
861 | ** also add that index into position table. | ||
862 | */ | ||
863 | static void getfirstrule (lua_State *L, int arg, int postab) { | ||
864 | lua_rawgeti(L, arg, 1); /* access first element */ | ||
865 | if (lua_isstring(L, -1)) { /* is it the name of initial rule? */ | ||
866 | lua_pushvalue(L, -1); /* duplicate it to use as key */ | ||
867 | lua_gettable(L, arg); /* get associated rule */ | ||
868 | } | ||
869 | else { | ||
870 | lua_pushinteger(L, 1); /* key for initial rule */ | ||
871 | lua_insert(L, -2); /* put it before rule */ | ||
872 | } | ||
873 | if (!testpattern(L, -1)) { /* initial rule not a pattern? */ | ||
874 | if (lua_isnil(L, -1)) | ||
875 | luaL_error(L, "grammar has no initial rule"); | ||
876 | else | ||
877 | luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2)); | ||
878 | } | ||
879 | lua_pushvalue(L, -2); /* push key */ | ||
880 | lua_pushinteger(L, 1); /* push rule position (after TGrammar) */ | ||
881 | lua_settable(L, postab); /* insert pair at position table */ | ||
882 | } | ||
883 | |||
884 | /* | ||
885 | ** traverse grammar at index 'arg', pushing all its keys and patterns | ||
886 | ** into the stack. Create a new table (before all pairs key-pattern) to | ||
887 | ** collect all keys and their associated positions in the final tree | ||
888 | ** (the "position table"). | ||
889 | ** Return the number of rules and (in 'totalsize') the total size | ||
890 | ** for the new tree. | ||
891 | */ | ||
892 | static int collectrules (lua_State *L, int arg, int *totalsize) { | ||
893 | int n = 1; /* to count number of rules */ | ||
894 | int postab = lua_gettop(L) + 1; /* index of position table */ | ||
895 | int size; /* accumulator for total size */ | ||
896 | lua_newtable(L); /* create position table */ | ||
897 | getfirstrule(L, arg, postab); | ||
898 | size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */ | ||
899 | lua_pushnil(L); /* prepare to traverse grammar table */ | ||
900 | while (lua_next(L, arg) != 0) { | ||
901 | if (lua_tonumber(L, -2) == 1 || | ||
902 | lua_equal(L, -2, postab + 1)) { /* initial rule? */ | ||
903 | lua_pop(L, 1); /* remove value (keep key for lua_next) */ | ||
904 | continue; | ||
905 | } | ||
906 | if (!testpattern(L, -1)) /* value is not a pattern? */ | ||
907 | luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2)); | ||
908 | luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); | ||
909 | lua_pushvalue(L, -2); /* push key (to insert into position table) */ | ||
910 | lua_pushinteger(L, size); | ||
911 | lua_settable(L, postab); | ||
912 | size += 1 + getsize(L, -1); /* update size */ | ||
913 | lua_pushvalue(L, -2); /* push key (for next lua_next) */ | ||
914 | n++; | ||
915 | } | ||
916 | *totalsize = size + 1; /* TTrue to finish list of rules */ | ||
917 | return n; | ||
918 | } | ||
919 | |||
920 | |||
921 | static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | ||
922 | int i; | ||
923 | TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ | ||
924 | for (i = 0; i < n; i++) { /* add each rule into new tree */ | ||
925 | int ridx = frule + 2*i + 1; /* index of i-th rule */ | ||
926 | int rulesize; | ||
927 | TTree *rn = gettree(L, ridx, &rulesize); | ||
928 | nd->tag = TRule; | ||
929 | nd->key = 0; | ||
930 | nd->cap = i; /* rule number */ | ||
931 | nd->u.ps = rulesize + 1; /* point to next rule */ | ||
932 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ | ||
933 | mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ | ||
934 | nd = sib2(nd); /* move to next rule */ | ||
935 | } | ||
936 | nd->tag = TTrue; /* finish list of rules */ | ||
937 | } | ||
938 | |||
939 | |||
940 | /* | ||
941 | ** Check whether a tree has potential infinite loops | ||
942 | */ | ||
943 | static int checkloops (TTree *tree) { | ||
944 | tailcall: | ||
945 | if (tree->tag == TRep && nullable(sib1(tree))) | ||
946 | return 1; | ||
947 | else if (tree->tag == TGrammar) | ||
948 | return 0; /* sub-grammars already checked */ | ||
949 | else { | ||
950 | switch (numsiblings[tree->tag]) { | ||
951 | case 1: /* return checkloops(sib1(tree)); */ | ||
952 | tree = sib1(tree); goto tailcall; | ||
953 | case 2: | ||
954 | if (checkloops(sib1(tree))) return 1; | ||
955 | /* else return checkloops(sib2(tree)); */ | ||
956 | tree = sib2(tree); goto tailcall; | ||
957 | default: assert(numsiblings[tree->tag] == 0); return 0; | ||
958 | } | ||
959 | } | ||
960 | } | ||
961 | |||
962 | |||
963 | static int verifyerror (lua_State *L, int *passed, int npassed) { | ||
964 | int i, j; | ||
965 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | ||
966 | for (j = i - 1; j >= 0; j--) { | ||
967 | if (passed[i] == passed[j]) { | ||
968 | lua_rawgeti(L, -1, passed[i]); /* get rule's key */ | ||
969 | return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); | ||
970 | } | ||
971 | } | ||
972 | } | ||
973 | return luaL_error(L, "too many left calls in grammar"); | ||
974 | } | ||
975 | |||
976 | |||
977 | /* | ||
978 | ** Check whether a rule can be left recursive; raise an error in that | ||
979 | ** case; otherwise return 1 iff pattern is nullable. Assume ktable at | ||
980 | ** the top of the stack. | ||
981 | */ | ||
982 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | ||
983 | int nullable) { | ||
984 | tailcall: | ||
985 | switch (tree->tag) { | ||
986 | case TChar: case TSet: case TAny: | ||
987 | case TFalse: case TThrow: /* labeled failure */ | ||
988 | return nullable; /* cannot pass from here */ | ||
989 | case TTrue: | ||
990 | case TBehind: /* look-behind cannot have calls */ | ||
991 | return 1; | ||
992 | case TNot: case TAnd: case TRep: | ||
993 | /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ | ||
994 | tree = sib1(tree); nullable = 1; goto tailcall; | ||
995 | case TCapture: case TRunTime: | ||
996 | /* return verifyrule(L, sib1(tree), passed, npassed); */ | ||
997 | tree = sib1(tree); goto tailcall; | ||
998 | case TCall: | ||
999 | /* return verifyrule(L, sib2(tree), passed, npassed); */ | ||
1000 | tree = sib2(tree); goto tailcall; | ||
1001 | case TSeq: /* only check 2nd child if first is nullable */ | ||
1002 | if (!verifyrule(L, sib1(tree), passed, npassed, 0)) | ||
1003 | return nullable; | ||
1004 | /* else return verifyrule(L, sib2(tree), passed, npassed); */ | ||
1005 | tree = sib2(tree); goto tailcall; | ||
1006 | case TChoice: case TLabChoice: /* must check both children */ /* labeled failure */ | ||
1007 | nullable = verifyrule(L, sib1(tree), passed, npassed, nullable); | ||
1008 | /* return verifyrule(L, sib2(tree), passed, npassed, nullable); */ | ||
1009 | tree = sib2(tree); goto tailcall; | ||
1010 | case TRule: | ||
1011 | if (npassed >= MAXRULES) | ||
1012 | return verifyerror(L, passed, npassed); | ||
1013 | else { | ||
1014 | passed[npassed++] = tree->key; | ||
1015 | /* return verifyrule(L, sib1(tree), passed, npassed); */ | ||
1016 | tree = sib1(tree); goto tailcall; | ||
1017 | } | ||
1018 | case TGrammar: | ||
1019 | return nullable(tree); /* sub-grammar cannot be left recursive */ | ||
1020 | default: assert(0); return 0; | ||
1021 | } | ||
1022 | } | ||
1023 | |||
1024 | |||
1025 | static void verifygrammar (lua_State *L, TTree *grammar) { | ||
1026 | int passed[MAXRULES]; | ||
1027 | TTree *rule; | ||
1028 | /* check left-recursive rules */ | ||
1029 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | ||
1030 | if (rule->key == 0) continue; /* unused rule */ | ||
1031 | verifyrule(L, sib1(rule), passed, 0, 0); | ||
1032 | } | ||
1033 | assert(rule->tag == TTrue); | ||
1034 | /* check infinite loops inside rules */ | ||
1035 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | ||
1036 | if (rule->key == 0) continue; /* unused rule */ | ||
1037 | if (checkloops(sib1(rule))) { | ||
1038 | lua_rawgeti(L, -1, rule->key); /* get rule's key */ | ||
1039 | luaL_error(L, "empty loop in rule '%s'", val2str(L, -1)); | ||
1040 | } | ||
1041 | } | ||
1042 | assert(rule->tag == TTrue); | ||
1043 | } | ||
1044 | |||
1045 | |||
1046 | /* | ||
1047 | ** Give a name for the initial rule if it is not referenced | ||
1048 | */ | ||
1049 | static void initialrulename (lua_State *L, TTree *grammar, int frule) { | ||
1050 | if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */ | ||
1051 | int n = lua_objlen(L, -1) + 1; /* index for name */ | ||
1052 | lua_pushvalue(L, frule); /* rule's name */ | ||
1053 | lua_rawseti(L, -2, n); /* ktable was on the top of the stack */ | ||
1054 | sib1(grammar)->key = n; | ||
1055 | } | ||
1056 | } | ||
1057 | |||
1058 | |||
1059 | static TTree *newgrammar (lua_State *L, int arg) { | ||
1060 | int treesize; | ||
1061 | int frule = lua_gettop(L) + 2; /* position of first rule's key */ | ||
1062 | int n = collectrules(L, arg, &treesize); | ||
1063 | TTree *g = newtree(L, treesize); | ||
1064 | luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules"); | ||
1065 | g->tag = TGrammar; g->u.n = n; | ||
1066 | lua_newtable(L); /* create 'ktable' */ | ||
1067 | lua_setfenv(L, -2); | ||
1068 | buildgrammar(L, g, frule, n); | ||
1069 | lua_getfenv(L, -1); /* get 'ktable' for new tree */ | ||
1070 | finalfix(L, frule - 1, g, sib1(g)); | ||
1071 | initialrulename(L, g, frule); | ||
1072 | verifygrammar(L, g); | ||
1073 | lua_pop(L, 1); /* remove 'ktable' */ | ||
1074 | lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */ | ||
1075 | lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */ | ||
1076 | return g; /* new table at the top of the stack */ | ||
1077 | } | ||
1078 | |||
1079 | /* }====================================================== */ | ||
1080 | |||
1081 | |||
1082 | static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { | ||
1083 | lua_getfenv(L, idx); /* push 'ktable' (may be used by 'finalfix') */ | ||
1084 | finalfix(L, 0, NULL, p->tree); | ||
1085 | lua_pop(L, 1); /* remove 'ktable' */ | ||
1086 | return compile(L, p); | ||
1087 | } | ||
1088 | |||
1089 | |||
1090 | static int lp_printtree (lua_State *L) { | ||
1091 | TTree *tree = getpatt(L, 1, NULL); | ||
1092 | int c = lua_toboolean(L, 2); | ||
1093 | if (c) { | ||
1094 | lua_getfenv(L, 1); /* push 'ktable' (may be used by 'finalfix') */ | ||
1095 | finalfix(L, 0, NULL, tree); | ||
1096 | lua_pop(L, 1); /* remove 'ktable' */ | ||
1097 | } | ||
1098 | printktable(L, 1); | ||
1099 | printtree(tree, 0); | ||
1100 | return 0; | ||
1101 | } | ||
1102 | |||
1103 | |||
1104 | static int lp_printcode (lua_State *L) { | ||
1105 | Pattern *p = getpattern(L, 1); | ||
1106 | printktable(L, 1); | ||
1107 | if (p->code == NULL) /* not compiled yet? */ | ||
1108 | prepcompile(L, p, 1); | ||
1109 | printpatt(p->code, p->codesize); | ||
1110 | return 0; | ||
1111 | } | ||
1112 | |||
1113 | |||
1114 | /* | ||
1115 | ** Get the initial position for the match, interpreting negative | ||
1116 | ** values from the end of the subject | ||
1117 | */ | ||
1118 | static size_t initposition (lua_State *L, size_t len) { | ||
1119 | lua_Integer ii = luaL_optinteger(L, 3, 1); | ||
1120 | if (ii > 0) { /* positive index? */ | ||
1121 | if ((size_t)ii <= len) /* inside the string? */ | ||
1122 | return (size_t)ii - 1; /* return it (corrected to 0-base) */ | ||
1123 | else return len; /* crop at the end */ | ||
1124 | } | ||
1125 | else { /* negative index */ | ||
1126 | if ((size_t)(-ii) <= len) /* inside the string? */ | ||
1127 | return len - ((size_t)(-ii)); /* return position from the end */ | ||
1128 | else return 0; /* crop at the beginning */ | ||
1129 | } | ||
1130 | } | ||
1131 | |||
1132 | |||
1133 | /* | ||
1134 | ** Main match function | ||
1135 | */ | ||
1136 | static int lp_match (lua_State *L) { | ||
1137 | Capture capture[INITCAPSIZE]; | ||
1138 | const char *r; | ||
1139 | size_t l; | ||
1140 | Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1)); | ||
1141 | Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1); | ||
1142 | const char *s = luaL_checklstring(L, SUBJIDX, &l); | ||
1143 | size_t i = initposition(L, l); | ||
1144 | int ptop = lua_gettop(L); | ||
1145 | lua_pushnil(L); /* initialize subscache */ | ||
1146 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ | ||
1147 | lua_getfenv(L, 1); /* initialize penvidx */ | ||
1148 | r = match(L, s, s + i, s + l, code, capture, ptop); | ||
1149 | if (r == NULL) { | ||
1150 | lua_pushnil(L); | ||
1151 | return 1; | ||
1152 | } | ||
1153 | return getcaptures(L, s, r, ptop); | ||
1154 | } | ||
1155 | |||
1156 | |||
1157 | |||
1158 | /* | ||
1159 | ** {====================================================== | ||
1160 | ** Library creation and functions not related to matching | ||
1161 | ** ======================================================= | ||
1162 | */ | ||
1163 | |||
1164 | static int lp_setmax (lua_State *L) { | ||
1165 | luaL_optinteger(L, 1, -1); | ||
1166 | lua_settop(L, 1); | ||
1167 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
1168 | return 0; | ||
1169 | } | ||
1170 | |||
1171 | |||
1172 | static int lp_version (lua_State *L) { | ||
1173 | lua_pushstring(L, VERSION); | ||
1174 | return 1; | ||
1175 | } | ||
1176 | |||
1177 | |||
1178 | static int lp_type (lua_State *L) { | ||
1179 | if (testpattern(L, 1)) | ||
1180 | lua_pushliteral(L, "pattern"); | ||
1181 | else | ||
1182 | lua_pushnil(L); | ||
1183 | return 1; | ||
1184 | } | ||
1185 | |||
1186 | |||
1187 | int lp_gc (lua_State *L) { | ||
1188 | Pattern *p = getpattern(L, 1); | ||
1189 | if (p->codesize > 0) | ||
1190 | reallocprog(L, p, 0); | ||
1191 | return 0; | ||
1192 | } | ||
1193 | |||
1194 | |||
1195 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { | ||
1196 | TTree *t = newcharset(L); | ||
1197 | int i; | ||
1198 | for (i = 0; i <= UCHAR_MAX; i++) | ||
1199 | if (catf(i)) setchar(treebuffer(t), i); | ||
1200 | lua_setfield(L, -2, catname); | ||
1201 | } | ||
1202 | |||
1203 | |||
1204 | static int lp_locale (lua_State *L) { | ||
1205 | if (lua_isnoneornil(L, 1)) { | ||
1206 | lua_settop(L, 0); | ||
1207 | lua_createtable(L, 0, 12); | ||
1208 | } | ||
1209 | else { | ||
1210 | luaL_checktype(L, 1, LUA_TTABLE); | ||
1211 | lua_settop(L, 1); | ||
1212 | } | ||
1213 | createcat(L, "alnum", isalnum); | ||
1214 | createcat(L, "alpha", isalpha); | ||
1215 | createcat(L, "cntrl", iscntrl); | ||
1216 | createcat(L, "digit", isdigit); | ||
1217 | createcat(L, "graph", isgraph); | ||
1218 | createcat(L, "lower", islower); | ||
1219 | createcat(L, "print", isprint); | ||
1220 | createcat(L, "punct", ispunct); | ||
1221 | createcat(L, "space", isspace); | ||
1222 | createcat(L, "upper", isupper); | ||
1223 | createcat(L, "xdigit", isxdigit); | ||
1224 | return 1; | ||
1225 | } | ||
1226 | |||
1227 | |||
1228 | static struct luaL_Reg pattreg[] = { | ||
1229 | {"ptree", lp_printtree}, | ||
1230 | {"pcode", lp_printcode}, | ||
1231 | {"match", lp_match}, | ||
1232 | {"B", lp_behind}, | ||
1233 | {"V", lp_V}, | ||
1234 | {"C", lp_simplecapture}, | ||
1235 | {"Cc", lp_constcapture}, | ||
1236 | {"Cmt", lp_matchtime}, | ||
1237 | {"Cb", lp_backref}, | ||
1238 | {"Carg", lp_argcapture}, | ||
1239 | {"Cp", lp_poscapture}, | ||
1240 | {"Cs", lp_substcapture}, | ||
1241 | {"Ct", lp_tablecapture}, | ||
1242 | {"Cf", lp_foldcapture}, | ||
1243 | {"Cg", lp_groupcapture}, | ||
1244 | {"P", lp_P}, | ||
1245 | {"S", lp_set}, | ||
1246 | {"R", lp_range}, | ||
1247 | {"locale", lp_locale}, | ||
1248 | {"version", lp_version}, | ||
1249 | {"setmaxstack", lp_setmax}, | ||
1250 | {"type", lp_type}, | ||
1251 | {"T", lp_throw}, /* labeled failure throw */ | ||
1252 | {"Lc", lp_labchoice}, /* labeled failure choice */ | ||
1253 | {NULL, NULL} | ||
1254 | }; | ||
1255 | |||
1256 | |||
1257 | static struct luaL_Reg metareg[] = { | ||
1258 | {"__mul", lp_seq}, | ||
1259 | {"__add", lp_choice}, | ||
1260 | {"__pow", lp_star}, | ||
1261 | {"__gc", lp_gc}, | ||
1262 | {"__len", lp_and}, | ||
1263 | {"__div", lp_divcapture}, | ||
1264 | {"__unm", lp_not}, | ||
1265 | {"__sub", lp_sub}, | ||
1266 | {NULL, NULL} | ||
1267 | }; | ||
1268 | |||
1269 | |||
1270 | int luaopen_lpeglabel (lua_State *L); | ||
1271 | int luaopen_lpeglabel (lua_State *L) { | ||
1272 | luaL_newmetatable(L, PATTERN_T); | ||
1273 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ | ||
1274 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
1275 | luaL_register(L, NULL, metareg); | ||
1276 | luaL_register(L, "lpeglabel", pattreg); | ||
1277 | lua_pushvalue(L, -1); | ||
1278 | lua_setfield(L, -3, "__index"); | ||
1279 | return 1; | ||
1280 | } | ||
1281 | |||
1282 | /* }====================================================== */ | ||