diff options
author | Alexey Romanoff <ar@logiceditor.com> | 2021-08-15 15:17:28 +0300 |
---|---|---|
committer | Alexey Romanoff <ar@logiceditor.com> | 2021-08-16 16:35:07 +0300 |
commit | 7668d9076da6e2ae9e220eafb5c6ee1e933c088d (patch) | |
tree | c2d8e3bdd0d6d189572bd50d8d9a078097fcc3b5 /lpltree.c | |
parent | 635a51b5c88e958c27b26e049c639ea774946367 (diff) | |
download | lpeglabel-7668d9076da6e2ae9e220eafb5c6ee1e933c088d.tar.gz lpeglabel-7668d9076da6e2ae9e220eafb5c6ee1e933c088d.tar.bz2 lpeglabel-7668d9076da6e2ae9e220eafb5c6ee1e933c088d.zip |
change prefix of source files from 'lp' to 'lpl'
To eliminate conflicts with the original LPeg:
> $ lua -e "require 'lpeg'; require 'lpeglabel'; for _ in (require'lxsh').lexers.lua.gmatch('1') do end"
lua: lpvm.c:347: match: Assertion `stack > getstackbase(L, ptop) && (stack - 1)->s != NULL' failed.
Aborted
Diffstat (limited to 'lpltree.c')
-rw-r--r-- | lpltree.c | 1403 |
1 files changed, 1403 insertions, 0 deletions
diff --git a/lpltree.c b/lpltree.c new file mode 100644 index 0000000..e7c6638 --- /dev/null +++ b/lpltree.c | |||
@@ -0,0 +1,1403 @@ | |||
1 | /* | ||
2 | ** $Id: lpltree.c $ | ||
3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | |||
6 | #include <ctype.h> | ||
7 | #include <limits.h> | ||
8 | #include <string.h> | ||
9 | |||
10 | |||
11 | #include "lua.h" | ||
12 | #include "lauxlib.h" | ||
13 | |||
14 | #include "lpltypes.h" | ||
15 | #include "lplcap.h" | ||
16 | #include "lplcode.h" | ||
17 | #include "lplprint.h" | ||
18 | #include "lpltree.h" | ||
19 | |||
20 | |||
21 | /* number of siblings for each tree */ | ||
22 | const byte numsiblings[] = { | ||
23 | 0, 0, 0, /* char, set, any */ | ||
24 | 0, 0, 0, /* true, false, utf-range */ | ||
25 | 1, /* rep */ | ||
26 | 2, 2, /* seq, choice */ | ||
27 | 1, 1, /* not, and */ | ||
28 | 0, 0, 2, 1, 1, /* call, opencall, rule, prerule, grammar */ | ||
29 | 1, /* behind */ | ||
30 | 1, 1, /* capture, runtime capture */ | ||
31 | 0 /* labeled failure throw */ | ||
32 | }; | ||
33 | |||
34 | |||
35 | static TTree *newgrammar (lua_State *L, int arg); | ||
36 | |||
37 | |||
38 | /* | ||
39 | ** returns a reasonable name for value at index 'idx' on the stack | ||
40 | */ | ||
41 | static const char *val2str (lua_State *L, int idx) { | ||
42 | const char *k = lua_tostring(L, idx); | ||
43 | if (k != NULL) | ||
44 | return lua_pushfstring(L, "%s", k); | ||
45 | else | ||
46 | return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx)); | ||
47 | } | ||
48 | |||
49 | |||
50 | /* | ||
51 | ** Fix a TOpenCall into a TCall node, using table 'postable' to | ||
52 | ** translate a key to its rule address in the tree. Raises an | ||
53 | ** error if key does not exist. | ||
54 | */ | ||
55 | static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t, byte tag) { /* labeled failure */ | ||
56 | int n; | ||
57 | lua_rawgeti(L, -1, t->key); /* get rule's name */ | ||
58 | lua_gettable(L, postable); /* query name in position table */ | ||
59 | n = lua_tonumber(L, -1); /* get (absolute) position */ | ||
60 | lua_pop(L, 1); /* remove position */ | ||
61 | if (tag == TOpenCall) { /* labeled failure */ | ||
62 | if (n == 0) { /* no position? */ | ||
63 | lua_rawgeti(L, -1, t->key); /* get rule's name again */ | ||
64 | luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); | ||
65 | } | ||
66 | t->tag = TCall; | ||
67 | t->u.ps = n - (t - g); /* position relative to node */ | ||
68 | assert(sib2(t)->tag == TRule); | ||
69 | sib2(t)->key = t->key; /* fix rule's key */ | ||
70 | } else if (n != 0) { /* labeled failure */ | ||
71 | t->u.ps = n - (t - g); /* position relative to node */ | ||
72 | } | ||
73 | } | ||
74 | |||
75 | |||
76 | /* | ||
77 | ** Transform left associative constructions into right | ||
78 | ** associative ones, for sequence and choice; that is: | ||
79 | ** (t11 + t12) + t2 => t11 + (t12 + t2) | ||
80 | ** (t11 * t12) * t2 => t11 * (t12 * t2) | ||
81 | ** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) | ||
82 | */ | ||
83 | static void correctassociativity (TTree *tree) { | ||
84 | TTree *t1 = sib1(tree); | ||
85 | assert(tree->tag == TChoice || tree->tag == TSeq); | ||
86 | while (t1->tag == tree->tag) { | ||
87 | int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */ | ||
88 | int n11size = t1->u.ps - 1; | ||
89 | int n12size = n1size - n11size - 1; | ||
90 | memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */ | ||
91 | tree->u.ps = n11size + 1; | ||
92 | sib2(tree)->tag = tree->tag; | ||
93 | sib2(tree)->u.ps = n12size + 1; | ||
94 | } | ||
95 | } | ||
96 | |||
97 | |||
98 | /* | ||
99 | ** Make final adjustments in a tree. Fix open calls in tree 't', | ||
100 | ** making them refer to their respective rules or raising appropriate | ||
101 | ** errors (if not inside a grammar). Correct associativity of associative | ||
102 | ** constructions (making them right associative). Assume that tree's | ||
103 | ** ktable is at the top of the stack (for error messages). | ||
104 | */ | ||
105 | static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { | ||
106 | tailcall: | ||
107 | switch (t->tag) { | ||
108 | case TGrammar: /* subgrammars were already fixed */ | ||
109 | return; | ||
110 | case TOpenCall: { | ||
111 | if (g != NULL) /* inside a grammar? */ | ||
112 | fixonecall(L, postable, g, t, TOpenCall); /* labeled failure */ | ||
113 | else { /* open call outside grammar */ | ||
114 | lua_rawgeti(L, -1, t->key); | ||
115 | luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); | ||
116 | } | ||
117 | break; | ||
118 | } | ||
119 | case TThrow: { /* labeled failure */ | ||
120 | if (g != NULL) /* inside a grammar? */ | ||
121 | fixonecall(L, postable, g, t, TThrow); | ||
122 | break; | ||
123 | } | ||
124 | case TSeq: case TChoice: | ||
125 | correctassociativity(t); | ||
126 | break; | ||
127 | } | ||
128 | switch (numsiblings[t->tag]) { | ||
129 | case 1: /* finalfix(L, postable, g, sib1(t)); */ | ||
130 | t = sib1(t); goto tailcall; | ||
131 | case 2: | ||
132 | finalfix(L, postable, g, sib1(t)); | ||
133 | t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */ | ||
134 | default: assert(numsiblings[t->tag] == 0); break; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | |||
139 | |||
140 | /* | ||
141 | ** {=================================================================== | ||
142 | ** KTable manipulation | ||
143 | ** | ||
144 | ** - The ktable of a pattern 'p' can be shared by other patterns that | ||
145 | ** contain 'p' and no other constants. Because of this sharing, we | ||
146 | ** should not add elements to a 'ktable' unless it was freshly created | ||
147 | ** for the new pattern. | ||
148 | ** | ||
149 | ** - The maximum index in a ktable is USHRT_MAX, because trees and | ||
150 | ** patterns use unsigned shorts to store those indices. | ||
151 | ** ==================================================================== | ||
152 | */ | ||
153 | |||
154 | /* | ||
155 | ** Create a new 'ktable' to the pattern at the top of the stack. | ||
156 | */ | ||
157 | static void newktable (lua_State *L, int n) { | ||
158 | lua_createtable(L, n, 0); /* create a fresh table */ | ||
159 | lua_setuservalue(L, -2); /* set it as 'ktable' for pattern */ | ||
160 | } | ||
161 | |||
162 | |||
163 | /* | ||
164 | ** Add element 'idx' to 'ktable' of pattern at the top of the stack; | ||
165 | ** Return index of new element. | ||
166 | ** If new element is nil, does not add it to table (as it would be | ||
167 | ** useless) and returns 0, as ktable[0] is always nil. | ||
168 | */ | ||
169 | static int addtoktable (lua_State *L, int idx) { | ||
170 | if (lua_isnil(L, idx)) /* nil value? */ | ||
171 | return 0; | ||
172 | else { | ||
173 | int n; | ||
174 | lua_getuservalue(L, -1); /* get ktable from pattern */ | ||
175 | n = lua_rawlen(L, -1); | ||
176 | if (n >= USHRT_MAX) | ||
177 | luaL_error(L, "too many Lua values in pattern"); | ||
178 | lua_pushvalue(L, idx); /* element to be added */ | ||
179 | lua_rawseti(L, -2, ++n); | ||
180 | lua_pop(L, 1); /* remove 'ktable' */ | ||
181 | return n; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | |||
186 | /* | ||
187 | ** Return the number of elements in the ktable at 'idx'. | ||
188 | ** In Lua 5.2/5.3, default "environment" for patterns is nil, not | ||
189 | ** a table. Treat it as an empty table. In Lua 5.1, assumes that | ||
190 | ** the environment has no numeric indices (len == 0) | ||
191 | */ | ||
192 | static int ktablelen (lua_State *L, int idx) { | ||
193 | if (!lua_istable(L, idx)) return 0; | ||
194 | else return lua_rawlen(L, idx); | ||
195 | } | ||
196 | |||
197 | |||
198 | /* | ||
199 | ** Concatentate the contents of table 'idx1' into table 'idx2'. | ||
200 | ** (Assume that both indices are negative.) | ||
201 | ** Return the original length of table 'idx2' (or 0, if no | ||
202 | ** element was added, as there is no need to correct any index). | ||
203 | */ | ||
204 | static int concattable (lua_State *L, int idx1, int idx2) { | ||
205 | int i; | ||
206 | int n1 = ktablelen(L, idx1); | ||
207 | int n2 = ktablelen(L, idx2); | ||
208 | if (n1 + n2 > USHRT_MAX) | ||
209 | luaL_error(L, "too many Lua values in pattern"); | ||
210 | if (n1 == 0) return 0; /* nothing to correct */ | ||
211 | for (i = 1; i <= n1; i++) { | ||
212 | lua_rawgeti(L, idx1, i); | ||
213 | lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ | ||
214 | } | ||
215 | return n2; | ||
216 | } | ||
217 | |||
218 | |||
219 | /* | ||
220 | ** When joining 'ktables', constants from one of the subpatterns must | ||
221 | ** be renumbered; 'correctkeys' corrects their indices (adding 'n' | ||
222 | ** to each of them) | ||
223 | */ | ||
224 | static void correctkeys (TTree *tree, int n) { | ||
225 | if (n == 0) return; /* no correction? */ | ||
226 | tailcall: | ||
227 | switch (tree->tag) { | ||
228 | case TOpenCall: case TCall: case TRunTime: case TRule: case TThrow: { /* labeled failure */ | ||
229 | if (tree->key > 0) | ||
230 | tree->key += n; | ||
231 | break; | ||
232 | } | ||
233 | case TCapture: { | ||
234 | if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) | ||
235 | tree->key += n; | ||
236 | break; | ||
237 | } | ||
238 | default: break; | ||
239 | } | ||
240 | switch (numsiblings[tree->tag]) { | ||
241 | case 1: /* correctkeys(sib1(tree), n); */ | ||
242 | tree = sib1(tree); goto tailcall; | ||
243 | case 2: | ||
244 | correctkeys(sib1(tree), n); | ||
245 | tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ | ||
246 | default: assert(numsiblings[tree->tag] == 0); break; | ||
247 | } | ||
248 | } | ||
249 | |||
250 | |||
251 | /* | ||
252 | ** Join the ktables from p1 and p2 the ktable for the new pattern at the | ||
253 | ** top of the stack, reusing them when possible. | ||
254 | */ | ||
255 | static void joinktables (lua_State *L, int p1, TTree *t2, int p2) { | ||
256 | int n1, n2; | ||
257 | lua_getuservalue(L, p1); /* get ktables */ | ||
258 | lua_getuservalue(L, p2); | ||
259 | n1 = ktablelen(L, -2); | ||
260 | n2 = ktablelen(L, -1); | ||
261 | if (n1 == 0 && n2 == 0) /* are both tables empty? */ | ||
262 | lua_pop(L, 2); /* nothing to be done; pop tables */ | ||
263 | else if (n2 == 0 || lp_equal(L, -2, -1)) { /* 2nd table empty or equal? */ | ||
264 | lua_pop(L, 1); /* pop 2nd table */ | ||
265 | lua_setuservalue(L, -2); /* set 1st ktable into new pattern */ | ||
266 | } | ||
267 | else if (n1 == 0) { /* first table is empty? */ | ||
268 | lua_setuservalue(L, -3); /* set 2nd table into new pattern */ | ||
269 | lua_pop(L, 1); /* pop 1st table */ | ||
270 | } | ||
271 | else { | ||
272 | lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ | ||
273 | /* stack: new p; ktable p1; ktable p2; new ktable */ | ||
274 | concattable(L, -3, -1); /* from p1 into new ktable */ | ||
275 | concattable(L, -2, -1); /* from p2 into new ktable */ | ||
276 | lua_setuservalue(L, -4); /* new ktable becomes 'p' environment */ | ||
277 | lua_pop(L, 2); /* pop other ktables */ | ||
278 | correctkeys(t2, n1); /* correction for indices from p2 */ | ||
279 | } | ||
280 | } | ||
281 | |||
282 | |||
283 | /* | ||
284 | ** copy 'ktable' of element 'idx' to new tree (on top of stack) | ||
285 | */ | ||
286 | static void copyktable (lua_State *L, int idx) { | ||
287 | lua_getuservalue(L, idx); | ||
288 | lua_setuservalue(L, -2); | ||
289 | } | ||
290 | |||
291 | |||
292 | /* | ||
293 | ** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable' | ||
294 | ** from tree at the top of the stack, and correct corresponding | ||
295 | ** tree. | ||
296 | */ | ||
297 | static void mergektable (lua_State *L, int idx, TTree *stree) { | ||
298 | int n; | ||
299 | lua_getuservalue(L, -1); /* get ktables */ | ||
300 | lua_getuservalue(L, idx); | ||
301 | n = concattable(L, -1, -2); | ||
302 | lua_pop(L, 2); /* remove both ktables */ | ||
303 | correctkeys(stree, n); | ||
304 | } | ||
305 | |||
306 | |||
307 | /* | ||
308 | ** Create a new 'ktable' to the pattern at the top of the stack, adding | ||
309 | ** all elements from pattern 'p' (if not 0) plus element 'idx' to it. | ||
310 | ** Return index of new element. | ||
311 | */ | ||
312 | static int addtonewktable (lua_State *L, int p, int idx) { | ||
313 | newktable(L, 1); | ||
314 | if (p) | ||
315 | mergektable(L, p, NULL); | ||
316 | return addtoktable(L, idx); | ||
317 | } | ||
318 | |||
319 | /* }====================================================== */ | ||
320 | |||
321 | |||
322 | /* | ||
323 | ** {====================================================== | ||
324 | ** Tree generation | ||
325 | ** ======================================================= | ||
326 | */ | ||
327 | |||
328 | /* | ||
329 | ** In 5.2, could use 'luaL_testudata'... | ||
330 | */ | ||
331 | static int testpattern (lua_State *L, int idx) { | ||
332 | if (lua_touserdata(L, idx)) { /* value is a userdata? */ | ||
333 | if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ | ||
334 | luaL_getmetatable(L, PATTERN_T); | ||
335 | if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */ | ||
336 | lua_pop(L, 2); /* remove both metatables */ | ||
337 | return 1; | ||
338 | } | ||
339 | } | ||
340 | } | ||
341 | return 0; | ||
342 | } | ||
343 | |||
344 | |||
345 | static Pattern *getpattern (lua_State *L, int idx) { | ||
346 | return (Pattern *)luaL_checkudata(L, idx, PATTERN_T); | ||
347 | } | ||
348 | |||
349 | |||
350 | static int getsize (lua_State *L, int idx) { | ||
351 | return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; | ||
352 | } | ||
353 | |||
354 | |||
355 | static TTree *gettree (lua_State *L, int idx, int *len) { | ||
356 | Pattern *p = getpattern(L, idx); | ||
357 | if (len) | ||
358 | *len = getsize(L, idx); | ||
359 | return p->tree; | ||
360 | } | ||
361 | |||
362 | |||
363 | /* | ||
364 | ** create a pattern. Set its uservalue (the 'ktable') equal to its | ||
365 | ** metatable. (It could be any empty sequence; the metatable is at | ||
366 | ** hand here, so we use it.) | ||
367 | */ | ||
368 | static TTree *newtree (lua_State *L, int len) { | ||
369 | size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); | ||
370 | Pattern *p = (Pattern *)lua_newuserdata(L, size); | ||
371 | luaL_getmetatable(L, PATTERN_T); | ||
372 | lua_pushvalue(L, -1); | ||
373 | lua_setuservalue(L, -3); | ||
374 | lua_setmetatable(L, -2); | ||
375 | p->code = NULL; p->codesize = 0; | ||
376 | return p->tree; | ||
377 | } | ||
378 | |||
379 | |||
380 | static TTree *newleaf (lua_State *L, int tag) { | ||
381 | TTree *tree = newtree(L, 1); | ||
382 | tree->tag = tag; | ||
383 | return tree; | ||
384 | } | ||
385 | |||
386 | |||
387 | static TTree *newcharset (lua_State *L) { | ||
388 | TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); | ||
389 | tree->tag = TSet; | ||
390 | loopset(i, treebuffer(tree)[i] = 0); | ||
391 | return tree; | ||
392 | } | ||
393 | |||
394 | |||
395 | /* | ||
396 | ** add to tree a sequence where first sibling is 'sib' (with size | ||
397 | ** 'sibsize'); returns position for second sibling | ||
398 | */ | ||
399 | static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { | ||
400 | tree->tag = TSeq; tree->u.ps = sibsize + 1; | ||
401 | memcpy(sib1(tree), sib, sibsize * sizeof(TTree)); | ||
402 | return sib2(tree); | ||
403 | } | ||
404 | |||
405 | |||
406 | /* | ||
407 | ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got | ||
408 | ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it | ||
409 | ** must build a sequence of sequence of sequence...) | ||
410 | */ | ||
411 | static void fillseq (TTree *tree, int tag, int n, const char *s) { | ||
412 | int i; | ||
413 | for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */ | ||
414 | tree->tag = TSeq; tree->u.ps = 2; | ||
415 | sib1(tree)->tag = tag; | ||
416 | sib1(tree)->u.n = s ? (byte)s[i] : 0; | ||
417 | tree = sib2(tree); | ||
418 | } | ||
419 | tree->tag = tag; /* last one does not need TSeq */ | ||
420 | tree->u.n = s ? (byte)s[i] : 0; | ||
421 | } | ||
422 | |||
423 | |||
424 | /* | ||
425 | ** Numbers as patterns: | ||
426 | ** 0 == true (always match); n == TAny repeated 'n' times; | ||
427 | ** -n == not (TAny repeated 'n' times) | ||
428 | */ | ||
429 | static TTree *numtree (lua_State *L, int n) { | ||
430 | if (n == 0) | ||
431 | return newleaf(L, TTrue); | ||
432 | else { | ||
433 | TTree *tree, *nd; | ||
434 | if (n > 0) | ||
435 | tree = nd = newtree(L, 2 * n - 1); | ||
436 | else { /* negative: code it as !(-n) */ | ||
437 | n = -n; | ||
438 | tree = newtree(L, 2 * n); | ||
439 | tree->tag = TNot; | ||
440 | nd = sib1(tree); | ||
441 | } | ||
442 | fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */ | ||
443 | return tree; | ||
444 | } | ||
445 | } | ||
446 | |||
447 | |||
448 | /* | ||
449 | ** Convert value at index 'idx' to a pattern | ||
450 | */ | ||
451 | static TTree *getpatt (lua_State *L, int idx, int *len) { | ||
452 | TTree *tree; | ||
453 | switch (lua_type(L, idx)) { | ||
454 | case LUA_TSTRING: { | ||
455 | size_t slen; | ||
456 | const char *s = lua_tolstring(L, idx, &slen); /* get string */ | ||
457 | if (slen == 0) /* empty? */ | ||
458 | tree = newleaf(L, TTrue); /* always match */ | ||
459 | else { | ||
460 | tree = newtree(L, 2 * (slen - 1) + 1); | ||
461 | fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */ | ||
462 | } | ||
463 | break; | ||
464 | } | ||
465 | case LUA_TNUMBER: { | ||
466 | int n = lua_tointeger(L, idx); | ||
467 | tree = numtree(L, n); | ||
468 | break; | ||
469 | } | ||
470 | case LUA_TBOOLEAN: { | ||
471 | tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse)); | ||
472 | break; | ||
473 | } | ||
474 | case LUA_TTABLE: { | ||
475 | tree = newgrammar(L, idx); | ||
476 | break; | ||
477 | } | ||
478 | case LUA_TFUNCTION: { | ||
479 | tree = newtree(L, 2); | ||
480 | tree->tag = TRunTime; | ||
481 | tree->key = addtonewktable(L, 0, idx); | ||
482 | sib1(tree)->tag = TTrue; | ||
483 | break; | ||
484 | } | ||
485 | default: { | ||
486 | return gettree(L, idx, len); | ||
487 | } | ||
488 | } | ||
489 | lua_replace(L, idx); /* put new tree into 'idx' slot */ | ||
490 | if (len) | ||
491 | *len = getsize(L, idx); | ||
492 | return tree; | ||
493 | } | ||
494 | |||
495 | |||
496 | /* | ||
497 | ** create a new tree, whith a new root and one sibling. | ||
498 | ** Sibling must be on the Lua stack, at index 1. | ||
499 | */ | ||
500 | static TTree *newroot1sib (lua_State *L, int tag) { | ||
501 | int s1; | ||
502 | TTree *tree1 = getpatt(L, 1, &s1); | ||
503 | TTree *tree = newtree(L, 1 + s1); /* create new tree */ | ||
504 | tree->tag = tag; | ||
505 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); | ||
506 | copyktable(L, 1); | ||
507 | return tree; | ||
508 | } | ||
509 | |||
510 | |||
511 | /* | ||
512 | ** create a new tree, whith a new root and 2 siblings. | ||
513 | ** Siblings must be on the Lua stack, first one at index 1. | ||
514 | */ | ||
515 | static TTree *newroot2sib (lua_State *L, int tag) { | ||
516 | int s1, s2; | ||
517 | TTree *tree1 = getpatt(L, 1, &s1); | ||
518 | TTree *tree2 = getpatt(L, 2, &s2); | ||
519 | TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */ | ||
520 | tree->tag = tag; | ||
521 | tree->u.ps = 1 + s1; | ||
522 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); | ||
523 | memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); | ||
524 | joinktables(L, 1, sib2(tree), 2); | ||
525 | return tree; | ||
526 | } | ||
527 | |||
528 | |||
529 | /* labeled failure begin */ | ||
530 | static TTree *newthrowleaf (lua_State *L) { | ||
531 | TTree *tree = newtree(L, 1); | ||
532 | tree->tag = TThrow; | ||
533 | tree->u.ps = 0; /* there is no recovery rule associated */ | ||
534 | return tree; | ||
535 | } | ||
536 | /* labeled failure end */ | ||
537 | |||
538 | |||
539 | static int lp_P (lua_State *L) { | ||
540 | luaL_checkany(L, 1); | ||
541 | getpatt(L, 1, NULL); | ||
542 | lua_settop(L, 1); | ||
543 | return 1; | ||
544 | } | ||
545 | |||
546 | |||
547 | /* | ||
548 | ** sequence operator; optimizations: | ||
549 | ** false x => false, x true => x, true x => x | ||
550 | ** (cannot do x . false => false because x may have runtime captures) | ||
551 | */ | ||
552 | static int lp_seq (lua_State *L) { | ||
553 | TTree *tree1 = getpatt(L, 1, NULL); | ||
554 | TTree *tree2 = getpatt(L, 2, NULL); | ||
555 | if (tree1->tag == TFalse || tree2->tag == TTrue) | ||
556 | lua_pushvalue(L, 1); /* false . x == false, x . true = x */ | ||
557 | else if (tree1->tag == TTrue) | ||
558 | lua_pushvalue(L, 2); /* true . x = x */ | ||
559 | else | ||
560 | newroot2sib(L, TSeq); | ||
561 | return 1; | ||
562 | } | ||
563 | |||
564 | |||
565 | /* | ||
566 | ** choice operator; optimizations: | ||
567 | ** charset / charset => charset | ||
568 | ** true / x => true, x / false => x, false / x => x | ||
569 | ** (x / true is not equivalent to true) | ||
570 | */ | ||
571 | static int lp_choice (lua_State *L) { | ||
572 | Charset st1, st2; | ||
573 | TTree *t1 = getpatt(L, 1, NULL); | ||
574 | TTree *t2 = getpatt(L, 2, NULL); | ||
575 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | ||
576 | TTree *t = newcharset(L); | ||
577 | loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); | ||
578 | } | ||
579 | else if (nofail(t1) || t2->tag == TFalse) | ||
580 | lua_pushvalue(L, 1); /* true / x => true, x / false => x */ | ||
581 | else if (t1->tag == TFalse) | ||
582 | lua_pushvalue(L, 2); /* false / x => x */ | ||
583 | else | ||
584 | newroot2sib(L, TChoice); | ||
585 | return 1; | ||
586 | } | ||
587 | |||
588 | |||
589 | /* | ||
590 | ** p^n | ||
591 | */ | ||
592 | static int lp_star (lua_State *L) { | ||
593 | int size1; | ||
594 | int n = (int)luaL_checkinteger(L, 2); | ||
595 | TTree *tree1 = getpatt(L, 1, &size1); | ||
596 | if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ | ||
597 | TTree *tree = newtree(L, (n + 1) * (size1 + 1)); | ||
598 | if (nullable(tree1)) | ||
599 | luaL_error(L, "loop body may accept empty string"); | ||
600 | while (n--) /* repeat 'n' times */ | ||
601 | tree = seqaux(tree, tree1, size1); | ||
602 | tree->tag = TRep; | ||
603 | memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); | ||
604 | } | ||
605 | else { /* choice (seq tree1 ... choice tree1 true ...) true */ | ||
606 | TTree *tree; | ||
607 | n = -n; | ||
608 | /* size = (choice + seq + tree1 + true) * n, but the last has no seq */ | ||
609 | tree = newtree(L, n * (size1 + 3) - 1); | ||
610 | for (; n > 1; n--) { /* repeat (n - 1) times */ | ||
611 | tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2; | ||
612 | sib2(tree)->tag = TTrue; | ||
613 | tree = sib1(tree); | ||
614 | tree = seqaux(tree, tree1, size1); | ||
615 | } | ||
616 | tree->tag = TChoice; tree->u.ps = size1 + 1; | ||
617 | sib2(tree)->tag = TTrue; | ||
618 | memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); | ||
619 | } | ||
620 | copyktable(L, 1); | ||
621 | return 1; | ||
622 | } | ||
623 | |||
624 | |||
625 | /* | ||
626 | ** #p == &p | ||
627 | */ | ||
628 | static int lp_and (lua_State *L) { | ||
629 | newroot1sib(L, TAnd); | ||
630 | return 1; | ||
631 | } | ||
632 | |||
633 | |||
634 | /* | ||
635 | ** -p == !p | ||
636 | */ | ||
637 | static int lp_not (lua_State *L) { | ||
638 | newroot1sib(L, TNot); | ||
639 | return 1; | ||
640 | } | ||
641 | |||
642 | |||
643 | /* | ||
644 | ** [t1 - t2] == Seq (Not t2) t1 | ||
645 | ** If t1 and t2 are charsets, make their difference. | ||
646 | */ | ||
647 | static int lp_sub (lua_State *L) { | ||
648 | Charset st1, st2; | ||
649 | int s1, s2; | ||
650 | TTree *t1 = getpatt(L, 1, &s1); | ||
651 | TTree *t2 = getpatt(L, 2, &s2); | ||
652 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | ||
653 | TTree *t = newcharset(L); | ||
654 | loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); | ||
655 | } | ||
656 | else { | ||
657 | TTree *tree = newtree(L, 2 + s1 + s2); | ||
658 | tree->tag = TSeq; /* sequence of... */ | ||
659 | tree->u.ps = 2 + s2; | ||
660 | sib1(tree)->tag = TNot; /* ...not... */ | ||
661 | memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ | ||
662 | memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ | ||
663 | joinktables(L, 1, sib1(tree), 2); | ||
664 | } | ||
665 | return 1; | ||
666 | } | ||
667 | |||
668 | |||
669 | static int lp_set (lua_State *L) { | ||
670 | size_t l; | ||
671 | const char *s = luaL_checklstring(L, 1, &l); | ||
672 | TTree *tree = newcharset(L); | ||
673 | while (l--) { | ||
674 | setchar(treebuffer(tree), (byte)(*s)); | ||
675 | s++; | ||
676 | } | ||
677 | return 1; | ||
678 | } | ||
679 | |||
680 | |||
681 | static int lp_range (lua_State *L) { | ||
682 | int arg; | ||
683 | int top = lua_gettop(L); | ||
684 | TTree *tree = newcharset(L); | ||
685 | for (arg = 1; arg <= top; arg++) { | ||
686 | int c; | ||
687 | size_t l; | ||
688 | const char *r = luaL_checklstring(L, arg, &l); | ||
689 | luaL_argcheck(L, l == 2, arg, "range must have two characters"); | ||
690 | for (c = (byte)r[0]; c <= (byte)r[1]; c++) | ||
691 | setchar(treebuffer(tree), c); | ||
692 | } | ||
693 | return 1; | ||
694 | } | ||
695 | |||
696 | |||
697 | /* | ||
698 | ** Fills a tree node with basic information about the UTF-8 code point | ||
699 | ** 'cpu': its value in 'n', its length in 'cap', and its first byte in | ||
700 | ** 'key' | ||
701 | */ | ||
702 | static void codeutftree (lua_State *L, TTree *t, lua_Unsigned cpu, int arg) { | ||
703 | int len, fb, cp; | ||
704 | cp = (int)cpu; | ||
705 | if (cp <= 0x7f) { /* one byte? */ | ||
706 | len = 1; | ||
707 | fb = cp; | ||
708 | } else if (cp <= 0x7ff) { | ||
709 | len = 2; | ||
710 | fb = 0xC0 | (cp >> 6); | ||
711 | } else if (cp <= 0xffff) { | ||
712 | len = 3; | ||
713 | fb = 0xE0 | (cp >> 12); | ||
714 | } | ||
715 | else { | ||
716 | luaL_argcheck(L, cpu <= 0x10ffffu, arg, "invalid code point"); | ||
717 | len = 4; | ||
718 | fb = 0xF0 | (cp >> 18); | ||
719 | } | ||
720 | t->u.n = cp; | ||
721 | t->cap = len; | ||
722 | t->key = fb; | ||
723 | } | ||
724 | |||
725 | |||
726 | static int lp_utfr (lua_State *L) { | ||
727 | lua_Unsigned from = (lua_Unsigned)luaL_checkinteger(L, 1); | ||
728 | lua_Unsigned to = (lua_Unsigned)luaL_checkinteger(L, 2); | ||
729 | luaL_argcheck(L, from <= to, 2, "empty range"); | ||
730 | if (to <= 0x7f) { /* ascii range? */ | ||
731 | TTree *tree = newcharset(L); /* code it as a regular charset */ | ||
732 | unsigned int f; | ||
733 | for (f = (int)from; f <= to; f++) | ||
734 | setchar(treebuffer(tree), f); | ||
735 | } | ||
736 | else { /* multi-byte utf-8 range */ | ||
737 | TTree *tree = newtree(L, 2); | ||
738 | tree->tag = TUTFR; | ||
739 | codeutftree(L, tree, from, 1); | ||
740 | sib1(tree)->tag = TXInfo; | ||
741 | codeutftree(L, sib1(tree), to, 2); | ||
742 | } | ||
743 | return 1; | ||
744 | } | ||
745 | |||
746 | |||
747 | /* | ||
748 | ** Look-behind predicate | ||
749 | */ | ||
750 | static int lp_behind (lua_State *L) { | ||
751 | TTree *tree; | ||
752 | TTree *tree1 = getpatt(L, 1, NULL); | ||
753 | int n = fixedlen(tree1); | ||
754 | luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length"); | ||
755 | luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); | ||
756 | luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); | ||
757 | tree = newroot1sib(L, TBehind); | ||
758 | tree->u.n = n; | ||
759 | return 1; | ||
760 | } | ||
761 | |||
762 | |||
763 | /* labeled failure begin */ | ||
764 | /* | ||
765 | ** Throws a label | ||
766 | */ | ||
767 | static int lp_throw (lua_State *L) { | ||
768 | TTree * tree; | ||
769 | luaL_checkstring(L, -1); | ||
770 | tree = newthrowleaf(L); | ||
771 | tree->key = addtonewktable(L, 0, 1); | ||
772 | return 1; | ||
773 | } | ||
774 | /* labeled failure end */ | ||
775 | |||
776 | |||
777 | /* | ||
778 | ** Create a non-terminal | ||
779 | */ | ||
780 | static int lp_V (lua_State *L) { | ||
781 | TTree *tree = newleaf(L, TOpenCall); | ||
782 | luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); | ||
783 | tree->key = addtonewktable(L, 0, 1); | ||
784 | return 1; | ||
785 | } | ||
786 | |||
787 | |||
788 | /* | ||
789 | ** Create a tree for a non-empty capture, with a body and | ||
790 | ** optionally with an associated Lua value (at index 'labelidx' in the | ||
791 | ** stack) | ||
792 | */ | ||
793 | static int capture_aux (lua_State *L, int cap, int labelidx) { | ||
794 | TTree *tree = newroot1sib(L, TCapture); | ||
795 | tree->cap = cap; | ||
796 | tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx); | ||
797 | return 1; | ||
798 | } | ||
799 | |||
800 | |||
801 | /* | ||
802 | ** Fill a tree with an empty capture, using an empty (TTrue) sibling. | ||
803 | ** (The 'key' field must be filled by the caller to finish the tree.) | ||
804 | */ | ||
805 | static TTree *auxemptycap (TTree *tree, int cap) { | ||
806 | tree->tag = TCapture; | ||
807 | tree->cap = cap; | ||
808 | sib1(tree)->tag = TTrue; | ||
809 | return tree; | ||
810 | } | ||
811 | |||
812 | |||
813 | /* | ||
814 | ** Create a tree for an empty capture. | ||
815 | */ | ||
816 | static TTree *newemptycap (lua_State *L, int cap, int key) { | ||
817 | TTree *tree = auxemptycap(newtree(L, 2), cap); | ||
818 | tree->key = key; | ||
819 | return tree; | ||
820 | } | ||
821 | |||
822 | |||
823 | /* | ||
824 | ** Create a tree for an empty capture with an associated Lua value. | ||
825 | */ | ||
826 | static TTree *newemptycapkey (lua_State *L, int cap, int idx) { | ||
827 | TTree *tree = auxemptycap(newtree(L, 2), cap); | ||
828 | tree->key = addtonewktable(L, 0, idx); | ||
829 | return tree; | ||
830 | } | ||
831 | |||
832 | |||
833 | /* | ||
834 | ** Captures with syntax p / v | ||
835 | ** (function capture, query capture, string capture, or number capture) | ||
836 | */ | ||
837 | static int lp_divcapture (lua_State *L) { | ||
838 | switch (lua_type(L, 2)) { | ||
839 | case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2); | ||
840 | case LUA_TTABLE: return capture_aux(L, Cquery, 2); | ||
841 | case LUA_TSTRING: return capture_aux(L, Cstring, 2); | ||
842 | case LUA_TNUMBER: { | ||
843 | int n = lua_tointeger(L, 2); | ||
844 | TTree *tree = newroot1sib(L, TCapture); | ||
845 | luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number"); | ||
846 | tree->cap = Cnum; | ||
847 | tree->key = n; | ||
848 | return 1; | ||
849 | } | ||
850 | default: return luaL_argerror(L, 2, "invalid replacement value"); | ||
851 | } | ||
852 | } | ||
853 | |||
854 | |||
855 | static int lp_substcapture (lua_State *L) { | ||
856 | return capture_aux(L, Csubst, 0); | ||
857 | } | ||
858 | |||
859 | |||
860 | static int lp_tablecapture (lua_State *L) { | ||
861 | return capture_aux(L, Ctable, 0); | ||
862 | } | ||
863 | |||
864 | |||
865 | static int lp_groupcapture (lua_State *L) { | ||
866 | if (lua_isnoneornil(L, 2)) | ||
867 | return capture_aux(L, Cgroup, 0); | ||
868 | else | ||
869 | return capture_aux(L, Cgroup, 2); | ||
870 | } | ||
871 | |||
872 | |||
873 | static int lp_foldcapture (lua_State *L) { | ||
874 | luaL_checktype(L, 2, LUA_TFUNCTION); | ||
875 | return capture_aux(L, Cfold, 2); | ||
876 | } | ||
877 | |||
878 | |||
879 | static int lp_simplecapture (lua_State *L) { | ||
880 | return capture_aux(L, Csimple, 0); | ||
881 | } | ||
882 | |||
883 | |||
884 | static int lp_poscapture (lua_State *L) { | ||
885 | newemptycap(L, Cposition, 0); | ||
886 | return 1; | ||
887 | } | ||
888 | |||
889 | |||
890 | static int lp_argcapture (lua_State *L) { | ||
891 | int n = (int)luaL_checkinteger(L, 1); | ||
892 | luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); | ||
893 | newemptycap(L, Carg, n); | ||
894 | return 1; | ||
895 | } | ||
896 | |||
897 | |||
898 | static int lp_backref (lua_State *L) { | ||
899 | luaL_checkany(L, 1); | ||
900 | newemptycapkey(L, Cbackref, 1); | ||
901 | return 1; | ||
902 | } | ||
903 | |||
904 | |||
905 | /* | ||
906 | ** Constant capture | ||
907 | */ | ||
908 | static int lp_constcapture (lua_State *L) { | ||
909 | int i; | ||
910 | int n = lua_gettop(L); /* number of values */ | ||
911 | if (n == 0) /* no values? */ | ||
912 | newleaf(L, TTrue); /* no capture */ | ||
913 | else if (n == 1) | ||
914 | newemptycapkey(L, Cconst, 1); /* single constant capture */ | ||
915 | else { /* create a group capture with all values */ | ||
916 | TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); | ||
917 | newktable(L, n); /* create a 'ktable' for new tree */ | ||
918 | tree->tag = TCapture; | ||
919 | tree->cap = Cgroup; | ||
920 | tree->key = 0; | ||
921 | tree = sib1(tree); | ||
922 | for (i = 1; i <= n - 1; i++) { | ||
923 | tree->tag = TSeq; | ||
924 | tree->u.ps = 3; /* skip TCapture and its sibling */ | ||
925 | auxemptycap(sib1(tree), Cconst); | ||
926 | sib1(tree)->key = addtoktable(L, i); | ||
927 | tree = sib2(tree); | ||
928 | } | ||
929 | auxemptycap(tree, Cconst); | ||
930 | tree->key = addtoktable(L, i); | ||
931 | } | ||
932 | return 1; | ||
933 | } | ||
934 | |||
935 | |||
936 | static int lp_matchtime (lua_State *L) { | ||
937 | TTree *tree; | ||
938 | luaL_checktype(L, 2, LUA_TFUNCTION); | ||
939 | tree = newroot1sib(L, TRunTime); | ||
940 | tree->key = addtonewktable(L, 1, 2); | ||
941 | return 1; | ||
942 | } | ||
943 | |||
944 | /* }====================================================== */ | ||
945 | |||
946 | |||
947 | /* | ||
948 | ** {====================================================== | ||
949 | ** Grammar - Tree generation | ||
950 | ** ======================================================= | ||
951 | */ | ||
952 | |||
953 | /* | ||
954 | ** push on the stack the index and the pattern for the | ||
955 | ** initial rule of grammar at index 'arg' in the stack; | ||
956 | ** also add that index into position table. | ||
957 | */ | ||
958 | static void getfirstrule (lua_State *L, int arg, int postab) { | ||
959 | lua_rawgeti(L, arg, 1); /* access first element */ | ||
960 | if (lua_isstring(L, -1)) { /* is it the name of initial rule? */ | ||
961 | lua_pushvalue(L, -1); /* duplicate it to use as key */ | ||
962 | lua_gettable(L, arg); /* get associated rule */ | ||
963 | } | ||
964 | else { | ||
965 | lua_pushinteger(L, 1); /* key for initial rule */ | ||
966 | lua_insert(L, -2); /* put it before rule */ | ||
967 | } | ||
968 | if (!testpattern(L, -1)) { /* initial rule not a pattern? */ | ||
969 | if (lua_isnil(L, -1)) | ||
970 | luaL_error(L, "grammar has no initial rule"); | ||
971 | else | ||
972 | luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2)); | ||
973 | } | ||
974 | lua_pushvalue(L, -2); /* push key */ | ||
975 | lua_pushinteger(L, 1); /* push rule position (after TGrammar) */ | ||
976 | lua_settable(L, postab); /* insert pair at position table */ | ||
977 | } | ||
978 | |||
979 | /* | ||
980 | ** traverse grammar at index 'arg', pushing all its keys and patterns | ||
981 | ** into the stack. Create a new table (before all pairs key-pattern) to | ||
982 | ** collect all keys and their associated positions in the final tree | ||
983 | ** (the "position table"). | ||
984 | ** Return the number of rules and (in 'totalsize') the total size | ||
985 | ** for the new tree. | ||
986 | */ | ||
987 | static int collectrules (lua_State *L, int arg, int *totalsize) { | ||
988 | int n = 1; /* to count number of rules */ | ||
989 | int postab = lua_gettop(L) + 1; /* index of position table */ | ||
990 | int size; /* accumulator for total size */ | ||
991 | lua_newtable(L); /* create position table */ | ||
992 | getfirstrule(L, arg, postab); | ||
993 | size = 3 + getsize(L, postab + 2); /* TGrammar + TRule + TXInfo + rule */ | ||
994 | lua_pushnil(L); /* prepare to traverse grammar table */ | ||
995 | while (lua_next(L, arg) != 0) { | ||
996 | if (lua_tonumber(L, -2) == 1 || | ||
997 | lp_equal(L, -2, postab + 1)) { /* initial rule? */ | ||
998 | lua_pop(L, 1); /* remove value (keep key for lua_next) */ | ||
999 | continue; | ||
1000 | } | ||
1001 | if (!testpattern(L, -1)) /* value is not a pattern? */ | ||
1002 | luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2)); | ||
1003 | luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); | ||
1004 | lua_pushvalue(L, -2); /* push key (to insert into position table) */ | ||
1005 | lua_pushinteger(L, size); | ||
1006 | lua_settable(L, postab); | ||
1007 | size += 2 + getsize(L, -1); /* add 'TRule + TXInfo + rule' to size */ | ||
1008 | lua_pushvalue(L, -2); /* push key (for next lua_next) */ | ||
1009 | n++; | ||
1010 | } | ||
1011 | *totalsize = size + 1; /* space for 'TTrue' finishing list of rules */ | ||
1012 | return n; | ||
1013 | } | ||
1014 | |||
1015 | |||
1016 | static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | ||
1017 | int i; | ||
1018 | TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ | ||
1019 | for (i = 0; i < n; i++) { /* add each rule into new tree */ | ||
1020 | int ridx = frule + 2*i + 1; /* index of i-th rule */ | ||
1021 | int rulesize; | ||
1022 | TTree *rn = gettree(L, ridx, &rulesize); | ||
1023 | TTree *pr = sib1(nd); /* points to rule's prerule */ | ||
1024 | nd->tag = TRule; | ||
1025 | nd->key = 0; /* will be fixed when rule is used */ | ||
1026 | pr->tag = TXInfo; | ||
1027 | pr->u.n = i; /* rule number */ | ||
1028 | nd->u.ps = rulesize + 2; /* point to next rule */ | ||
1029 | memcpy(sib1(pr), rn, rulesize * sizeof(TTree)); /* copy rule */ | ||
1030 | mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ | ||
1031 | nd = sib2(nd); /* move to next rule */ | ||
1032 | } | ||
1033 | nd->tag = TTrue; /* finish list of rules */ | ||
1034 | } | ||
1035 | |||
1036 | |||
1037 | /* | ||
1038 | ** Check whether a tree has potential infinite loops | ||
1039 | */ | ||
1040 | static int checkloops (TTree *tree) { | ||
1041 | tailcall: | ||
1042 | if (tree->tag == TRep && nullable(sib1(tree))) | ||
1043 | return 1; | ||
1044 | else if (tree->tag == TGrammar) | ||
1045 | return 0; /* sub-grammars already checked */ | ||
1046 | else { | ||
1047 | switch (numsiblings[tree->tag]) { | ||
1048 | case 1: /* return checkloops(sib1(tree)); */ | ||
1049 | tree = sib1(tree); goto tailcall; | ||
1050 | case 2: | ||
1051 | if (checkloops(sib1(tree))) return 1; | ||
1052 | /* else return checkloops(sib2(tree)); */ | ||
1053 | tree = sib2(tree); goto tailcall; | ||
1054 | default: assert(numsiblings[tree->tag] == 0); return 0; | ||
1055 | } | ||
1056 | } | ||
1057 | } | ||
1058 | |||
1059 | |||
1060 | /* | ||
1061 | ** Give appropriate error message for 'verifyrule'. If a rule appears | ||
1062 | ** twice in 'passed', there is path from it back to itself without | ||
1063 | ** advancing the subject. | ||
1064 | */ | ||
1065 | static int verifyerror (lua_State *L, unsigned short *passed, int npassed) { | ||
1066 | int i, j; | ||
1067 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | ||
1068 | for (j = i - 1; j >= 0; j--) { | ||
1069 | if (passed[i] == passed[j]) { | ||
1070 | lua_rawgeti(L, -1, passed[i]); /* get rule's key */ | ||
1071 | return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); | ||
1072 | } | ||
1073 | } | ||
1074 | } | ||
1075 | return luaL_error(L, "too many left calls in grammar"); | ||
1076 | } | ||
1077 | |||
1078 | |||
1079 | /* | ||
1080 | ** Check whether a rule can be left recursive; raise an error in that | ||
1081 | ** case; otherwise return 1 iff pattern is nullable. | ||
1082 | ** The return value is used to check sequences, where the second pattern | ||
1083 | ** is only relevant if the first is nullable. | ||
1084 | ** Parameter 'nb' works as an accumulator, to allow tail calls in | ||
1085 | ** choices. ('nb' true makes function returns true.) | ||
1086 | ** Parameter 'passed' is a list of already visited rules, 'npassed' | ||
1087 | ** counts the elements in 'passed'. | ||
1088 | ** Assume ktable at the top of the stack. | ||
1089 | */ | ||
1090 | static int verifyrule (lua_State *L, TTree *tree, unsigned short *passed, | ||
1091 | int npassed, int nb) { | ||
1092 | tailcall: | ||
1093 | switch (tree->tag) { | ||
1094 | case TChar: case TSet: case TAny: | ||
1095 | case TFalse: case TUTFR: | ||
1096 | case TThrow: /* labeled failure */ | ||
1097 | return nb; /* cannot pass from here */ | ||
1098 | case TTrue: | ||
1099 | case TBehind: /* look-behind cannot have calls */ | ||
1100 | return 1; | ||
1101 | case TNot: case TAnd: case TRep: | ||
1102 | /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ | ||
1103 | tree = sib1(tree); nb = 1; goto tailcall; | ||
1104 | case TCapture: case TRunTime: case TXInfo: | ||
1105 | /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ | ||
1106 | tree = sib1(tree); goto tailcall; | ||
1107 | case TCall: | ||
1108 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ | ||
1109 | tree = sib2(tree); goto tailcall; | ||
1110 | case TSeq: /* only check 2nd child if first is nb */ | ||
1111 | if (!verifyrule(L, sib1(tree), passed, npassed, 0)) | ||
1112 | return nb; | ||
1113 | /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ | ||
1114 | tree = sib2(tree); goto tailcall; | ||
1115 | case TChoice: /* must check both children */ | ||
1116 | nb = verifyrule(L, sib1(tree), passed, npassed, nb); | ||
1117 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ | ||
1118 | tree = sib2(tree); goto tailcall; | ||
1119 | case TRule: | ||
1120 | if (npassed >= MAXRULES) /* too many steps? */ | ||
1121 | return verifyerror(L, passed, npassed); /* error */ | ||
1122 | else { | ||
1123 | passed[npassed++] = tree->key; /* add rule to path */ | ||
1124 | /* return verifyrule(L, sib1(tree), passed, npassed); */ | ||
1125 | tree = sib1(tree); goto tailcall; | ||
1126 | } | ||
1127 | case TGrammar: | ||
1128 | return nullable(tree); /* sub-grammar cannot be left recursive */ | ||
1129 | default: assert(0); return 0; | ||
1130 | } | ||
1131 | } | ||
1132 | |||
1133 | |||
1134 | static void verifygrammar (lua_State *L, TTree *grammar) { | ||
1135 | unsigned short passed[MAXRULES]; | ||
1136 | TTree *rule; | ||
1137 | /* check left-recursive rules */ | ||
1138 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | ||
1139 | if (rule->key == 0) continue; /* unused rule */ | ||
1140 | verifyrule(L, sib1(rule), passed, 0, 0); | ||
1141 | } | ||
1142 | assert(rule->tag == TTrue); | ||
1143 | /* check infinite loops inside rules */ | ||
1144 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | ||
1145 | if (rule->key == 0) continue; /* unused rule */ | ||
1146 | if (checkloops(sib1(rule))) { | ||
1147 | lua_rawgeti(L, -1, rule->key); /* get rule's key */ | ||
1148 | luaL_error(L, "empty loop in rule '%s'", val2str(L, -1)); | ||
1149 | } | ||
1150 | } | ||
1151 | assert(rule->tag == TTrue); | ||
1152 | } | ||
1153 | |||
1154 | |||
1155 | /* | ||
1156 | ** Give a name for the initial rule if it is not referenced | ||
1157 | */ | ||
1158 | static void initialrulename (lua_State *L, TTree *grammar, int frule) { | ||
1159 | if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */ | ||
1160 | int n = lua_rawlen(L, -1) + 1; /* index for name */ | ||
1161 | lua_pushvalue(L, frule); /* rule's name */ | ||
1162 | lua_rawseti(L, -2, n); /* ktable was on the top of the stack */ | ||
1163 | sib1(grammar)->key = n; | ||
1164 | } | ||
1165 | } | ||
1166 | |||
1167 | |||
1168 | static TTree *newgrammar (lua_State *L, int arg) { | ||
1169 | int treesize; | ||
1170 | int frule = lua_gettop(L) + 2; /* position of first rule's key */ | ||
1171 | int n = collectrules(L, arg, &treesize); | ||
1172 | TTree *g = newtree(L, treesize); | ||
1173 | luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules"); | ||
1174 | g->tag = TGrammar; g->u.n = n; | ||
1175 | lua_newtable(L); /* create 'ktable' */ | ||
1176 | lua_setuservalue(L, -2); | ||
1177 | buildgrammar(L, g, frule, n); | ||
1178 | lua_getuservalue(L, -1); /* get 'ktable' for new tree */ | ||
1179 | finalfix(L, frule - 1, g, sib1(g)); | ||
1180 | initialrulename(L, g, frule); | ||
1181 | verifygrammar(L, g); | ||
1182 | lua_pop(L, 1); /* remove 'ktable' */ | ||
1183 | lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */ | ||
1184 | lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */ | ||
1185 | return g; /* new table at the top of the stack */ | ||
1186 | } | ||
1187 | |||
1188 | /* }====================================================== */ | ||
1189 | |||
1190 | |||
1191 | static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { | ||
1192 | lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ | ||
1193 | finalfix(L, 0, NULL, p->tree); | ||
1194 | lua_pop(L, 1); /* remove 'ktable' */ | ||
1195 | return compile(L, p); | ||
1196 | } | ||
1197 | |||
1198 | |||
1199 | static int lp_printtree (lua_State *L) { | ||
1200 | TTree *tree = getpatt(L, 1, NULL); | ||
1201 | int c = lua_toboolean(L, 2); | ||
1202 | if (c) { | ||
1203 | lua_getuservalue(L, 1); /* push 'ktable' (may be used by 'finalfix') */ | ||
1204 | finalfix(L, 0, NULL, tree); | ||
1205 | lua_pop(L, 1); /* remove 'ktable' */ | ||
1206 | } | ||
1207 | printktable(L, 1); | ||
1208 | printtree(tree, 0); | ||
1209 | return 0; | ||
1210 | } | ||
1211 | |||
1212 | |||
1213 | static int lp_printcode (lua_State *L) { | ||
1214 | Pattern *p = getpattern(L, 1); | ||
1215 | printktable(L, 1); | ||
1216 | if (p->code == NULL) /* not compiled yet? */ | ||
1217 | prepcompile(L, p, 1); | ||
1218 | printpatt(p->code, p->codesize); | ||
1219 | return 0; | ||
1220 | } | ||
1221 | |||
1222 | |||
1223 | /* | ||
1224 | ** Get the initial position for the match, interpreting negative | ||
1225 | ** values from the end of the subject | ||
1226 | */ | ||
1227 | static size_t initposition (lua_State *L, size_t len) { | ||
1228 | lua_Integer ii = luaL_optinteger(L, 3, 1); | ||
1229 | if (ii > 0) { /* positive index? */ | ||
1230 | if ((size_t)ii <= len) /* inside the string? */ | ||
1231 | return (size_t)ii - 1; /* return it (corrected to 0-base) */ | ||
1232 | else return len; /* crop at the end */ | ||
1233 | } | ||
1234 | else { /* negative index */ | ||
1235 | if ((size_t)(-ii) <= len) /* inside the string? */ | ||
1236 | return len - ((size_t)(-ii)); /* return position from the end */ | ||
1237 | else return 0; /* crop at the beginning */ | ||
1238 | } | ||
1239 | } | ||
1240 | |||
1241 | |||
1242 | /* | ||
1243 | ** Main match function | ||
1244 | */ | ||
1245 | static int lp_match (lua_State *L) { | ||
1246 | Capture capture[INITCAPSIZE]; | ||
1247 | const char *r; | ||
1248 | size_t l; | ||
1249 | Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1)); | ||
1250 | Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1); | ||
1251 | const char *s = luaL_checklstring(L, SUBJIDX, &l); | ||
1252 | size_t i = initposition(L, l); | ||
1253 | int ptop = lua_gettop(L); | ||
1254 | short labelf; /* labeled failure */ | ||
1255 | const char *sfail = NULL; /* labeled failure */ | ||
1256 | lua_pushnil(L); /* initialize subscache */ | ||
1257 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ | ||
1258 | lua_getuservalue(L, 1); /* initialize penvidx */ | ||
1259 | r = match(L, s, s + i, s + l, code, capture, ptop, &labelf, &sfail); /* labeled failure */ | ||
1260 | if (r == NULL) { /* labeled failure begin */ | ||
1261 | lua_pushnil(L); | ||
1262 | if (labelf) { | ||
1263 | lua_Integer luaInt; | ||
1264 | lua_rawgeti(L, ktableidx(ptop), labelf); | ||
1265 | luaInt = lua_tointeger(L, -1); | ||
1266 | if (luaInt) { | ||
1267 | lua_pop(L, 1); | ||
1268 | lua_pushinteger(L, luaInt); | ||
1269 | } | ||
1270 | } | ||
1271 | else | ||
1272 | lua_pushstring(L, "fail"); | ||
1273 | lua_pushinteger(L, sfail - (s + i) + 1); /* subject position related to the error */ | ||
1274 | return 3; | ||
1275 | } /* labeled failure end */ | ||
1276 | return getcaptures(L, s, r, ptop); | ||
1277 | } | ||
1278 | |||
1279 | |||
1280 | |||
1281 | /* | ||
1282 | ** {====================================================== | ||
1283 | ** Library creation and functions not related to matching | ||
1284 | ** ======================================================= | ||
1285 | */ | ||
1286 | |||
1287 | /* maximum limit for stack size */ | ||
1288 | #define MAXLIM (INT_MAX / 100) | ||
1289 | |||
1290 | static int lp_setmax (lua_State *L) { | ||
1291 | lua_Integer lim = luaL_checkinteger(L, 1); | ||
1292 | luaL_argcheck(L, 0 < lim && lim <= MAXLIM, 1, "out of range"); | ||
1293 | lua_settop(L, 1); | ||
1294 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
1295 | return 0; | ||
1296 | } | ||
1297 | |||
1298 | |||
1299 | static int lp_type (lua_State *L) { | ||
1300 | if (testpattern(L, 1)) | ||
1301 | lua_pushliteral(L, "pattern"); | ||
1302 | else | ||
1303 | lua_pushnil(L); | ||
1304 | return 1; | ||
1305 | } | ||
1306 | |||
1307 | |||
1308 | int lp_gc (lua_State *L) { | ||
1309 | Pattern *p = getpattern(L, 1); | ||
1310 | realloccode(L, p, 0); /* delete code block */ | ||
1311 | return 0; | ||
1312 | } | ||
1313 | |||
1314 | |||
1315 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { | ||
1316 | TTree *t = newcharset(L); | ||
1317 | int i; | ||
1318 | for (i = 0; i <= UCHAR_MAX; i++) | ||
1319 | if (catf(i)) setchar(treebuffer(t), i); | ||
1320 | lua_setfield(L, -2, catname); | ||
1321 | } | ||
1322 | |||
1323 | |||
1324 | static int lp_locale (lua_State *L) { | ||
1325 | if (lua_isnoneornil(L, 1)) { | ||
1326 | lua_settop(L, 0); | ||
1327 | lua_createtable(L, 0, 12); | ||
1328 | } | ||
1329 | else { | ||
1330 | luaL_checktype(L, 1, LUA_TTABLE); | ||
1331 | lua_settop(L, 1); | ||
1332 | } | ||
1333 | createcat(L, "alnum", isalnum); | ||
1334 | createcat(L, "alpha", isalpha); | ||
1335 | createcat(L, "cntrl", iscntrl); | ||
1336 | createcat(L, "digit", isdigit); | ||
1337 | createcat(L, "graph", isgraph); | ||
1338 | createcat(L, "lower", islower); | ||
1339 | createcat(L, "print", isprint); | ||
1340 | createcat(L, "punct", ispunct); | ||
1341 | createcat(L, "space", isspace); | ||
1342 | createcat(L, "upper", isupper); | ||
1343 | createcat(L, "xdigit", isxdigit); | ||
1344 | return 1; | ||
1345 | } | ||
1346 | |||
1347 | |||
1348 | static struct luaL_Reg pattreg[] = { | ||
1349 | {"ptree", lp_printtree}, | ||
1350 | {"pcode", lp_printcode}, | ||
1351 | {"match", lp_match}, | ||
1352 | {"B", lp_behind}, | ||
1353 | {"V", lp_V}, | ||
1354 | {"C", lp_simplecapture}, | ||
1355 | {"Cc", lp_constcapture}, | ||
1356 | {"Cmt", lp_matchtime}, | ||
1357 | {"Cb", lp_backref}, | ||
1358 | {"Carg", lp_argcapture}, | ||
1359 | {"Cp", lp_poscapture}, | ||
1360 | {"Cs", lp_substcapture}, | ||
1361 | {"Ct", lp_tablecapture}, | ||
1362 | {"Cf", lp_foldcapture}, | ||
1363 | {"Cg", lp_groupcapture}, | ||
1364 | {"P", lp_P}, | ||
1365 | {"S", lp_set}, | ||
1366 | {"R", lp_range}, | ||
1367 | {"utfR", lp_utfr}, | ||
1368 | {"locale", lp_locale}, | ||
1369 | {"version", NULL}, | ||
1370 | {"setmaxstack", lp_setmax}, | ||
1371 | {"type", lp_type}, | ||
1372 | {"T", lp_throw}, /* labeled failure throw */ | ||
1373 | {NULL, NULL} | ||
1374 | }; | ||
1375 | |||
1376 | |||
1377 | static struct luaL_Reg metareg[] = { | ||
1378 | {"__mul", lp_seq}, | ||
1379 | {"__add", lp_choice}, | ||
1380 | {"__pow", lp_star}, | ||
1381 | {"__gc", lp_gc}, | ||
1382 | {"__len", lp_and}, | ||
1383 | {"__div", lp_divcapture}, | ||
1384 | {"__unm", lp_not}, | ||
1385 | {"__sub", lp_sub}, | ||
1386 | {NULL, NULL} | ||
1387 | }; | ||
1388 | |||
1389 | int luaopen_lpeglabel (lua_State *L); /* labeled failure */ | ||
1390 | int luaopen_lpeglabel (lua_State *L) { /* labeled failure */ | ||
1391 | luaL_newmetatable(L, PATTERN_T); | ||
1392 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ | ||
1393 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
1394 | luaL_setfuncs(L, metareg, 0); | ||
1395 | luaL_newlib(L, pattreg); | ||
1396 | lua_pushvalue(L, -1); | ||
1397 | lua_setfield(L, -3, "__index"); | ||
1398 | lua_pushliteral(L, "LPegLabel " VERSION); /* labeled failure */ | ||
1399 | lua_setfield(L, -2, "version"); | ||
1400 | return 1; | ||
1401 | } | ||
1402 | |||
1403 | /* }====================================================== */ | ||