aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergio Medeiros <sqmedeiros@gmail.com>2014-10-29 18:13:38 -0300
committerSergio Medeiros <sqmedeiros@gmail.com>2014-10-29 18:13:38 -0300
commit8d30a0ff8a8584e225c03d878a9add439ea193a3 (patch)
treecdc03907837ebec1aed26be7290a5a40d00f3e2c
parent3af55803f0a261dd9b2ccf85a7532288c4a270ae (diff)
downloadlpeglabel-8d30a0ff8a8584e225c03d878a9add439ea193a3.tar.gz
lpeglabel-8d30a0ff8a8584e225c03d878a9add439ea193a3.tar.bz2
lpeglabel-8d30a0ff8a8584e225c03d878a9add439ea193a3.zip
Creating the git repository with the current implementation.
-rw-r--r--lpcap.c537
-rw-r--r--lpcap.h43
-rw-r--r--lpcode.c1016
-rw-r--r--lpcode.h34
-rw-r--r--lpprint.c255
-rw-r--r--lpprint.h37
-rw-r--r--lptree.c1282
-rw-r--r--lptree.h79
-rw-r--r--lptypes.h158
-rw-r--r--lpvm.c391
-rw-r--r--lpvm.h66
-rw-r--r--makefile55
-rw-r--r--re.lua276
-rwxr-xr-xtest.lua1386
-rw-r--r--testlabel.lua422
15 files changed, 6037 insertions, 0 deletions
diff --git a/lpcap.c b/lpcap.c
new file mode 100644
index 0000000..d90b935
--- /dev/null
+++ b/lpcap.c
@@ -0,0 +1,537 @@
1/*
2** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5
6#include "lua.h"
7#include "lauxlib.h"
8
9#include "lpcap.h"
10#include "lptypes.h"
11
12
13#define captype(cap) ((cap)->kind)
14
15#define isclosecap(cap) (captype(cap) == Cclose)
16
17#define closeaddr(c) ((c)->s + (c)->siz - 1)
18
19#define isfullcap(cap) ((cap)->siz != 0)
20
21#define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
22
23#define pushluaval(cs) getfromktable(cs, (cs)->cap->idx)
24
25
26
27/*
28** Put at the cache for Lua values the value indexed by 'v' in ktable
29** of the running pattern (if it is not there yet); returns its index.
30*/
31static int updatecache (CapState *cs, int v) {
32 int idx = cs->ptop + 1; /* stack index of cache for Lua values */
33 if (v != cs->valuecached) { /* not there? */
34 getfromktable(cs, v); /* get value from 'ktable' */
35 lua_replace(cs->L, idx); /* put it at reserved stack position */
36 cs->valuecached = v; /* keep track of what is there */
37 }
38 return idx;
39}
40
41
42static int pushcapture (CapState *cs);
43
44
45/*
46** Goes back in a list of captures looking for an open capture
47** corresponding to a close
48*/
49static Capture *findopen (Capture *cap) {
50 int n = 0; /* number of closes waiting an open */
51 for (;;) {
52 cap--;
53 if (isclosecap(cap)) n++; /* one more open to skip */
54 else if (!isfullcap(cap))
55 if (n-- == 0) return cap;
56 }
57}
58
59
60/*
61** Go to the next capture
62*/
63static void nextcap (CapState *cs) {
64 Capture *cap = cs->cap;
65 if (!isfullcap(cap)) { /* not a single capture? */
66 int n = 0; /* number of opens waiting a close */
67 for (;;) { /* look for corresponding close */
68 cap++;
69 if (isclosecap(cap)) {
70 if (n-- == 0) break;
71 }
72 else if (!isfullcap(cap)) n++;
73 }
74 }
75 cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */
76}
77
78
79/*
80** Push on the Lua stack all values generated by nested captures inside
81** the current capture. Returns number of values pushed. 'addextra'
82** makes it push the entire match after all captured values. The
83** entire match is pushed also if there are no other nested values,
84** so the function never returns zero.
85*/
86static int pushnestedvalues (CapState *cs, int addextra) {
87 Capture *co = cs->cap;
88 if (isfullcap(cs->cap++)) { /* no nested captures? */
89 lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */
90 return 1; /* that is it */
91 }
92 else {
93 int n = 0;
94 while (!isclosecap(cs->cap)) /* repeat for all nested patterns */
95 n += pushcapture(cs);
96 if (addextra || n == 0) { /* need extra? */
97 lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */
98 n++;
99 }
100 cs->cap++; /* skip close entry */
101 return n;
102 }
103}
104
105
106/*
107** Push only the first value generated by nested captures
108*/
109static void pushonenestedvalue (CapState *cs) {
110 int n = pushnestedvalues(cs, 0);
111 if (n > 1)
112 lua_pop(cs->L, n - 1); /* pop extra values */
113}
114
115
116/*
117** Try to find a named group capture with the name given at the top of
118** the stack; goes backward from 'cap'.
119*/
120static Capture *findback (CapState *cs, Capture *cap) {
121 lua_State *L = cs->L;
122 while (cap-- > cs->ocap) { /* repeat until end of list */
123 if (isclosecap(cap))
124 cap = findopen(cap); /* skip nested captures */
125 else if (!isfullcap(cap))
126 continue; /* opening an enclosing capture: skip and get previous */
127 if (captype(cap) == Cgroup) {
128 getfromktable(cs, cap->idx); /* get group name */
129 if (lua_equal(L, -2, -1)) { /* right group? */
130 lua_pop(L, 2); /* remove reference name and group name */
131 return cap;
132 }
133 else lua_pop(L, 1); /* remove group name */
134 }
135 }
136 luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
137 return NULL; /* to avoid warnings */
138}
139
140
141/*
142** Back-reference capture. Return number of values pushed.
143*/
144static int backrefcap (CapState *cs) {
145 int n;
146 Capture *curr = cs->cap;
147 pushluaval(cs); /* reference name */
148 cs->cap = findback(cs, curr); /* find corresponding group */
149 n = pushnestedvalues(cs, 0); /* push group's values */
150 cs->cap = curr + 1;
151 return n;
152}
153
154
155/*
156** Table capture: creates a new table and populates it with nested
157** captures.
158*/
159static int tablecap (CapState *cs) {
160 lua_State *L = cs->L;
161 int n = 0;
162 lua_newtable(L);
163 if (isfullcap(cs->cap++))
164 return 1; /* table is empty */
165 while (!isclosecap(cs->cap)) {
166 if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
167 pushluaval(cs); /* push group name */
168 pushonenestedvalue(cs);
169 lua_settable(L, -3);
170 }
171 else { /* not a named group */
172 int i;
173 int k = pushcapture(cs);
174 for (i = k; i > 0; i--) /* store all values into table */
175 lua_rawseti(L, -(i + 1), n + i);
176 n += k;
177 }
178 }
179 cs->cap++; /* skip close entry */
180 return 1; /* number of values pushed (only the table) */
181}
182
183
184/*
185** Table-query capture
186*/
187static int querycap (CapState *cs) {
188 int idx = cs->cap->idx;
189 pushonenestedvalue(cs); /* get nested capture */
190 lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */
191 if (!lua_isnil(cs->L, -1))
192 return 1;
193 else { /* no value */
194 lua_pop(cs->L, 1); /* remove nil */
195 return 0;
196 }
197}
198
199
200/*
201** Fold capture
202*/
203static int foldcap (CapState *cs) {
204 int n;
205 lua_State *L = cs->L;
206 int idx = cs->cap->idx;
207 if (isfullcap(cs->cap++) || /* no nested captures? */
208 isclosecap(cs->cap) || /* no nested captures (large subject)? */
209 (n = pushcapture(cs)) == 0) /* nested captures with no values? */
210 return luaL_error(L, "no initial value for fold capture");
211 if (n > 1)
212 lua_pop(L, n - 1); /* leave only one result for accumulator */
213 while (!isclosecap(cs->cap)) {
214 lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
215 lua_insert(L, -2); /* put it before accumulator */
216 n = pushcapture(cs); /* get next capture's values */
217 lua_call(L, n + 1, 1); /* call folding function */
218 }
219 cs->cap++; /* skip close entry */
220 return 1; /* only accumulator left on the stack */
221}
222
223
224/*
225** Function capture
226*/
227static int functioncap (CapState *cs) {
228 int n;
229 int top = lua_gettop(cs->L);
230 pushluaval(cs); /* push function */
231 n = pushnestedvalues(cs, 0); /* push nested captures */
232 lua_call(cs->L, n, LUA_MULTRET); /* call function */
233 return lua_gettop(cs->L) - top; /* return function's results */
234}
235
236
237/*
238** Select capture
239*/
240static int numcap (CapState *cs) {
241 int idx = cs->cap->idx; /* value to select */
242 if (idx == 0) { /* no values? */
243 nextcap(cs); /* skip entire capture */
244 return 0; /* no value produced */
245 }
246 else {
247 int n = pushnestedvalues(cs, 0);
248 if (n < idx) /* invalid index? */
249 return luaL_error(cs->L, "no capture '%d'", idx);
250 else {
251 lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */
252 lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */
253 lua_pop(cs->L, n - 1); /* remove other captures */
254 return 1;
255 }
256 }
257}
258
259
260/*
261** Return the stack index of the first runtime capture in the given
262** list of captures (or zero if no runtime captures)
263*/
264int finddyncap (Capture *cap, Capture *last) {
265 for (; cap < last; cap++) {
266 if (cap->kind == Cruntime)
267 return cap->idx; /* stack position of first capture */
268 }
269 return 0; /* no dynamic captures in this segment */
270}
271
272
273/*
274** Calls a runtime capture. Returns number of captures removed by
275** the call, including the initial Cgroup. (Captures to be added are
276** on the Lua stack.)
277*/
278int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
279 int n, id;
280 lua_State *L = cs->L;
281 int otop = lua_gettop(L);
282 Capture *open = findopen(close);
283 assert(captype(open) == Cgroup);
284 id = finddyncap(open, close); /* get first dynamic capture argument */
285 close->kind = Cclose; /* closes the group */
286 close->s = s;
287 cs->cap = open; cs->valuecached = 0; /* prepare capture state */
288 luaL_checkstack(L, 4, "too many runtime captures");
289 pushluaval(cs); /* push function to be called */
290 lua_pushvalue(L, SUBJIDX); /* push original subject */
291 lua_pushinteger(L, s - cs->s + 1); /* push current position */
292 n = pushnestedvalues(cs, 0); /* push nested captures */
293 lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
294 if (id > 0) { /* are there old dynamic captures to be removed? */
295 int i;
296 for (i = id; i <= otop; i++)
297 lua_remove(L, id); /* remove old dynamic captures */
298 *rem = otop - id + 1; /* total number of dynamic captures removed */
299 }
300 else
301 *rem = 0; /* no dynamic captures removed */
302 return close - open; /* number of captures of all kinds removed */
303}
304
305
306/*
307** Auxiliary structure for substitution and string captures: keep
308** information about nested captures for future use, avoiding to push
309** string results into Lua
310*/
311typedef struct StrAux {
312 int isstring; /* whether capture is a string */
313 union {
314 Capture *cp; /* if not a string, respective capture */
315 struct { /* if it is a string... */
316 const char *s; /* ... starts here */
317 const char *e; /* ... ends here */
318 } s;
319 } u;
320} StrAux;
321
322#define MAXSTRCAPS 10
323
324/*
325** Collect values from current capture into array 'cps'. Current
326** capture must be Cstring (first call) or Csimple (recursive calls).
327** (In first call, fills %0 with whole match for Cstring.)
328** Returns number of elements in the array that were filled.
329*/
330static int getstrcaps (CapState *cs, StrAux *cps, int n) {
331 int k = n++;
332 cps[k].isstring = 1; /* get string value */
333 cps[k].u.s.s = cs->cap->s; /* starts here */
334 if (!isfullcap(cs->cap++)) { /* nested captures? */
335 while (!isclosecap(cs->cap)) { /* traverse them */
336 if (n >= MAXSTRCAPS) /* too many captures? */
337 nextcap(cs); /* skip extra captures (will not need them) */
338 else if (captype(cs->cap) == Csimple) /* string? */
339 n = getstrcaps(cs, cps, n); /* put info. into array */
340 else {
341 cps[n].isstring = 0; /* not a string */
342 cps[n].u.cp = cs->cap; /* keep original capture */
343 nextcap(cs);
344 n++;
345 }
346 }
347 cs->cap++; /* skip close */
348 }
349 cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */
350 return n;
351}
352
353
354/*
355** add next capture value (which should be a string) to buffer 'b'
356*/
357static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
358
359
360/*
361** String capture: add result to buffer 'b' (instead of pushing
362** it into the stack)
363*/
364static void stringcap (luaL_Buffer *b, CapState *cs) {
365 StrAux cps[MAXSTRCAPS];
366 int n;
367 size_t len, i;
368 const char *fmt; /* format string */
369 fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
370 n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
371 for (i = 0; i < len; i++) { /* traverse them */
372 if (fmt[i] != '%') /* not an escape? */
373 luaL_addchar(b, fmt[i]); /* add it to buffer */
374 else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
375 luaL_addchar(b, fmt[i]); /* add to buffer */
376 else {
377 int l = fmt[i] - '0'; /* capture index */
378 if (l > n)
379 luaL_error(cs->L, "invalid capture index (%d)", l);
380 else if (cps[l].isstring)
381 luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
382 else {
383 Capture *curr = cs->cap;
384 cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
385 if (!addonestring(b, cs, "capture"))
386 luaL_error(cs->L, "no values in capture index %d", l);
387 cs->cap = curr; /* continue from where it stopped */
388 }
389 }
390 }
391}
392
393
394/*
395** Substitution capture: add result to buffer 'b'
396*/
397static void substcap (luaL_Buffer *b, CapState *cs) {
398 const char *curr = cs->cap->s;
399 if (isfullcap(cs->cap)) /* no nested captures? */
400 luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
401 else {
402 cs->cap++; /* skip open entry */
403 while (!isclosecap(cs->cap)) { /* traverse nested captures */
404 const char *next = cs->cap->s;
405 luaL_addlstring(b, curr, next - curr); /* add text up to capture */
406 if (addonestring(b, cs, "replacement"))
407 curr = closeaddr(cs->cap - 1); /* continue after match */
408 else /* no capture value */
409 curr = next; /* keep original text in final result */
410 }
411 luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */
412 }
413 cs->cap++; /* go to next capture */
414}
415
416
417/*
418** Evaluates a capture and adds its first value to buffer 'b'; returns
419** whether there was a value
420*/
421static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
422 switch (captype(cs->cap)) {
423 case Cstring:
424 stringcap(b, cs); /* add capture directly to buffer */
425 return 1;
426 case Csubst:
427 substcap(b, cs); /* add capture directly to buffer */
428 return 1;
429 default: {
430 lua_State *L = cs->L;
431 int n = pushcapture(cs);
432 if (n > 0) {
433 if (n > 1) lua_pop(L, n - 1); /* only one result */
434 if (!lua_isstring(L, -1))
435 luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
436 luaL_addvalue(b);
437 }
438 return n;
439 }
440 }
441}
442
443
444/*
445** Push all values of the current capture into the stack; returns
446** number of values pushed
447*/
448static int pushcapture (CapState *cs) {
449 lua_State *L = cs->L;
450 luaL_checkstack(L, 4, "too many captures");
451 switch (captype(cs->cap)) {
452 case Cposition: {
453 lua_pushinteger(L, cs->cap->s - cs->s + 1);
454 cs->cap++;
455 return 1;
456 }
457 case Cconst: {
458 pushluaval(cs);
459 cs->cap++;
460 return 1;
461 }
462 case Carg: {
463 int arg = (cs->cap++)->idx;
464 if (arg + FIXEDARGS > cs->ptop)
465 return luaL_error(L, "reference to absent argument #%d", arg);
466 lua_pushvalue(L, arg + FIXEDARGS);
467 return 1;
468 }
469 case Csimple: {
470 int k = pushnestedvalues(cs, 1);
471 lua_insert(L, -k); /* make whole match be first result */
472 return k;
473 }
474 case Cruntime: {
475 lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
476 return 1;
477 }
478 case Cstring: {
479 luaL_Buffer b;
480 luaL_buffinit(L, &b);
481 stringcap(&b, cs);
482 luaL_pushresult(&b);
483 return 1;
484 }
485 case Csubst: {
486 luaL_Buffer b;
487 luaL_buffinit(L, &b);
488 substcap(&b, cs);
489 luaL_pushresult(&b);
490 return 1;
491 }
492 case Cgroup: {
493 if (cs->cap->idx == 0) /* anonymous group? */
494 return pushnestedvalues(cs, 0); /* add all nested values */
495 else { /* named group: add no values */
496 nextcap(cs); /* skip capture */
497 return 0;
498 }
499 }
500 case Cbackref: return backrefcap(cs);
501 case Ctable: return tablecap(cs);
502 case Cfunction: return functioncap(cs);
503 case Cnum: return numcap(cs);
504 case Cquery: return querycap(cs);
505 case Cfold: return foldcap(cs);
506 default: assert(0); return 0;
507 }
508}
509
510
511/*
512** Prepare a CapState structure and traverse the entire list of
513** captures in the stack pushing its results. 's' is the subject
514** string, 'r' is the final position of the match, and 'ptop'
515** the index in the stack where some useful values were pushed.
516** Returns the number of results pushed. (If the list produces no
517** results, push the final position of the match.)
518*/
519int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
520 Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
521 int n = 0;
522 if (!isclosecap(capture)) { /* is there any capture? */
523 CapState cs;
524 cs.ocap = cs.cap = capture; cs.L = L;
525 cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
526 do { /* collect their values */
527 n += pushcapture(&cs);
528 } while (!isclosecap(cs.cap));
529 }
530 if (n == 0) { /* no capture values? */
531 lua_pushinteger(L, r - s + 1); /* return only end position */
532 n = 1;
533 }
534 return n;
535}
536
537
diff --git a/lpcap.h b/lpcap.h
new file mode 100644
index 0000000..c0a0e38
--- /dev/null
+++ b/lpcap.h
@@ -0,0 +1,43 @@
1/*
2** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $
3*/
4
5#if !defined(lpcap_h)
6#define lpcap_h
7
8
9#include "lptypes.h"
10
11
12/* kinds of captures */
13typedef enum CapKind {
14 Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction,
15 Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup
16} CapKind;
17
18
19typedef struct Capture {
20 const char *s; /* subject position */
21 short idx; /* extra info about capture (group name, arg index, etc.) */
22 byte kind; /* kind of capture */
23 byte siz; /* size of full capture + 1 (0 = not a full capture) */
24} Capture;
25
26
27typedef struct CapState {
28 Capture *cap; /* current capture */
29 Capture *ocap; /* (original) capture list */
30 lua_State *L;
31 int ptop; /* index of last argument to 'match' */
32 const char *s; /* original string */
33 int valuecached; /* value stored in cache slot */
34} CapState;
35
36
37int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
38int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
39int finddyncap (Capture *cap, Capture *last);
40
41#endif
42
43
diff --git a/lpcode.c b/lpcode.c
new file mode 100644
index 0000000..4a49e7d
--- /dev/null
+++ b/lpcode.c
@@ -0,0 +1,1016 @@
1/*
2** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5
6#include <limits.h>
7
8
9#include "lua.h"
10#include "lauxlib.h"
11
12#include "lptypes.h"
13#include "lpcode.h"
14
15
16/* signals a "no-instruction */
17#define NOINST -1
18
19
20
21static const Charset fullset_ =
22 {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
23 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
25 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}};
26
27static const Charset *fullset = &fullset_;
28
29/*
30** {======================================================
31** Analysis and some optimizations
32** =======================================================
33*/
34
35/*
36** Check whether a charset is empty (IFail), singleton (IChar),
37** full (IAny), or none of those (ISet).
38*/
39static Opcode charsettype (const byte *cs, int *c) {
40 int count = 0;
41 int i;
42 int candidate = -1; /* candidate position for a char */
43 for (i = 0; i < CHARSETSIZE; i++) {
44 int b = cs[i];
45 if (b == 0) {
46 if (count > 1) return ISet; /* else set is still empty */
47 }
48 else if (b == 0xFF) {
49 if (count < (i * BITSPERCHAR))
50 return ISet;
51 else count += BITSPERCHAR; /* set is still full */
52 }
53 else if ((b & (b - 1)) == 0) { /* byte has only one bit? */
54 if (count > 0)
55 return ISet; /* set is neither full nor empty */
56 else { /* set has only one char till now; track it */
57 count++;
58 candidate = i;
59 }
60 }
61 else return ISet; /* byte is neither empty, full, nor singleton */
62 }
63 switch (count) {
64 case 0: return IFail; /* empty set */
65 case 1: { /* singleton; find character bit inside byte */
66 int b = cs[candidate];
67 *c = candidate * BITSPERCHAR;
68 if ((b & 0xF0) != 0) { *c += 4; b >>= 4; }
69 if ((b & 0x0C) != 0) { *c += 2; b >>= 2; }
70 if ((b & 0x02) != 0) { *c += 1; }
71 return IChar;
72 }
73 default: {
74 assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */
75 return IAny;
76 }
77 }
78}
79
80/*
81** A few basic operations on Charsets
82*/
83static void cs_complement (Charset *cs) {
84 loopset(i, cs->cs[i] = ~cs->cs[i]);
85}
86
87
88static int cs_equal (const byte *cs1, const byte *cs2) {
89 loopset(i, if (cs1[i] != cs2[i]) return 0);
90 return 1;
91}
92
93
94/*
95** computes whether sets cs1 and cs2 are disjoint
96*/
97static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
98 loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
99 return 1;
100}
101
102
103/*
104** Convert a 'char' pattern (TSet, TChar, TAny) to a charset
105*/
106int tocharset (TTree *tree, Charset *cs) {
107 switch (tree->tag) {
108 case TSet: { /* copy set */
109 loopset(i, cs->cs[i] = treebuffer(tree)[i]);
110 return 1;
111 }
112 case TChar: { /* only one char */
113 assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
114 loopset(i, cs->cs[i] = 0); /* erase all chars */
115 setchar(cs->cs, tree->u.n); /* add that one */
116 return 1;
117 }
118 case TAny: {
119 loopset(i, cs->cs[i] = 0xFF); /* add all to the set */
120 return 1;
121 }
122 default: return 0;
123 }
124}
125
126
127/*
128** Checks whether a pattern has captures
129*/
130int hascaptures (TTree *tree) {
131 tailcall:
132 switch (tree->tag) {
133 case TCapture: case TRunTime:
134 return 1;
135 default: {
136 switch (numsiblings[tree->tag]) {
137 case 1: /* return hascaptures(sib1(tree)); */
138 tree = sib1(tree); goto tailcall;
139 case 2:
140 if (hascaptures(sib1(tree))) return 1;
141 /* else return hascaptures(sib2(tree)); */
142 tree = sib2(tree); goto tailcall;
143 default: assert(numsiblings[tree->tag] == 0); return 0;
144 }
145 }
146 }
147}
148
149
150/*
151** Checks how a pattern behaves regarding the empty string,
152** in one of two different ways:
153** A pattern is *nullable* if it can match without consuming any character;
154** A pattern is *nofail* if it never fails for any string
155** (including the empty string).
156** The difference is only for predicates and run-time captures;
157** for other patterns, the two properties are equivalent.
158** (With predicates, &'a' is nullable but not nofail. Of course,
159** nofail => nullable.)
160** These functions are all convervative in the following way:
161** p is nullable => nullable(p)
162** nofail(p) => p cannot fail
163** The function assumes that TOpenCall is not nullable;
164** this will be checked again when the grammar is fixed.)
165** Run-time captures can do whatever they want, so the result
166** is conservative.
167*/
168int checkaux (TTree *tree, int pred) {
169 tailcall:
170 switch (tree->tag) {
171 case TChar: case TSet: case TAny:
172 case TFalse: case TOpenCall: case TThrow: /* labeled failure */
173 return 0; /* not nullable */
174 case TRep: case TTrue:
175 return 1; /* no fail */
176 case TNot: case TBehind: /* can match empty, but can fail */
177 if (pred == PEnofail) return 0;
178 else return 1; /* PEnullable */
179 case TAnd: /* can match empty; fail iff body does */
180 if (pred == PEnullable) return 1;
181 /* else return checkaux(sib1(tree), pred); */
182 tree = sib1(tree); goto tailcall;
183 case TRunTime: /* can fail; match empty iff body does */
184 if (pred == PEnofail) return 0;
185 /* else return checkaux(sib1(tree), pred); */
186 tree = sib1(tree); goto tailcall;
187 case TSeq:
188 if (!checkaux(sib1(tree), pred)) return 0;
189 /* else return checkaux(sib2(tree), pred); */
190 tree = sib2(tree); goto tailcall;
191 case TChoice: case TLabChoice: /* labeled failure */
192 if (checkaux(sib2(tree), pred)) return 1;
193 /* else return checkaux(sib1(tree), pred); */
194 tree = sib1(tree); goto tailcall;
195 case TCapture: case TGrammar: case TRule:
196 /* return checkaux(sib1(tree), pred); */
197 tree = sib1(tree); goto tailcall;
198 case TCall: /* return checkaux(sib2(tree), pred); */
199 tree = sib2(tree); goto tailcall;
200 default: assert(0); return 0;
201 };
202}
203
204
205/*
206** number of characters to match a pattern (or -1 if variable)
207** ('count' avoids infinite loops for grammars)
208*/
209int fixedlenx (TTree *tree, int count, int len) {
210 tailcall:
211 switch (tree->tag) {
212 case TChar: case TSet: case TAny:
213 return len + 1;
214 case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
215 case TThrow: /* labeled failure */
216 return len;
217 case TRep: case TRunTime: case TOpenCall:
218 return -1;
219 case TCapture: case TRule: case TGrammar:
220 /* return fixedlenx(sib1(tree), count); */
221 tree = sib1(tree); goto tailcall;
222 case TCall:
223 if (count++ >= MAXRULES)
224 return -1; /* may be a loop */
225 /* else return fixedlenx(sib2(tree), count); */
226 tree = sib2(tree); goto tailcall;
227 case TSeq: {
228 len = fixedlenx(sib1(tree), count, len);
229 if (len < 0) return -1;
230 /* else return fixedlenx(sib2(tree), count, len); */
231 tree = sib2(tree); goto tailcall;
232 }
233 case TChoice: case TLabChoice: { /* labeled failure */
234 int n1, n2;
235 n1 = fixedlenx(sib1(tree), count, len);
236 if (n1 < 0) return -1;
237 n2 = fixedlenx(sib2(tree), count, len);
238 if (n1 == n2) return n1;
239 else return -1;
240 }
241 default: assert(0); return 0;
242 };
243}
244
245
246/*
247** Computes the 'first set' of a pattern.
248** The result is a conservative aproximation:
249** match p ax -> x' for some x ==> a in first(p).
250** The set 'follow' is the first set of what follows the
251** pattern (full set if nothing follows it).
252** The function returns 0 when this set can be used for
253** tests that avoid the pattern altogether.
254** A non-zero return can happen for two reasons:
255** 1) match p '' -> '' ==> returns 1.
256** (tests cannot be used because they always fail for an empty input)
257** 2) there is a match-time capture ==> returns 2.
258** (match-time captures should not be avoided by optimizations)
259*/
260static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
261 tailcall:
262 switch (tree->tag) {
263 case TChar: case TSet: case TAny: {
264 tocharset(tree, firstset);
265 return 0;
266 }
267 case TTrue: {
268 loopset(i, firstset->cs[i] = follow->cs[i]);
269 return 1;
270 }
271 case TFalse: {
272 loopset(i, firstset->cs[i] = 0);
273 return 0;
274 }
275 case TThrow: { /* (?)labeled failure: should always throw the label */
276 loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset? */
277 return 1;
278 }
279 case TChoice: case TLabChoice: { /*(?) labeled failure */
280 Charset csaux;
281 int e1 = getfirst(sib1(tree), follow, firstset);
282 int e2 = getfirst(sib2(tree), follow, &csaux);
283 loopset(i, firstset->cs[i] |= csaux.cs[i]);
284 return e1 | e2;
285 }
286 case TSeq: {
287 if (!nullable(sib1(tree))) {
288 /* return getfirst(sib1(tree), fullset, firstset); */
289 tree = sib1(tree); follow = fullset; goto tailcall;
290 }
291 else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
292 Charset csaux;
293 int e2 = getfirst(sib2(tree), follow, &csaux);
294 int e1 = getfirst(sib1(tree), &csaux, firstset);
295 if (e1 == 0) return 0; /* 'e1' ensures that first can be used */
296 else if ((e1 | e2) & 2) /* one of the children has a matchtime? */
297 return 2; /* pattern has a matchtime capture */
298 else return e2; /* else depends on 'e2' */
299 }
300 }
301 case TRep: {
302 getfirst(sib1(tree), follow, firstset);
303 loopset(i, firstset->cs[i] |= follow->cs[i]);
304 return 1; /* accept the empty string */
305 }
306 case TCapture: case TGrammar: case TRule: {
307 /* return getfirst(sib1(tree), follow, firstset); */
308 tree = sib1(tree); goto tailcall;
309 }
310 case TRunTime: { /* function invalidates any follow info. */
311 int e = getfirst(sib1(tree), fullset, firstset);
312 if (e) return 2; /* function is not "protected"? */
313 else return 0; /* pattern inside capture ensures first can be used */
314 }
315 case TCall: {
316 /* return getfirst(sib2(tree), follow, firstset); */
317 tree = sib2(tree); goto tailcall;
318 }
319 case TAnd: {
320 int e = getfirst(sib1(tree), follow, firstset);
321 loopset(i, firstset->cs[i] &= follow->cs[i]);
322 return e;
323 }
324 case TNot: {
325 if (tocharset(sib1(tree), firstset)) {
326 cs_complement(firstset);
327 return 1;
328 }
329 /* else go through */
330 }
331 case TBehind: { /* instruction gives no new information */
332 /* call 'getfirst' to check for math-time captures */
333 int e = getfirst(sib1(tree), follow, firstset);
334 loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */
335 return e | 1; /* always can accept the empty string */
336 }
337 default: assert(0); return 0;
338 }
339}
340
341
342/*
343** If it returns true, then pattern can fail only depending on the next
344** character of the subject
345*/
346static int headfail (TTree *tree) {
347 tailcall:
348 switch (tree->tag) {
349 case TChar: case TSet: case TAny: case TFalse:
350 return 1;
351 case TTrue: case TRep: case TRunTime: case TNot:
352 case TBehind:
353 case TThrow: /* labeled failure: should always throw the label */
354 return 0;
355 case TCapture: case TGrammar: case TRule: case TAnd:
356 tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */
357 case TCall:
358 tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */
359 case TSeq:
360 if (!nofail(sib2(tree))) return 0;
361 /* else return headfail(sib1(tree)); */
362 tree = sib1(tree); goto tailcall;
363 case TChoice: case TLabChoice: /* labeled failure */
364 if (!headfail(sib1(tree))) return 0;
365 /* else return headfail(sib2(tree)); */
366 tree = sib2(tree); goto tailcall;
367 default: assert(0); return 0;
368 }
369}
370
371
372/*
373** Check whether the code generation for the given tree can benefit
374** from a follow set (to avoid computing the follow set when it is
375** not needed)
376*/
377static int needfollow (TTree *tree) {
378 tailcall:
379 switch (tree->tag) {
380 case TChar: case TSet: case TAny:
381 case TFalse: case TTrue: case TAnd: case TNot:
382 case TRunTime: case TGrammar: case TCall: case TBehind:
383 case TThrow: case TLabChoice: /* (?)labeled failure */
384 return 0;
385 case TChoice: case TRep:
386 return 1;
387 case TCapture:
388 tree = sib1(tree); goto tailcall;
389 case TSeq:
390 tree = sib2(tree); goto tailcall;
391 default: assert(0); return 0;
392 }
393}
394
395/* }====================================================== */
396
397
398
399/*
400** {======================================================
401** Code generation
402** =======================================================
403*/
404
405
406/*
407** size of an instruction
408*/
409int sizei (const Instruction *i) {
410 switch((Opcode)i->i.code) {
411 case ISet: case ISpan: return CHARSETINSTSIZE;
412 case ITestSet: return CHARSETINSTSIZE + 1;
413 case ITestChar: case ITestAny: case IChoice: case IJmp:
414 case ICall: case IOpenCall: case ICommit: case IPartialCommit:
415 case IBackCommit: case IThrow: return 2; /* labeled failure */
416 case ILabChoice: return 3; /* labeled failure */
417 default: return 1;
418 }
419}
420
421
422/*
423** state for the compiler
424*/
425typedef struct CompileState {
426 Pattern *p; /* pattern being compiled */
427 int ncode; /* next position in p->code to be filled */
428 lua_State *L;
429} CompileState;
430
431
432/*
433** code generation is recursive; 'opt' indicates that the code is
434** being generated under a 'IChoice' operator jumping to its end.
435** 'tt' points to a previous test protecting this code. 'fl' is
436** the follow set of the pattern.
437*/
438static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
439 const Charset *fl);
440
441
442void reallocprog (lua_State *L, Pattern *p, int nsize) {
443 void *ud;
444 lua_Alloc f = lua_getallocf(L, &ud);
445 void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
446 nsize * sizeof(Instruction));
447 if (newblock == NULL && nsize > 0)
448 luaL_error(L, "not enough memory");
449 p->code = (Instruction *)newblock;
450 p->codesize = nsize;
451}
452
453
454static int nextinstruction (CompileState *compst) {
455 int size = compst->p->codesize;
456 if (compst->ncode >= size)
457 reallocprog(compst->L, compst->p, size * 2);
458 return compst->ncode++;
459}
460
461
462#define getinstr(cs,i) ((cs)->p->code[i])
463
464
465static int addinstruction (CompileState *compst, Opcode op, int aux) {
466 int i = nextinstruction(compst);
467 getinstr(compst, i).i.code = op;
468 getinstr(compst, i).i.aux = aux;
469 return i;
470}
471
472
473static int addoffsetinst (CompileState *compst, Opcode op) {
474 int i = addinstruction(compst, op, 0); /* instruction */
475 addinstruction(compst, (Opcode)0, 0); /* open space for offset */
476 assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2);
477 return i;
478}
479
480/* labeled failure begin */
481static int addthrowinstruction (CompileState *compst, Labelset ls) {
482 int i = nextinstruction(compst);
483 getinstr(compst, i).i.code = IThrow;
484 i = nextinstruction(compst);
485 getinstr(compst, i).labels = ls;
486 return i;
487}
488
489static int addoffsetlabinst (CompileState *compst, Opcode op, Labelset ls) {
490 int j;
491 int i = addinstruction(compst, op, 0); /* instruction */
492 addinstruction(compst, (Opcode)0, 0); /* open space for offset */
493 j = nextinstruction(compst); /* open space for labels */
494 getinstr(compst, j).labels = ls;
495 assert(op == ILabChoice);
496 return i;
497}
498/* labeled failure end */
499
500
501static void setoffset (CompileState *compst, int instruction, int offset) {
502 getinstr(compst, instruction + 1).offset = offset;
503}
504
505
506/*
507** Add a capture instruction:
508** 'op' is the capture instruction; 'cap' the capture kind;
509** 'key' the key into ktable; 'aux' is optional offset
510**
511*/
512static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
513 int aux) {
514 int i = addinstruction(compst, op, joinkindoff(cap, aux));
515 getinstr(compst, i).i.key = key;
516 return i;
517}
518
519
520#define gethere(compst) ((compst)->ncode)
521
522#define target(code,i) ((i) + code[i + 1].offset)
523
524
525static void jumptothere (CompileState *compst, int instruction, int target) {
526 if (instruction >= 0)
527 setoffset(compst, instruction, target - instruction);
528}
529
530
531static void jumptohere (CompileState *compst, int instruction) {
532 jumptothere(compst, instruction, gethere(compst));
533}
534
535
536/*
537** Code an IChar instruction, or IAny if there is an equivalent
538** test dominating it
539*/
540static void codechar (CompileState *compst, int c, int tt) {
541 if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar &&
542 getinstr(compst, tt).i.aux == c)
543 addinstruction(compst, IAny, 0);
544 else
545 addinstruction(compst, IChar, c);
546}
547
548
549/*
550** Add a charset posfix to an instruction
551*/
552static void addcharset (CompileState *compst, const byte *cs) {
553 int p = gethere(compst);
554 int i;
555 for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
556 nextinstruction(compst); /* space for buffer */
557 /* fill buffer with charset */
558 loopset(j, getinstr(compst, p).buff[j] = cs[j]);
559}
560
561
562/*
563** code a char set, optimizing unit sets for IChar, "complete"
564** sets for IAny, and empty sets for IFail; also use an IAny
565** when instruction is dominated by an equivalent test.
566*/
567static void codecharset (CompileState *compst, const byte *cs, int tt) {
568 int c = 0; /* (=) to avoid warnings */
569 Opcode op = charsettype(cs, &c);
570 switch (op) {
571 case IChar: codechar(compst, c, tt); break;
572 case ISet: { /* non-trivial set? */
573 if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet &&
574 cs_equal(cs, getinstr(compst, tt + 2).buff))
575 addinstruction(compst, IAny, 0);
576 else {
577 addinstruction(compst, ISet, 0);
578 addcharset(compst, cs);
579 }
580 break;
581 }
582 default: addinstruction(compst, op, c); break;
583 }
584}
585
586
587/*
588** code a test set, optimizing unit sets for ITestChar, "complete"
589** sets for ITestAny, and empty sets for IJmp (always fails).
590** 'e' is true iff test should accept the empty string. (Test
591** instructions in the current VM never accept the empty string.)
592*/
593static int codetestset (CompileState *compst, Charset *cs, int e) {
594 if (e) return NOINST; /* no test */
595 else {
596 int c = 0;
597 Opcode op = charsettype(cs->cs, &c);
598 switch (op) {
599 case IFail: return addoffsetinst(compst, IJmp); /* always jump */
600 case IAny: return addoffsetinst(compst, ITestAny);
601 case IChar: {
602 int i = addoffsetinst(compst, ITestChar);
603 getinstr(compst, i).i.aux = c;
604 return i;
605 }
606 case ISet: {
607 int i = addoffsetinst(compst, ITestSet);
608 addcharset(compst, cs->cs);
609 return i;
610 }
611 default: assert(0); return 0;
612 }
613 }
614}
615
616
617/*
618** Find the final destination of a sequence of jumps
619*/
620static int finaltarget (Instruction *code, int i) {
621 while (code[i].i.code == IJmp)
622 i = target(code, i);
623 return i;
624}
625
626
627/*
628** final label (after traversing any jumps)
629*/
630static int finallabel (Instruction *code, int i) {
631 return finaltarget(code, target(code, i));
632}
633
634
635/*
636** <behind(p)> == behind n; <p> (where n = fixedlen(p))
637*/
638static void codebehind (CompileState *compst, TTree *tree) {
639 if (tree->u.n > 0)
640 addinstruction(compst, IBehind, tree->u.n);
641 codegen(compst, sib1(tree), 0, NOINST, fullset);
642}
643
644
645/*
646** Choice; optimizations:
647** - when p1 is headfail
648** - when first(p1) and first(p2) are disjoint; than
649** a character not in first(p1) cannot go to p1, and a character
650** in first(p1) cannot go to p2 (at it is not in first(p2)).
651** (The optimization is not valid if p1 accepts the empty string,
652** as then there is no character at all...)
653** - when p2 is empty and opt is true; a IPartialCommit can resuse
654** the Choice already active in the stack.
655*/
656static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
657 const Charset *fl) {
658 int emptyp2 = (p2->tag == TTrue);
659 Charset cs1, cs2;
660 int e1 = getfirst(p1, fullset, &cs1);
661 if (headfail(p1) ||
662 (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) {
663 /*if (0) {*/
664 /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
665 int test = codetestset(compst, &cs1, 0);
666 int jmp = NOINST;
667 codegen(compst, p1, 0, test, fl);
668 if (!emptyp2)
669 jmp = addoffsetinst(compst, IJmp);
670 jumptohere(compst, test);
671 codegen(compst, p2, opt, NOINST, fl);
672 jumptohere(compst, jmp);
673 }
674 else if (opt && emptyp2) {
675 /* p1? == IPartialCommit; p1 */
676 jumptohere(compst, addoffsetinst(compst, IPartialCommit));
677 codegen(compst, p1, 1, NOINST, fullset);
678 }
679 else {
680 /* <p1 / p2> ==
681 test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
682 int pcommit;
683 int test = codetestset(compst, &cs1, e1);
684 int pchoice = addoffsetinst(compst, IChoice);
685 codegen(compst, p1, emptyp2, test, fullset);
686 pcommit = addoffsetinst(compst, ICommit);
687 jumptohere(compst, pchoice);
688 jumptohere(compst, test);
689 codegen(compst, p2, opt, NOINST, fl);
690 jumptohere(compst, pcommit);
691 }
692}
693
694/* labeled failure begin */
695static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
696 const Charset *fl, Labelset ls) {
697 int emptyp2 = (p2->tag == TTrue);
698 int pcommit;
699 int test = NOINST;
700 int pchoice = addoffsetlabinst(compst, ILabChoice, ls);
701 codegen(compst, p1, emptyp2, test, fullset);
702 pcommit = addoffsetinst(compst, ICommit);
703 jumptohere(compst, pchoice);
704 jumptohere(compst, test);
705 codegen(compst, p2, opt, NOINST, fl);
706 jumptohere(compst, pcommit);
707
708}
709/* labeled failure end */
710
711/*
712** And predicate
713** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
714** (valid only when 'p' has no captures)
715*/
716static void codeand (CompileState *compst, TTree *tree, int tt) {
717 int n = fixedlen(tree);
718 if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) {
719 codegen(compst, tree, 0, tt, fullset);
720 if (n > 0)
721 addinstruction(compst, IBehind, n);
722 }
723 else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
724 int pcommit;
725 int pchoice = addoffsetinst(compst, IChoice);
726 codegen(compst, tree, 0, tt, fullset);
727 pcommit = addoffsetinst(compst, IBackCommit);
728 jumptohere(compst, pchoice);
729 addinstruction(compst, IFail, 0);
730 jumptohere(compst, pcommit);
731 }
732}
733
734
735/*
736** Captures: if pattern has fixed (and not too big) length, use
737** a single IFullCapture instruction after the match; otherwise,
738** enclose the pattern with OpenCapture - CloseCapture.
739*/
740static void codecapture (CompileState *compst, TTree *tree, int tt,
741 const Charset *fl) {
742 int len = fixedlen(sib1(tree));
743 if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) {
744 codegen(compst, sib1(tree), 0, tt, fl);
745 addinstcap(compst, IFullCapture, tree->cap, tree->key, len);
746 }
747 else {
748 addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0);
749 codegen(compst, sib1(tree), 0, tt, fl);
750 addinstcap(compst, ICloseCapture, Cclose, 0, 0);
751 }
752}
753
754
755static void coderuntime (CompileState *compst, TTree *tree, int tt) {
756 addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0);
757 codegen(compst, sib1(tree), 0, tt, fullset);
758 addinstcap(compst, ICloseRunTime, Cclose, 0, 0);
759}
760
761
762/*
763** Repetion; optimizations:
764** When pattern is a charset, can use special instruction ISpan.
765** When pattern is head fail, or if it starts with characters that
766** are disjoint from what follows the repetions, a simple test
767** is enough (a fail inside the repetition would backtrack to fail
768** again in the following pattern, so there is no need for a choice).
769** When 'opt' is true, the repetion can reuse the Choice already
770** active in the stack.
771*/
772static void coderep (CompileState *compst, TTree *tree, int opt,
773 const Charset *fl) {
774 Charset st;
775 if (tocharset(tree, &st)) {
776 addinstruction(compst, ISpan, 0);
777 addcharset(compst, st.cs);
778 }
779 else {
780 int e1 = getfirst(tree, fullset, &st);
781 if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) {
782 /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
783 int jmp;
784 int test = codetestset(compst, &st, 0);
785 codegen(compst, tree, opt, test, fullset);
786 jmp = addoffsetinst(compst, IJmp);
787 jumptohere(compst, test);
788 jumptothere(compst, jmp, test);
789 }
790 else {
791 /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */
792 /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */
793 int commit, l2;
794 int test = codetestset(compst, &st, e1);
795 int pchoice = NOINST;
796 if (opt)
797 jumptohere(compst, addoffsetinst(compst, IPartialCommit));
798 else
799 pchoice = addoffsetinst(compst, IChoice);
800 l2 = gethere(compst);
801 codegen(compst, tree, 0, NOINST, fullset);
802 commit = addoffsetinst(compst, IPartialCommit);
803 jumptothere(compst, commit, l2);
804 jumptohere(compst, pchoice);
805 jumptohere(compst, test);
806 }
807 }
808}
809
810
811/*
812** Not predicate; optimizations:
813** In any case, if first test fails, 'not' succeeds, so it can jump to
814** the end. If pattern is headfail, that is all (it cannot fail
815** in other parts); this case includes 'not' of simple sets. Otherwise,
816** use the default code (a choice plus a failtwice).
817*/
818static void codenot (CompileState *compst, TTree *tree) {
819 Charset st;
820 int e = getfirst(tree, fullset, &st);
821 int test = codetestset(compst, &st, e);
822 if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */
823 addinstruction(compst, IFail, 0);
824 else {
825 /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */
826 int pchoice = addoffsetinst(compst, IChoice);
827 codegen(compst, tree, 0, NOINST, fullset);
828 addinstruction(compst, IFailTwice, 0);
829 jumptohere(compst, pchoice);
830 }
831 jumptohere(compst, test);
832}
833
834
835/*
836** change open calls to calls, using list 'positions' to find
837** correct offsets; also optimize tail calls
838*/
839static void correctcalls (CompileState *compst, int *positions,
840 int from, int to) {
841 int i;
842 Instruction *code = compst->p->code;
843 for (i = from; i < to; i += sizei(&code[i])) {
844 if (code[i].i.code == IOpenCall) {
845 int n = code[i].i.key; /* rule number */
846 int rule = positions[n]; /* rule position */
847 assert(rule == from || code[rule - 1].i.code == IRet);
848 if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */
849 code[i].i.code = IJmp; /* tail call */
850 else
851 code[i].i.code = ICall;
852 jumptothere(compst, i, rule); /* call jumps to respective rule */
853 }
854 }
855 assert(i == to);
856}
857
858
859/*
860** Code for a grammar:
861** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
862*/
863static void codegrammar (CompileState *compst, TTree *grammar) {
864 int positions[MAXRULES];
865 int rulenumber = 0;
866 TTree *rule;
867 int firstcall = addoffsetinst(compst, ICall); /* call initial rule */
868 int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */
869 int start = gethere(compst); /* here starts the initial rule */
870 jumptohere(compst, firstcall);
871 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
872 positions[rulenumber++] = gethere(compst); /* save rule position */
873 codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */
874 addinstruction(compst, IRet, 0);
875 }
876 assert(rule->tag == TTrue);
877 jumptohere(compst, jumptoend);
878 correctcalls(compst, positions, start, gethere(compst));
879}
880
881
882static void codecall (CompileState *compst, TTree *call) {
883 int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */
884 getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */
885 assert(sib2(call)->tag == TRule);
886}
887
888
889/*
890** Code first child of a sequence
891** (second child is called in-place to allow tail call)
892** Return 'tt' for second child
893*/
894static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
895 int tt, const Charset *fl) {
896 if (needfollow(p1)) {
897 Charset fl1;
898 getfirst(p2, fl, &fl1); /* p1 follow is p2 first */
899 codegen(compst, p1, 0, tt, &fl1);
900 }
901 else /* use 'fullset' as follow */
902 codegen(compst, p1, 0, tt, fullset);
903 if (fixedlen(p1) != 0) /* can 'p1' consume anything? */
904 return NOINST; /* invalidate test */
905 else return tt; /* else 'tt' still protects sib2 */
906}
907
908
909/*
910** Main code-generation function: dispatch to auxiliar functions
911** according to kind of tree
912*/
913static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
914 const Charset *fl) {
915 tailcall:
916 switch (tree->tag) {
917 case TChar: codechar(compst, tree->u.n, tt); break;
918 case TAny: addinstruction(compst, IAny, 0); break;
919 case TSet: codecharset(compst, treebuffer(tree), tt); break;
920 case TTrue: break;
921 case TFalse: addinstruction(compst, IFail, 0); break;
922 case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
923 case TRep: coderep(compst, sib1(tree), opt, fl); break;
924 case TBehind: codebehind(compst, tree); break;
925 case TNot: codenot(compst, sib1(tree)); break;
926 case TAnd: codeand(compst, sib1(tree), tt); break;
927 case TCapture: codecapture(compst, tree, tt, fl); break;
928 case TRunTime: coderuntime(compst, tree, tt); break;
929 case TGrammar: codegrammar(compst, tree); break;
930 case TCall: codecall(compst, tree); break;
931 case TSeq: {
932 tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */
933 /* codegen(compst, p2, opt, tt, fl); */
934 tree = sib2(tree); goto tailcall;
935 }
936 case TThrow: { /* labeled failure */
937 addthrowinstruction(compst, tree->labels);
938 break;
939 }
940 case TLabChoice: { /* labeled failure */
941 codelabchoice(compst, sib1(tree), sib2(tree), opt, fl, tree->labels);
942 break;
943 }
944 default: assert(0);
945 }
946}
947
948
949/*
950** Optimize jumps and other jump-like instructions.
951** * Update labels of instructions with labels to their final
952** destinations (e.g., choice L1; ... L1: jmp L2: becomes
953** choice L2)
954** * Jumps to other instructions that do jumps become those
955** instructions (e.g., jump to return becomes a return; jump
956** to commit becomes a commit)
957*/
958static void peephole (CompileState *compst) {
959 Instruction *code = compst->p->code;
960 int i;
961 for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
962 switch (code[i].i.code) {
963 case IChoice: case ICall: case ICommit: case IPartialCommit:
964 case IBackCommit: case ITestChar: case ITestSet: case ILabChoice: /* labeled failure */
965 case ITestAny: { /* instructions with labels */
966 jumptothere(compst, i, finallabel(code, i)); /* optimize label */
967 break;
968 }
969 case IJmp: {
970 int ft = finaltarget(code, i);
971 switch (code[ft].i.code) { /* jumping to what? */
972 case IRet: case IFail: case IFailTwice:
973 case IEnd: { /* instructions with unconditional implicit jumps */
974 code[i] = code[ft]; /* jump becomes that instruction */
975 code[i + 1].i.code = IAny; /* 'no-op' for target position */
976 break;
977 }
978 case ICommit: case IPartialCommit:
979 case IBackCommit: { /* inst. with unconditional explicit jumps */
980 int fft = finallabel(code, ft);
981 code[i] = code[ft]; /* jump becomes that instruction... */
982 jumptothere(compst, i, fft); /* but must correct its offset */
983 i--; /* reoptimize its label */
984 break;
985 }
986 default: {
987 jumptothere(compst, i, ft); /* optimize label */
988 break;
989 }
990 }
991 break;
992 }
993 default: break;
994 }
995 }
996 assert(code[i - 1].i.code == IEnd);
997}
998
999
1000/*
1001** Compile a pattern
1002*/
1003Instruction *compile (lua_State *L, Pattern *p) {
1004 CompileState compst;
1005 compst.p = p; compst.ncode = 0; compst.L = L;
1006 reallocprog(L, p, 2); /* minimum initial size */
1007 codegen(&compst, p->tree, 0, NOINST, fullset);
1008 addinstruction(&compst, IEnd, 0);
1009 reallocprog(L, p, compst.ncode); /* set final size */
1010 peephole(&compst); /* labeled failure */
1011 return p->code;
1012}
1013
1014
1015/* }====================================================== */
1016
diff --git a/lpcode.h b/lpcode.h
new file mode 100644
index 0000000..5c9d54f
--- /dev/null
+++ b/lpcode.h
@@ -0,0 +1,34 @@
1/*
2** $Id: lpcode.h,v 1.5 2013/04/04 21:24:45 roberto Exp $
3*/
4
5#if !defined(lpcode_h)
6#define lpcode_h
7
8#include "lua.h"
9
10#include "lptypes.h"
11#include "lptree.h"
12#include "lpvm.h"
13
14int tocharset (TTree *tree, Charset *cs);
15int checkaux (TTree *tree, int pred);
16int fixedlenx (TTree *tree, int count, int len);
17int hascaptures (TTree *tree);
18int lp_gc (lua_State *L);
19Instruction *compile (lua_State *L, Pattern *p);
20void reallocprog (lua_State *L, Pattern *p, int nsize);
21int sizei (const Instruction *i);
22
23
24#define PEnullable 0
25#define PEnofail 1
26
27#define nofail(t) checkaux(t, PEnofail)
28#define nullable(t) checkaux(t, PEnullable)
29
30#define fixedlen(t) fixedlenx(t, 0, 0)
31
32
33
34#endif
diff --git a/lpprint.c b/lpprint.c
new file mode 100644
index 0000000..03239fd
--- /dev/null
+++ b/lpprint.c
@@ -0,0 +1,255 @@
1/*
2** $Id: lpprint.c,v 1.7 2013/04/12 16:29:49 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5
6#include <ctype.h>
7#include <limits.h>
8#include <stdio.h>
9
10
11#include "lptypes.h"
12#include "lpprint.h"
13#include "lpcode.h"
14
15
16#if defined(LPEG_DEBUG)
17
18/*
19** {======================================================
20** Printing patterns (for debugging)
21** =======================================================
22*/
23
24
25void printcharset (const byte *st) {
26 int i;
27 printf("[");
28 for (i = 0; i <= UCHAR_MAX; i++) {
29 int first = i;
30 while (testchar(st, i) && i <= UCHAR_MAX) i++;
31 if (i - 1 == first) /* unary range? */
32 printf("(%02x)", first);
33 else if (i - 1 > first) /* non-empty range? */
34 printf("(%02x-%02x)", first, i - 1);
35 }
36 printf("]");
37}
38
39
40static void printcapkind (int kind) {
41 const char *const modes[] = {
42 "close", "position", "constant", "backref",
43 "argument", "simple", "table", "function",
44 "query", "string", "num", "substitution", "fold",
45 "runtime", "group"};
46 printf("%s", modes[kind]);
47}
48
49
50static void printjmp (const Instruction *op, const Instruction *p) {
51 printf("-> %d", (int)(p + (p + 1)->offset - op));
52}
53
54
55void printinst (const Instruction *op, const Instruction *p) {
56 const char *const names[] = {
57 "any", "char", "set",
58 "testany", "testchar", "testset",
59 "span", "behind",
60 "ret", "end",
61 "choice", "jmp", "call", "open_call",
62 "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
63 "fullcapture", "opencapture", "closecapture", "closeruntime",
64 "throw", "labeled_choice" /* labeled failure */
65 };
66 printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
67 switch ((Opcode)p->i.code) {
68 case IChar: {
69 printf("'%c'", p->i.aux);
70 break;
71 }
72 case ITestChar: {
73 printf("'%c'", p->i.aux); printjmp(op, p);
74 break;
75 }
76 case IFullCapture: {
77 printcapkind(getkind(p));
78 printf(" (size = %d) (idx = %d)", getoff(p), p->i.key);
79 break;
80 }
81 case IOpenCapture: {
82 printcapkind(getkind(p));
83 printf(" (idx = %d)", p->i.key);
84 break;
85 }
86 case ISet: {
87 printcharset((p+1)->buff);
88 break;
89 }
90 case ITestSet: {
91 printcharset((p+2)->buff); printjmp(op, p);
92 break;
93 }
94 case ISpan: {
95 printcharset((p+1)->buff);
96 break;
97 }
98 case IOpenCall: {
99 printf("-> %d", (p + 1)->offset);
100 break;
101 }
102 case IBehind: {
103 printf("%d", p->i.aux);
104 break;
105 }
106 case IJmp: case ICall: case ICommit: case IChoice:
107 case IPartialCommit: case IBackCommit: case ITestAny: {
108 printjmp(op, p);
109 break;
110 }
111 case IThrow: { /* labeled failure */
112 printf("%d", (p + 1)->labels);
113 break;
114 }
115 case ILabChoice: { /* labeled failure */
116 printjmp(op, p);
117 printf(" %d", (p + 2)->labels);
118 break;
119 }
120
121 default: break;
122 }
123 printf("\n");
124}
125
126
127void printpatt (Instruction *p, int n) {
128 Instruction *op = p;
129 while (p < op + n) {
130 printinst(op, p);
131 p += sizei(p);
132 }
133}
134
135
136#if defined(LPEG_DEBUG)
137static void printcap (Capture *cap) {
138 printcapkind(cap->kind);
139 printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s);
140}
141
142
143void printcaplist (Capture *cap, Capture *limit) {
144 printf(">======\n");
145 for (; cap->s && (limit == NULL || cap < limit); cap++)
146 printcap(cap);
147 printf("=======\n");
148}
149#endif
150
151/* }====================================================== */
152
153
154/*
155** {======================================================
156** Printing trees (for debugging)
157** =======================================================
158*/
159
160static const char *tagnames[] = {
161 "char", "set", "any",
162 "true", "false",
163 "rep",
164 "seq", "choice",
165 "not", "and",
166 "call", "opencall", "rule", "grammar",
167 "behind",
168 "capture", "run-time"
169};
170
171
172void printtree (TTree *tree, int ident) {
173 int i;
174 for (i = 0; i < ident; i++) printf(" ");
175 printf("%s", tagnames[tree->tag]);
176 switch (tree->tag) {
177 case TChar: {
178 int c = tree->u.n;
179 if (isprint(c))
180 printf(" '%c'\n", c);
181 else
182 printf(" (%02X)\n", c);
183 break;
184 }
185 case TSet: {
186 printcharset(treebuffer(tree));
187 printf("\n");
188 break;
189 }
190 case TOpenCall: case TCall: {
191 printf(" key: %d\n", tree->key);
192 break;
193 }
194 case TBehind: {
195 printf(" %d\n", tree->u.n);
196 printtree(sib1(tree), ident + 2);
197 break;
198 }
199 case TCapture: {
200 printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n);
201 printtree(sib1(tree), ident + 2);
202 break;
203 }
204 case TRule: {
205 printf(" n: %d key: %d\n", tree->cap, tree->key);
206 printtree(sib1(tree), ident + 2);
207 break; /* do not print next rule as a sibling */
208 }
209 case TGrammar: {
210 TTree *rule = sib1(tree);
211 printf(" %d\n", tree->u.n); /* number of rules */
212 for (i = 0; i < tree->u.n; i++) {
213 printtree(rule, ident + 2);
214 rule = sib2(rule);
215 }
216 assert(rule->tag == TTrue); /* sentinel */
217 break;
218 }
219 default: {
220 int sibs = numsiblings[tree->tag];
221 printf("\n");
222 if (sibs >= 1) {
223 printtree(sib1(tree), ident + 2);
224 if (sibs >= 2)
225 printtree(sib2(tree), ident + 2);
226 }
227 break;
228 }
229 }
230}
231
232
233void printktable (lua_State *L, int idx) {
234 int n, i;
235 lua_getfenv(L, idx);
236 if (lua_isnil(L, -1)) /* no ktable? */
237 return;
238 n = lua_objlen(L, -1);
239 printf("[");
240 for (i = 1; i <= n; i++) {
241 printf("%d = ", i);
242 lua_rawgeti(L, -1, i);
243 if (lua_isstring(L, -1))
244 printf("%s ", lua_tostring(L, -1));
245 else
246 printf("%s ", lua_typename(L, lua_type(L, -1)));
247 lua_pop(L, 1);
248 }
249 printf("]\n");
250 /* leave ktable at the stack */
251}
252
253/* }====================================================== */
254
255#endif
diff --git a/lpprint.h b/lpprint.h
new file mode 100644
index 0000000..6cbe47d
--- /dev/null
+++ b/lpprint.h
@@ -0,0 +1,37 @@
1/*
2** $Id: lpprint.h,v 1.1 2013/03/21 20:25:12 roberto Exp $
3*/
4
5
6#if !defined(lpprint_h)
7#define lpprint_h
8
9
10#include "lptree.h"
11#include "lpvm.h"
12
13
14#if defined(LPEG_DEBUG)
15
16void printpatt (Instruction *p, int n);
17void printtree (TTree *tree, int ident);
18void printktable (lua_State *L, int idx);
19void printcharset (const byte *st);
20void printcaplist (Capture *cap, Capture *limit);
21void printinst (const Instruction *op, const Instruction *p);
22
23
24#else
25
26#define printktable(L,idx) \
27 luaL_error(L, "function only implemented in debug mode")
28#define printtree(tree,i) \
29 luaL_error(L, "function only implemented in debug mode")
30#define printpatt(p,n) \
31 luaL_error(L, "function only implemented in debug mode")
32
33#endif
34
35
36#endif
37
diff --git a/lptree.c b/lptree.c
new file mode 100644
index 0000000..5d2933d
--- /dev/null
+++ b/lptree.c
@@ -0,0 +1,1282 @@
1/*
2** $Id: lptree.c,v 1.10 2013/04/12 16:30:33 roberto Exp $
3** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5
6#include <ctype.h>
7#include <limits.h>
8#include <string.h>
9
10
11#include "lua.h"
12#include "lauxlib.h"
13
14#include "lptypes.h"
15#include "lpcap.h"
16#include "lpcode.h"
17#include "lpprint.h"
18#include "lptree.h"
19
20
21/* number of siblings for each tree */
22const byte numsiblings[] = {
23 0, 0, 0, /* char, set, any */
24 0, 0, /* true, false */
25 1, /* rep */
26 2, 2, /* seq, choice */
27 1, 1, /* not, and */
28 0, 0, 2, 1, /* call, opencall, rule, grammar */
29 1, /* behind */
30 1, 1, /* capture, runtime capture */
31 0, 2 /* labeled failure throw, labeled choice */
32};
33
34
35static TTree *newgrammar (lua_State *L, int arg);
36
37
38/*
39** returns a reasonable name for value at index 'idx' on the stack
40*/
41static const char *val2str (lua_State *L, int idx) {
42 const char *k = lua_tostring(L, idx);
43 if (k != NULL)
44 return lua_pushfstring(L, "%s", k);
45 else
46 return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx));
47}
48
49
50/*
51** Fix a TOpenCall into a TCall node, using table 'postable' to
52** translate a key to its rule address in the tree. Raises an
53** error if key does not exist.
54*/
55static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) {
56 int n;
57 lua_rawgeti(L, -1, t->key); /* get rule's name */
58 lua_gettable(L, postable); /* query name in position table */
59 n = lua_tonumber(L, -1); /* get (absolute) position */
60 lua_pop(L, 1); /* remove position */
61 if (n == 0) { /* no position? */
62 lua_rawgeti(L, -1, t->key); /* get rule's name again */
63 luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1));
64 }
65 t->tag = TCall;
66 t->u.ps = n - (t - g); /* position relative to node */
67 assert(sib2(t)->tag == TRule);
68 sib2(t)->key = t->key;
69}
70
71
72/*
73** Transform left associative constructions into right
74** associative ones, for sequence and choice; that is:
75** (t11 + t12) + t2 => t11 + (t12 + t2)
76** (t11 * t12) * t2 => t11 * (t12 * t2)
77** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
78*/
79static void correctassociativity (TTree *tree) {
80 TTree *t1 = sib1(tree);
81 assert(tree->tag == TChoice || tree->tag == TSeq);
82 while (t1->tag == tree->tag) {
83 int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */
84 int n11size = t1->u.ps - 1;
85 int n12size = n1size - n11size - 1;
86 memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */
87 tree->u.ps = n11size + 1;
88 sib2(tree)->tag = tree->tag;
89 sib2(tree)->u.ps = n12size + 1;
90 }
91}
92
93
94/*
95** Make final adjustments in a tree. Fix open calls in tree 't',
96** making them refer to their respective rules or raising appropriate
97** errors (if not inside a grammar). Correct associativity of associative
98** constructions (making them right associative). Assume that tree's
99** ktable is at the top of the stack (for error messages).
100*/
101static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) {
102 tailcall:
103 switch (t->tag) {
104 case TGrammar: /* subgrammars were already fixed */
105 return;
106 case TOpenCall: {
107 if (g != NULL) /* inside a grammar? */
108 fixonecall(L, postable, g, t);
109 else { /* open call outside grammar */
110 lua_rawgeti(L, -1, t->key);
111 luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1));
112 }
113 break;
114 }
115 case TSeq: case TChoice:
116 correctassociativity(t);
117 break;
118 }
119 switch (numsiblings[t->tag]) {
120 case 1: /* finalfix(L, postable, g, sib1(t)); */
121 t = sib1(t); goto tailcall;
122 case 2:
123 finalfix(L, postable, g, sib1(t));
124 t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */
125 default: assert(numsiblings[t->tag] == 0); break;
126 }
127}
128
129
130/*
131** {======================================================
132** Tree generation
133** =======================================================
134*/
135
136/*
137** In 5.2, could use 'luaL_testudata'...
138*/
139static int testpattern (lua_State *L, int idx) {
140 if (lua_touserdata(L, idx)) { /* value is a userdata? */
141 if (lua_getmetatable(L, idx)) { /* does it have a metatable? */
142 luaL_getmetatable(L, PATTERN_T);
143 if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */
144 lua_pop(L, 2); /* remove both metatables */
145 return 1;
146 }
147 }
148 }
149 return 0;
150}
151
152
153static Pattern *getpattern (lua_State *L, int idx) {
154 return (Pattern *)luaL_checkudata(L, idx, PATTERN_T);
155}
156
157
158static int getsize (lua_State *L, int idx) {
159 return (lua_objlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1;
160}
161
162
163static TTree *gettree (lua_State *L, int idx, int *len) {
164 Pattern *p = getpattern(L, idx);
165 if (len)
166 *len = getsize(L, idx);
167 return p->tree;
168}
169
170
171/*
172** create a pattern
173*/
174static TTree *newtree (lua_State *L, int len) {
175 size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern);
176 Pattern *p = (Pattern *)lua_newuserdata(L, size);
177 luaL_getmetatable(L, PATTERN_T);
178 lua_setmetatable(L, -2);
179 p->code = NULL; p->codesize = 0;
180 return p->tree;
181}
182
183
184static TTree *newleaf (lua_State *L, int tag) {
185 TTree *tree = newtree(L, 1);
186 tree->tag = tag;
187 return tree;
188}
189
190
191/* labeled failure begin */
192static TTree *newlabelleaf (lua_State *L, Labelset ls) {
193 TTree *tree = newtree(L, 1);
194 tree->tag = TThrow;
195 tree->labels = ls;
196 return tree;
197}
198/* labeled failure end */
199
200
201static TTree *newcharset (lua_State *L) {
202 TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1);
203 tree->tag = TSet;
204 loopset(i, treebuffer(tree)[i] = 0);
205 return tree;
206}
207
208
209/*
210** add to tree a sequence where first sibling is 'sib' (with size
211** 'sibsize'); returns position for second sibling
212*/
213static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) {
214 tree->tag = TSeq; tree->u.ps = sibsize + 1;
215 memcpy(sib1(tree), sib, sibsize * sizeof(TTree));
216 return sib2(tree);
217}
218
219
220/*
221** Add element 'idx' to 'ktable' of pattern at the top of the stack;
222** create new 'ktable' if necessary. Return index of new element.
223*/
224static int addtoktable (lua_State *L, int idx) {
225 if (idx == 0 || lua_isnil(L, idx)) /* no actual value to insert? */
226 return 0;
227 else {
228 int n;
229 lua_getfenv(L, -1); /* get ktable from pattern */
230 n = lua_objlen(L, -1);
231 if (n == 0) { /* is it empty/non-existent? */
232 lua_pop(L, 1); /* remove it */
233 lua_createtable(L, 1, 0); /* create a fresh table */
234 }
235 lua_pushvalue(L, idx); /* element to be added */
236 lua_rawseti(L, -2, n + 1);
237 lua_setfenv(L, -2); /* set it as ktable for pattern */
238 return n + 1;
239 }
240}
241
242
243/*
244** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got
245** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
246** must build a sequence of sequence of sequence...)
247*/
248static void fillseq (TTree *tree, int tag, int n, const char *s) {
249 int i;
250 for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */
251 tree->tag = TSeq; tree->u.ps = 2;
252 sib1(tree)->tag = tag;
253 sib1(tree)->u.n = s ? (byte)s[i] : 0;
254 tree = sib2(tree);
255 }
256 tree->tag = tag; /* last one does not need TSeq */
257 tree->u.n = s ? (byte)s[i] : 0;
258}
259
260
261/*
262** Numbers as patterns:
263** 0 == true (always match); n == TAny repeated 'n' times;
264** -n == not (TAny repeated 'n' times)
265*/
266static TTree *numtree (lua_State *L, int n) {
267 if (n == 0)
268 return newleaf(L, TTrue);
269 else {
270 TTree *tree, *nd;
271 if (n > 0)
272 tree = nd = newtree(L, 2 * n - 1);
273 else { /* negative: code it as !(-n) */
274 n = -n;
275 tree = newtree(L, 2 * n);
276 tree->tag = TNot;
277 nd = sib1(tree);
278 }
279 fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */
280 return tree;
281 }
282}
283
284
285/*
286** Convert value at index 'idx' to a pattern
287*/
288static TTree *getpatt (lua_State *L, int idx, int *len) {
289 TTree *tree;
290 switch (lua_type(L, idx)) {
291 case LUA_TSTRING: {
292 size_t slen;
293 const char *s = lua_tolstring(L, idx, &slen); /* get string */
294 if (slen == 0) /* empty? */
295 tree = newleaf(L, TTrue); /* always match */
296 else {
297 tree = newtree(L, 2 * (slen - 1) + 1);
298 fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */
299 }
300 break;
301 }
302 case LUA_TNUMBER: {
303 int n = lua_tointeger(L, idx);
304 tree = numtree(L, n);
305 break;
306 }
307 case LUA_TBOOLEAN: {
308 tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse));
309 break;
310 }
311 case LUA_TTABLE: {
312 tree = newgrammar(L, idx);
313 break;
314 }
315 case LUA_TFUNCTION: {
316 tree = newtree(L, 2);
317 tree->tag = TRunTime;
318 tree->key = addtoktable(L, idx);
319 sib1(tree)->tag = TTrue;
320 break;
321 }
322 default: {
323 return gettree(L, idx, len);
324 }
325 }
326 lua_replace(L, idx); /* put new tree into 'idx' slot */
327 if (len)
328 *len = getsize(L, idx);
329 return tree;
330}
331
332
333/*
334** Return the number of elements in the ktable of pattern at 'idx'.
335** In Lua 5.2, default "environment" for patterns is nil, not
336** a table. Treat it as an empty table. In Lua 5.1, assumes that
337** the environment has no numeric indices (len == 0)
338*/
339static int ktablelen (lua_State *L, int idx) {
340 if (!lua_istable(L, idx)) return 0;
341 else return lua_objlen(L, idx);
342}
343
344
345/*
346** Concatentate the contents of table 'idx1' into table 'idx2'.
347** (Assume that both indices are negative.)
348** Return the original length of table 'idx2'
349*/
350static int concattable (lua_State *L, int idx1, int idx2) {
351 int i;
352 int n1 = ktablelen(L, idx1);
353 int n2 = ktablelen(L, idx2);
354 if (n1 == 0) return 0; /* nothing to correct */
355 for (i = 1; i <= n1; i++) {
356 lua_rawgeti(L, idx1, i);
357 lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */
358 }
359 return n2;
360}
361
362
363/*
364** Make a merge of ktables from p1 and p2 the ktable for the new
365** pattern at the top of the stack.
366*/
367static int joinktables (lua_State *L, int p1, int p2) {
368 int n1, n2;
369 lua_getfenv(L, p1); /* get ktables */
370 lua_getfenv(L, p2);
371 n1 = ktablelen(L, -2);
372 n2 = ktablelen(L, -1);
373 if (n1 == 0 && n2 == 0) { /* are both tables empty? */
374 lua_pop(L, 2); /* nothing to be done; pop tables */
375 return 0; /* nothing to correct */
376 }
377 if (n2 == 0 || lua_equal(L, -2, -1)) { /* second table is empty or equal? */
378 lua_pop(L, 1); /* pop 2nd table */
379 lua_setfenv(L, -2); /* set 1st ktable into new pattern */
380 return 0; /* nothing to correct */
381 }
382 if (n1 == 0) { /* first table is empty? */
383 lua_setfenv(L, -3); /* set 2nd table into new pattern */
384 lua_pop(L, 1); /* pop 1st table */
385 return 0; /* nothing to correct */
386 }
387 else {
388 lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */
389 /* stack: new p; ktable p1; ktable p2; new ktable */
390 concattable(L, -3, -1); /* from p1 into new ktable */
391 concattable(L, -2, -1); /* from p2 into new ktable */
392 lua_setfenv(L, -4); /* new ktable becomes p env */
393 lua_pop(L, 2); /* pop other ktables */
394 return n1; /* correction for indices from p2 */
395 }
396}
397
398
399static void correctkeys (TTree *tree, int n) {
400 if (n == 0) return; /* no correction? */
401 tailcall:
402 switch (tree->tag) {
403 case TOpenCall: case TCall: case TRunTime: case TRule: {
404 if (tree->key > 0)
405 tree->key += n;
406 break;
407 }
408 case TCapture: {
409 if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum)
410 tree->key += n;
411 break;
412 }
413 default: break;
414 }
415 switch (numsiblings[tree->tag]) {
416 case 1: /* correctkeys(sib1(tree), n); */
417 tree = sib1(tree); goto tailcall;
418 case 2:
419 correctkeys(sib1(tree), n);
420 tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */
421 default: assert(numsiblings[tree->tag] == 0); break;
422 }
423}
424
425
426/*
427** copy 'ktable' of element 'idx' to new tree (on top of stack)
428*/
429static void copyktable (lua_State *L, int idx) {
430 lua_getfenv(L, idx);
431 lua_setfenv(L, -2);
432}
433
434
435/*
436** merge 'ktable' from rule at stack index 'idx' into 'ktable'
437** from tree at the top of the stack, and correct corresponding
438** tree.
439*/
440static void mergektable (lua_State *L, int idx, TTree *rule) {
441 int n;
442 lua_getfenv(L, -1); /* get ktables */
443 lua_getfenv(L, idx);
444 n = concattable(L, -1, -2);
445 lua_pop(L, 2); /* remove both ktables */
446 correctkeys(rule, n);
447}
448
449
450/*
451** create a new tree, whith a new root and one sibling.
452** Sibling must be on the Lua stack, at index 1.
453*/
454static TTree *newroot1sib (lua_State *L, int tag) {
455 int s1;
456 TTree *tree1 = getpatt(L, 1, &s1);
457 TTree *tree = newtree(L, 1 + s1); /* create new tree */
458 tree->tag = tag;
459 memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
460 copyktable(L, 1);
461 return tree;
462}
463
464
465/*
466** create a new tree, whith a new root and 2 siblings.
467** Siblings must be on the Lua stack, first one at index 1.
468*/
469static TTree *newroot2sib (lua_State *L, int tag) {
470 int s1, s2;
471 TTree *tree1 = getpatt(L, 1, &s1);
472 TTree *tree2 = getpatt(L, 2, &s2);
473 TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */
474 tree->tag = tag;
475 tree->u.ps = 1 + s1;
476 memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
477 memcpy(sib2(tree), tree2, s2 * sizeof(TTree));
478 correctkeys(sib2(tree), joinktables(L, 1, 2));
479 return tree;
480}
481
482
483static int lp_P (lua_State *L) {
484 luaL_checkany(L, 1);
485 getpatt(L, 1, NULL);
486 lua_settop(L, 1);
487 return 1;
488}
489
490
491/*
492** sequence operator; optimizations:
493** false x => false, x true => x, true x => x
494** (cannot do x . false => false because x may have runtime captures)
495*/
496static int lp_seq (lua_State *L) {
497 TTree *tree1 = getpatt(L, 1, NULL);
498 TTree *tree2 = getpatt(L, 2, NULL);
499 if (tree1->tag == TFalse || tree2->tag == TTrue)
500 lua_pushvalue(L, 1); /* false . x == false, x . true = x */
501 else if (tree1->tag == TTrue)
502 lua_pushvalue(L, 2); /* true . x = x */
503 else
504 newroot2sib(L, TSeq);
505 return 1;
506}
507
508
509/*
510** choice operator; optimizations:
511** charset / charset => charset
512** true / x => true, x / false => x, false / x => x
513** (x / true is not equivalent to true)
514*/
515static int lp_choice (lua_State *L) {
516 Charset st1, st2;
517 TTree *t1 = getpatt(L, 1, NULL);
518 TTree *t2 = getpatt(L, 2, NULL);
519 if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
520 TTree *t = newcharset(L);
521 loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]);
522 }
523 else if (nofail(t1) || t2->tag == TFalse)
524 lua_pushvalue(L, 1); /* true / x => true, x / false => x */
525 else if (t1->tag == TFalse)
526 lua_pushvalue(L, 2); /* false / x => x */
527 else
528 newroot2sib(L, TChoice);
529 return 1;
530}
531
532
533/*
534** p^n
535*/
536static int lp_star (lua_State *L) {
537 int size1;
538 int n = luaL_checkint(L, 2);
539 TTree *tree1 = gettree(L, 1, &size1);
540 if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */
541 TTree *tree = newtree(L, (n + 1) * (size1 + 1));
542 if (nullable(tree1))
543 luaL_error(L, "loop body may accept empty string");
544 while (n--) /* repeat 'n' times */
545 tree = seqaux(tree, tree1, size1);
546 tree->tag = TRep;
547 memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
548 }
549 else { /* choice (seq tree1 ... choice tree1 true ...) true */
550 TTree *tree;
551 n = -n;
552 /* size = (choice + seq + tree1 + true) * n, but the last has no seq */
553 tree = newtree(L, n * (size1 + 3) - 1);
554 for (; n > 1; n--) { /* repeat (n - 1) times */
555 tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2;
556 sib2(tree)->tag = TTrue;
557 tree = sib1(tree);
558 tree = seqaux(tree, tree1, size1);
559 }
560 tree->tag = TChoice; tree->u.ps = size1 + 1;
561 sib2(tree)->tag = TTrue;
562 memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
563 }
564 copyktable(L, 1);
565 return 1;
566}
567
568
569/*
570** #p == &p
571*/
572static int lp_and (lua_State *L) {
573 newroot1sib(L, TAnd);
574 return 1;
575}
576
577
578/*
579** -p == !p
580*/
581static int lp_not (lua_State *L) {
582 newroot1sib(L, TNot);
583 return 1;
584}
585
586
587/*
588** [t1 - t2] == Seq (Not t2) t1
589** If t1 and t2 are charsets, make their difference.
590*/
591static int lp_sub (lua_State *L) {
592 Charset st1, st2;
593 int s1, s2;
594 TTree *t1 = getpatt(L, 1, &s1);
595 TTree *t2 = getpatt(L, 2, &s2);
596 if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
597 TTree *t = newcharset(L);
598 loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]);
599 }
600 else {
601 TTree *tree = newtree(L, 2 + s1 + s2);
602 tree->tag = TSeq; /* sequence of... */
603 tree->u.ps = 2 + s2;
604 sib1(tree)->tag = TNot; /* ...not... */
605 memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */
606 memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */
607 correctkeys(sib1(tree), joinktables(L, 1, 2));
608 }
609 return 1;
610}
611
612
613static int lp_set (lua_State *L) {
614 size_t l;
615 const char *s = luaL_checklstring(L, 1, &l);
616 TTree *tree = newcharset(L);
617 while (l--) {
618 setchar(treebuffer(tree), (byte)(*s));
619 s++;
620 }
621 return 1;
622}
623
624
625static int lp_range (lua_State *L) {
626 int arg;
627 int top = lua_gettop(L);
628 TTree *tree = newcharset(L);
629 for (arg = 1; arg <= top; arg++) {
630 int c;
631 size_t l;
632 const char *r = luaL_checklstring(L, arg, &l);
633 luaL_argcheck(L, l == 2, arg, "range must have two characters");
634 for (c = (byte)r[0]; c <= (byte)r[1]; c++)
635 setchar(treebuffer(tree), c);
636 }
637 return 1;
638}
639
640
641/*
642** Look-behind predicate
643*/
644static int lp_behind (lua_State *L) {
645 TTree *tree;
646 TTree *tree1 = getpatt(L, 1, NULL);
647 int n = fixedlen(tree1);
648 luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures");
649 luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length");
650 luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind");
651 tree = newroot1sib(L, TBehind);
652 tree->u.n = n;
653 return 1;
654}
655
656
657/* labeled failure begin */
658/*
659** Throws a label or a set of labels
660*/
661static int lp_throw (lua_State *L) {
662 int n = lua_gettop(L);
663 Labelset ls = 0;
664 int i;
665 for (i = 1; i <= n; i++) {
666 int d = luaL_checkint(L, i);
667 luaL_argcheck(L, d >= 0 && d < (int)MAXLABELS, i, "invalid label index");
668 setlabel(ls, d);
669 }
670 newlabelleaf(L, ls);
671 return 1;
672}
673
674/*
675** labeled choice function
676*/
677static int lp_labchoice (lua_State *L) {
678 TTree *tree;
679 int n = lua_gettop(L);
680 int i;
681 Labelset ls = 0;
682 for (i = 3; i <= n; i++) {
683 int d = luaL_checkint(L, i);
684 luaL_argcheck(L, d >= 0 && d < (int)MAXLABELS, i, "invalid label index");
685 setlabel(ls, d);
686 }
687 tree = newroot2sib(L, TLabChoice);
688 tree->labels = ls;
689 return 1;
690}
691/* labeled failure end */
692
693
694/*
695** Create a non-terminal
696*/
697static int lp_V (lua_State *L) {
698 TTree *tree = newleaf(L, TOpenCall);
699 luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
700 tree->key = addtoktable(L, 1);
701 return 1;
702}
703
704
705/*
706** Create a tree for a non-empty capture, with a body and
707** optionally with an associated Lua value (at index 'labelidx' in the
708** stack)
709*/
710static int capture_aux (lua_State *L, int cap, int labelidx) {
711 TTree *tree = newroot1sib(L, TCapture);
712 tree->cap = cap;
713 tree->key = addtoktable(L, labelidx);
714 return 1;
715}
716
717
718/*
719** Fill a tree with an empty capture, using an empty (TTrue) sibling.
720*/
721static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) {
722 tree->tag = TCapture;
723 tree->cap = cap;
724 tree->key = addtoktable(L, idx);
725 sib1(tree)->tag = TTrue;
726 return tree;
727}
728
729
730/*
731** Create a tree for an empty capture
732*/
733static TTree *newemptycap (lua_State *L, int cap, int idx) {
734 return auxemptycap(L, newtree(L, 2), cap, idx);
735}
736
737
738/*
739** Captures with syntax p / v
740** (function capture, query capture, string capture, or number capture)
741*/
742static int lp_divcapture (lua_State *L) {
743 switch (lua_type(L, 2)) {
744 case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2);
745 case LUA_TTABLE: return capture_aux(L, Cquery, 2);
746 case LUA_TSTRING: return capture_aux(L, Cstring, 2);
747 case LUA_TNUMBER: {
748 int n = lua_tointeger(L, 2);
749 TTree *tree = newroot1sib(L, TCapture);
750 luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number");
751 tree->cap = Cnum;
752 tree->key = n;
753 return 1;
754 }
755 default: return luaL_argerror(L, 2, "invalid replacement value");
756 }
757}
758
759
760static int lp_substcapture (lua_State *L) {
761 return capture_aux(L, Csubst, 0);
762}
763
764
765static int lp_tablecapture (lua_State *L) {
766 return capture_aux(L, Ctable, 0);
767}
768
769
770static int lp_groupcapture (lua_State *L) {
771 if (lua_isnoneornil(L, 2))
772 return capture_aux(L, Cgroup, 0);
773 else {
774 luaL_checkstring(L, 2);
775 return capture_aux(L, Cgroup, 2);
776 }
777}
778
779
780static int lp_foldcapture (lua_State *L) {
781 luaL_checktype(L, 2, LUA_TFUNCTION);
782 return capture_aux(L, Cfold, 2);
783}
784
785
786static int lp_simplecapture (lua_State *L) {
787 return capture_aux(L, Csimple, 0);
788}
789
790
791static int lp_poscapture (lua_State *L) {
792 newemptycap(L, Cposition, 0);
793 return 1;
794}
795
796
797static int lp_argcapture (lua_State *L) {
798 int n = luaL_checkint(L, 1);
799 TTree *tree = newemptycap(L, Carg, 0);
800 tree->key = n;
801 luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index");
802 return 1;
803}
804
805
806static int lp_backref (lua_State *L) {
807 luaL_checkstring(L, 1);
808 newemptycap(L, Cbackref, 1);
809 return 1;
810}
811
812
813/*
814** Constant capture
815*/
816static int lp_constcapture (lua_State *L) {
817 int i;
818 int n = lua_gettop(L); /* number of values */
819 if (n == 0) /* no values? */
820 newleaf(L, TTrue); /* no capture */
821 else if (n == 1)
822 newemptycap(L, Cconst, 1); /* single constant capture */
823 else { /* create a group capture with all values */
824 TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2);
825 tree->tag = TCapture;
826 tree->cap = Cgroup;
827 tree->key = 0;
828 tree = sib1(tree);
829 for (i = 1; i <= n - 1; i++) {
830 tree->tag = TSeq;
831 tree->u.ps = 3; /* skip TCapture and its sibling */
832 auxemptycap(L, sib1(tree), Cconst, i);
833 tree = sib2(tree);
834 }
835 auxemptycap(L, tree, Cconst, i);
836 }
837 return 1;
838}
839
840
841static int lp_matchtime (lua_State *L) {
842 TTree *tree;
843 luaL_checktype(L, 2, LUA_TFUNCTION);
844 tree = newroot1sib(L, TRunTime);
845 tree->key = addtoktable(L, 2);
846 return 1;
847}
848
849/* }====================================================== */
850
851
852/*
853** {======================================================
854** Grammar - Tree generation
855** =======================================================
856*/
857
858/*
859** push on the stack the index and the pattern for the
860** initial rule of grammar at index 'arg' in the stack;
861** also add that index into position table.
862*/
863static void getfirstrule (lua_State *L, int arg, int postab) {
864 lua_rawgeti(L, arg, 1); /* access first element */
865 if (lua_isstring(L, -1)) { /* is it the name of initial rule? */
866 lua_pushvalue(L, -1); /* duplicate it to use as key */
867 lua_gettable(L, arg); /* get associated rule */
868 }
869 else {
870 lua_pushinteger(L, 1); /* key for initial rule */
871 lua_insert(L, -2); /* put it before rule */
872 }
873 if (!testpattern(L, -1)) { /* initial rule not a pattern? */
874 if (lua_isnil(L, -1))
875 luaL_error(L, "grammar has no initial rule");
876 else
877 luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2));
878 }
879 lua_pushvalue(L, -2); /* push key */
880 lua_pushinteger(L, 1); /* push rule position (after TGrammar) */
881 lua_settable(L, postab); /* insert pair at position table */
882}
883
884/*
885** traverse grammar at index 'arg', pushing all its keys and patterns
886** into the stack. Create a new table (before all pairs key-pattern) to
887** collect all keys and their associated positions in the final tree
888** (the "position table").
889** Return the number of rules and (in 'totalsize') the total size
890** for the new tree.
891*/
892static int collectrules (lua_State *L, int arg, int *totalsize) {
893 int n = 1; /* to count number of rules */
894 int postab = lua_gettop(L) + 1; /* index of position table */
895 int size; /* accumulator for total size */
896 lua_newtable(L); /* create position table */
897 getfirstrule(L, arg, postab);
898 size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */
899 lua_pushnil(L); /* prepare to traverse grammar table */
900 while (lua_next(L, arg) != 0) {
901 if (lua_tonumber(L, -2) == 1 ||
902 lua_equal(L, -2, postab + 1)) { /* initial rule? */
903 lua_pop(L, 1); /* remove value (keep key for lua_next) */
904 continue;
905 }
906 if (!testpattern(L, -1)) /* value is not a pattern? */
907 luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2));
908 luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules");
909 lua_pushvalue(L, -2); /* push key (to insert into position table) */
910 lua_pushinteger(L, size);
911 lua_settable(L, postab);
912 size += 1 + getsize(L, -1); /* update size */
913 lua_pushvalue(L, -2); /* push key (for next lua_next) */
914 n++;
915 }
916 *totalsize = size + 1; /* TTrue to finish list of rules */
917 return n;
918}
919
920
921static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
922 int i;
923 TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */
924 for (i = 0; i < n; i++) { /* add each rule into new tree */
925 int ridx = frule + 2*i + 1; /* index of i-th rule */
926 int rulesize;
927 TTree *rn = gettree(L, ridx, &rulesize);
928 nd->tag = TRule;
929 nd->key = 0;
930 nd->cap = i; /* rule number */
931 nd->u.ps = rulesize + 1; /* point to next rule */
932 memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */
933 mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */
934 nd = sib2(nd); /* move to next rule */
935 }
936 nd->tag = TTrue; /* finish list of rules */
937}
938
939
940/*
941** Check whether a tree has potential infinite loops
942*/
943static int checkloops (TTree *tree) {
944 tailcall:
945 if (tree->tag == TRep && nullable(sib1(tree)))
946 return 1;
947 else if (tree->tag == TGrammar)
948 return 0; /* sub-grammars already checked */
949 else {
950 switch (numsiblings[tree->tag]) {
951 case 1: /* return checkloops(sib1(tree)); */
952 tree = sib1(tree); goto tailcall;
953 case 2:
954 if (checkloops(sib1(tree))) return 1;
955 /* else return checkloops(sib2(tree)); */
956 tree = sib2(tree); goto tailcall;
957 default: assert(numsiblings[tree->tag] == 0); return 0;
958 }
959 }
960}
961
962
963static int verifyerror (lua_State *L, int *passed, int npassed) {
964 int i, j;
965 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */
966 for (j = i - 1; j >= 0; j--) {
967 if (passed[i] == passed[j]) {
968 lua_rawgeti(L, -1, passed[i]); /* get rule's key */
969 return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1));
970 }
971 }
972 }
973 return luaL_error(L, "too many left calls in grammar");
974}
975
976
977/*
978** Check whether a rule can be left recursive; raise an error in that
979** case; otherwise return 1 iff pattern is nullable. Assume ktable at
980** the top of the stack.
981*/
982static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
983 int nullable) {
984 tailcall:
985 switch (tree->tag) {
986 case TChar: case TSet: case TAny:
987 case TFalse: case TThrow: /* labeled failure */
988 return nullable; /* cannot pass from here */
989 case TTrue:
990 case TBehind: /* look-behind cannot have calls */
991 return 1;
992 case TNot: case TAnd: case TRep:
993 /* return verifyrule(L, sib1(tree), passed, npassed, 1); */
994 tree = sib1(tree); nullable = 1; goto tailcall;
995 case TCapture: case TRunTime:
996 /* return verifyrule(L, sib1(tree), passed, npassed); */
997 tree = sib1(tree); goto tailcall;
998 case TCall:
999 /* return verifyrule(L, sib2(tree), passed, npassed); */
1000 tree = sib2(tree); goto tailcall;
1001 case TSeq: /* only check 2nd child if first is nullable */
1002 if (!verifyrule(L, sib1(tree), passed, npassed, 0))
1003 return nullable;
1004 /* else return verifyrule(L, sib2(tree), passed, npassed); */
1005 tree = sib2(tree); goto tailcall;
1006 case TChoice: case TLabChoice: /* must check both children */ /* labeled failure */
1007 nullable = verifyrule(L, sib1(tree), passed, npassed, nullable);
1008 /* return verifyrule(L, sib2(tree), passed, npassed, nullable); */
1009 tree = sib2(tree); goto tailcall;
1010 case TRule:
1011 if (npassed >= MAXRULES)
1012 return verifyerror(L, passed, npassed);
1013 else {
1014 passed[npassed++] = tree->key;
1015 /* return verifyrule(L, sib1(tree), passed, npassed); */
1016 tree = sib1(tree); goto tailcall;
1017 }
1018 case TGrammar:
1019 return nullable(tree); /* sub-grammar cannot be left recursive */
1020 default: assert(0); return 0;
1021 }
1022}
1023
1024
1025static void verifygrammar (lua_State *L, TTree *grammar) {
1026 int passed[MAXRULES];
1027 TTree *rule;
1028 /* check left-recursive rules */
1029 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
1030 if (rule->key == 0) continue; /* unused rule */
1031 verifyrule(L, sib1(rule), passed, 0, 0);
1032 }
1033 assert(rule->tag == TTrue);
1034 /* check infinite loops inside rules */
1035 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
1036 if (rule->key == 0) continue; /* unused rule */
1037 if (checkloops(sib1(rule))) {
1038 lua_rawgeti(L, -1, rule->key); /* get rule's key */
1039 luaL_error(L, "empty loop in rule '%s'", val2str(L, -1));
1040 }
1041 }
1042 assert(rule->tag == TTrue);
1043}
1044
1045
1046/*
1047** Give a name for the initial rule if it is not referenced
1048*/
1049static void initialrulename (lua_State *L, TTree *grammar, int frule) {
1050 if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */
1051 int n = lua_objlen(L, -1) + 1; /* index for name */
1052 lua_pushvalue(L, frule); /* rule's name */
1053 lua_rawseti(L, -2, n); /* ktable was on the top of the stack */
1054 sib1(grammar)->key = n;
1055 }
1056}
1057
1058
1059static TTree *newgrammar (lua_State *L, int arg) {
1060 int treesize;
1061 int frule = lua_gettop(L) + 2; /* position of first rule's key */
1062 int n = collectrules(L, arg, &treesize);
1063 TTree *g = newtree(L, treesize);
1064 luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules");
1065 g->tag = TGrammar; g->u.n = n;
1066 lua_newtable(L); /* create 'ktable' */
1067 lua_setfenv(L, -2);
1068 buildgrammar(L, g, frule, n);
1069 lua_getfenv(L, -1); /* get 'ktable' for new tree */
1070 finalfix(L, frule - 1, g, sib1(g));
1071 initialrulename(L, g, frule);
1072 verifygrammar(L, g);
1073 lua_pop(L, 1); /* remove 'ktable' */
1074 lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */
1075 lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */
1076 return g; /* new table at the top of the stack */
1077}
1078
1079/* }====================================================== */
1080
1081
1082static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) {
1083 lua_getfenv(L, idx); /* push 'ktable' (may be used by 'finalfix') */
1084 finalfix(L, 0, NULL, p->tree);
1085 lua_pop(L, 1); /* remove 'ktable' */
1086 return compile(L, p);
1087}
1088
1089
1090static int lp_printtree (lua_State *L) {
1091 TTree *tree = getpatt(L, 1, NULL);
1092 int c = lua_toboolean(L, 2);
1093 if (c) {
1094 lua_getfenv(L, 1); /* push 'ktable' (may be used by 'finalfix') */
1095 finalfix(L, 0, NULL, tree);
1096 lua_pop(L, 1); /* remove 'ktable' */
1097 }
1098 printktable(L, 1);
1099 printtree(tree, 0);
1100 return 0;
1101}
1102
1103
1104static int lp_printcode (lua_State *L) {
1105 Pattern *p = getpattern(L, 1);
1106 printktable(L, 1);
1107 if (p->code == NULL) /* not compiled yet? */
1108 prepcompile(L, p, 1);
1109 printpatt(p->code, p->codesize);
1110 return 0;
1111}
1112
1113
1114/*
1115** Get the initial position for the match, interpreting negative
1116** values from the end of the subject
1117*/
1118static size_t initposition (lua_State *L, size_t len) {
1119 lua_Integer ii = luaL_optinteger(L, 3, 1);
1120 if (ii > 0) { /* positive index? */
1121 if ((size_t)ii <= len) /* inside the string? */
1122 return (size_t)ii - 1; /* return it (corrected to 0-base) */
1123 else return len; /* crop at the end */
1124 }
1125 else { /* negative index */
1126 if ((size_t)(-ii) <= len) /* inside the string? */
1127 return len - ((size_t)(-ii)); /* return position from the end */
1128 else return 0; /* crop at the beginning */
1129 }
1130}
1131
1132
1133/*
1134** Main match function
1135*/
1136static int lp_match (lua_State *L) {
1137 Capture capture[INITCAPSIZE];
1138 const char *r;
1139 size_t l;
1140 Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1));
1141 Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1);
1142 const char *s = luaL_checklstring(L, SUBJIDX, &l);
1143 size_t i = initposition(L, l);
1144 int ptop = lua_gettop(L);
1145 lua_pushnil(L); /* initialize subscache */
1146 lua_pushlightuserdata(L, capture); /* initialize caplistidx */
1147 lua_getfenv(L, 1); /* initialize penvidx */
1148 r = match(L, s, s + i, s + l, code, capture, ptop);
1149 if (r == NULL) {
1150 lua_pushnil(L);
1151 return 1;
1152 }
1153 return getcaptures(L, s, r, ptop);
1154}
1155
1156
1157
1158/*
1159** {======================================================
1160** Library creation and functions not related to matching
1161** =======================================================
1162*/
1163
1164static int lp_setmax (lua_State *L) {
1165 luaL_optinteger(L, 1, -1);
1166 lua_settop(L, 1);
1167 lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
1168 return 0;
1169}
1170
1171
1172static int lp_version (lua_State *L) {
1173 lua_pushstring(L, VERSION);
1174 return 1;
1175}
1176
1177
1178static int lp_type (lua_State *L) {
1179 if (testpattern(L, 1))
1180 lua_pushliteral(L, "pattern");
1181 else
1182 lua_pushnil(L);
1183 return 1;
1184}
1185
1186
1187int lp_gc (lua_State *L) {
1188 Pattern *p = getpattern(L, 1);
1189 if (p->codesize > 0)
1190 reallocprog(L, p, 0);
1191 return 0;
1192}
1193
1194
1195static void createcat (lua_State *L, const char *catname, int (catf) (int)) {
1196 TTree *t = newcharset(L);
1197 int i;
1198 for (i = 0; i <= UCHAR_MAX; i++)
1199 if (catf(i)) setchar(treebuffer(t), i);
1200 lua_setfield(L, -2, catname);
1201}
1202
1203
1204static int lp_locale (lua_State *L) {
1205 if (lua_isnoneornil(L, 1)) {
1206 lua_settop(L, 0);
1207 lua_createtable(L, 0, 12);
1208 }
1209 else {
1210 luaL_checktype(L, 1, LUA_TTABLE);
1211 lua_settop(L, 1);
1212 }
1213 createcat(L, "alnum", isalnum);
1214 createcat(L, "alpha", isalpha);
1215 createcat(L, "cntrl", iscntrl);
1216 createcat(L, "digit", isdigit);
1217 createcat(L, "graph", isgraph);
1218 createcat(L, "lower", islower);
1219 createcat(L, "print", isprint);
1220 createcat(L, "punct", ispunct);
1221 createcat(L, "space", isspace);
1222 createcat(L, "upper", isupper);
1223 createcat(L, "xdigit", isxdigit);
1224 return 1;
1225}
1226
1227
1228static struct luaL_Reg pattreg[] = {
1229 {"ptree", lp_printtree},
1230 {"pcode", lp_printcode},
1231 {"match", lp_match},
1232 {"B", lp_behind},
1233 {"V", lp_V},
1234 {"C", lp_simplecapture},
1235 {"Cc", lp_constcapture},
1236 {"Cmt", lp_matchtime},
1237 {"Cb", lp_backref},
1238 {"Carg", lp_argcapture},
1239 {"Cp", lp_poscapture},
1240 {"Cs", lp_substcapture},
1241 {"Ct", lp_tablecapture},
1242 {"Cf", lp_foldcapture},
1243 {"Cg", lp_groupcapture},
1244 {"P", lp_P},
1245 {"S", lp_set},
1246 {"R", lp_range},
1247 {"locale", lp_locale},
1248 {"version", lp_version},
1249 {"setmaxstack", lp_setmax},
1250 {"type", lp_type},
1251 {"T", lp_throw}, /* labeled failure throw */
1252 {"Lc", lp_labchoice}, /* labeled failure choice */
1253 {NULL, NULL}
1254};
1255
1256
1257static struct luaL_Reg metareg[] = {
1258 {"__mul", lp_seq},
1259 {"__add", lp_choice},
1260 {"__pow", lp_star},
1261 {"__gc", lp_gc},
1262 {"__len", lp_and},
1263 {"__div", lp_divcapture},
1264 {"__unm", lp_not},
1265 {"__sub", lp_sub},
1266 {NULL, NULL}
1267};
1268
1269
1270int luaopen_lpeglabel (lua_State *L);
1271int luaopen_lpeglabel (lua_State *L) {
1272 luaL_newmetatable(L, PATTERN_T);
1273 lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */
1274 lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
1275 luaL_register(L, NULL, metareg);
1276 luaL_register(L, "lpeglabel", pattreg);
1277 lua_pushvalue(L, -1);
1278 lua_setfield(L, -3, "__index");
1279 return 1;
1280}
1281
1282/* }====================================================== */
diff --git a/lptree.h b/lptree.h
new file mode 100644
index 0000000..43299a4
--- /dev/null
+++ b/lptree.h
@@ -0,0 +1,79 @@
1/*
2** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $
3*/
4
5#if !defined(lptree_h)
6#define lptree_h
7
8
9#include "lptypes.h"
10
11
12/*
13** types of trees
14*/
15typedef enum TTag {
16 TChar = 0, TSet, TAny, /* standard PEG elements */
17 TTrue, TFalse,
18 TRep,
19 TSeq, TChoice,
20 TNot, TAnd,
21 TCall,
22 TOpenCall,
23 TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */
24 TGrammar, /* sib1 is initial (and first) rule */
25 TBehind, /* match behind */
26 TCapture, /* regular capture */
27 TRunTime, /* run-time capture */
28 TThrow, TLabChoice /* labeled failure */
29} TTag;
30
31/* number of siblings for each tree */
32extern const byte numsiblings[];
33
34
35/*
36** Tree trees
37** The first sibling of a tree (if there is one) is immediately after
38** the tree. A reference to a second sibling (ps) is its position
39** relative to the position of the tree itself. A key in ktable
40** uses the (unique) address of the original tree that created that
41** entry. NULL means no data.
42*/
43typedef struct TTree {
44 byte tag;
45 byte cap; /* kind of capture (if it is a capture) */
46 unsigned short key; /* key in ktable for Lua data (0 if no key) */
47 Labelset labels; /* labeled failure */
48 union {
49 int ps; /* occasional second sibling */
50 int n; /* occasional counter */
51 } u;
52} TTree;
53
54
55/*
56** A complete pattern has its tree plus, if already compiled,
57** its corresponding code
58*/
59typedef struct Pattern {
60 union Instruction *code;
61 int codesize;
62 TTree tree[1];
63} Pattern;
64
65
66/* number of siblings for each tree */
67extern const byte numsiblings[];
68
69/* access to siblings */
70#define sib1(t) ((t) + 1)
71#define sib2(t) ((t) + (t)->u.ps)
72
73
74
75
76
77
78#endif
79
diff --git a/lptypes.h b/lptypes.h
new file mode 100644
index 0000000..503f1f0
--- /dev/null
+++ b/lptypes.h
@@ -0,0 +1,158 @@
1/*
2** $Id: lptypes.h,v 1.8 2013/04/12 16:26:38 roberto Exp $
3** LPeg - PEG pattern matching for Lua
4** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
5** written by Roberto Ierusalimschy
6*/
7
8#if !defined(lptypes_h)
9#define lptypes_h
10
11
12#if !defined(LPEG_DEBUG)
13#define NDEBUG
14#endif
15
16#include <assert.h>
17#include <limits.h>
18
19#include "lua.h"
20
21
22#define VERSION "0.12"
23
24
25#define PATTERN_T "lpeg-pattern"
26#define MAXSTACKIDX "lpeg-maxstack"
27
28
29/*
30** compatibility with Lua 5.2
31*/
32#if (LUA_VERSION_NUM == 502)
33
34#undef lua_equal
35#define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
36
37#undef lua_getfenv
38#define lua_getfenv lua_getuservalue
39#undef lua_setfenv
40#define lua_setfenv lua_setuservalue
41
42#undef lua_objlen
43#define lua_objlen lua_rawlen
44
45#undef luaL_register
46#define luaL_register(L,n,f) \
47 { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); }
48
49#endif
50
51
52/* default maximum size for call/backtrack stack */
53#if !defined(MAXBACK)
54#define MAXBACK 100
55#endif
56
57
58/* maximum number of rules in a grammar */
59#define MAXRULES 200
60
61
62
63/* initial size for capture's list */
64#define INITCAPSIZE 32
65
66
67/* index, on Lua stack, for subject */
68#define SUBJIDX 2
69
70/* number of fixed arguments to 'match' (before capture arguments) */
71#define FIXEDARGS 3
72
73/* index, on Lua stack, for capture list */
74#define caplistidx(ptop) ((ptop) + 2)
75
76/* index, on Lua stack, for pattern's ktable */
77#define ktableidx(ptop) ((ptop) + 3)
78
79/* index, on Lua stack, for backtracking stack */
80#define stackidx(ptop) ((ptop) + 4)
81
82
83
84typedef unsigned char byte;
85
86
87#define BITSPERCHAR 8
88
89#define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1)
90
91
92
93typedef struct Charset {
94 byte cs[CHARSETSIZE];
95} Charset;
96
97
98
99#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} }
100
101/* access to charset */
102#define treebuffer(t) ((byte *)((t) + 1))
103
104/* number of slots needed for 'n' bytes */
105#define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1)
106
107/* set 'b' bit in charset 'cs' */
108#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
109
110/* labeled failure begin */
111typedef int Labelset;
112
113#define MAXLABELS (sizeof(int) * 8)
114
115#define LFAIL 1
116
117/* set bit 'b' in set 's' */
118#define setlabel(s, b) ((s) |= (1 << (b)))
119/* labeled failure end */
120
121
122/*
123** in capture instructions, 'kind' of capture and its offset are
124** packed in field 'aux', 4 bits for each
125*/
126#define getkind(op) ((op)->i.aux & 0xF)
127#define getoff(op) (((op)->i.aux >> 4) & 0xF)
128#define joinkindoff(k,o) ((k) | ((o) << 4))
129
130#define MAXOFF 0xF
131#define MAXAUX 0xFF
132
133
134/* maximum number of bytes to look behind */
135#define MAXBEHIND MAXAUX
136
137
138/* maximum size (in elements) for a pattern */
139#define MAXPATTSIZE (SHRT_MAX - 10)
140
141
142/* size (in elements) for an instruction plus extra l bytes */
143#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)
144
145
146/* size (in elements) for a ISet instruction */
147#define CHARSETINSTSIZE instsize(CHARSETSIZE)
148
149/* size (in elements) for a IFunc instruction */
150#define funcinstsize(p) ((p)->i.aux + 2)
151
152
153
154#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
155
156
157#endif
158
diff --git a/lpvm.c b/lpvm.c
new file mode 100644
index 0000000..8d2c98e
--- /dev/null
+++ b/lpvm.c
@@ -0,0 +1,391 @@
1/*
2** $Id: lpvm.c,v 1.5 2013/04/12 16:29:49 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5
6#include <limits.h>
7#include <string.h>
8
9
10#include "lua.h"
11#include "lauxlib.h"
12
13#include "lpcap.h"
14#include "lptypes.h"
15#include "lpvm.h"
16#include "lpprint.h"
17
18
19/* initial size for call/backtrack stack */
20#if !defined(INITBACK)
21#define INITBACK 100
22#endif
23
24
25#define getoffset(p) (((p) + 1)->offset)
26
27static const Instruction giveup = {{IGiveup, 0, 0}};
28
29
30/*
31** {======================================================
32** Virtual Machine
33** =======================================================
34*/
35
36
37typedef struct Stack {
38 const char *s; /* saved position (or NULL for calls) */
39 const Instruction *p; /* next instruction */
40 int caplevel;
41 Labelset ls; /* labeled failure */
42} Stack;
43
44
45#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
46
47
48/*
49** Double the size of the array of captures
50*/
51static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
52 Capture *newc;
53 if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
54 luaL_error(L, "too many captures");
55 newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
56 memcpy(newc, cap, captop * sizeof(Capture));
57 lua_replace(L, caplistidx(ptop));
58 return newc;
59}
60
61
62/*
63** Double the size of the stack
64*/
65static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
66 Stack *stack = getstackbase(L, ptop);
67 Stack *newstack;
68 int n = *stacklimit - stack; /* current stack size */
69 int max, newn;
70 lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
71 max = lua_tointeger(L, -1); /* maximum allowed size */
72 lua_pop(L, 1);
73 if (n >= max) /* already at maximum size? */
74 luaL_error(L, "too many pending calls/choices");
75 newn = 2 * n; /* new size */
76 if (newn > max) newn = max;
77 newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
78 memcpy(newstack, stack, n * sizeof(Stack));
79 lua_replace(L, stackidx(ptop));
80 *stacklimit = newstack + newn;
81 return newstack + n; /* return next position */
82}
83
84
85/*
86** Interpret the result of a dynamic capture: false -> fail;
87** true -> keep current position; number -> next position.
88** Return new subject position. 'fr' is stack index where
89** is the result; 'curr' is current subject position; 'limit'
90** is subject's size.
91*/
92static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
93 lua_Integer res;
94 if (!lua_toboolean(L, fr)) { /* false value? */
95 lua_settop(L, fr - 1); /* remove results */
96 return -1; /* and fail */
97 }
98 else if (lua_isboolean(L, fr)) /* true? */
99 res = curr; /* keep current position */
100 else {
101 res = lua_tointeger(L, fr) - 1; /* new position */
102 if (res < curr || res > limit)
103 luaL_error(L, "invalid position returned by match-time capture");
104 }
105 lua_remove(L, fr); /* remove first result (offset) */
106 return res;
107}
108
109
110/*
111** Add capture values returned by a dynamic capture to the capture list
112** 'base', nested inside a group capture. 'fd' indexes the first capture
113** value, 'n' is the number of values (at least 1).
114*/
115static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
116 int i;
117 /* Cgroup capture is already there */
118 assert(base[0].kind == Cgroup && base[0].siz == 0);
119 base[0].idx = 0; /* make it an anonymous group */
120 for (i = 1; i <= n; i++) { /* add runtime captures */
121 base[i].kind = Cruntime;
122 base[i].siz = 1; /* mark it as closed */
123 base[i].idx = fd + i - 1; /* stack index of capture value */
124 base[i].s = s;
125 }
126 base[i].kind = Cclose; /* close group */
127 base[i].siz = 1;
128 base[i].s = s;
129}
130
131
132/*
133** Remove dynamic captures from the Lua stack (called in case of failure)
134*/
135static int removedyncap (lua_State *L, Capture *capture,
136 int level, int last) {
137 int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */
138 int top = lua_gettop(L);
139 if (id == 0) return 0; /* no dynamic captures? */
140 lua_settop(L, id - 1); /* remove captures */
141 return top - id + 1; /* number of values removed */
142}
143
144
145/*
146** Opcode interpreter
147*/
148const char *match (lua_State *L, const char *o, const char *s, const char *e,
149 Instruction *op, Capture *capture, int ptop) {
150 Stack stackbase[INITBACK];
151 Stack *stacklimit = stackbase + INITBACK;
152 Stack *stack = stackbase; /* point to first empty slot in stack */
153 int capsize = INITCAPSIZE;
154 int captop = 0; /* point to first empty slot in captures */
155 int ndyncap = 0; /* number of dynamic captures (in Lua stack) */
156 const Instruction *p = op; /* current instruction */
157 Labelset labelf; /* labeled failure */
158 stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
159 lua_pushlightuserdata(L, stackbase);
160 for (;;) {
161#if defined(LPEGDEBUG)
162 printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
163 s, stack - getstackbase(L, ptop), ndyncap, captop);
164 printinst(op, p);
165 printcaplist(capture, capture + captop);
166#endif
167 assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
168 switch ((Opcode)p->i.code) {
169 case IEnd: {
170 assert(stack == getstackbase(L, ptop) + 1);
171 capture[captop].kind = Cclose;
172 capture[captop].s = NULL;
173 return s;
174 }
175 case IGiveup: {
176 assert(stack == getstackbase(L, ptop));
177 return NULL;
178 }
179 case IRet: {
180 assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
181 p = (--stack)->p;
182 continue;
183 }
184 case IAny: {
185 if (s < e) { p++; s++; }
186 else {
187 labelf = LFAIL; /* labeled failure */
188 goto fail;
189 }
190 continue;
191 }
192 case ITestAny: {
193 if (s < e) p += 2;
194 else p += getoffset(p);
195 continue;
196 }
197 case IChar: {
198 if ((byte)*s == p->i.aux && s < e) { p++; s++; }
199 else {
200 labelf = LFAIL; /* labeled failure */
201 goto fail;
202 }
203 continue;
204 }
205 case ITestChar: {
206 if ((byte)*s == p->i.aux && s < e) p += 2;
207 else p += getoffset(p);
208 continue;
209 }
210 case ISet: {
211 int c = (byte)*s;
212 if (testchar((p+1)->buff, c) && s < e)
213 { p += CHARSETINSTSIZE; s++; }
214 else {
215 labelf = LFAIL; /* labeled failure */
216 goto fail;
217 }
218 continue;
219 }
220 case ITestSet: {
221 int c = (byte)*s;
222 if (testchar((p + 2)->buff, c) && s < e)
223 p += 1 + CHARSETINSTSIZE;
224 else p += getoffset(p);
225 continue;
226 }
227 case IBehind: {
228 int n = p->i.aux;
229 if (n > s - o) {
230 labelf = LFAIL; /* labeled failure */
231 goto fail;
232 }
233 s -= n; p++;
234 continue;
235 }
236 case ISpan: {
237 for (; s < e; s++) {
238 int c = (byte)*s;
239 if (!testchar((p+1)->buff, c)) break;
240 }
241 p += CHARSETINSTSIZE;
242 continue;
243 }
244 case IJmp: {
245 p += getoffset(p);
246 continue;
247 }
248 case IChoice: {
249 if (stack == stacklimit)
250 stack = doublestack(L, &stacklimit, ptop);
251 stack->p = p + getoffset(p);
252 stack->s = s;
253 stack->ls = LFAIL; /* labeled failure */
254 stack->caplevel = captop;
255 stack++;
256 p += 2;
257 continue;
258 }
259 case ILabChoice: { /* labeled failure */
260 if (stack == stacklimit)
261 stack = doublestack(L, &stacklimit, ptop);
262 stack->p = p + getoffset(p);
263 stack->s = s;
264 stack->ls = (p + 2)->labels;
265 stack->caplevel = captop;
266 stack++;
267 p += 3;
268 continue;
269 }
270
271 case ICall: {
272 if (stack == stacklimit)
273 stack = doublestack(L, &stacklimit, ptop);
274 stack->s = NULL;
275 stack->p = p + 2; /* save return address */
276 stack++;
277 p += getoffset(p);
278 continue;
279 }
280 case ICommit: {
281 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
282 stack--;
283 p += getoffset(p);
284 continue;
285 }
286 case IPartialCommit: {
287 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
288 (stack - 1)->s = s;
289 (stack - 1)->caplevel = captop;
290 p += getoffset(p);
291 continue;
292 }
293 case IBackCommit: {
294 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
295 s = (--stack)->s;
296 captop = stack->caplevel;
297 p += getoffset(p);
298 continue;
299 }
300 case IThrow: { /* labeled failure */
301 labelf = (p+1)->labels;
302 goto fail;
303 }
304 case IFailTwice:
305 assert(stack > getstackbase(L, ptop));
306 stack--;
307 /* go through */
308 case IFail:
309 labelf = LFAIL; /* labeled failure */
310 fail: { /* pattern failed: try to backtrack */
311 do { /* remove pending calls */
312 assert(stack > getstackbase(L, ptop));
313 s = (--stack)->s;
314 /*printf("fail (s == NULL => %d), labelf=%d stack->ls=%d (stack-> == giveup %d)\n",
315 s == NULL, labelf, stack->ls, stack->p == &giveup);*/
316 } while (s == NULL || (!(stack->ls & labelf) && stack->p != &giveup));
317 if (ndyncap > 0) /* is there matchtime captures? */
318 ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
319 captop = stack->caplevel;
320 p = stack->p;
321 continue;
322 }
323 case ICloseRunTime: {
324 CapState cs;
325 int rem, res, n;
326 int fr = lua_gettop(L) + 1; /* stack index of first result */
327 cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
328 n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
329 captop -= n; /* remove nested captures */
330 fr -= rem; /* 'rem' items were popped from Lua stack */
331 res = resdyncaptures(L, fr, s - o, e - o); /* get result */
332 if (res == -1) { /* fail? */
333 labelf = LFAIL; /* labeled failure */
334 goto fail;
335 }
336 s = o + res; /* else update current position */
337 n = lua_gettop(L) - fr + 1; /* number of new captures */
338 ndyncap += n - rem; /* update number of dynamic captures */
339 if (n > 0) { /* any new capture? */
340 if ((captop += n + 2) >= capsize) {
341 capture = doublecap(L, capture, captop, ptop);
342 capsize = 2 * captop;
343 }
344 /* add new captures to 'capture' list */
345 adddyncaptures(s, capture + captop - n - 2, n, fr);
346 }
347 p++;
348 continue;
349 }
350 case ICloseCapture: {
351 const char *s1 = s;
352 assert(captop > 0);
353 /* if possible, turn capture into a full capture */
354 if (capture[captop - 1].siz == 0 &&
355 s1 - capture[captop - 1].s < UCHAR_MAX) {
356 capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
357 p++;
358 continue;
359 }
360 else {
361 capture[captop].siz = 1; /* mark entry as closed */
362 capture[captop].s = s;
363 goto pushcapture;
364 }
365 }
366 case IOpenCapture:
367 capture[captop].siz = 0; /* mark entry as open */
368 capture[captop].s = s;
369 goto pushcapture;
370 case IFullCapture:
371 capture[captop].siz = getoff(p) + 1; /* save capture size */
372 capture[captop].s = s - getoff(p);
373 /* goto pushcapture; */
374 pushcapture: {
375 capture[captop].idx = p->i.key;
376 capture[captop].kind = getkind(p);
377 if (++captop >= capsize) {
378 capture = doublecap(L, capture, captop, ptop);
379 capsize = 2 * captop;
380 }
381 p++;
382 continue;
383 }
384 default: assert(0); return NULL;
385 }
386 }
387}
388
389/* }====================================================== */
390
391
diff --git a/lpvm.h b/lpvm.h
new file mode 100644
index 0000000..8024b45
--- /dev/null
+++ b/lpvm.h
@@ -0,0 +1,66 @@
1/*
2** $Id: lpvm.h,v 1.2 2013/04/03 20:37:18 roberto Exp $
3*/
4
5#if !defined(lpvm_h)
6#define lpvm_h
7
8#include "lpcap.h"
9
10
11/* Virtual Machine's instructions */
12typedef enum Opcode {
13 IAny, /* if no char, fail */
14 IChar, /* if char != aux, fail */
15 ISet, /* if char not in buff, fail */
16 ITestAny, /* in no char, jump to 'offset' */
17 ITestChar, /* if char != aux, jump to 'offset' */
18 ITestSet, /* if char not in buff, jump to 'offset' */
19 ISpan, /* read a span of chars in buff */
20 IBehind, /* walk back 'aux' characters (fail if not possible) */
21 IRet, /* return from a rule */
22 IEnd, /* end of pattern */
23 IChoice, /* stack a choice; next fail will jump to 'offset' */
24 IJmp, /* jump to 'offset' */
25 ICall, /* call rule at 'offset' */
26 IOpenCall, /* call rule number 'key' (must be closed to a ICall) */
27 ICommit, /* pop choice and jump to 'offset' */
28 IPartialCommit, /* update top choice to current position and jump */
29 IBackCommit, /* "fails" but jump to its own 'offset' */
30 IFailTwice, /* pop one choice and then fail */
31 IFail, /* go back to saved state on choice and jump to saved offset */
32 IGiveup, /* internal use */
33 IFullCapture, /* complete capture of last 'off' chars */
34 IOpenCapture, /* start a capture */
35 ICloseCapture,
36 ICloseRunTime,
37 IThrow, /* "fails" with a specific label labeled failure */
38 ILabChoice /* labeled choice */
39} Opcode;
40
41
42
43typedef union Instruction {
44 struct Inst {
45 byte code;
46 byte aux;
47 short key;
48 } i;
49 int offset;
50 Labelset labels; /* labeled failure */
51 byte buff[1];
52} Instruction;
53
54
55int getposition (lua_State *L, int t, int i);
56void printpatt (Instruction *p, int n);
57const char *match (lua_State *L, const char *o, const char *s, const char *e,
58 Instruction *op, Capture *capture, int ptop);
59int verify (lua_State *L, Instruction *op, const Instruction *p,
60 Instruction *e, int postable, int rule);
61void checkrule (lua_State *L, Instruction *op, int from, int to,
62 int postable, int rule);
63
64
65#endif
66
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..4f41062
--- /dev/null
+++ b/makefile
@@ -0,0 +1,55 @@
1LIBNAME = lpeglabel
2LUADIR = /usr/include/lua5.1/
3
4#COPT = -O2
5COPT = -DLPEG_DEBUG -g
6
7CWARNS = -Wall -Wextra -pedantic \
8 -Waggregate-return \
9 -Wcast-align \
10 -Wcast-qual \
11 -Wdisabled-optimization \
12 -Wpointer-arith \
13 -Wshadow \
14 -Wsign-compare \
15 -Wundef \
16 -Wwrite-strings \
17 -Wbad-function-cast \
18 -Wdeclaration-after-statement \
19 -Wmissing-prototypes \
20 -Wnested-externs \
21 -Wstrict-prototypes \
22# -Wunreachable-code \
23
24
25CFLAGS = $(CWARNS) $(COPT) -ansi -I$(LUADIR) -fPIC
26CC = gcc
27
28FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o
29
30# For Linux
31linux:
32 make lpeglabel.so "DLLFLAGS = -shared -fPIC"
33
34# For Mac OS
35macosx:
36 make lpeglabel.so "DLLFLAGS = -bundle -undefined dynamic_lookup"
37
38lpeglabel.so: $(FILES)
39 env $(CC) $(DLLFLAGS) $(FILES) -o lpeglabel.so
40
41$(FILES): makefile
42
43test: test.lua re.lua lpeglabel.so
44 ./test.lua
45
46clean:
47 rm -f $(FILES) lpeglabel.so
48
49
50lpcap.o: lpcap.c lpcap.h lptypes.h
51lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h
52lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h
53lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h
54lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h
55
diff --git a/re.lua b/re.lua
new file mode 100644
index 0000000..f45aad1
--- /dev/null
+++ b/re.lua
@@ -0,0 +1,276 @@
1-- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $
2
3-- imported functions and modules
4local tonumber, type, print, error = tonumber, type, print, error
5local setmetatable = setmetatable
6local unpack = table.unpack
7local m = require"lpeglabel"
8
9-- 'm' will be used to parse expressions, and 'mm' will be used to
10-- create expressions; that is, 're' runs on 'm', creating patterns
11-- on 'mm'
12local mm = m
13
14-- pattern's metatable
15local mt = getmetatable(mm.P(0))
16
17
18
19-- No more global accesses after this point
20local version = _VERSION
21if version == "Lua 5.2" then _ENV = nil end
22
23
24local any = m.P(1)
25
26
27-- Pre-defined names
28local Predef = { nl = m.P"\n" }
29
30
31local mem
32local fmem
33local gmem
34
35
36local function updatelocale ()
37 mm.locale(Predef)
38 Predef.a = Predef.alpha
39 Predef.c = Predef.cntrl
40 Predef.d = Predef.digit
41 Predef.g = Predef.graph
42 Predef.l = Predef.lower
43 Predef.p = Predef.punct
44 Predef.s = Predef.space
45 Predef.u = Predef.upper
46 Predef.w = Predef.alnum
47 Predef.x = Predef.xdigit
48 Predef.A = any - Predef.a
49 Predef.C = any - Predef.c
50 Predef.D = any - Predef.d
51 Predef.G = any - Predef.g
52 Predef.L = any - Predef.l
53 Predef.P = any - Predef.p
54 Predef.S = any - Predef.s
55 Predef.U = any - Predef.u
56 Predef.W = any - Predef.w
57 Predef.X = any - Predef.x
58 mem = {} -- restart memoization
59 fmem = {}
60 gmem = {}
61 local mt = {__mode = "v"}
62 setmetatable(mem, mt)
63 setmetatable(fmem, mt)
64 setmetatable(gmem, mt)
65end
66
67
68updatelocale()
69
70
71
72local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
73
74
75local function getdef (id, defs)
76 local c = defs and defs[id]
77 if not c then error("undefined name: " .. id) end
78 return c
79end
80
81
82local function patt_error (s, i)
83 local msg = (#s < i + 20) and s:sub(i)
84 or s:sub(i,i+20) .. "..."
85 msg = ("pattern error near '%s'"):format(msg)
86 error(msg, 2)
87end
88
89local function mult (p, n)
90 local np = mm.P(true)
91 while n >= 1 do
92 if n%2 >= 1 then np = np * p end
93 p = p * p
94 n = n/2
95 end
96 return np
97end
98
99local function equalcap (s, i, c)
100 if type(c) ~= "string" then return nil end
101 local e = #c + i
102 if s:sub(i, e - 1) == c then return e else return nil end
103end
104
105
106local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
107
108local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
109
110local arrow = S * "<-"
111
112local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
113
114name = m.C(name)
115
116
117-- a defined name only have meaning in a given environment
118local Def = name * m.Carg(1)
119
120local num = m.C(m.R"09"^1) * S / tonumber
121
122local String = "'" * m.C((any - "'")^0) * "'" +
123 '"' * m.C((any - '"')^0) * '"'
124
125
126local defined = "%" * Def / function (c,Defs)
127 local cat = Defs and Defs[c] or Predef[c]
128 if not cat then error ("name '" .. c .. "' undefined") end
129 return cat
130end
131
132local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
133
134local item = defined + Range + m.C(any)
135
136local Class =
137 "["
138 * (m.C(m.P"^"^-1)) -- optional complement symbol
139 * m.Cf(item * (item - "]")^0, mt.__add) /
140 function (c, p) return c == "^" and any - p or p end
141 * "]"
142
143local function adddef (t, k, exp)
144 if t[k] then
145 error("'"..k.."' already defined as a rule")
146 else
147 t[k] = exp
148 end
149 return t
150end
151
152local function firstdef (n, r) return adddef({n}, n, r) end
153
154
155local function NT (n, b)
156 if not b then
157 error("rule '"..n.."' used outside a grammar")
158 else return mm.V(n)
159 end
160end
161
162local function labchoice (...)
163 local t = { ... }
164 local n = #t
165 local p = t[1]
166 local i = 2
167 while i + 1 <= n do
168 p = mm.Lc(p, t[i+1], unpack(t[i]))
169 i = i + 2
170 end
171
172 return p
173end
174
175
176local exp = m.P{ "Exp",
177 Exp = S * ( m.V"Grammar"
178 + (m.V"Seq") * ("/" * m.V"Labels" * S * m.V"Seq")^1 / labchoice
179 + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );
180 Labels = m.Ct(m.P"{" * S * num * (S * "," * S * num)^0 * S * "}");
181 Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
182 * (#seq_follow + patt_error);
183 Prefix = "&" * S * m.V"Prefix" / mt.__len
184 + "!" * S * m.V"Prefix" / mt.__unm
185 + m.V"Suffix";
186 Suffix = m.Cf(m.V"Primary" * S *
187 ( ( m.P"+" * m.Cc(1, mt.__pow)
188 + m.P"*" * m.Cc(0, mt.__pow)
189 + m.P"?" * m.Cc(-1, mt.__pow)
190 + "^" * ( m.Cg(num * m.Cc(mult))
191 + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
192 )
193 + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
194 + m.P"{}" * m.Cc(nil, m.Ct)
195 + m.Cg(Def / getdef * m.Cc(mt.__div))
196 )
197 + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt))
198 ) * S
199 )^0, function (a,b,f) return f(a,b) end );
200 Primary = "(" * m.V"Exp" * ")"
201 + String / mm.P
202 + Class
203 + defined
204 + "%{" * S * num * (S * "," * S * num)^0 * S * "}" / mm.T
205 + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
206 function (n, p) return mm.Cg(p, n) end
207 + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
208 + m.P"{}" / mm.Cp
209 + "{~" * m.V"Exp" * "~}" / mm.Cs
210 + "{|" * m.V"Exp" * "|}" / mm.Ct
211 + "{" * m.V"Exp" * "}" / mm.C
212 + m.P"." * m.Cc(any)
213 + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;
214 Definition = name * arrow * m.V"Exp";
215 Grammar = m.Cg(m.Cc(true), "G") *
216 m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0,
217 adddef) / mm.P
218}
219
220local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
221
222
223local function compile (p, defs)
224 if mm.type(p) == "pattern" then return p end -- already compiled
225 local cp = pattern:match(p, 1, defs)
226 if not cp then error("incorrect pattern", 3) end
227 return cp
228end
229
230local function match (s, p, i)
231 local cp = mem[p]
232 if not cp then
233 cp = compile(p)
234 mem[p] = cp
235 end
236 return cp:match(s, i or 1)
237end
238
239local function find (s, p, i)
240 local cp = fmem[p]
241 if not cp then
242 cp = compile(p) / 0
243 cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
244 fmem[p] = cp
245 end
246 local i, e = cp:match(s, i or 1)
247 if i then return i, e - 1
248 else return i
249 end
250end
251
252local function gsub (s, p, rep)
253 local g = gmem[p] or {} -- ensure gmem[p] is not collected while here
254 gmem[p] = g
255 local cp = g[rep]
256 if not cp then
257 cp = compile(p)
258 cp = mm.Cs((cp / rep + 1)^0)
259 g[rep] = cp
260 end
261 return cp:match(s)
262end
263
264
265-- exported names
266local re = {
267 compile = compile,
268 match = match,
269 find = find,
270 gsub = gsub,
271 updatelocale = updatelocale,
272}
273
274if version == "Lua 5.1" then _G.re = re end
275
276return re
diff --git a/test.lua b/test.lua
new file mode 100755
index 0000000..d486c03
--- /dev/null
+++ b/test.lua
@@ -0,0 +1,1386 @@
1#!/usr/bin/env lua5.1
2
3-- $Id: test.lua,v 1.101 2013/04/12 16:30:33 roberto Exp $
4
5-- require"strict" -- just to be pedantic
6
7local m = require"lpeglabel"
8
9
10-- for general use
11local a, b, c, d, e, f, g, p, t
12
13
14-- compatibility with Lua 5.2
15local unpack = rawget(table, "unpack") or unpack
16local loadstring = rawget(_G, "loadstring") or load
17
18
19-- most tests here do not need much stack space
20m.setmaxstack(5)
21
22local any = m.P(1)
23local space = m.S" \t\n"^0
24
25local function checkeq (x, y, p)
26if p then print(x,y) end
27 if type(x) ~= "table" then assert(x == y)
28 else
29 for k,v in pairs(x) do checkeq(v, y[k], p) end
30 for k,v in pairs(y) do checkeq(v, x[k], p) end
31 end
32end
33
34
35local mt = getmetatable(m.P(1))
36
37
38local allchar = {}
39for i=0,255 do allchar[i + 1] = i end
40allchar = string.char(unpack(allchar))
41assert(#allchar == 256)
42
43local function cs2str (c)
44 return m.match(m.Cs((c + m.P(1)/"")^0), allchar)
45end
46
47local function eqcharset (c1, c2)
48 assert(cs2str(c1) == cs2str(c2))
49end
50
51
52print"General tests for LPeg library"
53
54assert(type(m.version()) == "string")
55print("version " .. m.version())
56assert(m.type("alo") ~= "pattern")
57assert(m.type(io.input) ~= "pattern")
58assert(m.type(m.P"alo") == "pattern")
59
60-- tests for some basic optimizations
61assert(m.match(m.P(false) + "a", "a") == 2)
62assert(m.match(m.P(true) + "a", "a") == 1)
63assert(m.match("a" + m.P(false), "b") == nil)
64assert(m.match("a" + m.P(true), "b") == 1)
65
66assert(m.match(m.P(false) * "a", "a") == nil)
67assert(m.match(m.P(true) * "a", "a") == 2)
68assert(m.match("a" * m.P(false), "a") == nil)
69assert(m.match("a" * m.P(true), "a") == 2)
70
71assert(m.match(#m.P(false) * "a", "a") == nil)
72assert(m.match(#m.P(true) * "a", "a") == 2)
73assert(m.match("a" * #m.P(false), "a") == nil)
74assert(m.match("a" * #m.P(true), "a") == 2)
75
76
77-- tests for locale
78do
79 assert(m.locale(m) == m)
80 local t = {}
81 assert(m.locale(t, m) == t)
82 local x = m.locale()
83 for n,v in pairs(x) do
84 assert(type(n) == "string")
85 eqcharset(v, m[n])
86 end
87end
88
89
90assert(m.match(3, "aaaa"))
91assert(m.match(4, "aaaa"))
92assert(not m.match(5, "aaaa"))
93assert(m.match(-3, "aa"))
94assert(not m.match(-3, "aaa"))
95assert(not m.match(-3, "aaaa"))
96assert(not m.match(-4, "aaaa"))
97assert(m.P(-5):match"aaaa")
98
99assert(m.match("a", "alo") == 2)
100assert(m.match("al", "alo") == 3)
101assert(not m.match("alu", "alo"))
102assert(m.match(true, "") == 1)
103
104local digit = m.S"0123456789"
105local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
106local lower = m.S"abcdefghijklmnopqrstuvwxyz"
107local letter = m.S"" + upper + lower
108local alpha = letter + digit + m.R()
109
110eqcharset(m.S"", m.P(false))
111eqcharset(upper, m.R("AZ"))
112eqcharset(lower, m.R("az"))
113eqcharset(upper + lower, m.R("AZ", "az"))
114eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
115eqcharset(digit, m.S"01234567" + "8" + "9")
116eqcharset(upper, letter - lower)
117eqcharset(m.S(""), m.R())
118assert(cs2str(m.S("")) == "")
119
120eqcharset(m.S"\0", "\0")
121eqcharset(m.S"\1\0\2", m.R"\0\2")
122eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
123eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")
124
125local word = alpha^1 * (1 - alpha)^0
126
127assert((word^0 * -1):match"alo alo")
128assert(m.match(word^1 * -1, "alo alo"))
129assert(m.match(word^2 * -1, "alo alo"))
130assert(not m.match(word^3 * -1, "alo alo"))
131
132assert(not m.match(word^-1 * -1, "alo alo"))
133assert(m.match(word^-2 * -1, "alo alo"))
134assert(m.match(word^-3 * -1, "alo alo"))
135
136local eos = m.P(-1)
137
138assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
139assert(not m.match(digit^0 * letter * eos, "1257a1"))
140
141b = {
142 [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
143}
144
145assert(m.match(b, "(al())()"))
146assert(not m.match(b * eos, "(al())()"))
147assert(m.match(b * eos, "((al())()(é))"))
148assert(not m.match(b, "(al()()"))
149
150assert(not m.match(letter^1 - "for", "foreach"))
151assert(m.match(letter^1 - ("for" * eos), "foreach"))
152assert(not m.match(letter^1 - ("for" * eos), "for"))
153
154function basiclookfor (p)
155 return m.P {
156 [1] = p + (1 * m.V(1))
157 }
158end
159
160function caplookfor (p)
161 return basiclookfor(p:C())
162end
163
164assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou")
165a = {m.match(caplookfor(letter^1)^0, " two words, one more ")}
166checkeq(a, {"two", "words", "one", "more"})
167
168assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7)
169
170a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
171checkeq(a, {"123", "d"})
172
173a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")}
174checkeq(a, {"123"})
175
176a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
177checkeq(a, {"abcd", "l"})
178
179a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
180checkeq(a, {10,20,30,2})
181a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
182checkeq(a, {1,10,20,30,2})
183a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
184checkeq(a, {1,10,20,30,2})
185a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
186checkeq(a, {1,7,8,10,20,30,2})
187a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
188checkeq(a, {1,2,3,4})
189
190a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
191checkeq(a, {1, 5})
192
193
194t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
195checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
196
197
198-- test for small capture boundary
199for i = 250,260 do
200 assert(#m.match(m.C(i), string.rep('a', i)) == i)
201 assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
202end
203
204
205-- tests for any*n and any*-n
206for n = 1, 550 do
207 local x_1 = string.rep('x', n - 1)
208 local x = x_1 .. 'a'
209 assert(not m.P(n):match(x_1))
210 assert(m.P(n):match(x) == n + 1)
211 assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
212 assert(m.C(n):match(x) == x)
213 assert(m.C(m.C(n)):match(x) == x)
214 assert(m.P(-n):match(x_1) == 1)
215 assert(not m.P(-n):match(x))
216 assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
217 local n3 = math.floor(n/3)
218 assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
219end
220
221-- true values
222assert(m.P(0):match("x") == 1)
223assert(m.P(0):match("") == 1)
224assert(m.C(0):match("x") == "")
225
226assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
227assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
228assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
229p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
230
231
232-- test for alternation optimization
233assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
234assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
235assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
236assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
237assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3)
238assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
239assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
240assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
241assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
242assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
243assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
244assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
245assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
246assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
247assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
248assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
249assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
250assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
251assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
252assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
253assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
254assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
255assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
256assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
257assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
258assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
259assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
260assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
261assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
262assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
263assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
264assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))
265
266assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)
267
268
269-- bug in 0.12 (rc1)
270assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)
271
272assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) ==
273 4*10 + 1)
274
275-- optimizations with optional parts
276assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1)
277assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1)
278assert(m.match(("ab" * m.B"c")^-1, "ab") == 1)
279assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7)
280
281assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)
282
283p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
284assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)
285
286
287pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
288assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
289 m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
290print"+"
291
292
293-- tests for capture optimizations
294assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5)
295t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
296checkeq(t, {3, 6})
297
298
299-- tests for numbered captures
300p = m.C(1)
301assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a")
302assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef")
303assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc")
304assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7)
305
306a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh")
307assert(a == "a" and b == "efg" and c == "h")
308
309-- test for table captures
310t = m.match(m.Ct(letter^1), "alo")
311checkeq(t, {})
312
313t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
314assert(n == "t" and table.concat(t) == "alo")
315
316t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
317assert(table.concat(t, ";") == "alo;a;l;o")
318
319t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
320assert(table.concat(t, ";") == "alo;a;l;o")
321
322t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
323assert(table.concat(t[1], ";") == "1;2;2;3;3;4")
324
325t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
326checkeq(t, {"alo", "a", "o"})
327
328
329-- tests for groups
330p = m.Cg(1) -- no capture
331assert(p:match('x') == 'x')
332p = m.Cg(m.P(true)/function () end * 1) -- no value
333assert(p:match('x') == 'x')
334p = m.Cg(m.Cg(m.Cg(m.C(1))))
335assert(p:match('x') == 'x')
336p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
337t = {p:match'abc'}
338checkeq(t, {'a', 'b', 'c', 1, 2})
339
340p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
341t = p:match''
342checkeq(t, {hi = 10, ho = 20})
343t = p:match'abc'
344checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
345
346
347-- test for error messages
348local function checkerr (msg, ...)
349 assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...))))
350end
351
352checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
353checkerr("rule '1' used outside a grammar", m.match, m.V(1), "")
354checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "")
355checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "")
356checkerr("undefined in given grammar", m.match, { m.V{} }, "")
357
358checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
359checkerr("grammar has no initial rule", m.P, { [print] = {} })
360
361-- grammar with a long call chain before left recursion
362p = {'a',
363 a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
364 b = m.V'c',
365 c = m.V'd',
366 d = m.V'e',
367 e = m.V'f',
368 f = m.V'g',
369 g = m.P''
370}
371checkerr("rule 'a' may be left recursive", m.match, p, "a")
372
373
374-- tests for non-pattern as arguments to pattern functions
375
376p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
377assert(m.match(p, "aaabaac") == 7)
378
379p = m.P'abc' * 2 * -5 * true * 'de' -- mix of numbers and strings and booleans
380
381assert(p:match("abc01de") == 8)
382assert(p:match("abc01de3456") == nil)
383
384p = 'abc' * (2 * (-5 * (true * m.P'de')))
385
386assert(p:match("abc01de") == 8)
387assert(p:match("abc01de3456") == nil)
388
389p = { m.V(2), m.P"abc" } *
390 (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
391assert(p:match("abcaaaxx") == 7)
392assert(p:match("abcxx") == 6)
393
394
395-- a large table capture
396t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
397assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
398
399print('+')
400
401
402-- bug in 0.10 (rechecking a grammar, after tail-call optimization)
403m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }
404
405local V = m.V
406
407local Space = m.S(" \n\t")^0
408local Number = m.C(m.R("09")^1) * Space
409local FactorOp = m.C(m.S("+-")) * Space
410local TermOp = m.C(m.S("*/")) * Space
411local Open = "(" * Space
412local Close = ")" * Space
413
414
415local function f_factor (v1, op, v2, d)
416 assert(d == nil)
417 if op == "+" then return v1 + v2
418 else return v1 - v2
419 end
420end
421
422
423local function f_term (v1, op, v2, d)
424 assert(d == nil)
425 if op == "*" then return v1 * v2
426 else return v1 / v2
427 end
428end
429
430G = m.P{ "Exp",
431 Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
432 Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
433 Term = Number / tonumber + Open * V"Exp" * Close;
434}
435
436G = Space * G * -1
437
438for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do
439 assert(m.match(G, s) == loadstring("return "..s)())
440end
441
442
443-- test for grammars (errors deep in calling non-terminals)
444g = m.P{
445 [1] = m.V(2) + "a",
446 [2] = "a" * m.V(3) * "x",
447 [3] = "b" * m.V(3) + "c"
448}
449
450assert(m.match(g, "abbbcx") == 7)
451assert(m.match(g, "abbbbx") == 2)
452
453
454-- tests for \0
455assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
456assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
457assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
458assert(not m.match(-4, "\0\1\0a"))
459assert(m.match("\0\1\0a", "\0\1\0a") == 5)
460assert(m.match("\0\0\0", "\0\0\0") == 4)
461assert(not m.match("\0\0\0", "\0\0"))
462
463
464-- tests for predicates
465assert(not m.match(-m.P("a") * 2, "alo"))
466assert(m.match(- -m.P("a") * 2, "alo") == 3)
467assert(m.match(#m.P("a") * 2, "alo") == 3)
468assert(m.match(##m.P("a") * 2, "alo") == 3)
469assert(not m.match(##m.P("c") * 2, "alo"))
470assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
471assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
472assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
473assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
474
475p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
476assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
477
478p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
479assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10)
480
481
482
483-- look-behind predicate
484assert(not m.match(m.B'a', 'a'))
485assert(m.match(1 * m.B'a', 'a') == 2)
486assert(not m.match(m.B(1), 'a'))
487assert(m.match(1 * m.B(1), 'a') == 2)
488assert(m.match(-m.B(1), 'a') == 1)
489assert(m.match(m.B(250), string.rep('a', 250)) == nil)
490assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
491assert(not pcall(m.B, 260))
492
493B = #letter * -m.B(letter) + -letter * m.B(letter)
494x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
495checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10})
496checkeq(m.match(x, ' ar cal '), {2,4,5,8})
497checkeq(m.match(x, ' '), {})
498checkeq(m.match(x, 'aloalo'), {1,7})
499
500assert(m.match(B, "a") == 1)
501assert(m.match(1 * B, "a") == 2)
502assert(not m.B(1 - letter):match(""))
503assert((-m.B(letter)):match("") == 1)
504
505assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
506assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
507assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)
508
509-- look-behind with grammars
510assert(m.match('a' * m.B{'x', x = m.P(3)}, 'aaa') == nil)
511assert(m.match('aa' * m.B{'x', x = m.P('aaa')}, 'aaaa') == nil)
512assert(m.match('aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa') == 4)
513
514
515
516-- bug in 0.9
517assert(m.match(('a' * #m.P'b'), "ab") == 2)
518assert(not m.match(('a' * #m.P'b'), "a"))
519
520assert(not m.match(#m.S'567', ""))
521assert(m.match(#m.S'567' * 1, "6") == 2)
522
523
524-- tests for Tail Calls
525
526--labeled failure
527p = m.P{ 'a' * m.V(1) + '' }
528assert(p:match(string.rep('a', 1000)) == 1001)
529
530-- create a grammar for a simple DFA for even number of 0s and 1s
531--
532-- ->1 <---0---> 2
533-- ^ ^
534-- | |
535-- 1 1
536-- | |
537-- V V
538-- 3 <---0---> 4
539--
540-- this grammar should keep no backtracking information
541
542p = m.P{
543 [1] = '0' * m.V(2) + '1' * m.V(3) + -1,
544 [2] = '0' * m.V(1) + '1' * m.V(4),
545 [3] = '0' * m.V(4) + '1' * m.V(1),
546 [4] = '0' * m.V(3) + '1' * m.V(2),
547}
548
549-- labeled failure
550assert(p:match(string.rep("00", 10000)))
551assert(p:match(string.rep("01", 10000)))
552assert(p:match(string.rep("011", 10000)))
553assert(not p:match(string.rep("011", 10000) .. "1"))
554assert(not p:match(string.rep("011", 10001)))
555
556
557-- this grammar does need backtracking info.
558local lim = 10000
559p = m.P{ '0' * m.V(1) + '0' }
560assert(not pcall(m.match, p, string.rep("0", lim)))
561m.setmaxstack(2*lim)
562assert(not pcall(m.match, p, string.rep("0", lim)))
563m.setmaxstack(2*lim + 4)
564assert(pcall(m.match, p, string.rep("0", lim)))
565
566-- this repetition should not need stack space (only the call does)
567p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
568m.setmaxstack(200)
569-- labeled failure
570assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
571
572m.setmaxstack(5) -- restore original limit
573
574-- tests for optional start position
575assert(m.match("a", "abc", 1))
576assert(m.match("b", "abc", 2))
577assert(m.match("c", "abc", 3))
578assert(not m.match(1, "abc", 4))
579assert(m.match("a", "abc", -3))
580assert(m.match("b", "abc", -2))
581assert(m.match("c", "abc", -1))
582assert(m.match("abc", "abc", -4)) -- truncate to position 1
583
584assert(m.match("", "abc", 10)) -- empty string is everywhere!
585assert(m.match("", "", 10))
586assert(not m.match(1, "", 1))
587assert(not m.match(1, "", -1))
588assert(not m.match(1, "", 0))
589
590print("+")
591
592
593-- tests for argument captures
594assert(not pcall(m.Carg, 0))
595assert(not pcall(m.Carg, -1))
596assert(not pcall(m.Carg, 2^18))
597assert(not pcall(m.match, m.Carg(1), 'a', 1))
598assert(m.match(m.Carg(1), 'a', 1, print) == print)
599x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
600checkeq(x, {10, 20})
601
602assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
603 m.Cmt(m.Cb("a"), function (s,i,x)
604 assert(s == "a" and i == 1);
605 return i, x+1
606 end) *
607 m.Carg(2), function (s,i,a,b,c)
608 assert(s == "a" and i == 1 and c == nil);
609 return i, 2*a + 3*b
610 end) * "a",
611 "a", 1, false, 100, 1000) == 2*1001 + 3*100)
612
613
614-- tests for Lua functions
615
616t = {}
617s = ""
618p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false
619s = "hi, this is a test"
620assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
621assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)
622
623assert(not m.match(p, s))
624
625p = mt.__add(function (s, i) return i end, function (s, i) return nil end)
626assert(m.match(p, "alo"))
627
628p = mt.__mul(function (s, i) return i end, function (s, i) return nil end)
629assert(not m.match(p, "alo"))
630
631
632t = {}
633p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
634s = "hi, this is a test"
635assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
636assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)
637
638t = {}
639p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
640 return i <= s1:len() and i end) * 1
641s = "hi, this is a test"
642assert(m.match(p^0, s) == string.len(s) + 1)
643assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)
644
645p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
646assert(m.match(p, "aaaa") == 5)
647assert(m.match(p, "abaa") == 2)
648assert(not m.match(p, "baaa"))
649
650assert(not pcall(m.match, function () return 2^20 end, s))
651assert(not pcall(m.match, function () return 0 end, s))
652assert(not pcall(m.match, function (s, i) return i - 1 end, s))
653assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s))
654assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
655assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s))
656assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
657assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s))
658assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
659assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
660 string.len(s) + 1)
661for i = 1, string.len(s) + 1 do
662 assert(m.match(function (_, _) return i end, s) == i)
663end
664
665p = (m.P(function (s, i) return i%2 == 0 and i end) * 1
666 + m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0
667 * -1
668assert(p:match(string.rep('a', 14000)))
669
670-- tests for Function Replacements
671f = function (a, ...) if a ~= "x" then return {a, ...} end end
672
673t = m.match(m.C(1)^0/f, "abc")
674checkeq(t, {"a", "b", "c"})
675
676t = m.match(m.C(1)^0/f/f, "abc")
677checkeq(t, {{"a", "b", "c"}})
678
679t = m.match(m.P(1)^0/f/f, "abc") -- no capture
680checkeq(t, {{"abc"}})
681
682t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
683checkeq(t, {{"abc"}, 4})
684
685t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
686checkeq(t, {{"a", "b", "c"}, 4})
687
688t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
689checkeq(t, {4})
690
691t = m.match(m.C(m.C(1)^0)/f, "abc")
692checkeq(t, {"abc", "a", "b", "c"})
693
694g = function (...) return 1, ... end
695t = {m.match(m.C(1)^0/g/g, "abc")}
696checkeq(t, {1, 1, "a", "b", "c"})
697
698t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
699t1 = {1,1,nil,nil,4,nil,3,nil,nil}
700for i=1,10 do assert(t[i] == t1[i]) end
701
702t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
703checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
704
705t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
706checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
707
708-- tests for Query Replacements
709
710assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
711assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
712assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
713t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
714checkeq(t, {40})
715
716assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
717assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
718assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
719assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
720assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
721assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
722assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
723assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
724assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
725assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
726assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
727assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
728assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
729
730assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
731
732assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
733assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
734assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
735assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
736assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
737assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
738 "411 - abc ")
739
740assert(pcall(m.match, m.P(1)/"%0", "abc"))
741assert(not pcall(m.match, m.P(1)/"%1", "abc")) -- out of range
742assert(not pcall(m.match, m.P(1)/"%9", "abc")) -- out of range
743
744p = m.C(1)
745p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
746assert(p:match("1234567890") == "9 - 1")
747
748assert(m.match(m.Cc(print), "") == print)
749
750-- too many captures (just ignore extra ones)
751p = m.C(1)^0 / "%2-%9-%0-%9"
752assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
753s = string.rep("12345678901234567890", 20)
754assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
755
756-- string captures with non-string subcaptures
757p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
758assert(p:match'x' == 'alo - x - alo')
759
760assert(not pcall(m.match, m.Cc(true) / "%1", "a"))
761
762-- long strings for string capture
763l = 10000
764s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
765
766p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
767
768assert(p:match(s) == string.rep('c', l) ..
769 string.rep('b', l) ..
770 string.rep('a', l))
771
772print"+"
773
774-- accumulator capture
775function f (x) return x + 1 end
776assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
777
778t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
779checkeq(t, {1})
780p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
781 rawset)
782t = p:match("a=b;c=du;xux=yuy;")
783checkeq(t, {a="b", c="du", xux="yuy"})
784
785
786-- errors in accumulator capture
787
788-- very long match (forces fold to be a pair open-close) producing with
789-- no initial capture
790assert(not pcall(m.match, m.Cf(m.P(500), print), string.rep('a', 600)))
791
792-- nested capture produces no initial value
793assert(not pcall(m.match, m.Cf(m.P(1) / {}, print), "alo"))
794
795
796-- tests for loop checker
797
798local function haveloop (p)
799 assert(not pcall(function (p) return p^0 end, m.P(p)))
800end
801
802haveloop(m.P("x")^-4)
803assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
804assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
805haveloop("")
806haveloop(m.P("x")^0)
807haveloop(m.P("x")^-1)
808haveloop(m.P("x") + 1 + 2 + m.P("a")^-1)
809haveloop(-m.P("ab"))
810haveloop(- -m.P("ab"))
811haveloop(# #(m.P("ab") + "xy"))
812haveloop(- #m.P("ab")^0)
813haveloop(# -m.P("ab")^1)
814haveloop(#m.V(3))
815haveloop(m.V(3) + m.V(1) + m.P('a')^-1)
816haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
817assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
818 == 3)
819assert(m.match(m.P""^-3, "a") == 1)
820
821local function find (p, s)
822 return m.match(basiclookfor(p), s)
823end
824
825
826local function badgrammar (g, expected)
827 local stat, msg = pcall(m.P, g)
828 assert(not stat)
829 if expected then assert(find(expected, msg)) end
830end
831
832badgrammar({[1] = m.V(1)}, "rule '1'")
833badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal
834badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal
835badgrammar({[1] = m.V{}}, "rule '(a table)'") -- invalid non-terminal
836badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
837badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
838badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive
839badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive
840badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive
841badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'") -- inf. loop
842badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'") -- inf. loop
843badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'") -- inf. loop
844badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'") -- inf. loop
845badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop
846badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive
847badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'")
848
849assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
850assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
851
852
853-- good x bad grammars
854m.P{ ('a' * m.V(1))^-1 }
855m.P{ -('a' * m.V(1)) }
856m.P{ ('abc' * m.V(1))^-1 }
857m.P{ -('abc' * m.V(1)) }
858badgrammar{ #m.P('abc') * m.V(1) }
859badgrammar{ -('a' + m.V(1)) }
860m.P{ #('a' * m.V(1)) }
861badgrammar{ #('a' + m.V(1)) }
862m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
863badgrammar{ m.B{ m.P'abc' } * m.V(1) }
864badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }
865
866
867-- simple tests for maximum sizes:
868local p = m.P"a"
869for i=1,14 do p = p * p end
870
871p = {}
872for i=1,100 do p[i] = m.P"a" end
873p = m.P(p)
874
875
876-- strange values for rule labels
877
878p = m.P{ "print",
879 print = m.V(print),
880 [print] = m.V(_G),
881 [_G] = m.P"a",
882 }
883
884assert(p:match("a"))
885
886-- initial rule
887g = {}
888for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end
889g.i11 = m.P""
890for i = 1, 10 do
891 g[1] = "i"..i
892 local p = m.P(g)
893 assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
894end
895
896print"+"
897
898
899-- tests for back references
900assert(not pcall(m.match, m.Cb('x'), ''))
901assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a'))
902
903p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
904t = p:match("ab")
905checkeq(t, {"a", "b"})
906
907
908t = {}
909function foo (p) t[#t + 1] = p; return p .. "x" end
910
911p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" *
912 m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
913 m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
914 m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
915x = {p:match'ab'}
916checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
917checkeq(t, {'ab',
918 'ab', 'abx',
919 'ab', 'abx', 'abxx',
920 'ab', 'abx', 'abxx', 'abxxx'})
921
922
923
924-- tests for match-time captures
925
926p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
927 + 'acd'
928
929assert(p:match('abc') == 3)
930assert(p:match('acd') == 4)
931
932local function id (s, i, ...)
933 return true, ...
934end
935
936assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
937 m.R'09'^1 / string.char +
938 m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
939
940p = m.P{'S',
941 S = m.V'atom' * space
942 + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
943 atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
944}
945x = p:match"(a g () ((b) c) (d (e)))"
946checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
947
948x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
949assert(#x == 500)
950
951local function id(s, i, x)
952 if x == 'a' then return i, 1, 3, 7
953 else return nil, 2, 4, 6, 8
954 end
955end
956
957p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0
958assert(table.concat{p:match('abababab')} == string.rep('137', 4))
959
960local function ref (s, i, x)
961 return m.match(x, s, i - x:len())
962end
963
964assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
965assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
966assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
967
968ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end
969
970assert(m.Cmt(1, ref):match'2')
971assert(not m.Cmt(1, ref):match'1')
972assert(m.Cmt(m.P(1)^0, ref):match'03')
973
974function ref (s, i, a, b)
975 if a == b then return i, a:upper() end
976end
977
978p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
979p = (any - p)^0 * p * any^0 * -1
980
981assert(p:match'abbbc-bc ddaa' == 'BC')
982
983do -- match-time captures cannot be optimized away
984 local touch = 0
985 f = m.P(function () touch = touch + 1; return true end)
986
987 local function check(n) n = n or 1; assert(touch == n); touch = 0 end
988
989 assert(m.match(f * false + 'b', 'a') == nil); check()
990 assert(m.match(f * false + 'b', '') == nil); check()
991 assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
992 assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
993 assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
994 assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
995 assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
996 assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
997 assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
998 check()
999 assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
1000 assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
1001 assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
1002 assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
1003 assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
1004end
1005
1006c = '[' * m.Cg(m.P'='^0, "init") * '[' *
1007 { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
1008 return s1 == s2 end)
1009 + 1 * m.V(1) } / 0
1010
1011assert(c:match'[==[]]====]]]]==]===[]' == 18)
1012assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1013assert(not c:match'[[]=]====]=]=]==]===[]')
1014
1015
1016-- old bug: optimization of concat with fail removed match-time capture
1017p = m.Cmt(0, function (s) p = s end) * m.P(false)
1018assert(not p:match('alo'))
1019assert(p == 'alo')
1020
1021
1022-- ensure that failed match-time captures are not kept on Lua stack
1023do
1024 local t = {__mode = "kv"}; setmetatable(t,t)
1025 local c = 0
1026
1027 local function foo (s,i)
1028 collectgarbage();
1029 assert(next(t) == "__mode" and next(t, "__mode") == nil)
1030 local x = {}
1031 t[x] = true
1032 c = c + 1
1033 return i, x
1034 end
1035
1036 local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
1037 p:match(string.rep('1', 10))
1038 assert(c == 11)
1039end
1040
1041p = (m.P(function () return true, "a" end) * 'a'
1042 + m.P(function (s, i) return i, "aa", 20 end) * 'b'
1043 + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0
1044
1045t = {p:match('abacc')}
1046checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
1047
1048
1049-------------------------------------------------------------------
1050-- Tests for 're' module
1051-------------------------------------------------------------------
1052
1053local re = require "re"
1054
1055local match, compile = re.match, re.compile
1056
1057assert(match("a", ".") == 2)
1058assert(match("a", "''") == 1)
1059assert(match("", " ! . ") == 1)
1060assert(not match("a", " ! . "))
1061assert(match("abcde", " ( . . ) * ") == 5)
1062assert(match("abbcde", " [a-c] +") == 5)
1063assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
1064assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
1065assert(match("abbc--", " [a-c] + +") == 5)
1066assert(match("abbc--", " [ac-] +") == 2)
1067assert(match("abbc--", " [-acb] + ") == 7)
1068assert(not match("abbcde", " [b-z] + "))
1069assert(match("abb\"de", '"abb"["]"de"') == 7)
1070assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
1071assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
1072local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
1073checkeq(t, {4, 5, 7})
1074local t = {match("abceefe", "((&&'e' {})? .)*")}
1075checkeq(t, {4, 5, 7})
1076local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
1077checkeq(t, {4, 5, 7})
1078local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
1079checkeq(t, {4, 5, 7})
1080
1081assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
1082assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
1083assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
1084
1085assert(match("abc", "a <- (. a)?") == 4)
1086b = "balanced <- '(' ([^()] / balanced)* ')'"
1087assert(match("(abc)", b))
1088assert(match("(a(b)((c) (d)))", b))
1089assert(not match("(a(b ((c) (d)))", b))
1090
1091b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
1092assert(b == m.P(b))
1093assert(b:match"((((a))(b)))")
1094
1095local g = [[
1096 S <- "0" B / "1" A / "" -- balanced strings
1097 A <- "0" S / "1" A A -- one more 0
1098 B <- "1" S / "0" B B -- one more 1
1099]]
1100assert(match("00011011", g) == 9)
1101
1102local g = [[
1103 S <- ("0" B / "1" A)*
1104 A <- "0" / "1" A A
1105 B <- "1" / "0" B B
1106]]
1107assert(match("00011011", g) == 9)
1108assert(match("000110110", g) == 9)
1109assert(match("011110110", g) == 3)
1110assert(match("000110010", g) == 1)
1111
1112s = "aaaaaaaaaaaaaaaaaaaaaaaa"
1113assert(match(s, "'a'^3") == 4)
1114assert(match(s, "'a'^0") == 1)
1115assert(match(s, "'a'^+3") == s:len() + 1)
1116assert(not match(s, "'a'^+30"))
1117assert(match(s, "'a'^-30") == s:len() + 1)
1118assert(match(s, "'a'^-5") == 6)
1119for i = 1, s:len() do
1120 assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
1121 assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
1122 assert(match(s, string.format("'a'^%d", i)) == i + 1)
1123end
1124assert(match("01234567890123456789", "[0-9]^3+") == 19)
1125
1126
1127assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
1128t = match("0123456789", "{| {.}* |}")
1129checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
1130assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")
1131
1132assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
1133assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d")
1134assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)
1135
1136assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1137assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1138
1139eqcharset(compile"[]]", "]")
1140eqcharset(compile"[][]", m.S"[]")
1141eqcharset(compile"[]-]", m.S"-]")
1142eqcharset(compile"[-]", m.S"-")
1143eqcharset(compile"[az-]", m.S"a-z")
1144eqcharset(compile"[-az]", m.S"a-z")
1145eqcharset(compile"[a-z]", m.R"az")
1146eqcharset(compile"[]['\"]", m.S[[]['"]])
1147
1148eqcharset(compile"[^]]", any - "]")
1149eqcharset(compile"[^][]", any - m.S"[]")
1150eqcharset(compile"[^]-]", any - m.S"-]")
1151eqcharset(compile"[^]-]", any - m.S"-]")
1152eqcharset(compile"[^-]", any - m.S"-")
1153eqcharset(compile"[^az-]", any - m.S"a-z")
1154eqcharset(compile"[^-az]", any - m.S"a-z")
1155eqcharset(compile"[^a-z]", any - m.R"az")
1156eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
1157
1158-- tests for comments in 're'
1159e = compile[[
1160A <- _B -- \t \n %nl .<> <- -> --
1161_B <- 'x' --]]
1162assert(e:match'xy' == 2)
1163
1164-- tests for 're' with pre-definitions
1165defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
1166e = compile("%letters (%letters / %digits)*", defs)
1167assert(e:match"x123" == 5)
1168e = compile("%_", defs)
1169assert(e:match"__" == 3)
1170
1171e = compile([[
1172 S <- A+
1173 A <- %letters+ B
1174 B <- %digits+
1175]], defs)
1176
1177e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
1178assert(e:match("2.34") == math.sin(2.34))
1179
1180
1181function eq (_, _, a, b) return a == b end
1182
1183c = re.compile([[
1184 longstring <- '[' {:init: '='* :} '[' close
1185 close <- ']' =init ']' / . close
1186]])
1187
1188assert(c:match'[==[]]===]]]]==]===[]' == 17)
1189assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1190assert(not c:match'[[]=]====]=]=]==]===[]')
1191
1192c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
1193
1194assert(c:match'[==[]]===]]]]==]')
1195assert(c:match'[[]=]====]=][]==]===[]]')
1196assert(not c:match'[[]=]====]=]=]==]===[]')
1197
1198assert(re.find("hi alalo", "{:x:..:} =x") == 4)
1199assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
1200assert(not re.find("hi alalo", "{:x:..:} =x", 5))
1201assert(re.find("hi alalo", "{'al'}", 5) == 6)
1202assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
1203assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
1204
1205-- re.find discards any captures
1206local a,b,c = re.find("alo", "{.}{'o'}")
1207assert(a == 2 and b == 3 and c == nil)
1208
1209local function match (s,p)
1210 local i,e = re.find(s,p)
1211 if i then return s:sub(i, e) end
1212end
1213assert(match("alo alo", '[a-z]+') == "alo")
1214assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
1215assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")
1216
1217assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
1218assert(re.gsub("alo alo", "%w+", ".") == ". .")
1219assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
1220 "hI, hOw ArE yOU")
1221
1222s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
1223c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
1224assert(re.gsub(s, c, "%2") == 'hi and =]')
1225assert(re.gsub(s, c, "%0") == s)
1226assert(re.gsub('[=[hi]=]', c, "%2") == '=')
1227
1228assert(re.find("", "!.") == 1)
1229assert(re.find("alo", "!.") == 4)
1230
1231function addtag (s, i, t, tag) t.tag = tag; return i, t end
1232
1233c = re.compile([[
1234 doc <- block !.
1235 block <- (start {| (block / { [^<]+ })* |} end?) => addtag
1236 start <- '<' {:tag: [a-z]+ :} '>'
1237 end <- '</' { =tag } '>'
1238]], {addtag = addtag})
1239
1240x = c:match[[
1241<x>hi<b>hello</b>but<b>totheend</x>]]
1242checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
1243 {'totheend'}})
1244
1245
1246-- tests for look-ahead captures
1247x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
1248checkeq(x, {"", "alo", ""})
1249
1250assert(re.match("aloalo",
1251 "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
1252 == "AallooAalloo")
1253
1254-- bug in 0.9 (and older versions), due to captures in look-aheads
1255x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]]
1256assert(x:match"alo alo" == "+ +")
1257
1258-- valid capture in look-ahead (used inside the look-ahead itself)
1259x = re.compile[[
1260 S <- &({:two: .. :} . =two) {[a-z]+} / . S
1261]]
1262assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
1263
1264
1265p = re.compile[[
1266 block <- {| {:ident:space*:} line
1267 ((=ident !space line) / &(=ident space) block)* |}
1268 line <- {[^%nl]*} %nl
1269 space <- '_' -- should be ' ', but '_' is simpler for editors
1270]]
1271
1272t= p:match[[
12731
1274__1.1
1275__1.2
1276____1.2.1
1277____
12782
1279__2.1
1280]]
1281checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
1282 "2", {"2.1", ident = "__"}, ident = ""})
1283
1284
1285-- nested grammars
1286p = re.compile[[
1287 s <- a b !.
1288 b <- ( x <- ('b' x)? )
1289 a <- ( x <- 'a' x? )
1290]]
1291
1292assert(p:match'aaabbb')
1293assert(p:match'aaa')
1294assert(not p:match'bbb')
1295assert(not p:match'aaabbba')
1296
1297-- testing groups
1298t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
1299checkeq(t, {"a", "bc", "b", "c", "c", ""})
1300
1301t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
1302checkeq(t, {a="1", b="2", c="4"})
1303t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
1304checkeq(t, {a="1", b="2", c="4"})
1305t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
1306checkeq(t, {"1", b="2", "4", "5"})
1307t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
1308checkeq(t, {"1", "23", "4", "5"})
1309t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
1310checkeq(t, {"1", "23", "4", "5"})
1311
1312
1313-- testing pre-defined names
1314assert(os.setlocale("C") == "C")
1315
1316function eqlpeggsub (p1, p2)
1317 local s1 = cs2str(re.compile(p1))
1318 local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
1319 -- if s1 ~= s2 then print(#s1,#s2) end
1320 assert(s1 == s2)
1321end
1322
1323
1324eqlpeggsub("%w", "%w")
1325eqlpeggsub("%a", "%a")
1326eqlpeggsub("%l", "%l")
1327eqlpeggsub("%u", "%u")
1328eqlpeggsub("%p", "%p")
1329eqlpeggsub("%d", "%d")
1330eqlpeggsub("%x", "%x")
1331eqlpeggsub("%s", "%s")
1332eqlpeggsub("%c", "%c")
1333
1334eqlpeggsub("%W", "%W")
1335eqlpeggsub("%A", "%A")
1336eqlpeggsub("%L", "%L")
1337eqlpeggsub("%U", "%U")
1338eqlpeggsub("%P", "%P")
1339eqlpeggsub("%D", "%D")
1340eqlpeggsub("%X", "%X")
1341eqlpeggsub("%S", "%S")
1342eqlpeggsub("%C", "%C")
1343
1344eqlpeggsub("[%w]", "%w")
1345eqlpeggsub("[_%w]", "_%w")
1346eqlpeggsub("[^%w]", "%W")
1347eqlpeggsub("[%W%S]", "%W%S")
1348
1349re.updatelocale()
1350
1351-- testing nested substitutions x string captures
1352
1353p = re.compile[[
1354 text <- {~ item* ~}
1355 item <- macro / [^()] / '(' item* ')'
1356 arg <- ' '* {~ (!',' item)* ~}
1357 args <- '(' arg (',' arg)* ')'
1358 macro <- ('apply' args) -> '%1(%2)'
1359 / ('add' args) -> '%1 + %2'
1360 / ('mul' args) -> '%1 * %2'
1361]]
1362
1363assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")
1364
1365rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
1366
1367assert(rev:match"0123456789" == "9876543210")
1368
1369
1370-- testing error messages in re
1371
1372local function errmsg (p, err)
1373 local s, msg = pcall(re.compile, p)
1374 assert(not s and string.find(msg, err))
1375end
1376
1377errmsg('aaaa', "rule 'aaaa'")
1378errmsg('a', 'outside')
1379errmsg('b <- a', 'undefined')
1380errmsg("x <- 'a' x <- 'b'", 'already defined')
1381errmsg("'a' -", "near '-'")
1382
1383
1384print"OK"
1385
1386
diff --git a/testlabel.lua b/testlabel.lua
new file mode 100644
index 0000000..100c0d0
--- /dev/null
+++ b/testlabel.lua
@@ -0,0 +1,422 @@
1local m = require 'lpeglabel'
2
3local p = m.T(1, 2, 5)
4assert(p:match("abc") == nil)
5
6-- throws a label that is not caught by ordinary choice
7p = m.T(1) + m.P"a"
8local r = p:match("abc")
9assert(r == nil)
10
11-- again throws a label that is not caught by ordinary choice
12local g = m.P{
13 "S",
14 S = m.V"A" + m.V"B",
15 A = m.T(1),
16 B = m.P"a"
17}
18r = g:match("abc")
19assert(r == nil)
20
21-- throws a label that is not caught by labeled choice
22p = m.Lc(m.T(2), m.P"a", 1, 3)
23r = p:match("abc")
24assert(r == nil)
25
26-- modifies previous pattern
27-- adds another labeled choice to catch label "2"
28p = m.Lc(p, m.P"a", 2)
29r = p:match("abc")
30assert(r == 2)
31
32-- throws a label that is caught by labeled choice
33p = m.Lc(m.T(25), m.P"a", 25)
34r = p:match("abc")
35assert(r == 2)
36assert(p:match("bola") == nil)
37
38-- labeled choice did not catch "fail" by default
39p = m.Lc(m.P"b", m.P"a", 1)
40r = p:match("abc")
41assert(r == nil, r)
42assert(p:match("bola") == 2)
43
44-- "fail" is label "0"
45-- labeled choice can catch "fail"
46p = m.Lc(m.P"b", m.P"a", 0)
47r = p:match("abc")
48assert(r == 2, r)
49assert(p:match("bola") == 2)
50
51-- "fail" is label "0"
52-- labeled choice can catch "fail" or "3"
53p = m.Lc(m.P"a" * m.T(3), (m.P"a" + m.P"b"), 0, 3)
54assert(p:match("abc") == 2)
55assert(p:match("bac") == 2)
56assert(p:match("cab") == nil)
57
58--[[
59S -> A /{1} 'a'
60A -> B
61B -> %1
62]]
63g = m.P{
64 "S",
65 S = m.Lc(m.V"A", m.P"a", 1),
66 A = m.V"B",
67 B = m.T(1),
68}
69assert(g:match("ab") == 2)
70assert(g:match("bc") == nil)
71
72
73--[[
74S -> A
75A -> (B (';' / %{1}))*
76B -> 'a'
77]]
78g = m.P{
79 "S",
80 S = m.V"A",
81 A = m.P(m.V"B" * (";" + m.T(1)))^0,
82 B = m.P'a',
83}
84assert(g:match("a;a;") == 5)
85assert(g:match("a;a") == nil)
86
87
88
89-- %1 /{1,3} %2 /{2} 'a'
90p = m.Lc(m.Lc(m.T(1), m.T(2), 1, 3), m.P"a", 2)
91r = p:match("abc")
92assert(r == 2)
93assert(p:match("") == nil)
94
95p = m.Lc(m.T(1), m.Lc(m.T(2), m.P"a", 2), 1, 3)
96r = p:match("abc")
97assert(r == 2)
98assert(p:match("") == nil)
99
100print("+")
101
102--[[ grammar based on Figure 8 of paper submitted to SCP
103S -> S0 /{1} ID /{2} ID '=' Exp /{3} 'unsigned'* 'int' ID /{4} 'unsigned'* ID ID / %error
104S0 -> ID S1 / 'unsigned' S2 / 'int' %3
105S1 -> '=' %2 / !. %1 / ID %4
106S2 -> 'unsigned' S2 / ID %4 / 'int' %3
107]]
108
109local sp = m.S" \t\n"^0
110local eq = sp * m.P"="
111
112g = m.P{
113 "S",
114 S = m.Lc(
115 m.Lc(
116 m.Lc(
117 m.Lc(m.V"S0", m.V"ID" * (m.P(1) + ""), 1),
118 m.V"ID" * eq * m.V"Exp", 2
119 ),
120 m.V"U"^0 * m.V"I" * m.V"ID", 3
121 ),
122 m.V"U"^0 * m.V"ID" * m.V"ID", 4)
123 + m.T(5),
124 S0 = m.V"ID" * m.V"S1" + m.V"U" * m.V"S2" + m.V"I" * m.T(3),
125 S1 = eq * m.T(2) + sp * -m.P(1) * m.T(1) + m.V"ID" * m.T(4),
126 S2 = m.V"U" * m.V"S2" + m.V"ID" * m.T(4) + m.V"I" * m.T(3),
127 ID = sp * m.P"a",
128 U = sp * m.P"unsigned",
129 I = sp * m.P"int",
130 Exp = sp * m.P"E",
131}
132--g:pcode()
133
134local s = "a"
135assert(g:match(s) == #s + 1) --1
136s = "a = E"
137assert(g:match(s) == #s + 1) --2
138s = "int a"
139assert(g:match(s) == #s + 1) --3
140s = "unsigned int a"
141assert(g:match(s) == #s + 1) --3
142s = "unsigned a a"
143assert(g:match(s) == #s + 1) --4
144s = "b"
145assert(g:match(s) == nil)
146s = "unsigned"
147assert(g:match(s) == nil)
148s = "unsigned a"
149assert(g:match(s) == nil)
150s = "unsigned int"
151assert(g:match(s) == nil)
152
153
154print("+")
155
156local re = require 're'
157
158g = re.compile[['a' /{4,9} [a-z]
159]]
160assert(g:match("a") == 2)
161assert(g:match("b") == nil)
162
163g = re.compile[['a' /{4,9} [a-f] /{5, 7} [a-z]
164]]
165assert(g:match("a") == 2)
166assert(g:match("b") == nil)
167
168g = re.compile[[%{1} /{4,9} [a-z]
169]]
170assert(g:match("a") == nil)
171
172g = re.compile[[%{1} /{4,1} [a-f]
173]]
174assert(g:match("a") == 2)
175assert(g:match("h") == nil)
176
177g = re.compile[[[a-f]%{15, 9} /{4,9} [a-c]%{7} /{5, 7} [a-z] ]]
178assert(g:match("a") == 2)
179assert(g:match("c") == 2)
180assert(g:match("d") == nil)
181assert(g:match("g") == nil)
182
183--[[ grammar based on Figure 8 of paper submitted to SCP
184S -> S0 /{1} ID /{2} ID '=' Exp /{3} 'unsigned'* 'int' ID /{4} 'unsigned'* ID ID / %error
185S0 -> ID S1 / 'unsigned' S2 / 'int' %3
186S1 -> '=' %2 / !. %1 / ID %4
187S2 -> 'unsigned' S2 / ID %4 / 'int' %3
188]]
189
190
191g = re.compile([[
192 S <- S0 /{1} ID /{2} ID %s* '=' Exp /{3} U* Int ID /{4} U ID ID /{0} %{5}
193 S0 <- ID S1 / U S2 / Int %{3}
194 S1 <- %s* '=' %{2} / !. %{1} / ID %{4}
195 S2 <- U S2 / ID %{4} / Int %{3}
196 ID <- %s* 'a'
197 U <- %s* 'unsigned'
198 Int <- %s* 'int'
199 Exp <- %s* 'E'
200]])
201
202local s = "a"
203assert(g:match(s) == #s + 1) --1
204s = "a = E"
205assert(g:match(s) == #s + 1) --2
206s = "int a"
207assert(g:match(s) == #s + 1) --3
208s = "unsigned int a"
209assert(g:match(s) == #s + 1) --3
210s = "unsigned a a"
211assert(g:match(s) == #s + 1) --4
212s = "b"
213assert(g:match(s) == nil)
214s = "unsigned"
215assert(g:match(s) == nil)
216s = "unsigned a"
217assert(g:match(s) == nil)
218s = "unsigned int"
219assert(g:match(s) == nil)
220
221local terror = { ['cmdSeq'] = "Missing ';' in CmdSeq",
222 ['ifExp'] = "Error in expresion of 'if'",
223 ['ifThen'] = "Error matching 'then' keyword",
224 ['ifThenCmdSeq'] = "Error matching CmdSeq of 'then' branch",
225 ['ifElseCmdSeq'] = "Error matching CmdSeq of 'else' branch",
226 ['ifEnd'] = "Error matching 'end' keyword of 'if'",
227 ['repeatCmdSeq'] = "Error matching CmdSeq of 'repeat'",
228 ['repeatUntil'] = "Error matching 'until' keyword",
229 ['repeatExp'] = "Error matching expression of 'until'",
230 ['assignOp'] = "Error matching ':='",
231 ['assignExp'] = "Error matching expression of assignment",
232 ['readName'] = "Error matching 'NAME' after 'read'",
233 ['writeExp'] = "Error matching expression after 'write'",
234 ['simpleExp'] = "Error matching 'SimpleExp'",
235 ['term'] = "Error matching 'Term'",
236 ['factor'] = "Error matching 'Factor'",
237 ['openParExp'] = "Error matching expression after '('",
238 ['closePar'] = "Error matching ')'",
239 ['undefined'] = "Error undefined'"}
240
241g = re.compile([[
242 Tiny <- CmdSeq /{1} '' -> cmdSeq /{2} '' -> ifExp /{3} '' -> ifThen /{4} '' -> ifThenCmdSeq
243 /{5} '' -> ifElseCmdSeq /{6} '' -> ifEnd /{7} '' -> repeatCmdSeq
244 /{8} '' -> repeatUntil /{9} '' -> repeatExp /{10} '' -> assignOp
245 /{11} '' -> assignExp /{12} '' -> readName /{13} '' -> writeExp
246 /{14} '' -> simpleExp /{15} '' -> term /{16} '' -> factor
247 /{17} '' -> openParExp /{18} '' -> closePar /{0} '' -> undefined
248 CmdSeq <- (Cmd (SEMICOLON / %{1})) (Cmd (SEMICOLON / %{1}))*
249 Cmd <- IfCmd / RepeatCmd / ReadCmd / WriteCmd / AssignCmd
250 IfCmd <- IF (Exp / %{2}) (THEN / %{3}) (CmdSeq / %{4}) (ELSE (CmdSeq / %{5}) / '') (END / %{6})
251 RepeatCmd <- REPEAT (CmdSeq / %{7}) (UNTIL / %{8}) (Exp / %{9})
252 AssignCmd <- !RESERVED NAME (ASSIGNMENT / %{10}) (Exp / %{11})
253 ReadCmd <- READ (NAME / %{12})
254 WriteCmd <- WRITE (Exp / %{13})
255 Exp <- SimpleExp ((LESS / EQUAL) (SimpleExp / %{14}) / '')
256 SimpleExp <- Term ((ADD / SUB) (Term / %{15}))*
257 Term <- Factor ((MUL / DIV) (Factor / %{16}))*
258 Factor <- OPENPAR (Exp / %{17}) (CLOSEPAR / %{18}) / NUMBER / NAME
259 ADD <- Sp '+'
260 ASSIGNMENT <- Sp ':='
261 CLOSEPAR <- Sp ')'
262 DIV <- Sp '/'
263 IF <- Sp 'if'
264 ELSE <- Sp 'else'
265 END <- Sp 'end'
266 EQUAL <- Sp '='
267 LESS <- Sp '<'
268 MUL <- Sp '*'
269 NAME <- Sp [a-z]+
270 NUMBER <- Sp [0-9]+
271 OPENPAR <- Sp '('
272 READ <- Sp 'read'
273 REPEAT <- Sp 'repeat'
274 SEMICOLON <- Sp ';'
275 SUB <- Sp '-'
276 THEN <- Sp 'then'
277 UNTIL <- Sp 'until'
278 WRITE <- Sp 'write'
279 RESERVED <- IF / ELSE / END / READ / REPEAT / THEN / UNTIL / WRITE
280 Sp <- (%s / %nl)*
281]], terror)
282
283s = [[
284n := 5;]]
285assert(g:match(s) == #s + 1)
286
287s = [[
288n := 5;
289f := 1;
290repeat
291 f := f * n;
292 n := n - 1;
293until (n < 1);
294write f;]]
295assert(g:match(s) == #s + 1)
296
297-- a ';' is missing in 'read a'
298s = [[
299read a]]
300assert(g:match(s) == terror['cmdSeq'])
301
302
303-- a ';' is missing in 'n := n - 1'
304s = [[
305n := 5;
306f := 1;
307repeat
308 f := f * n;
309 n := n - 1
310until (n < 1);
311write f;]]
312assert(g:match(s) == terror['cmdSeq'])
313
314
315-- IF expression
316s = [[
317if a then a := a + 1; end;]]
318assert(g:match(s) == #s + 1)
319
320-- IF expression
321s = [[
322if a then a := a + 1; else write 2; end;]]
323assert(g:match(s) == #s + 1)
324
325-- Error in expression of 'if'. 'A' is not a valida name
326s = [[
327if A then a := a + 1; else write 2; end;]]
328assert(g:match(s) == terror['ifExp'])
329
330-- Error matching the 'then' keyword
331s = [[
332if a a := a + 1; else write 2; end;]]
333assert(g:match(s) == terror['ifThen'])
334
335-- Error matching the CmdSeq inside of 'then' branch
336s = [[
337if a then 3 := 2; else write 2; end;]]
338assert(g:match(s) == terror['ifThenCmdSeq'])
339
340-- Error matching the CmdSeq inside of 'else' branch
341s = [[
342if a then b := 2; else A := 2; end;]]
343assert(g:match(s) == terror['ifElseCmdSeq'])
344
345-- Error matching 'end' of 'if'
346s = [[
347if a then b := 2; else a := 2; 77;]]
348assert(g:match(s) == terror['ifEnd'])
349
350-- Error matching the CmdSeq of 'repeat'
351s = [[repeat
352 F := f * n;
353 n := n - 1;
354until (n < 1);]]
355assert(g:match(s) == terror['repeatCmdSeq'])
356
357-- Error matching 'until'
358s = [[repeat
359 f := f * n;
360 n := n - 1;
36188 (n < 1);]]
362assert(g:match(s) == terror['repeatUntil'])
363
364-- Error matching expression of 'until'
365s = [[repeat
366 f := f * n;
367 n := n - 1;
368until ; (n < 1);]]
369assert(g:match(s) == terror['repeatExp'])
370
371-- Error matching ':='
372s = [[
373f = f * n;]]
374assert(g:match(s) == terror['assignOp'])
375
376-- Error matching expression of assignment
377s = [[
378f := A * n;]]
379assert(g:match(s) == terror['assignExp'])
380
381-- Error matching 'name'
382s = [[
383read 2;]]
384assert(g:match(s) == terror['readName'])
385
386-- Error matching expression after 'write'
387s = [[
388write [a] := 2;]]
389assert(g:match(s) == terror['writeExp'])
390
391-- Error matching 'SimpleExp'
392s = [[
393a := a < A;]]
394assert(g:match(s) == terror['simpleExp'])
395
396-- Error matching 'Term'
397s = [[
398a := a + A;]]
399assert(g:match(s) == terror['term'])
400
401-- Error matching 'Factor'
402s = [[
403a := a * A;]]
404assert(g:match(s) == terror['factor'])
405
406-- Error matching expression after '('
407s = [[
408a := (A);]]
409assert(g:match(s) == terror['openParExp'])
410
411-- Error matching ')'
412s = [[
413a := (a];]]
414assert(g:match(s) == terror['closePar'])
415
416-- Error undefined
417s = [[
418A := a;]]
419assert(g:match(s) == terror['undefined'])
420
421
422print("OK")