aboutsummaryrefslogtreecommitdiff
path: root/lpcode.c
diff options
context:
space:
mode:
Diffstat (limited to 'lpcode.c')
-rw-r--r--lpcode.c1016
1 files changed, 1016 insertions, 0 deletions
diff --git a/lpcode.c b/lpcode.c
new file mode 100644
index 0000000..4a49e7d
--- /dev/null
+++ b/lpcode.c
@@ -0,0 +1,1016 @@
1/*
2** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5
6#include <limits.h>
7
8
9#include "lua.h"
10#include "lauxlib.h"
11
12#include "lptypes.h"
13#include "lpcode.h"
14
15
16/* signals a "no-instruction */
17#define NOINST -1
18
19
20
21static const Charset fullset_ =
22 {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
23 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
25 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}};
26
27static const Charset *fullset = &fullset_;
28
29/*
30** {======================================================
31** Analysis and some optimizations
32** =======================================================
33*/
34
35/*
36** Check whether a charset is empty (IFail), singleton (IChar),
37** full (IAny), or none of those (ISet).
38*/
39static Opcode charsettype (const byte *cs, int *c) {
40 int count = 0;
41 int i;
42 int candidate = -1; /* candidate position for a char */
43 for (i = 0; i < CHARSETSIZE; i++) {
44 int b = cs[i];
45 if (b == 0) {
46 if (count > 1) return ISet; /* else set is still empty */
47 }
48 else if (b == 0xFF) {
49 if (count < (i * BITSPERCHAR))
50 return ISet;
51 else count += BITSPERCHAR; /* set is still full */
52 }
53 else if ((b & (b - 1)) == 0) { /* byte has only one bit? */
54 if (count > 0)
55 return ISet; /* set is neither full nor empty */
56 else { /* set has only one char till now; track it */
57 count++;
58 candidate = i;
59 }
60 }
61 else return ISet; /* byte is neither empty, full, nor singleton */
62 }
63 switch (count) {
64 case 0: return IFail; /* empty set */
65 case 1: { /* singleton; find character bit inside byte */
66 int b = cs[candidate];
67 *c = candidate * BITSPERCHAR;
68 if ((b & 0xF0) != 0) { *c += 4; b >>= 4; }
69 if ((b & 0x0C) != 0) { *c += 2; b >>= 2; }
70 if ((b & 0x02) != 0) { *c += 1; }
71 return IChar;
72 }
73 default: {
74 assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */
75 return IAny;
76 }
77 }
78}
79
80/*
81** A few basic operations on Charsets
82*/
83static void cs_complement (Charset *cs) {
84 loopset(i, cs->cs[i] = ~cs->cs[i]);
85}
86
87
88static int cs_equal (const byte *cs1, const byte *cs2) {
89 loopset(i, if (cs1[i] != cs2[i]) return 0);
90 return 1;
91}
92
93
94/*
95** computes whether sets cs1 and cs2 are disjoint
96*/
97static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
98 loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
99 return 1;
100}
101
102
103/*
104** Convert a 'char' pattern (TSet, TChar, TAny) to a charset
105*/
106int tocharset (TTree *tree, Charset *cs) {
107 switch (tree->tag) {
108 case TSet: { /* copy set */
109 loopset(i, cs->cs[i] = treebuffer(tree)[i]);
110 return 1;
111 }
112 case TChar: { /* only one char */
113 assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
114 loopset(i, cs->cs[i] = 0); /* erase all chars */
115 setchar(cs->cs, tree->u.n); /* add that one */
116 return 1;
117 }
118 case TAny: {
119 loopset(i, cs->cs[i] = 0xFF); /* add all to the set */
120 return 1;
121 }
122 default: return 0;
123 }
124}
125
126
127/*
128** Checks whether a pattern has captures
129*/
130int hascaptures (TTree *tree) {
131 tailcall:
132 switch (tree->tag) {
133 case TCapture: case TRunTime:
134 return 1;
135 default: {
136 switch (numsiblings[tree->tag]) {
137 case 1: /* return hascaptures(sib1(tree)); */
138 tree = sib1(tree); goto tailcall;
139 case 2:
140 if (hascaptures(sib1(tree))) return 1;
141 /* else return hascaptures(sib2(tree)); */
142 tree = sib2(tree); goto tailcall;
143 default: assert(numsiblings[tree->tag] == 0); return 0;
144 }
145 }
146 }
147}
148
149
150/*
151** Checks how a pattern behaves regarding the empty string,
152** in one of two different ways:
153** A pattern is *nullable* if it can match without consuming any character;
154** A pattern is *nofail* if it never fails for any string
155** (including the empty string).
156** The difference is only for predicates and run-time captures;
157** for other patterns, the two properties are equivalent.
158** (With predicates, &'a' is nullable but not nofail. Of course,
159** nofail => nullable.)
160** These functions are all convervative in the following way:
161** p is nullable => nullable(p)
162** nofail(p) => p cannot fail
163** The function assumes that TOpenCall is not nullable;
164** this will be checked again when the grammar is fixed.)
165** Run-time captures can do whatever they want, so the result
166** is conservative.
167*/
168int checkaux (TTree *tree, int pred) {
169 tailcall:
170 switch (tree->tag) {
171 case TChar: case TSet: case TAny:
172 case TFalse: case TOpenCall: case TThrow: /* labeled failure */
173 return 0; /* not nullable */
174 case TRep: case TTrue:
175 return 1; /* no fail */
176 case TNot: case TBehind: /* can match empty, but can fail */
177 if (pred == PEnofail) return 0;
178 else return 1; /* PEnullable */
179 case TAnd: /* can match empty; fail iff body does */
180 if (pred == PEnullable) return 1;
181 /* else return checkaux(sib1(tree), pred); */
182 tree = sib1(tree); goto tailcall;
183 case TRunTime: /* can fail; match empty iff body does */
184 if (pred == PEnofail) return 0;
185 /* else return checkaux(sib1(tree), pred); */
186 tree = sib1(tree); goto tailcall;
187 case TSeq:
188 if (!checkaux(sib1(tree), pred)) return 0;
189 /* else return checkaux(sib2(tree), pred); */
190 tree = sib2(tree); goto tailcall;
191 case TChoice: case TLabChoice: /* labeled failure */
192 if (checkaux(sib2(tree), pred)) return 1;
193 /* else return checkaux(sib1(tree), pred); */
194 tree = sib1(tree); goto tailcall;
195 case TCapture: case TGrammar: case TRule:
196 /* return checkaux(sib1(tree), pred); */
197 tree = sib1(tree); goto tailcall;
198 case TCall: /* return checkaux(sib2(tree), pred); */
199 tree = sib2(tree); goto tailcall;
200 default: assert(0); return 0;
201 };
202}
203
204
205/*
206** number of characters to match a pattern (or -1 if variable)
207** ('count' avoids infinite loops for grammars)
208*/
209int fixedlenx (TTree *tree, int count, int len) {
210 tailcall:
211 switch (tree->tag) {
212 case TChar: case TSet: case TAny:
213 return len + 1;
214 case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
215 case TThrow: /* labeled failure */
216 return len;
217 case TRep: case TRunTime: case TOpenCall:
218 return -1;
219 case TCapture: case TRule: case TGrammar:
220 /* return fixedlenx(sib1(tree), count); */
221 tree = sib1(tree); goto tailcall;
222 case TCall:
223 if (count++ >= MAXRULES)
224 return -1; /* may be a loop */
225 /* else return fixedlenx(sib2(tree), count); */
226 tree = sib2(tree); goto tailcall;
227 case TSeq: {
228 len = fixedlenx(sib1(tree), count, len);
229 if (len < 0) return -1;
230 /* else return fixedlenx(sib2(tree), count, len); */
231 tree = sib2(tree); goto tailcall;
232 }
233 case TChoice: case TLabChoice: { /* labeled failure */
234 int n1, n2;
235 n1 = fixedlenx(sib1(tree), count, len);
236 if (n1 < 0) return -1;
237 n2 = fixedlenx(sib2(tree), count, len);
238 if (n1 == n2) return n1;
239 else return -1;
240 }
241 default: assert(0); return 0;
242 };
243}
244
245
246/*
247** Computes the 'first set' of a pattern.
248** The result is a conservative aproximation:
249** match p ax -> x' for some x ==> a in first(p).
250** The set 'follow' is the first set of what follows the
251** pattern (full set if nothing follows it).
252** The function returns 0 when this set can be used for
253** tests that avoid the pattern altogether.
254** A non-zero return can happen for two reasons:
255** 1) match p '' -> '' ==> returns 1.
256** (tests cannot be used because they always fail for an empty input)
257** 2) there is a match-time capture ==> returns 2.
258** (match-time captures should not be avoided by optimizations)
259*/
260static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
261 tailcall:
262 switch (tree->tag) {
263 case TChar: case TSet: case TAny: {
264 tocharset(tree, firstset);
265 return 0;
266 }
267 case TTrue: {
268 loopset(i, firstset->cs[i] = follow->cs[i]);
269 return 1;
270 }
271 case TFalse: {
272 loopset(i, firstset->cs[i] = 0);
273 return 0;
274 }
275 case TThrow: { /* (?)labeled failure: should always throw the label */
276 loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset? */
277 return 1;
278 }
279 case TChoice: case TLabChoice: { /*(?) labeled failure */
280 Charset csaux;
281 int e1 = getfirst(sib1(tree), follow, firstset);
282 int e2 = getfirst(sib2(tree), follow, &csaux);
283 loopset(i, firstset->cs[i] |= csaux.cs[i]);
284 return e1 | e2;
285 }
286 case TSeq: {
287 if (!nullable(sib1(tree))) {
288 /* return getfirst(sib1(tree), fullset, firstset); */
289 tree = sib1(tree); follow = fullset; goto tailcall;
290 }
291 else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
292 Charset csaux;
293 int e2 = getfirst(sib2(tree), follow, &csaux);
294 int e1 = getfirst(sib1(tree), &csaux, firstset);
295 if (e1 == 0) return 0; /* 'e1' ensures that first can be used */
296 else if ((e1 | e2) & 2) /* one of the children has a matchtime? */
297 return 2; /* pattern has a matchtime capture */
298 else return e2; /* else depends on 'e2' */
299 }
300 }
301 case TRep: {
302 getfirst(sib1(tree), follow, firstset);
303 loopset(i, firstset->cs[i] |= follow->cs[i]);
304 return 1; /* accept the empty string */
305 }
306 case TCapture: case TGrammar: case TRule: {
307 /* return getfirst(sib1(tree), follow, firstset); */
308 tree = sib1(tree); goto tailcall;
309 }
310 case TRunTime: { /* function invalidates any follow info. */
311 int e = getfirst(sib1(tree), fullset, firstset);
312 if (e) return 2; /* function is not "protected"? */
313 else return 0; /* pattern inside capture ensures first can be used */
314 }
315 case TCall: {
316 /* return getfirst(sib2(tree), follow, firstset); */
317 tree = sib2(tree); goto tailcall;
318 }
319 case TAnd: {
320 int e = getfirst(sib1(tree), follow, firstset);
321 loopset(i, firstset->cs[i] &= follow->cs[i]);
322 return e;
323 }
324 case TNot: {
325 if (tocharset(sib1(tree), firstset)) {
326 cs_complement(firstset);
327 return 1;
328 }
329 /* else go through */
330 }
331 case TBehind: { /* instruction gives no new information */
332 /* call 'getfirst' to check for math-time captures */
333 int e = getfirst(sib1(tree), follow, firstset);
334 loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */
335 return e | 1; /* always can accept the empty string */
336 }
337 default: assert(0); return 0;
338 }
339}
340
341
342/*
343** If it returns true, then pattern can fail only depending on the next
344** character of the subject
345*/
346static int headfail (TTree *tree) {
347 tailcall:
348 switch (tree->tag) {
349 case TChar: case TSet: case TAny: case TFalse:
350 return 1;
351 case TTrue: case TRep: case TRunTime: case TNot:
352 case TBehind:
353 case TThrow: /* labeled failure: should always throw the label */
354 return 0;
355 case TCapture: case TGrammar: case TRule: case TAnd:
356 tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */
357 case TCall:
358 tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */
359 case TSeq:
360 if (!nofail(sib2(tree))) return 0;
361 /* else return headfail(sib1(tree)); */
362 tree = sib1(tree); goto tailcall;
363 case TChoice: case TLabChoice: /* labeled failure */
364 if (!headfail(sib1(tree))) return 0;
365 /* else return headfail(sib2(tree)); */
366 tree = sib2(tree); goto tailcall;
367 default: assert(0); return 0;
368 }
369}
370
371
372/*
373** Check whether the code generation for the given tree can benefit
374** from a follow set (to avoid computing the follow set when it is
375** not needed)
376*/
377static int needfollow (TTree *tree) {
378 tailcall:
379 switch (tree->tag) {
380 case TChar: case TSet: case TAny:
381 case TFalse: case TTrue: case TAnd: case TNot:
382 case TRunTime: case TGrammar: case TCall: case TBehind:
383 case TThrow: case TLabChoice: /* (?)labeled failure */
384 return 0;
385 case TChoice: case TRep:
386 return 1;
387 case TCapture:
388 tree = sib1(tree); goto tailcall;
389 case TSeq:
390 tree = sib2(tree); goto tailcall;
391 default: assert(0); return 0;
392 }
393}
394
395/* }====================================================== */
396
397
398
399/*
400** {======================================================
401** Code generation
402** =======================================================
403*/
404
405
406/*
407** size of an instruction
408*/
409int sizei (const Instruction *i) {
410 switch((Opcode)i->i.code) {
411 case ISet: case ISpan: return CHARSETINSTSIZE;
412 case ITestSet: return CHARSETINSTSIZE + 1;
413 case ITestChar: case ITestAny: case IChoice: case IJmp:
414 case ICall: case IOpenCall: case ICommit: case IPartialCommit:
415 case IBackCommit: case IThrow: return 2; /* labeled failure */
416 case ILabChoice: return 3; /* labeled failure */
417 default: return 1;
418 }
419}
420
421
422/*
423** state for the compiler
424*/
425typedef struct CompileState {
426 Pattern *p; /* pattern being compiled */
427 int ncode; /* next position in p->code to be filled */
428 lua_State *L;
429} CompileState;
430
431
432/*
433** code generation is recursive; 'opt' indicates that the code is
434** being generated under a 'IChoice' operator jumping to its end.
435** 'tt' points to a previous test protecting this code. 'fl' is
436** the follow set of the pattern.
437*/
438static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
439 const Charset *fl);
440
441
442void reallocprog (lua_State *L, Pattern *p, int nsize) {
443 void *ud;
444 lua_Alloc f = lua_getallocf(L, &ud);
445 void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
446 nsize * sizeof(Instruction));
447 if (newblock == NULL && nsize > 0)
448 luaL_error(L, "not enough memory");
449 p->code = (Instruction *)newblock;
450 p->codesize = nsize;
451}
452
453
454static int nextinstruction (CompileState *compst) {
455 int size = compst->p->codesize;
456 if (compst->ncode >= size)
457 reallocprog(compst->L, compst->p, size * 2);
458 return compst->ncode++;
459}
460
461
462#define getinstr(cs,i) ((cs)->p->code[i])
463
464
465static int addinstruction (CompileState *compst, Opcode op, int aux) {
466 int i = nextinstruction(compst);
467 getinstr(compst, i).i.code = op;
468 getinstr(compst, i).i.aux = aux;
469 return i;
470}
471
472
473static int addoffsetinst (CompileState *compst, Opcode op) {
474 int i = addinstruction(compst, op, 0); /* instruction */
475 addinstruction(compst, (Opcode)0, 0); /* open space for offset */
476 assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2);
477 return i;
478}
479
480/* labeled failure begin */
481static int addthrowinstruction (CompileState *compst, Labelset ls) {
482 int i = nextinstruction(compst);
483 getinstr(compst, i).i.code = IThrow;
484 i = nextinstruction(compst);
485 getinstr(compst, i).labels = ls;
486 return i;
487}
488
489static int addoffsetlabinst (CompileState *compst, Opcode op, Labelset ls) {
490 int j;
491 int i = addinstruction(compst, op, 0); /* instruction */
492 addinstruction(compst, (Opcode)0, 0); /* open space for offset */
493 j = nextinstruction(compst); /* open space for labels */
494 getinstr(compst, j).labels = ls;
495 assert(op == ILabChoice);
496 return i;
497}
498/* labeled failure end */
499
500
501static void setoffset (CompileState *compst, int instruction, int offset) {
502 getinstr(compst, instruction + 1).offset = offset;
503}
504
505
506/*
507** Add a capture instruction:
508** 'op' is the capture instruction; 'cap' the capture kind;
509** 'key' the key into ktable; 'aux' is optional offset
510**
511*/
512static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
513 int aux) {
514 int i = addinstruction(compst, op, joinkindoff(cap, aux));
515 getinstr(compst, i).i.key = key;
516 return i;
517}
518
519
520#define gethere(compst) ((compst)->ncode)
521
522#define target(code,i) ((i) + code[i + 1].offset)
523
524
525static void jumptothere (CompileState *compst, int instruction, int target) {
526 if (instruction >= 0)
527 setoffset(compst, instruction, target - instruction);
528}
529
530
531static void jumptohere (CompileState *compst, int instruction) {
532 jumptothere(compst, instruction, gethere(compst));
533}
534
535
536/*
537** Code an IChar instruction, or IAny if there is an equivalent
538** test dominating it
539*/
540static void codechar (CompileState *compst, int c, int tt) {
541 if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar &&
542 getinstr(compst, tt).i.aux == c)
543 addinstruction(compst, IAny, 0);
544 else
545 addinstruction(compst, IChar, c);
546}
547
548
549/*
550** Add a charset posfix to an instruction
551*/
552static void addcharset (CompileState *compst, const byte *cs) {
553 int p = gethere(compst);
554 int i;
555 for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
556 nextinstruction(compst); /* space for buffer */
557 /* fill buffer with charset */
558 loopset(j, getinstr(compst, p).buff[j] = cs[j]);
559}
560
561
562/*
563** code a char set, optimizing unit sets for IChar, "complete"
564** sets for IAny, and empty sets for IFail; also use an IAny
565** when instruction is dominated by an equivalent test.
566*/
567static void codecharset (CompileState *compst, const byte *cs, int tt) {
568 int c = 0; /* (=) to avoid warnings */
569 Opcode op = charsettype(cs, &c);
570 switch (op) {
571 case IChar: codechar(compst, c, tt); break;
572 case ISet: { /* non-trivial set? */
573 if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet &&
574 cs_equal(cs, getinstr(compst, tt + 2).buff))
575 addinstruction(compst, IAny, 0);
576 else {
577 addinstruction(compst, ISet, 0);
578 addcharset(compst, cs);
579 }
580 break;
581 }
582 default: addinstruction(compst, op, c); break;
583 }
584}
585
586
587/*
588** code a test set, optimizing unit sets for ITestChar, "complete"
589** sets for ITestAny, and empty sets for IJmp (always fails).
590** 'e' is true iff test should accept the empty string. (Test
591** instructions in the current VM never accept the empty string.)
592*/
593static int codetestset (CompileState *compst, Charset *cs, int e) {
594 if (e) return NOINST; /* no test */
595 else {
596 int c = 0;
597 Opcode op = charsettype(cs->cs, &c);
598 switch (op) {
599 case IFail: return addoffsetinst(compst, IJmp); /* always jump */
600 case IAny: return addoffsetinst(compst, ITestAny);
601 case IChar: {
602 int i = addoffsetinst(compst, ITestChar);
603 getinstr(compst, i).i.aux = c;
604 return i;
605 }
606 case ISet: {
607 int i = addoffsetinst(compst, ITestSet);
608 addcharset(compst, cs->cs);
609 return i;
610 }
611 default: assert(0); return 0;
612 }
613 }
614}
615
616
617/*
618** Find the final destination of a sequence of jumps
619*/
620static int finaltarget (Instruction *code, int i) {
621 while (code[i].i.code == IJmp)
622 i = target(code, i);
623 return i;
624}
625
626
627/*
628** final label (after traversing any jumps)
629*/
630static int finallabel (Instruction *code, int i) {
631 return finaltarget(code, target(code, i));
632}
633
634
635/*
636** <behind(p)> == behind n; <p> (where n = fixedlen(p))
637*/
638static void codebehind (CompileState *compst, TTree *tree) {
639 if (tree->u.n > 0)
640 addinstruction(compst, IBehind, tree->u.n);
641 codegen(compst, sib1(tree), 0, NOINST, fullset);
642}
643
644
645/*
646** Choice; optimizations:
647** - when p1 is headfail
648** - when first(p1) and first(p2) are disjoint; than
649** a character not in first(p1) cannot go to p1, and a character
650** in first(p1) cannot go to p2 (at it is not in first(p2)).
651** (The optimization is not valid if p1 accepts the empty string,
652** as then there is no character at all...)
653** - when p2 is empty and opt is true; a IPartialCommit can resuse
654** the Choice already active in the stack.
655*/
656static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
657 const Charset *fl) {
658 int emptyp2 = (p2->tag == TTrue);
659 Charset cs1, cs2;
660 int e1 = getfirst(p1, fullset, &cs1);
661 if (headfail(p1) ||
662 (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) {
663 /*if (0) {*/
664 /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
665 int test = codetestset(compst, &cs1, 0);
666 int jmp = NOINST;
667 codegen(compst, p1, 0, test, fl);
668 if (!emptyp2)
669 jmp = addoffsetinst(compst, IJmp);
670 jumptohere(compst, test);
671 codegen(compst, p2, opt, NOINST, fl);
672 jumptohere(compst, jmp);
673 }
674 else if (opt && emptyp2) {
675 /* p1? == IPartialCommit; p1 */
676 jumptohere(compst, addoffsetinst(compst, IPartialCommit));
677 codegen(compst, p1, 1, NOINST, fullset);
678 }
679 else {
680 /* <p1 / p2> ==
681 test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
682 int pcommit;
683 int test = codetestset(compst, &cs1, e1);
684 int pchoice = addoffsetinst(compst, IChoice);
685 codegen(compst, p1, emptyp2, test, fullset);
686 pcommit = addoffsetinst(compst, ICommit);
687 jumptohere(compst, pchoice);
688 jumptohere(compst, test);
689 codegen(compst, p2, opt, NOINST, fl);
690 jumptohere(compst, pcommit);
691 }
692}
693
694/* labeled failure begin */
695static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
696 const Charset *fl, Labelset ls) {
697 int emptyp2 = (p2->tag == TTrue);
698 int pcommit;
699 int test = NOINST;
700 int pchoice = addoffsetlabinst(compst, ILabChoice, ls);
701 codegen(compst, p1, emptyp2, test, fullset);
702 pcommit = addoffsetinst(compst, ICommit);
703 jumptohere(compst, pchoice);
704 jumptohere(compst, test);
705 codegen(compst, p2, opt, NOINST, fl);
706 jumptohere(compst, pcommit);
707
708}
709/* labeled failure end */
710
711/*
712** And predicate
713** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
714** (valid only when 'p' has no captures)
715*/
716static void codeand (CompileState *compst, TTree *tree, int tt) {
717 int n = fixedlen(tree);
718 if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) {
719 codegen(compst, tree, 0, tt, fullset);
720 if (n > 0)
721 addinstruction(compst, IBehind, n);
722 }
723 else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
724 int pcommit;
725 int pchoice = addoffsetinst(compst, IChoice);
726 codegen(compst, tree, 0, tt, fullset);
727 pcommit = addoffsetinst(compst, IBackCommit);
728 jumptohere(compst, pchoice);
729 addinstruction(compst, IFail, 0);
730 jumptohere(compst, pcommit);
731 }
732}
733
734
735/*
736** Captures: if pattern has fixed (and not too big) length, use
737** a single IFullCapture instruction after the match; otherwise,
738** enclose the pattern with OpenCapture - CloseCapture.
739*/
740static void codecapture (CompileState *compst, TTree *tree, int tt,
741 const Charset *fl) {
742 int len = fixedlen(sib1(tree));
743 if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) {
744 codegen(compst, sib1(tree), 0, tt, fl);
745 addinstcap(compst, IFullCapture, tree->cap, tree->key, len);
746 }
747 else {
748 addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0);
749 codegen(compst, sib1(tree), 0, tt, fl);
750 addinstcap(compst, ICloseCapture, Cclose, 0, 0);
751 }
752}
753
754
755static void coderuntime (CompileState *compst, TTree *tree, int tt) {
756 addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0);
757 codegen(compst, sib1(tree), 0, tt, fullset);
758 addinstcap(compst, ICloseRunTime, Cclose, 0, 0);
759}
760
761
762/*
763** Repetion; optimizations:
764** When pattern is a charset, can use special instruction ISpan.
765** When pattern is head fail, or if it starts with characters that
766** are disjoint from what follows the repetions, a simple test
767** is enough (a fail inside the repetition would backtrack to fail
768** again in the following pattern, so there is no need for a choice).
769** When 'opt' is true, the repetion can reuse the Choice already
770** active in the stack.
771*/
772static void coderep (CompileState *compst, TTree *tree, int opt,
773 const Charset *fl) {
774 Charset st;
775 if (tocharset(tree, &st)) {
776 addinstruction(compst, ISpan, 0);
777 addcharset(compst, st.cs);
778 }
779 else {
780 int e1 = getfirst(tree, fullset, &st);
781 if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) {
782 /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
783 int jmp;
784 int test = codetestset(compst, &st, 0);
785 codegen(compst, tree, opt, test, fullset);
786 jmp = addoffsetinst(compst, IJmp);
787 jumptohere(compst, test);
788 jumptothere(compst, jmp, test);
789 }
790 else {
791 /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */
792 /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */
793 int commit, l2;
794 int test = codetestset(compst, &st, e1);
795 int pchoice = NOINST;
796 if (opt)
797 jumptohere(compst, addoffsetinst(compst, IPartialCommit));
798 else
799 pchoice = addoffsetinst(compst, IChoice);
800 l2 = gethere(compst);
801 codegen(compst, tree, 0, NOINST, fullset);
802 commit = addoffsetinst(compst, IPartialCommit);
803 jumptothere(compst, commit, l2);
804 jumptohere(compst, pchoice);
805 jumptohere(compst, test);
806 }
807 }
808}
809
810
811/*
812** Not predicate; optimizations:
813** In any case, if first test fails, 'not' succeeds, so it can jump to
814** the end. If pattern is headfail, that is all (it cannot fail
815** in other parts); this case includes 'not' of simple sets. Otherwise,
816** use the default code (a choice plus a failtwice).
817*/
818static void codenot (CompileState *compst, TTree *tree) {
819 Charset st;
820 int e = getfirst(tree, fullset, &st);
821 int test = codetestset(compst, &st, e);
822 if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */
823 addinstruction(compst, IFail, 0);
824 else {
825 /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */
826 int pchoice = addoffsetinst(compst, IChoice);
827 codegen(compst, tree, 0, NOINST, fullset);
828 addinstruction(compst, IFailTwice, 0);
829 jumptohere(compst, pchoice);
830 }
831 jumptohere(compst, test);
832}
833
834
835/*
836** change open calls to calls, using list 'positions' to find
837** correct offsets; also optimize tail calls
838*/
839static void correctcalls (CompileState *compst, int *positions,
840 int from, int to) {
841 int i;
842 Instruction *code = compst->p->code;
843 for (i = from; i < to; i += sizei(&code[i])) {
844 if (code[i].i.code == IOpenCall) {
845 int n = code[i].i.key; /* rule number */
846 int rule = positions[n]; /* rule position */
847 assert(rule == from || code[rule - 1].i.code == IRet);
848 if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */
849 code[i].i.code = IJmp; /* tail call */
850 else
851 code[i].i.code = ICall;
852 jumptothere(compst, i, rule); /* call jumps to respective rule */
853 }
854 }
855 assert(i == to);
856}
857
858
859/*
860** Code for a grammar:
861** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
862*/
863static void codegrammar (CompileState *compst, TTree *grammar) {
864 int positions[MAXRULES];
865 int rulenumber = 0;
866 TTree *rule;
867 int firstcall = addoffsetinst(compst, ICall); /* call initial rule */
868 int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */
869 int start = gethere(compst); /* here starts the initial rule */
870 jumptohere(compst, firstcall);
871 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
872 positions[rulenumber++] = gethere(compst); /* save rule position */
873 codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */
874 addinstruction(compst, IRet, 0);
875 }
876 assert(rule->tag == TTrue);
877 jumptohere(compst, jumptoend);
878 correctcalls(compst, positions, start, gethere(compst));
879}
880
881
882static void codecall (CompileState *compst, TTree *call) {
883 int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */
884 getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */
885 assert(sib2(call)->tag == TRule);
886}
887
888
889/*
890** Code first child of a sequence
891** (second child is called in-place to allow tail call)
892** Return 'tt' for second child
893*/
894static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
895 int tt, const Charset *fl) {
896 if (needfollow(p1)) {
897 Charset fl1;
898 getfirst(p2, fl, &fl1); /* p1 follow is p2 first */
899 codegen(compst, p1, 0, tt, &fl1);
900 }
901 else /* use 'fullset' as follow */
902 codegen(compst, p1, 0, tt, fullset);
903 if (fixedlen(p1) != 0) /* can 'p1' consume anything? */
904 return NOINST; /* invalidate test */
905 else return tt; /* else 'tt' still protects sib2 */
906}
907
908
909/*
910** Main code-generation function: dispatch to auxiliar functions
911** according to kind of tree
912*/
913static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
914 const Charset *fl) {
915 tailcall:
916 switch (tree->tag) {
917 case TChar: codechar(compst, tree->u.n, tt); break;
918 case TAny: addinstruction(compst, IAny, 0); break;
919 case TSet: codecharset(compst, treebuffer(tree), tt); break;
920 case TTrue: break;
921 case TFalse: addinstruction(compst, IFail, 0); break;
922 case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
923 case TRep: coderep(compst, sib1(tree), opt, fl); break;
924 case TBehind: codebehind(compst, tree); break;
925 case TNot: codenot(compst, sib1(tree)); break;
926 case TAnd: codeand(compst, sib1(tree), tt); break;
927 case TCapture: codecapture(compst, tree, tt, fl); break;
928 case TRunTime: coderuntime(compst, tree, tt); break;
929 case TGrammar: codegrammar(compst, tree); break;
930 case TCall: codecall(compst, tree); break;
931 case TSeq: {
932 tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */
933 /* codegen(compst, p2, opt, tt, fl); */
934 tree = sib2(tree); goto tailcall;
935 }
936 case TThrow: { /* labeled failure */
937 addthrowinstruction(compst, tree->labels);
938 break;
939 }
940 case TLabChoice: { /* labeled failure */
941 codelabchoice(compst, sib1(tree), sib2(tree), opt, fl, tree->labels);
942 break;
943 }
944 default: assert(0);
945 }
946}
947
948
949/*
950** Optimize jumps and other jump-like instructions.
951** * Update labels of instructions with labels to their final
952** destinations (e.g., choice L1; ... L1: jmp L2: becomes
953** choice L2)
954** * Jumps to other instructions that do jumps become those
955** instructions (e.g., jump to return becomes a return; jump
956** to commit becomes a commit)
957*/
958static void peephole (CompileState *compst) {
959 Instruction *code = compst->p->code;
960 int i;
961 for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
962 switch (code[i].i.code) {
963 case IChoice: case ICall: case ICommit: case IPartialCommit:
964 case IBackCommit: case ITestChar: case ITestSet: case ILabChoice: /* labeled failure */
965 case ITestAny: { /* instructions with labels */
966 jumptothere(compst, i, finallabel(code, i)); /* optimize label */
967 break;
968 }
969 case IJmp: {
970 int ft = finaltarget(code, i);
971 switch (code[ft].i.code) { /* jumping to what? */
972 case IRet: case IFail: case IFailTwice:
973 case IEnd: { /* instructions with unconditional implicit jumps */
974 code[i] = code[ft]; /* jump becomes that instruction */
975 code[i + 1].i.code = IAny; /* 'no-op' for target position */
976 break;
977 }
978 case ICommit: case IPartialCommit:
979 case IBackCommit: { /* inst. with unconditional explicit jumps */
980 int fft = finallabel(code, ft);
981 code[i] = code[ft]; /* jump becomes that instruction... */
982 jumptothere(compst, i, fft); /* but must correct its offset */
983 i--; /* reoptimize its label */
984 break;
985 }
986 default: {
987 jumptothere(compst, i, ft); /* optimize label */
988 break;
989 }
990 }
991 break;
992 }
993 default: break;
994 }
995 }
996 assert(code[i - 1].i.code == IEnd);
997}
998
999
1000/*
1001** Compile a pattern
1002*/
1003Instruction *compile (lua_State *L, Pattern *p) {
1004 CompileState compst;
1005 compst.p = p; compst.ncode = 0; compst.L = L;
1006 reallocprog(L, p, 2); /* minimum initial size */
1007 codegen(&compst, p->tree, 0, NOINST, fullset);
1008 addinstruction(&compst, IEnd, 0);
1009 reallocprog(L, p, compst.ncode); /* set final size */
1010 peephole(&compst); /* labeled failure */
1011 return p->code;
1012}
1013
1014
1015/* }====================================================== */
1016