aboutsummaryrefslogtreecommitdiff
path: root/lpcode.c
diff options
context:
space:
mode:
Diffstat (limited to 'lpcode.c')
-rw-r--r--lpcode.c79
1 files changed, 51 insertions, 28 deletions
diff --git a/lpcode.c b/lpcode.c
index ecc6fa3..a2d08f7 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -196,7 +196,7 @@ int hascaptures (TTree *tree) {
196int checkaux (TTree *tree, int pred) { 196int checkaux (TTree *tree, int pred) {
197 tailcall: 197 tailcall:
198 switch (tree->tag) { 198 switch (tree->tag) {
199 case TChar: case TSet: case TAny: 199 case TChar: case TSet: case TAny: case TUTFR:
200 case TFalse: case TOpenCall: case TThrow: /* labeled failure */ 200 case TFalse: case TOpenCall: case TThrow: /* labeled failure */
201 return 0; /* not nullable */ 201 return 0; /* not nullable */
202 case TRep: case TTrue: 202 case TRep: case TTrue:
@@ -220,7 +220,7 @@ int checkaux (TTree *tree, int pred) {
220 if (checkaux(sib2(tree), pred)) return 1; 220 if (checkaux(sib2(tree), pred)) return 1;
221 /* else return checkaux(sib1(tree), pred); */ 221 /* else return checkaux(sib1(tree), pred); */
222 tree = sib1(tree); goto tailcall; 222 tree = sib1(tree); goto tailcall;
223 case TCapture: case TGrammar: case TRule: 223 case TCapture: case TGrammar: case TRule: case TXInfo:
224 /* return checkaux(sib1(tree), pred); */ 224 /* return checkaux(sib1(tree), pred); */
225 tree = sib1(tree); goto tailcall; 225 tree = sib1(tree); goto tailcall;
226 case TCall: /* return checkaux(sib2(tree), pred); */ 226 case TCall: /* return checkaux(sib2(tree), pred); */
@@ -239,12 +239,14 @@ int fixedlen (TTree *tree) {
239 switch (tree->tag) { 239 switch (tree->tag) {
240 case TChar: case TSet: case TAny: 240 case TChar: case TSet: case TAny:
241 return len + 1; 241 return len + 1;
242 case TUTFR:
243 return (tree->cap == sib1(tree)->cap) ? len + tree->cap : -1;
242 case TFalse: case TTrue: case TNot: case TAnd: case TBehind: 244 case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
243 return len; 245 return len;
244 case TRep: case TRunTime: case TOpenCall: 246 case TRep: case TRunTime: case TOpenCall:
245 case TThrow: /* labeled failure */ 247 case TThrow: /* labeled failure */
246 return -1; 248 return -1;
247 case TCapture: case TRule: case TGrammar: 249 case TCapture: case TRule: case TGrammar: case TXInfo:
248 /* return fixedlen(sib1(tree)); */ 250 /* return fixedlen(sib1(tree)); */
249 tree = sib1(tree); goto tailcall; 251 tree = sib1(tree); goto tailcall;
250 case TCall: { 252 case TCall: {
@@ -254,14 +256,14 @@ int fixedlen (TTree *tree) {
254 else 256 else
255 return len + n1; 257 return len + n1;
256 } 258 }
257 case TSeq: { 259 case TSeq: {
258 int n1 = fixedlen(sib1(tree)); 260 int n1 = fixedlen(sib1(tree));
259 if (n1 < 0) 261 if (n1 < 0)
260 return -1; 262 return -1;
261 /* else return fixedlen(sib2(tree)) + len; */ 263 /* else return fixedlen(sib2(tree)) + len; */
262 len += n1; tree = sib2(tree); goto tailcall; 264 len += n1; tree = sib2(tree); goto tailcall;
263 } 265 }
264 case TChoice: { 266 case TChoice: {
265 int n1 = fixedlen(sib1(tree)); 267 int n1 = fixedlen(sib1(tree));
266 int n2 = fixedlen(sib2(tree)); 268 int n2 = fixedlen(sib2(tree));
267 if (n1 != n2 || n1 < 0) 269 if (n1 != n2 || n1 < 0)
@@ -299,6 +301,13 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
299 tocharset(tree, firstset); 301 tocharset(tree, firstset);
300 return 0; 302 return 0;
301 } 303 }
304 case TUTFR: {
305 int c;
306 loopset(i, firstset->cs[i] = 0); /* erase all chars */
307 for (c = tree->key; c <= sib1(tree)->key; c++)
308 setchar(firstset->cs, c);
309 return 0;
310 }
302 case TTrue: { 311 case TTrue: {
303 loopset(i, firstset->cs[i] = follow->cs[i]); 312 loopset(i, firstset->cs[i] = follow->cs[i]);
304 return 1; /* accepts the empty string */ 313 return 1; /* accepts the empty string */
@@ -311,7 +320,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
311 loopset(i, firstset->cs[i] = fullset->cs[i]); 320 loopset(i, firstset->cs[i] = fullset->cs[i]);
312 return 1; 321 return 1;
313 } 322 }
314 case TChoice: { 323 case TChoice: {
315 Charset csaux; 324 Charset csaux;
316 int e1 = getfirst(sib1(tree), follow, firstset); 325 int e1 = getfirst(sib1(tree), follow, firstset);
317 int e2 = getfirst(sib2(tree), follow, &csaux); 326 int e2 = getfirst(sib2(tree), follow, &csaux);
@@ -339,7 +348,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
339 loopset(i, firstset->cs[i] |= follow->cs[i]); 348 loopset(i, firstset->cs[i] |= follow->cs[i]);
340 return 1; /* accept the empty string */ 349 return 1; /* accept the empty string */
341 } 350 }
342 case TCapture: case TGrammar: case TRule: { 351 case TCapture: case TGrammar: case TRule: case TXInfo: {
343 /* return getfirst(sib1(tree), follow, firstset); */ 352 /* return getfirst(sib1(tree), follow, firstset); */
344 tree = sib1(tree); goto tailcall; 353 tree = sib1(tree); goto tailcall;
345 } 354 }
@@ -385,10 +394,10 @@ static int headfail (TTree *tree) {
385 case TChar: case TSet: case TAny: case TFalse: 394 case TChar: case TSet: case TAny: case TFalse:
386 return 1; 395 return 1;
387 case TTrue: case TRep: case TRunTime: case TNot: 396 case TTrue: case TRep: case TRunTime: case TNot:
388 case TBehind: 397 case TBehind: case TUTFR:
389 case TThrow: /* labeled failure: must always throw the label */ 398 case TThrow: /* labeled failure: must always throw the label */
390 return 0; 399 return 0;
391 case TCapture: case TGrammar: case TRule: case TAnd: 400 case TCapture: case TGrammar: case TRule: case TXInfo: case TAnd:
392 tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ 401 tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */
393 case TCall: 402 case TCall:
394 tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ 403 tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */
@@ -396,7 +405,7 @@ static int headfail (TTree *tree) {
396 if (!nofail(sib2(tree))) return 0; 405 if (!nofail(sib2(tree))) return 0;
397 /* else return headfail(sib1(tree)); */ 406 /* else return headfail(sib1(tree)); */
398 tree = sib1(tree); goto tailcall; 407 tree = sib1(tree); goto tailcall;
399 case TChoice: 408 case TChoice:
400 if (!headfail(sib1(tree))) return 0; 409 if (!headfail(sib1(tree))) return 0;
401 /* else return headfail(sib2(tree)); */ 410 /* else return headfail(sib2(tree)); */
402 tree = sib2(tree); goto tailcall; 411 tree = sib2(tree); goto tailcall;
@@ -413,7 +422,7 @@ static int headfail (TTree *tree) {
413static int needfollow (TTree *tree) { 422static int needfollow (TTree *tree) {
414 tailcall: 423 tailcall:
415 switch (tree->tag) { 424 switch (tree->tag) {
416 case TChar: case TSet: case TAny: 425 case TChar: case TSet: case TAny: case TUTFR:
417 case TFalse: case TTrue: case TAnd: case TNot: 426 case TFalse: case TTrue: case TAnd: case TNot:
418 case TRunTime: case TGrammar: case TCall: case TBehind: 427 case TRunTime: case TGrammar: case TCall: case TBehind:
419 case TThrow: /* (?)labeled failure */ 428 case TThrow: /* (?)labeled failure */
@@ -425,7 +434,7 @@ static int needfollow (TTree *tree) {
425 case TSeq: 434 case TSeq:
426 tree = sib2(tree); goto tailcall; 435 tree = sib2(tree); goto tailcall;
427 default: assert(0); return 0; 436 default: assert(0); return 0;
428 } 437 }
429} 438}
430 439
431/* }====================================================== */ 440/* }====================================================== */
@@ -448,12 +457,12 @@ int sizei (const Instruction *i) {
448 case ITestSet: return CHARSETINSTSIZE + 1; 457 case ITestSet: return CHARSETINSTSIZE + 1;
449 case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: 458 case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall:
450 case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: 459 case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit:
460 case IUTFR:
451 return 2; 461 return 2;
452 case IThrow: case IPredChoice: /* labeled failure */ 462 case IThrow: case IPredChoice: /* labeled failure */
453 return 2; 463 return 2;
454 case IThrowRec: /* labeled failure */ 464 case IThrowRec: /* labeled failure */
455 return 3; 465 return 3;
456
457 default: return 1; 466 default: return 1;
458 } 467 }
459} 468}
@@ -517,8 +526,9 @@ static int addinstruction (CompileState *compst, Opcode op, int aux) {
517static int addoffsetinst (CompileState *compst, Opcode op) { 526static int addoffsetinst (CompileState *compst, Opcode op) {
518 int i = addinstruction(compst, op, 0); /* instruction */ 527 int i = addinstruction(compst, op, 0); /* instruction */
519 addinstruction(compst, (Opcode)0, 0); /* open space for offset */ 528 addinstruction(compst, (Opcode)0, 0); /* open space for offset */
520 assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2 || 529 assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2 ||
521 op == IThrowRec); /* labeled failure */ 530 op == IThrowRec); /* labeled failure */
531
522 return i; 532 return i;
523} 533}
524 534
@@ -528,13 +538,13 @@ static void codethrow (CompileState *compst, TTree *throw) {
528 int recov, aux; 538 int recov, aux;
529 if (throw->u.ps != 0) { 539 if (throw->u.ps != 0) {
530 recov = addoffsetinst(compst, IThrowRec); 540 recov = addoffsetinst(compst, IThrowRec);
531 assert(sib2(throw)->tag == TRule); 541 assert(sib1(sib2(throw))->tag == TXInfo);
532 } else { 542 } else {
533 recov = addinstruction(compst, IThrow, 0); 543 recov = addinstruction(compst, IThrow, 0);
534 } 544 }
535 aux = nextinstruction(compst); 545 aux = nextinstruction(compst);
536 getinstr(compst, aux).i.key = throw->key; /* next instruction keeps only rule name */ 546 getinstr(compst, aux).i.key = throw->key; /* next instruction keeps only rule name */
537 getinstr(compst, recov).i.key = sib2(throw)->cap; /* rule number */ 547 getinstr(compst, recov).i.key = sib1(sib2(throw))->u.n; /* rule number */
538} 548}
539/* labeled failure */ 549/* labeled failure */
540 550
@@ -547,6 +557,16 @@ static void setoffset (CompileState *compst, int instruction, int offset) {
547} 557}
548 558
549 559
560static void codeutfr (CompileState *compst, TTree *tree) {
561 int i = addoffsetinst(compst, IUTFR);
562 int to = sib1(tree)->u.n;
563 assert(sib1(tree)->tag == TXInfo);
564 getinstr(compst, i + 1).offset = tree->u.n;
565 getinstr(compst, i).i.aux = to & 0xff;
566 getinstr(compst, i).i.key = to >> 8;
567}
568
569
550/* 570/*
551** Add a capture instruction: 571** Add a capture instruction:
552** 'op' is the capture instruction; 'cap' the capture kind; 572** 'op' is the capture instruction; 'cap' the capture kind;
@@ -694,11 +714,11 @@ static void codebehind (CompileState *compst, TTree *tree) {
694 714
695/* 715/*
696** Choice; optimizations: 716** Choice; optimizations:
697** - when p1 is headfail or 717** - when p1 is headfail or when first(p1) and first(p2) are disjoint,
698** when first(p1) and first(p2) are disjoint, than 718** than a character not in first(p1) cannot go to p1 and a character
699** a character not in first(p1) cannot go to p1, and a character 719** in first(p1) cannot go to p2, either because p1 will accept
700** in first(p1) cannot go to p2 (at it is not in first(p2)). 720** (headfail) or because it is not in first(p2) (disjoint).
701** (The optimization is not valid if p1 accepts the empty string, 721** (The second case is not valid if p1 accepts the empty string,
702** as then there is no character at all...) 722** as then there is no character at all...)
703** - when p2 is empty and opt is true; a IPartialCommit can reuse 723** - when p2 is empty and opt is true; a IPartialCommit can reuse
704** the Choice already active in the stack. 724** the Choice already active in the stack.
@@ -715,7 +735,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
715 int jmp = NOINST; 735 int jmp = NOINST;
716 codegen(compst, p1, 0, test, fl); 736 codegen(compst, p1, 0, test, fl);
717 if (!emptyp2) 737 if (!emptyp2)
718 jmp = addoffsetinst(compst, IJmp); 738 jmp = addoffsetinst(compst, IJmp);
719 jumptohere(compst, test); 739 jumptohere(compst, test);
720 codegen(compst, p2, opt, NOINST, fl); 740 codegen(compst, p2, opt, NOINST, fl);
721 jumptohere(compst, jmp); 741 jumptohere(compst, jmp);
@@ -726,7 +746,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
726 codegen(compst, p1, 1, NOINST, fullset); 746 codegen(compst, p1, 1, NOINST, fullset);
727 } 747 }
728 else { 748 else {
729 /* <p1 / p2> == 749 /* <p1 / p2> ==
730 test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */ 750 test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
731 int pcommit; 751 int pcommit;
732 int test = codetestset(compst, &cs1, e1); 752 int test = codetestset(compst, &cs1, e1);
@@ -887,7 +907,7 @@ static void correctcalls (CompileState *compst, int *positions,
887 else 907 else
888 code[i].i.code = ICall; 908 code[i].i.code = ICall;
889 jumptothere(compst, i, rule); /* call jumps to respective rule */ 909 jumptothere(compst, i, rule); /* call jumps to respective rule */
890 } else if (code[i].i.code == IThrowRec) { 910 } else if (code[i].i.code == IThrowRec) { /* labeled failure */
891 int n = code[i].i.key; /* rule number */ 911 int n = code[i].i.key; /* rule number */
892 int rule = positions[n]; /* rule position */ 912 int rule = positions[n]; /* rule position */
893 assert(rule == from || code[rule - 1].i.code == IRet); 913 assert(rule == from || code[rule - 1].i.code == IRet);
@@ -912,8 +932,10 @@ static void codegrammar (CompileState *compst, TTree *grammar) {
912 int start = gethere(compst); /* here starts the initial rule */ 932 int start = gethere(compst); /* here starts the initial rule */
913 jumptohere(compst, firstcall); 933 jumptohere(compst, firstcall);
914 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { 934 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
935 TTree *r = sib1(rule);
936 assert(r->tag == TXInfo);
915 positions[rulenumber++] = gethere(compst); /* save rule position */ 937 positions[rulenumber++] = gethere(compst); /* save rule position */
916 codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */ 938 codegen(compst, sib1(r), 0, NOINST, fullset); /* code rule */
917 addinstruction(compst, IRet, 0); 939 addinstruction(compst, IRet, 0);
918 } 940 }
919 assert(rule->tag == TTrue); 941 assert(rule->tag == TTrue);
@@ -924,8 +946,8 @@ static void codegrammar (CompileState *compst, TTree *grammar) {
924 946
925static void codecall (CompileState *compst, TTree *call) { 947static void codecall (CompileState *compst, TTree *call) {
926 int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ 948 int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */
927 getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */ 949 assert(sib1(sib2(call))->tag == TXInfo);
928 assert(sib2(call)->tag == TRule); 950 getinstr(compst, c).i.key = sib1(sib2(call))->u.n; /* rule number */
929} 951}
930 952
931 953
@@ -963,6 +985,7 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
963 case TSet: codecharset(compst, treebuffer(tree), tt); break; 985 case TSet: codecharset(compst, treebuffer(tree), tt); break;
964 case TTrue: break; 986 case TTrue: break;
965 case TFalse: addinstruction(compst, IFail, 0); break; 987 case TFalse: addinstruction(compst, IFail, 0); break;
988 case TUTFR: codeutfr(compst, tree); break;
966 case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break; 989 case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
967 case TRep: coderep(compst, sib1(tree), opt, fl); break; 990 case TRep: coderep(compst, sib1(tree), opt, fl); break;
968 case TBehind: codebehind(compst, tree); break; 991 case TBehind: codebehind(compst, tree); break;
@@ -1013,7 +1036,7 @@ static void peephole (CompileState *compst) {
1013 case IRet: case IFail: case IFailTwice: 1036 case IRet: case IFail: case IFailTwice:
1014 case IEnd: { /* instructions with unconditional implicit jumps */ 1037 case IEnd: { /* instructions with unconditional implicit jumps */
1015 code[i] = code[ft]; /* jump becomes that instruction */ 1038 code[i] = code[ft]; /* jump becomes that instruction */
1016 code[i + 1].i.code = IAny; /* 'no-op' for target position */ 1039 code[i + 1].i.code = IEmpty; /* 'no-op' for target position */
1017 break; 1040 break;
1018 } 1041 }
1019 case ICommit: case IPartialCommit: 1042 case ICommit: case IPartialCommit: