diff options
Diffstat (limited to 'lpcode.c')
-rw-r--r-- | lpcode.c | 79 |
1 files changed, 51 insertions, 28 deletions
@@ -196,7 +196,7 @@ int hascaptures (TTree *tree) { | |||
196 | int checkaux (TTree *tree, int pred) { | 196 | int checkaux (TTree *tree, int pred) { |
197 | tailcall: | 197 | tailcall: |
198 | switch (tree->tag) { | 198 | switch (tree->tag) { |
199 | case TChar: case TSet: case TAny: | 199 | case TChar: case TSet: case TAny: case TUTFR: |
200 | case TFalse: case TOpenCall: case TThrow: /* labeled failure */ | 200 | case TFalse: case TOpenCall: case TThrow: /* labeled failure */ |
201 | return 0; /* not nullable */ | 201 | return 0; /* not nullable */ |
202 | case TRep: case TTrue: | 202 | case TRep: case TTrue: |
@@ -220,7 +220,7 @@ int checkaux (TTree *tree, int pred) { | |||
220 | if (checkaux(sib2(tree), pred)) return 1; | 220 | if (checkaux(sib2(tree), pred)) return 1; |
221 | /* else return checkaux(sib1(tree), pred); */ | 221 | /* else return checkaux(sib1(tree), pred); */ |
222 | tree = sib1(tree); goto tailcall; | 222 | tree = sib1(tree); goto tailcall; |
223 | case TCapture: case TGrammar: case TRule: | 223 | case TCapture: case TGrammar: case TRule: case TXInfo: |
224 | /* return checkaux(sib1(tree), pred); */ | 224 | /* return checkaux(sib1(tree), pred); */ |
225 | tree = sib1(tree); goto tailcall; | 225 | tree = sib1(tree); goto tailcall; |
226 | case TCall: /* return checkaux(sib2(tree), pred); */ | 226 | case TCall: /* return checkaux(sib2(tree), pred); */ |
@@ -239,12 +239,14 @@ int fixedlen (TTree *tree) { | |||
239 | switch (tree->tag) { | 239 | switch (tree->tag) { |
240 | case TChar: case TSet: case TAny: | 240 | case TChar: case TSet: case TAny: |
241 | return len + 1; | 241 | return len + 1; |
242 | case TUTFR: | ||
243 | return (tree->cap == sib1(tree)->cap) ? len + tree->cap : -1; | ||
242 | case TFalse: case TTrue: case TNot: case TAnd: case TBehind: | 244 | case TFalse: case TTrue: case TNot: case TAnd: case TBehind: |
243 | return len; | 245 | return len; |
244 | case TRep: case TRunTime: case TOpenCall: | 246 | case TRep: case TRunTime: case TOpenCall: |
245 | case TThrow: /* labeled failure */ | 247 | case TThrow: /* labeled failure */ |
246 | return -1; | 248 | return -1; |
247 | case TCapture: case TRule: case TGrammar: | 249 | case TCapture: case TRule: case TGrammar: case TXInfo: |
248 | /* return fixedlen(sib1(tree)); */ | 250 | /* return fixedlen(sib1(tree)); */ |
249 | tree = sib1(tree); goto tailcall; | 251 | tree = sib1(tree); goto tailcall; |
250 | case TCall: { | 252 | case TCall: { |
@@ -254,14 +256,14 @@ int fixedlen (TTree *tree) { | |||
254 | else | 256 | else |
255 | return len + n1; | 257 | return len + n1; |
256 | } | 258 | } |
257 | case TSeq: { | 259 | case TSeq: { |
258 | int n1 = fixedlen(sib1(tree)); | 260 | int n1 = fixedlen(sib1(tree)); |
259 | if (n1 < 0) | 261 | if (n1 < 0) |
260 | return -1; | 262 | return -1; |
261 | /* else return fixedlen(sib2(tree)) + len; */ | 263 | /* else return fixedlen(sib2(tree)) + len; */ |
262 | len += n1; tree = sib2(tree); goto tailcall; | 264 | len += n1; tree = sib2(tree); goto tailcall; |
263 | } | 265 | } |
264 | case TChoice: { | 266 | case TChoice: { |
265 | int n1 = fixedlen(sib1(tree)); | 267 | int n1 = fixedlen(sib1(tree)); |
266 | int n2 = fixedlen(sib2(tree)); | 268 | int n2 = fixedlen(sib2(tree)); |
267 | if (n1 != n2 || n1 < 0) | 269 | if (n1 != n2 || n1 < 0) |
@@ -299,6 +301,13 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
299 | tocharset(tree, firstset); | 301 | tocharset(tree, firstset); |
300 | return 0; | 302 | return 0; |
301 | } | 303 | } |
304 | case TUTFR: { | ||
305 | int c; | ||
306 | loopset(i, firstset->cs[i] = 0); /* erase all chars */ | ||
307 | for (c = tree->key; c <= sib1(tree)->key; c++) | ||
308 | setchar(firstset->cs, c); | ||
309 | return 0; | ||
310 | } | ||
302 | case TTrue: { | 311 | case TTrue: { |
303 | loopset(i, firstset->cs[i] = follow->cs[i]); | 312 | loopset(i, firstset->cs[i] = follow->cs[i]); |
304 | return 1; /* accepts the empty string */ | 313 | return 1; /* accepts the empty string */ |
@@ -311,7 +320,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
311 | loopset(i, firstset->cs[i] = fullset->cs[i]); | 320 | loopset(i, firstset->cs[i] = fullset->cs[i]); |
312 | return 1; | 321 | return 1; |
313 | } | 322 | } |
314 | case TChoice: { | 323 | case TChoice: { |
315 | Charset csaux; | 324 | Charset csaux; |
316 | int e1 = getfirst(sib1(tree), follow, firstset); | 325 | int e1 = getfirst(sib1(tree), follow, firstset); |
317 | int e2 = getfirst(sib2(tree), follow, &csaux); | 326 | int e2 = getfirst(sib2(tree), follow, &csaux); |
@@ -339,7 +348,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
339 | loopset(i, firstset->cs[i] |= follow->cs[i]); | 348 | loopset(i, firstset->cs[i] |= follow->cs[i]); |
340 | return 1; /* accept the empty string */ | 349 | return 1; /* accept the empty string */ |
341 | } | 350 | } |
342 | case TCapture: case TGrammar: case TRule: { | 351 | case TCapture: case TGrammar: case TRule: case TXInfo: { |
343 | /* return getfirst(sib1(tree), follow, firstset); */ | 352 | /* return getfirst(sib1(tree), follow, firstset); */ |
344 | tree = sib1(tree); goto tailcall; | 353 | tree = sib1(tree); goto tailcall; |
345 | } | 354 | } |
@@ -385,10 +394,10 @@ static int headfail (TTree *tree) { | |||
385 | case TChar: case TSet: case TAny: case TFalse: | 394 | case TChar: case TSet: case TAny: case TFalse: |
386 | return 1; | 395 | return 1; |
387 | case TTrue: case TRep: case TRunTime: case TNot: | 396 | case TTrue: case TRep: case TRunTime: case TNot: |
388 | case TBehind: | 397 | case TBehind: case TUTFR: |
389 | case TThrow: /* labeled failure: must always throw the label */ | 398 | case TThrow: /* labeled failure: must always throw the label */ |
390 | return 0; | 399 | return 0; |
391 | case TCapture: case TGrammar: case TRule: case TAnd: | 400 | case TCapture: case TGrammar: case TRule: case TXInfo: case TAnd: |
392 | tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ | 401 | tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ |
393 | case TCall: | 402 | case TCall: |
394 | tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ | 403 | tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ |
@@ -396,7 +405,7 @@ static int headfail (TTree *tree) { | |||
396 | if (!nofail(sib2(tree))) return 0; | 405 | if (!nofail(sib2(tree))) return 0; |
397 | /* else return headfail(sib1(tree)); */ | 406 | /* else return headfail(sib1(tree)); */ |
398 | tree = sib1(tree); goto tailcall; | 407 | tree = sib1(tree); goto tailcall; |
399 | case TChoice: | 408 | case TChoice: |
400 | if (!headfail(sib1(tree))) return 0; | 409 | if (!headfail(sib1(tree))) return 0; |
401 | /* else return headfail(sib2(tree)); */ | 410 | /* else return headfail(sib2(tree)); */ |
402 | tree = sib2(tree); goto tailcall; | 411 | tree = sib2(tree); goto tailcall; |
@@ -413,7 +422,7 @@ static int headfail (TTree *tree) { | |||
413 | static int needfollow (TTree *tree) { | 422 | static int needfollow (TTree *tree) { |
414 | tailcall: | 423 | tailcall: |
415 | switch (tree->tag) { | 424 | switch (tree->tag) { |
416 | case TChar: case TSet: case TAny: | 425 | case TChar: case TSet: case TAny: case TUTFR: |
417 | case TFalse: case TTrue: case TAnd: case TNot: | 426 | case TFalse: case TTrue: case TAnd: case TNot: |
418 | case TRunTime: case TGrammar: case TCall: case TBehind: | 427 | case TRunTime: case TGrammar: case TCall: case TBehind: |
419 | case TThrow: /* (?)labeled failure */ | 428 | case TThrow: /* (?)labeled failure */ |
@@ -425,7 +434,7 @@ static int needfollow (TTree *tree) { | |||
425 | case TSeq: | 434 | case TSeq: |
426 | tree = sib2(tree); goto tailcall; | 435 | tree = sib2(tree); goto tailcall; |
427 | default: assert(0); return 0; | 436 | default: assert(0); return 0; |
428 | } | 437 | } |
429 | } | 438 | } |
430 | 439 | ||
431 | /* }====================================================== */ | 440 | /* }====================================================== */ |
@@ -448,12 +457,12 @@ int sizei (const Instruction *i) { | |||
448 | case ITestSet: return CHARSETINSTSIZE + 1; | 457 | case ITestSet: return CHARSETINSTSIZE + 1; |
449 | case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: | 458 | case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: |
450 | case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: | 459 | case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: |
460 | case IUTFR: | ||
451 | return 2; | 461 | return 2; |
452 | case IThrow: case IPredChoice: /* labeled failure */ | 462 | case IThrow: case IPredChoice: /* labeled failure */ |
453 | return 2; | 463 | return 2; |
454 | case IThrowRec: /* labeled failure */ | 464 | case IThrowRec: /* labeled failure */ |
455 | return 3; | 465 | return 3; |
456 | |||
457 | default: return 1; | 466 | default: return 1; |
458 | } | 467 | } |
459 | } | 468 | } |
@@ -517,8 +526,9 @@ static int addinstruction (CompileState *compst, Opcode op, int aux) { | |||
517 | static int addoffsetinst (CompileState *compst, Opcode op) { | 526 | static int addoffsetinst (CompileState *compst, Opcode op) { |
518 | int i = addinstruction(compst, op, 0); /* instruction */ | 527 | int i = addinstruction(compst, op, 0); /* instruction */ |
519 | addinstruction(compst, (Opcode)0, 0); /* open space for offset */ | 528 | addinstruction(compst, (Opcode)0, 0); /* open space for offset */ |
520 | assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2 || | 529 | assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2 || |
521 | op == IThrowRec); /* labeled failure */ | 530 | op == IThrowRec); /* labeled failure */ |
531 | |||
522 | return i; | 532 | return i; |
523 | } | 533 | } |
524 | 534 | ||
@@ -528,13 +538,13 @@ static void codethrow (CompileState *compst, TTree *throw) { | |||
528 | int recov, aux; | 538 | int recov, aux; |
529 | if (throw->u.ps != 0) { | 539 | if (throw->u.ps != 0) { |
530 | recov = addoffsetinst(compst, IThrowRec); | 540 | recov = addoffsetinst(compst, IThrowRec); |
531 | assert(sib2(throw)->tag == TRule); | 541 | assert(sib1(sib2(throw))->tag == TXInfo); |
532 | } else { | 542 | } else { |
533 | recov = addinstruction(compst, IThrow, 0); | 543 | recov = addinstruction(compst, IThrow, 0); |
534 | } | 544 | } |
535 | aux = nextinstruction(compst); | 545 | aux = nextinstruction(compst); |
536 | getinstr(compst, aux).i.key = throw->key; /* next instruction keeps only rule name */ | 546 | getinstr(compst, aux).i.key = throw->key; /* next instruction keeps only rule name */ |
537 | getinstr(compst, recov).i.key = sib2(throw)->cap; /* rule number */ | 547 | getinstr(compst, recov).i.key = sib1(sib2(throw))->u.n; /* rule number */ |
538 | } | 548 | } |
539 | /* labeled failure */ | 549 | /* labeled failure */ |
540 | 550 | ||
@@ -547,6 +557,16 @@ static void setoffset (CompileState *compst, int instruction, int offset) { | |||
547 | } | 557 | } |
548 | 558 | ||
549 | 559 | ||
560 | static void codeutfr (CompileState *compst, TTree *tree) { | ||
561 | int i = addoffsetinst(compst, IUTFR); | ||
562 | int to = sib1(tree)->u.n; | ||
563 | assert(sib1(tree)->tag == TXInfo); | ||
564 | getinstr(compst, i + 1).offset = tree->u.n; | ||
565 | getinstr(compst, i).i.aux = to & 0xff; | ||
566 | getinstr(compst, i).i.key = to >> 8; | ||
567 | } | ||
568 | |||
569 | |||
550 | /* | 570 | /* |
551 | ** Add a capture instruction: | 571 | ** Add a capture instruction: |
552 | ** 'op' is the capture instruction; 'cap' the capture kind; | 572 | ** 'op' is the capture instruction; 'cap' the capture kind; |
@@ -694,11 +714,11 @@ static void codebehind (CompileState *compst, TTree *tree) { | |||
694 | 714 | ||
695 | /* | 715 | /* |
696 | ** Choice; optimizations: | 716 | ** Choice; optimizations: |
697 | ** - when p1 is headfail or | 717 | ** - when p1 is headfail or when first(p1) and first(p2) are disjoint, |
698 | ** when first(p1) and first(p2) are disjoint, than | 718 | ** than a character not in first(p1) cannot go to p1 and a character |
699 | ** a character not in first(p1) cannot go to p1, and a character | 719 | ** in first(p1) cannot go to p2, either because p1 will accept |
700 | ** in first(p1) cannot go to p2 (at it is not in first(p2)). | 720 | ** (headfail) or because it is not in first(p2) (disjoint). |
701 | ** (The optimization is not valid if p1 accepts the empty string, | 721 | ** (The second case is not valid if p1 accepts the empty string, |
702 | ** as then there is no character at all...) | 722 | ** as then there is no character at all...) |
703 | ** - when p2 is empty and opt is true; a IPartialCommit can reuse | 723 | ** - when p2 is empty and opt is true; a IPartialCommit can reuse |
704 | ** the Choice already active in the stack. | 724 | ** the Choice already active in the stack. |
@@ -715,7 +735,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, | |||
715 | int jmp = NOINST; | 735 | int jmp = NOINST; |
716 | codegen(compst, p1, 0, test, fl); | 736 | codegen(compst, p1, 0, test, fl); |
717 | if (!emptyp2) | 737 | if (!emptyp2) |
718 | jmp = addoffsetinst(compst, IJmp); | 738 | jmp = addoffsetinst(compst, IJmp); |
719 | jumptohere(compst, test); | 739 | jumptohere(compst, test); |
720 | codegen(compst, p2, opt, NOINST, fl); | 740 | codegen(compst, p2, opt, NOINST, fl); |
721 | jumptohere(compst, jmp); | 741 | jumptohere(compst, jmp); |
@@ -726,7 +746,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, | |||
726 | codegen(compst, p1, 1, NOINST, fullset); | 746 | codegen(compst, p1, 1, NOINST, fullset); |
727 | } | 747 | } |
728 | else { | 748 | else { |
729 | /* <p1 / p2> == | 749 | /* <p1 / p2> == |
730 | test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */ | 750 | test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */ |
731 | int pcommit; | 751 | int pcommit; |
732 | int test = codetestset(compst, &cs1, e1); | 752 | int test = codetestset(compst, &cs1, e1); |
@@ -887,7 +907,7 @@ static void correctcalls (CompileState *compst, int *positions, | |||
887 | else | 907 | else |
888 | code[i].i.code = ICall; | 908 | code[i].i.code = ICall; |
889 | jumptothere(compst, i, rule); /* call jumps to respective rule */ | 909 | jumptothere(compst, i, rule); /* call jumps to respective rule */ |
890 | } else if (code[i].i.code == IThrowRec) { | 910 | } else if (code[i].i.code == IThrowRec) { /* labeled failure */ |
891 | int n = code[i].i.key; /* rule number */ | 911 | int n = code[i].i.key; /* rule number */ |
892 | int rule = positions[n]; /* rule position */ | 912 | int rule = positions[n]; /* rule position */ |
893 | assert(rule == from || code[rule - 1].i.code == IRet); | 913 | assert(rule == from || code[rule - 1].i.code == IRet); |
@@ -912,8 +932,10 @@ static void codegrammar (CompileState *compst, TTree *grammar) { | |||
912 | int start = gethere(compst); /* here starts the initial rule */ | 932 | int start = gethere(compst); /* here starts the initial rule */ |
913 | jumptohere(compst, firstcall); | 933 | jumptohere(compst, firstcall); |
914 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | 934 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { |
935 | TTree *r = sib1(rule); | ||
936 | assert(r->tag == TXInfo); | ||
915 | positions[rulenumber++] = gethere(compst); /* save rule position */ | 937 | positions[rulenumber++] = gethere(compst); /* save rule position */ |
916 | codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */ | 938 | codegen(compst, sib1(r), 0, NOINST, fullset); /* code rule */ |
917 | addinstruction(compst, IRet, 0); | 939 | addinstruction(compst, IRet, 0); |
918 | } | 940 | } |
919 | assert(rule->tag == TTrue); | 941 | assert(rule->tag == TTrue); |
@@ -924,8 +946,8 @@ static void codegrammar (CompileState *compst, TTree *grammar) { | |||
924 | 946 | ||
925 | static void codecall (CompileState *compst, TTree *call) { | 947 | static void codecall (CompileState *compst, TTree *call) { |
926 | int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ | 948 | int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ |
927 | getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */ | 949 | assert(sib1(sib2(call))->tag == TXInfo); |
928 | assert(sib2(call)->tag == TRule); | 950 | getinstr(compst, c).i.key = sib1(sib2(call))->u.n; /* rule number */ |
929 | } | 951 | } |
930 | 952 | ||
931 | 953 | ||
@@ -963,6 +985,7 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | |||
963 | case TSet: codecharset(compst, treebuffer(tree), tt); break; | 985 | case TSet: codecharset(compst, treebuffer(tree), tt); break; |
964 | case TTrue: break; | 986 | case TTrue: break; |
965 | case TFalse: addinstruction(compst, IFail, 0); break; | 987 | case TFalse: addinstruction(compst, IFail, 0); break; |
988 | case TUTFR: codeutfr(compst, tree); break; | ||
966 | case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break; | 989 | case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break; |
967 | case TRep: coderep(compst, sib1(tree), opt, fl); break; | 990 | case TRep: coderep(compst, sib1(tree), opt, fl); break; |
968 | case TBehind: codebehind(compst, tree); break; | 991 | case TBehind: codebehind(compst, tree); break; |
@@ -1013,7 +1036,7 @@ static void peephole (CompileState *compst) { | |||
1013 | case IRet: case IFail: case IFailTwice: | 1036 | case IRet: case IFail: case IFailTwice: |
1014 | case IEnd: { /* instructions with unconditional implicit jumps */ | 1037 | case IEnd: { /* instructions with unconditional implicit jumps */ |
1015 | code[i] = code[ft]; /* jump becomes that instruction */ | 1038 | code[i] = code[ft]; /* jump becomes that instruction */ |
1016 | code[i + 1].i.code = IAny; /* 'no-op' for target position */ | 1039 | code[i + 1].i.code = IEmpty; /* 'no-op' for target position */ |
1017 | break; | 1040 | break; |
1018 | } | 1041 | } |
1019 | case ICommit: case IPartialCommit: | 1042 | case ICommit: case IPartialCommit: |