aboutsummaryrefslogtreecommitdiff
path: root/lpcode.c
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-23 11:02:52 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-23 11:02:52 -0300
commit f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef (patch)
tree fcd765f59c5d74574bdb21cd7a11e1f723068d87 /lpcode.c
parent 9f7183c280f310c0d0b49b7b9c3b8eac297fafa7 (diff)
download lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.gz
lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.bz2
lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.zip
Towards a smaller encoding for charsets in code
Diffstat (limited to 'lpcode.c')
-rw-r--r-- lpcode.c 23
1 files changed, 14 insertions, 9 deletions
diff --git a/lpcode.c b/lpcode.c
index f537904..8dab896 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -22,6 +22,7 @@ static const Charset fullset_ =
22 22
23static const Charset *fullset = &fullset_; 23static const Charset *fullset = &fullset_;
24 24
25
25/* 26/*
26** {====================================================== 27** {======================================================
27** Analysis and some optimizations 28** Analysis and some optimizations
@@ -441,8 +442,8 @@ static int needfollow (TTree *tree) {
441*/ 442*/
442int sizei (const Instruction *i) { 443int sizei (const Instruction *i) {
443 switch((Opcode)i->i.code) { 444 switch((Opcode)i->i.code) {
444 case ISet: case ISpan: return CHARSETINSTSIZE; 445 case ISet: case ISpan: return 1 + i->i.aux2.set.size;
445 case ITestSet: return CHARSETINSTSIZE + 1; 446 case ITestSet: return 2 + i->i.aux2.set.size;
446 case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: 447 case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall:
447 case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: 448 case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit:
448 case IUTFR: 449 case IUTFR:
@@ -585,10 +586,14 @@ static void codechar (CompileState *compst, int c, int tt) {
585/* 586/*
586** Add a charset postfix to an instruction 587** Add a charset postfix to an instruction
587*/ 588*/
588static void addcharset (CompileState *compst, const byte *cs) { 589static void addcharset (CompileState *compst, int inst, const byte *cs) {
589 int p = gethere(compst); 590 int p = gethere(compst);
590 int i; 591 int i;
591 for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++) 592 /* for now, all sets use all bits */
593 getinstr(compst, inst).i.aux2.set.offset = 0;
594 getinstr(compst, inst).i.aux2.set.size = instsize(CHARSETSIZE);
595 getinstr(compst, inst).i.aux1 = 0;
596 for (i = 0; i < (int)instsize(CHARSETSIZE); i++)
592 nextinstruction(compst); /* space for buffer */ 597 nextinstruction(compst); /* space for buffer */
593 /* fill buffer with charset */ 598 /* fill buffer with charset */
594 loopset(j, getinstr(compst, p).buff[j] = cs[j]); 599 loopset(j, getinstr(compst, p).buff[j] = cs[j]);
@@ -610,8 +615,8 @@ static void codecharset (CompileState *compst, const byte *cs, int tt) {
610 cs_equal(cs, getinstr(compst, tt + 2).buff)) 615 cs_equal(cs, getinstr(compst, tt + 2).buff))
611 addinstruction(compst, IAny, 0); 616 addinstruction(compst, IAny, 0);
612 else { 617 else {
613 addinstruction(compst, ISet, 0); 618 int i = addinstruction(compst, ISet, 0);
614 addcharset(compst, cs); 619 addcharset(compst, i, cs);
615 } 620 }
616 break; 621 break;
617 } 622 }
@@ -641,7 +646,7 @@ static int codetestset (CompileState *compst, Charset *cs, int e) {
641 } 646 }
642 case ISet: { 647 case ISet: {
643 int i = addoffsetinst(compst, ITestSet); 648 int i = addoffsetinst(compst, ITestSet);
644 addcharset(compst, cs->cs); 649 addcharset(compst, i, cs->cs);
645 return i; 650 return i;
646 } 651 }
647 default: assert(0); return 0; 652 default: assert(0); return 0;
@@ -793,8 +798,8 @@ static void coderep (CompileState *compst, TTree *tree, int opt,
793 const Charset *fl) { 798 const Charset *fl) {
794 Charset st; 799 Charset st;
795 if (tocharset(tree, &st)) { 800 if (tocharset(tree, &st)) {
796 addinstruction(compst, ISpan, 0); 801 int i = addinstruction(compst, ISpan, 0);
797 addcharset(compst, st.cs); 802 addcharset(compst, i, st.cs);
798 } 803 }
799 else { 804 else {
800 int e1 = getfirst(tree, fullset, &st); 805 int e1 = getfirst(tree, fullset, &st);