aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2019-04-14 12:04:23 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2019-04-14 12:04:23 -0300
commit3f7797419e4d7493e1364290a5b127d1cb45e3bf (patch)
tree8dd91b0d008d5ea9f9c96eada86510495c97d1e3
parentd9f83dded93a35fb333c4e1bd371c401f7129fd1 (diff)
downloadlpeg-3f7797419e4d7493e1364290a5b127d1cb45e3bf.tar.gz
lpeg-3f7797419e4d7493e1364290a5b127d1cb45e3bf.tar.bz2
lpeg-3f7797419e4d7493e1364290a5b127d1cb45e3bf.zip
Removed 'unsigned char' limit on number of rules in grammars
Added a new tree-type node 'TXInfo', which follows 'TRule' nodes, to store extra information about a node. (In this case, the rule number, with an 'unsigned short' field.)
-rw-r--r--lpcode.c18
-rw-r--r--lpprint.c37
-rw-r--r--lptree.c32
-rw-r--r--lptree.h4
-rw-r--r--lptypes.h4
-rw-r--r--lpvm.h3
-rwxr-xr-xtest.lua25
7 files changed, 75 insertions, 48 deletions
diff --git a/lpcode.c b/lpcode.c
index 3923459..5fddfab 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -220,7 +220,7 @@ int checkaux (TTree *tree, int pred) {
220 if (checkaux(sib2(tree), pred)) return 1; 220 if (checkaux(sib2(tree), pred)) return 1;
221 /* else return checkaux(sib1(tree), pred); */ 221 /* else return checkaux(sib1(tree), pred); */
222 tree = sib1(tree); goto tailcall; 222 tree = sib1(tree); goto tailcall;
223 case TCapture: case TGrammar: case TRule: 223 case TCapture: case TGrammar: case TRule: case TXInfo:
224 /* return checkaux(sib1(tree), pred); */ 224 /* return checkaux(sib1(tree), pred); */
225 tree = sib1(tree); goto tailcall; 225 tree = sib1(tree); goto tailcall;
226 case TCall: /* return checkaux(sib2(tree), pred); */ 226 case TCall: /* return checkaux(sib2(tree), pred); */
@@ -243,7 +243,7 @@ int fixedlen (TTree *tree) {
243 return len; 243 return len;
244 case TRep: case TRunTime: case TOpenCall: 244 case TRep: case TRunTime: case TOpenCall:
245 return -1; 245 return -1;
246 case TCapture: case TRule: case TGrammar: 246 case TCapture: case TRule: case TGrammar: case TXInfo:
247 /* return fixedlen(sib1(tree)); */ 247 /* return fixedlen(sib1(tree)); */
248 tree = sib1(tree); goto tailcall; 248 tree = sib1(tree); goto tailcall;
249 case TCall: { 249 case TCall: {
@@ -334,7 +334,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
334 loopset(i, firstset->cs[i] |= follow->cs[i]); 334 loopset(i, firstset->cs[i] |= follow->cs[i]);
335 return 1; /* accept the empty string */ 335 return 1; /* accept the empty string */
336 } 336 }
337 case TCapture: case TGrammar: case TRule: { 337 case TCapture: case TGrammar: case TRule: case TXInfo: {
338 /* return getfirst(sib1(tree), follow, firstset); */ 338 /* return getfirst(sib1(tree), follow, firstset); */
339 tree = sib1(tree); goto tailcall; 339 tree = sib1(tree); goto tailcall;
340 } 340 }
@@ -382,7 +382,7 @@ static int headfail (TTree *tree) {
382 case TTrue: case TRep: case TRunTime: case TNot: 382 case TTrue: case TRep: case TRunTime: case TNot:
383 case TBehind: 383 case TBehind:
384 return 0; 384 return 0;
385 case TCapture: case TGrammar: case TRule: case TAnd: 385 case TCapture: case TGrammar: case TRule: case TXInfo: case TAnd:
386 tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ 386 tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */
387 case TCall: 387 case TCall:
388 tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ 388 tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */
@@ -874,8 +874,10 @@ static void codegrammar (CompileState *compst, TTree *grammar) {
874 int start = gethere(compst); /* here starts the initial rule */ 874 int start = gethere(compst); /* here starts the initial rule */
875 jumptohere(compst, firstcall); 875 jumptohere(compst, firstcall);
876 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { 876 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
877 TTree *r = sib1(rule);
878 assert(r->tag == TXInfo);
877 positions[rulenumber++] = gethere(compst); /* save rule position */ 879 positions[rulenumber++] = gethere(compst); /* save rule position */
878 codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */ 880 codegen(compst, sib1(r), 0, NOINST, fullset); /* code rule */
879 addinstruction(compst, IRet, 0); 881 addinstruction(compst, IRet, 0);
880 } 882 }
881 assert(rule->tag == TTrue); 883 assert(rule->tag == TTrue);
@@ -886,8 +888,8 @@ static void codegrammar (CompileState *compst, TTree *grammar) {
886 888
887static void codecall (CompileState *compst, TTree *call) { 889static void codecall (CompileState *compst, TTree *call) {
888 int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ 890 int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */
889 getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */ 891 assert(sib1(sib2(call))->tag == TXInfo);
890 assert(sib2(call)->tag == TRule); 892 getinstr(compst, c).i.key = sib1(sib2(call))->u.n; /* rule number */
891} 893}
892 894
893 895
@@ -971,7 +973,7 @@ static void peephole (CompileState *compst) {
971 case IRet: case IFail: case IFailTwice: 973 case IRet: case IFail: case IFailTwice:
972 case IEnd: { /* instructions with unconditional implicit jumps */ 974 case IEnd: { /* instructions with unconditional implicit jumps */
973 code[i] = code[ft]; /* jump becomes that instruction */ 975 code[i] = code[ft]; /* jump becomes that instruction */
974 code[i + 1].i.code = IAny; /* 'no-op' for target position */ 976 code[i + 1].i.code = IEmpty; /* 'no-op' for target position */
975 break; 977 break;
976 } 978 }
977 case ICommit: case IPartialCommit: 979 case ICommit: case IPartialCommit:
diff --git a/lpprint.c b/lpprint.c
index df62cbe..397785e 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -60,7 +60,8 @@ void printinst (const Instruction *op, const Instruction *p) {
60 "ret", "end", 60 "ret", "end",
61 "choice", "jmp", "call", "open_call", 61 "choice", "jmp", "call", "open_call",
62 "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", 62 "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
63 "fullcapture", "opencapture", "closecapture", "closeruntime" 63 "fullcapture", "opencapture", "closecapture", "closeruntime",
64 "--"
64 }; 65 };
65 printf("%02ld: %s ", (long)(p - op), names[p->i.code]); 66 printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
66 switch ((Opcode)p->i.code) { 67 switch ((Opcode)p->i.code) {
@@ -151,7 +152,7 @@ static const char *tagnames[] = {
151 "rep", 152 "rep",
152 "seq", "choice", 153 "seq", "choice",
153 "not", "and", 154 "not", "and",
154 "call", "opencall", "rule", "grammar", 155 "call", "opencall", "rule", "xinfo", "grammar",
155 "behind", 156 "behind",
156 "capture", "run-time" 157 "capture", "run-time"
157}; 158};
@@ -159,6 +160,7 @@ static const char *tagnames[] = {
159 160
160void printtree (TTree *tree, int ident) { 161void printtree (TTree *tree, int ident) {
161 int i; 162 int i;
163 int sibs = numsiblings[tree->tag];
162 for (i = 0; i < ident; i++) printf(" "); 164 for (i = 0; i < ident; i++) printf(" ");
163 printf("%s", tagnames[tree->tag]); 165 printf("%s", tagnames[tree->tag]);
164 switch (tree->tag) { 166 switch (tree->tag) {
@@ -176,24 +178,26 @@ void printtree (TTree *tree, int ident) {
176 break; 178 break;
177 } 179 }
178 case TOpenCall: case TCall: { 180 case TOpenCall: case TCall: {
179 assert(sib2(tree)->tag == TRule); 181 assert(sib1(sib2(tree))->tag == TXInfo);
180 printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap); 182 printf(" key: %d (rule: %d)\n", tree->key, sib1(sib2(tree))->u.n);
181 break; 183 break;
182 } 184 }
183 case TBehind: { 185 case TBehind: {
184 printf(" %d\n", tree->u.n); 186 printf(" %d\n", tree->u.n);
185 printtree(sib1(tree), ident + 2);
186 break; 187 break;
187 } 188 }
188 case TCapture: { 189 case TCapture: {
189 printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); 190 printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key);
190 printtree(sib1(tree), ident + 2);
191 break; 191 break;
192 } 192 }
193 case TRule: { 193 case TRule: {
194 printf(" n: %d key: %d\n", tree->cap, tree->key); 194 printf(" key: %d\n", tree->key);
195 printtree(sib1(tree), ident + 2); 195 sibs = 1; /* do not print 'sib2' (next rule) as a sibling */
196 break; /* do not print next rule as a sibling */ 196 break;
197 }
198 case TXInfo: {
199 printf(" n: %d\n", tree->u.n);
200 break;
197 } 201 }
198 case TGrammar: { 202 case TGrammar: {
199 TTree *rule = sib1(tree); 203 TTree *rule = sib1(tree);
@@ -203,18 +207,17 @@ void printtree (TTree *tree, int ident) {
203 rule = sib2(rule); 207 rule = sib2(rule);
204 } 208 }
205 assert(rule->tag == TTrue); /* sentinel */ 209 assert(rule->tag == TTrue); /* sentinel */
210 sibs = 0; /* siblings already handled */
206 break; 211 break;
207 } 212 }
208 default: { 213 default:
209 int sibs = numsiblings[tree->tag];
210 printf("\n"); 214 printf("\n");
211 if (sibs >= 1) {
212 printtree(sib1(tree), ident + 2);
213 if (sibs >= 2)
214 printtree(sib2(tree), ident + 2);
215 }
216 break; 215 break;
217 } 216 }
217 if (sibs >= 1) {
218 printtree(sib1(tree), ident + 2);
219 if (sibs >= 2)
220 printtree(sib2(tree), ident + 2);
218 } 221 }
219} 222}
220 223
diff --git a/lptree.c b/lptree.c
index 5c8de94..557090b 100644
--- a/lptree.c
+++ b/lptree.c
@@ -25,7 +25,7 @@ const byte numsiblings[] = {
25 1, /* rep */ 25 1, /* rep */
26 2, 2, /* seq, choice */ 26 2, 2, /* seq, choice */
27 1, 1, /* not, and */ 27 1, 1, /* not, and */
28 0, 0, 2, 1, /* call, opencall, rule, grammar */ 28 0, 0, 2, 1, 1, /* call, opencall, rule, prerule, grammar */
29 1, /* behind */ 29 1, /* behind */
30 1, 1 /* capture, runtime capture */ 30 1, 1 /* capture, runtime capture */
31}; 31};
@@ -906,7 +906,7 @@ static int collectrules (lua_State *L, int arg, int *totalsize) {
906 int size; /* accumulator for total size */ 906 int size; /* accumulator for total size */
907 lua_newtable(L); /* create position table */ 907 lua_newtable(L); /* create position table */
908 getfirstrule(L, arg, postab); 908 getfirstrule(L, arg, postab);
909 size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */ 909 size = 3 + getsize(L, postab + 2); /* TGrammar + TRule + TXInfo + rule */
910 lua_pushnil(L); /* prepare to traverse grammar table */ 910 lua_pushnil(L); /* prepare to traverse grammar table */
911 while (lua_next(L, arg) != 0) { 911 while (lua_next(L, arg) != 0) {
912 if (lua_tonumber(L, -2) == 1 || 912 if (lua_tonumber(L, -2) == 1 ||
@@ -920,11 +920,11 @@ static int collectrules (lua_State *L, int arg, int *totalsize) {
920 lua_pushvalue(L, -2); /* push key (to insert into position table) */ 920 lua_pushvalue(L, -2); /* push key (to insert into position table) */
921 lua_pushinteger(L, size); 921 lua_pushinteger(L, size);
922 lua_settable(L, postab); 922 lua_settable(L, postab);
923 size += 1 + getsize(L, -1); /* update size */ 923 size += 2 + getsize(L, -1); /* add 'TRule + TXInfo + rule' to size */
924 lua_pushvalue(L, -2); /* push key (for next lua_next) */ 924 lua_pushvalue(L, -2); /* push key (for next lua_next) */
925 n++; 925 n++;
926 } 926 }
927 *totalsize = size + 1; /* TTrue to finish list of rules */ 927 *totalsize = size + 1; /* space for 'TTrue' finishing list of rules */
928 return n; 928 return n;
929} 929}
930 930
@@ -936,11 +936,13 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
936 int ridx = frule + 2*i + 1; /* index of i-th rule */ 936 int ridx = frule + 2*i + 1; /* index of i-th rule */
937 int rulesize; 937 int rulesize;
938 TTree *rn = gettree(L, ridx, &rulesize); 938 TTree *rn = gettree(L, ridx, &rulesize);
939 TTree *pr = sib1(nd); /* points to rule's prerule */
939 nd->tag = TRule; 940 nd->tag = TRule;
940 nd->key = 0; /* will be fixed when rule is used */ 941 nd->key = 0; /* will be fixed when rule is used */
941 nd->cap = i; /* rule number */ 942 pr->tag = TXInfo;
942 nd->u.ps = rulesize + 1; /* point to next rule */ 943 pr->u.n = i; /* rule number */
943 memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ 944 nd->u.ps = rulesize + 2; /* point to next rule */
945 memcpy(sib1(pr), rn, rulesize * sizeof(TTree)); /* copy rule */
944 mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ 946 mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */
945 nd = sib2(nd); /* move to next rule */ 947 nd = sib2(nd); /* move to next rule */
946 } 948 }
@@ -976,7 +978,7 @@ static int checkloops (TTree *tree) {
976** twice in 'passed', there is path from it back to itself without 978** twice in 'passed', there is path from it back to itself without
977** advancing the subject. 979** advancing the subject.
978*/ 980*/
979static int verifyerror (lua_State *L, int *passed, int npassed) { 981static int verifyerror (lua_State *L, unsigned short *passed, int npassed) {
980 int i, j; 982 int i, j;
981 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ 983 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */
982 for (j = i - 1; j >= 0; j--) { 984 for (j = i - 1; j >= 0; j--) {
@@ -1001,8 +1003,8 @@ static int verifyerror (lua_State *L, int *passed, int npassed) {
1001** counts the elements in 'passed'. 1003** counts the elements in 'passed'.
1002** Assume ktable at the top of the stack. 1004** Assume ktable at the top of the stack.
1003*/ 1005*/
1004static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, 1006static int verifyrule (lua_State *L, TTree *tree, unsigned short *passed,
1005 int nb) { 1007 int npassed, int nb) {
1006 tailcall: 1008 tailcall:
1007 switch (tree->tag) { 1009 switch (tree->tag) {
1008 case TChar: case TSet: case TAny: 1010 case TChar: case TSet: case TAny:
@@ -1014,7 +1016,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
1014 case TNot: case TAnd: case TRep: 1016 case TNot: case TAnd: case TRep:
1015 /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ 1017 /* return verifyrule(L, sib1(tree), passed, npassed, 1); */
1016 tree = sib1(tree); nb = 1; goto tailcall; 1018 tree = sib1(tree); nb = 1; goto tailcall;
1017 case TCapture: case TRunTime: 1019 case TCapture: case TRunTime: case TXInfo:
1018 /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ 1020 /* return verifyrule(L, sib1(tree), passed, npassed, nb); */
1019 tree = sib1(tree); goto tailcall; 1021 tree = sib1(tree); goto tailcall;
1020 case TCall: 1022 case TCall:
@@ -1030,10 +1032,10 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
1030 /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ 1032 /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
1031 tree = sib2(tree); goto tailcall; 1033 tree = sib2(tree); goto tailcall;
1032 case TRule: 1034 case TRule:
1033 if (npassed >= MAXRULES) 1035 if (npassed >= MAXRULES) /* too many steps? */
1034 return verifyerror(L, passed, npassed); 1036 return verifyerror(L, passed, npassed); /* error */
1035 else { 1037 else {
1036 passed[npassed++] = tree->key; 1038 passed[npassed++] = tree->key; /* add rule to path */
1037 /* return verifyrule(L, sib1(tree), passed, npassed); */ 1039 /* return verifyrule(L, sib1(tree), passed, npassed); */
1038 tree = sib1(tree); goto tailcall; 1040 tree = sib1(tree); goto tailcall;
1039 } 1041 }
@@ -1045,7 +1047,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
1045 1047
1046 1048
1047static void verifygrammar (lua_State *L, TTree *grammar) { 1049static void verifygrammar (lua_State *L, TTree *grammar) {
1048 int passed[MAXRULES]; 1050 unsigned short passed[MAXRULES];
1049 TTree *rule; 1051 TTree *rule;
1050 /* check left-recursive rules */ 1052 /* check left-recursive rules */
1051 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { 1053 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
diff --git a/lptree.h b/lptree.h
index 25906d5..3e8b52b 100644
--- a/lptree.h
+++ b/lptree.h
@@ -26,8 +26,8 @@ typedef enum TTag {
26 TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ 26 TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */
27 TOpenCall, /* ktable[key] is rule's key */ 27 TOpenCall, /* ktable[key] is rule's key */
28 TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); 28 TRule, /* ktable[key] is rule's key (but key == 0 for unused rules);
29 'sib1' is rule's pattern; 29 'sib1' is rule's pattern pre-rule; 'sib2' is next rule; */
30 'sib2' is next rule; 'cap' is rule's sequential number */ 30 TXInfo, /* extra info; 'n' is rule's sequential number */
31 TGrammar, /* 'sib1' is initial (and first) rule */ 31 TGrammar, /* 'sib1' is initial (and first) rule */
32 TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ 32 TBehind, /* 'sib1' is pattern, 'n' is how much to go back */
33 TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); 33 TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind');
diff --git a/lptypes.h b/lptypes.h
index 1d9d59f..223d887 100644
--- a/lptypes.h
+++ b/lptypes.h
@@ -51,9 +51,9 @@
51#endif 51#endif
52 52
53 53
54/* maximum number of rules in a grammar (limited by 'unsigned char') */ 54/* maximum number of rules in a grammar (limited by 'unsigned short') */
55#if !defined(MAXRULES) 55#if !defined(MAXRULES)
56#define MAXRULES 250 56#define MAXRULES 1000
57#endif 57#endif
58 58
59 59
diff --git a/lpvm.h b/lpvm.h
index 69ec33d..576429f 100644
--- a/lpvm.h
+++ b/lpvm.h
@@ -33,7 +33,8 @@ typedef enum Opcode {
33 IFullCapture, /* complete capture of last 'off' chars */ 33 IFullCapture, /* complete capture of last 'off' chars */
34 IOpenCapture, /* start a capture */ 34 IOpenCapture, /* start a capture */
35 ICloseCapture, 35 ICloseCapture,
36 ICloseRunTime 36 ICloseRunTime,
37 IEmpty /* to fill empty slots left by optimizations */
37} Opcode; 38} Opcode;
38 39
39 40
diff --git a/test.lua b/test.lua
index 8f9f574..f57cdec 100755
--- a/test.lua
+++ b/test.lua
@@ -406,7 +406,7 @@ assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')
406 406
407 407
408do 408do
409 -- large dynamic Cc 409 print "testing large dynamic Cc"
410 local lim = 2^16 - 1 410 local lim = 2^16 - 1
411 local c = 0 411 local c = 0
412 local function seq (n) 412 local function seq (n)
@@ -985,10 +985,10 @@ for i = 1, 10 do
985 assert(p:match("aaaaaaaaaaa") == 11 - i + 1) 985 assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
986end 986end
987 987
988print"+"
989 988
990 989
991-- tests for back references 990print "testing back references"
991
992checkerr("back reference 'x' not found", m.match, m.Cb('x'), '') 992checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
993checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a') 993checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
994 994
@@ -1171,9 +1171,28 @@ t = {p:match('abacc')}
1171checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'}) 1171checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
1172 1172
1173 1173
1174do print"testing large grammars"
1175 local lim = 1000 -- number of rules
1176 local t = {}
1177
1178 for i = 3, lim do
1179 t[i] = m.V(i - 1) -- each rule calls previous one
1180 end
1181 t[1] = m.V(lim) -- start on last rule
1182 t[2] = m.C("alo") -- final rule
1183
1184 local P = m.P(t) -- build grammar
1185 assert(P:match("alo") == "alo")
1186
1187 t[#t + 1] = m.P("x") -- one more rule...
1188 checkerr("too many rules", m.P, t)
1189end
1190
1191
1174------------------------------------------------------------------- 1192-------------------------------------------------------------------
1175-- Tests for 're' module 1193-- Tests for 're' module
1176------------------------------------------------------------------- 1194-------------------------------------------------------------------
1195print"testing 're' module"
1177 1196
1178local re = require "re" 1197local re = require "re"
1179 1198