diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-04-14 12:04:23 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-04-14 12:04:23 -0300 |
| commit | 3f7797419e4d7493e1364290a5b127d1cb45e3bf (patch) | |
| tree | 8dd91b0d008d5ea9f9c96eada86510495c97d1e3 | |
| parent | d9f83dded93a35fb333c4e1bd371c401f7129fd1 (diff) | |
| download | lpeg-3f7797419e4d7493e1364290a5b127d1cb45e3bf.tar.gz lpeg-3f7797419e4d7493e1364290a5b127d1cb45e3bf.tar.bz2 lpeg-3f7797419e4d7493e1364290a5b127d1cb45e3bf.zip | |
Removed 'unsigned char' limit on number of rules in grammars
Added a new tree-type node 'TXInfo', which follows 'TRule' nodes,
to store extra information about a node. (In this case, the rule
number, with an 'unsigned short' field.)
| -rw-r--r-- | lpcode.c | 18 | ||||
| -rw-r--r-- | lpprint.c | 37 | ||||
| -rw-r--r-- | lptree.c | 32 | ||||
| -rw-r--r-- | lptree.h | 4 | ||||
| -rw-r--r-- | lptypes.h | 4 | ||||
| -rw-r--r-- | lpvm.h | 3 | ||||
| -rwxr-xr-x | test.lua | 25 |
7 files changed, 75 insertions, 48 deletions
| @@ -220,7 +220,7 @@ int checkaux (TTree *tree, int pred) { | |||
| 220 | if (checkaux(sib2(tree), pred)) return 1; | 220 | if (checkaux(sib2(tree), pred)) return 1; |
| 221 | /* else return checkaux(sib1(tree), pred); */ | 221 | /* else return checkaux(sib1(tree), pred); */ |
| 222 | tree = sib1(tree); goto tailcall; | 222 | tree = sib1(tree); goto tailcall; |
| 223 | case TCapture: case TGrammar: case TRule: | 223 | case TCapture: case TGrammar: case TRule: case TXInfo: |
| 224 | /* return checkaux(sib1(tree), pred); */ | 224 | /* return checkaux(sib1(tree), pred); */ |
| 225 | tree = sib1(tree); goto tailcall; | 225 | tree = sib1(tree); goto tailcall; |
| 226 | case TCall: /* return checkaux(sib2(tree), pred); */ | 226 | case TCall: /* return checkaux(sib2(tree), pred); */ |
| @@ -243,7 +243,7 @@ int fixedlen (TTree *tree) { | |||
| 243 | return len; | 243 | return len; |
| 244 | case TRep: case TRunTime: case TOpenCall: | 244 | case TRep: case TRunTime: case TOpenCall: |
| 245 | return -1; | 245 | return -1; |
| 246 | case TCapture: case TRule: case TGrammar: | 246 | case TCapture: case TRule: case TGrammar: case TXInfo: |
| 247 | /* return fixedlen(sib1(tree)); */ | 247 | /* return fixedlen(sib1(tree)); */ |
| 248 | tree = sib1(tree); goto tailcall; | 248 | tree = sib1(tree); goto tailcall; |
| 249 | case TCall: { | 249 | case TCall: { |
| @@ -334,7 +334,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
| 334 | loopset(i, firstset->cs[i] |= follow->cs[i]); | 334 | loopset(i, firstset->cs[i] |= follow->cs[i]); |
| 335 | return 1; /* accept the empty string */ | 335 | return 1; /* accept the empty string */ |
| 336 | } | 336 | } |
| 337 | case TCapture: case TGrammar: case TRule: { | 337 | case TCapture: case TGrammar: case TRule: case TXInfo: { |
| 338 | /* return getfirst(sib1(tree), follow, firstset); */ | 338 | /* return getfirst(sib1(tree), follow, firstset); */ |
| 339 | tree = sib1(tree); goto tailcall; | 339 | tree = sib1(tree); goto tailcall; |
| 340 | } | 340 | } |
| @@ -382,7 +382,7 @@ static int headfail (TTree *tree) { | |||
| 382 | case TTrue: case TRep: case TRunTime: case TNot: | 382 | case TTrue: case TRep: case TRunTime: case TNot: |
| 383 | case TBehind: | 383 | case TBehind: |
| 384 | return 0; | 384 | return 0; |
| 385 | case TCapture: case TGrammar: case TRule: case TAnd: | 385 | case TCapture: case TGrammar: case TRule: case TXInfo: case TAnd: |
| 386 | tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ | 386 | tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ |
| 387 | case TCall: | 387 | case TCall: |
| 388 | tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ | 388 | tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ |
| @@ -874,8 +874,10 @@ static void codegrammar (CompileState *compst, TTree *grammar) { | |||
| 874 | int start = gethere(compst); /* here starts the initial rule */ | 874 | int start = gethere(compst); /* here starts the initial rule */ |
| 875 | jumptohere(compst, firstcall); | 875 | jumptohere(compst, firstcall); |
| 876 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | 876 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { |
| 877 | TTree *r = sib1(rule); | ||
| 878 | assert(r->tag == TXInfo); | ||
| 877 | positions[rulenumber++] = gethere(compst); /* save rule position */ | 879 | positions[rulenumber++] = gethere(compst); /* save rule position */ |
| 878 | codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */ | 880 | codegen(compst, sib1(r), 0, NOINST, fullset); /* code rule */ |
| 879 | addinstruction(compst, IRet, 0); | 881 | addinstruction(compst, IRet, 0); |
| 880 | } | 882 | } |
| 881 | assert(rule->tag == TTrue); | 883 | assert(rule->tag == TTrue); |
| @@ -886,8 +888,8 @@ static void codegrammar (CompileState *compst, TTree *grammar) { | |||
| 886 | 888 | ||
| 887 | static void codecall (CompileState *compst, TTree *call) { | 889 | static void codecall (CompileState *compst, TTree *call) { |
| 888 | int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ | 890 | int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ |
| 889 | getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */ | 891 | assert(sib1(sib2(call))->tag == TXInfo); |
| 890 | assert(sib2(call)->tag == TRule); | 892 | getinstr(compst, c).i.key = sib1(sib2(call))->u.n; /* rule number */ |
| 891 | } | 893 | } |
| 892 | 894 | ||
| 893 | 895 | ||
| @@ -971,7 +973,7 @@ static void peephole (CompileState *compst) { | |||
| 971 | case IRet: case IFail: case IFailTwice: | 973 | case IRet: case IFail: case IFailTwice: |
| 972 | case IEnd: { /* instructions with unconditional implicit jumps */ | 974 | case IEnd: { /* instructions with unconditional implicit jumps */ |
| 973 | code[i] = code[ft]; /* jump becomes that instruction */ | 975 | code[i] = code[ft]; /* jump becomes that instruction */ |
| 974 | code[i + 1].i.code = IAny; /* 'no-op' for target position */ | 976 | code[i + 1].i.code = IEmpty; /* 'no-op' for target position */ |
| 975 | break; | 977 | break; |
| 976 | } | 978 | } |
| 977 | case ICommit: case IPartialCommit: | 979 | case ICommit: case IPartialCommit: |
| @@ -60,7 +60,8 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
| 60 | "ret", "end", | 60 | "ret", "end", |
| 61 | "choice", "jmp", "call", "open_call", | 61 | "choice", "jmp", "call", "open_call", |
| 62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", | 62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", |
| 63 | "fullcapture", "opencapture", "closecapture", "closeruntime" | 63 | "fullcapture", "opencapture", "closecapture", "closeruntime", |
| 64 | "--" | ||
| 64 | }; | 65 | }; |
| 65 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); | 66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); |
| 66 | switch ((Opcode)p->i.code) { | 67 | switch ((Opcode)p->i.code) { |
| @@ -151,7 +152,7 @@ static const char *tagnames[] = { | |||
| 151 | "rep", | 152 | "rep", |
| 152 | "seq", "choice", | 153 | "seq", "choice", |
| 153 | "not", "and", | 154 | "not", "and", |
| 154 | "call", "opencall", "rule", "grammar", | 155 | "call", "opencall", "rule", "xinfo", "grammar", |
| 155 | "behind", | 156 | "behind", |
| 156 | "capture", "run-time" | 157 | "capture", "run-time" |
| 157 | }; | 158 | }; |
| @@ -159,6 +160,7 @@ static const char *tagnames[] = { | |||
| 159 | 160 | ||
| 160 | void printtree (TTree *tree, int ident) { | 161 | void printtree (TTree *tree, int ident) { |
| 161 | int i; | 162 | int i; |
| 163 | int sibs = numsiblings[tree->tag]; | ||
| 162 | for (i = 0; i < ident; i++) printf(" "); | 164 | for (i = 0; i < ident; i++) printf(" "); |
| 163 | printf("%s", tagnames[tree->tag]); | 165 | printf("%s", tagnames[tree->tag]); |
| 164 | switch (tree->tag) { | 166 | switch (tree->tag) { |
| @@ -176,24 +178,26 @@ void printtree (TTree *tree, int ident) { | |||
| 176 | break; | 178 | break; |
| 177 | } | 179 | } |
| 178 | case TOpenCall: case TCall: { | 180 | case TOpenCall: case TCall: { |
| 179 | assert(sib2(tree)->tag == TRule); | 181 | assert(sib1(sib2(tree))->tag == TXInfo); |
| 180 | printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap); | 182 | printf(" key: %d (rule: %d)\n", tree->key, sib1(sib2(tree))->u.n); |
| 181 | break; | 183 | break; |
| 182 | } | 184 | } |
| 183 | case TBehind: { | 185 | case TBehind: { |
| 184 | printf(" %d\n", tree->u.n); | 186 | printf(" %d\n", tree->u.n); |
| 185 | printtree(sib1(tree), ident + 2); | ||
| 186 | break; | 187 | break; |
| 187 | } | 188 | } |
| 188 | case TCapture: { | 189 | case TCapture: { |
| 189 | printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); | 190 | printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); |
| 190 | printtree(sib1(tree), ident + 2); | ||
| 191 | break; | 191 | break; |
| 192 | } | 192 | } |
| 193 | case TRule: { | 193 | case TRule: { |
| 194 | printf(" n: %d key: %d\n", tree->cap, tree->key); | 194 | printf(" key: %d\n", tree->key); |
| 195 | printtree(sib1(tree), ident + 2); | 195 | sibs = 1; /* do not print 'sib2' (next rule) as a sibling */ |
| 196 | break; /* do not print next rule as a sibling */ | 196 | break; |
| 197 | } | ||
| 198 | case TXInfo: { | ||
| 199 | printf(" n: %d\n", tree->u.n); | ||
| 200 | break; | ||
| 197 | } | 201 | } |
| 198 | case TGrammar: { | 202 | case TGrammar: { |
| 199 | TTree *rule = sib1(tree); | 203 | TTree *rule = sib1(tree); |
| @@ -203,18 +207,17 @@ void printtree (TTree *tree, int ident) { | |||
| 203 | rule = sib2(rule); | 207 | rule = sib2(rule); |
| 204 | } | 208 | } |
| 205 | assert(rule->tag == TTrue); /* sentinel */ | 209 | assert(rule->tag == TTrue); /* sentinel */ |
| 210 | sibs = 0; /* siblings already handled */ | ||
| 206 | break; | 211 | break; |
| 207 | } | 212 | } |
| 208 | default: { | 213 | default: |
| 209 | int sibs = numsiblings[tree->tag]; | ||
| 210 | printf("\n"); | 214 | printf("\n"); |
| 211 | if (sibs >= 1) { | ||
| 212 | printtree(sib1(tree), ident + 2); | ||
| 213 | if (sibs >= 2) | ||
| 214 | printtree(sib2(tree), ident + 2); | ||
| 215 | } | ||
| 216 | break; | 215 | break; |
| 217 | } | 216 | } |
| 217 | if (sibs >= 1) { | ||
| 218 | printtree(sib1(tree), ident + 2); | ||
| 219 | if (sibs >= 2) | ||
| 220 | printtree(sib2(tree), ident + 2); | ||
| 218 | } | 221 | } |
| 219 | } | 222 | } |
| 220 | 223 | ||
| @@ -25,7 +25,7 @@ const byte numsiblings[] = { | |||
| 25 | 1, /* rep */ | 25 | 1, /* rep */ |
| 26 | 2, 2, /* seq, choice */ | 26 | 2, 2, /* seq, choice */ |
| 27 | 1, 1, /* not, and */ | 27 | 1, 1, /* not, and */ |
| 28 | 0, 0, 2, 1, /* call, opencall, rule, grammar */ | 28 | 0, 0, 2, 1, 1, /* call, opencall, rule, prerule, grammar */ |
| 29 | 1, /* behind */ | 29 | 1, /* behind */ |
| 30 | 1, 1 /* capture, runtime capture */ | 30 | 1, 1 /* capture, runtime capture */ |
| 31 | }; | 31 | }; |
| @@ -906,7 +906,7 @@ static int collectrules (lua_State *L, int arg, int *totalsize) { | |||
| 906 | int size; /* accumulator for total size */ | 906 | int size; /* accumulator for total size */ |
| 907 | lua_newtable(L); /* create position table */ | 907 | lua_newtable(L); /* create position table */ |
| 908 | getfirstrule(L, arg, postab); | 908 | getfirstrule(L, arg, postab); |
| 909 | size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */ | 909 | size = 3 + getsize(L, postab + 2); /* TGrammar + TRule + TXInfo + rule */ |
| 910 | lua_pushnil(L); /* prepare to traverse grammar table */ | 910 | lua_pushnil(L); /* prepare to traverse grammar table */ |
| 911 | while (lua_next(L, arg) != 0) { | 911 | while (lua_next(L, arg) != 0) { |
| 912 | if (lua_tonumber(L, -2) == 1 || | 912 | if (lua_tonumber(L, -2) == 1 || |
| @@ -920,11 +920,11 @@ static int collectrules (lua_State *L, int arg, int *totalsize) { | |||
| 920 | lua_pushvalue(L, -2); /* push key (to insert into position table) */ | 920 | lua_pushvalue(L, -2); /* push key (to insert into position table) */ |
| 921 | lua_pushinteger(L, size); | 921 | lua_pushinteger(L, size); |
| 922 | lua_settable(L, postab); | 922 | lua_settable(L, postab); |
| 923 | size += 1 + getsize(L, -1); /* update size */ | 923 | size += 2 + getsize(L, -1); /* add 'TRule + TXInfo + rule' to size */ |
| 924 | lua_pushvalue(L, -2); /* push key (for next lua_next) */ | 924 | lua_pushvalue(L, -2); /* push key (for next lua_next) */ |
| 925 | n++; | 925 | n++; |
| 926 | } | 926 | } |
| 927 | *totalsize = size + 1; /* TTrue to finish list of rules */ | 927 | *totalsize = size + 1; /* space for 'TTrue' finishing list of rules */ |
| 928 | return n; | 928 | return n; |
| 929 | } | 929 | } |
| 930 | 930 | ||
| @@ -936,11 +936,13 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | |||
| 936 | int ridx = frule + 2*i + 1; /* index of i-th rule */ | 936 | int ridx = frule + 2*i + 1; /* index of i-th rule */ |
| 937 | int rulesize; | 937 | int rulesize; |
| 938 | TTree *rn = gettree(L, ridx, &rulesize); | 938 | TTree *rn = gettree(L, ridx, &rulesize); |
| 939 | TTree *pr = sib1(nd); /* points to rule's prerule */ | ||
| 939 | nd->tag = TRule; | 940 | nd->tag = TRule; |
| 940 | nd->key = 0; /* will be fixed when rule is used */ | 941 | nd->key = 0; /* will be fixed when rule is used */ |
| 941 | nd->cap = i; /* rule number */ | 942 | pr->tag = TXInfo; |
| 942 | nd->u.ps = rulesize + 1; /* point to next rule */ | 943 | pr->u.n = i; /* rule number */ |
| 943 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ | 944 | nd->u.ps = rulesize + 2; /* point to next rule */ |
| 945 | memcpy(sib1(pr), rn, rulesize * sizeof(TTree)); /* copy rule */ | ||
| 944 | mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ | 946 | mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ |
| 945 | nd = sib2(nd); /* move to next rule */ | 947 | nd = sib2(nd); /* move to next rule */ |
| 946 | } | 948 | } |
| @@ -976,7 +978,7 @@ static int checkloops (TTree *tree) { | |||
| 976 | ** twice in 'passed', there is path from it back to itself without | 978 | ** twice in 'passed', there is path from it back to itself without |
| 977 | ** advancing the subject. | 979 | ** advancing the subject. |
| 978 | */ | 980 | */ |
| 979 | static int verifyerror (lua_State *L, int *passed, int npassed) { | 981 | static int verifyerror (lua_State *L, unsigned short *passed, int npassed) { |
| 980 | int i, j; | 982 | int i, j; |
| 981 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | 983 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ |
| 982 | for (j = i - 1; j >= 0; j--) { | 984 | for (j = i - 1; j >= 0; j--) { |
| @@ -1001,8 +1003,8 @@ static int verifyerror (lua_State *L, int *passed, int npassed) { | |||
| 1001 | ** counts the elements in 'passed'. | 1003 | ** counts the elements in 'passed'. |
| 1002 | ** Assume ktable at the top of the stack. | 1004 | ** Assume ktable at the top of the stack. |
| 1003 | */ | 1005 | */ |
| 1004 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | 1006 | static int verifyrule (lua_State *L, TTree *tree, unsigned short *passed, |
| 1005 | int nb) { | 1007 | int npassed, int nb) { |
| 1006 | tailcall: | 1008 | tailcall: |
| 1007 | switch (tree->tag) { | 1009 | switch (tree->tag) { |
| 1008 | case TChar: case TSet: case TAny: | 1010 | case TChar: case TSet: case TAny: |
| @@ -1014,7 +1016,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | |||
| 1014 | case TNot: case TAnd: case TRep: | 1016 | case TNot: case TAnd: case TRep: |
| 1015 | /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ | 1017 | /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ |
| 1016 | tree = sib1(tree); nb = 1; goto tailcall; | 1018 | tree = sib1(tree); nb = 1; goto tailcall; |
| 1017 | case TCapture: case TRunTime: | 1019 | case TCapture: case TRunTime: case TXInfo: |
| 1018 | /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ | 1020 | /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ |
| 1019 | tree = sib1(tree); goto tailcall; | 1021 | tree = sib1(tree); goto tailcall; |
| 1020 | case TCall: | 1022 | case TCall: |
| @@ -1030,10 +1032,10 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | |||
| 1030 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ | 1032 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ |
| 1031 | tree = sib2(tree); goto tailcall; | 1033 | tree = sib2(tree); goto tailcall; |
| 1032 | case TRule: | 1034 | case TRule: |
| 1033 | if (npassed >= MAXRULES) | 1035 | if (npassed >= MAXRULES) /* too many steps? */ |
| 1034 | return verifyerror(L, passed, npassed); | 1036 | return verifyerror(L, passed, npassed); /* error */ |
| 1035 | else { | 1037 | else { |
| 1036 | passed[npassed++] = tree->key; | 1038 | passed[npassed++] = tree->key; /* add rule to path */ |
| 1037 | /* return verifyrule(L, sib1(tree), passed, npassed); */ | 1039 | /* return verifyrule(L, sib1(tree), passed, npassed); */ |
| 1038 | tree = sib1(tree); goto tailcall; | 1040 | tree = sib1(tree); goto tailcall; |
| 1039 | } | 1041 | } |
| @@ -1045,7 +1047,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | |||
| 1045 | 1047 | ||
| 1046 | 1048 | ||
| 1047 | static void verifygrammar (lua_State *L, TTree *grammar) { | 1049 | static void verifygrammar (lua_State *L, TTree *grammar) { |
| 1048 | int passed[MAXRULES]; | 1050 | unsigned short passed[MAXRULES]; |
| 1049 | TTree *rule; | 1051 | TTree *rule; |
| 1050 | /* check left-recursive rules */ | 1052 | /* check left-recursive rules */ |
| 1051 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | 1053 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { |
| @@ -26,8 +26,8 @@ typedef enum TTag { | |||
| 26 | TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ | 26 | TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ |
| 27 | TOpenCall, /* ktable[key] is rule's key */ | 27 | TOpenCall, /* ktable[key] is rule's key */ |
| 28 | TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); | 28 | TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); |
| 29 | 'sib1' is rule's pattern; | 29 | 'sib1' is rule's pattern pre-rule; 'sib2' is next rule; */ |
| 30 | 'sib2' is next rule; 'cap' is rule's sequential number */ | 30 | TXInfo, /* extra info; 'n' is rule's sequential number */ |
| 31 | TGrammar, /* 'sib1' is initial (and first) rule */ | 31 | TGrammar, /* 'sib1' is initial (and first) rule */ |
| 32 | TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ | 32 | TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ |
| 33 | TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); | 33 | TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); |
| @@ -51,9 +51,9 @@ | |||
| 51 | #endif | 51 | #endif |
| 52 | 52 | ||
| 53 | 53 | ||
| 54 | /* maximum number of rules in a grammar (limited by 'unsigned char') */ | 54 | /* maximum number of rules in a grammar (limited by 'unsigned short') */ |
| 55 | #if !defined(MAXRULES) | 55 | #if !defined(MAXRULES) |
| 56 | #define MAXRULES 250 | 56 | #define MAXRULES 1000 |
| 57 | #endif | 57 | #endif |
| 58 | 58 | ||
| 59 | 59 | ||
| @@ -33,7 +33,8 @@ typedef enum Opcode { | |||
| 33 | IFullCapture, /* complete capture of last 'off' chars */ | 33 | IFullCapture, /* complete capture of last 'off' chars */ |
| 34 | IOpenCapture, /* start a capture */ | 34 | IOpenCapture, /* start a capture */ |
| 35 | ICloseCapture, | 35 | ICloseCapture, |
| 36 | ICloseRunTime | 36 | ICloseRunTime, |
| 37 | IEmpty /* to fill empty slots left by optimizations */ | ||
| 37 | } Opcode; | 38 | } Opcode; |
| 38 | 39 | ||
| 39 | 40 | ||
| @@ -406,7 +406,7 @@ assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc') | |||
| 406 | 406 | ||
| 407 | 407 | ||
| 408 | do | 408 | do |
| 409 | -- large dynamic Cc | 409 | print "testing large dynamic Cc" |
| 410 | local lim = 2^16 - 1 | 410 | local lim = 2^16 - 1 |
| 411 | local c = 0 | 411 | local c = 0 |
| 412 | local function seq (n) | 412 | local function seq (n) |
| @@ -985,10 +985,10 @@ for i = 1, 10 do | |||
| 985 | assert(p:match("aaaaaaaaaaa") == 11 - i + 1) | 985 | assert(p:match("aaaaaaaaaaa") == 11 - i + 1) |
| 986 | end | 986 | end |
| 987 | 987 | ||
| 988 | print"+" | ||
| 989 | 988 | ||
| 990 | 989 | ||
| 991 | -- tests for back references | 990 | print "testing back references" |
| 991 | |||
| 992 | checkerr("back reference 'x' not found", m.match, m.Cb('x'), '') | 992 | checkerr("back reference 'x' not found", m.match, m.Cb('x'), '') |
| 993 | checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a') | 993 | checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a') |
| 994 | 994 | ||
| @@ -1171,9 +1171,28 @@ t = {p:match('abacc')} | |||
| 1171 | checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'}) | 1171 | checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'}) |
| 1172 | 1172 | ||
| 1173 | 1173 | ||
| 1174 | do print"testing large grammars" | ||
| 1175 | local lim = 1000 -- number of rules | ||
| 1176 | local t = {} | ||
| 1177 | |||
| 1178 | for i = 3, lim do | ||
| 1179 | t[i] = m.V(i - 1) -- each rule calls previous one | ||
| 1180 | end | ||
| 1181 | t[1] = m.V(lim) -- start on last rule | ||
| 1182 | t[2] = m.C("alo") -- final rule | ||
| 1183 | |||
| 1184 | local P = m.P(t) -- build grammar | ||
| 1185 | assert(P:match("alo") == "alo") | ||
| 1186 | |||
| 1187 | t[#t + 1] = m.P("x") -- one more rule... | ||
| 1188 | checkerr("too many rules", m.P, t) | ||
| 1189 | end | ||
| 1190 | |||
| 1191 | |||
| 1174 | ------------------------------------------------------------------- | 1192 | ------------------------------------------------------------------- |
| 1175 | -- Tests for 're' module | 1193 | -- Tests for 're' module |
| 1176 | ------------------------------------------------------------------- | 1194 | ------------------------------------------------------------------- |
| 1195 | print"testing 're' module" | ||
| 1177 | 1196 | ||
| 1178 | local re = require "re" | 1197 | local re = require "re" |
| 1179 | 1198 | ||
