diff options
| author | Sergio Queiroz <sqmedeiros@gmail.com> | 2019-01-10 10:06:33 -0300 |
|---|---|---|
| committer | Sergio Queiroz <sqmedeiros@gmail.com> | 2019-01-10 10:06:33 -0300 |
| commit | 4fb816d55f47c48c34cc4478e584b1567a75863b (patch) | |
| tree | 54ec6965c9d3983b193cf000a521a06cb3d73b8a | |
| parent | 9be59fb8f4b176a16643e707c74051b243202296 (diff) | |
| download | lpeglabel-lpeg-1.0.0.tar.gz lpeglabel-lpeg-1.0.0.tar.bz2 lpeglabel-lpeg-1.0.0.zip | |
Adapting lpeglabel-1.5 to the codebase of lpeg-1.0.0lpeg-1.0.0
| -rw-r--r-- | lpcap.h | 19 | ||||
| -rw-r--r-- | lpcode.c | 84 | ||||
| -rw-r--r-- | lpcode.h | 6 | ||||
| -rw-r--r-- | lpprint.c | 24 | ||||
| -rw-r--r-- | lptree.c | 13 | ||||
| -rw-r--r-- | lptree.h | 53 | ||||
| -rw-r--r-- | lptypes.h | 10 | ||||
| -rw-r--r-- | lpvm.c | 30 | ||||
| -rwxr-xr-x | test.lua | 57 |
9 files changed, 91 insertions, 205 deletions
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $ | 2 | ** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ |
| 3 | */ | 3 | */ |
| 4 | 4 | ||
| 5 | #if !defined(lpcap_h) | 5 | #if !defined(lpcap_h) |
| @@ -11,21 +11,8 @@ | |||
| 11 | 11 | ||
| 12 | /* kinds of captures */ | 12 | /* kinds of captures */ |
| 13 | typedef enum CapKind { | 13 | typedef enum CapKind { |
| 14 | Cclose, /* not used in trees */ | 14 | Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, |
| 15 | Cposition, | 15 | Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup |
| 16 | Cconst, /* ktable[key] is Lua constant */ | ||
| 17 | Cbackref, /* ktable[key] is "name" of group to get capture */ | ||
| 18 | Carg, /* 'key' is arg's number */ | ||
| 19 | Csimple, /* next node is pattern */ | ||
| 20 | Ctable, /* next node is pattern */ | ||
| 21 | Cfunction, /* ktable[key] is function; next node is pattern */ | ||
| 22 | Cquery, /* ktable[key] is table; next node is pattern */ | ||
| 23 | Cstring, /* ktable[key] is string; next node is pattern */ | ||
| 24 | Cnum, /* numbered capture; 'key' is number of value to return */ | ||
| 25 | Csubst, /* substitution capture; next node is pattern */ | ||
| 26 | Cfold, /* ktable[key] is function; next node is pattern */ | ||
| 27 | Cruntime, /* not used in trees (is uses another type for tree) */ | ||
| 28 | Cgroup /* ktable[key] is group's "name" */ | ||
| 29 | } CapKind; | 16 | } CapKind; |
| 30 | 17 | ||
| 31 | 18 | ||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $ | 2 | ** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $ |
| 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| @@ -126,27 +126,6 @@ int tocharset (TTree *tree, Charset *cs) { | |||
| 126 | 126 | ||
| 127 | 127 | ||
| 128 | /* | 128 | /* |
| 129 | ** Visit a TCall node taking care to stop recursion. If node not yet | ||
| 130 | ** visited, return 'f(sib2(tree))', otherwise return 'def' (default | ||
| 131 | ** value) | ||
| 132 | */ | ||
| 133 | static int callrecursive (TTree *tree, int f (TTree *t), int def) { | ||
| 134 | int key = tree->key; | ||
| 135 | assert(tree->tag == TCall); | ||
| 136 | assert(sib2(tree)->tag == TRule); | ||
| 137 | if (key == 0) /* node already visited? */ | ||
| 138 | return def; /* return default value */ | ||
| 139 | else { /* first visit */ | ||
| 140 | int result; | ||
| 141 | tree->key = 0; /* mark call as already visited */ | ||
| 142 | result = f(sib2(tree)); /* go to called rule */ | ||
| 143 | tree->key = key; /* restore tree */ | ||
| 144 | return result; | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | |||
| 149 | /* | ||
| 150 | ** Check whether a pattern tree has captures | 129 | ** Check whether a pattern tree has captures |
| 151 | */ | 130 | */ |
| 152 | int hascaptures (TTree *tree) { | 131 | int hascaptures (TTree *tree) { |
| @@ -155,17 +134,14 @@ int hascaptures (TTree *tree) { | |||
| 155 | case TCapture: case TRunTime: | 134 | case TCapture: case TRunTime: |
| 156 | return 1; | 135 | return 1; |
| 157 | case TCall: | 136 | case TCall: |
| 158 | return callrecursive(tree, hascaptures, 0); | 137 | tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ |
| 159 | case TRule: /* do not follow siblings */ | ||
| 160 | tree = sib1(tree); goto tailcall; | ||
| 161 | case TOpenCall: assert(0); | 138 | case TOpenCall: assert(0); |
| 162 | default: { | 139 | default: { |
| 163 | switch (numsiblings[tree->tag]) { | 140 | switch (numsiblings[tree->tag]) { |
| 164 | case 1: /* return hascaptures(sib1(tree)); */ | 141 | case 1: /* return hascaptures(sib1(tree)); */ |
| 165 | tree = sib1(tree); goto tailcall; | 142 | tree = sib1(tree); goto tailcall; |
| 166 | case 2: | 143 | case 2: |
| 167 | if (hascaptures(sib1(tree))) | 144 | if (hascaptures(sib1(tree))) return 1; |
| 168 | return 1; | ||
| 169 | /* else return hascaptures(sib2(tree)); */ | 145 | /* else return hascaptures(sib2(tree)); */ |
| 170 | tree = sib2(tree); goto tailcall; | 146 | tree = sib2(tree); goto tailcall; |
| 171 | default: assert(numsiblings[tree->tag] == 0); return 0; | 147 | default: assert(numsiblings[tree->tag] == 0); return 0; |
| @@ -232,9 +208,9 @@ int checkaux (TTree *tree, int pred) { | |||
| 232 | 208 | ||
| 233 | /* | 209 | /* |
| 234 | ** number of characters to match a pattern (or -1 if variable) | 210 | ** number of characters to match a pattern (or -1 if variable) |
| 211 | ** ('count' avoids infinite loops for grammars) | ||
| 235 | */ | 212 | */ |
| 236 | int fixedlen (TTree *tree) { | 213 | int fixedlenx (TTree *tree, int count, int len) { |
| 237 | int len = 0; /* to accumulate in tail calls */ | ||
| 238 | tailcall: | 214 | tailcall: |
| 239 | switch (tree->tag) { | 215 | switch (tree->tag) { |
| 240 | case TChar: case TSet: case TAny: | 216 | case TChar: case TSet: case TAny: |
| @@ -245,29 +221,26 @@ int fixedlen (TTree *tree) { | |||
| 245 | case TThrow: /* labeled failure */ | 221 | case TThrow: /* labeled failure */ |
| 246 | return -1; | 222 | return -1; |
| 247 | case TCapture: case TRule: case TGrammar: | 223 | case TCapture: case TRule: case TGrammar: |
| 248 | /* return fixedlen(sib1(tree)); */ | 224 | /* return fixedlenx(sib1(tree), count); */ |
| 249 | tree = sib1(tree); goto tailcall; | 225 | tree = sib1(tree); goto tailcall; |
| 250 | case TCall: { | 226 | case TCall: |
| 251 | int n1 = callrecursive(tree, fixedlen, -1); | 227 | if (count++ >= MAXRULES) |
| 252 | if (n1 < 0) | 228 | return -1; /* may be a loop */ |
| 253 | return -1; | 229 | /* else return fixedlenx(sib2(tree), count); */ |
| 254 | else | 230 | tree = sib2(tree); goto tailcall; |
| 255 | return len + n1; | 231 | case TSeq: { |
| 256 | } | 232 | len = fixedlenx(sib1(tree), count, len); |
| 257 | case TSeq: { | 233 | if (len < 0) return -1; |
| 258 | int n1 = fixedlen(sib1(tree)); | 234 | /* else return fixedlenx(sib2(tree), count, len); */ |
| 259 | if (n1 < 0) | 235 | tree = sib2(tree); goto tailcall; |
| 260 | return -1; | ||
| 261 | /* else return fixedlen(sib2(tree)) + len; */ | ||
| 262 | len += n1; tree = sib2(tree); goto tailcall; | ||
| 263 | } | 236 | } |
| 264 | case TChoice: { | 237 | case TChoice: { |
| 265 | int n1 = fixedlen(sib1(tree)); | 238 | int n1, n2; |
| 266 | int n2 = fixedlen(sib2(tree)); | 239 | n1 = fixedlenx(sib1(tree), count, len); |
| 267 | if (n1 != n2 || n1 < 0) | 240 | if (n1 < 0) return -1; |
| 268 | return -1; | 241 | n2 = fixedlenx(sib2(tree), count, len); |
| 269 | else | 242 | if (n1 == n2) return n1; |
| 270 | return len + n1; | 243 | else return -1; |
| 271 | } | 244 | } |
| 272 | default: assert(0); return 0; | 245 | default: assert(0); return 0; |
| 273 | }; | 246 | }; |
| @@ -311,7 +284,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
| 311 | loopset(i, firstset->cs[i] = fullset->cs[i]); | 284 | loopset(i, firstset->cs[i] = fullset->cs[i]); |
| 312 | return 1; | 285 | return 1; |
| 313 | } | 286 | } |
| 314 | case TChoice: { | 287 | case TChoice: { |
| 315 | Charset csaux; | 288 | Charset csaux; |
| 316 | int e1 = getfirst(sib1(tree), follow, firstset); | 289 | int e1 = getfirst(sib1(tree), follow, firstset); |
| 317 | int e2 = getfirst(sib2(tree), follow, &csaux); | 290 | int e2 = getfirst(sib2(tree), follow, &csaux); |
| @@ -396,7 +369,7 @@ static int headfail (TTree *tree) { | |||
| 396 | if (!nofail(sib2(tree))) return 0; | 369 | if (!nofail(sib2(tree))) return 0; |
| 397 | /* else return headfail(sib1(tree)); */ | 370 | /* else return headfail(sib1(tree)); */ |
| 398 | tree = sib1(tree); goto tailcall; | 371 | tree = sib1(tree); goto tailcall; |
| 399 | case TChoice: | 372 | case TChoice: |
| 400 | if (!headfail(sib1(tree))) return 0; | 373 | if (!headfail(sib1(tree))) return 0; |
| 401 | /* else return headfail(sib2(tree)); */ | 374 | /* else return headfail(sib2(tree)); */ |
| 402 | tree = sib2(tree); goto tailcall; | 375 | tree = sib2(tree); goto tailcall; |
| @@ -769,10 +742,9 @@ static void codeand (CompileState *compst, TTree *tree, int tt) { | |||
| 769 | 742 | ||
| 770 | 743 | ||
| 771 | /* | 744 | /* |
| 772 | ** Captures: if pattern has fixed (and not too big) length, and it | 745 | ** Captures: if pattern has fixed (and not too big) length, use |
| 773 | ** has no nested captures, use a single IFullCapture instruction | 746 | ** a single IFullCapture instruction after the match; otherwise, |
| 774 | ** after the match; otherwise, enclose the pattern with OpenCapture - | 747 | ** enclose the pattern with OpenCapture - CloseCapture. |
| 775 | ** CloseCapture. | ||
| 776 | */ | 748 | */ |
| 777 | static void codecapture (CompileState *compst, TTree *tree, int tt, | 749 | static void codecapture (CompileState *compst, TTree *tree, int tt, |
| 778 | const Charset *fl) { | 750 | const Charset *fl) { |
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lpcode.h,v 1.8 2016/09/15 17:46:13 roberto Exp $ | 2 | ** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $ |
| 3 | */ | 3 | */ |
| 4 | 4 | ||
| 5 | #if !defined(lpcode_h) | 5 | #if !defined(lpcode_h) |
| @@ -13,7 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | int tocharset (TTree *tree, Charset *cs); | 14 | int tocharset (TTree *tree, Charset *cs); |
| 15 | int checkaux (TTree *tree, int pred); | 15 | int checkaux (TTree *tree, int pred); |
| 16 | int fixedlen (TTree *tree); | 16 | int fixedlenx (TTree *tree, int count, int len); |
| 17 | int hascaptures (TTree *tree); | 17 | int hascaptures (TTree *tree); |
| 18 | int lp_gc (lua_State *L); | 18 | int lp_gc (lua_State *L); |
| 19 | Instruction *compile (lua_State *L, Pattern *p); | 19 | Instruction *compile (lua_State *L, Pattern *p); |
| @@ -35,6 +35,8 @@ int sizei (const Instruction *i); | |||
| 35 | */ | 35 | */ |
| 36 | #define nullable(t) checkaux(t, PEnullable) | 36 | #define nullable(t) checkaux(t, PEnullable) |
| 37 | 37 | ||
| 38 | #define fixedlen(t) fixedlenx(t, 0, 0) | ||
| 39 | |||
| 38 | 40 | ||
| 39 | 41 | ||
| 40 | #endif | 42 | #endif |
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lpprint.c,v 1.10 2016/09/13 16:06:03 roberto Exp $ | 2 | ** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $ |
| 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| @@ -37,13 +37,13 @@ void printcharset (const byte *st) { | |||
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | 39 | ||
| 40 | static const char *capkind (int kind) { | 40 | static void printcapkind (int kind) { |
| 41 | const char *const modes[] = { | 41 | const char *const modes[] = { |
| 42 | "close", "position", "constant", "backref", | 42 | "close", "position", "constant", "backref", |
| 43 | "argument", "simple", "table", "function", | 43 | "argument", "simple", "table", "function", |
| 44 | "query", "string", "num", "substitution", "fold", | 44 | "query", "string", "num", "substitution", "fold", |
| 45 | "runtime", "group"}; | 45 | "runtime", "group"}; |
| 46 | return modes[kind]; | 46 | printf("%s", modes[kind]); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | 49 | ||
| @@ -60,7 +60,7 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
| 60 | "ret", "end", | 60 | "ret", "end", |
| 61 | "choice", "pred_choice", "jmp", "call", "open_call", /* labeled failure */ | 61 | "choice", "pred_choice", "jmp", "call", "open_call", /* labeled failure */ |
| 62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", | 62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", |
| 63 | "fullcapture", "opencapture", "closecapture", "closeruntime", | 63 | "fullcapture", "opencapture", "closecapture", "closeruntime", |
| 64 | "throw", "throw_rec", /* labeled failure */ | 64 | "throw", "throw_rec", /* labeled failure */ |
| 65 | }; | 65 | }; |
| 66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); | 66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); |
| @@ -74,12 +74,13 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
| 74 | break; | 74 | break; |
| 75 | } | 75 | } |
| 76 | case IFullCapture: { | 76 | case IFullCapture: { |
| 77 | printf("%s (size = %d) (idx = %d)", | 77 | printcapkind(getkind(p)); |
| 78 | capkind(getkind(p)), getoff(p), p->i.key); | 78 | printf(" (size = %d) (idx = %d)", getoff(p), p->i.key); |
| 79 | break; | 79 | break; |
| 80 | } | 80 | } |
| 81 | case IOpenCapture: { | 81 | case IOpenCapture: { |
| 82 | printf("%s (idx = %d)", capkind(getkind(p)), p->i.key); | 82 | printcapkind(getkind(p)); |
| 83 | printf(" (idx = %d)", p->i.key); | ||
| 83 | break; | 84 | break; |
| 84 | } | 85 | } |
| 85 | case ISet: { | 86 | case ISet: { |
| @@ -133,8 +134,8 @@ void printpatt (Instruction *p, int n) { | |||
| 133 | 134 | ||
| 134 | #if defined(LPEG_DEBUG) | 135 | #if defined(LPEG_DEBUG) |
| 135 | static void printcap (Capture *cap) { | 136 | static void printcap (Capture *cap) { |
| 136 | printf("%s (idx: %d - size: %d) -> %p\n", | 137 | printcapkind(cap->kind); |
| 137 | capkind(cap->kind), cap->idx, cap->siz, cap->s); | 138 | printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s); |
| 138 | } | 139 | } |
| 139 | 140 | ||
| 140 | 141 | ||
| @@ -187,8 +188,7 @@ void printtree (TTree *tree, int ident) { | |||
| 187 | break; | 188 | break; |
| 188 | } | 189 | } |
| 189 | case TOpenCall: case TCall: { | 190 | case TOpenCall: case TCall: { |
| 190 | assert(sib2(tree)->tag == TRule); | 191 | printf(" key: %d\n", tree->key); |
| 191 | printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap); | ||
| 192 | break; | 192 | break; |
| 193 | } | 193 | } |
| 194 | case TBehind: { | 194 | case TBehind: { |
| @@ -197,7 +197,7 @@ void printtree (TTree *tree, int ident) { | |||
| 197 | break; | 197 | break; |
| 198 | } | 198 | } |
| 199 | case TCapture: { | 199 | case TCapture: { |
| 200 | printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); | 200 | printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n); |
| 201 | printtree(sib1(tree), ident + 2); | 201 | printtree(sib1(tree), ident + 2); |
| 202 | break; | 202 | break; |
| 203 | } | 203 | } |
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lptree.c,v 1.22 2016/09/13 18:10:22 roberto Exp $ | 2 | ** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $ |
| 3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| @@ -66,7 +66,7 @@ static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t, byte tag | |||
| 66 | t->tag = TCall; | 66 | t->tag = TCall; |
| 67 | t->u.ps = n - (t - g); /* position relative to node */ | 67 | t->u.ps = n - (t - g); /* position relative to node */ |
| 68 | assert(sib2(t)->tag == TRule); | 68 | assert(sib2(t)->tag == TRule); |
| 69 | sib2(t)->key = t->key; /* fix rule's key */ | 69 | sib2(t)->key = t->key; |
| 70 | } else if (n != 0) { /* labeled failure */ | 70 | } else if (n != 0) { /* labeled failure */ |
| 71 | t->u.ps = n - (t - g); /* position relative to node */ | 71 | t->u.ps = n - (t - g); /* position relative to node */ |
| 72 | } | 72 | } |
| @@ -969,7 +969,7 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | |||
| 969 | int rulesize; | 969 | int rulesize; |
| 970 | TTree *rn = gettree(L, ridx, &rulesize); | 970 | TTree *rn = gettree(L, ridx, &rulesize); |
| 971 | nd->tag = TRule; | 971 | nd->tag = TRule; |
| 972 | nd->key = 0; /* will be fixed when rule is used */ | 972 | nd->key = 0; |
| 973 | nd->cap = i; /* rule number */ | 973 | nd->cap = i; /* rule number */ |
| 974 | nd->u.ps = rulesize + 1; /* point to next rule */ | 974 | nd->u.ps = rulesize + 1; /* point to next rule */ |
| 975 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ | 975 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ |
| @@ -1003,11 +1003,6 @@ static int checkloops (TTree *tree) { | |||
| 1003 | } | 1003 | } |
| 1004 | 1004 | ||
| 1005 | 1005 | ||
| 1006 | /* | ||
| 1007 | ** Give appropriate error message for 'verifyrule'. If a rule appears | ||
| 1008 | ** twice in 'passed', there is path from it back to itself without | ||
| 1009 | ** advancing the subject. | ||
| 1010 | */ | ||
| 1011 | static int verifyerror (lua_State *L, int *passed, int npassed) { | 1006 | static int verifyerror (lua_State *L, int *passed, int npassed) { |
| 1012 | int i, j; | 1007 | int i, j; |
| 1013 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | 1008 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ |
| @@ -1029,8 +1024,6 @@ static int verifyerror (lua_State *L, int *passed, int npassed) { | |||
| 1029 | ** is only relevant if the first is nullable. | 1024 | ** is only relevant if the first is nullable. |
| 1030 | ** Parameter 'nb' works as an accumulator, to allow tail calls in | 1025 | ** Parameter 'nb' works as an accumulator, to allow tail calls in |
| 1031 | ** choices. ('nb' true makes function returns true.) | 1026 | ** choices. ('nb' true makes function returns true.) |
| 1032 | ** Parameter 'passed' is a list of already visited rules, 'npassed' | ||
| 1033 | ** counts the elements in 'passed'. | ||
| 1034 | ** Assume ktable at the top of the stack. | 1027 | ** Assume ktable at the top of the stack. |
| 1035 | */ | 1028 | */ |
| 1036 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | 1029 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, |
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lptree.h,v 1.3 2016/09/13 18:07:51 roberto Exp $ | 2 | ** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ |
| 3 | */ | 3 | */ |
| 4 | 4 | ||
| 5 | #if !defined(lptree_h) | 5 | #if !defined(lptree_h) |
| @@ -13,45 +13,40 @@ | |||
| 13 | ** types of trees | 13 | ** types of trees |
| 14 | */ | 14 | */ |
| 15 | typedef enum TTag { | 15 | typedef enum TTag { |
| 16 | TChar = 0, /* 'n' = char */ | 16 | TChar = 0, TSet, TAny, /* standard PEG elements */ |
| 17 | TSet, /* the set is stored in next CHARSETSIZE bytes */ | 17 | TTrue, TFalse, |
| 18 | TAny, | 18 | TRep, |
| 19 | TTrue, | 19 | TSeq, TChoice, |
| 20 | TFalse, | 20 | TNot, TAnd, |
| 21 | TRep, /* 'sib1'* */ | 21 | TCall, |
| 22 | TSeq, /* 'sib1' 'sib2' */ | 22 | TOpenCall, |
| 23 | TChoice, /* 'sib1' / 'sib2' */ | 23 | TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */ |
| 24 | TNot, /* !'sib1' */ | 24 | TGrammar, /* sib1 is initial (and first) rule */ |
| 25 | TAnd, /* &'sib1' */ | 25 | TBehind, /* match behind */ |
| 26 | TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ | 26 | TCapture, /* regular capture */ |
| 27 | TOpenCall, /* ktable[key] is rule's key */ | 27 | TRunTime, /* run-time capture */ |
| 28 | TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); | ||
| 29 | 'sib1' is rule's pattern; | ||
| 30 | 'sib2' is next rule; 'cap' is rule's sequential number */ | ||
| 31 | TGrammar, /* 'sib1' is initial (and first) rule */ | ||
| 32 | TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ | ||
| 33 | TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); | ||
| 34 | ktable[key] is Lua value associated with capture; | ||
| 35 | 'sib1' is capture body */ | ||
| 36 | TRunTime, /* run-time capture: 'key' is Lua function; | ||
| 37 | 'sib1' is capture body */ | ||
| 38 | TThrow, /* labeled failure: ktable[key] is label's name */ | 28 | TThrow, /* labeled failure: ktable[key] is label's name */ |
| 39 | } TTag; | 29 | } TTag; |
| 40 | 30 | ||
| 31 | /* number of siblings for each tree */ | ||
| 32 | extern const byte numsiblings[]; | ||
| 33 | |||
| 41 | 34 | ||
| 42 | /* | 35 | /* |
| 43 | ** Tree trees | 36 | ** Tree trees |
| 44 | ** The first child of a tree (if there is one) is immediately after | 37 | ** The first sibling of a tree (if there is one) is immediately after |
| 45 | ** the tree. A reference to a second child (ps) is its position | 38 | ** the tree. A reference to a second sibling (ps) is its position |
| 46 | ** relative to the position of the tree itself. | 39 | ** relative to the position of the tree itself. A key in ktable |
| 40 | ** uses the (unique) address of the original tree that created that | ||
| 41 | ** entry. NULL means no data. | ||
| 47 | */ | 42 | */ |
| 48 | typedef struct TTree { | 43 | typedef struct TTree { |
| 49 | byte tag; | 44 | byte tag; |
| 50 | byte cap; /* kind of capture (if it is a capture) */ | 45 | byte cap; /* kind of capture (if it is a capture) */ |
| 51 | unsigned short key; /* key in ktable for Lua data (0 if no key) */ | 46 | unsigned short key; /* key in ktable for Lua data (0 if no key) */ |
| 52 | union { | 47 | union { |
| 48 | int ps; /* occasional second sibling */ | ||
| 53 | int n; /* occasional counter */ | 49 | int n; /* occasional counter */ |
| 54 | int ps; /* occasional second child */ | ||
| 55 | } u; | 50 | } u; |
| 56 | } TTree; | 51 | } TTree; |
| 57 | 52 | ||
| @@ -67,10 +62,10 @@ typedef struct Pattern { | |||
| 67 | } Pattern; | 62 | } Pattern; |
| 68 | 63 | ||
| 69 | 64 | ||
| 70 | /* number of children for each tree */ | 65 | /* number of siblings for each tree */ |
| 71 | extern const byte numsiblings[]; | 66 | extern const byte numsiblings[]; |
| 72 | 67 | ||
| 73 | /* access to children */ | 68 | /* access to siblings */ |
| 74 | #define sib1(t) ((t) + 1) | 69 | #define sib1(t) ((t) + 1) |
| 75 | #define sib2(t) ((t) + (t)->u.ps) | 70 | #define sib2(t) ((t) + (t)->u.ps) |
| 76 | 71 | ||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lptypes.h,v 1.16 2017/01/13 13:33:17 roberto Exp $ | 2 | ** $Id: lptypes.h,v 1.14 2015/09/28 17:17:41 roberto Exp $ |
| 3 | ** LPeg - PEG pattern matching for Lua | 3 | ** LPeg - PEG pattern matching for Lua |
| 4 | ** Copyright 2007-2017, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 4 | ** Copyright 2007-2015, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
| 5 | ** written by Roberto Ierusalimschy | 5 | ** written by Roberto Ierusalimschy |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| @@ -19,7 +19,7 @@ | |||
| 19 | #include "lua.h" | 19 | #include "lua.h" |
| 20 | 20 | ||
| 21 | 21 | ||
| 22 | #define VERSION "1.5.1" | 22 | #define VERSION "1.5.0" |
| 23 | 23 | ||
| 24 | 24 | ||
| 25 | #define PATTERN_T "lpeg-pattern" | 25 | #define PATTERN_T "lpeg-pattern" |
| @@ -55,9 +55,9 @@ | |||
| 55 | #endif | 55 | #endif |
| 56 | 56 | ||
| 57 | 57 | ||
| 58 | /* maximum number of rules in a grammar (limited by 'unsigned char') */ | 58 | /* maximum number of rules in a grammar */ |
| 59 | #if !defined(MAXRULES) | 59 | #if !defined(MAXRULES) |
| 60 | #define MAXRULES 250 | 60 | #define MAXRULES 1000 |
| 61 | #endif | 61 | #endif |
| 62 | 62 | ||
| 63 | 63 | ||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $ | 2 | ** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $ |
| 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
| 4 | */ | 4 | */ |
| 5 | 5 | ||
| @@ -47,16 +47,14 @@ typedef struct Stack { | |||
| 47 | 47 | ||
| 48 | 48 | ||
| 49 | /* | 49 | /* |
| 50 | ** Make the size of the array of captures 'cap' twice as large as needed | 50 | ** Double the size of the array of captures |
| 51 | ** (which is 'captop'). ('n' is the number of new elements.) | ||
| 52 | */ | 51 | */ |
| 53 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, | 52 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { |
| 54 | int n, int ptop) { | ||
| 55 | Capture *newc; | 53 | Capture *newc; |
| 56 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) | 54 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) |
| 57 | luaL_error(L, "too many captures"); | 55 | luaL_error(L, "too many captures"); |
| 58 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); | 56 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); |
| 59 | memcpy(newc, cap, (captop - n) * sizeof(Capture)); | 57 | memcpy(newc, cap, captop * sizeof(Capture)); |
| 60 | lua_replace(L, caplistidx(ptop)); | 58 | lua_replace(L, caplistidx(ptop)); |
| 61 | return newc; | 59 | return newc; |
| 62 | } | 60 | } |
| @@ -117,8 +115,8 @@ static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | |||
| 117 | */ | 115 | */ |
| 118 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { | 116 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { |
| 119 | int i; | 117 | int i; |
| 120 | base[0].kind = Cgroup; /* create group capture */ | 118 | /* Cgroup capture is already there */ |
| 121 | base[0].siz = 0; | 119 | assert(base[0].kind == Cgroup && base[0].siz == 0); |
| 122 | base[0].idx = 0; /* make it an anonymous group */ | 120 | base[0].idx = 0; /* make it an anonymous group */ |
| 123 | for (i = 1; i <= n; i++) { /* add runtime captures */ | 121 | for (i = 1; i <= n; i++) { /* add runtime captures */ |
| 124 | base[i].kind = Cruntime; | 122 | base[i].kind = Cruntime; |
| @@ -163,11 +161,10 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
| 163 | lua_pushlightuserdata(L, stackbase); | 161 | lua_pushlightuserdata(L, stackbase); |
| 164 | for (;;) { | 162 | for (;;) { |
| 165 | #if defined(DEBUG) | 163 | #if defined(DEBUG) |
| 166 | printf("-------------------------------------\n"); | ||
| 167 | printcaplist(capture, capture + captop); | ||
| 168 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", | 164 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", |
| 169 | s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); | 165 | s, stack - getstackbase(L, ptop), ndyncap, captop); |
| 170 | printinst(op, p); | 166 | printinst(op, p); |
| 167 | printcaplist(capture, capture + captop); | ||
| 171 | #endif | 168 | #endif |
| 172 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); | 169 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); |
| 173 | assert(insidepred == INPRED || insidepred == OUTPRED); | 170 | assert(insidepred == INPRED || insidepred == OUTPRED); |
| @@ -365,9 +362,6 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
| 365 | assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv); | 362 | assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv); |
| 366 | insidepred = stack->labenv; /* labeled failure */ | 363 | insidepred = stack->labenv; /* labeled failure */ |
| 367 | p = stack->p; | 364 | p = stack->p; |
| 368 | #if defined(DEBUG) | ||
| 369 | printf("**FAIL**\n"); | ||
| 370 | #endif | ||
| 371 | continue; | 365 | continue; |
| 372 | } | 366 | } |
| 373 | case ICloseRunTime: { | 367 | case ICloseRunTime: { |
| @@ -377,7 +371,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
| 377 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; | 371 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; |
| 378 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ | 372 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ |
| 379 | captop -= n; /* remove nested captures */ | 373 | captop -= n; /* remove nested captures */ |
| 380 | ndyncap -= rem; /* update number of dynamic captures */ | 374 | ndyncap += n - rem; /* update number of dynamic captures */ |
| 381 | fr -= rem; /* 'rem' items were popped from Lua stack */ | 375 | fr -= rem; /* 'rem' items were popped from Lua stack */ |
| 382 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ | 376 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ |
| 383 | if (res == -1) { /* fail? */ | 377 | if (res == -1) { /* fail? */ |
| @@ -389,10 +383,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
| 389 | n = lua_gettop(L) - fr + 1; /* number of new captures */ | 383 | n = lua_gettop(L) - fr + 1; /* number of new captures */ |
| 390 | ndyncap += n; /* update number of dynamic captures */ | 384 | ndyncap += n; /* update number of dynamic captures */ |
| 391 | if (n > 0) { /* any new capture? */ | 385 | if (n > 0) { /* any new capture? */ |
| 392 | if (fr + n >= SHRT_MAX) | ||
| 393 | luaL_error(L, "too many results in match-time capture"); | ||
| 394 | if ((captop += n + 2) >= capsize) { | 386 | if ((captop += n + 2) >= capsize) { |
| 395 | capture = doublecap(L, capture, captop, n + 2, ptop); | 387 | capture = doublecap(L, capture, captop, ptop); |
| 396 | capsize = 2 * captop; | 388 | capsize = 2 * captop; |
| 397 | } | 389 | } |
| 398 | /* add new captures to 'capture' list */ | 390 | /* add new captures to 'capture' list */ |
| @@ -429,7 +421,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
| 429 | capture[captop].idx = p->i.key; | 421 | capture[captop].idx = p->i.key; |
| 430 | capture[captop].kind = getkind(p); | 422 | capture[captop].kind = getkind(p); |
| 431 | if (++captop >= capsize) { | 423 | if (++captop >= capsize) { |
| 432 | capture = doublecap(L, capture, captop, 0, ptop); | 424 | capture = doublecap(L, capture, captop, ptop); |
| 433 | capsize = 2 * captop; | 425 | capsize = 2 * captop; |
| 434 | } | 426 | } |
| 435 | p++; | 427 | p++; |
| @@ -1,6 +1,6 @@ | |||
| 1 | #!/usr/bin/env lua | 1 | #!/usr/bin/env lua |
| 2 | 2 | ||
| 3 | -- $Id: test.lua,v 1.112 2017/01/14 18:55:22 roberto Exp $ | 3 | -- $Id: test.lua,v 1.109 2015/09/28 17:01:25 roberto Exp $ |
| 4 | 4 | ||
| 5 | -- require"strict" -- just to be pedantic | 5 | -- require"strict" -- just to be pedantic |
| 6 | 6 | ||
| @@ -202,14 +202,6 @@ do | |||
| 202 | end | 202 | end |
| 203 | 203 | ||
| 204 | 204 | ||
| 205 | -- bug: loop in 'hascaptures' | ||
| 206 | do | ||
| 207 | local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'}) | ||
| 208 | assert(p:match("xxx") == "") | ||
| 209 | end | ||
| 210 | |||
| 211 | |||
| 212 | |||
| 213 | -- test for small capture boundary | 205 | -- test for small capture boundary |
| 214 | for i = 250,260 do | 206 | for i = 250,260 do |
| 215 | assert(#m.match(m.C(i), string.rep('a', i)) == i) | 207 | assert(#m.match(m.C(i), string.rep('a', i)) == i) |
| @@ -525,27 +517,6 @@ assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | |||
| 525 | assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | 517 | assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") |
| 526 | assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | 518 | assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") |
| 527 | 519 | ||
| 528 | |||
| 529 | -- fixed length | ||
| 530 | do | ||
| 531 | -- 'and' predicate using fixed length | ||
| 532 | local p = m.C(#("a" * (m.P("bd") + "cd")) * 2) | ||
| 533 | assert(p:match("acd") == "ac") | ||
| 534 | |||
| 535 | p = #m.P{ "a" * m.V(2), m.P"b" } * 2 | ||
| 536 | assert(p:match("abc") == 3) | ||
| 537 | |||
| 538 | p = #(m.P"abc" * m.B"c") | ||
| 539 | assert(p:match("abc") == 1 and not p:match("ab")) | ||
| 540 | |||
| 541 | p = m.P{ "a" * m.V(2), m.P"b"^1 } | ||
| 542 | checkerr("pattern may not have fixed length", m.B, p) | ||
| 543 | |||
| 544 | p = "abc" * (m.P"b"^1 + m.P"a"^0) | ||
| 545 | checkerr("pattern may not have fixed length", m.B, p) | ||
| 546 | end | ||
| 547 | |||
| 548 | |||
| 549 | p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) | 520 | p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) |
| 550 | assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) | 521 | assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) |
| 551 | 522 | ||
| @@ -1127,32 +1098,6 @@ do | |||
| 1127 | assert(c == 11) | 1098 | assert(c == 11) |
| 1128 | end | 1099 | end |
| 1129 | 1100 | ||
| 1130 | |||
| 1131 | -- Return a match-time capture that returns 'n' captures | ||
| 1132 | local function manyCmt (n) | ||
| 1133 | return m.Cmt("a", function () | ||
| 1134 | local a = {}; for i = 1, n do a[i] = n - i end | ||
| 1135 | return true, unpack(a) | ||
| 1136 | end) | ||
| 1137 | end | ||
| 1138 | |||
| 1139 | -- bug in 1.0: failed match-time that used previous match-time results | ||
| 1140 | do | ||
| 1141 | local x | ||
| 1142 | local function aux (...) x = #{...}; return false end | ||
| 1143 | local res = {m.match(m.Cmt(manyCmt(20), aux) + manyCmt(10), "a")} | ||
| 1144 | assert(#res == 10 and res[1] == 9 and res[10] == 0) | ||
| 1145 | end | ||
| 1146 | |||
| 1147 | |||
| 1148 | -- bug in 1.0: problems with math-times returning too many captures | ||
| 1149 | do | ||
| 1150 | local lim = 2^11 - 10 | ||
| 1151 | local res = {m.match(manyCmt(lim), "a")} | ||
| 1152 | assert(#res == lim and res[1] == lim - 1 and res[lim] == 0) | ||
| 1153 | checkerr("too many", m.match, manyCmt(2^15), "a") | ||
| 1154 | end | ||
| 1155 | |||
| 1156 | p = (m.P(function () return true, "a" end) * 'a' | 1101 | p = (m.P(function () return true, "a" end) * 'a' |
| 1157 | + m.P(function (s, i) return i, "aa", 20 end) * 'b' | 1102 | + m.P(function (s, i) return i, "aa", 20 end) * 'b' |
| 1158 | + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 | 1103 | + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 |
