From 7668d9076da6e2ae9e220eafb5c6ee1e933c088d Mon Sep 17 00:00:00 2001 From: Alexey Romanoff Date: Sun, 15 Aug 2021 15:17:28 +0300 Subject: change prefix of source files from 'lp' to 'lpl' To eliminate conflicts with the original LPeg: > $ lua -e "require 'lpeg'; require 'lpeglabel'; for _ in (require'lxsh').lexers.lua.gmatch('1') do end" lua: lpvm.c:347: match: Assertion `stack > getstackbase(L, ptop) && (stack - 1)->s != NULL' failed. Aborted --- README.md | 2 +- lpcap.c | 555 ------------------------ lpcap.h | 57 --- lpcode.c | 1081 ---------------------------------------------- lpcode.h | 40 -- lplcap.c | 555 ++++++++++++++++++++++++ lplcap.h | 57 +++ lplcode.c | 1081 ++++++++++++++++++++++++++++++++++++++++++++++ lplcode.h | 40 ++ lplprint.c | 275 ++++++++++++ lplprint.h | 36 ++ lpltree.c | 1403 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lpltree.h | 88 ++++ lpltypes.h | 157 +++++++ lplvm.c | 499 +++++++++++++++++++++ lplvm.h | 66 +++ lpprint.c | 275 ------------ lpprint.h | 36 -- lptree.c | 1403 ------------------------------------------------------------ lptree.h | 88 ---- lptypes.h | 157 ------- lpvm.c | 499 --------------------- lpvm.h | 66 --- makefile | 12 +- 24 files changed, 4264 insertions(+), 4264 deletions(-) delete mode 100644 lpcap.c delete mode 100644 lpcap.h delete mode 100644 lpcode.c delete mode 100644 lpcode.h create mode 100644 lplcap.c create mode 100644 lplcap.h create mode 100644 lplcode.c create mode 100644 lplcode.h create mode 100644 lplprint.c create mode 100644 lplprint.h create mode 100644 lpltree.c create mode 100644 lpltree.h create mode 100644 lpltypes.h create mode 100644 lplvm.c create mode 100644 lplvm.h delete mode 100644 lpprint.c delete mode 100644 lpprint.h delete mode 100644 lptree.c delete mode 100644 lptree.h delete mode 100644 lptypes.h delete mode 100644 lpvm.c delete mode 100644 lpvm.h diff --git a/README.md b/README.md index c9cbb4a..d098fb6 100644 --- a/README.md +++ b/README.md @@ -486,5 +486,5 @@ its initial rule. In case your grammar has many regular and recovery rules, you may get an error message such as grammar: has too many rules. -In this case, we need to change *MAXRULES* in `lptypes.h`. +In this case, we need to change *MAXRULES* in `lpltypes.h`. diff --git a/lpcap.c b/lpcap.c deleted file mode 100644 index b332fde..0000000 --- a/lpcap.c +++ /dev/null @@ -1,555 +0,0 @@ -/* -** $Id: lpcap.c $ -** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ - -#include "lua.h" -#include "lauxlib.h" - -#include "lpcap.h" -#include "lptypes.h" - - -#define captype(cap) ((cap)->kind) - -#define isclosecap(cap) (captype(cap) == Cclose) - -#define closeaddr(c) ((c)->s + (c)->siz - 1) - -#define isfullcap(cap) ((cap)->siz != 0) - -#define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) - -#define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) - - - -/* -** Put at the cache for Lua values the value indexed by 'v' in ktable -** of the running pattern (if it is not there yet); returns its index. -*/ -static int updatecache (CapState *cs, int v) { - int idx = cs->ptop + 1; /* stack index of cache for Lua values */ - if (v != cs->valuecached) { /* not there? */ - getfromktable(cs, v); /* get value from 'ktable' */ - lua_replace(cs->L, idx); /* put it at reserved stack position */ - cs->valuecached = v; /* keep track of what is there */ - } - return idx; -} - - -static int pushcapture (CapState *cs); - - -/* -** Goes back in a list of captures looking for an open capture -** corresponding to a close -*/ -static Capture *findopen (Capture *cap) { - int n = 0; /* number of closes waiting an open */ - for (;;) { - cap--; - if (isclosecap(cap)) n++; /* one more open to skip */ - else if (!isfullcap(cap)) - if (n-- == 0) return cap; - } -} - - -/* -** Go to the next capture -*/ -static void nextcap (CapState *cs) { - Capture *cap = cs->cap; - if (!isfullcap(cap)) { /* not a single capture? */ - int n = 0; /* number of opens waiting a close */ - for (;;) { /* look for corresponding close */ - cap++; - if (isclosecap(cap)) { - if (n-- == 0) break; - } - else if (!isfullcap(cap)) n++; - } - } - cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */ -} - - -/* -** Push on the Lua stack all values generated by nested captures inside -** the current capture. Returns number of values pushed. 'addextra' -** makes it push the entire match after all captured values. The -** entire match is pushed also if there are no other nested values, -** so the function never returns zero. -*/ -static int pushnestedvalues (CapState *cs, int addextra) { - Capture *co = cs->cap; - if (isfullcap(cs->cap++)) { /* no nested captures? */ - lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ - return 1; /* that is it */ - } - else { - int n = 0; - while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ - n += pushcapture(cs); - if (addextra || n == 0) { /* need extra? */ - lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ - n++; - } - cs->cap++; /* skip close entry */ - return n; - } -} - - -/* -** Push only the first value generated by nested captures -*/ -static void pushonenestedvalue (CapState *cs) { - int n = pushnestedvalues(cs, 0); - if (n > 1) - lua_pop(cs->L, n - 1); /* pop extra values */ -} - - -/* -** Try to find a named group capture with the name given at the top of -** the stack; goes backward from 'cap'. -*/ -static Capture *findback (CapState *cs, Capture *cap) { - lua_State *L = cs->L; - while (cap-- > cs->ocap) { /* repeat until end of list */ - if (isclosecap(cap)) - cap = findopen(cap); /* skip nested captures */ - else if (!isfullcap(cap)) - continue; /* opening an enclosing capture: skip and get previous */ - if (captype(cap) == Cgroup) { - getfromktable(cs, cap->idx); /* get group name */ - if (lp_equal(L, -2, -1)) { /* right group? */ - lua_pop(L, 2); /* remove reference name and group name */ - return cap; - } - else lua_pop(L, 1); /* remove group name */ - } - } - luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1)); - return NULL; /* to avoid warnings */ -} - - -/* -** Back-reference capture. Return number of values pushed. -*/ -static int backrefcap (CapState *cs) { - int n; - Capture *curr = cs->cap; - pushluaval(cs); /* reference name */ - cs->cap = findback(cs, curr); /* find corresponding group */ - n = pushnestedvalues(cs, 0); /* push group's values */ - cs->cap = curr + 1; - return n; -} - - -/* -** Table capture: creates a new table and populates it with nested -** captures. -*/ -static int tablecap (CapState *cs) { - lua_State *L = cs->L; - int n = 0; - lua_newtable(L); - if (isfullcap(cs->cap++)) - return 1; /* table is empty */ - while (!isclosecap(cs->cap)) { - if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ - pushluaval(cs); /* push group name */ - pushonenestedvalue(cs); - lua_settable(L, -3); - } - else { /* not a named group */ - int i; - int k = pushcapture(cs); - for (i = k; i > 0; i--) /* store all values into table */ - lua_rawseti(L, -(i + 1), n + i); - n += k; - } - } - cs->cap++; /* skip close entry */ - return 1; /* number of values pushed (only the table) */ -} - - -/* -** Table-query capture -*/ -static int querycap (CapState *cs) { - int idx = cs->cap->idx; - pushonenestedvalue(cs); /* get nested capture */ - lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */ - if (!lua_isnil(cs->L, -1)) - return 1; - else { /* no value */ - lua_pop(cs->L, 1); /* remove nil */ - return 0; - } -} - - -/* -** Fold capture -*/ -static int foldcap (CapState *cs) { - int n; - lua_State *L = cs->L; - int idx = cs->cap->idx; - if (isfullcap(cs->cap++) || /* no nested captures? */ - isclosecap(cs->cap) || /* no nested captures (large subject)? */ - (n = pushcapture(cs)) == 0) /* nested captures with no values? */ - return luaL_error(L, "no initial value for fold capture"); - if (n > 1) - lua_pop(L, n - 1); /* leave only one result for accumulator */ - while (!isclosecap(cs->cap)) { - lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ - lua_insert(L, -2); /* put it before accumulator */ - n = pushcapture(cs); /* get next capture's values */ - lua_call(L, n + 1, 1); /* call folding function */ - } - cs->cap++; /* skip close entry */ - return 1; /* only accumulator left on the stack */ -} - - -/* -** Function capture -*/ -static int functioncap (CapState *cs) { - int n; - int top = lua_gettop(cs->L); - pushluaval(cs); /* push function */ - n = pushnestedvalues(cs, 0); /* push nested captures */ - lua_call(cs->L, n, LUA_MULTRET); /* call function */ - return lua_gettop(cs->L) - top; /* return function's results */ -} - - -/* -** Select capture -*/ -static int numcap (CapState *cs) { - int idx = cs->cap->idx; /* value to select */ - if (idx == 0) { /* no values? */ - nextcap(cs); /* skip entire capture */ - return 0; /* no value produced */ - } - else { - int n = pushnestedvalues(cs, 0); - if (n < idx) /* invalid index? */ - return luaL_error(cs->L, "no capture '%d'", idx); - else { - lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */ - lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */ - lua_pop(cs->L, n - 1); /* remove other captures */ - return 1; - } - } -} - - -/* -** Return the stack index of the first runtime capture in the given -** list of captures (or zero if no runtime captures) -*/ -int finddyncap (Capture *cap, Capture *last) { - for (; cap < last; cap++) { - if (cap->kind == Cruntime) - return cap->idx; /* stack position of first capture */ - } - return 0; /* no dynamic captures in this segment */ -} - - -/* -** Calls a runtime capture. Returns number of captures "removed" by the -** call, that is, those inside the group capture. Captures to be added -** are on the Lua stack. -*/ -int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { - int n, id; - lua_State *L = cs->L; - int otop = lua_gettop(L); - Capture *open = findopen(close); /* get open group capture */ - assert(captype(open) == Cgroup); - id = finddyncap(open, close); /* get first dynamic capture argument */ - close->kind = Cclose; /* closes the group */ - close->s = s; - cs->cap = open; cs->valuecached = 0; /* prepare capture state */ - luaL_checkstack(L, 4, "too many runtime captures"); - pushluaval(cs); /* push function to be called */ - lua_pushvalue(L, SUBJIDX); /* push original subject */ - lua_pushinteger(L, s - cs->s + 1); /* push current position */ - n = pushnestedvalues(cs, 0); /* push nested captures */ - lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */ - if (id > 0) { /* are there old dynamic captures to be removed? */ - int i; - for (i = id; i <= otop; i++) - lua_remove(L, id); /* remove old dynamic captures */ - *rem = otop - id + 1; /* total number of dynamic captures removed */ - } - else - *rem = 0; /* no dynamic captures removed */ - return close - open - 1; /* number of captures to be removed */ -} - - -/* -** Auxiliary structure for substitution and string captures: keep -** information about nested captures for future use, avoiding to push -** string results into Lua -*/ -typedef struct StrAux { - int isstring; /* whether capture is a string */ - union { - Capture *cp; /* if not a string, respective capture */ - struct { /* if it is a string... */ - const char *s; /* ... starts here */ - const char *e; /* ... ends here */ - } s; - } u; -} StrAux; - -#define MAXSTRCAPS 10 - -/* -** Collect values from current capture into array 'cps'. Current -** capture must be Cstring (first call) or Csimple (recursive calls). -** (In first call, fills %0 with whole match for Cstring.) -** Returns number of elements in the array that were filled. -*/ -static int getstrcaps (CapState *cs, StrAux *cps, int n) { - int k = n++; - cps[k].isstring = 1; /* get string value */ - cps[k].u.s.s = cs->cap->s; /* starts here */ - if (!isfullcap(cs->cap++)) { /* nested captures? */ - while (!isclosecap(cs->cap)) { /* traverse them */ - if (n >= MAXSTRCAPS) /* too many captures? */ - nextcap(cs); /* skip extra captures (will not need them) */ - else if (captype(cs->cap) == Csimple) /* string? */ - n = getstrcaps(cs, cps, n); /* put info. into array */ - else { - cps[n].isstring = 0; /* not a string */ - cps[n].u.cp = cs->cap; /* keep original capture */ - nextcap(cs); - n++; - } - } - cs->cap++; /* skip close */ - } - cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ - return n; -} - - -/* -** add next capture value (which should be a string) to buffer 'b' -*/ -static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); - - -/* -** String capture: add result to buffer 'b' (instead of pushing -** it into the stack) -*/ -static void stringcap (luaL_Buffer *b, CapState *cs) { - StrAux cps[MAXSTRCAPS]; - int n; - size_t len, i; - const char *fmt; /* format string */ - fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); - n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */ - for (i = 0; i < len; i++) { /* traverse them */ - if (fmt[i] != '%') /* not an escape? */ - luaL_addchar(b, fmt[i]); /* add it to buffer */ - else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */ - luaL_addchar(b, fmt[i]); /* add to buffer */ - else { - int l = fmt[i] - '0'; /* capture index */ - if (l > n) - luaL_error(cs->L, "invalid capture index (%d)", l); - else if (cps[l].isstring) - luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); - else { - Capture *curr = cs->cap; - cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ - if (!addonestring(b, cs, "capture")) - luaL_error(cs->L, "no values in capture index %d", l); - cs->cap = curr; /* continue from where it stopped */ - } - } - } -} - - -/* -** Substitution capture: add result to buffer 'b' -*/ -static void substcap (luaL_Buffer *b, CapState *cs) { - const char *curr = cs->cap->s; - if (isfullcap(cs->cap)) /* no nested captures? */ - luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ - else { - cs->cap++; /* skip open entry */ - while (!isclosecap(cs->cap)) { /* traverse nested captures */ - const char *next = cs->cap->s; - luaL_addlstring(b, curr, next - curr); /* add text up to capture */ - if (addonestring(b, cs, "replacement")) - curr = closeaddr(cs->cap - 1); /* continue after match */ - else /* no capture value */ - curr = next; /* keep original text in final result */ - } - luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ - } - cs->cap++; /* go to next capture */ -} - - -/* -** Evaluates a capture and adds its first value to buffer 'b'; returns -** whether there was a value -*/ -static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { - switch (captype(cs->cap)) { - case Cstring: - stringcap(b, cs); /* add capture directly to buffer */ - return 1; - case Csubst: - substcap(b, cs); /* add capture directly to buffer */ - return 1; - default: { - lua_State *L = cs->L; - int n = pushcapture(cs); - if (n > 0) { - if (n > 1) lua_pop(L, n - 1); /* only one result */ - if (!lua_isstring(L, -1)) - luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1)); - luaL_addvalue(b); - } - return n; - } - } -} - - -#if !defined(MAXRECLEVEL) -#define MAXRECLEVEL 200 -#endif - - -/* -** Push all values of the current capture into the stack; returns -** number of values pushed -*/ -static int pushcapture (CapState *cs) { - lua_State *L = cs->L; - int res; - luaL_checkstack(L, 4, "too many captures"); - if (cs->reclevel++ > MAXRECLEVEL) - return luaL_error(L, "subcapture nesting too deep"); - switch (captype(cs->cap)) { - case Cposition: { - lua_pushinteger(L, cs->cap->s - cs->s + 1); - cs->cap++; - res = 1; - break; - } - case Cconst: { - pushluaval(cs); - cs->cap++; - res = 1; - break; - } - case Carg: { - int arg = (cs->cap++)->idx; - if (arg + FIXEDARGS > cs->ptop) - return luaL_error(L, "reference to absent extra argument #%d", arg); - lua_pushvalue(L, arg + FIXEDARGS); - res = 1; - break; - } - case Csimple: { - int k = pushnestedvalues(cs, 1); - lua_insert(L, -k); /* make whole match be first result */ - res = k; - break; - } - case Cruntime: { - lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ - res = 1; - break; - } - case Cstring: { - luaL_Buffer b; - luaL_buffinit(L, &b); - stringcap(&b, cs); - luaL_pushresult(&b); - res = 1; - break; - } - case Csubst: { - luaL_Buffer b; - luaL_buffinit(L, &b); - substcap(&b, cs); - luaL_pushresult(&b); - res = 1; - break; - } - case Cgroup: { - if (cs->cap->idx == 0) /* anonymous group? */ - res = pushnestedvalues(cs, 0); /* add all nested values */ - else { /* named group: add no values */ - nextcap(cs); /* skip capture */ - res = 0; - } - break; - } - case Cbackref: res = backrefcap(cs); break; - case Ctable: res = tablecap(cs); break; - case Cfunction: res = functioncap(cs); break; - case Cnum: res = numcap(cs); break; - case Cquery: res = querycap(cs); break; - case Cfold: res = foldcap(cs); break; - default: assert(0); res = 0; - } - cs->reclevel--; - return res; -} - - -/* -** Prepare a CapState structure and traverse the entire list of -** captures in the stack pushing its results. 's' is the subject -** string, 'r' is the final position of the match, and 'ptop' -** the index in the stack where some useful values were pushed. -** Returns the number of results pushed. (If the list produces no -** results, push the final position of the match.) -*/ -int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { - Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); - int n = 0; - if (!isclosecap(capture)) { /* is there any capture? */ - CapState cs; - cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; - cs.s = s; cs.valuecached = 0; cs.ptop = ptop; - do { /* collect their values */ - n += pushcapture(&cs); - } while (!isclosecap(cs.cap)); - } - if (n == 0) { /* no capture values? */ - lua_pushinteger(L, r - s + 1); /* return only end position */ - n = 1; - } - return n; -} - - diff --git a/lpcap.h b/lpcap.h deleted file mode 100644 index f3d1836..0000000 --- a/lpcap.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -** $Id: lpcap.h $ -*/ - -#if !defined(lpcap_h) -#define lpcap_h - - -#include "lptypes.h" - - -/* kinds of captures */ -typedef enum CapKind { - Cclose, /* not used in trees */ - Cposition, - Cconst, /* ktable[key] is Lua constant */ - Cbackref, /* ktable[key] is "name" of group to get capture */ - Carg, /* 'key' is arg's number */ - Csimple, /* next node is pattern */ - Ctable, /* next node is pattern */ - Cfunction, /* ktable[key] is function; next node is pattern */ - Cquery, /* ktable[key] is table; next node is pattern */ - Cstring, /* ktable[key] is string; next node is pattern */ - Cnum, /* numbered capture; 'key' is number of value to return */ - Csubst, /* substitution capture; next node is pattern */ - Cfold, /* ktable[key] is function; next node is pattern */ - Cruntime, /* not used in trees (is uses another type for tree) */ - Cgroup /* ktable[key] is group's "name" */ -} CapKind; - - -typedef struct Capture { - const char *s; /* subject position */ - unsigned short idx; /* extra info (group name, arg index, etc.) */ - byte kind; /* kind of capture */ - byte siz; /* size of full capture + 1 (0 = not a full capture) */ -} Capture; - - -typedef struct CapState { - Capture *cap; /* current capture */ - Capture *ocap; /* (original) capture list */ - lua_State *L; - int ptop; /* index of last argument to 'match' */ - const char *s; /* original string */ - int valuecached; /* value stored in cache slot */ - int reclevel; /* recursion level */ -} CapState; - - -LUAI_FUNC int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); -LUAI_FUNC int getcaptures (lua_State *L, const char *s, const char *r, int ptop); -LUAI_FUNC int finddyncap (Capture *cap, Capture *last); - -#endif - - diff --git a/lpcode.c b/lpcode.c deleted file mode 100644 index 78c6566..0000000 --- a/lpcode.c +++ /dev/null @@ -1,1081 +0,0 @@ -/* -** $Id: lpcode.c $ -** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ - -#include - - -#include "lua.h" -#include "lauxlib.h" - -#include "lptypes.h" -#include "lpcode.h" - - -/* signals a "no-instruction */ -#define NOINST -1 - - - -static const Charset fullset_ = - {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}; - -static const Charset *fullset = &fullset_; - -/* -** {====================================================== -** Analysis and some optimizations -** ======================================================= -*/ - -/* -** Check whether a charset is empty (returns IFail), singleton (IChar), -** full (IAny), or none of those (ISet). When singleton, '*c' returns -** which character it is. (When generic set, the set was the input, -** so there is no need to return it.) -*/ -static Opcode charsettype (const byte *cs, int *c) { - int count = 0; /* number of characters in the set */ - int i; - int candidate = -1; /* candidate position for the singleton char */ - for (i = 0; i < CHARSETSIZE; i++) { /* for each byte */ - int b = cs[i]; - if (b == 0) { /* is byte empty? */ - if (count > 1) /* was set neither empty nor singleton? */ - return ISet; /* neither full nor empty nor singleton */ - /* else set is still empty or singleton */ - } - else if (b == 0xFF) { /* is byte full? */ - if (count < (i * BITSPERCHAR)) /* was set not full? */ - return ISet; /* neither full nor empty nor singleton */ - else count += BITSPERCHAR; /* set is still full */ - } - else if ((b & (b - 1)) == 0) { /* has byte only one bit? */ - if (count > 0) /* was set not empty? */ - return ISet; /* neither full nor empty nor singleton */ - else { /* set has only one char till now; track it */ - count++; - candidate = i; - } - } - else return ISet; /* byte is neither empty, full, nor singleton */ - } - switch (count) { - case 0: return IFail; /* empty set */ - case 1: { /* singleton; find character bit inside byte */ - int b = cs[candidate]; - *c = candidate * BITSPERCHAR; - if ((b & 0xF0) != 0) { *c += 4; b >>= 4; } - if ((b & 0x0C) != 0) { *c += 2; b >>= 2; } - if ((b & 0x02) != 0) { *c += 1; } - return IChar; - } - default: { - assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */ - return IAny; - } - } -} - - -/* -** A few basic operations on Charsets -*/ -static void cs_complement (Charset *cs) { - loopset(i, cs->cs[i] = ~cs->cs[i]); -} - -static int cs_equal (const byte *cs1, const byte *cs2) { - loopset(i, if (cs1[i] != cs2[i]) return 0); - return 1; -} - -static int cs_disjoint (const Charset *cs1, const Charset *cs2) { - loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) - return 1; -} - - -/* -** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a -** charset and return 1; else return 0. -*/ -int tocharset (TTree *tree, Charset *cs) { - switch (tree->tag) { - case TSet: { /* copy set */ - loopset(i, cs->cs[i] = treebuffer(tree)[i]); - return 1; - } - case TChar: { /* only one char */ - assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); - loopset(i, cs->cs[i] = 0); /* erase all chars */ - setchar(cs->cs, tree->u.n); /* add that one */ - return 1; - } - case TAny: { - loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */ - return 1; - } - default: return 0; - } -} - - -/* -** Visit a TCall node taking care to stop recursion. If node not yet -** visited, return 'f(sib2(tree))', otherwise return 'def' (default -** value) -*/ -static int callrecursive (TTree *tree, int f (TTree *t), int def) { - int key = tree->key; - assert(tree->tag == TCall); - assert(sib2(tree)->tag == TRule); - if (key == 0) /* node already visited? */ - return def; /* return default value */ - else { /* first visit */ - int result; - tree->key = 0; /* mark call as already visited */ - result = f(sib2(tree)); /* go to called rule */ - tree->key = key; /* restore tree */ - return result; - } -} - - -/* -** Check whether a pattern tree has captures -*/ -int hascaptures (TTree *tree) { - tailcall: - switch (tree->tag) { - case TCapture: case TRunTime: - return 1; - case TCall: - return callrecursive(tree, hascaptures, 0); - case TRule: /* do not follow siblings */ - tree = sib1(tree); goto tailcall; - case TOpenCall: assert(0); - default: { - switch (numsiblings[tree->tag]) { - case 1: /* return hascaptures(sib1(tree)); */ - tree = sib1(tree); goto tailcall; - case 2: - if (hascaptures(sib1(tree))) - return 1; - /* else return hascaptures(sib2(tree)); */ - tree = sib2(tree); goto tailcall; - default: assert(numsiblings[tree->tag] == 0); return 0; - } - } - } -} - - -/* -** Checks how a pattern behaves regarding the empty string, -** in one of two different ways: -** A pattern is *nullable* if it can match without consuming any character; -** A pattern is *nofail* if it never fails for any string -** (including the empty string). -** The difference is only for predicates and run-time captures; -** for other patterns, the two properties are equivalent. -** (With predicates, &'a' is nullable but not nofail. Of course, -** nofail => nullable.) -** These functions are all convervative in the following way: -** p is nullable => nullable(p) -** nofail(p) => p cannot fail -** The function assumes that TOpenCall is not nullable; -** this will be checked again when the grammar is fixed. -** Run-time captures can do whatever they want, so the result -** is conservative. -*/ -int checkaux (TTree *tree, int pred) { - tailcall: - switch (tree->tag) { - case TChar: case TSet: case TAny: case TUTFR: - case TFalse: case TOpenCall: case TThrow: /* labeled failure */ - return 0; /* not nullable */ - case TRep: case TTrue: - return 1; /* no fail */ - case TNot: case TBehind: /* can match empty, but can fail */ - if (pred == PEnofail) return 0; - else return 1; /* PEnullable */ - case TAnd: /* can match empty; fail iff body does */ - if (pred == PEnullable) return 1; - /* else return checkaux(sib1(tree), pred); */ - tree = sib1(tree); goto tailcall; - case TRunTime: /* can fail; match empty iff body does */ - if (pred == PEnofail) return 0; - /* else return checkaux(sib1(tree), pred); */ - tree = sib1(tree); goto tailcall; - case TSeq: - if (!checkaux(sib1(tree), pred)) return 0; - /* else return checkaux(sib2(tree), pred); */ - tree = sib2(tree); goto tailcall; - case TChoice: - if (checkaux(sib2(tree), pred)) return 1; - /* else return checkaux(sib1(tree), pred); */ - tree = sib1(tree); goto tailcall; - case TCapture: case TGrammar: case TRule: case TXInfo: - /* return checkaux(sib1(tree), pred); */ - tree = sib1(tree); goto tailcall; - case TCall: /* return checkaux(sib2(tree), pred); */ - tree = sib2(tree); goto tailcall; - default: assert(0); return 0; - } -} - - -/* -** number of characters to match a pattern (or -1 if variable) -*/ -int fixedlen (TTree *tree) { - int len = 0; /* to accumulate in tail calls */ - tailcall: - switch (tree->tag) { - case TChar: case TSet: case TAny: - return len + 1; - case TUTFR: - return (tree->cap == sib1(tree)->cap) ? len + tree->cap : -1; - case TFalse: case TTrue: case TNot: case TAnd: case TBehind: - return len; - case TRep: case TRunTime: case TOpenCall: - case TThrow: /* labeled failure */ - return -1; - case TCapture: case TRule: case TGrammar: case TXInfo: - /* return fixedlen(sib1(tree)); */ - tree = sib1(tree); goto tailcall; - case TCall: { - int n1 = callrecursive(tree, fixedlen, -1); - if (n1 < 0) - return -1; - else - return len + n1; - } - case TSeq: { - int n1 = fixedlen(sib1(tree)); - if (n1 < 0) - return -1; - /* else return fixedlen(sib2(tree)) + len; */ - len += n1; tree = sib2(tree); goto tailcall; - } - case TChoice: { - int n1 = fixedlen(sib1(tree)); - int n2 = fixedlen(sib2(tree)); - if (n1 != n2 || n1 < 0) - return -1; - else - return len + n1; - } - default: assert(0); return 0; - }; -} - - -/* -** Computes the 'first set' of a pattern. -** The result is a conservative aproximation: -** match p ax -> x (for some x) ==> a belongs to first(p) -** or -** a not in first(p) ==> match p ax -> fail (for all x) -** -** The set 'follow' is the first set of what follows the -** pattern (full set if nothing follows it). -** -** The function returns 0 when this resulting set can be used for -** test instructions that avoid the pattern altogether. -** A non-zero return can happen for two reasons: -** 1) match p '' -> '' ==> return has bit 1 set -** (tests cannot be used because they would always fail for an empty input); -** 2) there is a match-time capture ==> return has bit 2 set -** (optimizations should not bypass match-time captures). -*/ -static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { - tailcall: - switch (tree->tag) { - case TChar: case TSet: case TAny: { - tocharset(tree, firstset); - return 0; - } - case TUTFR: { - int c; - loopset(i, firstset->cs[i] = 0); /* erase all chars */ - for (c = tree->key; c <= sib1(tree)->key; c++) - setchar(firstset->cs, c); - return 0; - } - case TTrue: { - loopset(i, firstset->cs[i] = follow->cs[i]); - return 1; /* accepts the empty string */ - } - case TFalse: { - loopset(i, firstset->cs[i] = 0); - return 0; - } - case TThrow: { /* labeled failure: must always throw the label */ - loopset(i, firstset->cs[i] = fullset->cs[i]); - return 1; - } - case TChoice: { - Charset csaux; - int e1 = getfirst(sib1(tree), follow, firstset); - int e2 = getfirst(sib2(tree), follow, &csaux); - loopset(i, firstset->cs[i] |= csaux.cs[i]); - return e1 | e2; - } - case TSeq: { - if (!nullable(sib1(tree))) { - /* when p1 is not nullable, p2 has nothing to contribute; - return getfirst(sib1(tree), fullset, firstset); */ - tree = sib1(tree); follow = fullset; goto tailcall; - } - else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */ - Charset csaux; - int e2 = getfirst(sib2(tree), follow, &csaux); - int e1 = getfirst(sib1(tree), &csaux, firstset); - if (e1 == 0) return 0; /* 'e1' ensures that first can be used */ - else if ((e1 | e2) & 2) /* one of the children has a matchtime? */ - return 2; /* pattern has a matchtime capture */ - else return e2; /* else depends on 'e2' */ - } - } - case TRep: { - getfirst(sib1(tree), follow, firstset); - loopset(i, firstset->cs[i] |= follow->cs[i]); - return 1; /* accept the empty string */ - } - case TCapture: case TGrammar: case TRule: case TXInfo: { - /* return getfirst(sib1(tree), follow, firstset); */ - tree = sib1(tree); goto tailcall; - } - case TRunTime: { /* function invalidates any follow info. */ - int e = getfirst(sib1(tree), fullset, firstset); - if (e) return 2; /* function is not "protected"? */ - else return 0; /* pattern inside capture ensures first can be used */ - } - case TCall: { - /* return getfirst(sib2(tree), follow, firstset); */ - tree = sib2(tree); goto tailcall; - } - case TAnd: { - int e = getfirst(sib1(tree), follow, firstset); - loopset(i, firstset->cs[i] &= follow->cs[i]); - return e; - } - case TNot: { - if (tocharset(sib1(tree), firstset)) { - cs_complement(firstset); - return 1; - } - /* else go through */ - } - case TBehind: { /* instruction gives no new information */ - /* call 'getfirst' only to check for math-time captures */ - int e = getfirst(sib1(tree), follow, firstset); - loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */ - return e | 1; /* always can accept the empty string */ - } - default: assert(0); return 0; - } -} - - -/* -** If 'headfail(tree)' true, then 'tree' can fail only depending on the -** next character of the subject. -*/ -static int headfail (TTree *tree) { - tailcall: - switch (tree->tag) { - case TChar: case TSet: case TAny: case TFalse: - return 1; - case TTrue: case TRep: case TRunTime: case TNot: - case TBehind: case TUTFR: - case TThrow: /* labeled failure: must always throw the label */ - return 0; - case TCapture: case TGrammar: case TRule: case TXInfo: case TAnd: - tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ - case TCall: - tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ - case TSeq: - if (!nofail(sib2(tree))) return 0; - /* else return headfail(sib1(tree)); */ - tree = sib1(tree); goto tailcall; - case TChoice: - if (!headfail(sib1(tree))) return 0; - /* else return headfail(sib2(tree)); */ - tree = sib2(tree); goto tailcall; - default: assert(0); return 0; - } -} - - -/* -** Check whether the code generation for the given tree can benefit -** from a follow set (to avoid computing the follow set when it is -** not needed) -*/ -static int needfollow (TTree *tree) { - tailcall: - switch (tree->tag) { - case TChar: case TSet: case TAny: case TUTFR: - case TFalse: case TTrue: case TAnd: case TNot: - case TRunTime: case TGrammar: case TCall: case TBehind: - case TThrow: /* (?)labeled failure */ - return 0; - case TChoice: case TRep: - return 1; - case TCapture: - tree = sib1(tree); goto tailcall; - case TSeq: - tree = sib2(tree); goto tailcall; - default: assert(0); return 0; - } -} - -/* }====================================================== */ - - - -/* -** {====================================================== -** Code generation -** ======================================================= -*/ - - -/* -** size of an instruction -*/ -int sizei (const Instruction *i) { - switch((Opcode)i->i.code) { - case ISet: case ISpan: return CHARSETINSTSIZE; - case ITestSet: return CHARSETINSTSIZE + 1; - case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: - case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: - case IUTFR: - return 2; - case IThrow: case IPredChoice: /* labeled failure */ - return 2; - case IThrowRec: /* labeled failure */ - return 3; - default: return 1; - } -} - - -/* -** state for the compiler -*/ -typedef struct CompileState { - Pattern *p; /* pattern being compiled */ - int ncode; /* next position in p->code to be filled */ - lua_State *L; -} CompileState; - - -/* -** code generation is recursive; 'opt' indicates that the code is being -** generated as the last thing inside an optional pattern (so, if that -** code is optional too, it can reuse the 'IChoice' already in place for -** the outer pattern). 'tt' points to a previous test protecting this -** code (or NOINST). 'fl' is the follow set of the pattern. -*/ -static void codegen (CompileState *compst, TTree *tree, int opt, int tt, - const Charset *fl); - - -void realloccode (lua_State *L, Pattern *p, int nsize) { - void *ud; - lua_Alloc f = lua_getallocf(L, &ud); - void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), - nsize * sizeof(Instruction)); - if (newblock == NULL && nsize > 0) - luaL_error(L, "not enough memory"); - p->code = (Instruction *)newblock; - p->codesize = nsize; -} - - -static int nextinstruction (CompileState *compst) { - int size = compst->p->codesize; - if (compst->ncode >= size) - realloccode(compst->L, compst->p, size * 2); - return compst->ncode++; -} - - -#define getinstr(cs,i) ((cs)->p->code[i]) - - -static int addinstruction (CompileState *compst, Opcode op, int aux) { - int i = nextinstruction(compst); - getinstr(compst, i).i.code = op; - getinstr(compst, i).i.aux = aux; - return i; -} - - -/* -** Add an instruction followed by space for an offset (to be set later) -*/ -static int addoffsetinst (CompileState *compst, Opcode op) { - int i = addinstruction(compst, op, 0); /* instruction */ - addinstruction(compst, (Opcode)0, 0); /* open space for offset */ - assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2 || - op == IThrowRec); /* labeled failure */ - - return i; -} - - -/* labeled failure */ -static void codethrow (CompileState *compst, TTree *throw) { - int recov, aux, n; - if (throw->u.ps != 0) { - recov = addoffsetinst(compst, IThrowRec); - assert(sib1(sib2(throw))->tag == TXInfo); - n = sib1(sib2(throw))->u.n; - } else { - recov = addinstruction(compst, IThrow, 0); - n = -1; - } - aux = nextinstruction(compst); - getinstr(compst, aux).i.key = throw->key; /* next instruction keeps only rule name */ - getinstr(compst, recov).i.key = n; /* rule number */ -} -/* labeled failure */ - - -/* -** Set the offset of an instruction -*/ -static void setoffset (CompileState *compst, int instruction, int offset) { - getinstr(compst, instruction + 1).offset = offset; -} - - -static void codeutfr (CompileState *compst, TTree *tree) { - int i = addoffsetinst(compst, IUTFR); - int to = sib1(tree)->u.n; - assert(sib1(tree)->tag == TXInfo); - getinstr(compst, i + 1).offset = tree->u.n; - getinstr(compst, i).i.aux = to & 0xff; - getinstr(compst, i).i.key = to >> 8; -} - - -/* -** Add a capture instruction: -** 'op' is the capture instruction; 'cap' the capture kind; -** 'key' the key into ktable; 'aux' is the optional capture offset -** -*/ -static int addinstcap (CompileState *compst, Opcode op, int cap, int key, - int aux) { - int i = addinstruction(compst, op, joinkindoff(cap, aux)); - getinstr(compst, i).i.key = key; - return i; -} - - -#define gethere(compst) ((compst)->ncode) - -#define target(code,i) ((i) + code[i + 1].offset) - - -/* -** Patch 'instruction' to jump to 'target' -*/ -static void jumptothere (CompileState *compst, int instruction, int target) { - if (instruction >= 0) - setoffset(compst, instruction, target - instruction); -} - - -/* -** Patch 'instruction' to jump to current position -*/ -static void jumptohere (CompileState *compst, int instruction) { - jumptothere(compst, instruction, gethere(compst)); -} - - -/* -** Code an IChar instruction, or IAny if there is an equivalent -** test dominating it -*/ -static void codechar (CompileState *compst, int c, int tt) { - if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar && - getinstr(compst, tt).i.aux == c) - addinstruction(compst, IAny, 0); - else - addinstruction(compst, IChar, c); -} - - -/* -** Add a charset posfix to an instruction -*/ -static void addcharset (CompileState *compst, const byte *cs) { - int p = gethere(compst); - int i; - for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++) - nextinstruction(compst); /* space for buffer */ - /* fill buffer with charset */ - loopset(j, getinstr(compst, p).buff[j] = cs[j]); -} - - -/* -** code a char set, optimizing unit sets for IChar, "complete" -** sets for IAny, and empty sets for IFail; also use an IAny -** when instruction is dominated by an equivalent test. -*/ -static void codecharset (CompileState *compst, const byte *cs, int tt) { - int c = 0; /* (=) to avoid warnings */ - Opcode op = charsettype(cs, &c); - switch (op) { - case IChar: codechar(compst, c, tt); break; - case ISet: { /* non-trivial set? */ - if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet && - cs_equal(cs, getinstr(compst, tt + 2).buff)) - addinstruction(compst, IAny, 0); - else { - addinstruction(compst, ISet, 0); - addcharset(compst, cs); - } - break; - } - default: addinstruction(compst, op, c); break; - } -} - - -/* -** code a test set, optimizing unit sets for ITestChar, "complete" -** sets for ITestAny, and empty sets for IJmp (always fails). -** 'e' is true iff test should accept the empty string. (Test -** instructions in the current VM never accept the empty string.) -*/ -static int codetestset (CompileState *compst, Charset *cs, int e) { - if (e) return NOINST; /* no test */ - else { - int c = 0; - Opcode op = charsettype(cs->cs, &c); - switch (op) { - case IFail: return addoffsetinst(compst, IJmp); /* always jump */ - case IAny: return addoffsetinst(compst, ITestAny); - case IChar: { - int i = addoffsetinst(compst, ITestChar); - getinstr(compst, i).i.aux = c; - return i; - } - case ISet: { - int i = addoffsetinst(compst, ITestSet); - addcharset(compst, cs->cs); - return i; - } - default: assert(0); return 0; - } - } -} - - -/* -** Find the final destination of a sequence of jumps -*/ -static int finaltarget (Instruction *code, int i) { - while (code[i].i.code == IJmp) - i = target(code, i); - return i; -} - - -/* -** final label (after traversing any jumps) -*/ -static int finallabel (Instruction *code, int i) { - return finaltarget(code, target(code, i)); -} - - -/* -** == behind n;

(where n = fixedlen(p)) -*/ -static void codebehind (CompileState *compst, TTree *tree) { - if (tree->u.n > 0) - addinstruction(compst, IBehind, tree->u.n); - codegen(compst, sib1(tree), 0, NOINST, fullset); -} - - -/* -** Choice; optimizations: -** - when p1 is headfail or when first(p1) and first(p2) are disjoint, -** than a character not in first(p1) cannot go to p1 and a character -** in first(p1) cannot go to p2, either because p1 will accept -** (headfail) or because it is not in first(p2) (disjoint). -** (The second case is not valid if p1 accepts the empty string, -** as then there is no character at all...) -** - when p2 is empty and opt is true; a IPartialCommit can reuse -** the Choice already active in the stack. -*/ -static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, - const Charset *fl) { - int emptyp2 = (p2->tag == TTrue); - Charset cs1, cs2; - int e1 = getfirst(p1, fullset, &cs1); - if (headfail(p1) || - (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) { - /* == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */ - int test = codetestset(compst, &cs1, 0); - int jmp = NOINST; - codegen(compst, p1, 0, test, fl); - if (!emptyp2) - jmp = addoffsetinst(compst, IJmp); - jumptohere(compst, test); - codegen(compst, p2, opt, NOINST, fl); - jumptohere(compst, jmp); - } - else if (opt && emptyp2) { - /* p1? == IPartialCommit; p1 */ - jumptohere(compst, addoffsetinst(compst, IPartialCommit)); - codegen(compst, p1, 1, NOINST, fullset); - } - else { - /* == - test(first(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: */ - int pcommit; - int test = codetestset(compst, &cs1, e1); - int pchoice = addoffsetinst(compst, IChoice); - codegen(compst, p1, emptyp2, test, fullset); - pcommit = addoffsetinst(compst, ICommit); - jumptohere(compst, pchoice); - jumptohere(compst, test); - codegen(compst, p2, opt, NOINST, fl); - jumptohere(compst, pcommit); - } -} - - -/* -** And predicate -** optimization: fixedlen(p) = n ==> <&p> ==

; behind n -** (valid only when 'p' has no captures) -*/ -static void codeand (CompileState *compst, TTree *tree, int tt) { - /* labeled failure: optimization disabled bacause in case of a failure it - does not report the expected error position (the current subject position - when begin the matching of <&p>) */ - /*int n = fixedlen(tree); - if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) { - codegen(compst, tree, 0, tt, fullset); - if (n > 0) - addinstruction(compst, IBehind, n); - } - else */{ /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */ - int pcommit; - int pchoice = addoffsetinst(compst, IPredChoice); /* labeled failure */ - codegen(compst, tree, 0, tt, fullset); - pcommit = addoffsetinst(compst, IBackCommit); - jumptohere(compst, pchoice); - addinstruction(compst, IFail, 0); - jumptohere(compst, pcommit); - } -} - - -/* -** Captures: if pattern has fixed (and not too big) length, and it -** has no nested captures, use a single IFullCapture instruction -** after the match; otherwise, enclose the pattern with OpenCapture - -** CloseCapture. -*/ -static void codecapture (CompileState *compst, TTree *tree, int tt, - const Charset *fl) { - int len = fixedlen(sib1(tree)); - if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) { - codegen(compst, sib1(tree), 0, tt, fl); - addinstcap(compst, IFullCapture, tree->cap, tree->key, len); - } - else { - addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0); - codegen(compst, sib1(tree), 0, tt, fl); - addinstcap(compst, ICloseCapture, Cclose, 0, 0); - } -} - - -static void coderuntime (CompileState *compst, TTree *tree, int tt) { - addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0); - codegen(compst, sib1(tree), 0, tt, fullset); - addinstcap(compst, ICloseRunTime, Cclose, 0, 0); -} - - -/* -** Repetion; optimizations: -** When pattern is a charset, can use special instruction ISpan. -** When pattern is head fail, or if it starts with characters that -** are disjoint from what follows the repetions, a simple test -** is enough (a fail inside the repetition would backtrack to fail -** again in the following pattern, so there is no need for a choice). -** When 'opt' is true, the repetion can reuse the Choice already -** active in the stack. -*/ -static void coderep (CompileState *compst, TTree *tree, int opt, - const Charset *fl) { - Charset st; - if (tocharset(tree, &st)) { - addinstruction(compst, ISpan, 0); - addcharset(compst, st.cs); - } - else { - int e1 = getfirst(tree, fullset, &st); - if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) { - /* L1: test (fail(p1)) -> L2;

; jmp L1; L2: */ - int jmp; - int test = codetestset(compst, &st, 0); - codegen(compst, tree, 0, test, fullset); - jmp = addoffsetinst(compst, IJmp); - jumptohere(compst, test); - jumptothere(compst, jmp, test); - } - else { - /* test(fail(p1)) -> L2; choice L2; L1:

; partialcommit L1; L2: */ - /* or (if 'opt'): partialcommit L1; L1:

; partialcommit L1; */ - int commit, l2; - int test = codetestset(compst, &st, e1); - int pchoice = NOINST; - if (opt) - jumptohere(compst, addoffsetinst(compst, IPartialCommit)); - else - pchoice = addoffsetinst(compst, IChoice); - l2 = gethere(compst); - codegen(compst, tree, 0, NOINST, fullset); - commit = addoffsetinst(compst, IPartialCommit); - jumptothere(compst, commit, l2); - jumptohere(compst, pchoice); - jumptohere(compst, test); - } - } -} - - -/* -** Not predicate; optimizations: -** In any case, if first test fails, 'not' succeeds, so it can jump to -** the end. If pattern is headfail, that is all (it cannot fail -** in other parts); this case includes 'not' of simple sets. Otherwise, -** use the default code (a choice plus a failtwice). -*/ -static void codenot (CompileState *compst, TTree *tree) { - Charset st; - int e = getfirst(tree, fullset, &st); - int test = codetestset(compst, &st, e); - if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */ - addinstruction(compst, IFail, 0); - else { - /* test(fail(p))-> L1; choice L1;

; failtwice; L1: */ - int pchoice = addoffsetinst(compst, IPredChoice); /* labeled failure */ - codegen(compst, tree, 0, NOINST, fullset); - addinstruction(compst, IFailTwice, 0); - jumptohere(compst, pchoice); - } - jumptohere(compst, test); -} - - -/* -** change open calls to calls, using list 'positions' to find -** correct offsets; also optimize tail calls -*/ -static void correctcalls (CompileState *compst, int *positions, - int from, int to) { - int i; - Instruction *code = compst->p->code; - for (i = from; i < to; i += sizei(&code[i])) { - if (code[i].i.code == IOpenCall) { - int n = code[i].i.key; /* rule number */ - int rule = positions[n]; /* rule position */ - assert(rule == from || code[rule - 1].i.code == IRet); - if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */ - code[i].i.code = IJmp; /* tail call */ - else - code[i].i.code = ICall; - jumptothere(compst, i, rule); /* call jumps to respective rule */ - } else if (code[i].i.code == IThrowRec) { /* labeled failure */ - int n = code[i].i.key; /* rule number */ - int rule = positions[n]; /* rule position */ - assert(rule == from || code[rule - 1].i.code == IRet); - jumptothere(compst, i, rule); /* call jumps to respective rule */ - } - - } - assert(i == to); -} - - -/* -** Code for a grammar: -** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2: -*/ -static void codegrammar (CompileState *compst, TTree *grammar) { - int positions[MAXRULES]; - int rulenumber = 0; - TTree *rule; - int firstcall = addoffsetinst(compst, ICall); /* call initial rule */ - int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */ - int start = gethere(compst); /* here starts the initial rule */ - jumptohere(compst, firstcall); - for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { - TTree *r = sib1(rule); - assert(r->tag == TXInfo); - positions[rulenumber++] = gethere(compst); /* save rule position */ - codegen(compst, sib1(r), 0, NOINST, fullset); /* code rule */ - addinstruction(compst, IRet, 0); - } - assert(rule->tag == TTrue); - jumptohere(compst, jumptoend); - correctcalls(compst, positions, start, gethere(compst)); -} - - -static void codecall (CompileState *compst, TTree *call) { - int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ - assert(sib1(sib2(call))->tag == TXInfo); - getinstr(compst, c).i.key = sib1(sib2(call))->u.n; /* rule number */ -} - - -/* -** Code first child of a sequence -** (second child is called in-place to allow tail call) -** Return 'tt' for second child -*/ -static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, - int tt, const Charset *fl) { - if (needfollow(p1)) { - Charset fl1; - getfirst(p2, fl, &fl1); /* p1 follow is p2 first */ - codegen(compst, p1, 0, tt, &fl1); - } - else /* use 'fullset' as follow */ - codegen(compst, p1, 0, tt, fullset); - if (fixedlen(p1) != 0) /* can 'p1' consume anything? */ - return NOINST; /* invalidate test */ - else return tt; /* else 'tt' still protects sib2 */ -} - - -/* -** Main code-generation function: dispatch to auxiliar functions -** according to kind of tree. ('needfollow' should return true -** only for consructions that use 'fl'.) -*/ -static void codegen (CompileState *compst, TTree *tree, int opt, int tt, - const Charset *fl) { - tailcall: - switch (tree->tag) { - case TChar: codechar(compst, tree->u.n, tt); break; - case TAny: addinstruction(compst, IAny, 0); break; - case TSet: codecharset(compst, treebuffer(tree), tt); break; - case TTrue: break; - case TFalse: addinstruction(compst, IFail, 0); break; - case TUTFR: codeutfr(compst, tree); break; - case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break; - case TRep: coderep(compst, sib1(tree), opt, fl); break; - case TBehind: codebehind(compst, tree); break; - case TNot: codenot(compst, sib1(tree)); break; - case TAnd: codeand(compst, sib1(tree), tt); break; - case TCapture: codecapture(compst, tree, tt, fl); break; - case TRunTime: coderuntime(compst, tree, tt); break; - case TGrammar: codegrammar(compst, tree); break; - case TCall: codecall(compst, tree); break; - case TSeq: { - tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */ - /* codegen(compst, p2, opt, tt, fl); */ - tree = sib2(tree); goto tailcall; - } - case TThrow: { /* labeled failure */ - codethrow(compst, tree); - break; - } - default: assert(0); - } -} - - -/* -** Optimize jumps and other jump-like instructions. -** * Update labels of instructions with labels to their final -** destinations (e.g., choice L1; ... L1: jmp L2: becomes -** choice L2) -** * Jumps to other instructions that do jumps become those -** instructions (e.g., jump to return becomes a return; jump -** to commit becomes a commit) -*/ -static void peephole (CompileState *compst) { - Instruction *code = compst->p->code; - int i; - for (i = 0; i < compst->ncode; i += sizei(&code[i])) { - redo: - switch (code[i].i.code) { - case IChoice: case ICall: case ICommit: case IPartialCommit: - case IBackCommit: case ITestChar: case ITestSet: - case ITestAny: { /* instructions with labels */ - jumptothere(compst, i, finallabel(code, i)); /* optimize label */ - break; - } - case IJmp: { - int ft = finaltarget(code, i); - switch (code[ft].i.code) { /* jumping to what? */ - case IRet: case IFail: case IFailTwice: - case IEnd: { /* instructions with unconditional implicit jumps */ - code[i] = code[ft]; /* jump becomes that instruction */ - code[i + 1].i.code = IEmpty; /* 'no-op' for target position */ - break; - } - case ICommit: case IPartialCommit: - case IBackCommit: { /* inst. with unconditional explicit jumps */ - int fft = finallabel(code, ft); - code[i] = code[ft]; /* jump becomes that instruction... */ - jumptothere(compst, i, fft); /* but must correct its offset */ - goto redo; /* reoptimize its label */ - } - default: { - jumptothere(compst, i, ft); /* optimize label */ - break; - } - } - break; - } - default: break; - } - } - assert(code[i - 1].i.code == IEnd); -} - - -/* -** Compile a pattern -*/ -Instruction *compile (lua_State *L, Pattern *p) { - CompileState compst; - compst.p = p; compst.ncode = 0; compst.L = L; - realloccode(L, p, 2); /* minimum initial size */ - codegen(&compst, p->tree, 0, NOINST, fullset); - addinstruction(&compst, IEnd, 0); - realloccode(L, p, compst.ncode); /* set final size */ - peephole(&compst); - return p->code; -} - - -/* }====================================================== */ - diff --git a/lpcode.h b/lpcode.h deleted file mode 100644 index bdd3ec1..0000000 --- a/lpcode.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -** $Id: lpcode.h $ -*/ - -#if !defined(lpcode_h) -#define lpcode_h - -#include "lua.h" - -#include "lptypes.h" -#include "lptree.h" -#include "lpvm.h" - -LUAI_FUNC int tocharset (TTree *tree, Charset *cs); -LUAI_FUNC int checkaux (TTree *tree, int pred); -LUAI_FUNC int fixedlen (TTree *tree); -LUAI_FUNC int hascaptures (TTree *tree); -LUAI_FUNC int lp_gc (lua_State *L); -LUAI_FUNC Instruction *compile (lua_State *L, Pattern *p); -LUAI_FUNC void realloccode (lua_State *L, Pattern *p, int nsize); -LUAI_FUNC int sizei (const Instruction *i); - - -#define PEnullable 0 -#define PEnofail 1 - -/* -** nofail(t) implies that 't' cannot fail with any input -*/ -#define nofail(t) checkaux(t, PEnofail) - -/* -** (not nullable(t)) implies 't' cannot match without consuming -** something -*/ -#define nullable(t) checkaux(t, PEnullable) - - - -#endif diff --git a/lplcap.c b/lplcap.c new file mode 100644 index 0000000..924d371 --- /dev/null +++ b/lplcap.c @@ -0,0 +1,555 @@ +/* +** $Id: lplcap.c $ +** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) +*/ + +#include "lua.h" +#include "lauxlib.h" + +#include "lplcap.h" +#include "lpltypes.h" + + +#define captype(cap) ((cap)->kind) + +#define isclosecap(cap) (captype(cap) == Cclose) + +#define closeaddr(c) ((c)->s + (c)->siz - 1) + +#define isfullcap(cap) ((cap)->siz != 0) + +#define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) + +#define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) + + + +/* +** Put at the cache for Lua values the value indexed by 'v' in ktable +** of the running pattern (if it is not there yet); returns its index. +*/ +static int updatecache (CapState *cs, int v) { + int idx = cs->ptop + 1; /* stack index of cache for Lua values */ + if (v != cs->valuecached) { /* not there? */ + getfromktable(cs, v); /* get value from 'ktable' */ + lua_replace(cs->L, idx); /* put it at reserved stack position */ + cs->valuecached = v; /* keep track of what is there */ + } + return idx; +} + + +static int pushcapture (CapState *cs); + + +/* +** Goes back in a list of captures looking for an open capture +** corresponding to a close +*/ +static Capture *findopen (Capture *cap) { + int n = 0; /* number of closes waiting an open */ + for (;;) { + cap--; + if (isclosecap(cap)) n++; /* one more open to skip */ + else if (!isfullcap(cap)) + if (n-- == 0) return cap; + } +} + + +/* +** Go to the next capture +*/ +static void nextcap (CapState *cs) { + Capture *cap = cs->cap; + if (!isfullcap(cap)) { /* not a single capture? */ + int n = 0; /* number of opens waiting a close */ + for (;;) { /* look for corresponding close */ + cap++; + if (isclosecap(cap)) { + if (n-- == 0) break; + } + else if (!isfullcap(cap)) n++; + } + } + cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */ +} + + +/* +** Push on the Lua stack all values generated by nested captures inside +** the current capture. Returns number of values pushed. 'addextra' +** makes it push the entire match after all captured values. The +** entire match is pushed also if there are no other nested values, +** so the function never returns zero. +*/ +static int pushnestedvalues (CapState *cs, int addextra) { + Capture *co = cs->cap; + if (isfullcap(cs->cap++)) { /* no nested captures? */ + lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ + return 1; /* that is it */ + } + else { + int n = 0; + while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ + n += pushcapture(cs); + if (addextra || n == 0) { /* need extra? */ + lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ + n++; + } + cs->cap++; /* skip close entry */ + return n; + } +} + + +/* +** Push only the first value generated by nested captures +*/ +static void pushonenestedvalue (CapState *cs) { + int n = pushnestedvalues(cs, 0); + if (n > 1) + lua_pop(cs->L, n - 1); /* pop extra values */ +} + + +/* +** Try to find a named group capture with the name given at the top of +** the stack; goes backward from 'cap'. +*/ +static Capture *findback (CapState *cs, Capture *cap) { + lua_State *L = cs->L; + while (cap-- > cs->ocap) { /* repeat until end of list */ + if (isclosecap(cap)) + cap = findopen(cap); /* skip nested captures */ + else if (!isfullcap(cap)) + continue; /* opening an enclosing capture: skip and get previous */ + if (captype(cap) == Cgroup) { + getfromktable(cs, cap->idx); /* get group name */ + if (lp_equal(L, -2, -1)) { /* right group? */ + lua_pop(L, 2); /* remove reference name and group name */ + return cap; + } + else lua_pop(L, 1); /* remove group name */ + } + } + luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1)); + return NULL; /* to avoid warnings */ +} + + +/* +** Back-reference capture. Return number of values pushed. +*/ +static int backrefcap (CapState *cs) { + int n; + Capture *curr = cs->cap; + pushluaval(cs); /* reference name */ + cs->cap = findback(cs, curr); /* find corresponding group */ + n = pushnestedvalues(cs, 0); /* push group's values */ + cs->cap = curr + 1; + return n; +} + + +/* +** Table capture: creates a new table and populates it with nested +** captures. +*/ +static int tablecap (CapState *cs) { + lua_State *L = cs->L; + int n = 0; + lua_newtable(L); + if (isfullcap(cs->cap++)) + return 1; /* table is empty */ + while (!isclosecap(cs->cap)) { + if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ + pushluaval(cs); /* push group name */ + pushonenestedvalue(cs); + lua_settable(L, -3); + } + else { /* not a named group */ + int i; + int k = pushcapture(cs); + for (i = k; i > 0; i--) /* store all values into table */ + lua_rawseti(L, -(i + 1), n + i); + n += k; + } + } + cs->cap++; /* skip close entry */ + return 1; /* number of values pushed (only the table) */ +} + + +/* +** Table-query capture +*/ +static int querycap (CapState *cs) { + int idx = cs->cap->idx; + pushonenestedvalue(cs); /* get nested capture */ + lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */ + if (!lua_isnil(cs->L, -1)) + return 1; + else { /* no value */ + lua_pop(cs->L, 1); /* remove nil */ + return 0; + } +} + + +/* +** Fold capture +*/ +static int foldcap (CapState *cs) { + int n; + lua_State *L = cs->L; + int idx = cs->cap->idx; + if (isfullcap(cs->cap++) || /* no nested captures? */ + isclosecap(cs->cap) || /* no nested captures (large subject)? */ + (n = pushcapture(cs)) == 0) /* nested captures with no values? */ + return luaL_error(L, "no initial value for fold capture"); + if (n > 1) + lua_pop(L, n - 1); /* leave only one result for accumulator */ + while (!isclosecap(cs->cap)) { + lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ + lua_insert(L, -2); /* put it before accumulator */ + n = pushcapture(cs); /* get next capture's values */ + lua_call(L, n + 1, 1); /* call folding function */ + } + cs->cap++; /* skip close entry */ + return 1; /* only accumulator left on the stack */ +} + + +/* +** Function capture +*/ +static int functioncap (CapState *cs) { + int n; + int top = lua_gettop(cs->L); + pushluaval(cs); /* push function */ + n = pushnestedvalues(cs, 0); /* push nested captures */ + lua_call(cs->L, n, LUA_MULTRET); /* call function */ + return lua_gettop(cs->L) - top; /* return function's results */ +} + + +/* +** Select capture +*/ +static int numcap (CapState *cs) { + int idx = cs->cap->idx; /* value to select */ + if (idx == 0) { /* no values? */ + nextcap(cs); /* skip entire capture */ + return 0; /* no value produced */ + } + else { + int n = pushnestedvalues(cs, 0); + if (n < idx) /* invalid index? */ + return luaL_error(cs->L, "no capture '%d'", idx); + else { + lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */ + lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */ + lua_pop(cs->L, n - 1); /* remove other captures */ + return 1; + } + } +} + + +/* +** Return the stack index of the first runtime capture in the given +** list of captures (or zero if no runtime captures) +*/ +int finddyncap (Capture *cap, Capture *last) { + for (; cap < last; cap++) { + if (cap->kind == Cruntime) + return cap->idx; /* stack position of first capture */ + } + return 0; /* no dynamic captures in this segment */ +} + + +/* +** Calls a runtime capture. Returns number of captures "removed" by the +** call, that is, those inside the group capture. Captures to be added +** are on the Lua stack. +*/ +int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { + int n, id; + lua_State *L = cs->L; + int otop = lua_gettop(L); + Capture *open = findopen(close); /* get open group capture */ + assert(captype(open) == Cgroup); + id = finddyncap(open, close); /* get first dynamic capture argument */ + close->kind = Cclose; /* closes the group */ + close->s = s; + cs->cap = open; cs->valuecached = 0; /* prepare capture state */ + luaL_checkstack(L, 4, "too many runtime captures"); + pushluaval(cs); /* push function to be called */ + lua_pushvalue(L, SUBJIDX); /* push original subject */ + lua_pushinteger(L, s - cs->s + 1); /* push current position */ + n = pushnestedvalues(cs, 0); /* push nested captures */ + lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */ + if (id > 0) { /* are there old dynamic captures to be removed? */ + int i; + for (i = id; i <= otop; i++) + lua_remove(L, id); /* remove old dynamic captures */ + *rem = otop - id + 1; /* total number of dynamic captures removed */ + } + else + *rem = 0; /* no dynamic captures removed */ + return close - open - 1; /* number of captures to be removed */ +} + + +/* +** Auxiliary structure for substitution and string captures: keep +** information about nested captures for future use, avoiding to push +** string results into Lua +*/ +typedef struct StrAux { + int isstring; /* whether capture is a string */ + union { + Capture *cp; /* if not a string, respective capture */ + struct { /* if it is a string... */ + const char *s; /* ... starts here */ + const char *e; /* ... ends here */ + } s; + } u; +} StrAux; + +#define MAXSTRCAPS 10 + +/* +** Collect values from current capture into array 'cps'. Current +** capture must be Cstring (first call) or Csimple (recursive calls). +** (In first call, fills %0 with whole match for Cstring.) +** Returns number of elements in the array that were filled. +*/ +static int getstrcaps (CapState *cs, StrAux *cps, int n) { + int k = n++; + cps[k].isstring = 1; /* get string value */ + cps[k].u.s.s = cs->cap->s; /* starts here */ + if (!isfullcap(cs->cap++)) { /* nested captures? */ + while (!isclosecap(cs->cap)) { /* traverse them */ + if (n >= MAXSTRCAPS) /* too many captures? */ + nextcap(cs); /* skip extra captures (will not need them) */ + else if (captype(cs->cap) == Csimple) /* string? */ + n = getstrcaps(cs, cps, n); /* put info. into array */ + else { + cps[n].isstring = 0; /* not a string */ + cps[n].u.cp = cs->cap; /* keep original capture */ + nextcap(cs); + n++; + } + } + cs->cap++; /* skip close */ + } + cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ + return n; +} + + +/* +** add next capture value (which should be a string) to buffer 'b' +*/ +static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); + + +/* +** String capture: add result to buffer 'b' (instead of pushing +** it into the stack) +*/ +static void stringcap (luaL_Buffer *b, CapState *cs) { + StrAux cps[MAXSTRCAPS]; + int n; + size_t len, i; + const char *fmt; /* format string */ + fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); + n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */ + for (i = 0; i < len; i++) { /* traverse them */ + if (fmt[i] != '%') /* not an escape? */ + luaL_addchar(b, fmt[i]); /* add it to buffer */ + else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */ + luaL_addchar(b, fmt[i]); /* add to buffer */ + else { + int l = fmt[i] - '0'; /* capture index */ + if (l > n) + luaL_error(cs->L, "invalid capture index (%d)", l); + else if (cps[l].isstring) + luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); + else { + Capture *curr = cs->cap; + cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ + if (!addonestring(b, cs, "capture")) + luaL_error(cs->L, "no values in capture index %d", l); + cs->cap = curr; /* continue from where it stopped */ + } + } + } +} + + +/* +** Substitution capture: add result to buffer 'b' +*/ +static void substcap (luaL_Buffer *b, CapState *cs) { + const char *curr = cs->cap->s; + if (isfullcap(cs->cap)) /* no nested captures? */ + luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ + else { + cs->cap++; /* skip open entry */ + while (!isclosecap(cs->cap)) { /* traverse nested captures */ + const char *next = cs->cap->s; + luaL_addlstring(b, curr, next - curr); /* add text up to capture */ + if (addonestring(b, cs, "replacement")) + curr = closeaddr(cs->cap - 1); /* continue after match */ + else /* no capture value */ + curr = next; /* keep original text in final result */ + } + luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ + } + cs->cap++; /* go to next capture */ +} + + +/* +** Evaluates a capture and adds its first value to buffer 'b'; returns +** whether there was a value +*/ +static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { + switch (captype(cs->cap)) { + case Cstring: + stringcap(b, cs); /* add capture directly to buffer */ + return 1; + case Csubst: + substcap(b, cs); /* add capture directly to buffer */ + return 1; + default: { + lua_State *L = cs->L; + int n = pushcapture(cs); + if (n > 0) { + if (n > 1) lua_pop(L, n - 1); /* only one result */ + if (!lua_isstring(L, -1)) + luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1)); + luaL_addvalue(b); + } + return n; + } + } +} + + +#if !defined(MAXRECLEVEL) +#define MAXRECLEVEL 200 +#endif + + +/* +** Push all values of the current capture into the stack; returns +** number of values pushed +*/ +static int pushcapture (CapState *cs) { + lua_State *L = cs->L; + int res; + luaL_checkstack(L, 4, "too many captures"); + if (cs->reclevel++ > MAXRECLEVEL) + return luaL_error(L, "subcapture nesting too deep"); + switch (captype(cs->cap)) { + case Cposition: { + lua_pushinteger(L, cs->cap->s - cs->s + 1); + cs->cap++; + res = 1; + break; + } + case Cconst: { + pushluaval(cs); + cs->cap++; + res = 1; + break; + } + case Carg: { + int arg = (cs->cap++)->idx; + if (arg + FIXEDARGS > cs->ptop) + return luaL_error(L, "reference to absent extra argument #%d", arg); + lua_pushvalue(L, arg + FIXEDARGS); + res = 1; + break; + } + case Csimple: { + int k = pushnestedvalues(cs, 1); + lua_insert(L, -k); /* make whole match be first result */ + res = k; + break; + } + case Cruntime: { + lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ + res = 1; + break; + } + case Cstring: { + luaL_Buffer b; + luaL_buffinit(L, &b); + stringcap(&b, cs); + luaL_pushresult(&b); + res = 1; + break; + } + case Csubst: { + luaL_Buffer b; + luaL_buffinit(L, &b); + substcap(&b, cs); + luaL_pushresult(&b); + res = 1; + break; + } + case Cgroup: { + if (cs->cap->idx == 0) /* anonymous group? */ + res = pushnestedvalues(cs, 0); /* add all nested values */ + else { /* named group: add no values */ + nextcap(cs); /* skip capture */ + res = 0; + } + break; + } + case Cbackref: res = backrefcap(cs); break; + case Ctable: res = tablecap(cs); break; + case Cfunction: res = functioncap(cs); break; + case Cnum: res = numcap(cs); break; + case Cquery: res = querycap(cs); break; + case Cfold: res = foldcap(cs); break; + default: assert(0); res = 0; + } + cs->reclevel--; + return res; +} + + +/* +** Prepare a CapState structure and traverse the entire list of +** captures in the stack pushing its results. 's' is the subject +** string, 'r' is the final position of the match, and 'ptop' +** the index in the stack where some useful values were pushed. +** Returns the number of results pushed. (If the list produces no +** results, push the final position of the match.) +*/ +int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { + Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); + int n = 0; + if (!isclosecap(capture)) { /* is there any capture? */ + CapState cs; + cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0; + cs.s = s; cs.valuecached = 0; cs.ptop = ptop; + do { /* collect their values */ + n += pushcapture(&cs); + } while (!isclosecap(cs.cap)); + } + if (n == 0) { /* no capture values? */ + lua_pushinteger(L, r - s + 1); /* return only end position */ + n = 1; + } + return n; +} + + diff --git a/lplcap.h b/lplcap.h new file mode 100644 index 0000000..5d4de71 --- /dev/null +++ b/lplcap.h @@ -0,0 +1,57 @@ +/* +** $Id: lplcap.h $ +*/ + +#if !defined(lplcap_h) +#define lplcap_h + + +#include "lpltypes.h" + + +/* kinds of captures */ +typedef enum CapKind { + Cclose, /* not used in trees */ + Cposition, + Cconst, /* ktable[key] is Lua constant */ + Cbackref, /* ktable[key] is "name" of group to get capture */ + Carg, /* 'key' is arg's number */ + Csimple, /* next node is pattern */ + Ctable, /* next node is pattern */ + Cfunction, /* ktable[key] is function; next node is pattern */ + Cquery, /* ktable[key] is table; next node is pattern */ + Cstring, /* ktable[key] is string; next node is pattern */ + Cnum, /* numbered capture; 'key' is number of value to return */ + Csubst, /* substitution capture; next node is pattern */ + Cfold, /* ktable[key] is function; next node is pattern */ + Cruntime, /* not used in trees (is uses another type for tree) */ + Cgroup /* ktable[key] is group's "name" */ +} CapKind; + + +typedef struct Capture { + const char *s; /* subject position */ + unsigned short idx; /* extra info (group name, arg index, etc.) */ + byte kind; /* kind of capture */ + byte siz; /* size of full capture + 1 (0 = not a full capture) */ +} Capture; + + +typedef struct CapState { + Capture *cap; /* current capture */ + Capture *ocap; /* (original) capture list */ + lua_State *L; + int ptop; /* index of last argument to 'match' */ + const char *s; /* original string */ + int valuecached; /* value stored in cache slot */ + int reclevel; /* recursion level */ +} CapState; + + +LUAI_FUNC int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); +LUAI_FUNC int getcaptures (lua_State *L, const char *s, const char *r, int ptop); +LUAI_FUNC int finddyncap (Capture *cap, Capture *last); + +#endif + + diff --git a/lplcode.c b/lplcode.c new file mode 100644 index 0000000..be041c4 --- /dev/null +++ b/lplcode.c @@ -0,0 +1,1081 @@ +/* +** $Id: lplcode.c $ +** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) +*/ + +#include + + +#include "lua.h" +#include "lauxlib.h" + +#include "lpltypes.h" +#include "lplcode.h" + + +/* signals a "no-instruction */ +#define NOINST -1 + + + +static const Charset fullset_ = + {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}; + +static const Charset *fullset = &fullset_; + +/* +** {====================================================== +** Analysis and some optimizations +** ======================================================= +*/ + +/* +** Check whether a charset is empty (returns IFail), singleton (IChar), +** full (IAny), or none of those (ISet). When singleton, '*c' returns +** which character it is. (When generic set, the set was the input, +** so there is no need to return it.) +*/ +static Opcode charsettype (const byte *cs, int *c) { + int count = 0; /* number of characters in the set */ + int i; + int candidate = -1; /* candidate position for the singleton char */ + for (i = 0; i < CHARSETSIZE; i++) { /* for each byte */ + int b = cs[i]; + if (b == 0) { /* is byte empty? */ + if (count > 1) /* was set neither empty nor singleton? */ + return ISet; /* neither full nor empty nor singleton */ + /* else set is still empty or singleton */ + } + else if (b == 0xFF) { /* is byte full? */ + if (count < (i * BITSPERCHAR)) /* was set not full? */ + return ISet; /* neither full nor empty nor singleton */ + else count += BITSPERCHAR; /* set is still full */ + } + else if ((b & (b - 1)) == 0) { /* has byte only one bit? */ + if (count > 0) /* was set not empty? */ + return ISet; /* neither full nor empty nor singleton */ + else { /* set has only one char till now; track it */ + count++; + candidate = i; + } + } + else return ISet; /* byte is neither empty, full, nor singleton */ + } + switch (count) { + case 0: return IFail; /* empty set */ + case 1: { /* singleton; find character bit inside byte */ + int b = cs[candidate]; + *c = candidate * BITSPERCHAR; + if ((b & 0xF0) != 0) { *c += 4; b >>= 4; } + if ((b & 0x0C) != 0) { *c += 2; b >>= 2; } + if ((b & 0x02) != 0) { *c += 1; } + return IChar; + } + default: { + assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */ + return IAny; + } + } +} + + +/* +** A few basic operations on Charsets +*/ +static void cs_complement (Charset *cs) { + loopset(i, cs->cs[i] = ~cs->cs[i]); +} + +static int cs_equal (const byte *cs1, const byte *cs2) { + loopset(i, if (cs1[i] != cs2[i]) return 0); + return 1; +} + +static int cs_disjoint (const Charset *cs1, const Charset *cs2) { + loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) + return 1; +} + + +/* +** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a +** charset and return 1; else return 0. +*/ +int tocharset (TTree *tree, Charset *cs) { + switch (tree->tag) { + case TSet: { /* copy set */ + loopset(i, cs->cs[i] = treebuffer(tree)[i]); + return 1; + } + case TChar: { /* only one char */ + assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); + loopset(i, cs->cs[i] = 0); /* erase all chars */ + setchar(cs->cs, tree->u.n); /* add that one */ + return 1; + } + case TAny: { + loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */ + return 1; + } + default: return 0; + } +} + + +/* +** Visit a TCall node taking care to stop recursion. If node not yet +** visited, return 'f(sib2(tree))', otherwise return 'def' (default +** value) +*/ +static int callrecursive (TTree *tree, int f (TTree *t), int def) { + int key = tree->key; + assert(tree->tag == TCall); + assert(sib2(tree)->tag == TRule); + if (key == 0) /* node already visited? */ + return def; /* return default value */ + else { /* first visit */ + int result; + tree->key = 0; /* mark call as already visited */ + result = f(sib2(tree)); /* go to called rule */ + tree->key = key; /* restore tree */ + return result; + } +} + + +/* +** Check whether a pattern tree has captures +*/ +int hascaptures (TTree *tree) { + tailcall: + switch (tree->tag) { + case TCapture: case TRunTime: + return 1; + case TCall: + return callrecursive(tree, hascaptures, 0); + case TRule: /* do not follow siblings */ + tree = sib1(tree); goto tailcall; + case TOpenCall: assert(0); + default: { + switch (numsiblings[tree->tag]) { + case 1: /* return hascaptures(sib1(tree)); */ + tree = sib1(tree); goto tailcall; + case 2: + if (hascaptures(sib1(tree))) + return 1; + /* else return hascaptures(sib2(tree)); */ + tree = sib2(tree); goto tailcall; + default: assert(numsiblings[tree->tag] == 0); return 0; + } + } + } +} + + +/* +** Checks how a pattern behaves regarding the empty string, +** in one of two different ways: +** A pattern is *nullable* if it can match without consuming any character; +** A pattern is *nofail* if it never fails for any string +** (including the empty string). +** The difference is only for predicates and run-time captures; +** for other patterns, the two properties are equivalent. +** (With predicates, &'a' is nullable but not nofail. Of course, +** nofail => nullable.) +** These functions are all convervative in the following way: +** p is nullable => nullable(p) +** nofail(p) => p cannot fail +** The function assumes that TOpenCall is not nullable; +** this will be checked again when the grammar is fixed. +** Run-time captures can do whatever they want, so the result +** is conservative. +*/ +int checkaux (TTree *tree, int pred) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: case TUTFR: + case TFalse: case TOpenCall: case TThrow: /* labeled failure */ + return 0; /* not nullable */ + case TRep: case TTrue: + return 1; /* no fail */ + case TNot: case TBehind: /* can match empty, but can fail */ + if (pred == PEnofail) return 0; + else return 1; /* PEnullable */ + case TAnd: /* can match empty; fail iff body does */ + if (pred == PEnullable) return 1; + /* else return checkaux(sib1(tree), pred); */ + tree = sib1(tree); goto tailcall; + case TRunTime: /* can fail; match empty iff body does */ + if (pred == PEnofail) return 0; + /* else return checkaux(sib1(tree), pred); */ + tree = sib1(tree); goto tailcall; + case TSeq: + if (!checkaux(sib1(tree), pred)) return 0; + /* else return checkaux(sib2(tree), pred); */ + tree = sib2(tree); goto tailcall; + case TChoice: + if (checkaux(sib2(tree), pred)) return 1; + /* else return checkaux(sib1(tree), pred); */ + tree = sib1(tree); goto tailcall; + case TCapture: case TGrammar: case TRule: case TXInfo: + /* return checkaux(sib1(tree), pred); */ + tree = sib1(tree); goto tailcall; + case TCall: /* return checkaux(sib2(tree), pred); */ + tree = sib2(tree); goto tailcall; + default: assert(0); return 0; + } +} + + +/* +** number of characters to match a pattern (or -1 if variable) +*/ +int fixedlen (TTree *tree) { + int len = 0; /* to accumulate in tail calls */ + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: + return len + 1; + case TUTFR: + return (tree->cap == sib1(tree)->cap) ? len + tree->cap : -1; + case TFalse: case TTrue: case TNot: case TAnd: case TBehind: + return len; + case TRep: case TRunTime: case TOpenCall: + case TThrow: /* labeled failure */ + return -1; + case TCapture: case TRule: case TGrammar: case TXInfo: + /* return fixedlen(sib1(tree)); */ + tree = sib1(tree); goto tailcall; + case TCall: { + int n1 = callrecursive(tree, fixedlen, -1); + if (n1 < 0) + return -1; + else + return len + n1; + } + case TSeq: { + int n1 = fixedlen(sib1(tree)); + if (n1 < 0) + return -1; + /* else return fixedlen(sib2(tree)) + len; */ + len += n1; tree = sib2(tree); goto tailcall; + } + case TChoice: { + int n1 = fixedlen(sib1(tree)); + int n2 = fixedlen(sib2(tree)); + if (n1 != n2 || n1 < 0) + return -1; + else + return len + n1; + } + default: assert(0); return 0; + }; +} + + +/* +** Computes the 'first set' of a pattern. +** The result is a conservative aproximation: +** match p ax -> x (for some x) ==> a belongs to first(p) +** or +** a not in first(p) ==> match p ax -> fail (for all x) +** +** The set 'follow' is the first set of what follows the +** pattern (full set if nothing follows it). +** +** The function returns 0 when this resulting set can be used for +** test instructions that avoid the pattern altogether. +** A non-zero return can happen for two reasons: +** 1) match p '' -> '' ==> return has bit 1 set +** (tests cannot be used because they would always fail for an empty input); +** 2) there is a match-time capture ==> return has bit 2 set +** (optimizations should not bypass match-time captures). +*/ +static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: { + tocharset(tree, firstset); + return 0; + } + case TUTFR: { + int c; + loopset(i, firstset->cs[i] = 0); /* erase all chars */ + for (c = tree->key; c <= sib1(tree)->key; c++) + setchar(firstset->cs, c); + return 0; + } + case TTrue: { + loopset(i, firstset->cs[i] = follow->cs[i]); + return 1; /* accepts the empty string */ + } + case TFalse: { + loopset(i, firstset->cs[i] = 0); + return 0; + } + case TThrow: { /* labeled failure: must always throw the label */ + loopset(i, firstset->cs[i] = fullset->cs[i]); + return 1; + } + case TChoice: { + Charset csaux; + int e1 = getfirst(sib1(tree), follow, firstset); + int e2 = getfirst(sib2(tree), follow, &csaux); + loopset(i, firstset->cs[i] |= csaux.cs[i]); + return e1 | e2; + } + case TSeq: { + if (!nullable(sib1(tree))) { + /* when p1 is not nullable, p2 has nothing to contribute; + return getfirst(sib1(tree), fullset, firstset); */ + tree = sib1(tree); follow = fullset; goto tailcall; + } + else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */ + Charset csaux; + int e2 = getfirst(sib2(tree), follow, &csaux); + int e1 = getfirst(sib1(tree), &csaux, firstset); + if (e1 == 0) return 0; /* 'e1' ensures that first can be used */ + else if ((e1 | e2) & 2) /* one of the children has a matchtime? */ + return 2; /* pattern has a matchtime capture */ + else return e2; /* else depends on 'e2' */ + } + } + case TRep: { + getfirst(sib1(tree), follow, firstset); + loopset(i, firstset->cs[i] |= follow->cs[i]); + return 1; /* accept the empty string */ + } + case TCapture: case TGrammar: case TRule: case TXInfo: { + /* return getfirst(sib1(tree), follow, firstset); */ + tree = sib1(tree); goto tailcall; + } + case TRunTime: { /* function invalidates any follow info. */ + int e = getfirst(sib1(tree), fullset, firstset); + if (e) return 2; /* function is not "protected"? */ + else return 0; /* pattern inside capture ensures first can be used */ + } + case TCall: { + /* return getfirst(sib2(tree), follow, firstset); */ + tree = sib2(tree); goto tailcall; + } + case TAnd: { + int e = getfirst(sib1(tree), follow, firstset); + loopset(i, firstset->cs[i] &= follow->cs[i]); + return e; + } + case TNot: { + if (tocharset(sib1(tree), firstset)) { + cs_complement(firstset); + return 1; + } + /* else go through */ + } + case TBehind: { /* instruction gives no new information */ + /* call 'getfirst' only to check for math-time captures */ + int e = getfirst(sib1(tree), follow, firstset); + loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */ + return e | 1; /* always can accept the empty string */ + } + default: assert(0); return 0; + } +} + + +/* +** If 'headfail(tree)' true, then 'tree' can fail only depending on the +** next character of the subject. +*/ +static int headfail (TTree *tree) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: case TFalse: + return 1; + case TTrue: case TRep: case TRunTime: case TNot: + case TBehind: case TUTFR: + case TThrow: /* labeled failure: must always throw the label */ + return 0; + case TCapture: case TGrammar: case TRule: case TXInfo: case TAnd: + tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ + case TCall: + tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ + case TSeq: + if (!nofail(sib2(tree))) return 0; + /* else return headfail(sib1(tree)); */ + tree = sib1(tree); goto tailcall; + case TChoice: + if (!headfail(sib1(tree))) return 0; + /* else return headfail(sib2(tree)); */ + tree = sib2(tree); goto tailcall; + default: assert(0); return 0; + } +} + + +/* +** Check whether the code generation for the given tree can benefit +** from a follow set (to avoid computing the follow set when it is +** not needed) +*/ +static int needfollow (TTree *tree) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: case TUTFR: + case TFalse: case TTrue: case TAnd: case TNot: + case TRunTime: case TGrammar: case TCall: case TBehind: + case TThrow: /* (?)labeled failure */ + return 0; + case TChoice: case TRep: + return 1; + case TCapture: + tree = sib1(tree); goto tailcall; + case TSeq: + tree = sib2(tree); goto tailcall; + default: assert(0); return 0; + } +} + +/* }====================================================== */ + + + +/* +** {====================================================== +** Code generation +** ======================================================= +*/ + + +/* +** size of an instruction +*/ +int sizei (const Instruction *i) { + switch((Opcode)i->i.code) { + case ISet: case ISpan: return CHARSETINSTSIZE; + case ITestSet: return CHARSETINSTSIZE + 1; + case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: + case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: + case IUTFR: + return 2; + case IThrow: case IPredChoice: /* labeled failure */ + return 2; + case IThrowRec: /* labeled failure */ + return 3; + default: return 1; + } +} + + +/* +** state for the compiler +*/ +typedef struct CompileState { + Pattern *p; /* pattern being compiled */ + int ncode; /* next position in p->code to be filled */ + lua_State *L; +} CompileState; + + +/* +** code generation is recursive; 'opt' indicates that the code is being +** generated as the last thing inside an optional pattern (so, if that +** code is optional too, it can reuse the 'IChoice' already in place for +** the outer pattern). 'tt' points to a previous test protecting this +** code (or NOINST). 'fl' is the follow set of the pattern. +*/ +static void codegen (CompileState *compst, TTree *tree, int opt, int tt, + const Charset *fl); + + +void realloccode (lua_State *L, Pattern *p, int nsize) { + void *ud; + lua_Alloc f = lua_getallocf(L, &ud); + void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), + nsize * sizeof(Instruction)); + if (newblock == NULL && nsize > 0) + luaL_error(L, "not enough memory"); + p->code = (Instruction *)newblock; + p->codesize = nsize; +} + + +static int nextinstruction (CompileState *compst) { + int size = compst->p->codesize; + if (compst->ncode >= size) + realloccode(compst->L, compst->p, size * 2); + return compst->ncode++; +} + + +#define getinstr(cs,i) ((cs)->p->code[i]) + + +static int addinstruction (CompileState *compst, Opcode op, int aux) { + int i = nextinstruction(compst); + getinstr(compst, i).i.code = op; + getinstr(compst, i).i.aux = aux; + return i; +} + + +/* +** Add an instruction followed by space for an offset (to be set later) +*/ +static int addoffsetinst (CompileState *compst, Opcode op) { + int i = addinstruction(compst, op, 0); /* instruction */ + addinstruction(compst, (Opcode)0, 0); /* open space for offset */ + assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2 || + op == IThrowRec); /* labeled failure */ + + return i; +} + + +/* labeled failure */ +static void codethrow (CompileState *compst, TTree *throw) { + int recov, aux, n; + if (throw->u.ps != 0) { + recov = addoffsetinst(compst, IThrowRec); + assert(sib1(sib2(throw))->tag == TXInfo); + n = sib1(sib2(throw))->u.n; + } else { + recov = addinstruction(compst, IThrow, 0); + n = -1; + } + aux = nextinstruction(compst); + getinstr(compst, aux).i.key = throw->key; /* next instruction keeps only rule name */ + getinstr(compst, recov).i.key = n; /* rule number */ +} +/* labeled failure */ + + +/* +** Set the offset of an instruction +*/ +static void setoffset (CompileState *compst, int instruction, int offset) { + getinstr(compst, instruction + 1).offset = offset; +} + + +static void codeutfr (CompileState *compst, TTree *tree) { + int i = addoffsetinst(compst, IUTFR); + int to = sib1(tree)->u.n; + assert(sib1(tree)->tag == TXInfo); + getinstr(compst, i + 1).offset = tree->u.n; + getinstr(compst, i).i.aux = to & 0xff; + getinstr(compst, i).i.key = to >> 8; +} + + +/* +** Add a capture instruction: +** 'op' is the capture instruction; 'cap' the capture kind; +** 'key' the key into ktable; 'aux' is the optional capture offset +** +*/ +static int addinstcap (CompileState *compst, Opcode op, int cap, int key, + int aux) { + int i = addinstruction(compst, op, joinkindoff(cap, aux)); + getinstr(compst, i).i.key = key; + return i; +} + + +#define gethere(compst) ((compst)->ncode) + +#define target(code,i) ((i) + code[i + 1].offset) + + +/* +** Patch 'instruction' to jump to 'target' +*/ +static void jumptothere (CompileState *compst, int instruction, int target) { + if (instruction >= 0) + setoffset(compst, instruction, target - instruction); +} + + +/* +** Patch 'instruction' to jump to current position +*/ +static void jumptohere (CompileState *compst, int instruction) { + jumptothere(compst, instruction, gethere(compst)); +} + + +/* +** Code an IChar instruction, or IAny if there is an equivalent +** test dominating it +*/ +static void codechar (CompileState *compst, int c, int tt) { + if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar && + getinstr(compst, tt).i.aux == c) + addinstruction(compst, IAny, 0); + else + addinstruction(compst, IChar, c); +} + + +/* +** Add a charset posfix to an instruction +*/ +static void addcharset (CompileState *compst, const byte *cs) { + int p = gethere(compst); + int i; + for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++) + nextinstruction(compst); /* space for buffer */ + /* fill buffer with charset */ + loopset(j, getinstr(compst, p).buff[j] = cs[j]); +} + + +/* +** code a char set, optimizing unit sets for IChar, "complete" +** sets for IAny, and empty sets for IFail; also use an IAny +** when instruction is dominated by an equivalent test. +*/ +static void codecharset (CompileState *compst, const byte *cs, int tt) { + int c = 0; /* (=) to avoid warnings */ + Opcode op = charsettype(cs, &c); + switch (op) { + case IChar: codechar(compst, c, tt); break; + case ISet: { /* non-trivial set? */ + if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet && + cs_equal(cs, getinstr(compst, tt + 2).buff)) + addinstruction(compst, IAny, 0); + else { + addinstruction(compst, ISet, 0); + addcharset(compst, cs); + } + break; + } + default: addinstruction(compst, op, c); break; + } +} + + +/* +** code a test set, optimizing unit sets for ITestChar, "complete" +** sets for ITestAny, and empty sets for IJmp (always fails). +** 'e' is true iff test should accept the empty string. (Test +** instructions in the current VM never accept the empty string.) +*/ +static int codetestset (CompileState *compst, Charset *cs, int e) { + if (e) return NOINST; /* no test */ + else { + int c = 0; + Opcode op = charsettype(cs->cs, &c); + switch (op) { + case IFail: return addoffsetinst(compst, IJmp); /* always jump */ + case IAny: return addoffsetinst(compst, ITestAny); + case IChar: { + int i = addoffsetinst(compst, ITestChar); + getinstr(compst, i).i.aux = c; + return i; + } + case ISet: { + int i = addoffsetinst(compst, ITestSet); + addcharset(compst, cs->cs); + return i; + } + default: assert(0); return 0; + } + } +} + + +/* +** Find the final destination of a sequence of jumps +*/ +static int finaltarget (Instruction *code, int i) { + while (code[i].i.code == IJmp) + i = target(code, i); + return i; +} + + +/* +** final label (after traversing any jumps) +*/ +static int finallabel (Instruction *code, int i) { + return finaltarget(code, target(code, i)); +} + + +/* +** == behind n;

(where n = fixedlen(p)) +*/ +static void codebehind (CompileState *compst, TTree *tree) { + if (tree->u.n > 0) + addinstruction(compst, IBehind, tree->u.n); + codegen(compst, sib1(tree), 0, NOINST, fullset); +} + + +/* +** Choice; optimizations: +** - when p1 is headfail or when first(p1) and first(p2) are disjoint, +** than a character not in first(p1) cannot go to p1 and a character +** in first(p1) cannot go to p2, either because p1 will accept +** (headfail) or because it is not in first(p2) (disjoint). +** (The second case is not valid if p1 accepts the empty string, +** as then there is no character at all...) +** - when p2 is empty and opt is true; a IPartialCommit can reuse +** the Choice already active in the stack. +*/ +static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, + const Charset *fl) { + int emptyp2 = (p2->tag == TTrue); + Charset cs1, cs2; + int e1 = getfirst(p1, fullset, &cs1); + if (headfail(p1) || + (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) { + /* == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */ + int test = codetestset(compst, &cs1, 0); + int jmp = NOINST; + codegen(compst, p1, 0, test, fl); + if (!emptyp2) + jmp = addoffsetinst(compst, IJmp); + jumptohere(compst, test); + codegen(compst, p2, opt, NOINST, fl); + jumptohere(compst, jmp); + } + else if (opt && emptyp2) { + /* p1? == IPartialCommit; p1 */ + jumptohere(compst, addoffsetinst(compst, IPartialCommit)); + codegen(compst, p1, 1, NOINST, fullset); + } + else { + /* == + test(first(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: */ + int pcommit; + int test = codetestset(compst, &cs1, e1); + int pchoice = addoffsetinst(compst, IChoice); + codegen(compst, p1, emptyp2, test, fullset); + pcommit = addoffsetinst(compst, ICommit); + jumptohere(compst, pchoice); + jumptohere(compst, test); + codegen(compst, p2, opt, NOINST, fl); + jumptohere(compst, pcommit); + } +} + + +/* +** And predicate +** optimization: fixedlen(p) = n ==> <&p> ==

; behind n +** (valid only when 'p' has no captures) +*/ +static void codeand (CompileState *compst, TTree *tree, int tt) { + /* labeled failure: optimization disabled bacause in case of a failure it + does not report the expected error position (the current subject position + when begin the matching of <&p>) */ + /*int n = fixedlen(tree); + if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) { + codegen(compst, tree, 0, tt, fullset); + if (n > 0) + addinstruction(compst, IBehind, n); + } + else */{ /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */ + int pcommit; + int pchoice = addoffsetinst(compst, IPredChoice); /* labeled failure */ + codegen(compst, tree, 0, tt, fullset); + pcommit = addoffsetinst(compst, IBackCommit); + jumptohere(compst, pchoice); + addinstruction(compst, IFail, 0); + jumptohere(compst, pcommit); + } +} + + +/* +** Captures: if pattern has fixed (and not too big) length, and it +** has no nested captures, use a single IFullCapture instruction +** after the match; otherwise, enclose the pattern with OpenCapture - +** CloseCapture. +*/ +static void codecapture (CompileState *compst, TTree *tree, int tt, + const Charset *fl) { + int len = fixedlen(sib1(tree)); + if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) { + codegen(compst, sib1(tree), 0, tt, fl); + addinstcap(compst, IFullCapture, tree->cap, tree->key, len); + } + else { + addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0); + codegen(compst, sib1(tree), 0, tt, fl); + addinstcap(compst, ICloseCapture, Cclose, 0, 0); + } +} + + +static void coderuntime (CompileState *compst, TTree *tree, int tt) { + addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0); + codegen(compst, sib1(tree), 0, tt, fullset); + addinstcap(compst, ICloseRunTime, Cclose, 0, 0); +} + + +/* +** Repetion; optimizations: +** When pattern is a charset, can use special instruction ISpan. +** When pattern is head fail, or if it starts with characters that +** are disjoint from what follows the repetions, a simple test +** is enough (a fail inside the repetition would backtrack to fail +** again in the following pattern, so there is no need for a choice). +** When 'opt' is true, the repetion can reuse the Choice already +** active in the stack. +*/ +static void coderep (CompileState *compst, TTree *tree, int opt, + const Charset *fl) { + Charset st; + if (tocharset(tree, &st)) { + addinstruction(compst, ISpan, 0); + addcharset(compst, st.cs); + } + else { + int e1 = getfirst(tree, fullset, &st); + if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) { + /* L1: test (fail(p1)) -> L2;

; jmp L1; L2: */ + int jmp; + int test = codetestset(compst, &st, 0); + codegen(compst, tree, 0, test, fullset); + jmp = addoffsetinst(compst, IJmp); + jumptohere(compst, test); + jumptothere(compst, jmp, test); + } + else { + /* test(fail(p1)) -> L2; choice L2; L1:

; partialcommit L1; L2: */ + /* or (if 'opt'): partialcommit L1; L1:

; partialcommit L1; */ + int commit, l2; + int test = codetestset(compst, &st, e1); + int pchoice = NOINST; + if (opt) + jumptohere(compst, addoffsetinst(compst, IPartialCommit)); + else + pchoice = addoffsetinst(compst, IChoice); + l2 = gethere(compst); + codegen(compst, tree, 0, NOINST, fullset); + commit = addoffsetinst(compst, IPartialCommit); + jumptothere(compst, commit, l2); + jumptohere(compst, pchoice); + jumptohere(compst, test); + } + } +} + + +/* +** Not predicate; optimizations: +** In any case, if first test fails, 'not' succeeds, so it can jump to +** the end. If pattern is headfail, that is all (it cannot fail +** in other parts); this case includes 'not' of simple sets. Otherwise, +** use the default code (a choice plus a failtwice). +*/ +static void codenot (CompileState *compst, TTree *tree) { + Charset st; + int e = getfirst(tree, fullset, &st); + int test = codetestset(compst, &st, e); + if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */ + addinstruction(compst, IFail, 0); + else { + /* test(fail(p))-> L1; choice L1;

; failtwice; L1: */ + int pchoice = addoffsetinst(compst, IPredChoice); /* labeled failure */ + codegen(compst, tree, 0, NOINST, fullset); + addinstruction(compst, IFailTwice, 0); + jumptohere(compst, pchoice); + } + jumptohere(compst, test); +} + + +/* +** change open calls to calls, using list 'positions' to find +** correct offsets; also optimize tail calls +*/ +static void correctcalls (CompileState *compst, int *positions, + int from, int to) { + int i; + Instruction *code = compst->p->code; + for (i = from; i < to; i += sizei(&code[i])) { + if (code[i].i.code == IOpenCall) { + int n = code[i].i.key; /* rule number */ + int rule = positions[n]; /* rule position */ + assert(rule == from || code[rule - 1].i.code == IRet); + if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */ + code[i].i.code = IJmp; /* tail call */ + else + code[i].i.code = ICall; + jumptothere(compst, i, rule); /* call jumps to respective rule */ + } else if (code[i].i.code == IThrowRec) { /* labeled failure */ + int n = code[i].i.key; /* rule number */ + int rule = positions[n]; /* rule position */ + assert(rule == from || code[rule - 1].i.code == IRet); + jumptothere(compst, i, rule); /* call jumps to respective rule */ + } + + } + assert(i == to); +} + + +/* +** Code for a grammar: +** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2: +*/ +static void codegrammar (CompileState *compst, TTree *grammar) { + int positions[MAXRULES]; + int rulenumber = 0; + TTree *rule; + int firstcall = addoffsetinst(compst, ICall); /* call initial rule */ + int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */ + int start = gethere(compst); /* here starts the initial rule */ + jumptohere(compst, firstcall); + for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { + TTree *r = sib1(rule); + assert(r->tag == TXInfo); + positions[rulenumber++] = gethere(compst); /* save rule position */ + codegen(compst, sib1(r), 0, NOINST, fullset); /* code rule */ + addinstruction(compst, IRet, 0); + } + assert(rule->tag == TTrue); + jumptohere(compst, jumptoend); + correctcalls(compst, positions, start, gethere(compst)); +} + + +static void codecall (CompileState *compst, TTree *call) { + int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ + assert(sib1(sib2(call))->tag == TXInfo); + getinstr(compst, c).i.key = sib1(sib2(call))->u.n; /* rule number */ +} + + +/* +** Code first child of a sequence +** (second child is called in-place to allow tail call) +** Return 'tt' for second child +*/ +static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, + int tt, const Charset *fl) { + if (needfollow(p1)) { + Charset fl1; + getfirst(p2, fl, &fl1); /* p1 follow is p2 first */ + codegen(compst, p1, 0, tt, &fl1); + } + else /* use 'fullset' as follow */ + codegen(compst, p1, 0, tt, fullset); + if (fixedlen(p1) != 0) /* can 'p1' consume anything? */ + return NOINST; /* invalidate test */ + else return tt; /* else 'tt' still protects sib2 */ +} + + +/* +** Main code-generation function: dispatch to auxiliar functions +** according to kind of tree. ('needfollow' should return true +** only for consructions that use 'fl'.) +*/ +static void codegen (CompileState *compst, TTree *tree, int opt, int tt, + const Charset *fl) { + tailcall: + switch (tree->tag) { + case TChar: codechar(compst, tree->u.n, tt); break; + case TAny: addinstruction(compst, IAny, 0); break; + case TSet: codecharset(compst, treebuffer(tree), tt); break; + case TTrue: break; + case TFalse: addinstruction(compst, IFail, 0); break; + case TUTFR: codeutfr(compst, tree); break; + case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break; + case TRep: coderep(compst, sib1(tree), opt, fl); break; + case TBehind: codebehind(compst, tree); break; + case TNot: codenot(compst, sib1(tree)); break; + case TAnd: codeand(compst, sib1(tree), tt); break; + case TCapture: codecapture(compst, tree, tt, fl); break; + case TRunTime: coderuntime(compst, tree, tt); break; + case TGrammar: codegrammar(compst, tree); break; + case TCall: codecall(compst, tree); break; + case TSeq: { + tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */ + /* codegen(compst, p2, opt, tt, fl); */ + tree = sib2(tree); goto tailcall; + } + case TThrow: { /* labeled failure */ + codethrow(compst, tree); + break; + } + default: assert(0); + } +} + + +/* +** Optimize jumps and other jump-like instructions. +** * Update labels of instructions with labels to their final +** destinations (e.g., choice L1; ... L1: jmp L2: becomes +** choice L2) +** * Jumps to other instructions that do jumps become those +** instructions (e.g., jump to return becomes a return; jump +** to commit becomes a commit) +*/ +static void peephole (CompileState *compst) { + Instruction *code = compst->p->code; + int i; + for (i = 0; i < compst->ncode; i += sizei(&code[i])) { + redo: + switch (code[i].i.code) { + case IChoice: case ICall: case ICommit: case IPartialCommit: + case IBackCommit: case ITestChar: case ITestSet: + case ITestAny: { /* instructions with labels */ + jumptothere(compst, i, finallabel(code, i)); /* optimize label */ + break; + } + case IJmp: { + int ft = finaltarget(code, i); + switch (code[ft].i.code) { /* jumping to what? */ + case IRet: case IFail: case IFailTwice: + case IEnd: { /* instructions with unconditional implicit jumps */ + code[i] = code[ft]; /* jump becomes that instruction */ + code[i + 1].i.code = IEmpty; /* 'no-op' for target position */ + break; + } + case ICommit: case IPartialCommit: + case IBackCommit: { /* inst. with unconditional explicit jumps */ + int fft = finallabel(code, ft); + code[i] = code[ft]; /* jump becomes that instruction... */ + jumptothere(compst, i, fft); /* but must correct its offset */ + goto redo; /* reoptimize its label */ + } + default: { + jumptothere(compst, i, ft); /* optimize label */ + break; + } + } + break; + } + default: break; + } + } + assert(code[i - 1].i.code == IEnd); +} + + +/* +** Compile a pattern +*/ +Instruction *compile (lua_State *L, Pattern *p) { + CompileState compst; + compst.p = p; compst.ncode = 0; compst.L = L; + realloccode(L, p, 2); /* minimum initial size */ + codegen(&compst, p->tree, 0, NOINST, fullset); + addinstruction(&compst, IEnd, 0); + realloccode(L, p, compst.ncode); /* set final size */ + peephole(&compst); + return p->code; +} + + +/* }====================================================== */ + diff --git a/lplcode.h b/lplcode.h new file mode 100644 index 0000000..1b4ec78 --- /dev/null +++ b/lplcode.h @@ -0,0 +1,40 @@ +/* +** $Id: lplcode.h $ +*/ + +#if !defined(lplcode_h) +#define lplcode_h + +#include "lua.h" + +#include "lpltypes.h" +#include "lpltree.h" +#include "lplvm.h" + +LUAI_FUNC int tocharset (TTree *tree, Charset *cs); +LUAI_FUNC int checkaux (TTree *tree, int pred); +LUAI_FUNC int fixedlen (TTree *tree); +LUAI_FUNC int hascaptures (TTree *tree); +LUAI_FUNC int lp_gc (lua_State *L); +LUAI_FUNC Instruction *compile (lua_State *L, Pattern *p); +LUAI_FUNC void realloccode (lua_State *L, Pattern *p, int nsize); +LUAI_FUNC int sizei (const Instruction *i); + + +#define PEnullable 0 +#define PEnofail 1 + +/* +** nofail(t) implies that 't' cannot fail with any input +*/ +#define nofail(t) checkaux(t, PEnofail) + +/* +** (not nullable(t)) implies 't' cannot match without consuming +** something +*/ +#define nullable(t) checkaux(t, PEnullable) + + + +#endif diff --git a/lplprint.c b/lplprint.c new file mode 100644 index 0000000..7a9f95c --- /dev/null +++ b/lplprint.c @@ -0,0 +1,275 @@ +/* +** $Id: lplprint.c $ +** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) +*/ + +#include +#include +#include + + +#include "lpltypes.h" +#include "lplprint.h" +#include "lplcode.h" + + +#if defined(LPEG_DEBUG) + +/* +** {====================================================== +** Printing patterns (for debugging) +** ======================================================= +*/ + + +void printcharset (const byte *st) { + int i; + printf("["); + for (i = 0; i <= UCHAR_MAX; i++) { + int first = i; + while (testchar(st, i) && i <= UCHAR_MAX) i++; + if (i - 1 == first) /* unary range? */ + printf("(%02x)", first); + else if (i - 1 > first) /* non-empty range? */ + printf("(%02x-%02x)", first, i - 1); + } + printf("]"); +} + + +static const char *capkind (int kind) { + const char *const modes[] = { + "close", "position", "constant", "backref", + "argument", "simple", "table", "function", + "query", "string", "num", "substitution", "fold", + "runtime", "group"}; + return modes[kind]; +} + + +static void printjmp (const Instruction *op, const Instruction *p) { + printf("-> %d", (int)(p + (p + 1)->offset - op)); +} + + +void printinst (const Instruction *op, const Instruction *p) { + const char *const names[] = { + "any", "char", "set", + "testany", "testchar", "testset", + "span", "utf-range", "behind", + "ret", "end", + "choice", "pred_choice", "jmp", "call", "open_call", /* labeled failure */ + "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", + "fullcapture", "opencapture", "closecapture", "closeruntime", + "throw", "throw_rec", /* labeled failure */ + "--" + }; + printf("%02ld: %s ", (long)(p - op), names[p->i.code]); + switch ((Opcode)p->i.code) { + case IChar: { + printf("'%c' (%02x)", p->i.aux, p->i.aux); + break; + } + case ITestChar: { + printf("'%c' (%02x)", p->i.aux, p->i.aux); printjmp(op, p); + break; + } + case IUTFR: { + printf("%d - %d", p[1].offset, utf_to(p)); + break; + } + case IFullCapture: { + printf("%s (size = %d) (idx = %d)", + capkind(getkind(p)), getoff(p), p->i.key); + break; + } + case IOpenCapture: { + printf("%s (idx = %d)", capkind(getkind(p)), p->i.key); + break; + } + case ISet: { + printcharset((p+1)->buff); + break; + } + case ITestSet: { + printcharset((p+2)->buff); printjmp(op, p); + break; + } + case ISpan: { + printcharset((p+1)->buff); + break; + } + case IOpenCall: { + printf("-> %d", (p + 1)->offset); + break; + } + case IBehind: { + printf("%d", p->i.aux); + break; + } + case IJmp: case ICall: case ICommit: case IChoice: + case IPartialCommit: case IBackCommit: case ITestAny: + case IPredChoice: { /* labeled failure */ + printjmp(op, p); + break; + } + case IThrow: { /* labeled failure */ + printf("(idx = %d)", (p + 1)->i.key); + break; + } + case IThrowRec: { /* labeled failure */ + printjmp(op, p); printf(" (idx = %d)", (p + 2)->i.key); + break; + } + default: break; + } + printf("\n"); +} + + +void printpatt (Instruction *p, int n) { + Instruction *op = p; + while (p < op + n) { + printinst(op, p); + p += sizei(p); + } +} + + +#if defined(LPEG_DEBUG) +static void printcap (Capture *cap) { + printf("%s (idx: %d - size: %d) -> %p\n", + capkind(cap->kind), cap->idx, cap->siz, cap->s); +} + + +void printcaplist (Capture *cap, Capture *limit) { + printf(">======\n"); + for (; cap->s && (limit == NULL || cap < limit); cap++) + printcap(cap); + printf("=======\n"); +} +#endif + +/* }====================================================== */ + + +/* +** {====================================================== +** Printing trees (for debugging) +** ======================================================= +*/ + +static const char *tagnames[] = { + "char", "set", "any", + "true", "false", "utf8.range", + "rep", + "seq", "choice", + "not", "and", + "call", "opencall", "rule", "xinfo", "grammar", + "behind", + "capture", "run-time", + "throw" /* labeled failure */ +}; + + +void printtree (TTree *tree, int ident) { + int i; + int sibs = numsiblings[tree->tag]; + for (i = 0; i < ident; i++) printf(" "); + printf("%s", tagnames[tree->tag]); + switch (tree->tag) { + case TChar: { + int c = tree->u.n; + if (isprint(c)) + printf(" '%c'\n", c); + else + printf(" (%02X)\n", c); + break; + } + case TSet: { + printcharset(treebuffer(tree)); + printf("\n"); + break; + } + case TUTFR: { + assert(sib1(tree)->tag == TXInfo); + printf(" %d (%02x %d) - %d (%02x %d) \n", + tree->u.n, tree->key, tree->cap, + sib1(tree)->u.n, sib1(tree)->key, sib1(tree)->cap); + break; + } + case TOpenCall: case TCall: { + assert(sib1(sib2(tree))->tag == TXInfo); + printf(" key: %d (rule: %d)\n", tree->key, sib1(sib2(tree))->u.n); + break; + } + case TBehind: { + printf(" %d\n", tree->u.n); + break; + } + case TCapture: { + printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); + break; + } + case TRule: { + printf(" key: %d\n", tree->key); + sibs = 1; /* do not print 'sib2' (next rule) as a sibling */ + break; + } + case TXInfo: { + printf(" n: %d\n", tree->u.n); + break; + } + case TGrammar: { + TTree *rule = sib1(tree); + printf(" %d\n", tree->u.n); /* number of rules */ + for (i = 0; i < tree->u.n; i++) { + printtree(rule, ident + 2); + rule = sib2(rule); + } + assert(rule->tag == TTrue); /* sentinel */ + sibs = 0; /* siblings already handled */ + break; + } + case TThrow: { /* labeled failure */ + if (tree->u.ps != 0) + assert(sib2(tree)->tag == TRule); + printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap); + break; + } + default: + printf("\n"); + break; + } + if (sibs >= 1) { + printtree(sib1(tree), ident + 2); + if (sibs >= 2) + printtree(sib2(tree), ident + 2); + } +} + + +void printktable (lua_State *L, int idx) { + int n, i; + lua_getuservalue(L, idx); + if (lua_isnil(L, -1)) /* no ktable? */ + return; + n = lua_rawlen(L, -1); + printf("["); + for (i = 1; i <= n; i++) { + printf("%d = ", i); + lua_rawgeti(L, -1, i); + if (lua_isstring(L, -1)) + printf("%s ", lua_tostring(L, -1)); + else + printf("%s ", lua_typename(L, lua_type(L, -1))); + lua_pop(L, 1); + } + printf("]\n"); + /* leave ktable at the stack */ +} + +/* }====================================================== */ + +#endif diff --git a/lplprint.h b/lplprint.h new file mode 100644 index 0000000..4f020f5 --- /dev/null +++ b/lplprint.h @@ -0,0 +1,36 @@ +/* +** $Id: lplprint.h $ +*/ + + +#if !defined(lplprint_h) +#define lplprint_h + + +#include "lpltree.h" +#include "lplvm.h" + + +#if defined(LPEG_DEBUG) + +LUAI_FUNC void printpatt (Instruction *p, int n); +LUAI_FUNC void printtree (TTree *tree, int ident); +LUAI_FUNC void printktable (lua_State *L, int idx); +LUAI_FUNC void printcharset (const byte *st); +LUAI_FUNC void printcaplist (Capture *cap, Capture *limit); +LUAI_FUNC void printinst (const Instruction *op, const Instruction *p); + +#else + +#define printktable(L,idx) \ + luaL_error(L, "function only implemented in debug mode") +#define printtree(tree,i) \ + luaL_error(L, "function only implemented in debug mode") +#define printpatt(p,n) \ + luaL_error(L, "function only implemented in debug mode") + +#endif + + +#endif + diff --git a/lpltree.c b/lpltree.c new file mode 100644 index 0000000..e7c6638 --- /dev/null +++ b/lpltree.c @@ -0,0 +1,1403 @@ +/* +** $Id: lpltree.c $ +** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) +*/ + +#include +#include +#include + + +#include "lua.h" +#include "lauxlib.h" + +#include "lpltypes.h" +#include "lplcap.h" +#include "lplcode.h" +#include "lplprint.h" +#include "lpltree.h" + + +/* number of siblings for each tree */ +const byte numsiblings[] = { + 0, 0, 0, /* char, set, any */ + 0, 0, 0, /* true, false, utf-range */ + 1, /* rep */ + 2, 2, /* seq, choice */ + 1, 1, /* not, and */ + 0, 0, 2, 1, 1, /* call, opencall, rule, prerule, grammar */ + 1, /* behind */ + 1, 1, /* capture, runtime capture */ + 0 /* labeled failure throw */ +}; + + +static TTree *newgrammar (lua_State *L, int arg); + + +/* +** returns a reasonable name for value at index 'idx' on the stack +*/ +static const char *val2str (lua_State *L, int idx) { + const char *k = lua_tostring(L, idx); + if (k != NULL) + return lua_pushfstring(L, "%s", k); + else + return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx)); +} + + +/* +** Fix a TOpenCall into a TCall node, using table 'postable' to +** translate a key to its rule address in the tree. Raises an +** error if key does not exist. +*/ +static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t, byte tag) { /* labeled failure */ + int n; + lua_rawgeti(L, -1, t->key); /* get rule's name */ + lua_gettable(L, postable); /* query name in position table */ + n = lua_tonumber(L, -1); /* get (absolute) position */ + lua_pop(L, 1); /* remove position */ + if (tag == TOpenCall) { /* labeled failure */ + if (n == 0) { /* no position? */ + lua_rawgeti(L, -1, t->key); /* get rule's name again */ + luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); + } + t->tag = TCall; + t->u.ps = n - (t - g); /* position relative to node */ + assert(sib2(t)->tag == TRule); + sib2(t)->key = t->key; /* fix rule's key */ + } else if (n != 0) { /* labeled failure */ + t->u.ps = n - (t - g); /* position relative to node */ + } +} + + +/* +** Transform left associative constructions into right +** associative ones, for sequence and choice; that is: +** (t11 + t12) + t2 => t11 + (t12 + t2) +** (t11 * t12) * t2 => t11 * (t12 * t2) +** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) +*/ +static void correctassociativity (TTree *tree) { + TTree *t1 = sib1(tree); + assert(tree->tag == TChoice || tree->tag == TSeq); + while (t1->tag == tree->tag) { + int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */ + int n11size = t1->u.ps - 1; + int n12size = n1size - n11size - 1; + memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */ + tree->u.ps = n11size + 1; + sib2(tree)->tag = tree->tag; + sib2(tree)->u.ps = n12size + 1; + } +} + + +/* +** Make final adjustments in a tree. Fix open calls in tree 't', +** making them refer to their respective rules or raising appropriate +** errors (if not inside a grammar). Correct associativity of associative +** constructions (making them right associative). Assume that tree's +** ktable is at the top of the stack (for error messages). +*/ +static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { + tailcall: + switch (t->tag) { + case TGrammar: /* subgrammars were already fixed */ + return; + case TOpenCall: { + if (g != NULL) /* inside a grammar? */ + fixonecall(L, postable, g, t, TOpenCall); /* labeled failure */ + else { /* open call outside grammar */ + lua_rawgeti(L, -1, t->key); + luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); + } + break; + } + case TThrow: { /* labeled failure */ + if (g != NULL) /* inside a grammar? */ + fixonecall(L, postable, g, t, TThrow); + break; + } + case TSeq: case TChoice: + correctassociativity(t); + break; + } + switch (numsiblings[t->tag]) { + case 1: /* finalfix(L, postable, g, sib1(t)); */ + t = sib1(t); goto tailcall; + case 2: + finalfix(L, postable, g, sib1(t)); + t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */ + default: assert(numsiblings[t->tag] == 0); break; + } +} + + + +/* +** {=================================================================== +** KTable manipulation +** +** - The ktable of a pattern 'p' can be shared by other patterns that +** contain 'p' and no other constants. Because of this sharing, we +** should not add elements to a 'ktable' unless it was freshly created +** for the new pattern. +** +** - The maximum index in a ktable is USHRT_MAX, because trees and +** patterns use unsigned shorts to store those indices. +** ==================================================================== +*/ + +/* +** Create a new 'ktable' to the pattern at the top of the stack. +*/ +static void newktable (lua_State *L, int n) { + lua_createtable(L, n, 0); /* create a fresh table */ + lua_setuservalue(L, -2); /* set it as 'ktable' for pattern */ +} + + +/* +** Add element 'idx' to 'ktable' of pattern at the top of the stack; +** Return index of new element. +** If new element is nil, does not add it to table (as it would be +** useless) and returns 0, as ktable[0] is always nil. +*/ +static int addtoktable (lua_State *L, int idx) { + if (lua_isnil(L, idx)) /* nil value? */ + return 0; + else { + int n; + lua_getuservalue(L, -1); /* get ktable from pattern */ + n = lua_rawlen(L, -1); + if (n >= USHRT_MAX) + luaL_error(L, "too many Lua values in pattern"); + lua_pushvalue(L, idx); /* element to be added */ + lua_rawseti(L, -2, ++n); + lua_pop(L, 1); /* remove 'ktable' */ + return n; + } +} + + +/* +** Return the number of elements in the ktable at 'idx'. +** In Lua 5.2/5.3, default "environment" for patterns is nil, not +** a table. Treat it as an empty table. In Lua 5.1, assumes that +** the environment has no numeric indices (len == 0) +*/ +static int ktablelen (lua_State *L, int idx) { + if (!lua_istable(L, idx)) return 0; + else return lua_rawlen(L, idx); +} + + +/* +** Concatentate the contents of table 'idx1' into table 'idx2'. +** (Assume that both indices are negative.) +** Return the original length of table 'idx2' (or 0, if no +** element was added, as there is no need to correct any index). +*/ +static int concattable (lua_State *L, int idx1, int idx2) { + int i; + int n1 = ktablelen(L, idx1); + int n2 = ktablelen(L, idx2); + if (n1 + n2 > USHRT_MAX) + luaL_error(L, "too many Lua values in pattern"); + if (n1 == 0) return 0; /* nothing to correct */ + for (i = 1; i <= n1; i++) { + lua_rawgeti(L, idx1, i); + lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ + } + return n2; +} + + +/* +** When joining 'ktables', constants from one of the subpatterns must +** be renumbered; 'correctkeys' corrects their indices (adding 'n' +** to each of them) +*/ +static void correctkeys (TTree *tree, int n) { + if (n == 0) return; /* no correction? */ + tailcall: + switch (tree->tag) { + case TOpenCall: case TCall: case TRunTime: case TRule: case TThrow: { /* labeled failure */ + if (tree->key > 0) + tree->key += n; + break; + } + case TCapture: { + if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) + tree->key += n; + break; + } + default: break; + } + switch (numsiblings[tree->tag]) { + case 1: /* correctkeys(sib1(tree), n); */ + tree = sib1(tree); goto tailcall; + case 2: + correctkeys(sib1(tree), n); + tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ + default: assert(numsiblings[tree->tag] == 0); break; + } +} + + +/* +** Join the ktables from p1 and p2 the ktable for the new pattern at the +** top of the stack, reusing them when possible. +*/ +static void joinktables (lua_State *L, int p1, TTree *t2, int p2) { + int n1, n2; + lua_getuservalue(L, p1); /* get ktables */ + lua_getuservalue(L, p2); + n1 = ktablelen(L, -2); + n2 = ktablelen(L, -1); + if (n1 == 0 && n2 == 0) /* are both tables empty? */ + lua_pop(L, 2); /* nothing to be done; pop tables */ + else if (n2 == 0 || lp_equal(L, -2, -1)) { /* 2nd table empty or equal? */ + lua_pop(L, 1); /* pop 2nd table */ + lua_setuservalue(L, -2); /* set 1st ktable into new pattern */ + } + else if (n1 == 0) { /* first table is empty? */ + lua_setuservalue(L, -3); /* set 2nd table into new pattern */ + lua_pop(L, 1); /* pop 1st table */ + } + else { + lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ + /* stack: new p; ktable p1; ktable p2; new ktable */ + concattable(L, -3, -1); /* from p1 into new ktable */ + concattable(L, -2, -1); /* from p2 into new ktable */ + lua_setuservalue(L, -4); /* new ktable becomes 'p' environment */ + lua_pop(L, 2); /* pop other ktables */ + correctkeys(t2, n1); /* correction for indices from p2 */ + } +} + + +/* +** copy 'ktable' of element 'idx' to new tree (on top of stack) +*/ +static void copyktable (lua_State *L, int idx) { + lua_getuservalue(L, idx); + lua_setuservalue(L, -2); +} + + +/* +** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable' +** from tree at the top of the stack, and correct corresponding +** tree. +*/ +static void mergektable (lua_State *L, int idx, TTree *stree) { + int n; + lua_getuservalue(L, -1); /* get ktables */ + lua_getuservalue(L, idx); + n = concattable(L, -1, -2); + lua_pop(L, 2); /* remove both ktables */ + correctkeys(stree, n); +} + + +/* +** Create a new 'ktable' to the pattern at the top of the stack, adding +** all elements from pattern 'p' (if not 0) plus element 'idx' to it. +** Return index of new element. +*/ +static int addtonewktable (lua_State *L, int p, int idx) { + newktable(L, 1); + if (p) + mergektable(L, p, NULL); + return addtoktable(L, idx); +} + +/* }====================================================== */ + + +/* +** {====================================================== +** Tree generation +** ======================================================= +*/ + +/* +** In 5.2, could use 'luaL_testudata'... +*/ +static int testpattern (lua_State *L, int idx) { + if (lua_touserdata(L, idx)) { /* value is a userdata? */ + if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ + luaL_getmetatable(L, PATTERN_T); + if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */ + lua_pop(L, 2); /* remove both metatables */ + return 1; + } + } + } + return 0; +} + + +static Pattern *getpattern (lua_State *L, int idx) { + return (Pattern *)luaL_checkudata(L, idx, PATTERN_T); +} + + +static int getsize (lua_State *L, int idx) { + return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; +} + + +static TTree *gettree (lua_State *L, int idx, int *len) { + Pattern *p = getpattern(L, idx); + if (len) + *len = getsize(L, idx); + return p->tree; +} + + +/* +** create a pattern. Set its uservalue (the 'ktable') equal to its +** metatable. (It could be any empty sequence; the metatable is at +** hand here, so we use it.) +*/ +static TTree *newtree (lua_State *L, int len) { + size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); + Pattern *p = (Pattern *)lua_newuserdata(L, size); + luaL_getmetatable(L, PATTERN_T); + lua_pushvalue(L, -1); + lua_setuservalue(L, -3); + lua_setmetatable(L, -2); + p->code = NULL; p->codesize = 0; + return p->tree; +} + + +static TTree *newleaf (lua_State *L, int tag) { + TTree *tree = newtree(L, 1); + tree->tag = tag; + return tree; +} + + +static TTree *newcharset (lua_State *L) { + TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); + tree->tag = TSet; + loopset(i, treebuffer(tree)[i] = 0); + return tree; +} + + +/* +** add to tree a sequence where first sibling is 'sib' (with size +** 'sibsize'); returns position for second sibling +*/ +static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { + tree->tag = TSeq; tree->u.ps = sibsize + 1; + memcpy(sib1(tree), sib, sibsize * sizeof(TTree)); + return sib2(tree); +} + + +/* +** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got +** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it +** must build a sequence of sequence of sequence...) +*/ +static void fillseq (TTree *tree, int tag, int n, const char *s) { + int i; + for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */ + tree->tag = TSeq; tree->u.ps = 2; + sib1(tree)->tag = tag; + sib1(tree)->u.n = s ? (byte)s[i] : 0; + tree = sib2(tree); + } + tree->tag = tag; /* last one does not need TSeq */ + tree->u.n = s ? (byte)s[i] : 0; +} + + +/* +** Numbers as patterns: +** 0 == true (always match); n == TAny repeated 'n' times; +** -n == not (TAny repeated 'n' times) +*/ +static TTree *numtree (lua_State *L, int n) { + if (n == 0) + return newleaf(L, TTrue); + else { + TTree *tree, *nd; + if (n > 0) + tree = nd = newtree(L, 2 * n - 1); + else { /* negative: code it as !(-n) */ + n = -n; + tree = newtree(L, 2 * n); + tree->tag = TNot; + nd = sib1(tree); + } + fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */ + return tree; + } +} + + +/* +** Convert value at index 'idx' to a pattern +*/ +static TTree *getpatt (lua_State *L, int idx, int *len) { + TTree *tree; + switch (lua_type(L, idx)) { + case LUA_TSTRING: { + size_t slen; + const char *s = lua_tolstring(L, idx, &slen); /* get string */ + if (slen == 0) /* empty? */ + tree = newleaf(L, TTrue); /* always match */ + else { + tree = newtree(L, 2 * (slen - 1) + 1); + fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */ + } + break; + } + case LUA_TNUMBER: { + int n = lua_tointeger(L, idx); + tree = numtree(L, n); + break; + } + case LUA_TBOOLEAN: { + tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse)); + break; + } + case LUA_TTABLE: { + tree = newgrammar(L, idx); + break; + } + case LUA_TFUNCTION: { + tree = newtree(L, 2); + tree->tag = TRunTime; + tree->key = addtonewktable(L, 0, idx); + sib1(tree)->tag = TTrue; + break; + } + default: { + return gettree(L, idx, len); + } + } + lua_replace(L, idx); /* put new tree into 'idx' slot */ + if (len) + *len = getsize(L, idx); + return tree; +} + + +/* +** create a new tree, whith a new root and one sibling. +** Sibling must be on the Lua stack, at index 1. +*/ +static TTree *newroot1sib (lua_State *L, int tag) { + int s1; + TTree *tree1 = getpatt(L, 1, &s1); + TTree *tree = newtree(L, 1 + s1); /* create new tree */ + tree->tag = tag; + memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); + copyktable(L, 1); + return tree; +} + + +/* +** create a new tree, whith a new root and 2 siblings. +** Siblings must be on the Lua stack, first one at index 1. +*/ +static TTree *newroot2sib (lua_State *L, int tag) { + int s1, s2; + TTree *tree1 = getpatt(L, 1, &s1); + TTree *tree2 = getpatt(L, 2, &s2); + TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */ + tree->tag = tag; + tree->u.ps = 1 + s1; + memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); + memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); + joinktables(L, 1, sib2(tree), 2); + return tree; +} + + +/* labeled failure begin */ +static TTree *newthrowleaf (lua_State *L) { + TTree *tree = newtree(L, 1); + tree->tag = TThrow; + tree->u.ps = 0; /* there is no recovery rule associated */ + return tree; +} +/* labeled failure end */ + + +static int lp_P (lua_State *L) { + luaL_checkany(L, 1); + getpatt(L, 1, NULL); + lua_settop(L, 1); + return 1; +} + + +/* +** sequence operator; optimizations: +** false x => false, x true => x, true x => x +** (cannot do x . false => false because x may have runtime captures) +*/ +static int lp_seq (lua_State *L) { + TTree *tree1 = getpatt(L, 1, NULL); + TTree *tree2 = getpatt(L, 2, NULL); + if (tree1->tag == TFalse || tree2->tag == TTrue) + lua_pushvalue(L, 1); /* false . x == false, x . true = x */ + else if (tree1->tag == TTrue) + lua_pushvalue(L, 2); /* true . x = x */ + else + newroot2sib(L, TSeq); + return 1; +} + + +/* +** choice operator; optimizations: +** charset / charset => charset +** true / x => true, x / false => x, false / x => x +** (x / true is not equivalent to true) +*/ +static int lp_choice (lua_State *L) { + Charset st1, st2; + TTree *t1 = getpatt(L, 1, NULL); + TTree *t2 = getpatt(L, 2, NULL); + if (tocharset(t1, &st1) && tocharset(t2, &st2)) { + TTree *t = newcharset(L); + loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); + } + else if (nofail(t1) || t2->tag == TFalse) + lua_pushvalue(L, 1); /* true / x => true, x / false => x */ + else if (t1->tag == TFalse) + lua_pushvalue(L, 2); /* false / x => x */ + else + newroot2sib(L, TChoice); + return 1; +} + + +/* +** p^n +*/ +static int lp_star (lua_State *L) { + int size1; + int n = (int)luaL_checkinteger(L, 2); + TTree *tree1 = getpatt(L, 1, &size1); + if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ + TTree *tree = newtree(L, (n + 1) * (size1 + 1)); + if (nullable(tree1)) + luaL_error(L, "loop body may accept empty string"); + while (n--) /* repeat 'n' times */ + tree = seqaux(tree, tree1, size1); + tree->tag = TRep; + memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); + } + else { /* choice (seq tree1 ... choice tree1 true ...) true */ + TTree *tree; + n = -n; + /* size = (choice + seq + tree1 + true) * n, but the last has no seq */ + tree = newtree(L, n * (size1 + 3) - 1); + for (; n > 1; n--) { /* repeat (n - 1) times */ + tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2; + sib2(tree)->tag = TTrue; + tree = sib1(tree); + tree = seqaux(tree, tree1, size1); + } + tree->tag = TChoice; tree->u.ps = size1 + 1; + sib2(tree)->tag = TTrue; + memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); + } + copyktable(L, 1); + return 1; +} + + +/* +** #p == &p +*/ +static int lp_and (lua_State *L) { + newroot1sib(L, TAnd); + return 1; +} + + +/* +** -p == !p +*/ +static int lp_not (lua_State *L) { + newroot1sib(L, TNot); + return 1; +} + + +/* +** [t1 - t2] == Seq (Not t2) t1 +** If t1 and t2 are charsets, make their difference. +*/ +static int lp_sub (lua_State *L) { + Charset st1, st2; + int s1, s2; + TTree *t1 = getpatt(L, 1, &s1); + TTree *t2 = getpatt(L, 2, &s2); + if (tocharset(t1, &st1) && tocharset(t2, &st2)) { + TTree *t = newcharset(L); + loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); + } + else { + TTree *tree = newtree(L, 2 + s1 + s2); + tree->tag = TSeq; /* sequence of... */ + tree->u.ps = 2 + s2; + sib1(tree)->tag = TNot; /* ...not... */ + memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ + memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ + joinktables(L, 1, sib1(tree), 2); + } + return 1; +} + + +static int lp_set (lua_State *L) { + size_t l; + const char *s = luaL_checklstring(L, 1, &l); + TTree *tree = newcharset(L); + while (l--) { + setchar(treebuffer(tree), (byte)(*s)); + s++; + } + return 1; +} + + +static int lp_range (lua_State *L) { + int arg; + int top = lua_gettop(L); + TTree *tree = newcharset(L); + for (arg = 1; arg <= top; arg++) { + int c; + size_t l; + const char *r = luaL_checklstring(L, arg, &l); + luaL_argcheck(L, l == 2, arg, "range must have two characters"); + for (c = (byte)r[0]; c <= (byte)r[1]; c++) + setchar(treebuffer(tree), c); + } + return 1; +} + + +/* +** Fills a tree node with basic information about the UTF-8 code point +** 'cpu': its value in 'n', its length in 'cap', and its first byte in +** 'key' +*/ +static void codeutftree (lua_State *L, TTree *t, lua_Unsigned cpu, int arg) { + int len, fb, cp; + cp = (int)cpu; + if (cp <= 0x7f) { /* one byte? */ + len = 1; + fb = cp; + } else if (cp <= 0x7ff) { + len = 2; + fb = 0xC0 | (cp >> 6); + } else if (cp <= 0xffff) { + len = 3; + fb = 0xE0 | (cp >> 12); + } + else { + luaL_argcheck(L, cpu <= 0x10ffffu, arg, "invalid code point"); + len = 4; + fb = 0xF0 | (cp >> 18); + } + t->u.n = cp; + t->cap = len; + t->key = fb; +} + + +static int lp_utfr (lua_State *L) { + lua_Unsigned from = (lua_Unsigned)luaL_checkinteger(L, 1); + lua_Unsigned to = (lua_Unsigned)luaL_checkinteger(L, 2); + luaL_argcheck(L, from <= to, 2, "empty range"); + if (to <= 0x7f) { /* ascii range? */ + TTree *tree = newcharset(L); /* code it as a regular charset */ + unsigned int f; + for (f = (int)from; f <= to; f++) + setchar(treebuffer(tree), f); + } + else { /* multi-byte utf-8 range */ + TTree *tree = newtree(L, 2); + tree->tag = TUTFR; + codeutftree(L, tree, from, 1); + sib1(tree)->tag = TXInfo; + codeutftree(L, sib1(tree), to, 2); + } + return 1; +} + + +/* +** Look-behind predicate +*/ +static int lp_behind (lua_State *L) { + TTree *tree; + TTree *tree1 = getpatt(L, 1, NULL); + int n = fixedlen(tree1); + luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length"); + luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); + luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); + tree = newroot1sib(L, TBehind); + tree->u.n = n; + return 1; +} + + +/* labeled failure begin */ +/* +** Throws a label +*/ +static int lp_throw (lua_State *L) { + TTree * tree; + luaL_checkstring(L, -1); + tree = newthrowleaf(L); + tree->key = addtonewktable(L, 0, 1); + return 1; +} +/* labeled failure end */ + + +/* +** Create a non-terminal +*/ +static int lp_V (lua_State *L) { + TTree *tree = newleaf(L, TOpenCall); + luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); + tree->key = addtonewktable(L, 0, 1); + return 1; +} + + +/* +** Create a tree for a non-empty capture, with a body and +** optionally with an associated Lua value (at index 'labelidx' in the +** stack) +*/ +static int capture_aux (lua_State *L, int cap, int labelidx) { + TTree *tree = newroot1sib(L, TCapture); + tree->cap = cap; + tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx); + return 1; +} + + +/* +** Fill a tree with an empty capture, using an empty (TTrue) sibling. +** (The 'key' field must be filled by the caller to finish the tree.) +*/ +static TTree *auxemptycap (TTree *tree, int cap) { + tree->tag = TCapture; + tree->cap = cap; + sib1(tree)->tag = TTrue; + return tree; +} + + +/* +** Create a tree for an empty capture. +*/ +static TTree *newemptycap (lua_State *L, int cap, int key) { + TTree *tree = auxemptycap(newtree(L, 2), cap); + tree->key = key; + return tree; +} + + +/* +** Create a tree for an empty capture with an associated Lua value. +*/ +static TTree *newemptycapkey (lua_State *L, int cap, int idx) { + TTree *tree = auxemptycap(newtree(L, 2), cap); + tree->key = addtonewktable(L, 0, idx); + return tree; +} + + +/* +** Captures with syntax p / v +** (function capture, query capture, string capture, or number capture) +*/ +static int lp_divcapture (lua_State *L) { + switch (lua_type(L, 2)) { + case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2); + case LUA_TTABLE: return capture_aux(L, Cquery, 2); + case LUA_TSTRING: return capture_aux(L, Cstring, 2); + case LUA_TNUMBER: { + int n = lua_tointeger(L, 2); + TTree *tree = newroot1sib(L, TCapture); + luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number"); + tree->cap = Cnum; + tree->key = n; + return 1; + } + default: return luaL_argerror(L, 2, "invalid replacement value"); + } +} + + +static int lp_substcapture (lua_State *L) { + return capture_aux(L, Csubst, 0); +} + + +static int lp_tablecapture (lua_State *L) { + return capture_aux(L, Ctable, 0); +} + + +static int lp_groupcapture (lua_State *L) { + if (lua_isnoneornil(L, 2)) + return capture_aux(L, Cgroup, 0); + else + return capture_aux(L, Cgroup, 2); +} + + +static int lp_foldcapture (lua_State *L) { + luaL_checktype(L, 2, LUA_TFUNCTION); + return capture_aux(L, Cfold, 2); +} + + +static int lp_simplecapture (lua_State *L) { + return capture_aux(L, Csimple, 0); +} + + +static int lp_poscapture (lua_State *L) { + newemptycap(L, Cposition, 0); + return 1; +} + + +static int lp_argcapture (lua_State *L) { + int n = (int)luaL_checkinteger(L, 1); + luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); + newemptycap(L, Carg, n); + return 1; +} + + +static int lp_backref (lua_State *L) { + luaL_checkany(L, 1); + newemptycapkey(L, Cbackref, 1); + return 1; +} + + +/* +** Constant capture +*/ +static int lp_constcapture (lua_State *L) { + int i; + int n = lua_gettop(L); /* number of values */ + if (n == 0) /* no values? */ + newleaf(L, TTrue); /* no capture */ + else if (n == 1) + newemptycapkey(L, Cconst, 1); /* single constant capture */ + else { /* create a group capture with all values */ + TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); + newktable(L, n); /* create a 'ktable' for new tree */ + tree->tag = TCapture; + tree->cap = Cgroup; + tree->key = 0; + tree = sib1(tree); + for (i = 1; i <= n - 1; i++) { + tree->tag = TSeq; + tree->u.ps = 3; /* skip TCapture and its sibling */ + auxemptycap(sib1(tree), Cconst); + sib1(tree)->key = addtoktable(L, i); + tree = sib2(tree); + } + auxemptycap(tree, Cconst); + tree->key = addtoktable(L, i); + } + return 1; +} + + +static int lp_matchtime (lua_State *L) { + TTree *tree; + luaL_checktype(L, 2, LUA_TFUNCTION); + tree = newroot1sib(L, TRunTime); + tree->key = addtonewktable(L, 1, 2); + return 1; +} + +/* }====================================================== */ + + +/* +** {====================================================== +** Grammar - Tree generation +** ======================================================= +*/ + +/* +** push on the stack the index and the pattern for the +** initial rule of grammar at index 'arg' in the stack; +** also add that index into position table. +*/ +static void getfirstrule (lua_State *L, int arg, int postab) { + lua_rawgeti(L, arg, 1); /* access first element */ + if (lua_isstring(L, -1)) { /* is it the name of initial rule? */ + lua_pushvalue(L, -1); /* duplicate it to use as key */ + lua_gettable(L, arg); /* get associated rule */ + } + else { + lua_pushinteger(L, 1); /* key for initial rule */ + lua_insert(L, -2); /* put it before rule */ + } + if (!testpattern(L, -1)) { /* initial rule not a pattern? */ + if (lua_isnil(L, -1)) + luaL_error(L, "grammar has no initial rule"); + else + luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2)); + } + lua_pushvalue(L, -2); /* push key */ + lua_pushinteger(L, 1); /* push rule position (after TGrammar) */ + lua_settable(L, postab); /* insert pair at position table */ +} + +/* +** traverse grammar at index 'arg', pushing all its keys and patterns +** into the stack. Create a new table (before all pairs key-pattern) to +** collect all keys and their associated positions in the final tree +** (the "position table"). +** Return the number of rules and (in 'totalsize') the total size +** for the new tree. +*/ +static int collectrules (lua_State *L, int arg, int *totalsize) { + int n = 1; /* to count number of rules */ + int postab = lua_gettop(L) + 1; /* index of position table */ + int size; /* accumulator for total size */ + lua_newtable(L); /* create position table */ + getfirstrule(L, arg, postab); + size = 3 + getsize(L, postab + 2); /* TGrammar + TRule + TXInfo + rule */ + lua_pushnil(L); /* prepare to traverse grammar table */ + while (lua_next(L, arg) != 0) { + if (lua_tonumber(L, -2) == 1 || + lp_equal(L, -2, postab + 1)) { /* initial rule? */ + lua_pop(L, 1); /* remove value (keep key for lua_next) */ + continue; + } + if (!testpattern(L, -1)) /* value is not a pattern? */ + luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2)); + luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); + lua_pushvalue(L, -2); /* push key (to insert into position table) */ + lua_pushinteger(L, size); + lua_settable(L, postab); + size += 2 + getsize(L, -1); /* add 'TRule + TXInfo + rule' to size */ + lua_pushvalue(L, -2); /* push key (for next lua_next) */ + n++; + } + *totalsize = size + 1; /* space for 'TTrue' finishing list of rules */ + return n; +} + + +static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { + int i; + TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ + for (i = 0; i < n; i++) { /* add each rule into new tree */ + int ridx = frule + 2*i + 1; /* index of i-th rule */ + int rulesize; + TTree *rn = gettree(L, ridx, &rulesize); + TTree *pr = sib1(nd); /* points to rule's prerule */ + nd->tag = TRule; + nd->key = 0; /* will be fixed when rule is used */ + pr->tag = TXInfo; + pr->u.n = i; /* rule number */ + nd->u.ps = rulesize + 2; /* point to next rule */ + memcpy(sib1(pr), rn, rulesize * sizeof(TTree)); /* copy rule */ + mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ + nd = sib2(nd); /* move to next rule */ + } + nd->tag = TTrue; /* finish list of rules */ +} + + +/* +** Check whether a tree has potential infinite loops +*/ +static int checkloops (TTree *tree) { + tailcall: + if (tree->tag == TRep && nullable(sib1(tree))) + return 1; + else if (tree->tag == TGrammar) + return 0; /* sub-grammars already checked */ + else { + switch (numsiblings[tree->tag]) { + case 1: /* return checkloops(sib1(tree)); */ + tree = sib1(tree); goto tailcall; + case 2: + if (checkloops(sib1(tree))) return 1; + /* else return checkloops(sib2(tree)); */ + tree = sib2(tree); goto tailcall; + default: assert(numsiblings[tree->tag] == 0); return 0; + } + } +} + + +/* +** Give appropriate error message for 'verifyrule'. If a rule appears +** twice in 'passed', there is path from it back to itself without +** advancing the subject. +*/ +static int verifyerror (lua_State *L, unsigned short *passed, int npassed) { + int i, j; + for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ + for (j = i - 1; j >= 0; j--) { + if (passed[i] == passed[j]) { + lua_rawgeti(L, -1, passed[i]); /* get rule's key */ + return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); + } + } + } + return luaL_error(L, "too many left calls in grammar"); +} + + +/* +** Check whether a rule can be left recursive; raise an error in that +** case; otherwise return 1 iff pattern is nullable. +** The return value is used to check sequences, where the second pattern +** is only relevant if the first is nullable. +** Parameter 'nb' works as an accumulator, to allow tail calls in +** choices. ('nb' true makes function returns true.) +** Parameter 'passed' is a list of already visited rules, 'npassed' +** counts the elements in 'passed'. +** Assume ktable at the top of the stack. +*/ +static int verifyrule (lua_State *L, TTree *tree, unsigned short *passed, + int npassed, int nb) { + tailcall: + switch (tree->tag) { + case TChar: case TSet: case TAny: + case TFalse: case TUTFR: + case TThrow: /* labeled failure */ + return nb; /* cannot pass from here */ + case TTrue: + case TBehind: /* look-behind cannot have calls */ + return 1; + case TNot: case TAnd: case TRep: + /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ + tree = sib1(tree); nb = 1; goto tailcall; + case TCapture: case TRunTime: case TXInfo: + /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ + tree = sib1(tree); goto tailcall; + case TCall: + /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ + tree = sib2(tree); goto tailcall; + case TSeq: /* only check 2nd child if first is nb */ + if (!verifyrule(L, sib1(tree), passed, npassed, 0)) + return nb; + /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ + tree = sib2(tree); goto tailcall; + case TChoice: /* must check both children */ + nb = verifyrule(L, sib1(tree), passed, npassed, nb); + /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ + tree = sib2(tree); goto tailcall; + case TRule: + if (npassed >= MAXRULES) /* too many steps? */ + return verifyerror(L, passed, npassed); /* error */ + else { + passed[npassed++] = tree->key; /* add rule to path */ + /* return verifyrule(L, sib1(tree), passed, npassed); */ + tree = sib1(tree); goto tailcall; + } + case TGrammar: + return nullable(tree); /* sub-grammar cannot be left recursive */ + default: assert(0); return 0; + } +} + + +static void verifygrammar (lua_State *L, TTree *grammar) { + unsigned short passed[MAXRULES]; + TTree *rule; + /* check left-recursive rules */ + for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { + if (rule->key == 0) continue; /* unused rule */ + verifyrule(L, sib1(rule), passed, 0, 0); + } + assert(rule->tag == TTrue); + /* check infinite loops inside rules */ + for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { + if (rule->key == 0) continue; /* unused rule */ + if (checkloops(sib1(rule))) { + lua_rawgeti(L, -1, rule->key); /* get rule's key */ + luaL_error(L, "empty loop in rule '%s'", val2str(L, -1)); + } + } + assert(rule->tag == TTrue); +} + + +/* +** Give a name for the initial rule if it is not referenced +*/ +static void initialrulename (lua_State *L, TTree *grammar, int frule) { + if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */ + int n = lua_rawlen(L, -1) + 1; /* index for name */ + lua_pushvalue(L, frule); /* rule's name */ + lua_rawseti(L, -2, n); /* ktable was on the top of the stack */ + sib1(grammar)->key = n; + } +} + + +static TTree *newgrammar (lua_State *L, int arg) { + int treesize; + int frule = lua_gettop(L) + 2; /* position of first rule's key */ + int n = collectrules(L, arg, &treesize); + TTree *g = newtree(L, treesize); + luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules"); + g->tag = TGrammar; g->u.n = n; + lua_newtable(L); /* create 'ktable' */ + lua_setuservalue(L, -2); + buildgrammar(L, g, frule, n); + lua_getuservalue(L, -1); /* get 'ktable' for new tree */ + finalfix(L, frule - 1, g, sib1(g)); + initialrulename(L, g, frule); + verifygrammar(L, g); + lua_pop(L, 1); /* remove 'ktable' */ + lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */ + lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */ + return g; /* new table at the top of the stack */ +} + +/* }====================================================== */ + + +static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { + lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ + finalfix(L, 0, NULL, p->tree); + lua_pop(L, 1); /* remove 'ktable' */ + return compile(L, p); +} + + +static int lp_printtree (lua_State *L) { + TTree *tree = getpatt(L, 1, NULL); + int c = lua_toboolean(L, 2); + if (c) { + lua_getuservalue(L, 1); /* push 'ktable' (may be used by 'finalfix') */ + finalfix(L, 0, NULL, tree); + lua_pop(L, 1); /* remove 'ktable' */ + } + printktable(L, 1); + printtree(tree, 0); + return 0; +} + + +static int lp_printcode (lua_State *L) { + Pattern *p = getpattern(L, 1); + printktable(L, 1); + if (p->code == NULL) /* not compiled yet? */ + prepcompile(L, p, 1); + printpatt(p->code, p->codesize); + return 0; +} + + +/* +** Get the initial position for the match, interpreting negative +** values from the end of the subject +*/ +static size_t initposition (lua_State *L, size_t len) { + lua_Integer ii = luaL_optinteger(L, 3, 1); + if (ii > 0) { /* positive index? */ + if ((size_t)ii <= len) /* inside the string? */ + return (size_t)ii - 1; /* return it (corrected to 0-base) */ + else return len; /* crop at the end */ + } + else { /* negative index */ + if ((size_t)(-ii) <= len) /* inside the string? */ + return len - ((size_t)(-ii)); /* return position from the end */ + else return 0; /* crop at the beginning */ + } +} + + +/* +** Main match function +*/ +static int lp_match (lua_State *L) { + Capture capture[INITCAPSIZE]; + const char *r; + size_t l; + Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1)); + Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1); + const char *s = luaL_checklstring(L, SUBJIDX, &l); + size_t i = initposition(L, l); + int ptop = lua_gettop(L); + short labelf; /* labeled failure */ + const char *sfail = NULL; /* labeled failure */ + lua_pushnil(L); /* initialize subscache */ + lua_pushlightuserdata(L, capture); /* initialize caplistidx */ + lua_getuservalue(L, 1); /* initialize penvidx */ + r = match(L, s, s + i, s + l, code, capture, ptop, &labelf, &sfail); /* labeled failure */ + if (r == NULL) { /* labeled failure begin */ + lua_pushnil(L); + if (labelf) { + lua_Integer luaInt; + lua_rawgeti(L, ktableidx(ptop), labelf); + luaInt = lua_tointeger(L, -1); + if (luaInt) { + lua_pop(L, 1); + lua_pushinteger(L, luaInt); + } + } + else + lua_pushstring(L, "fail"); + lua_pushinteger(L, sfail - (s + i) + 1); /* subject position related to the error */ + return 3; + } /* labeled failure end */ + return getcaptures(L, s, r, ptop); +} + + + +/* +** {====================================================== +** Library creation and functions not related to matching +** ======================================================= +*/ + +/* maximum limit for stack size */ +#define MAXLIM (INT_MAX / 100) + +static int lp_setmax (lua_State *L) { + lua_Integer lim = luaL_checkinteger(L, 1); + luaL_argcheck(L, 0 < lim && lim <= MAXLIM, 1, "out of range"); + lua_settop(L, 1); + lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); + return 0; +} + + +static int lp_type (lua_State *L) { + if (testpattern(L, 1)) + lua_pushliteral(L, "pattern"); + else + lua_pushnil(L); + return 1; +} + + +int lp_gc (lua_State *L) { + Pattern *p = getpattern(L, 1); + realloccode(L, p, 0); /* delete code block */ + return 0; +} + + +static void createcat (lua_State *L, const char *catname, int (catf) (int)) { + TTree *t = newcharset(L); + int i; + for (i = 0; i <= UCHAR_MAX; i++) + if (catf(i)) setchar(treebuffer(t), i); + lua_setfield(L, -2, catname); +} + + +static int lp_locale (lua_State *L) { + if (lua_isnoneornil(L, 1)) { + lua_settop(L, 0); + lua_createtable(L, 0, 12); + } + else { + luaL_checktype(L, 1, LUA_TTABLE); + lua_settop(L, 1); + } + createcat(L, "alnum", isalnum); + createcat(L, "alpha", isalpha); + createcat(L, "cntrl", iscntrl); + createcat(L, "digit", isdigit); + createcat(L, "graph", isgraph); + createcat(L, "lower", islower); + createcat(L, "print", isprint); + createcat(L, "punct", ispunct); + createcat(L, "space", isspace); + createcat(L, "upper", isupper); + createcat(L, "xdigit", isxdigit); + return 1; +} + + +static struct luaL_Reg pattreg[] = { + {"ptree", lp_printtree}, + {"pcode", lp_printcode}, + {"match", lp_match}, + {"B", lp_behind}, + {"V", lp_V}, + {"C", lp_simplecapture}, + {"Cc", lp_constcapture}, + {"Cmt", lp_matchtime}, + {"Cb", lp_backref}, + {"Carg", lp_argcapture}, + {"Cp", lp_poscapture}, + {"Cs", lp_substcapture}, + {"Ct", lp_tablecapture}, + {"Cf", lp_foldcapture}, + {"Cg", lp_groupcapture}, + {"P", lp_P}, + {"S", lp_set}, + {"R", lp_range}, + {"utfR", lp_utfr}, + {"locale", lp_locale}, + {"version", NULL}, + {"setmaxstack", lp_setmax}, + {"type", lp_type}, + {"T", lp_throw}, /* labeled failure throw */ + {NULL, NULL} +}; + + +static struct luaL_Reg metareg[] = { + {"__mul", lp_seq}, + {"__add", lp_choice}, + {"__pow", lp_star}, + {"__gc", lp_gc}, + {"__len", lp_and}, + {"__div", lp_divcapture}, + {"__unm", lp_not}, + {"__sub", lp_sub}, + {NULL, NULL} +}; + +int luaopen_lpeglabel (lua_State *L); /* labeled failure */ +int luaopen_lpeglabel (lua_State *L) { /* labeled failure */ + luaL_newmetatable(L, PATTERN_T); + lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ + lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); + luaL_setfuncs(L, metareg, 0); + luaL_newlib(L, pattreg); + lua_pushvalue(L, -1); + lua_setfield(L, -3, "__index"); + lua_pushliteral(L, "LPegLabel " VERSION); /* labeled failure */ + lua_setfield(L, -2, "version"); + return 1; +} + +/* }====================================================== */ diff --git a/lpltree.h b/lpltree.h new file mode 100644 index 0000000..646ef48 --- /dev/null +++ b/lpltree.h @@ -0,0 +1,88 @@ +/* +** $Id: lpltree.h $ +*/ + +#if !defined(lpltree_h) +#define lpltree_h + + +#include "lpltypes.h" + + +/* +** types of trees +*/ +typedef enum TTag { + TChar = 0, /* 'n' = char */ + TSet, /* the set is stored in next CHARSETSIZE bytes */ + TAny, + TTrue, + TFalse, + TUTFR, /* range of UTF-8 codepoints; 'n' has initial codepoint; + 'cap' has length; 'key' has first byte; + extra info is similar for end codepoint */ + TRep, /* 'sib1'* */ + TSeq, /* 'sib1' 'sib2' */ + TChoice, /* 'sib1' / 'sib2' */ + TNot, /* !'sib1' */ + TAnd, /* &'sib1' */ + TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ + TOpenCall, /* ktable[key] is rule's key */ + TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); + 'sib1' is rule's pattern pre-rule; 'sib2' is next rule; + extra info 'n' is rule's sequential number */ + TXInfo, /* extra info */ + TGrammar, /* 'sib1' is initial (and first) rule */ + TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ + TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); + ktable[key] is Lua value associated with capture; + 'sib1' is capture body */ + TRunTime, /* run-time capture: 'key' is Lua function; + 'sib1' is capture body */ + TThrow, /* labeled failure: ktable[key] is label's name */ + +} TTag; + + +/* +** Tree trees +** The first child of a tree (if there is one) is immediately after +** the tree. A reference to a second child (ps) is its position +** relative to the position of the tree itself. +*/ +typedef struct TTree { + byte tag; + byte cap; /* kind of capture (if it is a capture) */ + unsigned short key; /* key in ktable for Lua data (0 if no key) */ + union { + int ps; /* occasional second child */ + int n; /* occasional counter */ + } u; +} TTree; + + +/* +** A complete pattern has its tree plus, if already compiled, +** its corresponding code +*/ +typedef struct Pattern { + union Instruction *code; + int codesize; + TTree tree[1]; +} Pattern; + + +/* number of children for each tree */ +LUAI_FUNC const byte numsiblings[]; + +/* access to children */ +#define sib1(t) ((t) + 1) +#define sib2(t) ((t) + (t)->u.ps) + + + + + + +#endif + diff --git a/lpltypes.h b/lpltypes.h new file mode 100644 index 0000000..c5fbb1e --- /dev/null +++ b/lpltypes.h @@ -0,0 +1,157 @@ +/* +** $Id: lpltypes.h $ +** LPeg - PEG pattern matching for Lua +** Copyright 2007-2019, Lua.org & PUC-Rio (see 'lpeg.html' for license) +** written by Roberto Ierusalimschy +*/ + +#if !defined(lpltypes_h) +#define lpltypes_h + + +#include +#include + +#include "lua.h" + + +#define VERSION "1.6.0" + + +#define PATTERN_T "lpeglabel-pattern" +#define MAXSTACKIDX "lpeglabel-maxstack" + + +/* +** compatibility with Lua 5.1 +*/ +#if (LUA_VERSION_NUM == 501) + +#define lp_equal lua_equal + +#define lua_getuservalue lua_getfenv +#define lua_setuservalue lua_setfenv + +#define lua_rawlen lua_objlen + +#define luaL_setfuncs(L,f,n) luaL_register(L,NULL,f) +#define luaL_newlib(L,f) luaL_register(L,"lpeglabel",f) + +typedef size_t lua_Unsigned; + +#endif + + +#if !defined(lp_equal) +#define lp_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) +#endif + + +/* default maximum size for call/backtrack stack */ +#if !defined(MAXBACK) +#define MAXBACK 400 +#endif + + +/* maximum number of rules in a grammar (limited by 'unsigned short') */ +#if !defined(MAXRULES) +#define MAXRULES 1000 +#endif + + + +/* initial size for capture's list */ +#define INITCAPSIZE 32 + + +/* index, on Lua stack, for subject */ +#define SUBJIDX 2 + +/* number of fixed arguments to 'match' (before capture arguments) */ +#define FIXEDARGS 3 + +/* index, on Lua stack, for capture list */ +#define caplistidx(ptop) ((ptop) + 2) + +/* index, on Lua stack, for pattern's ktable */ +#define ktableidx(ptop) ((ptop) + 3) + +/* index, on Lua stack, for backtracking stack */ +#define stackidx(ptop) ((ptop) + 4) + + + +typedef unsigned char byte; + + +#define BITSPERCHAR 8 + +#define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1) + + + +typedef struct Charset { + byte cs[CHARSETSIZE]; +} Charset; + + + +#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } + +/* access to charset */ +#define treebuffer(t) ((byte *)((t) + 1)) + +/* number of slots needed for 'n' bytes */ +#define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1) + +/* set 'b' bit in charset 'cs' */ +#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) + + +/* +** in capture instructions, 'kind' of capture and its offset are +** packed in field 'aux', 4 bits for each +*/ +#define getkind(op) ((op)->i.aux & 0xF) +#define getoff(op) (((op)->i.aux >> 4) & 0xF) +#define joinkindoff(k,o) ((k) | ((o) << 4)) + +#define MAXOFF 0xF +#define MAXAUX 0xFF + + +/* maximum number of bytes to look behind */ +#define MAXBEHIND MAXAUX + + +/* maximum size (in elements) for a pattern */ +#define MAXPATTSIZE (SHRT_MAX - 10) + + +/* size (in elements) for an instruction plus extra l bytes */ +#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1) + + +/* size (in elements) for a ISet instruction */ +#define CHARSETINSTSIZE instsize(CHARSETSIZE) + +/* size (in elements) for a IFunc instruction */ +#define funcinstsize(p) ((p)->i.aux + 2) + + + +#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7)))) + +/* labeled failure begin */ +#define LFAIL 0 + +/* update the farthest failure */ +#define updatefarthest(s1,s2) { if ((s2) > (s1)) s1 = s2; } + +#define OUTPRED 0 + +#define INPRED 1 +/* labeled failure end */ + +#endif + diff --git a/lplvm.c b/lplvm.c new file mode 100644 index 0000000..1dddfe6 --- /dev/null +++ b/lplvm.c @@ -0,0 +1,499 @@ +/* +** $Id: lplvm.c $ +** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) +*/ + +#include +#include + + +#include "lua.h" +#include "lauxlib.h" + +#include "lplcap.h" +#include "lpltypes.h" +#include "lplvm.h" +#include "lplprint.h" + + +/* initial size for call/backtrack stack */ +#if !defined(INITBACK) +#define INITBACK MAXBACK +#endif + + +#define getoffset(p) (((p) + 1)->offset) + +static const Instruction giveup = {{IGiveup, 0, 0}}; + + +/* +** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid. +*/ +static const char *utf8_decode (const char *o, int *val) { + static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFFu}; + const unsigned char *s = (const unsigned char *)o; + unsigned int c = s[0]; /* first byte */ + unsigned int res = 0; /* final result */ + if (c < 0x80) /* ascii? */ + res = c; + else { + int count = 0; /* to count number of continuation bytes */ + while (c & 0x40) { /* still have continuation bytes? */ + int cc = s[++count]; /* read next byte */ + if ((cc & 0xC0) != 0x80) /* not a continuation byte? */ + return NULL; /* invalid byte sequence */ + res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */ + c <<= 1; /* to test next bit */ + } + res |= (c & 0x7F) << (count * 5); /* add first byte */ + if (count > 3 || res > 0x10FFFFu || res <= limits[count]) + return NULL; /* invalid byte sequence */ + s += count; /* skip continuation bytes read */ + } + *val = res; + return (const char *)s + 1; /* +1 to include first byte */ +} + + +/* +** {====================================================== +** Virtual Machine +** ======================================================= +*/ + + +typedef struct Stack { + const char *s; /* saved position (or NULL for calls) */ + const Instruction *p; /* next instruction */ + int caplevel; + byte labenv; /* labeled failure */ + byte predchoice; /* labeled failure */ +} Stack; + + +#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) + + +/* +** Ensures the size of array 'capture' (with size '*capsize' and +** 'captop' elements being used) is enough to accomodate 'n' extra +** elements plus one. (Because several opcodes add stuff to the capture +** array, it is simpler to ensure the array always has at least one free +** slot upfront and check its size later.) +*/ +static Capture *growcap (lua_State *L, Capture *capture, int *capsize, + int captop, int n, int ptop) { + if (*capsize - captop > n) + return capture; /* no need to grow array */ + else { /* must grow */ + Capture *newc; + int newsize = captop + n + 1; /* minimum size needed */ + if (newsize < INT_MAX/((int)sizeof(Capture) * 2)) + newsize *= 2; /* twice that size, if not too big */ + else if (newsize >= INT_MAX/((int)sizeof(Capture))) + luaL_error(L, "too many captures"); + newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture)); + memcpy(newc, capture, captop * sizeof(Capture)); + *capsize = newsize; + lua_replace(L, caplistidx(ptop)); + return newc; + } +} + + +/* +** Double the size of the stack +*/ +static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { + Stack *stack = getstackbase(L, ptop); + Stack *newstack; + int n = *stacklimit - stack; /* current stack size */ + int max, newn; + lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); + max = lua_tointeger(L, -1); /* maximum allowed size */ + lua_pop(L, 1); + if (n >= max) /* already at maximum size? */ + luaL_error(L, "backtrack stack overflow (current limit is %d)", max); + newn = 2 * n; /* new size */ + if (newn > max) newn = max; + newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); + memcpy(newstack, stack, n * sizeof(Stack)); + lua_replace(L, stackidx(ptop)); + *stacklimit = newstack + newn; + return newstack + n; /* return next position */ +} + + +/* +** Interpret the result of a dynamic capture: false -> fail; +** true -> keep current position; number -> next position. +** Return new subject position. 'fr' is stack index where +** is the result; 'curr' is current subject position; 'limit' +** is subject's size. +*/ +static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { + lua_Integer res; + if (!lua_toboolean(L, fr)) { /* false value? */ + lua_settop(L, fr - 1); /* remove results */ + return -1; /* and fail */ + } + else if (lua_isboolean(L, fr)) /* true? */ + res = curr; /* keep current position */ + else { + res = lua_tointeger(L, fr) - 1; /* new position */ + if (res < curr || res > limit) + luaL_error(L, "invalid position returned by match-time capture"); + } + lua_remove(L, fr); /* remove first result (offset) */ + return res; +} + + +/* +** Add capture values returned by a dynamic capture to the list +** 'capture', nested inside a group. 'fd' indexes the first capture +** value, 'n' is the number of values (at least 1). The open group +** capture is already in 'capture', before the place for the new entries. +*/ +static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { + int i; + assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); + capture[-1].idx = 0; /* make group capture an anonymous group */ + for (i = 0; i < n; i++) { /* add runtime captures */ + capture[i].kind = Cruntime; + capture[i].siz = 1; /* mark it as closed */ + capture[i].idx = fd + i; /* stack index of capture value */ + capture[i].s = s; + } + capture[n].kind = Cclose; /* close group */ + capture[n].siz = 1; + capture[n].s = s; +} + + +/* +** Remove dynamic captures from the Lua stack (called in case of failure) +*/ +static int removedyncap (lua_State *L, Capture *capture, + int level, int last) { + int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */ + int top = lua_gettop(L); + if (id == 0) return 0; /* no dynamic captures? */ + lua_settop(L, id - 1); /* remove captures */ + return top - id + 1; /* number of values removed */ +} + + +/* +** Opcode interpreter +*/ +const char *match (lua_State *L, const char *o, const char *s, const char *e, + Instruction *op, Capture *capture, int ptop, short *labelf, const char **sfail) { /* labeled failure */ + Stack stackbase[INITBACK]; + Stack *stacklimit = stackbase + INITBACK; + Stack *stack = stackbase; /* point to first empty slot in stack */ + int capsize = INITCAPSIZE; + int captop = 0; /* point to first empty slot in captures */ + int ndyncap = 0; /* number of dynamic captures (in Lua stack) */ + const Instruction *p = op; /* current instruction */ + byte insidepred = OUTPRED; /* labeled failure: label environment is off inside predicates */ + stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack->labenv = insidepred; stack++; /* labeled failure */ + *sfail = s; /* labeled failure */ + lua_pushlightuserdata(L, stackbase); + for (;;) { +#if defined(DEBUG) + printf("-------------------------------------\n"); + printcaplist(capture, capture + captop); + printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", + s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); + printinst(op, p); +#endif + assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); + assert(insidepred == INPRED || insidepred == OUTPRED); + switch ((Opcode)p->i.code) { + case IEnd: { + assert(stack == getstackbase(L, ptop) + 1); + capture[captop].kind = Cclose; + capture[captop].s = NULL; + return s; + } + case IGiveup: { + assert(stack == getstackbase(L, ptop)); + return NULL; + } + case IRet: { + assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); + p = (--stack)->p; + continue; + } + case IAny: { + if (s < e) { p++; s++; } + else { + *labelf = LFAIL; /* labeled failure */ + updatefarthest(*sfail, s); /*labeled failure */ + goto fail; + } + continue; + } + case IUTFR: { + int codepoint; + if (s >= e) + goto fail; + s = utf8_decode (s, &codepoint); + if (s && p[1].offset <= codepoint && codepoint <= utf_to(p)) + p += 2; + else { + *labelf = LFAIL; /* labeled failure */ + updatefarthest(*sfail, s); /*labeled failure */ + goto fail; + } + continue; + } + case ITestAny: { + if (s < e) p += 2; + else p += getoffset(p); + continue; + } + case IChar: { + if ((byte)*s == p->i.aux && s < e) { p++; s++; } + else { + *labelf = LFAIL; /* labeled failure */ + updatefarthest(*sfail, s); /*labeled failure */ + goto fail; + } + continue; + } + case ITestChar: { + if ((byte)*s == p->i.aux && s < e) p += 2; + else p += getoffset(p); + continue; + } + case ISet: { + int c = (byte)*s; + if (testchar((p+1)->buff, c) && s < e) + { p += CHARSETINSTSIZE; s++; } + else { + *labelf = LFAIL; /* labeled failure */ + updatefarthest(*sfail, s); /*labeled failure */ + goto fail; + } + continue; + } + case ITestSet: { + int c = (byte)*s; + if (testchar((p + 2)->buff, c) && s < e) + p += 1 + CHARSETINSTSIZE; + else p += getoffset(p); + continue; + } + case IBehind: { + int n = p->i.aux; + if (n > s - o) { + *labelf = LFAIL; /* labeled failure */ + updatefarthest(*sfail, s); /*labeled failure */ + goto fail; + } + s -= n; p++; + continue; + } + case ISpan: { + for (; s < e; s++) { + int c = (byte)*s; + if (!testchar((p+1)->buff, c)) break; + } + p += CHARSETINSTSIZE; + continue; + } + case IJmp: { + p += getoffset(p); + continue; + } + case IChoice: { + if (stack == stacklimit) + stack = doublestack(L, &stacklimit, ptop); + stack->p = p + getoffset(p); + stack->s = s; + stack->caplevel = captop; + stack->labenv = insidepred; /* labeled failure */ + stack->predchoice = 0; /* labeled failure */ + stack++; + p += 2; + continue; + } + case IPredChoice: { /* labeled failure: new instruction */ + if (stack == stacklimit) + stack = doublestack(L, &stacklimit, ptop); + stack->p = p + getoffset(p); + stack->s = s; + stack->caplevel = captop; + stack->labenv = insidepred; + stack->predchoice = 1; + stack++; + insidepred = INPRED; + p += 2; + continue; + } + case ICall: { + if (stack == stacklimit) + stack = doublestack(L, &stacklimit, ptop); + stack->s = NULL; + stack->p = p + 2; /* save return address */ + stack++; + p += getoffset(p); + continue; + } + case ICommit: { + assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); + stack--; + p += getoffset(p); + continue; + } + case IPartialCommit: { + assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); + (stack - 1)->s = s; + (stack - 1)->caplevel = captop; + p += getoffset(p); + continue; + } + case IBackCommit: { + assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); + s = (--stack)->s; + insidepred = stack->labenv; /* labeled failure */ + if (ndyncap > 0) /* are there matchtime captures? */ + ndyncap -= removedyncap(L, capture, stack->caplevel, captop); + captop = stack->caplevel; + p += getoffset(p); + continue; + } + case IThrow: { /* labeled failure */ + if (insidepred == OUTPRED) { + *labelf = (p+1)->i.key; + stack = getstackbase(L, ptop); + stack++; + } + else { + while (!(stack-1)->predchoice) { + --stack; + } + *labelf = LFAIL; + } + *sfail = s; + goto fail; + } + case IThrowRec: { /* labeled failure */ + if (insidepred == OUTPRED) { + *labelf = (p+2)->i.key; + *sfail = s; + if (stack == stacklimit) + stack = doublestack(L, &stacklimit, ptop); + stack->s = NULL; + stack->p = p + 3; + stack->caplevel = captop; + stack++; + p += getoffset(p); + continue; + } else { + while (!(stack-1)->predchoice) { + --stack; + } + *labelf = LFAIL; + *sfail = s; + } + goto fail; + } + case IFailTwice: + assert(stack > getstackbase(L, ptop)); + stack--; + /* go through */ + case IFail: + *labelf = LFAIL; /* labeled failure */ + updatefarthest(*sfail, s); /*labeled failure */ + fail: { /* pattern failed: try to backtrack */ + do { /* remove pending calls */ + assert(stack > getstackbase(L, ptop)); + s = (--stack)->s; + } while (s == NULL); + if (ndyncap > 0) /* is there matchtime captures? */ + ndyncap -= removedyncap(L, capture, stack->caplevel, captop); + captop = stack->caplevel; + assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv); + insidepred = stack->labenv; /* labeled failure */ + p = stack->p; +#if defined(DEBUG) + printf("**FAIL**\n"); +#endif + continue; + } + case ICloseRunTime: { + CapState cs; + int rem, res, n; + int fr = lua_gettop(L) + 1; /* stack index of first result */ + cs.reclevel = 0; cs.L = L; + cs.s = o; cs.ocap = capture; cs.ptop = ptop; + n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ + captop -= n; /* remove nested captures */ + ndyncap -= rem; /* update number of dynamic captures */ + fr -= rem; /* 'rem' items were popped from Lua stack */ + res = resdyncaptures(L, fr, s - o, e - o); /* get result */ + if (res == -1) { /* fail? */ + *labelf = LFAIL; /* labeled failure */ + updatefarthest(*sfail, s); /*labeled failure */ + goto fail; + } + s = o + res; /* else update current position */ + n = lua_gettop(L) - fr + 1; /* number of new captures */ + ndyncap += n; /* update number of dynamic captures */ + if (n == 0) /* no new captures? */ + captop--; /* remove open group */ + else { /* new captures; keep original open group */ + if (fr + n >= SHRT_MAX) + luaL_error(L, "too many results in match-time capture"); + /* add new captures + close group to 'capture' list */ + capture = growcap(L, capture, &capsize, captop, n + 1, ptop); + adddyncaptures(s, capture + captop, n, fr); + captop += n + 1; /* new captures + close group */ + } + p++; + continue; + } + case ICloseCapture: { + const char *s1 = s; + assert(captop > 0); + /* if possible, turn capture into a full capture */ + if (capture[captop - 1].siz == 0 && + s1 - capture[captop - 1].s < UCHAR_MAX) { + capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; + p++; + continue; + } + else { + capture[captop].siz = 1; /* mark entry as closed */ + capture[captop].s = s; + goto pushcapture; + } + } + case IOpenCapture: + capture[captop].siz = 0; /* mark entry as open */ + capture[captop].s = s; + goto pushcapture; + case IFullCapture: + capture[captop].siz = getoff(p) + 1; /* save capture size */ + capture[captop].s = s - getoff(p); + /* goto pushcapture; */ + pushcapture: { + capture[captop].idx = p->i.key; + capture[captop].kind = getkind(p); + captop++; + capture = growcap(L, capture, &capsize, captop, 0, ptop); + p++; + continue; + } + default: assert(0); return NULL; + } + } +} + +/* }====================================================== */ + + diff --git a/lplvm.h b/lplvm.h new file mode 100644 index 0000000..19a1502 --- /dev/null +++ b/lplvm.h @@ -0,0 +1,66 @@ +/* +** $Id: lplvm.h $ +*/ + +#if !defined(lplvm_h) +#define lplvm_h + +#include "lplcap.h" + + +/* Virtual Machine's instructions */ +typedef enum Opcode { + IAny, /* if no char, fail */ + IChar, /* if char != aux, fail */ + ISet, /* if char not in buff, fail */ + ITestAny, /* in no char, jump to 'offset' */ + ITestChar, /* if char != aux, jump to 'offset' */ + ITestSet, /* if char not in buff, jump to 'offset' */ + ISpan, /* read a span of chars in buff */ + IUTFR, /* if codepoint not in range [offset, utf_to], fail */ + IBehind, /* walk back 'aux' characters (fail if not possible) */ + IRet, /* return from a rule */ + IEnd, /* end of pattern */ + IChoice, /* stack a choice; next fail will jump to 'offset' */ + IPredChoice, /* labeld failure: stack a choice; changes label env next fail will jump to 'offset' */ /*labeled failure */ + IJmp, /* jump to 'offset' */ + ICall, /* call rule at 'offset' */ + IOpenCall, /* call rule number 'key' (must be closed to a ICall) */ + ICommit, /* pop choice and jump to 'offset' */ + IPartialCommit, /* update top choice to current position and jump */ + IBackCommit, /* backtrack like "fail" but jump to its own 'offset' */ + IFailTwice, /* pop one choice and then fail */ + IFail, /* go back to saved state on choice and jump to saved offset */ + IGiveup, /* internal use */ + IFullCapture, /* complete capture of last 'off' chars */ + IOpenCapture, /* start a capture */ + ICloseCapture, + ICloseRunTime, + IThrow, /* fails with a given label */ /*labeled failure */ + IThrowRec, /* fails with a given label and call rule at 'offset' */ /*labeled failure */ + IEmpty /* to fill empty slots left by optimizations */ +} Opcode; + + + +typedef union Instruction { + struct Inst { + byte code; + byte aux; + short key; + } i; + int offset; + byte buff[1]; +} Instruction; + + +/* extract 24-bit value from an instruction */ +#define utf_to(inst) (((inst)->i.key << 8) | (inst)->i.aux) + + +LUAI_FUNC void printpatt (Instruction *p, int n); +LUAI_FUNC const char *match (lua_State *L, const char *o, const char *s, const char *e, + Instruction *op, Capture *capture, int ptop, short *labelf, const char **sfail); /* labeled failure */ + +#endif + diff --git a/lpprint.c b/lpprint.c deleted file mode 100644 index af03edc..0000000 --- a/lpprint.c +++ /dev/null @@ -1,275 +0,0 @@ -/* -** $Id: lpprint.c $ -** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ - -#include -#include -#include - - -#include "lptypes.h" -#include "lpprint.h" -#include "lpcode.h" - - -#if defined(LPEG_DEBUG) - -/* -** {====================================================== -** Printing patterns (for debugging) -** ======================================================= -*/ - - -void printcharset (const byte *st) { - int i; - printf("["); - for (i = 0; i <= UCHAR_MAX; i++) { - int first = i; - while (testchar(st, i) && i <= UCHAR_MAX) i++; - if (i - 1 == first) /* unary range? */ - printf("(%02x)", first); - else if (i - 1 > first) /* non-empty range? */ - printf("(%02x-%02x)", first, i - 1); - } - printf("]"); -} - - -static const char *capkind (int kind) { - const char *const modes[] = { - "close", "position", "constant", "backref", - "argument", "simple", "table", "function", - "query", "string", "num", "substitution", "fold", - "runtime", "group"}; - return modes[kind]; -} - - -static void printjmp (const Instruction *op, const Instruction *p) { - printf("-> %d", (int)(p + (p + 1)->offset - op)); -} - - -void printinst (const Instruction *op, const Instruction *p) { - const char *const names[] = { - "any", "char", "set", - "testany", "testchar", "testset", - "span", "utf-range", "behind", - "ret", "end", - "choice", "pred_choice", "jmp", "call", "open_call", /* labeled failure */ - "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", - "fullcapture", "opencapture", "closecapture", "closeruntime", - "throw", "throw_rec", /* labeled failure */ - "--" - }; - printf("%02ld: %s ", (long)(p - op), names[p->i.code]); - switch ((Opcode)p->i.code) { - case IChar: { - printf("'%c' (%02x)", p->i.aux, p->i.aux); - break; - } - case ITestChar: { - printf("'%c' (%02x)", p->i.aux, p->i.aux); printjmp(op, p); - break; - } - case IUTFR: { - printf("%d - %d", p[1].offset, utf_to(p)); - break; - } - case IFullCapture: { - printf("%s (size = %d) (idx = %d)", - capkind(getkind(p)), getoff(p), p->i.key); - break; - } - case IOpenCapture: { - printf("%s (idx = %d)", capkind(getkind(p)), p->i.key); - break; - } - case ISet: { - printcharset((p+1)->buff); - break; - } - case ITestSet: { - printcharset((p+2)->buff); printjmp(op, p); - break; - } - case ISpan: { - printcharset((p+1)->buff); - break; - } - case IOpenCall: { - printf("-> %d", (p + 1)->offset); - break; - } - case IBehind: { - printf("%d", p->i.aux); - break; - } - case IJmp: case ICall: case ICommit: case IChoice: - case IPartialCommit: case IBackCommit: case ITestAny: - case IPredChoice: { /* labeled failure */ - printjmp(op, p); - break; - } - case IThrow: { /* labeled failure */ - printf("(idx = %d)", (p + 1)->i.key); - break; - } - case IThrowRec: { /* labeled failure */ - printjmp(op, p); printf(" (idx = %d)", (p + 2)->i.key); - break; - } - default: break; - } - printf("\n"); -} - - -void printpatt (Instruction *p, int n) { - Instruction *op = p; - while (p < op + n) { - printinst(op, p); - p += sizei(p); - } -} - - -#if defined(LPEG_DEBUG) -static void printcap (Capture *cap) { - printf("%s (idx: %d - size: %d) -> %p\n", - capkind(cap->kind), cap->idx, cap->siz, cap->s); -} - - -void printcaplist (Capture *cap, Capture *limit) { - printf(">======\n"); - for (; cap->s && (limit == NULL || cap < limit); cap++) - printcap(cap); - printf("=======\n"); -} -#endif - -/* }====================================================== */ - - -/* -** {====================================================== -** Printing trees (for debugging) -** ======================================================= -*/ - -static const char *tagnames[] = { - "char", "set", "any", - "true", "false", "utf8.range", - "rep", - "seq", "choice", - "not", "and", - "call", "opencall", "rule", "xinfo", "grammar", - "behind", - "capture", "run-time", - "throw" /* labeled failure */ -}; - - -void printtree (TTree *tree, int ident) { - int i; - int sibs = numsiblings[tree->tag]; - for (i = 0; i < ident; i++) printf(" "); - printf("%s", tagnames[tree->tag]); - switch (tree->tag) { - case TChar: { - int c = tree->u.n; - if (isprint(c)) - printf(" '%c'\n", c); - else - printf(" (%02X)\n", c); - break; - } - case TSet: { - printcharset(treebuffer(tree)); - printf("\n"); - break; - } - case TUTFR: { - assert(sib1(tree)->tag == TXInfo); - printf(" %d (%02x %d) - %d (%02x %d) \n", - tree->u.n, tree->key, tree->cap, - sib1(tree)->u.n, sib1(tree)->key, sib1(tree)->cap); - break; - } - case TOpenCall: case TCall: { - assert(sib1(sib2(tree))->tag == TXInfo); - printf(" key: %d (rule: %d)\n", tree->key, sib1(sib2(tree))->u.n); - break; - } - case TBehind: { - printf(" %d\n", tree->u.n); - break; - } - case TCapture: { - printf(" kind: '%s' key: %d\n", capkind(tree->cap), tree->key); - break; - } - case TRule: { - printf(" key: %d\n", tree->key); - sibs = 1; /* do not print 'sib2' (next rule) as a sibling */ - break; - } - case TXInfo: { - printf(" n: %d\n", tree->u.n); - break; - } - case TGrammar: { - TTree *rule = sib1(tree); - printf(" %d\n", tree->u.n); /* number of rules */ - for (i = 0; i < tree->u.n; i++) { - printtree(rule, ident + 2); - rule = sib2(rule); - } - assert(rule->tag == TTrue); /* sentinel */ - sibs = 0; /* siblings already handled */ - break; - } - case TThrow: { /* labeled failure */ - if (tree->u.ps != 0) - assert(sib2(tree)->tag == TRule); - printf(" key: %d (rule: %d)\n", tree->key, sib2(tree)->cap); - break; - } - default: - printf("\n"); - break; - } - if (sibs >= 1) { - printtree(sib1(tree), ident + 2); - if (sibs >= 2) - printtree(sib2(tree), ident + 2); - } -} - - -void printktable (lua_State *L, int idx) { - int n, i; - lua_getuservalue(L, idx); - if (lua_isnil(L, -1)) /* no ktable? */ - return; - n = lua_rawlen(L, -1); - printf("["); - for (i = 1; i <= n; i++) { - printf("%d = ", i); - lua_rawgeti(L, -1, i); - if (lua_isstring(L, -1)) - printf("%s ", lua_tostring(L, -1)); - else - printf("%s ", lua_typename(L, lua_type(L, -1))); - lua_pop(L, 1); - } - printf("]\n"); - /* leave ktable at the stack */ -} - -/* }====================================================== */ - -#endif diff --git a/lpprint.h b/lpprint.h deleted file mode 100644 index c4d57a0..0000000 --- a/lpprint.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -** $Id: lpprint.h $ -*/ - - -#if !defined(lpprint_h) -#define lpprint_h - - -#include "lptree.h" -#include "lpvm.h" - - -#if defined(LPEG_DEBUG) - -LUAI_FUNC void printpatt (Instruction *p, int n); -LUAI_FUNC void printtree (TTree *tree, int ident); -LUAI_FUNC void printktable (lua_State *L, int idx); -LUAI_FUNC void printcharset (const byte *st); -LUAI_FUNC void printcaplist (Capture *cap, Capture *limit); -LUAI_FUNC void printinst (const Instruction *op, const Instruction *p); - -#else - -#define printktable(L,idx) \ - luaL_error(L, "function only implemented in debug mode") -#define printtree(tree,i) \ - luaL_error(L, "function only implemented in debug mode") -#define printpatt(p,n) \ - luaL_error(L, "function only implemented in debug mode") - -#endif - - -#endif - diff --git a/lptree.c b/lptree.c deleted file mode 100644 index 4afbae7..0000000 --- a/lptree.c +++ /dev/null @@ -1,1403 +0,0 @@ -/* -** $Id: lptree.c $ -** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ - -#include -#include -#include - - -#include "lua.h" -#include "lauxlib.h" - -#include "lptypes.h" -#include "lpcap.h" -#include "lpcode.h" -#include "lpprint.h" -#include "lptree.h" - - -/* number of siblings for each tree */ -const byte numsiblings[] = { - 0, 0, 0, /* char, set, any */ - 0, 0, 0, /* true, false, utf-range */ - 1, /* rep */ - 2, 2, /* seq, choice */ - 1, 1, /* not, and */ - 0, 0, 2, 1, 1, /* call, opencall, rule, prerule, grammar */ - 1, /* behind */ - 1, 1, /* capture, runtime capture */ - 0 /* labeled failure throw */ -}; - - -static TTree *newgrammar (lua_State *L, int arg); - - -/* -** returns a reasonable name for value at index 'idx' on the stack -*/ -static const char *val2str (lua_State *L, int idx) { - const char *k = lua_tostring(L, idx); - if (k != NULL) - return lua_pushfstring(L, "%s", k); - else - return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx)); -} - - -/* -** Fix a TOpenCall into a TCall node, using table 'postable' to -** translate a key to its rule address in the tree. Raises an -** error if key does not exist. -*/ -static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t, byte tag) { /* labeled failure */ - int n; - lua_rawgeti(L, -1, t->key); /* get rule's name */ - lua_gettable(L, postable); /* query name in position table */ - n = lua_tonumber(L, -1); /* get (absolute) position */ - lua_pop(L, 1); /* remove position */ - if (tag == TOpenCall) { /* labeled failure */ - if (n == 0) { /* no position? */ - lua_rawgeti(L, -1, t->key); /* get rule's name again */ - luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); - } - t->tag = TCall; - t->u.ps = n - (t - g); /* position relative to node */ - assert(sib2(t)->tag == TRule); - sib2(t)->key = t->key; /* fix rule's key */ - } else if (n != 0) { /* labeled failure */ - t->u.ps = n - (t - g); /* position relative to node */ - } -} - - -/* -** Transform left associative constructions into right -** associative ones, for sequence and choice; that is: -** (t11 + t12) + t2 => t11 + (t12 + t2) -** (t11 * t12) * t2 => t11 * (t12 * t2) -** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) -*/ -static void correctassociativity (TTree *tree) { - TTree *t1 = sib1(tree); - assert(tree->tag == TChoice || tree->tag == TSeq); - while (t1->tag == tree->tag) { - int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */ - int n11size = t1->u.ps - 1; - int n12size = n1size - n11size - 1; - memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */ - tree->u.ps = n11size + 1; - sib2(tree)->tag = tree->tag; - sib2(tree)->u.ps = n12size + 1; - } -} - - -/* -** Make final adjustments in a tree. Fix open calls in tree 't', -** making them refer to their respective rules or raising appropriate -** errors (if not inside a grammar). Correct associativity of associative -** constructions (making them right associative). Assume that tree's -** ktable is at the top of the stack (for error messages). -*/ -static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { - tailcall: - switch (t->tag) { - case TGrammar: /* subgrammars were already fixed */ - return; - case TOpenCall: { - if (g != NULL) /* inside a grammar? */ - fixonecall(L, postable, g, t, TOpenCall); /* labeled failure */ - else { /* open call outside grammar */ - lua_rawgeti(L, -1, t->key); - luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); - } - break; - } - case TThrow: { /* labeled failure */ - if (g != NULL) /* inside a grammar? */ - fixonecall(L, postable, g, t, TThrow); - break; - } - case TSeq: case TChoice: - correctassociativity(t); - break; - } - switch (numsiblings[t->tag]) { - case 1: /* finalfix(L, postable, g, sib1(t)); */ - t = sib1(t); goto tailcall; - case 2: - finalfix(L, postable, g, sib1(t)); - t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */ - default: assert(numsiblings[t->tag] == 0); break; - } -} - - - -/* -** {=================================================================== -** KTable manipulation -** -** - The ktable of a pattern 'p' can be shared by other patterns that -** contain 'p' and no other constants. Because of this sharing, we -** should not add elements to a 'ktable' unless it was freshly created -** for the new pattern. -** -** - The maximum index in a ktable is USHRT_MAX, because trees and -** patterns use unsigned shorts to store those indices. -** ==================================================================== -*/ - -/* -** Create a new 'ktable' to the pattern at the top of the stack. -*/ -static void newktable (lua_State *L, int n) { - lua_createtable(L, n, 0); /* create a fresh table */ - lua_setuservalue(L, -2); /* set it as 'ktable' for pattern */ -} - - -/* -** Add element 'idx' to 'ktable' of pattern at the top of the stack; -** Return index of new element. -** If new element is nil, does not add it to table (as it would be -** useless) and returns 0, as ktable[0] is always nil. -*/ -static int addtoktable (lua_State *L, int idx) { - if (lua_isnil(L, idx)) /* nil value? */ - return 0; - else { - int n; - lua_getuservalue(L, -1); /* get ktable from pattern */ - n = lua_rawlen(L, -1); - if (n >= USHRT_MAX) - luaL_error(L, "too many Lua values in pattern"); - lua_pushvalue(L, idx); /* element to be added */ - lua_rawseti(L, -2, ++n); - lua_pop(L, 1); /* remove 'ktable' */ - return n; - } -} - - -/* -** Return the number of elements in the ktable at 'idx'. -** In Lua 5.2/5.3, default "environment" for patterns is nil, not -** a table. Treat it as an empty table. In Lua 5.1, assumes that -** the environment has no numeric indices (len == 0) -*/ -static int ktablelen (lua_State *L, int idx) { - if (!lua_istable(L, idx)) return 0; - else return lua_rawlen(L, idx); -} - - -/* -** Concatentate the contents of table 'idx1' into table 'idx2'. -** (Assume that both indices are negative.) -** Return the original length of table 'idx2' (or 0, if no -** element was added, as there is no need to correct any index). -*/ -static int concattable (lua_State *L, int idx1, int idx2) { - int i; - int n1 = ktablelen(L, idx1); - int n2 = ktablelen(L, idx2); - if (n1 + n2 > USHRT_MAX) - luaL_error(L, "too many Lua values in pattern"); - if (n1 == 0) return 0; /* nothing to correct */ - for (i = 1; i <= n1; i++) { - lua_rawgeti(L, idx1, i); - lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ - } - return n2; -} - - -/* -** When joining 'ktables', constants from one of the subpatterns must -** be renumbered; 'correctkeys' corrects their indices (adding 'n' -** to each of them) -*/ -static void correctkeys (TTree *tree, int n) { - if (n == 0) return; /* no correction? */ - tailcall: - switch (tree->tag) { - case TOpenCall: case TCall: case TRunTime: case TRule: case TThrow: { /* labeled failure */ - if (tree->key > 0) - tree->key += n; - break; - } - case TCapture: { - if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) - tree->key += n; - break; - } - default: break; - } - switch (numsiblings[tree->tag]) { - case 1: /* correctkeys(sib1(tree), n); */ - tree = sib1(tree); goto tailcall; - case 2: - correctkeys(sib1(tree), n); - tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ - default: assert(numsiblings[tree->tag] == 0); break; - } -} - - -/* -** Join the ktables from p1 and p2 the ktable for the new pattern at the -** top of the stack, reusing them when possible. -*/ -static void joinktables (lua_State *L, int p1, TTree *t2, int p2) { - int n1, n2; - lua_getuservalue(L, p1); /* get ktables */ - lua_getuservalue(L, p2); - n1 = ktablelen(L, -2); - n2 = ktablelen(L, -1); - if (n1 == 0 && n2 == 0) /* are both tables empty? */ - lua_pop(L, 2); /* nothing to be done; pop tables */ - else if (n2 == 0 || lp_equal(L, -2, -1)) { /* 2nd table empty or equal? */ - lua_pop(L, 1); /* pop 2nd table */ - lua_setuservalue(L, -2); /* set 1st ktable into new pattern */ - } - else if (n1 == 0) { /* first table is empty? */ - lua_setuservalue(L, -3); /* set 2nd table into new pattern */ - lua_pop(L, 1); /* pop 1st table */ - } - else { - lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ - /* stack: new p; ktable p1; ktable p2; new ktable */ - concattable(L, -3, -1); /* from p1 into new ktable */ - concattable(L, -2, -1); /* from p2 into new ktable */ - lua_setuservalue(L, -4); /* new ktable becomes 'p' environment */ - lua_pop(L, 2); /* pop other ktables */ - correctkeys(t2, n1); /* correction for indices from p2 */ - } -} - - -/* -** copy 'ktable' of element 'idx' to new tree (on top of stack) -*/ -static void copyktable (lua_State *L, int idx) { - lua_getuservalue(L, idx); - lua_setuservalue(L, -2); -} - - -/* -** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable' -** from tree at the top of the stack, and correct corresponding -** tree. -*/ -static void mergektable (lua_State *L, int idx, TTree *stree) { - int n; - lua_getuservalue(L, -1); /* get ktables */ - lua_getuservalue(L, idx); - n = concattable(L, -1, -2); - lua_pop(L, 2); /* remove both ktables */ - correctkeys(stree, n); -} - - -/* -** Create a new 'ktable' to the pattern at the top of the stack, adding -** all elements from pattern 'p' (if not 0) plus element 'idx' to it. -** Return index of new element. -*/ -static int addtonewktable (lua_State *L, int p, int idx) { - newktable(L, 1); - if (p) - mergektable(L, p, NULL); - return addtoktable(L, idx); -} - -/* }====================================================== */ - - -/* -** {====================================================== -** Tree generation -** ======================================================= -*/ - -/* -** In 5.2, could use 'luaL_testudata'... -*/ -static int testpattern (lua_State *L, int idx) { - if (lua_touserdata(L, idx)) { /* value is a userdata? */ - if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ - luaL_getmetatable(L, PATTERN_T); - if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */ - lua_pop(L, 2); /* remove both metatables */ - return 1; - } - } - } - return 0; -} - - -static Pattern *getpattern (lua_State *L, int idx) { - return (Pattern *)luaL_checkudata(L, idx, PATTERN_T); -} - - -static int getsize (lua_State *L, int idx) { - return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; -} - - -static TTree *gettree (lua_State *L, int idx, int *len) { - Pattern *p = getpattern(L, idx); - if (len) - *len = getsize(L, idx); - return p->tree; -} - - -/* -** create a pattern. Set its uservalue (the 'ktable') equal to its -** metatable. (It could be any empty sequence; the metatable is at -** hand here, so we use it.) -*/ -static TTree *newtree (lua_State *L, int len) { - size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); - Pattern *p = (Pattern *)lua_newuserdata(L, size); - luaL_getmetatable(L, PATTERN_T); - lua_pushvalue(L, -1); - lua_setuservalue(L, -3); - lua_setmetatable(L, -2); - p->code = NULL; p->codesize = 0; - return p->tree; -} - - -static TTree *newleaf (lua_State *L, int tag) { - TTree *tree = newtree(L, 1); - tree->tag = tag; - return tree; -} - - -static TTree *newcharset (lua_State *L) { - TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); - tree->tag = TSet; - loopset(i, treebuffer(tree)[i] = 0); - return tree; -} - - -/* -** add to tree a sequence where first sibling is 'sib' (with size -** 'sibsize'); returns position for second sibling -*/ -static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { - tree->tag = TSeq; tree->u.ps = sibsize + 1; - memcpy(sib1(tree), sib, sibsize * sizeof(TTree)); - return sib2(tree); -} - - -/* -** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got -** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it -** must build a sequence of sequence of sequence...) -*/ -static void fillseq (TTree *tree, int tag, int n, const char *s) { - int i; - for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */ - tree->tag = TSeq; tree->u.ps = 2; - sib1(tree)->tag = tag; - sib1(tree)->u.n = s ? (byte)s[i] : 0; - tree = sib2(tree); - } - tree->tag = tag; /* last one does not need TSeq */ - tree->u.n = s ? (byte)s[i] : 0; -} - - -/* -** Numbers as patterns: -** 0 == true (always match); n == TAny repeated 'n' times; -** -n == not (TAny repeated 'n' times) -*/ -static TTree *numtree (lua_State *L, int n) { - if (n == 0) - return newleaf(L, TTrue); - else { - TTree *tree, *nd; - if (n > 0) - tree = nd = newtree(L, 2 * n - 1); - else { /* negative: code it as !(-n) */ - n = -n; - tree = newtree(L, 2 * n); - tree->tag = TNot; - nd = sib1(tree); - } - fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */ - return tree; - } -} - - -/* -** Convert value at index 'idx' to a pattern -*/ -static TTree *getpatt (lua_State *L, int idx, int *len) { - TTree *tree; - switch (lua_type(L, idx)) { - case LUA_TSTRING: { - size_t slen; - const char *s = lua_tolstring(L, idx, &slen); /* get string */ - if (slen == 0) /* empty? */ - tree = newleaf(L, TTrue); /* always match */ - else { - tree = newtree(L, 2 * (slen - 1) + 1); - fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */ - } - break; - } - case LUA_TNUMBER: { - int n = lua_tointeger(L, idx); - tree = numtree(L, n); - break; - } - case LUA_TBOOLEAN: { - tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse)); - break; - } - case LUA_TTABLE: { - tree = newgrammar(L, idx); - break; - } - case LUA_TFUNCTION: { - tree = newtree(L, 2); - tree->tag = TRunTime; - tree->key = addtonewktable(L, 0, idx); - sib1(tree)->tag = TTrue; - break; - } - default: { - return gettree(L, idx, len); - } - } - lua_replace(L, idx); /* put new tree into 'idx' slot */ - if (len) - *len = getsize(L, idx); - return tree; -} - - -/* -** create a new tree, whith a new root and one sibling. -** Sibling must be on the Lua stack, at index 1. -*/ -static TTree *newroot1sib (lua_State *L, int tag) { - int s1; - TTree *tree1 = getpatt(L, 1, &s1); - TTree *tree = newtree(L, 1 + s1); /* create new tree */ - tree->tag = tag; - memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); - copyktable(L, 1); - return tree; -} - - -/* -** create a new tree, whith a new root and 2 siblings. -** Siblings must be on the Lua stack, first one at index 1. -*/ -static TTree *newroot2sib (lua_State *L, int tag) { - int s1, s2; - TTree *tree1 = getpatt(L, 1, &s1); - TTree *tree2 = getpatt(L, 2, &s2); - TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */ - tree->tag = tag; - tree->u.ps = 1 + s1; - memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); - memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); - joinktables(L, 1, sib2(tree), 2); - return tree; -} - - -/* labeled failure begin */ -static TTree *newthrowleaf (lua_State *L) { - TTree *tree = newtree(L, 1); - tree->tag = TThrow; - tree->u.ps = 0; /* there is no recovery rule associated */ - return tree; -} -/* labeled failure end */ - - -static int lp_P (lua_State *L) { - luaL_checkany(L, 1); - getpatt(L, 1, NULL); - lua_settop(L, 1); - return 1; -} - - -/* -** sequence operator; optimizations: -** false x => false, x true => x, true x => x -** (cannot do x . false => false because x may have runtime captures) -*/ -static int lp_seq (lua_State *L) { - TTree *tree1 = getpatt(L, 1, NULL); - TTree *tree2 = getpatt(L, 2, NULL); - if (tree1->tag == TFalse || tree2->tag == TTrue) - lua_pushvalue(L, 1); /* false . x == false, x . true = x */ - else if (tree1->tag == TTrue) - lua_pushvalue(L, 2); /* true . x = x */ - else - newroot2sib(L, TSeq); - return 1; -} - - -/* -** choice operator; optimizations: -** charset / charset => charset -** true / x => true, x / false => x, false / x => x -** (x / true is not equivalent to true) -*/ -static int lp_choice (lua_State *L) { - Charset st1, st2; - TTree *t1 = getpatt(L, 1, NULL); - TTree *t2 = getpatt(L, 2, NULL); - if (tocharset(t1, &st1) && tocharset(t2, &st2)) { - TTree *t = newcharset(L); - loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); - } - else if (nofail(t1) || t2->tag == TFalse) - lua_pushvalue(L, 1); /* true / x => true, x / false => x */ - else if (t1->tag == TFalse) - lua_pushvalue(L, 2); /* false / x => x */ - else - newroot2sib(L, TChoice); - return 1; -} - - -/* -** p^n -*/ -static int lp_star (lua_State *L) { - int size1; - int n = (int)luaL_checkinteger(L, 2); - TTree *tree1 = getpatt(L, 1, &size1); - if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ - TTree *tree = newtree(L, (n + 1) * (size1 + 1)); - if (nullable(tree1)) - luaL_error(L, "loop body may accept empty string"); - while (n--) /* repeat 'n' times */ - tree = seqaux(tree, tree1, size1); - tree->tag = TRep; - memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); - } - else { /* choice (seq tree1 ... choice tree1 true ...) true */ - TTree *tree; - n = -n; - /* size = (choice + seq + tree1 + true) * n, but the last has no seq */ - tree = newtree(L, n * (size1 + 3) - 1); - for (; n > 1; n--) { /* repeat (n - 1) times */ - tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2; - sib2(tree)->tag = TTrue; - tree = sib1(tree); - tree = seqaux(tree, tree1, size1); - } - tree->tag = TChoice; tree->u.ps = size1 + 1; - sib2(tree)->tag = TTrue; - memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); - } - copyktable(L, 1); - return 1; -} - - -/* -** #p == &p -*/ -static int lp_and (lua_State *L) { - newroot1sib(L, TAnd); - return 1; -} - - -/* -** -p == !p -*/ -static int lp_not (lua_State *L) { - newroot1sib(L, TNot); - return 1; -} - - -/* -** [t1 - t2] == Seq (Not t2) t1 -** If t1 and t2 are charsets, make their difference. -*/ -static int lp_sub (lua_State *L) { - Charset st1, st2; - int s1, s2; - TTree *t1 = getpatt(L, 1, &s1); - TTree *t2 = getpatt(L, 2, &s2); - if (tocharset(t1, &st1) && tocharset(t2, &st2)) { - TTree *t = newcharset(L); - loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); - } - else { - TTree *tree = newtree(L, 2 + s1 + s2); - tree->tag = TSeq; /* sequence of... */ - tree->u.ps = 2 + s2; - sib1(tree)->tag = TNot; /* ...not... */ - memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ - memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ - joinktables(L, 1, sib1(tree), 2); - } - return 1; -} - - -static int lp_set (lua_State *L) { - size_t l; - const char *s = luaL_checklstring(L, 1, &l); - TTree *tree = newcharset(L); - while (l--) { - setchar(treebuffer(tree), (byte)(*s)); - s++; - } - return 1; -} - - -static int lp_range (lua_State *L) { - int arg; - int top = lua_gettop(L); - TTree *tree = newcharset(L); - for (arg = 1; arg <= top; arg++) { - int c; - size_t l; - const char *r = luaL_checklstring(L, arg, &l); - luaL_argcheck(L, l == 2, arg, "range must have two characters"); - for (c = (byte)r[0]; c <= (byte)r[1]; c++) - setchar(treebuffer(tree), c); - } - return 1; -} - - -/* -** Fills a tree node with basic information about the UTF-8 code point -** 'cpu': its value in 'n', its length in 'cap', and its first byte in -** 'key' -*/ -static void codeutftree (lua_State *L, TTree *t, lua_Unsigned cpu, int arg) { - int len, fb, cp; - cp = (int)cpu; - if (cp <= 0x7f) { /* one byte? */ - len = 1; - fb = cp; - } else if (cp <= 0x7ff) { - len = 2; - fb = 0xC0 | (cp >> 6); - } else if (cp <= 0xffff) { - len = 3; - fb = 0xE0 | (cp >> 12); - } - else { - luaL_argcheck(L, cpu <= 0x10ffffu, arg, "invalid code point"); - len = 4; - fb = 0xF0 | (cp >> 18); - } - t->u.n = cp; - t->cap = len; - t->key = fb; -} - - -static int lp_utfr (lua_State *L) { - lua_Unsigned from = (lua_Unsigned)luaL_checkinteger(L, 1); - lua_Unsigned to = (lua_Unsigned)luaL_checkinteger(L, 2); - luaL_argcheck(L, from <= to, 2, "empty range"); - if (to <= 0x7f) { /* ascii range? */ - TTree *tree = newcharset(L); /* code it as a regular charset */ - unsigned int f; - for (f = (int)from; f <= to; f++) - setchar(treebuffer(tree), f); - } - else { /* multi-byte utf-8 range */ - TTree *tree = newtree(L, 2); - tree->tag = TUTFR; - codeutftree(L, tree, from, 1); - sib1(tree)->tag = TXInfo; - codeutftree(L, sib1(tree), to, 2); - } - return 1; -} - - -/* -** Look-behind predicate -*/ -static int lp_behind (lua_State *L) { - TTree *tree; - TTree *tree1 = getpatt(L, 1, NULL); - int n = fixedlen(tree1); - luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length"); - luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); - luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); - tree = newroot1sib(L, TBehind); - tree->u.n = n; - return 1; -} - - -/* labeled failure begin */ -/* -** Throws a label -*/ -static int lp_throw (lua_State *L) { - TTree * tree; - luaL_checkstring(L, -1); - tree = newthrowleaf(L); - tree->key = addtonewktable(L, 0, 1); - return 1; -} -/* labeled failure end */ - - -/* -** Create a non-terminal -*/ -static int lp_V (lua_State *L) { - TTree *tree = newleaf(L, TOpenCall); - luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); - tree->key = addtonewktable(L, 0, 1); - return 1; -} - - -/* -** Create a tree for a non-empty capture, with a body and -** optionally with an associated Lua value (at index 'labelidx' in the -** stack) -*/ -static int capture_aux (lua_State *L, int cap, int labelidx) { - TTree *tree = newroot1sib(L, TCapture); - tree->cap = cap; - tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx); - return 1; -} - - -/* -** Fill a tree with an empty capture, using an empty (TTrue) sibling. -** (The 'key' field must be filled by the caller to finish the tree.) -*/ -static TTree *auxemptycap (TTree *tree, int cap) { - tree->tag = TCapture; - tree->cap = cap; - sib1(tree)->tag = TTrue; - return tree; -} - - -/* -** Create a tree for an empty capture. -*/ -static TTree *newemptycap (lua_State *L, int cap, int key) { - TTree *tree = auxemptycap(newtree(L, 2), cap); - tree->key = key; - return tree; -} - - -/* -** Create a tree for an empty capture with an associated Lua value. -*/ -static TTree *newemptycapkey (lua_State *L, int cap, int idx) { - TTree *tree = auxemptycap(newtree(L, 2), cap); - tree->key = addtonewktable(L, 0, idx); - return tree; -} - - -/* -** Captures with syntax p / v -** (function capture, query capture, string capture, or number capture) -*/ -static int lp_divcapture (lua_State *L) { - switch (lua_type(L, 2)) { - case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2); - case LUA_TTABLE: return capture_aux(L, Cquery, 2); - case LUA_TSTRING: return capture_aux(L, Cstring, 2); - case LUA_TNUMBER: { - int n = lua_tointeger(L, 2); - TTree *tree = newroot1sib(L, TCapture); - luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number"); - tree->cap = Cnum; - tree->key = n; - return 1; - } - default: return luaL_argerror(L, 2, "invalid replacement value"); - } -} - - -static int lp_substcapture (lua_State *L) { - return capture_aux(L, Csubst, 0); -} - - -static int lp_tablecapture (lua_State *L) { - return capture_aux(L, Ctable, 0); -} - - -static int lp_groupcapture (lua_State *L) { - if (lua_isnoneornil(L, 2)) - return capture_aux(L, Cgroup, 0); - else - return capture_aux(L, Cgroup, 2); -} - - -static int lp_foldcapture (lua_State *L) { - luaL_checktype(L, 2, LUA_TFUNCTION); - return capture_aux(L, Cfold, 2); -} - - -static int lp_simplecapture (lua_State *L) { - return capture_aux(L, Csimple, 0); -} - - -static int lp_poscapture (lua_State *L) { - newemptycap(L, Cposition, 0); - return 1; -} - - -static int lp_argcapture (lua_State *L) { - int n = (int)luaL_checkinteger(L, 1); - luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); - newemptycap(L, Carg, n); - return 1; -} - - -static int lp_backref (lua_State *L) { - luaL_checkany(L, 1); - newemptycapkey(L, Cbackref, 1); - return 1; -} - - -/* -** Constant capture -*/ -static int lp_constcapture (lua_State *L) { - int i; - int n = lua_gettop(L); /* number of values */ - if (n == 0) /* no values? */ - newleaf(L, TTrue); /* no capture */ - else if (n == 1) - newemptycapkey(L, Cconst, 1); /* single constant capture */ - else { /* create a group capture with all values */ - TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); - newktable(L, n); /* create a 'ktable' for new tree */ - tree->tag = TCapture; - tree->cap = Cgroup; - tree->key = 0; - tree = sib1(tree); - for (i = 1; i <= n - 1; i++) { - tree->tag = TSeq; - tree->u.ps = 3; /* skip TCapture and its sibling */ - auxemptycap(sib1(tree), Cconst); - sib1(tree)->key = addtoktable(L, i); - tree = sib2(tree); - } - auxemptycap(tree, Cconst); - tree->key = addtoktable(L, i); - } - return 1; -} - - -static int lp_matchtime (lua_State *L) { - TTree *tree; - luaL_checktype(L, 2, LUA_TFUNCTION); - tree = newroot1sib(L, TRunTime); - tree->key = addtonewktable(L, 1, 2); - return 1; -} - -/* }====================================================== */ - - -/* -** {====================================================== -** Grammar - Tree generation -** ======================================================= -*/ - -/* -** push on the stack the index and the pattern for the -** initial rule of grammar at index 'arg' in the stack; -** also add that index into position table. -*/ -static void getfirstrule (lua_State *L, int arg, int postab) { - lua_rawgeti(L, arg, 1); /* access first element */ - if (lua_isstring(L, -1)) { /* is it the name of initial rule? */ - lua_pushvalue(L, -1); /* duplicate it to use as key */ - lua_gettable(L, arg); /* get associated rule */ - } - else { - lua_pushinteger(L, 1); /* key for initial rule */ - lua_insert(L, -2); /* put it before rule */ - } - if (!testpattern(L, -1)) { /* initial rule not a pattern? */ - if (lua_isnil(L, -1)) - luaL_error(L, "grammar has no initial rule"); - else - luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2)); - } - lua_pushvalue(L, -2); /* push key */ - lua_pushinteger(L, 1); /* push rule position (after TGrammar) */ - lua_settable(L, postab); /* insert pair at position table */ -} - -/* -** traverse grammar at index 'arg', pushing all its keys and patterns -** into the stack. Create a new table (before all pairs key-pattern) to -** collect all keys and their associated positions in the final tree -** (the "position table"). -** Return the number of rules and (in 'totalsize') the total size -** for the new tree. -*/ -static int collectrules (lua_State *L, int arg, int *totalsize) { - int n = 1; /* to count number of rules */ - int postab = lua_gettop(L) + 1; /* index of position table */ - int size; /* accumulator for total size */ - lua_newtable(L); /* create position table */ - getfirstrule(L, arg, postab); - size = 3 + getsize(L, postab + 2); /* TGrammar + TRule + TXInfo + rule */ - lua_pushnil(L); /* prepare to traverse grammar table */ - while (lua_next(L, arg) != 0) { - if (lua_tonumber(L, -2) == 1 || - lp_equal(L, -2, postab + 1)) { /* initial rule? */ - lua_pop(L, 1); /* remove value (keep key for lua_next) */ - continue; - } - if (!testpattern(L, -1)) /* value is not a pattern? */ - luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2)); - luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); - lua_pushvalue(L, -2); /* push key (to insert into position table) */ - lua_pushinteger(L, size); - lua_settable(L, postab); - size += 2 + getsize(L, -1); /* add 'TRule + TXInfo + rule' to size */ - lua_pushvalue(L, -2); /* push key (for next lua_next) */ - n++; - } - *totalsize = size + 1; /* space for 'TTrue' finishing list of rules */ - return n; -} - - -static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { - int i; - TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ - for (i = 0; i < n; i++) { /* add each rule into new tree */ - int ridx = frule + 2*i + 1; /* index of i-th rule */ - int rulesize; - TTree *rn = gettree(L, ridx, &rulesize); - TTree *pr = sib1(nd); /* points to rule's prerule */ - nd->tag = TRule; - nd->key = 0; /* will be fixed when rule is used */ - pr->tag = TXInfo; - pr->u.n = i; /* rule number */ - nd->u.ps = rulesize + 2; /* point to next rule */ - memcpy(sib1(pr), rn, rulesize * sizeof(TTree)); /* copy rule */ - mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ - nd = sib2(nd); /* move to next rule */ - } - nd->tag = TTrue; /* finish list of rules */ -} - - -/* -** Check whether a tree has potential infinite loops -*/ -static int checkloops (TTree *tree) { - tailcall: - if (tree->tag == TRep && nullable(sib1(tree))) - return 1; - else if (tree->tag == TGrammar) - return 0; /* sub-grammars already checked */ - else { - switch (numsiblings[tree->tag]) { - case 1: /* return checkloops(sib1(tree)); */ - tree = sib1(tree); goto tailcall; - case 2: - if (checkloops(sib1(tree))) return 1; - /* else return checkloops(sib2(tree)); */ - tree = sib2(tree); goto tailcall; - default: assert(numsiblings[tree->tag] == 0); return 0; - } - } -} - - -/* -** Give appropriate error message for 'verifyrule'. If a rule appears -** twice in 'passed', there is path from it back to itself without -** advancing the subject. -*/ -static int verifyerror (lua_State *L, unsigned short *passed, int npassed) { - int i, j; - for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ - for (j = i - 1; j >= 0; j--) { - if (passed[i] == passed[j]) { - lua_rawgeti(L, -1, passed[i]); /* get rule's key */ - return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); - } - } - } - return luaL_error(L, "too many left calls in grammar"); -} - - -/* -** Check whether a rule can be left recursive; raise an error in that -** case; otherwise return 1 iff pattern is nullable. -** The return value is used to check sequences, where the second pattern -** is only relevant if the first is nullable. -** Parameter 'nb' works as an accumulator, to allow tail calls in -** choices. ('nb' true makes function returns true.) -** Parameter 'passed' is a list of already visited rules, 'npassed' -** counts the elements in 'passed'. -** Assume ktable at the top of the stack. -*/ -static int verifyrule (lua_State *L, TTree *tree, unsigned short *passed, - int npassed, int nb) { - tailcall: - switch (tree->tag) { - case TChar: case TSet: case TAny: - case TFalse: case TUTFR: - case TThrow: /* labeled failure */ - return nb; /* cannot pass from here */ - case TTrue: - case TBehind: /* look-behind cannot have calls */ - return 1; - case TNot: case TAnd: case TRep: - /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ - tree = sib1(tree); nb = 1; goto tailcall; - case TCapture: case TRunTime: case TXInfo: - /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ - tree = sib1(tree); goto tailcall; - case TCall: - /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ - tree = sib2(tree); goto tailcall; - case TSeq: /* only check 2nd child if first is nb */ - if (!verifyrule(L, sib1(tree), passed, npassed, 0)) - return nb; - /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ - tree = sib2(tree); goto tailcall; - case TChoice: /* must check both children */ - nb = verifyrule(L, sib1(tree), passed, npassed, nb); - /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ - tree = sib2(tree); goto tailcall; - case TRule: - if (npassed >= MAXRULES) /* too many steps? */ - return verifyerror(L, passed, npassed); /* error */ - else { - passed[npassed++] = tree->key; /* add rule to path */ - /* return verifyrule(L, sib1(tree), passed, npassed); */ - tree = sib1(tree); goto tailcall; - } - case TGrammar: - return nullable(tree); /* sub-grammar cannot be left recursive */ - default: assert(0); return 0; - } -} - - -static void verifygrammar (lua_State *L, TTree *grammar) { - unsigned short passed[MAXRULES]; - TTree *rule; - /* check left-recursive rules */ - for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { - if (rule->key == 0) continue; /* unused rule */ - verifyrule(L, sib1(rule), passed, 0, 0); - } - assert(rule->tag == TTrue); - /* check infinite loops inside rules */ - for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { - if (rule->key == 0) continue; /* unused rule */ - if (checkloops(sib1(rule))) { - lua_rawgeti(L, -1, rule->key); /* get rule's key */ - luaL_error(L, "empty loop in rule '%s'", val2str(L, -1)); - } - } - assert(rule->tag == TTrue); -} - - -/* -** Give a name for the initial rule if it is not referenced -*/ -static void initialrulename (lua_State *L, TTree *grammar, int frule) { - if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */ - int n = lua_rawlen(L, -1) + 1; /* index for name */ - lua_pushvalue(L, frule); /* rule's name */ - lua_rawseti(L, -2, n); /* ktable was on the top of the stack */ - sib1(grammar)->key = n; - } -} - - -static TTree *newgrammar (lua_State *L, int arg) { - int treesize; - int frule = lua_gettop(L) + 2; /* position of first rule's key */ - int n = collectrules(L, arg, &treesize); - TTree *g = newtree(L, treesize); - luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules"); - g->tag = TGrammar; g->u.n = n; - lua_newtable(L); /* create 'ktable' */ - lua_setuservalue(L, -2); - buildgrammar(L, g, frule, n); - lua_getuservalue(L, -1); /* get 'ktable' for new tree */ - finalfix(L, frule - 1, g, sib1(g)); - initialrulename(L, g, frule); - verifygrammar(L, g); - lua_pop(L, 1); /* remove 'ktable' */ - lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */ - lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */ - return g; /* new table at the top of the stack */ -} - -/* }====================================================== */ - - -static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { - lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */ - finalfix(L, 0, NULL, p->tree); - lua_pop(L, 1); /* remove 'ktable' */ - return compile(L, p); -} - - -static int lp_printtree (lua_State *L) { - TTree *tree = getpatt(L, 1, NULL); - int c = lua_toboolean(L, 2); - if (c) { - lua_getuservalue(L, 1); /* push 'ktable' (may be used by 'finalfix') */ - finalfix(L, 0, NULL, tree); - lua_pop(L, 1); /* remove 'ktable' */ - } - printktable(L, 1); - printtree(tree, 0); - return 0; -} - - -static int lp_printcode (lua_State *L) { - Pattern *p = getpattern(L, 1); - printktable(L, 1); - if (p->code == NULL) /* not compiled yet? */ - prepcompile(L, p, 1); - printpatt(p->code, p->codesize); - return 0; -} - - -/* -** Get the initial position for the match, interpreting negative -** values from the end of the subject -*/ -static size_t initposition (lua_State *L, size_t len) { - lua_Integer ii = luaL_optinteger(L, 3, 1); - if (ii > 0) { /* positive index? */ - if ((size_t)ii <= len) /* inside the string? */ - return (size_t)ii - 1; /* return it (corrected to 0-base) */ - else return len; /* crop at the end */ - } - else { /* negative index */ - if ((size_t)(-ii) <= len) /* inside the string? */ - return len - ((size_t)(-ii)); /* return position from the end */ - else return 0; /* crop at the beginning */ - } -} - - -/* -** Main match function -*/ -static int lp_match (lua_State *L) { - Capture capture[INITCAPSIZE]; - const char *r; - size_t l; - Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1)); - Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1); - const char *s = luaL_checklstring(L, SUBJIDX, &l); - size_t i = initposition(L, l); - int ptop = lua_gettop(L); - short labelf; /* labeled failure */ - const char *sfail = NULL; /* labeled failure */ - lua_pushnil(L); /* initialize subscache */ - lua_pushlightuserdata(L, capture); /* initialize caplistidx */ - lua_getuservalue(L, 1); /* initialize penvidx */ - r = match(L, s, s + i, s + l, code, capture, ptop, &labelf, &sfail); /* labeled failure */ - if (r == NULL) { /* labeled failure begin */ - lua_pushnil(L); - if (labelf) { - lua_Integer luaInt; - lua_rawgeti(L, ktableidx(ptop), labelf); - luaInt = lua_tointeger(L, -1); - if (luaInt) { - lua_pop(L, 1); - lua_pushinteger(L, luaInt); - } - } - else - lua_pushstring(L, "fail"); - lua_pushinteger(L, sfail - (s + i) + 1); /* subject position related to the error */ - return 3; - } /* labeled failure end */ - return getcaptures(L, s, r, ptop); -} - - - -/* -** {====================================================== -** Library creation and functions not related to matching -** ======================================================= -*/ - -/* maximum limit for stack size */ -#define MAXLIM (INT_MAX / 100) - -static int lp_setmax (lua_State *L) { - lua_Integer lim = luaL_checkinteger(L, 1); - luaL_argcheck(L, 0 < lim && lim <= MAXLIM, 1, "out of range"); - lua_settop(L, 1); - lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); - return 0; -} - - -static int lp_type (lua_State *L) { - if (testpattern(L, 1)) - lua_pushliteral(L, "pattern"); - else - lua_pushnil(L); - return 1; -} - - -int lp_gc (lua_State *L) { - Pattern *p = getpattern(L, 1); - realloccode(L, p, 0); /* delete code block */ - return 0; -} - - -static void createcat (lua_State *L, const char *catname, int (catf) (int)) { - TTree *t = newcharset(L); - int i; - for (i = 0; i <= UCHAR_MAX; i++) - if (catf(i)) setchar(treebuffer(t), i); - lua_setfield(L, -2, catname); -} - - -static int lp_locale (lua_State *L) { - if (lua_isnoneornil(L, 1)) { - lua_settop(L, 0); - lua_createtable(L, 0, 12); - } - else { - luaL_checktype(L, 1, LUA_TTABLE); - lua_settop(L, 1); - } - createcat(L, "alnum", isalnum); - createcat(L, "alpha", isalpha); - createcat(L, "cntrl", iscntrl); - createcat(L, "digit", isdigit); - createcat(L, "graph", isgraph); - createcat(L, "lower", islower); - createcat(L, "print", isprint); - createcat(L, "punct", ispunct); - createcat(L, "space", isspace); - createcat(L, "upper", isupper); - createcat(L, "xdigit", isxdigit); - return 1; -} - - -static struct luaL_Reg pattreg[] = { - {"ptree", lp_printtree}, - {"pcode", lp_printcode}, - {"match", lp_match}, - {"B", lp_behind}, - {"V", lp_V}, - {"C", lp_simplecapture}, - {"Cc", lp_constcapture}, - {"Cmt", lp_matchtime}, - {"Cb", lp_backref}, - {"Carg", lp_argcapture}, - {"Cp", lp_poscapture}, - {"Cs", lp_substcapture}, - {"Ct", lp_tablecapture}, - {"Cf", lp_foldcapture}, - {"Cg", lp_groupcapture}, - {"P", lp_P}, - {"S", lp_set}, - {"R", lp_range}, - {"utfR", lp_utfr}, - {"locale", lp_locale}, - {"version", NULL}, - {"setmaxstack", lp_setmax}, - {"type", lp_type}, - {"T", lp_throw}, /* labeled failure throw */ - {NULL, NULL} -}; - - -static struct luaL_Reg metareg[] = { - {"__mul", lp_seq}, - {"__add", lp_choice}, - {"__pow", lp_star}, - {"__gc", lp_gc}, - {"__len", lp_and}, - {"__div", lp_divcapture}, - {"__unm", lp_not}, - {"__sub", lp_sub}, - {NULL, NULL} -}; - -int luaopen_lpeglabel (lua_State *L); /* labeled failure */ -int luaopen_lpeglabel (lua_State *L) { /* labeled failure */ - luaL_newmetatable(L, PATTERN_T); - lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ - lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); - luaL_setfuncs(L, metareg, 0); - luaL_newlib(L, pattreg); - lua_pushvalue(L, -1); - lua_setfield(L, -3, "__index"); - lua_pushliteral(L, "LPegLabel " VERSION); /* labeled failure */ - lua_setfield(L, -2, "version"); - return 1; -} - -/* }====================================================== */ diff --git a/lptree.h b/lptree.h deleted file mode 100644 index c0ae5dc..0000000 --- a/lptree.h +++ /dev/null @@ -1,88 +0,0 @@ -/* -** $Id: lptree.h $ -*/ - -#if !defined(lptree_h) -#define lptree_h - - -#include "lptypes.h" - - -/* -** types of trees -*/ -typedef enum TTag { - TChar = 0, /* 'n' = char */ - TSet, /* the set is stored in next CHARSETSIZE bytes */ - TAny, - TTrue, - TFalse, - TUTFR, /* range of UTF-8 codepoints; 'n' has initial codepoint; - 'cap' has length; 'key' has first byte; - extra info is similar for end codepoint */ - TRep, /* 'sib1'* */ - TSeq, /* 'sib1' 'sib2' */ - TChoice, /* 'sib1' / 'sib2' */ - TNot, /* !'sib1' */ - TAnd, /* &'sib1' */ - TCall, /* ktable[key] is rule's key; 'sib2' is rule being called */ - TOpenCall, /* ktable[key] is rule's key */ - TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); - 'sib1' is rule's pattern pre-rule; 'sib2' is next rule; - extra info 'n' is rule's sequential number */ - TXInfo, /* extra info */ - TGrammar, /* 'sib1' is initial (and first) rule */ - TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ - TCapture, /* captures: 'cap' is kind of capture (enum 'CapKind'); - ktable[key] is Lua value associated with capture; - 'sib1' is capture body */ - TRunTime, /* run-time capture: 'key' is Lua function; - 'sib1' is capture body */ - TThrow, /* labeled failure: ktable[key] is label's name */ - -} TTag; - - -/* -** Tree trees -** The first child of a tree (if there is one) is immediately after -** the tree. A reference to a second child (ps) is its position -** relative to the position of the tree itself. -*/ -typedef struct TTree { - byte tag; - byte cap; /* kind of capture (if it is a capture) */ - unsigned short key; /* key in ktable for Lua data (0 if no key) */ - union { - int ps; /* occasional second child */ - int n; /* occasional counter */ - } u; -} TTree; - - -/* -** A complete pattern has its tree plus, if already compiled, -** its corresponding code -*/ -typedef struct Pattern { - union Instruction *code; - int codesize; - TTree tree[1]; -} Pattern; - - -/* number of children for each tree */ -LUAI_FUNC const byte numsiblings[]; - -/* access to children */ -#define sib1(t) ((t) + 1) -#define sib2(t) ((t) + (t)->u.ps) - - - - - - -#endif - diff --git a/lptypes.h b/lptypes.h deleted file mode 100644 index 6a6e159..0000000 --- a/lptypes.h +++ /dev/null @@ -1,157 +0,0 @@ -/* -** $Id: lptypes.h $ -** LPeg - PEG pattern matching for Lua -** Copyright 2007-2019, Lua.org & PUC-Rio (see 'lpeg.html' for license) -** written by Roberto Ierusalimschy -*/ - -#if !defined(lptypes_h) -#define lptypes_h - - -#include -#include - -#include "lua.h" - - -#define VERSION "1.6.0" - - -#define PATTERN_T "lpeglabel-pattern" -#define MAXSTACKIDX "lpeglabel-maxstack" - - -/* -** compatibility with Lua 5.1 -*/ -#if (LUA_VERSION_NUM == 501) - -#define lp_equal lua_equal - -#define lua_getuservalue lua_getfenv -#define lua_setuservalue lua_setfenv - -#define lua_rawlen lua_objlen - -#define luaL_setfuncs(L,f,n) luaL_register(L,NULL,f) -#define luaL_newlib(L,f) luaL_register(L,"lpeglabel",f) - -typedef size_t lua_Unsigned; - -#endif - - -#if !defined(lp_equal) -#define lp_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) -#endif - - -/* default maximum size for call/backtrack stack */ -#if !defined(MAXBACK) -#define MAXBACK 400 -#endif - - -/* maximum number of rules in a grammar (limited by 'unsigned short') */ -#if !defined(MAXRULES) -#define MAXRULES 1000 -#endif - - - -/* initial size for capture's list */ -#define INITCAPSIZE 32 - - -/* index, on Lua stack, for subject */ -#define SUBJIDX 2 - -/* number of fixed arguments to 'match' (before capture arguments) */ -#define FIXEDARGS 3 - -/* index, on Lua stack, for capture list */ -#define caplistidx(ptop) ((ptop) + 2) - -/* index, on Lua stack, for pattern's ktable */ -#define ktableidx(ptop) ((ptop) + 3) - -/* index, on Lua stack, for backtracking stack */ -#define stackidx(ptop) ((ptop) + 4) - - - -typedef unsigned char byte; - - -#define BITSPERCHAR 8 - -#define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1) - - - -typedef struct Charset { - byte cs[CHARSETSIZE]; -} Charset; - - - -#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } - -/* access to charset */ -#define treebuffer(t) ((byte *)((t) + 1)) - -/* number of slots needed for 'n' bytes */ -#define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1) - -/* set 'b' bit in charset 'cs' */ -#define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) - - -/* -** in capture instructions, 'kind' of capture and its offset are -** packed in field 'aux', 4 bits for each -*/ -#define getkind(op) ((op)->i.aux & 0xF) -#define getoff(op) (((op)->i.aux >> 4) & 0xF) -#define joinkindoff(k,o) ((k) | ((o) << 4)) - -#define MAXOFF 0xF -#define MAXAUX 0xFF - - -/* maximum number of bytes to look behind */ -#define MAXBEHIND MAXAUX - - -/* maximum size (in elements) for a pattern */ -#define MAXPATTSIZE (SHRT_MAX - 10) - - -/* size (in elements) for an instruction plus extra l bytes */ -#define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1) - - -/* size (in elements) for a ISet instruction */ -#define CHARSETINSTSIZE instsize(CHARSETSIZE) - -/* size (in elements) for a IFunc instruction */ -#define funcinstsize(p) ((p)->i.aux + 2) - - - -#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7)))) - -/* labeled failure begin */ -#define LFAIL 0 - -/* update the farthest failure */ -#define updatefarthest(s1,s2) { if ((s2) > (s1)) s1 = s2; } - -#define OUTPRED 0 - -#define INPRED 1 -/* labeled failure end */ - -#endif - diff --git a/lpvm.c b/lpvm.c deleted file mode 100644 index b7ae631..0000000 --- a/lpvm.c +++ /dev/null @@ -1,499 +0,0 @@ -/* -** $Id: lpvm.c $ -** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ - -#include -#include - - -#include "lua.h" -#include "lauxlib.h" - -#include "lpcap.h" -#include "lptypes.h" -#include "lpvm.h" -#include "lpprint.h" - - -/* initial size for call/backtrack stack */ -#if !defined(INITBACK) -#define INITBACK MAXBACK -#endif - - -#define getoffset(p) (((p) + 1)->offset) - -static const Instruction giveup = {{IGiveup, 0, 0}}; - - -/* -** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid. -*/ -static const char *utf8_decode (const char *o, int *val) { - static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFFu}; - const unsigned char *s = (const unsigned char *)o; - unsigned int c = s[0]; /* first byte */ - unsigned int res = 0; /* final result */ - if (c < 0x80) /* ascii? */ - res = c; - else { - int count = 0; /* to count number of continuation bytes */ - while (c & 0x40) { /* still have continuation bytes? */ - int cc = s[++count]; /* read next byte */ - if ((cc & 0xC0) != 0x80) /* not a continuation byte? */ - return NULL; /* invalid byte sequence */ - res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */ - c <<= 1; /* to test next bit */ - } - res |= (c & 0x7F) << (count * 5); /* add first byte */ - if (count > 3 || res > 0x10FFFFu || res <= limits[count]) - return NULL; /* invalid byte sequence */ - s += count; /* skip continuation bytes read */ - } - *val = res; - return (const char *)s + 1; /* +1 to include first byte */ -} - - -/* -** {====================================================== -** Virtual Machine -** ======================================================= -*/ - - -typedef struct Stack { - const char *s; /* saved position (or NULL for calls) */ - const Instruction *p; /* next instruction */ - int caplevel; - byte labenv; /* labeled failure */ - byte predchoice; /* labeled failure */ -} Stack; - - -#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) - - -/* -** Ensures the size of array 'capture' (with size '*capsize' and -** 'captop' elements being used) is enough to accomodate 'n' extra -** elements plus one. (Because several opcodes add stuff to the capture -** array, it is simpler to ensure the array always has at least one free -** slot upfront and check its size later.) -*/ -static Capture *growcap (lua_State *L, Capture *capture, int *capsize, - int captop, int n, int ptop) { - if (*capsize - captop > n) - return capture; /* no need to grow array */ - else { /* must grow */ - Capture *newc; - int newsize = captop + n + 1; /* minimum size needed */ - if (newsize < INT_MAX/((int)sizeof(Capture) * 2)) - newsize *= 2; /* twice that size, if not too big */ - else if (newsize >= INT_MAX/((int)sizeof(Capture))) - luaL_error(L, "too many captures"); - newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture)); - memcpy(newc, capture, captop * sizeof(Capture)); - *capsize = newsize; - lua_replace(L, caplistidx(ptop)); - return newc; - } -} - - -/* -** Double the size of the stack -*/ -static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { - Stack *stack = getstackbase(L, ptop); - Stack *newstack; - int n = *stacklimit - stack; /* current stack size */ - int max, newn; - lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); - max = lua_tointeger(L, -1); /* maximum allowed size */ - lua_pop(L, 1); - if (n >= max) /* already at maximum size? */ - luaL_error(L, "backtrack stack overflow (current limit is %d)", max); - newn = 2 * n; /* new size */ - if (newn > max) newn = max; - newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); - memcpy(newstack, stack, n * sizeof(Stack)); - lua_replace(L, stackidx(ptop)); - *stacklimit = newstack + newn; - return newstack + n; /* return next position */ -} - - -/* -** Interpret the result of a dynamic capture: false -> fail; -** true -> keep current position; number -> next position. -** Return new subject position. 'fr' is stack index where -** is the result; 'curr' is current subject position; 'limit' -** is subject's size. -*/ -static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { - lua_Integer res; - if (!lua_toboolean(L, fr)) { /* false value? */ - lua_settop(L, fr - 1); /* remove results */ - return -1; /* and fail */ - } - else if (lua_isboolean(L, fr)) /* true? */ - res = curr; /* keep current position */ - else { - res = lua_tointeger(L, fr) - 1; /* new position */ - if (res < curr || res > limit) - luaL_error(L, "invalid position returned by match-time capture"); - } - lua_remove(L, fr); /* remove first result (offset) */ - return res; -} - - -/* -** Add capture values returned by a dynamic capture to the list -** 'capture', nested inside a group. 'fd' indexes the first capture -** value, 'n' is the number of values (at least 1). The open group -** capture is already in 'capture', before the place for the new entries. -*/ -static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { - int i; - assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); - capture[-1].idx = 0; /* make group capture an anonymous group */ - for (i = 0; i < n; i++) { /* add runtime captures */ - capture[i].kind = Cruntime; - capture[i].siz = 1; /* mark it as closed */ - capture[i].idx = fd + i; /* stack index of capture value */ - capture[i].s = s; - } - capture[n].kind = Cclose; /* close group */ - capture[n].siz = 1; - capture[n].s = s; -} - - -/* -** Remove dynamic captures from the Lua stack (called in case of failure) -*/ -static int removedyncap (lua_State *L, Capture *capture, - int level, int last) { - int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */ - int top = lua_gettop(L); - if (id == 0) return 0; /* no dynamic captures? */ - lua_settop(L, id - 1); /* remove captures */ - return top - id + 1; /* number of values removed */ -} - - -/* -** Opcode interpreter -*/ -const char *match (lua_State *L, const char *o, const char *s, const char *e, - Instruction *op, Capture *capture, int ptop, short *labelf, const char **sfail) { /* labeled failure */ - Stack stackbase[INITBACK]; - Stack *stacklimit = stackbase + INITBACK; - Stack *stack = stackbase; /* point to first empty slot in stack */ - int capsize = INITCAPSIZE; - int captop = 0; /* point to first empty slot in captures */ - int ndyncap = 0; /* number of dynamic captures (in Lua stack) */ - const Instruction *p = op; /* current instruction */ - byte insidepred = OUTPRED; /* labeled failure: label environment is off inside predicates */ - stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack->labenv = insidepred; stack++; /* labeled failure */ - *sfail = s; /* labeled failure */ - lua_pushlightuserdata(L, stackbase); - for (;;) { -#if defined(DEBUG) - printf("-------------------------------------\n"); - printcaplist(capture, capture + captop); - printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", - s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); - printinst(op, p); -#endif - assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); - assert(insidepred == INPRED || insidepred == OUTPRED); - switch ((Opcode)p->i.code) { - case IEnd: { - assert(stack == getstackbase(L, ptop) + 1); - capture[captop].kind = Cclose; - capture[captop].s = NULL; - return s; - } - case IGiveup: { - assert(stack == getstackbase(L, ptop)); - return NULL; - } - case IRet: { - assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); - p = (--stack)->p; - continue; - } - case IAny: { - if (s < e) { p++; s++; } - else { - *labelf = LFAIL; /* labeled failure */ - updatefarthest(*sfail, s); /*labeled failure */ - goto fail; - } - continue; - } - case IUTFR: { - int codepoint; - if (s >= e) - goto fail; - s = utf8_decode (s, &codepoint); - if (s && p[1].offset <= codepoint && codepoint <= utf_to(p)) - p += 2; - else { - *labelf = LFAIL; /* labeled failure */ - updatefarthest(*sfail, s); /*labeled failure */ - goto fail; - } - continue; - } - case ITestAny: { - if (s < e) p += 2; - else p += getoffset(p); - continue; - } - case IChar: { - if ((byte)*s == p->i.aux && s < e) { p++; s++; } - else { - *labelf = LFAIL; /* labeled failure */ - updatefarthest(*sfail, s); /*labeled failure */ - goto fail; - } - continue; - } - case ITestChar: { - if ((byte)*s == p->i.aux && s < e) p += 2; - else p += getoffset(p); - continue; - } - case ISet: { - int c = (byte)*s; - if (testchar((p+1)->buff, c) && s < e) - { p += CHARSETINSTSIZE; s++; } - else { - *labelf = LFAIL; /* labeled failure */ - updatefarthest(*sfail, s); /*labeled failure */ - goto fail; - } - continue; - } - case ITestSet: { - int c = (byte)*s; - if (testchar((p + 2)->buff, c) && s < e) - p += 1 + CHARSETINSTSIZE; - else p += getoffset(p); - continue; - } - case IBehind: { - int n = p->i.aux; - if (n > s - o) { - *labelf = LFAIL; /* labeled failure */ - updatefarthest(*sfail, s); /*labeled failure */ - goto fail; - } - s -= n; p++; - continue; - } - case ISpan: { - for (; s < e; s++) { - int c = (byte)*s; - if (!testchar((p+1)->buff, c)) break; - } - p += CHARSETINSTSIZE; - continue; - } - case IJmp: { - p += getoffset(p); - continue; - } - case IChoice: { - if (stack == stacklimit) - stack = doublestack(L, &stacklimit, ptop); - stack->p = p + getoffset(p); - stack->s = s; - stack->caplevel = captop; - stack->labenv = insidepred; /* labeled failure */ - stack->predchoice = 0; /* labeled failure */ - stack++; - p += 2; - continue; - } - case IPredChoice: { /* labeled failure: new instruction */ - if (stack == stacklimit) - stack = doublestack(L, &stacklimit, ptop); - stack->p = p + getoffset(p); - stack->s = s; - stack->caplevel = captop; - stack->labenv = insidepred; - stack->predchoice = 1; - stack++; - insidepred = INPRED; - p += 2; - continue; - } - case ICall: { - if (stack == stacklimit) - stack = doublestack(L, &stacklimit, ptop); - stack->s = NULL; - stack->p = p + 2; /* save return address */ - stack++; - p += getoffset(p); - continue; - } - case ICommit: { - assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); - stack--; - p += getoffset(p); - continue; - } - case IPartialCommit: { - assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); - (stack - 1)->s = s; - (stack - 1)->caplevel = captop; - p += getoffset(p); - continue; - } - case IBackCommit: { - assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); - s = (--stack)->s; - insidepred = stack->labenv; /* labeled failure */ - if (ndyncap > 0) /* are there matchtime captures? */ - ndyncap -= removedyncap(L, capture, stack->caplevel, captop); - captop = stack->caplevel; - p += getoffset(p); - continue; - } - case IThrow: { /* labeled failure */ - if (insidepred == OUTPRED) { - *labelf = (p+1)->i.key; - stack = getstackbase(L, ptop); - stack++; - } - else { - while (!(stack-1)->predchoice) { - --stack; - } - *labelf = LFAIL; - } - *sfail = s; - goto fail; - } - case IThrowRec: { /* labeled failure */ - if (insidepred == OUTPRED) { - *labelf = (p+2)->i.key; - *sfail = s; - if (stack == stacklimit) - stack = doublestack(L, &stacklimit, ptop); - stack->s = NULL; - stack->p = p + 3; - stack->caplevel = captop; - stack++; - p += getoffset(p); - continue; - } else { - while (!(stack-1)->predchoice) { - --stack; - } - *labelf = LFAIL; - *sfail = s; - } - goto fail; - } - case IFailTwice: - assert(stack > getstackbase(L, ptop)); - stack--; - /* go through */ - case IFail: - *labelf = LFAIL; /* labeled failure */ - updatefarthest(*sfail, s); /*labeled failure */ - fail: { /* pattern failed: try to backtrack */ - do { /* remove pending calls */ - assert(stack > getstackbase(L, ptop)); - s = (--stack)->s; - } while (s == NULL); - if (ndyncap > 0) /* is there matchtime captures? */ - ndyncap -= removedyncap(L, capture, stack->caplevel, captop); - captop = stack->caplevel; - assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv); - insidepred = stack->labenv; /* labeled failure */ - p = stack->p; -#if defined(DEBUG) - printf("**FAIL**\n"); -#endif - continue; - } - case ICloseRunTime: { - CapState cs; - int rem, res, n; - int fr = lua_gettop(L) + 1; /* stack index of first result */ - cs.reclevel = 0; cs.L = L; - cs.s = o; cs.ocap = capture; cs.ptop = ptop; - n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ - captop -= n; /* remove nested captures */ - ndyncap -= rem; /* update number of dynamic captures */ - fr -= rem; /* 'rem' items were popped from Lua stack */ - res = resdyncaptures(L, fr, s - o, e - o); /* get result */ - if (res == -1) { /* fail? */ - *labelf = LFAIL; /* labeled failure */ - updatefarthest(*sfail, s); /*labeled failure */ - goto fail; - } - s = o + res; /* else update current position */ - n = lua_gettop(L) - fr + 1; /* number of new captures */ - ndyncap += n; /* update number of dynamic captures */ - if (n == 0) /* no new captures? */ - captop--; /* remove open group */ - else { /* new captures; keep original open group */ - if (fr + n >= SHRT_MAX) - luaL_error(L, "too many results in match-time capture"); - /* add new captures + close group to 'capture' list */ - capture = growcap(L, capture, &capsize, captop, n + 1, ptop); - adddyncaptures(s, capture + captop, n, fr); - captop += n + 1; /* new captures + close group */ - } - p++; - continue; - } - case ICloseCapture: { - const char *s1 = s; - assert(captop > 0); - /* if possible, turn capture into a full capture */ - if (capture[captop - 1].siz == 0 && - s1 - capture[captop - 1].s < UCHAR_MAX) { - capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; - p++; - continue; - } - else { - capture[captop].siz = 1; /* mark entry as closed */ - capture[captop].s = s; - goto pushcapture; - } - } - case IOpenCapture: - capture[captop].siz = 0; /* mark entry as open */ - capture[captop].s = s; - goto pushcapture; - case IFullCapture: - capture[captop].siz = getoff(p) + 1; /* save capture size */ - capture[captop].s = s - getoff(p); - /* goto pushcapture; */ - pushcapture: { - capture[captop].idx = p->i.key; - capture[captop].kind = getkind(p); - captop++; - capture = growcap(L, capture, &capsize, captop, 0, ptop); - p++; - continue; - } - default: assert(0); return NULL; - } - } -} - -/* }====================================================== */ - - diff --git a/lpvm.h b/lpvm.h deleted file mode 100644 index a056701..0000000 --- a/lpvm.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -** $Id: lpvm.h $ -*/ - -#if !defined(lpvm_h) -#define lpvm_h - -#include "lpcap.h" - - -/* Virtual Machine's instructions */ -typedef enum Opcode { - IAny, /* if no char, fail */ - IChar, /* if char != aux, fail */ - ISet, /* if char not in buff, fail */ - ITestAny, /* in no char, jump to 'offset' */ - ITestChar, /* if char != aux, jump to 'offset' */ - ITestSet, /* if char not in buff, jump to 'offset' */ - ISpan, /* read a span of chars in buff */ - IUTFR, /* if codepoint not in range [offset, utf_to], fail */ - IBehind, /* walk back 'aux' characters (fail if not possible) */ - IRet, /* return from a rule */ - IEnd, /* end of pattern */ - IChoice, /* stack a choice; next fail will jump to 'offset' */ - IPredChoice, /* labeld failure: stack a choice; changes label env next fail will jump to 'offset' */ /*labeled failure */ - IJmp, /* jump to 'offset' */ - ICall, /* call rule at 'offset' */ - IOpenCall, /* call rule number 'key' (must be closed to a ICall) */ - ICommit, /* pop choice and jump to 'offset' */ - IPartialCommit, /* update top choice to current position and jump */ - IBackCommit, /* backtrack like "fail" but jump to its own 'offset' */ - IFailTwice, /* pop one choice and then fail */ - IFail, /* go back to saved state on choice and jump to saved offset */ - IGiveup, /* internal use */ - IFullCapture, /* complete capture of last 'off' chars */ - IOpenCapture, /* start a capture */ - ICloseCapture, - ICloseRunTime, - IThrow, /* fails with a given label */ /*labeled failure */ - IThrowRec, /* fails with a given label and call rule at 'offset' */ /*labeled failure */ - IEmpty /* to fill empty slots left by optimizations */ -} Opcode; - - - -typedef union Instruction { - struct Inst { - byte code; - byte aux; - short key; - } i; - int offset; - byte buff[1]; -} Instruction; - - -/* extract 24-bit value from an instruction */ -#define utf_to(inst) (((inst)->i.key << 8) | (inst)->i.aux) - - -LUAI_FUNC void printpatt (Instruction *p, int n); -LUAI_FUNC const char *match (lua_State *L, const char *o, const char *s, const char *e, - Instruction *op, Capture *capture, int ptop, short *labelf, const char **sfail); /* labeled failure */ - -#endif - diff --git a/makefile b/makefile index c06ab86..bf4aa80 100644 --- a/makefile +++ b/makefile @@ -25,7 +25,7 @@ CWARNS = -Wall -Wextra -pedantic \ CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC CC = gcc -FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o +FILES = lplvm.o lplcap.o lpltree.o lplcode.o lplprint.o # For Linux linux: make lpeglabel.so "DLLFLAGS = -shared -fPIC" @@ -54,9 +54,9 @@ clean: rm -f $(FILES) lpeglabel.so -lpcap.o: lpcap.c lpcap.h lptypes.h -lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h -lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h -lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h -lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h +lplcap.o: lplcap.c lplcap.h lpltypes.h +lplcode.o: lplcode.c lpltypes.h lplcode.h lpltree.h lplvm.h lplcap.h +lplprint.o: lplprint.c lpltypes.h lplprint.h lpltree.h lplvm.h lplcap.h +lpltree.o: lpltree.c lpltypes.h lplcap.h lplcode.h lpltree.h lplvm.h lplprint.h +lplvm.o: lplvm.c lplcap.h lpltypes.h lplvm.h lplprint.h lpltree.h -- cgit v1.2.3-55-g6feb