Removing labeled choice, updating testlabel, and disabling an optimization related to Throw

author: Sergio Queiroz <sqmedeiros@gmail.com> 2016-11-10 16:26:11 -0300
committer: Sergio Queiroz <sqmedeiros@gmail.com> 2016-11-10 16:26:11 -0300
commit: fd28f9d9e54f33bf7ae3a5e12dc71478f9c91aea (patch)
tree: 875ab38000e52376583bc13741b18701c6294f80
parent: d84dd6b3659f94b09e67eb90a10e71eb05c5630e (diff)
download: lpeglabel-fd28f9d9e54f33bf7ae3a5e12dc71478f9c91aea.tar.gz
lpeglabel-fd28f9d9e54f33bf7ae3a5e12dc71478f9c91aea.tar.bz2
lpeglabel-fd28f9d9e54f33bf7ae3a5e12dc71478f9c91aea.zip
9 files changed, 348 insertions, 288 deletions
diff --git a/lpcode.c b/lpcode.c
index d5f8d68..b2dbba2 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -196,7 +196,7 @@ int checkaux (TTree *tree, int pred) {
      if (checkaux(sib2(tree), pred)) return 1;
      /* else return checkaux(sib1(tree), pred); */
      tree = sib1(tree); goto tailcall;
-                case TLabChoice: case TRecov: /* labeled failure */
+                case TRecov: /* labeled failure */
     /* we do not know whether sib2 will be evaluated */
     tree = sib1(tree); goto tailcall;
    case TCapture: case TGrammar: case TRule:
@@ -218,10 +218,10 @@ int fixedlenx (TTree *tree, int count, int len) {
  switch (tree->tag) {
    case TChar: case TSet: case TAny:
      return len + 1;
-    case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
+    case TFalse: case TTrue: case TNot: case TAnd: case TBehind: 
-    case TThrow: /* labeled failure */
      return len;
    case TRep: case TRunTime: case TOpenCall:
+    case TThrow: /* labeled failure */
      return -1;
    case TCapture: case TRule: case TGrammar:
      /* return fixedlenx(sib1(tree), count); */
@@ -237,7 +237,7 @@ int fixedlenx (TTree *tree, int count, int len) {
      /* else return fixedlenx(sib2(tree), count, len); */
      tree = sib2(tree); goto tailcall;
    }
-    case TChoice: case TLabChoice: {  /* labeled failure */
+    case TChoice: {  
      int n1, n2;
      n1 = fixedlenx(sib1(tree), count, len);
      if (n1 < 0) return -1;
@@ -287,7 +287,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
      loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */
      return 1;
    } 
-    case TChoice: case TLabChoice: {  /*(?) labeled failure */
+    case TChoice: {  
      Charset csaux;
      int e1 = getfirst(sib1(tree), follow, firstset);
      int e2 = getfirst(sib2(tree), follow, &csaux);
@@ -378,7 +378,7 @@ static int headfail (TTree *tree) {
      if (!nofail(sib2(tree))) return 0;
      /* else return headfail(sib1(tree)); */
      tree = sib1(tree); goto tailcall;
-    case TChoice: case TLabChoice: case TRecov: /* labeled failure */
+    case TChoice:  case TRecov: /* labeled failure */
      if (!headfail(sib1(tree))) return 0;
      /* else return headfail(sib2(tree)); */
      tree = sib2(tree); goto tailcall;
@@ -398,7 +398,7 @@ static int needfollow (TTree *tree) {
    case TChar: case TSet: case TAny:
    case TFalse: case TTrue: case TAnd: case TNot:
    case TRunTime: case TGrammar: case TCall: case TBehind:
-    case TThrow: case TLabChoice: case TRecov: /* (?)labeled failure */
+    case TThrow: case TRecov: /* (?)labeled failure */
      return 0;
    case TChoice: case TRep:
      return 1;
@@ -433,7 +433,7 @@ int sizei (const Instruction *i) {
      return 2;
    case IThrow:  /* labeled failure */ 
                        return 1;
-    case ILabChoice: case IRecov: 
+   case IRecov: 
      return (CHARSETINSTSIZE - 1) + 2; /* labeled failure */
    default: return 1;
  }
@@ -499,7 +499,7 @@ static int addoffsetinst (CompileState *compst, Opcode op) {
  int i = addinstruction(compst, op, 0);  /* instruction */
  addinstruction(compst, (Opcode)0, 0);  /* open space for offset */
  assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2 || 
-        op == IRecov || op == ILabChoice); /* labeled failure */
+        op == IRecov); /* labeled failure */
  return i;
 }
@@ -707,21 +707,6 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
 /* labeled failure begin */
-static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
-                        const Charset *fl, const byte *cs) {
-        int emptyp2 = (p2->tag == TTrue);
-                int pcommit;
-    int test = NOINST;
-                int pchoice = addoffsetinst(compst, ILabChoice);
-                addcharset(compst, cs);
-    codegen(compst, p1, emptyp2, test, fullset);
-    pcommit = addoffsetinst(compst, ICommit);
-    jumptohere(compst, pchoice);
-    jumptohere(compst, test);
-    codegen(compst, p2, opt, NOINST, fl);
-    jumptohere(compst, pcommit);
-}
 static void coderecovery (CompileState *compst, TTree *p1, TTree *p2, int opt,
                        const Charset *fl, const byte *cs) {
        int emptyp2 = (p2->tag == TTrue);
@@ -970,10 +955,6 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
      addinstruction(compst, IThrow, (byte) tree->u.label);
                        break;
                }
-                case TLabChoice: { /* labeled failure */
-                        codelabchoice(compst, sib1(tree), sib2(tree), opt, fl, treelabelset(tree)); 
-                        break;
-                }
                case TRecov: { /* labeled failure */
                        coderecovery(compst, sib1(tree), sib2(tree), opt, fl, treelabelset(tree)); 
                        break;
@@ -1000,7 +981,7 @@ static void peephole (CompileState *compst) {
    switch (code[i].i.code) {
      case IChoice: case ICall: case ICommit: case IPartialCommit:
      case IBackCommit: case ITestChar: case ITestSet:
-      case ILabChoice: case IRecov: /* labeled failure */
+      case IRecov: /* labeled failure */
      case ITestAny: {  /* instructions with labels */
        jumptothere(compst, i, finallabel(code, i));  /* optimize label */
        break;
diff --git a/lpprint.c b/lpprint.c
index 122d2e5..ff69a6a 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -112,7 +112,7 @@ void printinst (const Instruction *op, const Instruction *p) {
      printf("%d", p->i.aux);
      break;
    }
-                case ILabChoice: case IRecov: { /* labeled failure */
+                case IRecov: { /* labeled failure */
      printjmp(op, p);
      printcharset((p+2)->buff);
      break;
@@ -223,7 +223,7 @@ void printtree (TTree *tree, int ident) {
    default: {
      int sibs = numsiblings[tree->tag];
      printf("\n");
-      if (tree->tag == TLabChoice || tree->tag == TRecov) { /* labeled failure */
+      if (tree->tag == TRecov) { /* labeled failure */
        printcharset(treelabelset(tree));
        printf("\n");
                        }
diff --git a/lptree.c b/lptree.c
index 700398b..9861cfe 100644
--- a/lptree.c
+++ b/lptree.c
@@ -723,37 +723,25 @@ static int lp_behind (lua_State *L) {
 */
 static int lp_throw (lua_State *L) {
        int label = luaL_checkinteger(L, -1);
-        luaL_argcheck(L, label >= 0 && label < MAXLABELS, -1, "the number of a label must be between 0 and 255");
+        luaL_argcheck(L, label >= 1 && label < MAXLABELS, -1, "the number of a label must be between 1 and 255");
        newthrowleaf(L, label);
        return 1;
 }
 /*
-** labeled choice function
+** labeled recovery function
 */
-static int lp_labchoice (lua_State *L) {
-        int n = lua_gettop(L);
-        TTree *tree = newrootlab2sib(L, TLabChoice);
-        int i;
-        for (i = 3; i <= n; i++) {
-                int d = luaL_checkinteger(L, i);
-                luaL_argcheck(L, d >= 0 && d < MAXLABELS, i, "the number of a label must be between 0 and 255");
-    setlabel(treelabelset(tree), (byte)d);
-        }
-  return 1;
-}
 static int lp_recovery (lua_State *L) {
        int n = lua_gettop(L);
        TTree *tree = newrootlab2sib(L, TRecov);
+  luaL_argcheck(L, n >= 3, 3, "non-nil value expected");
  if (n == 2) {  /* catches fail as default */
                /*setlabel(treelabelset(tree), LFAIL);  recovery does not catch regular fail */
  } else {
                int i;
                for (i = 3; i <= n; i++) {
                        int d = luaL_checkinteger(L, i);
-                        luaL_argcheck(L, d >= 0 && d < MAXLABELS, i, "the number of a label must be between 0 and 255");
+                        luaL_argcheck(L, d >= 1 && d < MAXLABELS, i, "the number of a label must be between 1 and 255");
        setlabel(treelabelset(tree), (byte)d);
                }
        }
@@ -1089,7 +1077,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
        return nb;
      /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */
      tree = sib2(tree); goto tailcall;
-    case TChoice: case TLabChoice: case TRecov: /* must check both children */  /* labeled failure */
+    case TChoice:  case TRecov: /* must check both children */  /* labeled failure */
      nb = verifyrule(L, sib1(tree), passed, npassed, nb);
      /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
      tree = sib2(tree); goto tailcall;
@@ -1342,7 +1330,6 @@ static struct luaL_Reg pattreg[] = {
  {"setmaxstack", lp_setmax},
  {"type", lp_type},
  {"T", lp_throw}, /* labeled failure throw */
-  {"Lc", lp_labchoice}, /* labeled failure choice */
  {"Rec", lp_recovery}, /* labeled failure choice */
  {NULL, NULL}
 };
diff --git a/lptree.h b/lptree.h
index cca346e..b75f323 100644
--- a/lptree.h
+++ b/lptree.h
@@ -25,7 +25,7 @@ typedef enum TTag {
  TBehind,  /* match behind */
  TCapture,  /* regular capture */
  TRunTime,  /* run-time capture */
-  TThrow, TLabChoice, TRecov     /* labeled failure */
+  TThrow, TRecov     /* labeled failure */
 } TTag;
 /* number of siblings for each tree */
diff --git a/lpvm.c b/lpvm.c
index b200dce..122c8f4 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -274,17 +274,6 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
        p += 2;
        continue;
      }
-      case ILabChoice: { /* labeled failure */
-        if (stack == stacklimit)
-          stack = doublestack(L, &stacklimit, ptop);
-        stack->p = p + getoffset(p);
-        stack->s = s;
-        stack->ls = (const Labelset *) ((p + 2)->buff);
-        stack->caplevel = captop;
-        stack++;
-        p += (CHARSETINSTSIZE - 1) + 2;
-        continue;
-      }
                        case IRecov: { /* labeled failure */
        if (stack == stacklimit)
          stack = doublestack(L, &stacklimit, ptop);
diff --git a/lpvm.h b/lpvm.h
index 3c27027..bcfc22f 100644
--- a/lpvm.h
+++ b/lpvm.h
@@ -35,7 +35,6 @@ typedef enum Opcode {
  ICloseCapture,
  ICloseRunTime,
  IThrow,   /* "fails" with a specific label labeled failure */
-  ILabChoice,   /* labeled choice */
  IRecov    /* stack a recovery; next fail with label 'f' will jump to 'offset'  */
 } Opcode;
diff --git a/relabel.lua b/relabelrec.lua
index 0e7195e..16ca7f0 100644
--- a/relabel.lua
+++ b/relabelrec.lua
@@ -82,15 +82,9 @@ for i, err in ipairs(errinfo) do
  labels[err[1]] = i
 end
-local syntaxerrs = {}
 local function expect (pattern, labelname)
  local label = labels[labelname]
-  local record = function (input, pos)
+  return pattern + m.T(label)
-    tinsert(syntaxerrs, { label = label, pos = pos })
-    return true
-  end
-  return pattern + m.Cmt("", record) * m.T(label)
 end
@@ -222,86 +216,34 @@ local function NT (n, b)
  end
 end
-local function labchoice (...)
+local function choicerec (...)
  local t = { ... }
  local n = #t
  local p = t[1]
  local i = 2
  while i + 1 <= n do
    -- t[i] == nil when there are no labels
-    p = t[i] and mm.Lc(p, t[i+1], unpack(t[i])) or mt.__add(p, t[i+1])
+    p = t[i] and mm.Rec(p, t[i+1], unpack(t[i])) or mt.__add(p, t[i+1])
    i = i + 2
  end
  return p
 end
-- error recovery
-local skip = m.P { "Skip",
-  Skip = (-m.P"/" * -m.P(name * arrow) * m.V"Ignored")^0 * m.Cc(dummy);
-  Ignored = m.V"Group" + any;
-  Group = "(" * (-m.P")" * m.V"Ignored")^0 * ")"
-        + "{" * (-m.P"}" * m.V"Ignored")^0 * "}"
-        + "[" * (-m.P"]" * m.V"Ignored")^0 * "]"
-        + "'" * (-m.P"'" * m.V"Ignored")^0 * "'"
-        + '"' * (-m.P'"' * m.V"Ignored")^0 * '"';
-}
-local ignore = m.Cmt(any, function (input, pos)
-  return syntaxerrs[#syntaxerrs].pos, dummy
-end)
-local pointAtStart = m.Cmt(any, function (input, pos)
-  -- like ignore but makes the last syntax error point at the start
-  local ret = syntaxerrs[#syntaxerrs].pos
-  syntaxerrs[#syntaxerrs].pos = pos-1
-  return ret, dummy
-end)
-local function labify (labelnames)
-  for i, l in ipairs(labelnames) do
-    labelnames[i] = labels[l]
-  end
-  return labelnames
-end
-local labelset1 = labify {
-  "ExpPatt2", "ExpPatt3",
-  "ExpPatt4", "ExpPatt5", "ExpPatt6", "ExpPatt7",
-  "ExpPatt8", "ExpPattOrClose",
-  "ExpNum", "ExpCap",
-  "ExpName1", "ExpName2", "ExpName3",
-  "ExpNameOrLab", "ExpItem",
-  "MisClose6", "MisClose7"
-}
-local labelset2 = labify {
-  "MisClose1", "MisClose2", "MisClose3", "MisClose4", "MisClose5"
-}
-local labelset3 = labify {
-  "ExpPatt1", "ExpLab1", "ExpLab2", "MisClose7"
-}
 local exp = m.P{ "Exp",
  Exp = S * ( m.V"Grammar"
-            + (m.V"RecovSeq" * (S * "/" * m.Lc((m.Ct(m.V"Labels") + m.Cc(nil))
+            + (m.V"Seq" * (S * (("//" * m.Ct(m.V"Labels")) + ("/" * m.Cc(nil)))
-                                                * expect(S * m.V"RecovSeq",
+                                   * expect(S * m.V"Seq", "ExpPatt1")
-                                                    "ExpPatt1"),
-                                               m.Cc(nil) * skip,
-                                               unpack(labelset3))
                            )^0
-              ) / labchoice);
+              ) / choicerec);
  Labels = m.P"{" * expect(S * m.V"Label", "ExpLab1")
           * (S * "," * expect(S * m.V"Label", "ExpLab2"))^0
           * expect(S * "}", "MisClose7");
-  RecovSeq = m.Lc(m.V"Seq", skip, unpack(labelset1));
  Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix" * (S * m.V"Prefix")^0, mt.__mul);
  Prefix = "&" * expect(S * m.V"Prefix", "ExpPatt2") / mt.__len
         + "!" * expect(S * m.V"Prefix", "ExpPatt3") / mt.__unm
         + m.V"Suffix";
-  Suffix = m.Cf(m.V"RecovPrimary" *
+  Suffix = m.Cf(m.V"Primary" *
          ( S * ( m.P"+" * m.Cc(1, mt.__pow)
                + m.P"*" * m.Cc(0, mt.__pow)
                + m.P"?" * m.Cc(-1, mt.__pow)
@@ -318,13 +260,13 @@ local exp = m.P{ "Exp",
                           "ExpName1")
                )
          )^0, function (a,b,f) return f(a,b) end );
-  RecovPrimary = m.Lc(m.V"Primary", ignore, unpack(labelset2));
  Primary = "(" * expect(m.V"Exp", "ExpPatt4") * expect(S * ")", "MisClose1")
-          + m.Lc(String / mm.P, pointAtStart,
+          + String / mm.P
-              labels["MisTerm1"], labels["MisTerm2"])
+          + Class
-          + m.Lc(Class, pointAtStart, labels["MisClose8"])
          + defined
-          + "%" * expect(m.V"Labels", "ExpNameOrLab") / mm.T
+          + "%" * expect(m.P"{", "ExpNameOrLab")
+            * expect(S * m.V"Label", "ExpLab1")
+            * expect(S * "}", "MisClose7") / mm.T
          + "{:" * (name * ":" + m.Cc(nil)) * expect(m.V"Exp", "ExpPatt5")
            * expect(S * ":}", "MisClose2")
            / function (n, p) return mm.Cg(p, n) end
@@ -374,23 +316,20 @@ local function compile (p, defs)
  if mm.type(p) == "pattern" then return p end   -- already compiled
  p = p .. " " -- for better reporting of column numbers in errors when at EOF
  local ok, cp, label, suffix = pcall(function() return pattern:match(p, 1, defs) end)
-  if not ok and #syntaxerrs == 0 then
+  if not ok and cp then
    if type(cp) == "string" then
      cp = cp:gsub("^[^:]+:[^:]+: ", "")
    end
    error(cp, 3)
  end
-  if #syntaxerrs > 0 then
+  if not cp then
    local lines = splitlines(p)
-    local errors = {}
+    local line, col = lineno(p, #p - #suffix + 1)
-    for i, err in ipairs(syntaxerrs) do
+    local err = {}
-      local line, col = lineno(p, err.pos)
+    tinsert(err, "L" .. line .. ":C" .. col .. ": " .. errmsgs[label])
-      tinsert(errors, "L" .. line .. ":C" .. col .. ": " .. errmsgs[err.label])
+    tinsert(err, lines[line])
-      tinsert(errors, lines[line])
+    tinsert(err, rep(" ", col-1) .. "^")
-      tinsert(errors, rep(" ", col-1) .. "^")
+    error("syntax error(s) in pattern\n" .. concat(err, "\n"), 3)
-    end
-    syntaxerrs = {}
-    error("syntax error(s) in pattern\n" .. concat(errors, "\n"), 3)
  end
  return cp
 end
diff --git a/test.lua b/test.lua
index fc2b607..18ab20f 100755
--- a/test.lua
+++ b/test.lua
@@ -1110,7 +1110,7 @@ checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
 -- Tests for 're' module
 -------------------------------------------------------------------
-local re = require "relabel"
+local re = require "relabelrec"
 local match, compile = re.match, re.compile
diff --git a/testlabel.lua b/testlabel.lua
index 25036b2..cb92657 100644
--- a/testlabel.lua
+++ b/testlabel.lua
@@ -11,6 +11,15 @@ local function checkeqlab (x, ...)
  end
 end
+local function checkeq (x, y, p)
+if p then print(x,y) end
+  if type(x) ~= "table" then assert(x == y)
+  else
+    for k,v in pairs(x) do checkeq(v, y[k], p) end
+    for k,v in pairs(y) do checkeq(v, x[k], p) end
+  end
+end
 -- throws a label 
 p = m.T(1)
 s = "abc"
@@ -32,48 +41,97 @@ local g = m.P{
 r, l, serror = g:match(s)
 assert(r == nil and l == 1 and serror == "abc")
--[==[ TODO: labeled choice does not work anymore
-- throws a label that is not caught by labeled choice
+-- throws a label that is not caught by the recovery operator
-p = m.Lc(m.T(2), m.P"a", 1, 3)
+p = m.Rec(m.T(2), m.P"a", 1, 3)
 r, l, serror = p:match(s)
 assert(r == nil and l == 2 and serror == "abc")
-- modifies previous pattern
+-- wraps the previous pattern with a recovery that catches label "2"
-- adds another labeled choice to catch label "2"
+p = m.Rec(p, m.P"a", 2)
-p = m.Lc(p, m.P"a", 2)
 assert(p:match(s) == 2)
-- throws a label that is caught by labeled choice
+-- throws a label that is caught by recovery 
-p = m.Lc(m.T(25), m.P"a", 25)
+p = m.Rec(m.T(25), m.P"a", 25)
 assert(p:match(s) == 2)
 -- "fail" is label "0"
-- throws the "fail" label that is not caught by the labeled choice
+-- throws the "fail" label after the recovery
 s = "bola"
 r, l, serror = p:match("bola")
 assert(r == nil and l == 0 and serror == "bola")
-- labeled choice does not catch "fail" by default
+-- Recovery does not catch "fail" by default
-p = m.Lc(m.P"b", m.P"a", 1)
+p = m.Rec(m.P"b", m.P"a", 1)
 r, l, serror = p:match("abc") 
 assert(r == nil and l == 0 and serror == "abc")
 assert(p:match("bola") == 2)
-- labeled choice can catch "fail"
-p = m.Lc(m.P"b", m.P"a", 0)
+-- recovery operator catches "1" or "3"
+p = m.Rec((m.P"a" + m.T(1)) * m.T(3), (m.P"a" + m.P"b"), 1, 3)
+assert(p:match("aac") == 3)
+assert(p:match("abc") == 3)
+r, l, serror = p:match("acc")
+assert(r == nil and l == 0 and serror == "cc")
+-- throws 1, recovery pattern matches 'b', throws 3, and recovery pattern matches 'a'
+assert(p:match("bac") == 3)
+r, l, serror = p:match("cab")
+assert(r == nil and l == 0 and serror == "cab")
+-- associativity
+-- (p1 / %1) //{1} (p2 / %2) //{2} p3
+-- left-associativity
+-- ("a" //{1}  "b") //{2} "c"
+p = m.Rec(m.Rec(m.P"a" + m.T(1), m.P"b" + m.T(2), 1), m.P"c", 2)
 assert(p:match("abc") == 2)
-assert(p:match("bola") == 2)
+assert(p:match("bac") == 2)
+assert(p:match("cab") == 2)
+r, l, serror = p:match("dab")
+assert(r == nil and l == 0 and serror == "dab")
-- "fail" is label "0"
-- labeled choice catches "fail" or "3"
+-- right-associativity
-p = m.Lc(m.P"a" * m.T(3), (m.P"a" + m.P"b"), 0, 3)
+-- "a" //{1}  ("b" //{2} "c")
+p = m.Rec(m.P"a" + m.T(1), m.Rec(m.P"b" + m.T(2), m.P"c", 2), 1)
 assert(p:match("abc") == 2)
 assert(p:match("bac") == 2)
+assert(p:match("cab") == 2)
+r, l, serror = p:match("dab")
+assert(r == nil and l == 0 and serror == "dab")
+-- associativity -> in this case the error thrown by p1 is only
+--                  recovered when we have a left-associative operator
+-- (p1 / %2) //{1} (p2 / %2) //{2} p3
+-- left-associativity
+-- ("a" //{1}  "b") //{2} "c"
+p = m.Rec(m.Rec(m.P"a" + m.T(2), m.P"b" + m.T(2), 1), m.P"c", 2)
+assert(p:match("abc") == 2)
+r, l, serror = p:match("bac")
+assert(r == nil and l == 0 and serror == "bac")
+assert(p:match("cab") == 2)
+r, l, serror = p:match("dab")
+assert(r == nil and l == 0 and serror == "dab")
+-- right-associativity
+-- "a" //{1}  ("b" //{2} "c")
+p = m.Rec(m.P"a" + m.T(2), m.Rec(m.P"b" + m.T(2), m.P"c", 2), 1)
+assert(p:match("abc") == 2)
+r, l, serror = p:match("bac")
+assert(r == nil and l == 2 and serror == "bac")
 r, l, serror = p:match("cab")
-assert(r == nil and l == 0 and serror == "cab")
+assert(r == nil and l == 2 and serror == "cab")
+r, l, serror = p:match("dab")
+assert(r == nil and l == 2 and serror == "dab")
 -- tests related to predicates
 p = #m.T(1) + m.P"a"
@@ -84,74 +142,66 @@ p = ##m.T(1) + m.P"a"
 r, l, serror = p:match("abc")
 assert(r == nil and l == 1 and serror == "abc")
-p = #m.T(0) * m.P"a"
-assert(p:match("abc") == fail)
-p = #m.T(0) + m.P"a"
-assert(p:match("abc") == 2)
 p = -m.T(1) * m.P"a"
 r, l, serror = p:match("abc")
 assert(r == nil and l == 1 and serror == "abc")
+p = -m.T(1) * m.P"a"
+r, l, serror = p:match("bbc")
+assert(r == nil and l == 1 and serror == "bbc")
 p = -(-m.T(1)) * m.P"a"
 r, l, serror = p:match("abc")
 assert(r == nil and l == 1 and serror == "abc")
-p = -m.T(0) * m.P"a"
+p = m.Rec(-m.T(22), m.P"a", 22)
-assert(p:match("abc") == 2)
+r, l, serror = p:match("abc")
+assert(r == nil and l == 0 and serror == "bc")
-p = -m.T(0) + m.P"a"
+assert(p:match("bbc") == 1)
-assert(p:match("abc") == 1)
-p = -(-m.T(0)) + m.P"a"
+p = m.Rec(#m.T(22), m.P"a", 22)
-assert(p:match("abc") == 2)
+assert(p:match("abc") == 1)
-p = m.Lc(-m.T(22), m.P"a", 22)
+p = #m.Rec(m.T(22), m.P"a", 22)
-assert(p:match("abc") == 2)
+assert(p:match("abc") == 1)
-p = m.Lc(-m.T(0), m.P"a", 0)
+p = m.Rec(m.T(22), #m.P"a", 22)
 assert(p:match("abc") == 1)
-p = m.Lc(#m.T(22), m.P"a", 22)
+p = m.Rec(#m.T(22), m.P"a", 22)
-assert(p:match("abc") == 2)
+r, l, serror = p:match("bbc")
+assert(r == nil and l == 0 and serror == "bbc")
-p = m.Lc(#m.T(0), m.P"a", 0)
-assert(p:match("abc") == 2)
 -- tests related to repetition
 p = m.T(1)^0
 r, l, serror = p:match("ab")
 assert(r == nil and l == 1 and serror == "ab")
-p = m.T(0)^0
-assert(p:match("ab") == 1)
 p = (m.P"a" + m.T(1))^0
 r, l, serror = p:match("aa")
 assert(r == nil and l == 1 and serror == "")
-p = (m.P"a" + m.T(0))^0
-assert(p:match("aa") == 3)
 -- Bug reported by Matthew Allen
 -- some optmizations performed by LPeg should not be
 -- applied in case of labeled choices
-p = m.Lc(m.P"A", m.P(true), 1) + m.P("B")
+p = m.Rec(m.P"A", m.P(true), 1) + m.P("B")
 assert(p:match("B") == 2)
-p = m.Lc(m.P"A", m.P(false), 1) + m.P("B")
+p = m.Rec(m.P"A", m.P(false), 1) + m.P("B")
 assert(p:match("B") == 2)
 --[[
-S -> A /{1} 'a'
+S -> A //{1} 'a'
 A -> B
 B -> %1
 ]]
 g = m.P{
        "S",
-        S = m.Lc(m.V"A", m.P"a", 1),
+        S = m.Rec(m.V"A", m.P"a", 1),
        A = m.V"B",
        B = m.T(1),
 }
@@ -177,66 +227,96 @@ r, l, serror = g:match("a;a")
 assert(r == nil and l == 1 and serror == "")
-- %1 /{1,3} %2 /{2} 'a'
+-- %1 //{1,3} %2 //{2} 'a'
-p = m.Lc(m.Lc(m.T(1), m.T(2), 1, 3), m.P"a", 2)
+p = m.Rec(m.Rec(m.T(1), m.T(2), 1, 3), m.P"a", 2)
 assert(p:match("abc") == 2)
 r, l, serror = p:match("")
 assert(r == nil and l == 0 and serror == "")
-p = m.Lc(m.T(1), m.Lc(m.T(2), m.P"a", 2), 1, 3)
+p = m.Rec(m.T(1), m.Rec(m.T(2), m.P"a", 2), 1, 3)
 assert(p:match("abc") == 2)
 r, l, serror = p:match("")
 assert(r == nil and l == 0 and serror == "")
-]==]
+-- Infinite loop. TODO: check the semantics
+-- %1 //{1} %1 
+p = m.Rec(m.T(1), m.T(1), 1)
+--r, l, serror = p:match("ab")
+--assert(r == nil and l == 1 and serror == "ab")
+-- %1 //{1} 'a' (!. / %1) 
+p = m.Rec(m.T(1), m.P"a" * (-m.P(1) + m.T(1)), 1)
+r, l, serror = p:match("ab")
+assert(r == nil and l == 0 and serror == "b")
+r, l, serror = p:match("cd")
+assert(r == nil and l == 0 and serror == "cd")
+-- %1 //{1} . (!. / %1) 
+p = m.Rec(m.T(1), m.P(1) * (-m.P(1) + m.T(1)), 1)
+assert(p:match("abc") == 4)
 -- testing the limit of labels
-p = m.T(0)
+-- can only throw labels between 1 and 255
-s = "abc"
+local r = pcall(m.Rec, m.P"b", m.P"a", 0)
-r, l, serror = p:match(s) 
+assert(r == false)
-assert(r == nil and l == 0 and serror == "abc")
-p = m.T(255)
+local r = pcall(m.Rec, m.P"b", m.P"a", 256)
-s = "abc"
+assert(r == false)
-r, l, serror = p:match(s) 
-assert(r == nil and l == 255 and serror == "abc")
-local r = pcall(m.T, -1)
+local r = pcall(m.Rec, m.P"b", m.P"a", -1)
+assert(r == false)
+local r = pcall(m.T, 0)
 assert(r == false)
 local r = pcall(m.T, 256)
 assert(r == false)
+local r = pcall(m.T, -1)
+assert(r == false)
+local r = m.Rec(m.P"b", m.P"a", 255)
+assert(p:match("a") == 2)
+p = m.T(255)
+s = "abc"
+r, l, serror = p:match(s) 
+assert(r == nil and l == 255 and serror == "abc")
 print("+")
--[==[ TODO: labeled choice does not work anymore
 --[[ grammar based on Figure 8 of paper submitted to SCP
-S  -> S0 /{1} ID /{2} ID '=' Exp /{3} 'unsigned'* 'int' ID /{4} 'unsigned'* ID ID / %error
+S  -> S0 //{1} ID //{2} ID '=' Exp //{3} 'unsigned'* 'int' ID //{4} 'unsigned'* ID ID / %error
-S0 -> ID S1 / 'unsigned' S2 / 'int' %3
+S0 -> S1 / S2 / &'int' %3
-S1 -> '=' %2  /  !. %1  /  ID %4
+S1 -> &(ID '=') %2  /  &(ID !.) %1  /  &ID %4
-S2 -> 'unsigned' S2  /  ID %4  /  'int' %3 
+S2 -> &('unsigned'+ ID) %4  /  & ('unsigned'+ 'int') %3 
 ]]
 local sp = m.S" \t\n"^0
 local eq = sp * m.P"="
 g = m.P{
        "S",
-        S = m.Lc(
+        S = m.Rec(
-         m.Lc(
+         m.Rec(
-            m.Lc(
+            m.Rec(
-               m.Lc(m.V"S0", m.V"ID" * (m.P(1) + ""), 1),
+               m.Rec(m.V"S0", m.V"ID", 1),
               m.V"ID" * eq * m.V"Exp", 2
               ),
            m.V"U"^0 * m.V"I" * m.V"ID", 3
            ),
         m.V"U"^0 * m.V"ID" * m.V"ID", 4) 
       + m.T(5), -- error
-        S0 = m.V"ID" * m.V"S1"  +  m.V"U" * m.V"S2"  +  m.V"I" * m.T(3),
+        S0 = m.V"S1"  +  m.V"S2"  +  #m.V"I" * m.T(3),
-        S1 = eq * m.T(2) + sp * -m.P(1) * m.T(1) + m.V"ID" * m.T(4),
+        S1 = #(m.V"ID" * eq) * m.T(2) + sp * #(m.V"ID" * -m.P(1)) * m.T(1) + #m.V"ID" * m.T(4),
-        S2 = m.V"U" * m.V"S2"  +   m.V"ID" * m.T(4)  +   m.V"I" * m.T(3),
+        S2 = #(m.V"U"^1 * m.V"ID") * m.T(4)  +  #(m.V"U"^1 * m.V"I") * m.T(3),
        ID = sp * m.P"a",
        U = sp * m.P"unsigned",
        I = sp * m.P"int",
@@ -273,53 +353,59 @@ assert(r == nil and l == 5 and serror == s)
 print("+")
-local re = require 'relabel'
-g = re.compile[['a' /{4,9} [a-z]
+local re = require 'relabelrec'
+g = re.compile[['a' //{4,9} [a-z]
 ]]
 assert(g:match("a") == 2)
 r, l, serror = g:match("b")
 assert(r == nil and l == 0 and serror == "b")
-g = re.compile[['a' /{4,9} [a-f] /{5, 7} [a-z]
+g = re.compile[['a' //{4,9} [a-f] //{5, 7} [a-z]
 ]]
 assert(g:match("a") == 2)
 r, l, serror = g:match("b")
 assert(r == nil and l == 0 and serror == "b")
-g = re.compile[[%{1} /{4,9} [a-z]
+g = re.compile[[%{1} //{4,9} [a-z]
 ]]
 r, l, serror = g:match("a")
 assert(r == nil and l == 1 and serror == "a")
-g = re.compile[[%{1} /{4,1} [a-f]
+g = re.compile[[%{1} //{4,1} [a-f]
 ]]
 assert(g:match("a") == 2)
 r, l, serror = g:match("h")
 assert(r == nil and l == 0 and serror == "h")
-g = re.compile[[[a-f]%{9} /{4,9} [a-c]%{7} /{5, 7} [a-z] ]]
+g = re.compile[[[a-f]%{9} //{4,9} [a-c]%{7} //{5, 7} [a-z] ]]
-assert(g:match("a") == 2)
+r, l, serror = g:match("a")
-assert(g:match("c") == 2)
+assert(r == nil and l == 0 and serror == "")
-r, l, serror = g:match("d")
+r, l, serror = g:match("aa")
+assert(r == nil and l == 0 and serror == "")
+assert(g:match("aaa") == 4)
+r, l, serror = g:match("ad")
 assert(r == nil and l == 0 and serror == "d")
 r, l, serror = g:match("g")
 assert(r == nil and l == 0 and serror == "g")
 --[[ grammar based on Figure 8 of paper submitted to SCP
-S  -> S0 /{1} ID /{2} ID '=' Exp /{3} 'unsigned'* 'int' ID /{4} 'unsigned'* ID ID / %error
+S  -> S0 //{1} ID //{2} ID '=' Exp //{3} 'unsigned'* 'int' ID //{4} 'unsigned'* ID ID / %error
-S0 -> ID S1 / 'unsigned' S2 / 'int' %3
+S0 -> S1 / S2 / &'int' %3
-S1 -> '=' %2  /  !. %1  /  ID %4
+S1 -> &(ID '=') %2  /  &(ID !.) %1  /  &ID %4
-S2 -> 'unsigned' S2  /  ID %4  /  'int' %3 
+S2 -> &('unsigned'+ ID) %4  /  & ('unsigned'+ 'int') %3 
 ]]
 g = re.compile([[
-        S <- S0 /{1} ID /{2} ID %s* '=' Exp /{3} U* Int ID /{4} U ID ID /{0} %{5}
+        S <- S0 //{1} ID //{2} ID %s* '=' Exp //{3} U* Int ID //{4} U ID ID / %{5}
-  S0 <- ID S1 / U S2 / Int %{3}
+  S0 <- S1 / S2 / &Int %{3}
-  S1 <- %s* '=' %{2} / !. %{1} / ID %{4}
+  S1 <- &(ID %s* '=') %{2} / &(ID !.) %{1} / &ID %{4}
-  S2 <- U S2 / ID %{4} / Int %{3}
+  S2 <- &(U+ ID) %{4} / &(U+ Int) %{3}
  ID <- %s* 'a' 
  U <- %s* 'unsigned'
  Int <- %s* 'int'
@@ -349,6 +435,8 @@ s = "unsigned int"
 r, l, serror = g:match(s)
 assert(r == nil and l == 5 and serror == s)
 local terror = { ['cmdSeq'] = "Missing ';' in CmdSeq",
                 ['ifExp'] = "Error in expresion of 'if'",
                 ['ifThen'] = "Error matching 'then' keyword",
@@ -370,12 +458,12 @@ local terror = { ['cmdSeq'] = "Missing ';' in CmdSeq",
                 ['undefined'] = "Undefined Error"}
 g = re.compile([[
-  Tiny       <- CmdSeq /{1} '' -> cmdSeq /{2} '' -> ifExp /{3} '' -> ifThen /{4} '' -> ifThenCmdSeq
+  Tiny       <- CmdSeq //{1} '' -> cmdSeq //{2} '' -> ifExp //{3} '' -> ifThen //{4} '' -> ifThenCmdSeq
-                       /{5} '' -> ifElseCmdSeq  /{6}  '' -> ifEnd  /{7} '' -> repeatCmdSeq
+                       //{5} '' -> ifElseCmdSeq  //{6}  '' -> ifEnd  //{7} '' -> repeatCmdSeq
-                       /{8} '' -> repeatUntil  /{9} '' -> repeatExp  /{10} '' -> assignOp
+                       //{8} '' -> repeatUntil  //{9} '' -> repeatExp  //{10} '' -> assignOp
-                       /{11} '' -> assignExp  /{12} '' -> readName  /{13}  '' -> writeExp
+                       //{11} '' -> assignExp  //{12} '' -> readName  //{13}  '' -> writeExp
-                       /{14} '' -> simpleExp  /{15} '' -> term  /{16} '' -> factor
+                       //{14} '' -> simpleExp  //{15} '' -> term  //{16} '' -> factor
-                       /{17} '' -> openParExp  /{18} '' -> closePar /{0} '' -> undefined
+                       //{17} '' -> openParExp  //{18} '' -> closePar / '' -> undefined
  CmdSeq     <- (Cmd (SEMICOLON / %{1})) (Cmd (SEMICOLON / %{1}))*
  Cmd        <- IfCmd / RepeatCmd / ReadCmd / WriteCmd  / AssignCmd 
  IfCmd      <- IF  (Exp / %{2})  (THEN / %{3})  (CmdSeq / %{4})  (ELSE (CmdSeq / %{5}) / '') (END / %{6})
@@ -551,21 +639,14 @@ assert(g:match(s) == terror['undefined'])
 print("+")
-]==]
-- test recovery operator
-p = m.Rec("a", "b")
-assert(p:match("a") == 2)
--assert(p:match("b") == 2)
-checkeqlab({nil, 0, "b"}, p:match("b"))
-checkeqlab({nil, 0, "c"}, p:match("c"))
 p = m.Rec("a", "b", 3) 
 assert(p:match("a") == 2)
 checkeqlab({nil, 0, "b"}, p:match("b"))
 checkeqlab({nil, 0, "c"}, p:match("c"))
-p = m.Rec(m.T(3), "b") 
+p = m.Rec(m.T(3), "b", 1) 
 checkeqlab({nil, 3, "a"}, p:match("a"))
 checkeqlab({nil, 3, "b"}, p:match("b"))
@@ -574,56 +655,26 @@ checkeqlab({nil, 0, "a"}, p:match("a"))
 assert(p:match("b") == 2)
 --[[
-S -> (A //{fail} (!c .)*) C
+S -> (A //{128} (!c .)*) C
-A -> a*b 
+A -> a*b / %{128}
-C -> c+
-]]
-g = m.P{
-        "S",
-        S = m.Rec(m.V"A", (-m.P"c" * m.P(1))^0) * m.V"C",
-        A = m.P"a"^0 * "b",
-        C = m.P"c"^1,
-}
-assert(g:match("abc") == 4)
-assert(g:match("aabc") == 5)
--assert(g:match("aadc") == 5)  --old semantics
-checkeqlab({nil, 0, "dc"}, g:match("aadc")) --new semantics
-assert(g:match("bc") == 3) -- new semantics
-checkeqlab({nil, 0, "bc"}, g:match("bbc"))
--assert(g:match("xxc") == 4) old semantics
-checkeqlab({nil, 0, "xxc"}, g:match("xxc")) --new semantics
--assert(g:match("c") == 2) --old semantics
-checkeqlab({nil, 0, "c"}, g:match("c")) --new semantics
--checkeqlab({nil, 0, ""}, g:match("fail")) --old semantics
-checkeqlab({nil, 0, "fail"}, g:match("fail")) --new semantics
--checkeqlab({nil, 0, ""}, g:match("aaxx")) --old semantics
-checkeqlab({nil, 0, "xx"}, g:match("aaxx")) --new semantics
--[[
-S -> (A //{0} (!c .)*) C
-A -> a*b / ^{0}
 C -> c+
 ]]
 g = m.P{
        "S",
-        S = m.Rec(m.V"A", (-m.P"c" * m.P(1))^0, 0) * m.V"C", --explicitly put 0 in Rec
+        S = m.Rec(m.V"A", (-m.P"c" * m.P(1))^0, 128) * m.V"C",
-        A = m.P"a"^0 * m.P"b" + m.T(0),
+        A = m.P"a"^0 * "b" + m.T(128),
        C = m.P"c"^1,
 }
 assert(g:match("abc") == 4)
 assert(g:match("aabc") == 5)
-assert(g:match("aadc") == 5) --updated
+assert(g:match("aadc") == 5)  
-assert(g:match("bc") == 3) -- new semantics
+assert(g:match("dc") == 3)
 checkeqlab({nil, 0, "bc"}, g:match("bbc"))
 assert(g:match("xxc") == 4) 
-assert(g:match("c") == 2) --old semantics updated
+assert(g:match("c") == 2)
-checkeqlab({nil, 0, ""}, g:match("fail")) --old semantics updated
+checkeqlab({nil, 0, ""}, g:match("fail"))
-checkeqlab({nil, 0, ""}, g:match("aaxx")) --old semantics updated
+checkeqlab({nil, 0, ""}, g:match("aaxx"))
 --[[
@@ -810,6 +861,120 @@ checkeqlab({nil, 4, "dc"}, g3:match("dc"))
 checkeqlab({nil, 4, "d"}, g3:match(".d"))
+-- testing more captures: label 5 thrown inside/around a capture, with and without m.Rec recovery
+local g = re.compile[[
+        S <- ( %s* &. {A} )* 
+  A <- [0-9]+ / %{5}
+]]
-print("OK")
+checkeq({"523", "624", "346", "888"} , {g:match("523 624  346\n888")})  -- all-digit input: one capture per digit run
+checkeq({nil, 5, "a 123"}, {g:match("44 a 123")})  -- expects nil, label 5 and the remaining input "a 123"
+local g2 = m.Rec(g, ((-m.R("09") * m.P(1))^0) / "58", 5)  -- presumably: on label 5, skip non-digits and produce "58" -- confirm against m.Rec docs
+checkeq({"523", "624", "346", "888"} , {g2:match("523 624  346\n888")})  -- no label thrown, same captures as g
+checkeq({"44", "a ", "58", "123"}, {g2:match("44 a 123")})  -- the outer {A} capture also yields the skipped text "a "
+local g = re.compile[[
+        S <- ( %s* &. A )* 
+  A <- {[0-9]+} / %{5}
+]]  -- same grammar as above, but the capture now wraps only the digits inside A
+checkeq({"523", "624", "346", "888"} , {g:match("523 624  346\n888")})  -- all-digit input: one capture per digit run
+checkeq({nil, 5, "a 123"}, {g:match("44 a 123")})  -- expects nil, label 5 and the remaining input "a 123"
+local g2 = m.Rec(g, ((-m.R("09") * m.P(1))^0) / "58", 5)  -- same recovery pattern as for the first grammar
+checkeq({"523", "624", "346", "888"} , {g2:match("523 624  346\n888")})  -- no label thrown, same captures as g
+checkeq({"44", "58", "123"}, {g2:match("44 a 123")})  -- only the recovery value "58" appears; the skipped text is not captured
+local R, S, P, V = lpeg.R, lpeg.S, lpeg.P, lpeg.V
+local C, Cc, Ct, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cmt
+local T, Lc, Rec = lpeg.T, lpeg.Lc, lpeg.Rec
+local labels = {  -- {name, message} pairs; labelindex returns an entry's position in this list
+  {"NoExp",     "no expression found"},
+  {"Extra",     "extra characters found after the expression"},
+  {"ExpTerm",   "expected a term after the operator"},
+  {"ExpExp",    "expected an expression after the parenthesis"},
+  {"MisClose",  "missing a closing ')' after the expression"},
+}
+--- Returns the position in `labels` of the entry whose name equals labname.
+-- Raises "could not find label: <labname>" when no entry matches.
+local function labelindex(labname)
+  for idx = 1, #labels do
+    local name = labels[idx][1]
+    if name == labname then return idx end
+  end
+  error("could not find label: " .. labname)
+end
+local errors = {}  -- list of {label index, position} entries; appended to by recorderror, reset by eval
+--- Builds a pattern that matches patt and otherwise throws the label named labname.
+-- labname must be a name listed in `labels` (labelindex raises if it is not).
+-- recpatt is optional and defaults to the empty pattern; it is only used by
+-- the Rec-based variant that is currently commented out.
+local function expect(patt, labname, recpatt)
+  local i = labelindex(labname)
+  -- Declared local: a bare `function recorderror` here would (re)define a
+  -- global every time expect is called.
+  local function recorderror(input, pos)
+    table.insert(errors, {i, pos})
+    return true
+  end
+  if not recpatt then recpatt = P"" end
+  --return Rec(patt, Cmt("", recorderror) * recpatt)
+  return patt + T(i)
+end
+local num = R("09")^1 / tonumber  -- one or more digits, converted with tonumber
+local op = S("+-*/")  -- a single arithmetic operator character
+--- Folds a flat token list {operand, op, operand, ...} strictly left to right.
+-- No operator precedence is applied; an operator other than + - * / raises
+-- "unknown operation: <op>".
+local function compute(tokens)
+  local acc = tokens[1]
+  local idx, last = 2, #tokens
+  while idx <= last do
+    local sym = tokens[idx]
+    if sym == '/' then
+      acc = acc / tokens[idx+1]
+    elseif sym == '*' then
+      acc = acc * tokens[idx+1]
+    elseif sym == '-' then
+      acc = acc - tokens[idx+1]
+    elseif sym == '+' then
+      acc = acc + tokens[idx+1]
+    else
+      error('unknown operation: ' .. tokens[idx])
+    end
+    idx = idx + 2
+  end
+  return acc
+end
+local g = P {  -- arithmetic grammar; "Exp" names the initial rule
+"Exp",
+Exp = Ct(V"Term" * (C(op) * V"Operand")^0) / compute,  -- gather operands and operators into a table, then fold it with compute
+Operand = expect(V"Term", "ExpTerm"),  -- a missing operand throws the ExpTerm label
+Term = num,
+}
+local rg = Rec(g, Cc(3), labelindex("ExpTerm"))  -- presumably recovers from ExpTerm by supplying 3 as the operand (eval("1+") is asserted to be 4)
+                 
+--- Matches input against rg and returns the first match result.
+-- If entries were recorded in `errors`, returns nil plus one
+-- "syntax error: ..." line per entry (joined with newlines) and resets the
+-- list for the next call.
+local function eval(input)
+  local value = rg:match(input)  -- only the first result of the match is used
+  if #errors == 0 then
+    return value
+  end
+  local report = {}
+  for idx = 1, #errors do
+    local entry = errors[idx]
+    report[idx] = "syntax error: " .. labels[entry[1]][2] .. " (at index " .. entry[2] .. ")"
+  end
+  errors = {}
+  return nil, table.concat(report, "\n")
+end
+assert(eval("98-76*54/32") == 37.125)  -- evaluated left to right: ((98-76)*54)/32
+--> 37.125
+assert(eval("1+") == 4)  -- nothing is recorded in errors; rg recovers the missing operand via Cc(3), giving 1+3
+--> 4 (the commented-out recorderror variant would instead report: syntax error: expected a term after the operator (at index 3))
+print("OK")
author	Sergio Queiroz <sqmedeiros@gmail.com>	2016-11-10 16:26:11 -0300
committer	Sergio Queiroz <sqmedeiros@gmail.com>	2016-11-10 16:26:11 -0300
commit	fd28f9d9e54f33bf7ae3a5e12dc71478f9c91aea (patch)
tree	875ab38000e52376583bc13741b18701c6294f80
parent	d84dd6b3659f94b09e67eb90a10e71eb05c5630e (diff)
download	lpeglabel-fd28f9d9e54f33bf7ae3a5e12dc71478f9c91aea.tar.gz lpeglabel-fd28f9d9e54f33bf7ae3a5e12dc71478f9c91aea.tar.bz2 lpeglabel-fd28f9d9e54f33bf7ae3a5e12dc71478f9c91aea.zip