Updating to lpeg 0.12.2

author: Sergio Medeiros <sqmedeiros@gmail.com> 2015-03-23 14:13:25 -0300
committer: Sergio Medeiros <sqmedeiros@gmail.com> 2015-03-23 14:13:25 -0300
commit: 0e93d536ba2d312502737cce2ab0cc21393c4842 (patch)
tree: 7de1e3ae967c90a43e7086ccef61d1722881b20c
parent: a5a4b257e626847be3be4878c603adb51cbb420f (diff)
download: lpeglabel-0e93d536ba2d312502737cce2ab0cc21393c4842.tar.gz
lpeglabel-0e93d536ba2d312502737cce2ab0cc21393c4842.tar.bz2
lpeglabel-0e93d536ba2d312502737cce2ab0cc21393c4842.zip
14 files changed, 472 insertions, 358 deletions
diff --git a/LICENSE b/LICENSE
index 7d7e849..9c488ad 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
-Copyright (c) 2014 Sérgio Medeiros
+Copyright (c) 2014-2015 Sérgio Medeiros
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/lpcap.c b/lpcap.c
index d90b935..b6911cb 100644
--- a/lpcap.c
+++ b/lpcap.c
@@ -1,5 +1,5 @@
 /*
-** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $
+** $Id: lpcap.c,v 1.5 2014/12/12 16:58:47 roberto Exp $
 ** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 */
@@ -462,7 +462,7 @@ static int pushcapture (CapState *cs) {
    case Carg: {
      int arg = (cs->cap++)->idx;
      if (arg + FIXEDARGS > cs->ptop)
-        return luaL_error(L, "reference to absent argument #%d", arg);
+        return luaL_error(L, "reference to absent extra argument #%d", arg);
      lua_pushvalue(L, arg + FIXEDARGS);
      return 1;
    }
diff --git a/lpcap.h b/lpcap.h
index c0a0e38..d762fdc 100644
--- a/lpcap.h
+++ b/lpcap.h
@@ -1,5 +1,5 @@
 /*
-** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $
+** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $
 */
 #if !defined(lpcap_h)
@@ -18,7 +18,7 @@ typedef enum CapKind {
 typedef struct Capture {
  const char *s;  /* subject position */
-  short idx;  /* extra info about capture (group name, arg index, etc.) */
+  unsigned short idx;  /* extra info (group name, arg index, etc.) */
  byte kind;  /* kind of capture */
  byte siz;  /* size of full capture + 1 (0 = not a full capture) */
 } Capture;
diff --git a/lpcode.c b/lpcode.c
index 4431146..1ee6ed1 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -1,5 +1,5 @@
 /*
-** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $
+** $Id: lpcode.c,v 1.21 2014/12/12 17:01:29 roberto Exp $
 ** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 */
@@ -33,26 +33,30 @@ static const Charset *fullset = &fullset_;
 */
 /*
-** Check whether a charset is empty (IFail), singleton (IChar),
+** Check whether a charset is empty (returns IFail), singleton (IChar),
-** full (IAny), or none of those (ISet).
+** full (IAny), or none of those (ISet). When singleton, '*c' returns
+** which character it is. (When generic set, the set was the input,
+** so there is no need to return it.)
 */
 static Opcode charsettype (const byte *cs, int *c) {
-  int count = 0;
+  int count = 0;  /* number of characters in the set */
  int i;
-  int candidate = -1;  /* candidate position for a char */
+  int candidate = -1;  /* candidate position for the singleton char */
-  for (i = 0; i < CHARSETSIZE; i++) {
+  for (i = 0; i < CHARSETSIZE; i++) {  /* for each byte */
    int b = cs[i];
-    if (b == 0) {
+    if (b == 0) {  /* is byte empty? */
-      if (count > 1) return ISet;  /* else set is still empty */
+      if (count > 1)  /* was set neither empty nor singleton? */
+        return ISet;  /* neither full nor empty nor singleton */
+      /* else set is still empty or singleton */
    }
-    else if (b == 0xFF) {
+    else if (b == 0xFF) {  /* is byte full? */
-      if (count < (i * BITSPERCHAR))
+      if (count < (i * BITSPERCHAR))  /* was set not full? */
-        return ISet;
+        return ISet;  /* neither full nor empty nor singleton */
      else count += BITSPERCHAR;  /* set is still full */
    }
-    else if ((b & (b - 1)) == 0) {  /* byte has only one bit? */
+    else if ((b & (b - 1)) == 0) {  /* has byte only one bit? */
-      if (count > 0)
+      if (count > 0)  /* was set not empty? */
-        return ISet;  /* set is neither full nor empty */
+        return ISet;  /* neither full nor empty nor singleton */
      else {  /* set has only one char till now; track it */
        count++;
        candidate = i;
@@ -77,6 +81,7 @@ static Opcode charsettype (const byte *cs, int *c) {
  }
 }
 /*
 ** A few basic operations on Charsets
 */
@@ -84,16 +89,11 @@ static void cs_complement (Charset *cs) {
  loopset(i, cs->cs[i] = ~cs->cs[i]);
 }
 static int cs_equal (const byte *cs1, const byte *cs2) {
  loopset(i, if (cs1[i] != cs2[i]) return 0);
  return 1;
 }
-/*
-** computes whether sets cs1 and cs2 are disjoint
-*/
 static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
  loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
  return 1;
@@ -101,7 +101,8 @@ static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
 /*
-** Convert a 'char' pattern (TSet, TChar, TAny) to a charset
+** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
+** charset and return 1; else return 0.
 */
 int tocharset (TTree *tree, Charset *cs) {
  switch (tree->tag) {
@@ -116,7 +117,7 @@ int tocharset (TTree *tree, Charset *cs) {
      return 1;
    }
    case TAny: {
-      loopset(i, cs->cs[i] = 0xFF);  /* add all to the set */
+      loopset(i, cs->cs[i] = 0xFF);  /* add all characters to the set */
      return 1;
    }
    default: return 0;
@@ -125,13 +126,16 @@ int tocharset (TTree *tree, Charset *cs) {
 /*
-** Checks whether a pattern has captures
+** Check whether a pattern tree has captures
 */
 int hascaptures (TTree *tree) {
 tailcall:
  switch (tree->tag) {
    case TCapture: case TRunTime:
      return 1;
+    case TCall:
+      tree = sib2(tree); goto tailcall;  /* return hascaptures(sib2(tree)); */
+    case TOpenCall: assert(0);
    default: {
      switch (numsiblings[tree->tag]) {
        case 1:  /* return hascaptures(sib1(tree)); */
@@ -161,7 +165,7 @@ int hascaptures (TTree *tree) {
 **    p is nullable => nullable(p)
 **    nofail(p) => p cannot fail
 ** The function assumes that TOpenCall is not nullable;
-** this will be checked again when the grammar is fixed.)
+** this will be checked again when the grammar is fixed.
 ** Run-time captures can do whatever they want, so the result
 ** is conservative.
 */
@@ -188,7 +192,7 @@ int checkaux (TTree *tree, int pred) {
      if (!checkaux(sib1(tree), pred)) return 0;
      /* else return checkaux(sib2(tree), pred); */
      tree = sib2(tree); goto tailcall;
-    case TChoice: case TLabChoice:  /* labeled failure */
+    case TChoice: case TLabChoice: /* labeled failure */
      if (checkaux(sib2(tree), pred)) return 1;
      /* else return checkaux(sib1(tree), pred); */
      tree = sib1(tree); goto tailcall;
@@ -198,7 +202,7 @@ int checkaux (TTree *tree, int pred) {
    case TCall:  /* return checkaux(sib2(tree), pred); */
      tree = sib2(tree); goto tailcall;
    default: assert(0); return 0;
-  };
+  }
 }
@@ -246,16 +250,20 @@ int fixedlenx (TTree *tree, int count, int len) {
 /*
 ** Computes the 'first set' of a pattern.
 ** The result is a conservative aproximation:
-**   match p ax -> x' for some x ==> a in first(p).
+**   match p ax -> x (for some x) ==> a belongs to first(p)
+** or
+**   a not in first(p) ==> match p ax -> fail (for all x)
+**
 ** The set 'follow' is the first set of what follows the
 ** pattern (full set if nothing follows it).
-** The function returns 0 when this set can be used for
+**
-** tests that avoid the pattern altogether.
+** The function returns 0 when this resulting set can be used for
+** test instructions that avoid the pattern altogether.
 ** A non-zero return can happen for two reasons:
-** 1) match p '' -> ''            ==> returns 1.
+** 1) match p '' -> ''            ==> return has bit 1 set
-** (tests cannot be used because they always fail for an empty input)
+** (tests cannot be used because they would always fail for an empty input);
-** 2) there is a match-time capture ==> returns 2.
+** 2) there is a match-time capture ==> return has bit 2 set
-** (match-time captures should not be avoided by optimizations)
+** (optimizations should not bypass match-time captures).
 */
 static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
 tailcall:
@@ -266,16 +274,16 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
    }
    case TTrue: {
      loopset(i, firstset->cs[i] = follow->cs[i]);
-      return 1;
+      return 1;  /* accepts the empty string */
    }
-    case TFalse: { 
+    case TFalse: {
      loopset(i, firstset->cs[i] = 0);
      return 0;
    }
-                case TThrow: {  /* labeled failure: must always throw the label */
+    case TThrow: {  /* labeled failure: must always throw the label */
      loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */
      return 1;
-    }
+    } 
    case TChoice: case TLabChoice: {  /*(?) labeled failure */
      Charset csaux;
      int e1 = getfirst(sib1(tree), follow, firstset);
@@ -285,7 +293,8 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
    }
    case TSeq: {
      if (!nullable(sib1(tree))) {
-        /* return getfirst(sib1(tree), fullset, firstset); */
+        /* when p1 is not nullable, p2 has nothing to contribute;
+           return getfirst(sib1(tree), fullset, firstset); */
        tree = sib1(tree); follow = fullset; goto tailcall;
      }
      else {  /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
@@ -329,7 +338,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
      /* else go through */
    }
    case TBehind: {  /* instruction gives no new information */
-      /* call 'getfirst' to check for math-time captures */
+      /* call 'getfirst' only to check for match-time captures */
      int e = getfirst(sib1(tree), follow, firstset);
      loopset(i, firstset->cs[i] = follow->cs[i]);  /* uses follow */
      return e | 1;  /* always can accept the empty string */
@@ -340,13 +349,13 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
 /*
-** If it returns true, then pattern can fail only depending on the next
+** If 'headfail(tree)' is true, then 'tree' can fail only depending on the
-** character of the subject
+** next character of the subject.
 */
 static int headfail (TTree *tree) {
 tailcall:
  switch (tree->tag) {
-    case TChar: case TSet: case TAny: case TFalse: 
+    case TChar: case TSet: case TAny: case TFalse:
      return 1;
    case TTrue: case TRep: case TRunTime: case TNot:
    case TBehind:
@@ -410,10 +419,12 @@ int sizei (const Instruction *i) {
  switch((Opcode)i->i.code) {
    case ISet: case ISpan: return CHARSETINSTSIZE;
    case ITestSet: return CHARSETINSTSIZE + 1;
-    case ITestChar: case ITestAny: case IChoice: case IJmp: 
+    case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall:
-    case ICall: case IOpenCall: case ICommit: case IPartialCommit:
+    case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit:
-    case IBackCommit: case IThrow: return 2;  /* labeled failure */
+    case IThrow:  /* labeled failure */
+      return 2;  
                case ILabChoice: return 3; /* labeled failure */
+      return 2;
    default: return 1;
  }
 }
@@ -431,7 +442,8 @@ typedef struct CompileState {
 /*
 ** code generation is recursive; 'opt' indicates that the code is
-** being generated under a 'IChoice' operator jumping to its end.
+** being generated under a 'IChoice' operator jumping to its end
+** (that is, the match is "optional").
 ** 'tt' points to a previous test protecting this code. 'fl' is
 ** the follow set of the pattern.
 */
@@ -439,7 +451,7 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
                     const Charset *fl);
-void reallocprog (lua_State *L, Pattern *p, int nsize) {
+void realloccode (lua_State *L, Pattern *p, int nsize) {
  void *ud;
  lua_Alloc f = lua_getallocf(L, &ud);
  void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
@@ -454,7 +466,7 @@ void reallocprog (lua_State *L, Pattern *p, int nsize) {
 static int nextinstruction (CompileState *compst) {
  int size = compst->p->codesize;
  if (compst->ncode >= size)
-    reallocprog(compst->L, compst->p, size * 2);
+    realloccode(compst->L, compst->p, size * 2);
  return compst->ncode++;
 }
@@ -470,6 +482,9 @@ static int addinstruction (CompileState *compst, Opcode op, int aux) {
 }
+/*
+** Add an instruction followed by space for an offset (to be set later)
+*/
 static int addoffsetinst (CompileState *compst, Opcode op) {
  int i = addinstruction(compst, op, 0);  /* instruction */
  addinstruction(compst, (Opcode)0, 0);  /* open space for offset */
@@ -496,7 +511,9 @@ static int addoffsetlabinst (CompileState *compst, Labelset ls) {
 }
 /* labeled failure end */
+/*
+** Set the offset of an instruction
+*/
 static void setoffset (CompileState *compst, int instruction, int offset) {
  getinstr(compst, instruction + 1).offset = offset;
 }
@@ -505,7 +522,7 @@ static void setoffset (CompileState *compst, int instruction, int offset) {
 /*
 ** Add a capture instruction:
 ** 'op' is the capture instruction; 'cap' the capture kind;
-** 'key' the key into ktable; 'aux' is optional offset
+** 'key' the key into ktable; 'aux' is the optional capture offset
 **
 */
 static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
@@ -521,12 +538,18 @@ static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
 #define target(code,i)          ((i) + code[i + 1].offset)
+/*
+** Patch 'instruction' to jump to 'target'
+*/
 static void jumptothere (CompileState *compst, int instruction, int target) {
  if (instruction >= 0)
    setoffset(compst, instruction, target - instruction);
 }
+/*
+** Patch 'instruction' to jump to current position
+*/
 static void jumptohere (CompileState *compst, int instruction) {
  jumptothere(compst, instruction, gethere(compst));
 }
@@ -594,7 +617,7 @@ static int codetestset (CompileState *compst, Charset *cs, int e) {
  else {
    int c = 0;
    Opcode op = charsettype(cs->cs, &c);
-    switch (op) { 
+    switch (op) {
      case IFail: return addoffsetinst(compst, IJmp);  /* always jump */
      case IAny: return addoffsetinst(compst, ITestAny);
      case IChar: {
@@ -658,8 +681,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
  Charset cs1, cs2;
  int e1 = getfirst(p1, fullset, &cs1);
  if (headfail(p1) ||
-      (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) { 
+      (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) {
-        /*if (0) {*/
    /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
    int test = codetestset(compst, &cs1, 0);
    int jmp = NOINST;
@@ -690,6 +712,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
  }
 }
 /* labeled failure begin */
 static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
                        const Charset *fl, Labelset ls) {
@@ -707,6 +730,7 @@ static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
 }
 /* labeled failure end */
 /*
 ** And predicate
 ** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
@@ -907,7 +931,8 @@ static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
 /*
 ** Main code-generation function: dispatch to auxiliar functions
-** according to kind of tree
+** according to kind of tree. ('needfollow' should return true
+** only for constructions that use 'fl'.)
 */
 static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
                     const Charset *fl) {
@@ -932,7 +957,7 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
      /* codegen(compst, p2, opt, tt, fl); */
      tree = sib2(tree); goto tailcall;
    }
-                case TThrow: { /* labeled failure */
+    case TThrow: { /* labeled failure */
                        addthrowinstruction(compst, tree->labels);
                        break;
                }
@@ -958,6 +983,7 @@ static void peephole (CompileState *compst) {
  Instruction *code = compst->p->code;
  int i;
  for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
+   redo:
    switch (code[i].i.code) {
      case IChoice: case ICall: case ICommit: case IPartialCommit:
      case IBackCommit: case ITestChar: case ITestSet: case ILabChoice: /* labeled failure */
@@ -979,8 +1005,7 @@ static void peephole (CompileState *compst) {
            int fft = finallabel(code, ft);
            code[i] = code[ft];  /* jump becomes that instruction... */
            jumptothere(compst, i, fft);  /* but must correct its offset */
-            i--;  /* reoptimize its label */
+            goto redo;  /* reoptimize its label */
-            break;
          }
          default: {
            jumptothere(compst, i, ft);  /* optimize label */
@@ -1002,11 +1027,11 @@ static void peephole (CompileState *compst) {
 Instruction *compile (lua_State *L, Pattern *p) {
  CompileState compst;
  compst.p = p;  compst.ncode = 0;  compst.L = L;
-  reallocprog(L, p, 2);  /* minimum initial size */
+  realloccode(L, p, 2);  /* minimum initial size */
  codegen(&compst, p->tree, 0, NOINST, fullset);
  addinstruction(&compst, IEnd, 0);
-  reallocprog(L, p, compst.ncode);  /* set final size */
+  realloccode(L, p, compst.ncode);  /* set final size */
-  peephole(&compst);  /* labeled failure */
+  peephole(&compst);
  return p->code;
 }
diff --git a/lpcode.h b/lpcode.h
index 5c9d54f..72d2bb9 100644
--- a/lpcode.h
+++ b/lpcode.h
@@ -1,5 +1,5 @@
 /*
-** $Id: lpcode.h,v 1.5 2013/04/04 21:24:45 roberto Exp $
+** $Id: lpcode.h,v 1.6 2013/11/28 14:56:02 roberto Exp $
 */
 #if !defined(lpcode_h)
@@ -17,7 +17,7 @@ int fixedlenx (TTree *tree, int count, int len);
 int hascaptures (TTree *tree);
 int lp_gc (lua_State *L);
 Instruction *compile (lua_State *L, Pattern *p);
-void reallocprog (lua_State *L, Pattern *p, int nsize);
+void realloccode (lua_State *L, Pattern *p, int nsize);
 int sizei (const Instruction *i);
diff --git a/lpprint.c b/lpprint.c
index 0730ca9..d4a60a8 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -52,7 +52,7 @@ static void printjmp (const Instruction *op, const Instruction *p) {
 }
-void printinst (const Instruction *op, const Instruction *p) {
+static void printinst (const Instruction *op, const Instruction *p) {
  const char *const names[] = {
    "any", "char", "set",
    "testany", "testchar", "testset",
@@ -60,8 +60,8 @@ void printinst (const Instruction *op, const Instruction *p) {
    "ret", "end",
    "choice", "jmp", "call", "open_call",
    "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
-     "fullcapture", "opencapture", "closecapture", "closeruntime", 
+     "fullcapture", "opencapture", "closecapture", "closeruntime",
-     "throw", "labeled_choice" /* labeled failure */
+    "throw", "labeled_choice" /* labeled failure */
  };
  printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
  switch ((Opcode)p->i.code) {
@@ -103,12 +103,12 @@ void printinst (const Instruction *op, const Instruction *p) {
      printf("%d", p->i.aux);
      break;
    }
-          case IJmp: case ICall: case ICommit: case IChoice:
+    case IJmp: case ICall: case ICommit: case IChoice:
    case IPartialCommit: case IBackCommit: case ITestAny: {
      printjmp(op, p);
      break;
    }
-                case IThrow: { /* labeled failure */
+    case IThrow: { /* labeled failure */
      printf("%d", (p + 1)->labels);
      break;
    }
@@ -117,7 +117,6 @@ void printinst (const Instruction *op, const Instruction *p) {
                        printf(" %d", (p + 2)->labels);
      break;
    }
    default: break;
  }
  printf("\n");
@@ -194,7 +193,7 @@ void printtree (TTree *tree, int ident) {
    }
    case TBehind: {
      printf(" %d\n", tree->u.n);
-       printtree(sib1(tree), ident + 2);
+        printtree(sib1(tree), ident + 2);
      break;
    }
    case TCapture: {
@@ -217,16 +216,16 @@ void printtree (TTree *tree, int ident) {
      assert(rule->tag == TTrue);  /* sentinel */
      break;
    }
-                case TThrow: { /* labeled failure */
+    case TThrow: { /* labeled failure */
      printf(" labels: %d\n", tree->labels);
      break;
    }
    default: {
      int sibs = numsiblings[tree->tag];
-                        if (tree->tag == TLabChoice) { /* labeled failure */
+      printf("\n");
+      if (tree->tag == TLabChoice) { /* labeled failure */
        printf(" labels: %d\n", tree->labels);
                        }
-      printf("\n");
      if (sibs >= 1) {
        printtree(sib1(tree), ident + 2);
        if (sibs >= 2)
diff --git a/lpprint.h b/lpprint.h
index 6cbe47d..e640f74 100644
--- a/lpprint.h
+++ b/lpprint.h
@@ -18,8 +18,6 @@ void printtree (TTree *tree, int ident);
 void printktable (lua_State *L, int idx);
 void printcharset (const byte *st);
 void printcaplist (Capture *cap, Capture *limit);
-void printinst (const Instruction *op, const Instruction *p);
 #else
diff --git a/lptree.c b/lptree.c
index 0dd5998..cbdd4df 100644
--- a/lptree.c
+++ b/lptree.c
@@ -1,5 +1,5 @@
 /*
-** $Id: lptree.c,v 1.10 2013/04/12 16:30:33 roberto Exp $
+** $Id: lptree.c,v 1.15 2015/03/04 17:23:00 roberto Exp $
 ** Copyright 2013, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 */
@@ -127,6 +127,189 @@ static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) {
 }
+/*
+** {===================================================================
+** KTable manipulation
+**
+** - The ktable of a pattern 'p' can be shared by other patterns that
+** contain 'p' and no other constants. Because of this sharing, we
+** should not add elements to a 'ktable' unless it was freshly created
+** for the new pattern.
+**
+** - The maximum index in a ktable is USHRT_MAX, because trees and
+** patterns use unsigned shorts to store those indices.
+** ====================================================================
+*/
+/*
+** Create a new 'ktable' for the pattern at the top of the stack.
+*/
+static void newktable (lua_State *L, int n) {
+  lua_createtable(L, n, 0);  /* create a fresh table */
+  lua_setfenv(L, -2);  /* set it as 'ktable' for pattern */
+}
+/*
+** Add element 'idx' to 'ktable' of pattern at the top of the stack;
+** Return index of new element.
+** If new element is nil, does not add it to table (as it would be
+** useless) and returns 0, as ktable[0] is always nil.
+*/
+static int addtoktable (lua_State *L, int idx) {
+  if (lua_isnil(L, idx))  /* nil value? */
+    return 0;
+  else {
+    int n;
+    lua_getfenv(L, -1);  /* get ktable from pattern */
+    n = lua_objlen(L, -1);
+    if (n >= USHRT_MAX)
+      luaL_error(L, "too many Lua values in pattern");
+    lua_pushvalue(L, idx);  /* element to be added */
+    lua_rawseti(L, -2, ++n);
+    lua_pop(L, 1);  /* remove 'ktable' */
+    return n;
+  }
+}
+/*
+** Return the number of elements in the ktable at 'idx'.
+** In Lua 5.2/5.3, default "environment" for patterns is nil, not
+** a table. Treat it as an empty table. In Lua 5.1, assumes that
+** the environment has no numeric indices (len == 0)
+*/
+static int ktablelen (lua_State *L, int idx) {
+  if (!lua_istable(L, idx)) return 0;
+  else return lua_objlen(L, idx);
+}
+/*
+** Concatenate the contents of table 'idx1' into table 'idx2'.
+** (Assume that both indices are negative.)
+** Return the original length of table 'idx2' (or 0, if no
+** element was added, as there is no need to correct any index).
+*/
+static int concattable (lua_State *L, int idx1, int idx2) {
+  int i;
+  int n1 = ktablelen(L, idx1);
+  int n2 = ktablelen(L, idx2);
+  if (n1 + n2 > USHRT_MAX)
+    luaL_error(L, "too many Lua values in pattern");
+  if (n1 == 0) return 0;  /* nothing to correct */
+  for (i = 1; i <= n1; i++) {
+    lua_rawgeti(L, idx1, i);
+    lua_rawseti(L, idx2 - 1, n2 + i);  /* correct 'idx2' */
+  }
+  return n2;
+}
+/*
+** When joining 'ktables', constants from one of the subpatterns must
+** be renumbered; 'correctkeys' corrects their indices (adding 'n'
+** to each of them)
+*/
+static void correctkeys (TTree *tree, int n) {
+  if (n == 0) return;  /* no correction? */
+ tailcall:
+  switch (tree->tag) {
+    case TOpenCall: case TCall: case TRunTime: case TRule: {
+      if (tree->key > 0)
+        tree->key += n;
+      break;
+    }
+    case TCapture: {
+      if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum)
+        tree->key += n;
+      break;
+    }
+    default: break;
+  }
+  switch (numsiblings[tree->tag]) {
+    case 1:  /* correctkeys(sib1(tree), n); */
+      tree = sib1(tree); goto tailcall;
+    case 2:
+      correctkeys(sib1(tree), n);
+      tree = sib2(tree); goto tailcall;  /* correctkeys(sib2(tree), n); */
+    default: assert(numsiblings[tree->tag] == 0); break;
+  }
+}
+/*
+** Join the ktables from p1 and p2 into the ktable for the new pattern at
+** the top of the stack, reusing them when possible.
+*/
+static void joinktables (lua_State *L, int p1, TTree *t2, int p2) {
+  int n1, n2;
+  lua_getfenv(L, p1);  /* get ktables */
+  lua_getfenv(L, p2);
+  n1 = ktablelen(L, -2);
+  n2 = ktablelen(L, -1);
+  if (n1 == 0 && n2 == 0)  /* are both tables empty? */
+    lua_pop(L, 2);  /* nothing to be done; pop tables */
+  else if (n2 == 0 || lua_equal(L, -2, -1)) {  /* 2nd table empty or equal? */
+    lua_pop(L, 1);  /* pop 2nd table */
+    lua_setfenv(L, -2);  /* set 1st ktable into new pattern */
+  }
+  else if (n1 == 0) {  /* first table is empty? */
+    lua_setfenv(L, -3);  /* set 2nd table into new pattern */
+    lua_pop(L, 1);  /* pop 1st table */
+  }
+  else {
+    lua_createtable(L, n1 + n2, 0);  /* create ktable for new pattern */
+    /* stack: new p; ktable p1; ktable p2; new ktable */
+    concattable(L, -3, -1);  /* from p1 into new ktable */
+    concattable(L, -2, -1);  /* from p2 into new ktable */
+    lua_setfenv(L, -4);  /* new ktable becomes 'p' environment */
+    lua_pop(L, 2);  /* pop other ktables */
+    correctkeys(t2, n1);  /* correction for indices from p2 */
+  }
+}
+/*
+** copy 'ktable' of element 'idx' to new tree (on top of stack)
+*/
+static void copyktable (lua_State *L, int idx) {
+  lua_getfenv(L, idx);
+  lua_setfenv(L, -2);
+}
+/*
+** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable'
+** from tree at the top of the stack, and correct corresponding
+** tree.
+*/
+static void mergektable (lua_State *L, int idx, TTree *stree) {
+  int n;
+  lua_getfenv(L, -1);  /* get ktables */
+  lua_getfenv(L, idx);
+  n = concattable(L, -1, -2);
+  lua_pop(L, 2);  /* remove both ktables */
+  correctkeys(stree, n);
+}
+/*
+** Create a new 'ktable' for the pattern at the top of the stack, adding
+** all elements from pattern 'p' (if not 0) plus element 'idx' to it.
+** Return index of new element.
+*/
+static int addtonewktable (lua_State *L, int p, int idx) {
+  newktable(L, 1);
+  if (p)
+    mergektable(L, p, NULL);
+  return addtoktable(L, idx);
+}
+/* }====================================================== */
 /*
 ** {======================================================
 ** Tree generation
@@ -218,29 +401,6 @@ static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) {
 /*
-** Add element 'idx' to 'ktable' of pattern at the top of the stack;
-** create new 'ktable' if necessary. Return index of new element.
-*/
-static int addtoktable (lua_State *L, int idx) {
-  if (idx == 0 || lua_isnil(L, idx))  /* no actual value to insert? */
-    return 0;
-  else {
-    int n;
-    lua_getfenv(L, -1);  /* get ktable from pattern */
-    n = lua_objlen(L, -1);
-    if (n == 0) {  /* is it empty/non-existent? */
-      lua_pop(L, 1);  /* remove it */
-      lua_createtable(L, 1, 0);  /* create a fresh table */
-    }
-    lua_pushvalue(L, idx);  /* element to be added */
-    lua_rawseti(L, -2, n + 1);
-    lua_setfenv(L, -2);  /* set it as ktable for pattern */
-    return n + 1;
-  }
-}
-/*
 ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got
 ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
 ** must build a sequence of sequence of sequence...)
@@ -315,7 +475,7 @@ static TTree *getpatt (lua_State *L, int idx, int *len) {
    case LUA_TFUNCTION: {
      tree = newtree(L, 2);
      tree->tag = TRunTime;
-      tree->key = addtoktable(L, idx);
+      tree->key = addtonewktable(L, 0, idx);
      sib1(tree)->tag = TTrue;
      break;
    }
@@ -331,123 +491,6 @@ static TTree *getpatt (lua_State *L, int idx, int *len) {
 /*
-** Return the number of elements in the ktable of pattern at 'idx'.
-** In Lua 5.2, default "environment" for patterns is nil, not
-** a table. Treat it as an empty table. In Lua 5.1, assumes that
-** the environment has no numeric indices (len == 0)
-*/
-static int ktablelen (lua_State *L, int idx) {
-  if (!lua_istable(L, idx)) return 0;
-  else return lua_objlen(L, idx);
-}
-/*
-** Concatentate the contents of table 'idx1' into table 'idx2'.
-** (Assume that both indices are negative.)
-** Return the original length of table 'idx2'
-*/
-static int concattable (lua_State *L, int idx1, int idx2) {
-  int i;
-  int n1 = ktablelen(L, idx1);
-  int n2 = ktablelen(L, idx2);
-  if (n1 == 0) return 0;  /* nothing to correct */
-  for (i = 1; i <= n1; i++) {
-    lua_rawgeti(L, idx1, i);
-    lua_rawseti(L, idx2 - 1, n2 + i);  /* correct 'idx2' */
-  }
-  return n2;
-}
-/*
-** Make a merge of ktables from p1 and p2 the ktable for the new
-** pattern at the top of the stack.
-*/
-static int joinktables (lua_State *L, int p1, int p2) {
-  int n1, n2;
-  lua_getfenv(L, p1);  /* get ktables */
-  lua_getfenv(L, p2);
-  n1 = ktablelen(L, -2);
-  n2 = ktablelen(L, -1);
-  if (n1 == 0 && n2 == 0) {  /* are both tables empty? */
-    lua_pop(L, 2);  /* nothing to be done; pop tables */
-    return 0;  /* nothing to correct */
-  }
-  if (n2 == 0 || lua_equal(L, -2, -1)) {  /* second table is empty or equal? */
-    lua_pop(L, 1);  /* pop 2nd table */
-    lua_setfenv(L, -2);  /* set 1st ktable into new pattern */
-    return 0;  /* nothing to correct */
-  }
-  if (n1 == 0) {  /* first table is empty? */
-    lua_setfenv(L, -3);  /* set 2nd table into new pattern */
-    lua_pop(L, 1);  /* pop 1st table */
-    return 0;  /* nothing to correct */
-  }
-  else {
-    lua_createtable(L, n1 + n2, 0);  /* create ktable for new pattern */
-    /* stack: new p; ktable p1; ktable p2; new ktable */
-    concattable(L, -3, -1);  /* from p1 into new ktable */
-    concattable(L, -2, -1);  /* from p2 into new ktable */
-    lua_setfenv(L, -4);  /* new ktable becomes p env */
-    lua_pop(L, 2);  /* pop other ktables */
-    return n1;  /* correction for indices from p2 */
-  }
-}
-static void correctkeys (TTree *tree, int n) {
-  if (n == 0) return;  /* no correction? */
- tailcall:
-  switch (tree->tag) {
-    case TOpenCall: case TCall: case TRunTime: case TRule: {
-      if (tree->key > 0)
-        tree->key += n;
-      break;
-    }
-    case TCapture: {
-      if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum)
-        tree->key += n;
-      break;
-    }
-    default: break;
-  }
-  switch (numsiblings[tree->tag]) {
-    case 1:  /* correctkeys(sib1(tree), n); */
-      tree = sib1(tree); goto tailcall;
-    case 2:
-      correctkeys(sib1(tree), n);
-      tree = sib2(tree); goto tailcall;  /* correctkeys(sib2(tree), n); */
-    default: assert(numsiblings[tree->tag] == 0); break;
-  }
-}
-/*
-** copy 'ktable' of element 'idx' to new tree (on top of stack)
-*/
-static void copyktable (lua_State *L, int idx) {
-  lua_getfenv(L, idx);
-  lua_setfenv(L, -2);
-}
-/*
-** merge 'ktable' from rule at stack index 'idx' into 'ktable'
-** from tree at the top of the stack, and correct corresponding
-** tree.
-*/
-static void mergektable (lua_State *L, int idx, TTree *rule) {
-  int n;
-  lua_getfenv(L, -1);  /* get ktables */
-  lua_getfenv(L, idx);
-  n = concattable(L, -1, -2);
-  lua_pop(L, 2);  /* remove both ktables */
-  correctkeys(rule, n);
-}
-/*
 ** create a new tree, whith a new root and one sibling.
 ** Sibling must be on the Lua stack, at index 1.
 */
@@ -475,7 +518,7 @@ static TTree *newroot2sib (lua_State *L, int tag) {
  tree->u.ps =  1 + s1;
  memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
  memcpy(sib2(tree), tree2, s2 * sizeof(TTree));
-  correctkeys(sib2(tree), joinktables(L, 1, 2));
+  joinktables(L, 1, sib2(tree), 2);
  return tree;
 }
@@ -535,8 +578,8 @@ static int lp_choice (lua_State *L) {
 */
 static int lp_star (lua_State *L) {
  int size1;
-  int n = luaL_checkint(L, 2);
+  int n = (int)luaL_checkinteger(L, 2);
-  TTree *tree1 = gettree(L, 1, &size1);
+  TTree *tree1 = getpatt(L, 1, &size1);
  if (n >= 0) {  /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */
    TTree *tree = newtree(L, (n + 1) * (size1 + 1));
    if (nullable(tree1))
@@ -604,7 +647,7 @@ static int lp_sub (lua_State *L) {
    sib1(tree)->tag = TNot;  /* ...not... */
    memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree));  /* ...t2 */
    memcpy(sib2(tree), t1, s1 * sizeof(TTree));  /* ... and t1 */
-    correctkeys(sib1(tree), joinktables(L, 1, 2));
+    joinktables(L, 1, sib1(tree), 2);
  }
  return 1;
 }
@@ -645,8 +688,8 @@ static int lp_behind (lua_State *L) {
  TTree *tree;
  TTree *tree1 = getpatt(L, 1, NULL);
  int n = fixedlen(tree1);
-  luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures");
  luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length");
+  luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures");
  luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind");
  tree = newroot1sib(L, TBehind);
  tree->u.n = n;
@@ -697,7 +740,7 @@ static int lp_labchoice (lua_State *L) {
 static int lp_V (lua_State *L) {
  TTree *tree = newleaf(L, TOpenCall);
  luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
-  tree->key = addtoktable(L, 1);
+  tree->key = addtonewktable(L, 0, 1);
  return 1;
 }
@@ -710,7 +753,7 @@ static int lp_V (lua_State *L) {
 static int capture_aux (lua_State *L, int cap, int labelidx) {
  TTree *tree = newroot1sib(L, TCapture);
  tree->cap = cap;
-  tree->key = addtoktable(L, labelidx);
+  tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx);
  return 1;
 }
@@ -718,10 +761,9 @@ static int capture_aux (lua_State *L, int cap, int labelidx) {
 /*
 ** Fill a tree with an empty capture, using an empty (TTrue) sibling.
 */
-static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) {
+static TTree *auxemptycap (TTree *tree, int cap) {
  tree->tag = TCapture;
  tree->cap = cap;
-  tree->key = addtoktable(L, idx);
  sib1(tree)->tag = TTrue;
  return tree;
 }
@@ -730,8 +772,18 @@ static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) {
 /*
 ** Create a tree for an empty capture
 */
-static TTree *newemptycap (lua_State *L, int cap, int idx) {
+static TTree *newemptycap (lua_State *L, int cap) {
-  return auxemptycap(L, newtree(L, 2), cap, idx);
+  return auxemptycap(newtree(L, 2), cap);
+}
+/*
+** Create a tree for an empty capture with an associated Lua value
+*/
+static TTree *newemptycapkey (lua_State *L, int cap, int idx) {
+  TTree *tree = auxemptycap(newtree(L, 2), cap);
+  tree->key = addtonewktable(L, 0, idx);
+  return tree;
 }
@@ -789,14 +841,14 @@ static int lp_simplecapture (lua_State *L) {
 static int lp_poscapture (lua_State *L) {
-  newemptycap(L, Cposition, 0);
+  newemptycap(L, Cposition);
  return 1;
 }
 static int lp_argcapture (lua_State *L) {
-  int n = luaL_checkint(L, 1);
+  int n = (int)luaL_checkinteger(L, 1);
-  TTree *tree = newemptycap(L, Carg, 0);
+  TTree *tree = newemptycap(L, Carg);
  tree->key = n;
  luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index");
  return 1;
@@ -805,7 +857,7 @@ static int lp_argcapture (lua_State *L) {
 static int lp_backref (lua_State *L) {
  luaL_checkstring(L, 1);
-  newemptycap(L, Cbackref, 1);
+  newemptycapkey(L, Cbackref, 1);
  return 1;
 }
@@ -819,9 +871,10 @@ static int lp_constcapture (lua_State *L) {
  if (n == 0)  /* no values? */
    newleaf(L, TTrue);  /* no capture */
  else if (n == 1)
-    newemptycap(L, Cconst, 1);  /* single constant capture */
+    newemptycapkey(L, Cconst, 1);  /* single constant capture */
  else {  /* create a group capture with all values */
    TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2);
+    newktable(L, n);  /* create a 'ktable' for new tree */
    tree->tag = TCapture;
    tree->cap = Cgroup;
    tree->key = 0;
@@ -829,10 +882,12 @@ static int lp_constcapture (lua_State *L) {
    for (i = 1; i <= n - 1; i++) {
      tree->tag = TSeq;
      tree->u.ps = 3;  /* skip TCapture and its sibling */
-      auxemptycap(L, sib1(tree), Cconst, i);
+      auxemptycap(sib1(tree), Cconst);
+      sib1(tree)->key = addtoktable(L, i);
      tree = sib2(tree);
    }
-    auxemptycap(L, tree, Cconst, i);
+    auxemptycap(tree, Cconst);
+    tree->key = addtoktable(L, i);
  }
  return 1;
 }
@@ -842,7 +897,7 @@ static int lp_matchtime (lua_State *L) {
  TTree *tree;
  luaL_checktype(L, 2, LUA_TFUNCTION);
  tree = newroot1sib(L, TRunTime);
-  tree->key = addtoktable(L, 2);
+  tree->key = addtonewktable(L, 1, 2);
  return 1;
 }
@@ -1146,20 +1201,20 @@ static int lp_match (lua_State *L) {
  lua_pushnil(L);  /* initialize subscache */
  lua_pushlightuserdata(L, capture);  /* initialize caplistidx */
  lua_getfenv(L, 1);  /* initialize penvidx */
-  r = match(L, s, s + i, s + l, code, capture, ptop, &labelf);
+  r = match(L, s, s + i, s + l, code, capture, ptop, &labelf); /* labeled failure */
-  if (r == NULL) {
+  if (r == NULL) { /* labeled failure begin */
-                int j = 0;
+    int j = 0;
-                int n = 1;
+    int n = 1;
    lua_pushnil(L);
-                while (j < (int) MAXLABELS) {
+    while (j < (int) MAXLABELS) {
-                        if (labelf & (1 << j)) {        
+      if (labelf & (1 << j)) {  
-                                lua_pushinteger(L, j);
+        lua_pushinteger(L, j);
-                                n++;
+        n++;
-                        }
+      }
-                        j++;
+      j++;
-                }
+    }
    return n;
-  }
+  }  /* labeled failure end */
  return getcaptures(L, s, r, ptop);
 }
@@ -1197,7 +1252,7 @@ static int lp_type (lua_State *L) {
 int lp_gc (lua_State *L) {
  Pattern *p = getpattern(L, 1);
  if (p->codesize > 0)
-    reallocprog(L, p, 0);
+    realloccode(L, p, 0);
  return 0;
 }
@@ -1258,8 +1313,8 @@ static struct luaL_Reg pattreg[] = {
  {"version", lp_version},
  {"setmaxstack", lp_setmax},
  {"type", lp_type},
-        {"T", lp_throw}, /* labeled failure throw */
+  {"T", lp_throw}, /* labeled failure throw */
-        {"Lc", lp_labchoice}, /* labeled failure choice */
+  {"Lc", lp_labchoice}, /* labeled failure choice */
  {NULL, NULL}
 };
@@ -1277,13 +1332,13 @@ static struct luaL_Reg metareg[] = {
 };
-int luaopen_lpeglabel (lua_State *L);
+int luaopen_lpeglabel (lua_State *L);  /* labeled failure */
-int luaopen_lpeglabel (lua_State *L) {
+int luaopen_lpeglabel (lua_State *L) { /* labeled failure */
  luaL_newmetatable(L, PATTERN_T);
  lua_pushnumber(L, MAXBACK);  /* initialize maximum backtracking */
  lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
  luaL_register(L, NULL, metareg);
-  luaL_register(L, "lpeglabel", pattreg);
+  luaL_register(L, "lpeglabel", pattreg); /* labeled failure */
  lua_pushvalue(L, -1);
  lua_setfield(L, -3, "__index");
  return 1;
diff --git a/lptypes.h b/lptypes.h
index 503f1f0..3f1041e 100644
--- a/lptypes.h
+++ b/lptypes.h
@@ -1,7 +1,7 @@
 /*
-** $Id: lptypes.h,v 1.8 2013/04/12 16:26:38 roberto Exp $
+** $Id: lptypes.h,v 1.11 2015/03/04 16:38:00 roberto Exp $
 ** LPeg - PEG pattern matching for Lua
-** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
+** Copyright 2007-2014, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
 ** written by Roberto Ierusalimschy
 */
@@ -19,7 +19,7 @@
 #include "lua.h"
-#define VERSION         "0.12"
+#define VERSION         "0.12.2"
 #define PATTERN_T       "lpeg-pattern"
@@ -29,7 +29,7 @@
 /*
 ** compatibility with Lua 5.2
 */
-#if (LUA_VERSION_NUM == 502)
+#if (LUA_VERSION_NUM >= 502)
 #undef lua_equal
 #define lua_equal(L,idx1,idx2)  lua_compare(L,(idx1),(idx2),LUA_OPEQ)
@@ -56,7 +56,9 @@
 /* maximum number of rules in a grammar */
-#define MAXRULES        200
+#if !defined(MAXRULES)
+#define MAXRULES        1000
+#endif
@@ -107,6 +109,7 @@ typedef struct Charset {
 /* set 'b' bit in charset 'cs' */
 #define setchar(cs,b)   ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
 /* labeled failure begin */
 typedef int Labelset;
diff --git a/lpvm.c b/lpvm.c
index d8b854a..a9198ba 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -38,7 +38,7 @@ typedef struct Stack {
  const char *s;  /* saved position (or NULL for calls) */
  const Instruction *p;  /* next instruction */
  int caplevel;
-        Labelset ls; /* labeled failure */
+  Labelset ls; /* labeled failure */
 } Stack;
@@ -146,7 +146,7 @@ static int removedyncap (lua_State *L, Capture *capture,
 ** Opcode interpreter
 */
 const char *match (lua_State *L, const char *o, const char *s, const char *e,
-                   Instruction *op, Capture *capture, int ptop, Labelset *labelf) {
+                   Instruction *op, Capture *capture, int ptop, Labelset *labelf) { /* labeled failure */
  Stack stackbase[INITBACK];
  Stack *stacklimit = stackbase + INITBACK;
  Stack *stack = stackbase;  /* point to first empty slot in stack */
@@ -157,7 +157,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
  stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
  lua_pushlightuserdata(L, stackbase);
  for (;;) {
-#if defined(LPEGDEBUG)
+#if defined(DEBUG)
      printf("s: |%s| stck:%d, dyncaps:%d, caps:%d  ",
             s, stack - getstackbase(L, ptop), ndyncap, captop);
      printinst(op, p);
@@ -183,9 +183,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
      case IAny: {
        if (s < e) { p++; s++; }
        else {
-                                        *labelf = LFAIL; /* labeled failure */
+          *labelf = LFAIL; /* labeled failure */
-                                        goto fail;
+          goto fail;
-                                }
+        }
        continue;
      }
      case ITestAny: {
@@ -196,9 +196,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
      case IChar: {
        if ((byte)*s == p->i.aux && s < e) { p++; s++; }
        else {
-                                        *labelf = LFAIL; /* labeled failure */
+          *labelf = LFAIL; /* labeled failure */
-                                        goto fail;
+          goto fail;
-                                }
+        }
        continue;
      }
      case ITestChar: {
@@ -211,9 +211,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
        if (testchar((p+1)->buff, c) && s < e)
          { p += CHARSETINSTSIZE; s++; }
        else {
-                                        *labelf = LFAIL; /* labeled failure */
+          *labelf = LFAIL; /* labeled failure */
-                                        goto fail;
+          goto fail;
-                                }
+        }
        continue;
      }
      case ITestSet: {
@@ -226,9 +226,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
      case IBehind: {
        int n = p->i.aux;
        if (n > s - o) {
-                                        *labelf = LFAIL; /* labeled failure */
+          *labelf = LFAIL; /* labeled failure */
-                                        goto fail;
+          goto fail;
-                                }
+        }
        s -= n; p++;
        continue;
      }
@@ -249,24 +249,23 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
          stack = doublestack(L, &stacklimit, ptop);
        stack->p = p + getoffset(p);
        stack->s = s;
-                                stack->ls = LFAIL; /* labeled failure */
+        stack->ls = LFAIL; /* labeled failure */
        stack->caplevel = captop;
        stack++;
        p += 2;
        continue;
      }
-                        case ILabChoice: { /* labeled failure */
+      case ILabChoice: { /* labeled failure */
        if (stack == stacklimit)
          stack = doublestack(L, &stacklimit, ptop);
        stack->p = p + getoffset(p);
        stack->s = s;
-                                stack->ls = (p + 2)->labels;
+        stack->ls = (p + 2)->labels;
        stack->caplevel = captop;
        stack++;
        p += 3;
        continue;
      }
      case ICall: {
        if (stack == stacklimit)
          stack = doublestack(L, &stacklimit, ptop);
@@ -296,22 +295,20 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
        p += getoffset(p);
        continue;
      }
-                        case IThrow: { /* labeled failure */
+      case IThrow: { /* labeled failure */
-                                *labelf = (p+1)->labels;
+        *labelf = (p+1)->labels;
-                                goto fail;
+        goto fail;
-                        }
+      }
      case IFailTwice:
        assert(stack > getstackbase(L, ptop));
        stack--;
        /* go through */
      case IFail:
-                                *labelf = LFAIL; /* labeled failure */
+      *labelf = LFAIL; /* labeled failure */
      fail: { /* pattern failed: try to backtrack */
        do {  /* remove pending calls */
          assert(stack > getstackbase(L, ptop));
          s = (--stack)->s;
-                                        /*printf("fail (s == NULL => %d), labelf=%d  stack->ls=%d (stack-> == giveup %d)\n",
-                 s == NULL, labelf, stack->ls, stack->p == &giveup);*/
        } while (s == NULL || (!(stack->ls & *labelf) && stack->p != &giveup));
        if (ndyncap > 0)  /* is there matchtime captures? */
          ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
@@ -328,10 +325,10 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
        captop -= n;  /* remove nested captures */
        fr -= rem;  /* 'rem' items were popped from Lua stack */
        res = resdyncaptures(L, fr, s - o, e - o);  /* get result */
-        if (res == -1) { /* fail? */ 
+        if (res == -1) { /* fail? */
-                                        *labelf = LFAIL; /* labeled failure */
+          *labelf = LFAIL; /* labeled failure */
          goto fail;
-                                }
+        }
        s = o + res;  /* else update current position */
        n = lua_gettop(L) - fr + 1;  /* number of new captures */
        ndyncap += n - rem;  /* update number of dynamic captures */
diff --git a/lpvm.h b/lpvm.h
index bb485af..c996e22 100644
--- a/lpvm.h
+++ b/lpvm.h
@@ -1,5 +1,5 @@
 /*
-** $Id: lpvm.h,v 1.2 2013/04/03 20:37:18 roberto Exp $
+** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $
 */
 #if !defined(lpvm_h)
@@ -34,8 +34,8 @@ typedef enum Opcode {
  IOpenCapture,  /* start a capture */
  ICloseCapture,
  ICloseRunTime,
-        IThrow,   /* "fails" with a specific label labeled failure */
+  IThrow,   /* "fails" with a specific label labeled failure */
-        ILabChoice   /* labeled choice */
+  ILabChoice   /* labeled choice */
 } Opcode;
@@ -52,14 +52,9 @@ typedef union Instruction {
 } Instruction;
-int getposition (lua_State *L, int t, int i);
 void printpatt (Instruction *p, int n);
 const char *match (lua_State *L, const char *o, const char *s, const char *e,
-                   Instruction *op, Capture *capture, int ptop, Labelset *labelf);
+                   Instruction *op, Capture *capture, int ptop, Labelset *labelf); /* labeled failure */
-int verify (lua_State *L, Instruction *op, const Instruction *p,
-            Instruction *e, int postable, int rule);
-void checkrule (lua_State *L, Instruction *op, int from, int to,
-                int postable, int rule);
 #endif
diff --git a/makefile b/makefile
index 4f41062..d66a84e 100644
--- a/makefile
+++ b/makefile
@@ -1,8 +1,8 @@
 LIBNAME = lpeglabel
-LUADIR = /usr/include/lua5.1/
+LUADIR = ../lua/
-#COPT = -O2
+COPT = -O2
-COPT = -DLPEG_DEBUG -g
+# COPT = -DLPEG_DEBUG -g
 CWARNS = -Wall -Wextra -pedantic \
        -Waggregate-return \
@@ -22,7 +22,7 @@ CWARNS = -Wall -Wextra -pedantic \
 # -Wunreachable-code \
-CFLAGS = $(CWARNS) $(COPT) -ansi -I$(LUADIR) -fPIC
+CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
 CC = gcc
 FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o
diff --git a/test.lua b/test.lua
index d486c03..bbb3ccb 100755
--- a/test.lua
+++ b/test.lua
@@ -1,6 +1,6 @@
 #!/usr/bin/env lua5.1
-- $Id: test.lua,v 1.101 2013/04/12 16:30:33 roberto Exp $
+-- $Id: test.lua,v 1.106 2015/03/04 17:31:33 roberto Exp $
 -- require"strict"    -- just to be pedantic
@@ -170,8 +170,8 @@ assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), "  (  (a)") == 7)
 a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
 checkeq(a, {"123", "d"})
-a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")}
+-- bug in LPeg 0.12  (nil value does not create a 'ktable')
-checkeq(a, {"123"})
+assert(m.match(m.Cc(nil), "") == nil)
 a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
 checkeq(a, {"abcd", "l"})
@@ -194,6 +194,16 @@ checkeq(a, {1, 5})
 t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
 checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
+-- bug in 0.12 ('hascapture' did not check for captures inside a rule)
+do
+  local pat = m.P{
+    'S';
+    S1 = m.C('abc') + 3,
+    S = #m.V('S1')    -- rule has capture, but '#' must ignore it
+  }
+  assert(pat:match'abc' == 1)
+end
 -- test for small capture boundary
 for i = 250,260 do
@@ -201,9 +211,8 @@ for i = 250,260 do
  assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
 end
 -- tests for any*n and any*-n
-for n = 1, 550 do
+for n = 1, 550, 13 do
  local x_1 = string.rep('x', n - 1)
  local x = x_1 .. 'a'
  assert(not m.P(n):match(x_1))
@@ -345,8 +354,9 @@ checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
 -- test for error messages
-local function checkerr (msg, ...)
+local function checkerr (msg, f, ...)
-  assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...))))
+  local st, err = pcall(f, ...)
+  assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err))
 end
 checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
@@ -370,6 +380,32 @@ p = {'a',
 }
 checkerr("rule 'a' may be left recursive", m.match, p, "a")
+-- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
+-- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
+-- that is optimized to ICommit L1
+p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' }
+assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')
+do
+  -- large dynamic Cc
+  local lim = 2^16 - 1
+  local c = 0
+  local function seq (n) 
+    if n == 1 then c = c + 1; return m.Cc(c)
+    else
+      local m = math.floor(n / 2)
+      return seq(m) * seq(n - m)
+    end
+  end
+  p = m.Ct(seq(lim))
+  t = p:match('')
+  assert(t[lim] == lim)
+  checkerr("too many", function () p = p / print end)
+  checkerr("too many", seq, lim + 1)
+end
 -- tests for non-pattern as arguments to pattern functions
@@ -488,7 +524,10 @@ assert(m.match(1 * m.B(1), 'a') == 2)
 assert(m.match(-m.B(1), 'a') == 1)
 assert(m.match(m.B(250), string.rep('a', 250)) == nil)
 assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
-assert(not pcall(m.B, 260))
+-- look-behind with an open call
+checkerr("pattern may not have fixed length", m.B, m.V'S1')
+checkerr("too long to look behind", m.B, 260)
 B = #letter * -m.B(letter) + -letter * m.B(letter)
 x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
@@ -523,7 +562,6 @@ assert(m.match(#m.S'567' * 1, "6") == 2)
 -- tests for Tail Calls
--labeled failure
 p = m.P{ 'a' * m.V(1) + '' }
 assert(p:match(string.rep('a', 1000)) == 1001)
@@ -546,7 +584,6 @@ p = m.P{
  [4] = '0' * m.V(3) + '1' * m.V(2),
 }
-- labeled failure
 assert(p:match(string.rep("00", 10000)))
 assert(p:match(string.rep("01", 10000)))
 assert(p:match(string.rep("011", 10000)))
@@ -557,16 +594,15 @@ assert(not p:match(string.rep("011", 10001)))
 -- this grammar does need backtracking info.
 local lim = 10000
 p = m.P{ '0' * m.V(1) + '0' }
-assert(not pcall(m.match, p, string.rep("0", lim)))
+checkerr("too many pending", m.match, p, string.rep("0", lim))
 m.setmaxstack(2*lim)
-assert(not pcall(m.match, p, string.rep("0", lim)))
+checkerr("too many pending", m.match, p, string.rep("0", lim))
 m.setmaxstack(2*lim + 4)
-assert(pcall(m.match, p, string.rep("0", lim)))
+assert(m.match(p, string.rep("0", lim)) == lim + 1)
 -- this repetition should not need stack space (only the call does)
 p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
 m.setmaxstack(200)
-- labeled failure
 assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
 m.setmaxstack(5)   -- restore original limit
@@ -591,10 +627,10 @@ print("+")
 -- tests for argument captures
-assert(not pcall(m.Carg, 0))
+checkerr("invalid argument", m.Carg, 0)
-assert(not pcall(m.Carg, -1))
+checkerr("invalid argument", m.Carg, -1)
-assert(not pcall(m.Carg, 2^18))
+checkerr("invalid argument", m.Carg, 2^18)
-assert(not pcall(m.match, m.Carg(1), 'a', 1))
+checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)
 assert(m.match(m.Carg(1), 'a', 1, print) == print)
 x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
 checkeq(x, {10, 20})
@@ -647,14 +683,16 @@ assert(m.match(p, "aaaa") == 5)
 assert(m.match(p, "abaa") == 2)
 assert(not m.match(p, "baaa"))
-assert(not pcall(m.match, function () return 2^20 end, s))
+checkerr("invalid position", m.match, function () return 2^20 end, s)
-assert(not pcall(m.match, function () return 0 end, s))
+checkerr("invalid position", m.match, function () return 0 end, s)
-assert(not pcall(m.match, function (s, i) return i - 1 end, s))
+checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
-assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s))
+checkerr("invalid position", m.match,
+             m.P(1)^0 * function (_, i) return i - 1 end, s)
 assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
-assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s))
+checkerr("invalid position", m.match,
+             m.P(1)^0 * function (_, i) return i + 1 end, s)
 assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
-assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s))
+checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)
 assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
 assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
       string.len(s) + 1)
@@ -737,9 +775,9 @@ assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
 assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
   "411 - abc ")
-assert(pcall(m.match, m.P(1)/"%0", "abc"))
+assert(m.match(m.P(1)/"%0", "abc") == "a")
-assert(not pcall(m.match, m.P(1)/"%1", "abc"))   -- out of range
+checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc")
-assert(not pcall(m.match, m.P(1)/"%9", "abc"))   -- out of range
+checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")
 p = m.C(1)
 p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
@@ -757,7 +795,7 @@ assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
 p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
 assert(p:match'x' == 'alo - x - alo')
-assert(not pcall(m.match, m.Cc(true) / "%1", "a"))
+checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")
 -- long strings for string capture
 l = 10000
@@ -785,35 +823,37 @@ checkeq(t, {a="b", c="du", xux="yuy"})
 -- errors in accumulator capture
-- very long match (forces fold to be a pair open-close) producing with
 -- no initial capture
-assert(not pcall(m.match, m.Cf(m.P(500), print), string.rep('a', 600)))
+checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
+-- no initial capture (very long match forces fold to be a pair open-close)
+checkerr("no initial value", m.match, m.Cf(m.P(500), print),
+                               string.rep('a', 600))
 -- nested capture produces no initial value
-assert(not pcall(m.match, m.Cf(m.P(1) / {}, print), "alo"))
+checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")
 -- tests for loop checker
-local function haveloop (p)
+local function isnullable (p)
-  assert(not pcall(function (p) return p^0 end, m.P(p)))
+  checkerr("may accept empty string", function (p) return p^0 end, m.P(p))
 end
-haveloop(m.P("x")^-4)
+isnullable(m.P("x")^-4)
 assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
 assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
-haveloop("")
+isnullable("")
-haveloop(m.P("x")^0)
+isnullable(m.P("x")^0)
-haveloop(m.P("x")^-1)
+isnullable(m.P("x")^-1)
-haveloop(m.P("x") + 1 + 2 + m.P("a")^-1)
+isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
-haveloop(-m.P("ab"))
+isnullable(-m.P("ab"))
-haveloop(- -m.P("ab"))
+isnullable(- -m.P("ab"))
-haveloop(# #(m.P("ab") + "xy"))
+isnullable(# #(m.P("ab") + "xy"))
-haveloop(- #m.P("ab")^0)
+isnullable(- #m.P("ab")^0)
-haveloop(# -m.P("ab")^1)
+isnullable(# -m.P("ab")^1)
-haveloop(#m.V(3))
+isnullable(#m.V(3))
-haveloop(m.V(3) + m.V(1) + m.P('a')^-1)
+isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
-haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
+isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
 assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
       == 3)
 assert(m.match(m.P""^-3, "a") == 1)
@@ -897,8 +937,8 @@ print"+"
 -- tests for back references
-assert(not pcall(m.match, m.Cb('x'), ''))
+checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
-assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a'))
+checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
 p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
 t = p:match("ab")
@@ -1054,6 +1094,8 @@ local re = require "re"
 local match, compile = re.match, re.compile
 assert(match("a", ".") == 2)
 assert(match("a", "''") == 1)
 assert(match("", " ! . ") == 1)
@@ -1348,6 +1390,7 @@ eqlpeggsub("[%W%S]", "%W%S")
 re.updatelocale()
 -- testing nested substitutions x string captures
 p = re.compile[[
@@ -1370,8 +1413,7 @@ assert(rev:match"0123456789" == "9876543210")
 -- testing error messages in re
 local function errmsg (p, err)
-  local s, msg = pcall(re.compile, p)
+  checkerr(err, re.compile, p)
-  assert(not s and string.find(msg, err))
 end
 errmsg('aaaa', "rule 'aaaa'")
diff --git a/testlabel.lua b/testlabel.lua
index ea18bf5..98d8f6c 100644
--- a/testlabel.lua
+++ b/testlabel.lua
@@ -279,7 +279,7 @@ local terror = { ['cmdSeq'] = "Missing ';' in CmdSeq",
                 ['factor'] = "Error matching 'Factor'",
                 ['openParExp'] = "Error matching expression after '('",
                 ['closePar'] = "Error matching ')'",
-                 ['undefined'] = "Error undefined'"}
+                 ['undefined'] = "Undefined Error"}
 g = re.compile([[
  Tiny       <- CmdSeq /{1} '' -> cmdSeq /{2} '' -> ifExp /{3} '' -> ifThen /{4} '' -> ifThenCmdSeq
@@ -292,7 +292,7 @@ g = re.compile([[
  Cmd        <- IfCmd / RepeatCmd / ReadCmd / WriteCmd  / AssignCmd 
  IfCmd      <- IF  (Exp / %{2})  (THEN / %{3})  (CmdSeq / %{4})  (ELSE (CmdSeq / %{5}) / '') (END / %{6})
  RepeatCmd  <- REPEAT  (CmdSeq / %{7})  (UNTIL / %{8})  (Exp / %{9})
-  AssignCmd  <- !RESERVED NAME  (ASSIGNMENT / %{10})  (Exp / %{11})
+  AssignCmd  <- NAME  (ASSIGNMENT / %{10})  (Exp / %{11})
  ReadCmd    <- READ  (NAME / %{12})
  WriteCmd   <- WRITE  (Exp / %{13})
  Exp        <- SimpleExp  ((LESS / EQUAL) (SimpleExp / %{14}) / '')
@@ -309,7 +309,7 @@ g = re.compile([[
  EQUAL      <- Sp '='
  LESS       <- Sp '<'
  MUL        <- Sp '*'
-  NAME       <- Sp [a-z]+
+  NAME       <- !RESERVED Sp [a-z]+
  NUMBER     <- Sp [0-9]+
  OPENPAR    <- Sp '('
  READ       <- Sp 'read'
@@ -319,7 +319,7 @@ g = re.compile([[
  THEN       <- Sp 'then'
  UNTIL      <- Sp 'until'
  WRITE      <- Sp 'write'
-        RESERVED   <- IF / ELSE / END / READ / REPEAT / THEN / UNTIL / WRITE
+        RESERVED   <- (IF / ELSE / END / READ / REPEAT / THEN / UNTIL / WRITE) ![a-z]+
  Sp         <- (%s / %nl)*     
 ]], terror)
author	Sergio Medeiros <sqmedeiros@gmail.com>	2015-03-23 14:13:25 -0300
committer	Sergio Medeiros <sqmedeiros@gmail.com>	2015-03-23 14:13:25 -0300
commit	0e93d536ba2d312502737cce2ab0cc21393c4842 (patch)
tree	7de1e3ae967c90a43e7086ccef61d1722881b20c
parent	a5a4b257e626847be3be4878c603adb51cbb420f (diff)
download	lpeglabel-0e93d536ba2d312502737cce2ab0cc21393c4842.tar.gz lpeglabel-0e93d536ba2d312502737cce2ab0cc21393c4842.tar.bz2 lpeglabel-0e93d536ba2d312502737cce2ab0cc21393c4842.zip