First implementation for the accumulator capture

author: Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-06-06 17:50:31 -0300
committer: Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-06-06 17:50:31 -0300
commit: e31e13f59ef1a4df1698b15ff1fe0198553cc3c2 (patch)
tree: c16ea4f41bdd378734afd4e475d7d7973a5a2b5d
parent: 44fab2a44d06a956c3121ceba2b39ca7b00dc428 (diff)
download: lpeg-e31e13f59ef1a4df1698b15ff1fe0198553cc3c2.tar.gz
lpeg-e31e13f59ef1a4df1698b15ff1fe0198553cc3c2.tar.bz2
lpeg-e31e13f59ef1a4df1698b15ff1fe0198553cc3c2.zip
8 files changed, 131 insertions, 22 deletions
diff --git a/HISTORY b/HISTORY
index 7fa72e6..a250709 100644
--- a/HISTORY
+++ b/HISTORY
@@ -2,9 +2,12 @@ HISTORY for LPeg 1.1.0
 * Changes from version 1.0.2 to 1.1.0
  ---------------------------------
+  + accumulator capture
  + UTF-8 ranges
  + Larger limit for number of rules in a grammar
+  + Larger limit for number of captures in a match
  + bug fixes
+  + other small improvements
 * Changes from version 1.0.1 to 1.0.2
  ---------------------------------
diff --git a/lpcap.c b/lpcap.c
index ec17d23..4750f01 100644
--- a/lpcap.c
+++ b/lpcap.c
@@ -231,6 +231,22 @@ static int functioncap (CapState *cs) {
 /*
+** Accumulator capture
+*/
+static int accumulatorcap (CapState *cs) {
+  lua_State *L = cs->L;
+  int n;
+  if (lua_gettop(L) < cs->firstcap)
+    luaL_error(L, "no previous value for accumulator capture");
+  pushluaval(cs);  /* push function */
+  lua_insert(L, -2);  /* previous value becomes first argument */
+  n = pushnestedvalues(cs, 0);  /* push nested captures */
+  lua_call(L, n + 1, 1);  /* call function */
+  return 0;  /* did not add any extra value */
+}
+/*
 ** Select capture
 */
 static int numcap (CapState *cs) {
@@ -422,13 +438,16 @@ static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
    case Csubst:
      substcap(b, cs);  /* add capture directly to buffer */
      return 1;
+    case Cacc:  /* accumulator capture? */
+      return luaL_error(cs->L, "accumulator capture inside substitution capture");
    default: {
      lua_State *L = cs->L;
      int n = pushcapture(cs);
      if (n > 0) {
        if (n > 1) lua_pop(L, n - 1);  /* only one result */
        if (!lua_isstring(L, -1))
-          luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
+          return luaL_error(L, "invalid %s value (a %s)",
+                               what, luaL_typename(L, -1));
        luaL_addvalue(b);
      }
      return n;
@@ -512,6 +531,7 @@ static int pushcapture (CapState *cs) {
    case Cbackref: res = backrefcap(cs); break;
    case Ctable: res = tablecap(cs); break;
    case Cfunction: res = functioncap(cs); break;
+    case Cacc: res = accumulatorcap(cs); break;
    case Cnum: res = numcap(cs); break;
    case Cquery: res = querycap(cs); break;
    case Cfold: res = foldcap(cs); break;
@@ -537,9 +557,11 @@ int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
    CapState cs;
    cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
    cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
+    cs.firstcap = lua_gettop(L) + 1;  /* where first value (if any) will go */
    do {  /* collect their values */
      n += pushcapture(&cs);
    } while (!isclosecap(cs.cap));
+    assert(lua_gettop(L) - cs.firstcap == n - 1);
  }
  if (n == 0) {  /* no capture values? */
    lua_pushinteger(L, r - s + 1);  /* return only end position */
diff --git a/lpcap.h b/lpcap.h
index 10539a0..e72d913 100644
--- a/lpcap.h
+++ b/lpcap.h
@@ -16,6 +16,7 @@ typedef enum CapKind {
  Csimple,  /* next node is pattern */
  Ctable,  /* next node is pattern */
  Cfunction,  /* ktable[key] is function; next node is pattern */
+  Cacc,  /* ktable[key] is function; next node is pattern */
  Cquery,  /* ktable[key] is table; next node is pattern */
  Cstring,  /* ktable[key] is string; next node is pattern */
  Cnum,  /* numbered capture; 'key' is number of value to return */
@@ -38,7 +39,8 @@ typedef struct CapState {
  Capture *cap;  /* current capture */
  Capture *ocap;  /* (original) capture list */
  lua_State *L;
-  int ptop;  /* index of last argument to 'match' */
+  int ptop;  /* stack index of last argument to 'match' */
+  int firstcap;  /* stack index of first capture pushed in the stack */
  const char *s;  /* original string */
  int valuecached;  /* value stored in cache slot */
  int reclevel;  /* recursion level */
diff --git a/lpeg.html b/lpeg.html
index f50d327..9faa1c7 100644
--- a/lpeg.html
+++ b/lpeg.html
@@ -638,6 +638,10 @@ or no value when <code>number</code> is zero.</td></tr>
 <tr><td><a href="#cap-func"><code>patt / function</code></a></td>
  <td>the returns of <code>function</code> applied to the captures
      of <code>patt</code></td></tr>
+<tr><td><a href="#cap-rep"><code>patt % function</code></a></td>
+  <td>the return of <code>function</code> applied to the previous
+      capture plus the captures of <code>patt</code></td></tr>;
+      the returned value becomes the value of the previous capture
 <tr><td><a href="#matchtime"><code>lpeg.Cmt(patt, function)</code></a></td>
  <td>the returns of <code>function</code> applied to the captures
      of <code>patt</code>; the application is done at match time</td></tr>
@@ -889,6 +893,75 @@ there is no captured value.
 </p>
+<h3><a name="cap-rep"></a><code>patt % function</code></h3>
+<p>
+Creates an <em>accumulator capture</em>.
+This pattern behaves similarly to a
+<a href="cap-func">function capture</a>,
+with the following differences:
+The last captured value is added as a first argument to
+the call;
+the return of the function is adjusted to one single value;
+that value becomes the last captured value.
+</p>
+<p>
+As an example,
+consider the following code fragment:
+</p>
+<pre class="example">
+local name = lpeg.C(lpeg.R("az")^1)
+local p = name * (lpeg.P("^") % string.upper)^-1
+print(p:match("count"))    --&gt; count
+print(p:match("count^"))   --&gt; COUNT
+</pre>
+<p>
+In the first match,
+the accumulator capture does not match,
+and so the match results in its first capture, a name.
+In the second match,
+the accumulator capture matches,
+so the function <code>string.upper</code>
+is called with the previous capture (created by <code>name</code>)
+plus the string <code>"^"</code>;
+the function ignores its second argument and returns the first argument
+changed to upper case;
+that value then becomes the first and only
+capture value created by the match.
+</p>
+-- matches a numeral and captures its numerical value
+number = lpeg.R"09"^1 / tonumber
+-- auxiliary function to add two numbers
+function add (acc, newvalue) return acc + newvalue end
+-- matches a list of numbers, adding their values
+sum = number * ("," * number % add)^0
+-- example of use
+print(sum:match("10,30,43"))   --&gt; 83
+</pre>
+<p>
+First, the initial <code>number</code> captures a number;
+that first capture will play the role of an accumulator.
+Then, each time <code>number</code> matches inside the loop
+there is a accumulator capture:
+It calls <code>add</code> with the current value of the accumulator
+and the value of the new number,
+and their sum replaces the value of the accumulator.
+At the end of the match,
+the accumulator with all sums is the final value.
+</p>
+<p>
+Due to the nature of this capture,
+you should avoid using it in places where it is not clear
+what is its "previous" capture.
+Due to implementation details,
+you should not use this capture inside a substitution capture.
+</p>
 <h3><a name="matchtime"></a><code>lpeg.Cmt(patt, function)</code></h3>
 <p>
 Creates a <em>match-time capture</em>.
@@ -968,19 +1041,17 @@ lpeg.locale(lpeg)   -- adds locale entries into 'lpeg' table
 local space = lpeg.space^0
 local name = lpeg.C(lpeg.alpha^1) * space
 local sep = lpeg.S(",;") * space
-local pair = lpeg.Cg(name * "=" * space * name) * sep^-1
+local pair = name * "=" * space * name * sep^-1
-local list = lpeg.Cf(lpeg.Ct("") * pair^0, rawset)
+local list = lpeg.Ct("") * (pair % rawset)^0
 t = list:match("a=b, c = hi; next = pi")  --> { a = "b", c = "hi", next = "pi" }
 </pre>
 <p>
 Each pair has the format <code>name = name</code> followed by
 an optional separator (a comma or a semicolon).
-The <code>pair</code> pattern encloses the pair in a group pattern,
+The <code>list</code> pattern then <em>folds</em> these captures.
-so that the names become the values of a single capture.
-The <code>list</code> pattern then folds these captures.
 It starts with an empty table,
 created by a table capture matching an empty string;
-then for each capture (a pair of names) it applies <code>rawset</code>
+then for each a pair of names it applies <code>rawset</code>
 over the accumulator (the table) and the capture values (the pair of names).
 <code>rawset</code> returns the table itself,
 so the accumulator is always the table.
@@ -1295,8 +1366,8 @@ end
 -- Grammar
 local V = lpeg.V
 G = lpeg.P{ "Exp",
-  Exp = lpeg.Cf(V"Term" * lpeg.Cg(TermOp * V"Term")^0, eval);
+  Exp = V"Term" * (TermOp * V"Term" % eval)^0;
-  Term = lpeg.Cf(V"Factor" * lpeg.Cg(FactorOp * V"Factor")^0, eval);
+  Term = V"Factor" * (FactorOp * V"Factor" % eval)^0;
  Factor = Number / tonumber + Open * V"Exp" * Close;
 }
@@ -1304,7 +1375,7 @@ G = lpeg.P{ "Exp",
 print(lpeg.match(G, "3 + 5*9 / (1+1) - 12"))   --> 13.5
 </pre>
 <p>
-Note the use of the fold (accumulator) capture.
+Note the use of the accumulator capture.
 To compute the value of an expression,
 the accumulator starts with the value of the first term,
 and then applies <code>eval</code> over
diff --git a/lpprint.c b/lpprint.c
index 54a3da7..1142376 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -60,7 +60,7 @@ static void printTcharset (TTree *tree) {
 static const char *capkind (int kind) {
  const char *const modes[] = {
    "close", "position", "constant", "backref",
-    "argument", "simple", "table", "function",
+    "argument", "simple", "table", "function", "replace",
    "query", "string", "num", "substitution", "fold",
    "runtime", "group"};
  return modes[kind];
@@ -147,7 +147,6 @@ void printpatt (Instruction *p) {
 }
-#if defined(LPEG_DEBUG)
 static void printcap (Capture *cap) {
  printf("%s (idx: %d - size: %d) -> %p\n",
         capkind(cap->kind), cap->idx, cap->siz, cap->s);
@@ -160,7 +159,6 @@ void printcaplist (Capture *cap, Capture *limit) {
    printcap(cap);
  printf("=======\n");
 }
-#endif
 /* }====================================================== */
diff --git a/lptree.c b/lptree.c
index 0b2d824..330471a 100644
--- a/lptree.c
+++ b/lptree.c
@@ -19,7 +19,7 @@
 const byte numsiblings[] = {
  0, 0, 0,      /* char, set, any */
  0, 0, 0,      /* true, false, utf-range */
-  1,            /* rep */
+  1,            /* acc */
  2, 2,         /* seq, choice */
  1, 1,         /* not, and */
  0, 0, 2, 1, 1,  /* call, opencall, rule, prerule, grammar */
@@ -850,6 +850,11 @@ static int lp_divcapture (lua_State *L) {
 }
+static int lp_acccapture (lua_State *L) {
+  return capture_aux(L, Cacc, 2);
+}
 static int lp_substcapture (lua_State *L) {
  return capture_aux(L, Csubst, 0);
 }
@@ -1250,7 +1255,7 @@ static int lp_match (lua_State *L) {
  int ptop = lua_gettop(L);
  lua_pushnil(L);  /* initialize subscache */
  lua_pushlightuserdata(L, capture);  /* initialize caplistidx */
-  lua_getuservalue(L, 1);  /* initialize penvidx */
+  lua_getuservalue(L, 1);  /* initialize ktableidx */
  r = match(L, s, s + i, s + l, code, capture, ptop);
  if (r == NULL) {
    lua_pushnil(L);
@@ -1369,6 +1374,7 @@ static struct luaL_Reg metareg[] = {
  {"__gc", lp_gc},
  {"__len", lp_and},
  {"__div", lp_divcapture},
+  {"__mod", lp_acccapture},
  {"__unm", lp_not},
  {"__sub", lp_sub},
  {NULL, NULL}
diff --git a/makefile b/makefile
index 2b3c12e..2aff497 100644
--- a/makefile
+++ b/makefile
@@ -3,7 +3,7 @@ LUADIR = ./lua/
 # COPT = -O2 -DNDEBUG
 # COPT = -g
-COPT = -DLPEG_DEBUG -g
+COPT = -O0 -DLPEG_DEBUG -g
 CWARNS = -Wall -Wextra -pedantic \
        -Waggregate-return \
diff --git a/test.lua b/test.lua
index d31a69f..cd85b31 100755
--- a/test.lua
+++ b/test.lua
@@ -493,8 +493,8 @@ local function f_term (v1, op, v2, d)
 end
 G = m.P{ "Exp",
-  Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
+  Exp = V"Factor" * (FactorOp * V"Factor" % f_factor)^0;
-  Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
+  Factor = V"Term" * (TermOp * V"Term" % f_term)^0;
  Term = Number / tonumber  +  Open * V"Exp" * Close;
 }
@@ -866,6 +866,7 @@ print"+"
 -- accumulator capture
 function f (x) return x + 1 end
 assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
+assert(m.match(m.Cc(0) * (m.C(1) % f)^0, "alo alo") == 7)
 t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
 checkeq(t, {1})
@@ -875,7 +876,7 @@ t = p:match("a=b;c=du;xux=yuy;")
 checkeq(t, {a="b", c="du", xux="yuy"})
-- errors in accumulator capture
+-- errors in fold capture
 -- no initial capture
 checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
@@ -883,8 +884,14 @@ checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
 checkerr("no initial value", m.match, m.Cf(m.P(500), print),
                               string.rep('a', 600))
-- nested capture produces no initial value
-checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")
+-- errors in accumulator capture
+-- no initial capture
+checkerr("no previous value", m.match, m.P(5) % print, 'aaaaaa')
+-- no initial capture (very long match forces fold to be a pair open-close)
+checkerr("no previous value", m.match, m.P(500) % print,
+                               string.rep('a', 600))
 -- tests for loop checker
author	Roberto Ierusalimschy <roberto@inf.puc-rio.br>	2023-06-06 17:50:31 -0300
committer	Roberto Ierusalimschy <roberto@inf.puc-rio.br>	2023-06-06 17:50:31 -0300
commit	e31e13f59ef1a4df1698b15ff1fe0198553cc3c2 (patch)
tree	c16ea4f41bdd378734afd4e475d7d7973a5a2b5d
parent	44fab2a44d06a956c3121ceba2b39ca7b00dc428 (diff)
download	lpeg-e31e13f59ef1a4df1698b15ff1fe0198553cc3c2.tar.gz lpeg-e31e13f59ef1a4df1698b15ff1fe0198553cc3c2.tar.bz2 lpeg-e31e13f59ef1a4df1698b15ff1fe0198553cc3c2.zip

diff --git a/HISTORY b/HISTORY index 7fa72e6..a250709 100644 --- a/HISTORY +++ b/HISTORY
@@ -2,9 +2,12 @@ HISTORY for LPeg 1.1.0
2		2
3	* Changes from version 1.0.2 to 1.1.0	3	* Changes from version 1.0.2 to 1.1.0
4	---------------------------------	4	---------------------------------
		5	+ accumulator capture
5	+ UTF-8 ranges	6	+ UTF-8 ranges
6	+ Larger limit for number of rules in a grammar	7	+ Larger limit for number of rules in a grammar
		8	+ Larger limit for number of captures in a match
7	+ bug fixes	9	+ bug fixes
		10	+ other small improvements
8		11
9	* Changes from version 1.0.1 to 1.0.2	12	* Changes from version 1.0.1 to 1.0.2
10	---------------------------------	13	---------------------------------


diff --git a/lpcap.c b/lpcap.c index ec17d23..4750f01 100644 --- a/lpcap.c +++ b/lpcap.c
@@ -231,6 +231,22 @@ static int functioncap (CapState *cs) {
231		231
232		232
233	/*	233	/*
		234	** Accumulator capture
		235	*/
		236	static int accumulatorcap (CapState *cs) {
		237	lua_State *L = cs->L;
		238	int n;
		239	if (lua_gettop(L) < cs->firstcap)
		240	luaL_error(L, "no previous value for accumulator capture");
		241	pushluaval(cs); /* push function */
		242	lua_insert(L, -2); /* previous value becomes first argument */
		243	n = pushnestedvalues(cs, 0); /* push nested captures */
		244	lua_call(L, n + 1, 1); /* call function */
		245	return 0; /* did not add any extra value */
		246	}
		247
		248
		249	/*
234	** Select capture	250	** Select capture
235	*/	251	*/
236	static int numcap (CapState *cs) {	252	static int numcap (CapState *cs) {
@@ -422,13 +438,16 @@ static int addonestring (luaL_Buffer b, CapState cs, const char *what) {
422	case Csubst:	438	case Csubst:
423	substcap(b, cs); /* add capture directly to buffer */	439	substcap(b, cs); /* add capture directly to buffer */
424	return 1;	440	return 1;
		441	case Cacc: /* accumulator capture? */
		442	return luaL_error(cs->L, "accumulator capture inside substitution capture");
425	default: {	443	default: {
426	lua_State *L = cs->L;	444	lua_State *L = cs->L;
427	int n = pushcapture(cs);	445	int n = pushcapture(cs);
428	if (n > 0) {	446	if (n > 0) {
429	if (n > 1) lua_pop(L, n - 1); /* only one result */	447	if (n > 1) lua_pop(L, n - 1); /* only one result */
430	if (!lua_isstring(L, -1))	448	if (!lua_isstring(L, -1))
431	luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));	449	return luaL_error(L, "invalid %s value (a %s)",
		450	what, luaL_typename(L, -1));
432	luaL_addvalue(b);	451	luaL_addvalue(b);
433	}	452	}
434	return n;	453	return n;
@@ -512,6 +531,7 @@ static int pushcapture (CapState *cs) {
512	case Cbackref: res = backrefcap(cs); break;	531	case Cbackref: res = backrefcap(cs); break;
513	case Ctable: res = tablecap(cs); break;	532	case Ctable: res = tablecap(cs); break;
514	case Cfunction: res = functioncap(cs); break;	533	case Cfunction: res = functioncap(cs); break;
		534	case Cacc: res = accumulatorcap(cs); break;
515	case Cnum: res = numcap(cs); break;	535	case Cnum: res = numcap(cs); break;
516	case Cquery: res = querycap(cs); break;	536	case Cquery: res = querycap(cs); break;
517	case Cfold: res = foldcap(cs); break;	537	case Cfold: res = foldcap(cs); break;
@@ -537,9 +557,11 @@ int getcaptures (lua_State L, const char s, const char *r, int ptop) {
537	CapState cs;	557	CapState cs;
538	cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;	558	cs.ocap = cs.cap = capture; cs.L = L; cs.reclevel = 0;
539	cs.s = s; cs.valuecached = 0; cs.ptop = ptop;	559	cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
		560	cs.firstcap = lua_gettop(L) + 1; /* where first value (if any) will go */
540	do { /* collect their values */	561	do { /* collect their values */
541	n += pushcapture(&cs);	562	n += pushcapture(&cs);
542	} while (!isclosecap(cs.cap));	563	} while (!isclosecap(cs.cap));
		564	assert(lua_gettop(L) - cs.firstcap == n - 1);
543	}	565	}
544	if (n == 0) { /* no capture values? */	566	if (n == 0) { /* no capture values? */
545	lua_pushinteger(L, r - s + 1); /* return only end position */	567	lua_pushinteger(L, r - s + 1); /* return only end position */


diff --git a/lpcap.h b/lpcap.h index 10539a0..e72d913 100644 --- a/lpcap.h +++ b/lpcap.h
@@ -16,6 +16,7 @@ typedef enum CapKind {
16	Csimple, /* next node is pattern */	16	Csimple, /* next node is pattern */
17	Ctable, /* next node is pattern */	17	Ctable, /* next node is pattern */
18	Cfunction, /* ktable[key] is function; next node is pattern */	18	Cfunction, /* ktable[key] is function; next node is pattern */
		19	Cacc, /* ktable[key] is function; next node is pattern */
19	Cquery, /* ktable[key] is table; next node is pattern */	20	Cquery, /* ktable[key] is table; next node is pattern */
20	Cstring, /* ktable[key] is string; next node is pattern */	21	Cstring, /* ktable[key] is string; next node is pattern */
21	Cnum, /* numbered capture; 'key' is number of value to return */	22	Cnum, /* numbered capture; 'key' is number of value to return */
@@ -38,7 +39,8 @@ typedef struct CapState {
38	Capture cap; / current capture */	39	Capture cap; / current capture */
39	Capture ocap; / (original) capture list */	40	Capture ocap; / (original) capture list */
40	lua_State *L;	41	lua_State *L;
41	int ptop; /* index of last argument to 'match' */	42	int ptop; /* stack index of last argument to 'match' */
		43	int firstcap; /* stack index of first capture pushed in the stack */
42	const char s; / original string */	44	const char s; / original string */
43	int valuecached; /* value stored in cache slot */	45	int valuecached; /* value stored in cache slot */
44	int reclevel; /* recursion level */	46	int reclevel; /* recursion level */


diff --git a/lpeg.html b/lpeg.html index f50d327..9faa1c7 100644 --- a/lpeg.html +++ b/lpeg.html
@@ -638,6 +638,10 @@ or no value when <code>number</code> is zero.</td></tr>
638	<tr><td><a href="#cap-func"><code>patt / function</code></a></td>	638	<tr><td><a href="#cap-func"><code>patt / function</code></a></td>
639	<td>the returns of <code>function</code> applied to the captures	639	<td>the returns of <code>function</code> applied to the captures
640	of <code>patt</code></td></tr>	640	of <code>patt</code></td></tr>
		641	<tr><td><a href="#cap-rep"><code>patt % function</code></a></td>
		642	<td>the return of <code>function</code> applied to the previous
		643	capture plus the captures of <code>patt</code></td></tr>;
		644	the returned value becomes the value of the previous capture
641	<tr><td><a href="#matchtime"><code>lpeg.Cmt(patt, function)</code></a></td>	645	<tr><td><a href="#matchtime"><code>lpeg.Cmt(patt, function)</code></a></td>
642	<td>the returns of <code>function</code> applied to the captures	646	<td>the returns of <code>function</code> applied to the captures
643	of <code>patt</code>; the application is done at match time</td></tr>	647	of <code>patt</code>; the application is done at match time</td></tr>
@@ -889,6 +893,75 @@ there is no captured value.
889	</p>	893	</p>
890		894
891		895
		896	<h3><a name="cap-rep"></a><code>patt % function</code></h3>
		897	<p>
		898	Creates an <em>accumulator capture</em>.
		899	This pattern behaves similarly to a
		900	<a href="cap-func">function capture</a>,
		901	with the following differences:
		902	The last captured value is added as a first argument to
		903	the call;
		904	the return of the function is adjusted to one single value;
		905	that value becomes the last captured value.
		906	</p>
		907
		908	<p>
		909	As an example,
		910	consider the following code fragment:
		911	</p>
		912	<pre class="example">
		913	local name = lpeg.C(lpeg.R("az")^1)
		914	local p = name * (lpeg.P("^") % string.upper)^-1
		915	print(p:match("count")) --> count
		916	print(p:match("count^")) --> COUNT
		917	</pre>
		918	<p>
		919	In the first match,
		920	the accumulator capture does not match,
		921	and so the match results in its first capture, a name.
		922	In the second match,
		923	the accumulator capture matches,
		924	so the function <code>string.upper</code>
		925	is called with the previous capture (created by <code>name</code>)
		926	plus the string <code>"^"</code>;
		927	the function ignores its second argument and returns the first argument
		928	changed to upper case;
		929	that value then becomes the first and only
		930	capture value created by the match.
		931	</p>
		932	-- matches a numeral and captures its numerical value
		933	number = lpeg.R"09"^1 / tonumber
		934
		935	-- auxiliary function to add two numbers
		936	function add (acc, newvalue) return acc + newvalue end
		937
		938	-- matches a list of numbers, adding their values
		939	sum = number * ("," * number % add)^0
		940
		941	-- example of use
		942	print(sum:match("10,30,43")) --> 83
		943	</pre>
		944	<p>
		945	First, the initial <code>number</code> captures a number;
		946	that first capture will play the role of an accumulator.
		947	Then, each time <code>number</code> matches inside the loop
		948	there is a accumulator capture:
		949	It calls <code>add</code> with the current value of the accumulator
		950	and the value of the new number,
		951	and their sum replaces the value of the accumulator.
		952	At the end of the match,
		953	the accumulator with all sums is the final value.
		954	</p>
		955
		956	<p>
		957	Due to the nature of this capture,
		958	you should avoid using it in places where it is not clear
		959	what is its "previous" capture.
		960	Due to implementation details,
		961	you should not use this capture inside a substitution capture.
		962	</p>
		963
		964
892	<h3><a name="matchtime"></a><code>lpeg.Cmt(patt, function)</code></h3>	965	<h3><a name="matchtime"></a><code>lpeg.Cmt(patt, function)</code></h3>
893	<p>	966	<p>
894	Creates a <em>match-time capture</em>.	967	Creates a <em>match-time capture</em>.
@@ -968,19 +1041,17 @@ lpeg.locale(lpeg) -- adds locale entries into 'lpeg' table
968	local space = lpeg.space^0	1041	local space = lpeg.space^0
969	local name = lpeg.C(lpeg.alpha^1) * space	1042	local name = lpeg.C(lpeg.alpha^1) * space
970	local sep = lpeg.S(",;") * space	1043	local sep = lpeg.S(",;") * space
971	local pair = lpeg.Cg(name * "=" * space * name) * sep^-1	1044	local pair = name * "=" * space * name * sep^-1
972	local list = lpeg.Cf(lpeg.Ct("") * pair^0, rawset)	1045	local list = lpeg.Ct("") * (pair % rawset)^0
973	t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" }	1046	t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" }
974	</pre>	1047	</pre>
975	<p>	1048	<p>
976	Each pair has the format <code>name = name</code> followed by	1049	Each pair has the format <code>name = name</code> followed by
977	an optional separator (a comma or a semicolon).	1050	an optional separator (a comma or a semicolon).
978	The <code>pair</code> pattern encloses the pair in a group pattern,	1051	The <code>list</code> pattern then <em>folds</em> these captures.
979	so that the names become the values of a single capture.
980	The <code>list</code> pattern then folds these captures.
981	It starts with an empty table,	1052	It starts with an empty table,
982	created by a table capture matching an empty string;	1053	created by a table capture matching an empty string;
983	then for each capture (a pair of names) it applies <code>rawset</code>	1054	then for each a pair of names it applies <code>rawset</code>
984	over the accumulator (the table) and the capture values (the pair of names).	1055	over the accumulator (the table) and the capture values (the pair of names).
985	<code>rawset</code> returns the table itself,	1056	<code>rawset</code> returns the table itself,
986	so the accumulator is always the table.	1057	so the accumulator is always the table.
@@ -1295,8 +1366,8 @@ end
1295	-- Grammar	1366	-- Grammar
1296	local V = lpeg.V	1367	local V = lpeg.V
1297	G = lpeg.P{ "Exp",	1368	G = lpeg.P{ "Exp",
1298	Exp = lpeg.Cf(V"Term" * lpeg.Cg(TermOp * V"Term")^0, eval);	1369	Exp = V"Term" * (TermOp * V"Term" % eval)^0;
1299	Term = lpeg.Cf(V"Factor" * lpeg.Cg(FactorOp * V"Factor")^0, eval);	1370	Term = V"Factor" * (FactorOp * V"Factor" % eval)^0;
1300	Factor = Number / tonumber + Open * V"Exp" * Close;	1371	Factor = Number / tonumber + Open * V"Exp" * Close;
1301	}	1372	}
1302		1373
@@ -1304,7 +1375,7 @@ G = lpeg.P{ "Exp",
1304	print(lpeg.match(G, "3 + 5*9 / (1+1) - 12")) --> 13.5	1375	print(lpeg.match(G, "3 + 5*9 / (1+1) - 12")) --> 13.5
1305	</pre>	1376	</pre>
1306	<p>	1377	<p>
1307	Note the use of the fold (accumulator) capture.	1378	Note the use of the accumulator capture.
1308	To compute the value of an expression,	1379	To compute the value of an expression,
1309	the accumulator starts with the value of the first term,	1380	the accumulator starts with the value of the first term,
1310	and then applies <code>eval</code> over	1381	and then applies <code>eval</code> over


diff --git a/lpprint.c b/lpprint.c index 54a3da7..1142376 100644 --- a/lpprint.c +++ b/lpprint.c
@@ -60,7 +60,7 @@ static void printTcharset (TTree *tree) {
60	static const char *capkind (int kind) {	60	static const char *capkind (int kind) {
61	const char *const modes[] = {	61	const char *const modes[] = {
62	"close", "position", "constant", "backref",	62	"close", "position", "constant", "backref",
63	"argument", "simple", "table", "function",	63	"argument", "simple", "table", "function", "replace",
64	"query", "string", "num", "substitution", "fold",	64	"query", "string", "num", "substitution", "fold",
65	"runtime", "group"};	65	"runtime", "group"};
66	return modes[kind];	66	return modes[kind];
@@ -147,7 +147,6 @@ void printpatt (Instruction *p) {
147	}	147	}
148		148
149		149
150	#if defined(LPEG_DEBUG)
151	static void printcap (Capture *cap) {	150	static void printcap (Capture *cap) {
152	printf("%s (idx: %d - size: %d) -> %p\n",	151	printf("%s (idx: %d - size: %d) -> %p\n",
153	capkind(cap->kind), cap->idx, cap->siz, cap->s);	152	capkind(cap->kind), cap->idx, cap->siz, cap->s);
@@ -160,7 +159,6 @@ void printcaplist (Capture cap, Capture limit) {
160	printcap(cap);	159	printcap(cap);
161	printf("=======\n");	160	printf("=======\n");
162	}	161	}
163	#endif
164		162
165	/* }====================================================== */	163	/* }====================================================== */
166		164


diff --git a/lptree.c b/lptree.c index 0b2d824..330471a 100644 --- a/lptree.c +++ b/lptree.c
@@ -19,7 +19,7 @@
19	const byte numsiblings[] = {	19	const byte numsiblings[] = {
20	0, 0, 0, /* char, set, any */	20	0, 0, 0, /* char, set, any */
21	0, 0, 0, /* true, false, utf-range */	21	0, 0, 0, /* true, false, utf-range */
22	1, /* rep */	22	1, /* acc */
23	2, 2, /* seq, choice */	23	2, 2, /* seq, choice */
24	1, 1, /* not, and */	24	1, 1, /* not, and */
25	0, 0, 2, 1, 1, /* call, opencall, rule, prerule, grammar */	25	0, 0, 2, 1, 1, /* call, opencall, rule, prerule, grammar */
@@ -850,6 +850,11 @@ static int lp_divcapture (lua_State *L) {
850	}	850	}
851		851
852		852
		853	static int lp_acccapture (lua_State *L) {
		854	return capture_aux(L, Cacc, 2);
		855	}
		856
		857
853	static int lp_substcapture (lua_State *L) {	858	static int lp_substcapture (lua_State *L) {
854	return capture_aux(L, Csubst, 0);	859	return capture_aux(L, Csubst, 0);
855	}	860	}
@@ -1250,7 +1255,7 @@ static int lp_match (lua_State *L) {
1250	int ptop = lua_gettop(L);	1255	int ptop = lua_gettop(L);
1251	lua_pushnil(L); /* initialize subscache */	1256	lua_pushnil(L); /* initialize subscache */
1252	lua_pushlightuserdata(L, capture); /* initialize caplistidx */	1257	lua_pushlightuserdata(L, capture); /* initialize caplistidx */
1253	lua_getuservalue(L, 1); /* initialize penvidx */	1258	lua_getuservalue(L, 1); /* initialize ktableidx */
1254	r = match(L, s, s + i, s + l, code, capture, ptop);	1259	r = match(L, s, s + i, s + l, code, capture, ptop);
1255	if (r == NULL) {	1260	if (r == NULL) {
1256	lua_pushnil(L);	1261	lua_pushnil(L);
@@ -1369,6 +1374,7 @@ static struct luaL_Reg metareg[] = {
1369	{"__gc", lp_gc},	1374	{"__gc", lp_gc},
1370	{"__len", lp_and},	1375	{"__len", lp_and},
1371	{"__div", lp_divcapture},	1376	{"__div", lp_divcapture},
		1377	{"__mod", lp_acccapture},
1372	{"__unm", lp_not},	1378	{"__unm", lp_not},
1373	{"__sub", lp_sub},	1379	{"__sub", lp_sub},
1374	{NULL, NULL}	1380	{NULL, NULL}


diff --git a/makefile b/makefile index 2b3c12e..2aff497 100644 --- a/makefile +++ b/makefile
@@ -3,7 +3,7 @@ LUADIR = ./lua/
3		3
4	# COPT = -O2 -DNDEBUG	4	# COPT = -O2 -DNDEBUG
5	# COPT = -g	5	# COPT = -g
6	COPT = -DLPEG_DEBUG -g	6	COPT = -O0 -DLPEG_DEBUG -g
7		7
8	CWARNS = -Wall -Wextra -pedantic \	8	CWARNS = -Wall -Wextra -pedantic \
9	-Waggregate-return \	9	-Waggregate-return \


diff --git a/test.lua b/test.lua index d31a69f..cd85b31 100755 --- a/test.lua +++ b/test.lua
@@ -493,8 +493,8 @@ local function f_term (v1, op, v2, d)
493	end	493	end
494		494
495	G = m.P{ "Exp",	495	G = m.P{ "Exp",
496	Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);	496	Exp = V"Factor" * (FactorOp * V"Factor" % f_factor)^0;
497	Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);	497	Factor = V"Term" * (TermOp * V"Term" % f_term)^0;
498	Term = Number / tonumber + Open * V"Exp" * Close;	498	Term = Number / tonumber + Open * V"Exp" * Close;
499	}	499	}
500		500
@@ -866,6 +866,7 @@ print"+"
866	-- accumulator capture	866	-- accumulator capture
867	function f (x) return x + 1 end	867	function f (x) return x + 1 end
868	assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)	868	assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
		869	assert(m.match(m.Cc(0) * (m.C(1) % f)^0, "alo alo") == 7)
869		870
870	t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}	871	t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
871	checkeq(t, {1})	872	checkeq(t, {1})
@@ -875,7 +876,7 @@ t = p:match("a=b;c=du;xux=yuy;")
875	checkeq(t, {a="b", c="du", xux="yuy"})	876	checkeq(t, {a="b", c="du", xux="yuy"})
876		877
877		878
878	-- errors in accumulator capture	879	-- errors in fold capture
879		880
880	-- no initial capture	881	-- no initial capture
881	checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')	882	checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
@@ -883,8 +884,14 @@ checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
883	checkerr("no initial value", m.match, m.Cf(m.P(500), print),	884	checkerr("no initial value", m.match, m.Cf(m.P(500), print),
884	string.rep('a', 600))	885	string.rep('a', 600))
885		886
886	-- nested capture produces no initial value	887
887	checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")	888	-- errors in accumulator capture
		889
		890	-- no initial capture
		891	checkerr("no previous value", m.match, m.P(5) % print, 'aaaaaa')
		892	-- no initial capture (very long match forces fold to be a pair open-close)
		893	checkerr("no previous value", m.match, m.P(500) % print,
		894	string.rep('a', 600))
888		895
889		896
890	-- tests for loop checker	897	-- tests for loop checker