From 9a9ee3d9ab8ce435d743d293ec43075151370200 Mon Sep 17 00:00:00 2001
From: Roberto Ierusalimschy
-lpeg.Carg(n)
the value of the nth extra argument to
lpeg.match
(matches the empty string)
+lpeg.Cb(name)
lpeg.Cb(key)
the values produced by the previous
- group capture named name
+ group capture named key
(matches the empty string)lpeg.Cc(values)
the given values (matches the empty string)
-lpeg.Cf(patt, func)
a folding of the captures from patt
+lpeg.Cg(patt [, name])
+ optionally tagged with lpeg.Cg(patt [, key])
the values produced by patt
,
- optionally tagged with name
key
lpeg.Cp()
the current position (matches the empty string)
@@ -639,9 +639,10 @@ or no value when lpeg.Cs(patt)
number
is zero.the returns of
function
applied to the captures
of patt
;
- the returned value becomes the value of the previous capture
+
- patt % function
the return of function
applied to the previous
- capture plus the captures of patt
produces no value;
+ it accummulates the captures from
patt
+ into the previous capture through function
+
@@ -699,24 +700,25 @@ argument given in the call to lpeg.Cmt(patt, function)
the returns of function
applied to the captures
of patt
; the application is done at match timelpeg.match
.
lpeg.Cb (name)
lpeg.Cb (key)
Creates a back capture.
This pattern matches the empty string and
produces the values produced by the most recent
-group capture named name
-(where name
can be any Lua value).
+group capture named key
+(where key
can be any Lua value).
Most recent means the last complete outermost -group capture with the given name. +group capture with the given key. A Complete capture means that the entire pattern -corresponding to the capture has matched. +corresponding to the capture has matched; +in other words, the back capture is not nested inside the group. An Outermost capture means that the capture is not inside -another complete capture. +another complete capture that does not contain the back capture itself.
@@ -785,13 +787,13 @@ print(sum:match("10,30,43")) --> 83 -
lpeg.Cg (patt [, name])
lpeg.Cg (patt [, key])
Creates a group capture.
It groups all values returned by patt
into a single capture.
-The group may be anonymous (if no name is given)
-or named with the given name
+The group may be anonymous (if no key is given)
+or named with the given key
(which can be any non-nil Lua value).
patt
,
the first value of the group is put into the table
-with the group name as its key.
+with the group key as its key.
The captured value is only the table.
@@ -897,12 +899,14 @@ there is no captured value.
Creates an accumulator capture. This pattern behaves similarly to a -function capture, +function capture, with the following differences: The last captured value is added as a first argument to the call; the return of the function is adjusted to one single value; -that value becomes the last captured value. +that value replaces the last captured value. +Note that the capture itself produces no values; +it only changes the value of its previous capture.
@@ -929,6 +933,12 @@ changed to upper case; that value then becomes the first and only capture value created by the match.
+ ++As another example, +let us consider the problem of adding a list of numbers. +
+-- matches a numeral and captures its numerical value number = lpeg.R"09"^1 / tonumber @@ -944,11 +954,11 @@ print(sum:match("10,30,43")) --> 83First, the initial
@@ -956,9 +966,12 @@ the accumulator with all sums is the final value.number
captures a number; that first capture will play the role of an accumulator. -Then, each timenumber
matches inside the loop -there is a accumulator capture: +Then, each time the sequencecomma-number
+matches inside the loop there is an accumulator capture: It callsadd
with the current value of the accumulator and the value of the new number, -and their sum replaces the value of the accumulator. +and the result of the call (their sum) replaces the value of the accumulator. At the end of the match, the accumulator with all sums is the final value.Due to the nature of this capture, you should avoid using it in places where it is not clear -what is its "previous" capture. +what is its "previous" capture +(e.g., directly nested in a string capture +or a numbered capture). Due to implementation details, -you should not use this capture inside a substitution capture. +you should not use this capture directly nested in a +substitution capture.
@@ -1014,9 +1027,9 @@ local lpeg = require "lpeg" -- matches a word followed by end-of-string p = lpeg.R"az"^1 * -1 -print(p:match("hello")) --> 6 -print(lpeg.match(p, "hello")) --> 6 -print(p:match("1 hello")) --> nil +print(p:match("hello")) --> 6 +print(lpeg.match(p, "hello")) --> 6 +print(p:match("1 hello")) --> nil
The pattern is simply a sequence of one or more lower-case letters @@ -1043,7 +1056,7 @@ local name = lpeg.C(lpeg.alpha^1) * space local sep = lpeg.S(",;") * space local pair = name * "=" * space * name * sep^-1 local list = lpeg.Ct("") * (pair % rawset)^0 -t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" } +t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" }
Each pair has the format name = name
followed by
@@ -1135,7 +1148,7 @@ function anywhere (p)
return lpeg.P{ I * p * I + 1 * lpeg.V(1) }
end
-print(anywhere("world"):match("hello world!")) -> 7 12
+print(anywhere("world"):match("hello world!")) --> 7 12
@@ -1344,7 +1357,7 @@ function evalExp (s) end -- small example -print(evalExp"3 + 5*9 / (1+1) - 12") --> 13.5 +print(evalExp"3 + 5*9 / (1+1) - 12") --> 13.5
@@ -1372,7 +1385,7 @@ G = lpeg.P{ "Exp", } -- small example -print(lpeg.match(G, "3 + 5*9 / (1+1) - 12")) --> 13.5 +print(lpeg.match(G, "3 + 5*9 / (1+1) - 12")) --> 13.5
Note the use of the accumulator capture. diff --git a/lpprint.c b/lpprint.c index 6349ac2..da902e6 100644 --- a/lpprint.c +++ b/lpprint.c @@ -149,8 +149,8 @@ void printpatt (Instruction *p) { static void printcap (Capture *cap, int ident) { while (ident--) printf(" "); - printf("%s (idx: %d - size: %d) -> %lu\n", - capkind(cap->kind), cap->idx, cap->siz, (long)cap->index); + printf("%s (idx: %d - size: %d) -> %lu (%p)\n", + capkind(cap->kind), cap->idx, cap->siz, (long)cap->index, (void*)cap); } diff --git a/lpvm.c b/lpvm.c index 5a30679..0a2fde4 100644 --- a/lpvm.c +++ b/lpvm.c @@ -198,11 +198,6 @@ static int removedyncap (lua_State *L, Capture *capture, } -/** -** Maximum number of captures to visit when looking for an 'open'. -*/ -#define MAXLOP 20 - /* ** Find the corresponding 'open' capture before 'cap', when that capture ** can become a full capture. If a full capture c1 is followed by an diff --git a/test.lua b/test.lua index cd85b31..7e61603 100755 --- a/test.lua +++ b/test.lua @@ -1005,6 +1005,35 @@ p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k")) t = p:match("ab") checkeq(t, {"a", "b"}) + +do + -- some basic cases + assert(m.match(m.Cg(m.Cc(3), "a") * m.Cb("a"), "a") == 3) + assert(m.match(m.Cg(m.C(1), 133) * m.Cb(133), "X") == "X") + + -- first reference to 'x' should not see the group enclosing it + local p = m.Cg(m.Cb('x'), 'x') * m.Cb('x') + checkerr("back reference 'x' not found", m.match, p, '') + + local p = m.Cg(m.Cb('x') * m.C(1), 'x') * m.Cb('x') + checkerr("back reference 'x' not found", m.match, p, 'abc') + + -- reference to 'x' should not see the group enclosed in another capture + local s = string.rep("a", 30) + local p = (m.C(1)^-4 * m.Cg(m.C(1), 'x')) / {} * m.Cb('x') + checkerr("back reference 'x' not found", m.match, p, s) + + local p = (m.C(1)^-20 * m.Cg(m.C(1), 'x')) / {} * m.Cb('x') + checkerr("back reference 'x' not found", m.match, p, s) + + -- second reference 'k' should refer to 10 and first ref. 'k' + p = m.Cg(m.Cc(20), 'k') * m.Cg(m.Cc(10) * m.Cb('k') * m.C(1), 'k') + * (m.Cb('k') / function (a,b,c) return a*10 + b + tonumber(c) end) + -- 10 * 10 (Cc) + 20 (Cb) + 7 (C) == 127 + assert(p:match("756") == 127) + +end + p = m.P(true) for i = 1, 10 do p = p * m.Cg(1, i) end for i = 1, 10 do -- cgit v1.2.3-55-g6feb