aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-06-19 11:14:02 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-06-19 11:14:02 -0300
commit9a9ee3d9ab8ce435d743d293ec43075151370200 (patch)
tree445290bfa04c2cd30f514b65b90b1d8b973f21f1
parenta561630f17e61548193666abf9a8b20f20462558 (diff)
downloadlpeg-9a9ee3d9ab8ce435d743d293ec43075151370200.tar.gz
lpeg-9a9ee3d9ab8ce435d743d293ec43075151370200.tar.bz2
lpeg-9a9ee3d9ab8ce435d743d293ec43075151370200.zip
Some fixes in vibibility check for back captures
-rw-r--r--lpcap.c24
-rw-r--r--lpcap.h6
-rw-r--r--lpeg.html75
-rw-r--r--lpprint.c4
-rw-r--r--lpvm.c5
-rwxr-xr-xtest.lua29
6 files changed, 97 insertions, 46 deletions
diff --git a/lpcap.c b/lpcap.c
index 74a34db..fca8cbb 100644
--- a/lpcap.c
+++ b/lpcap.c
@@ -126,15 +126,23 @@ static void pushonenestedvalue (CapState *cs) {
126 126
127 127
128/* 128/*
129** Checks whether group 'grp' is visible to 'ref', that is, 129** Checks whether group 'grp' is visible to 'ref', that is, 'grp' is
130** 'grp' is not nested inside a capture that does not contain 'ref'. 130** not nested inside a full capture that does not contain 'ref'. (We
131** To do that, must find minimum capture that contains 'grp'. 131** only need to care for full captures because the search at 'findback'
132** skips open-end blocks; so, if 'grp' is nested in a non-full capture,
133** 'ref' is also inside it.) To check this, we search backward for the
134** inner full capture enclosing 'grp'. A full capture cannot contain
135** non-full captures, so a close capture means we cannot be inside a
136** full capture anymore.
132*/ 137*/
133static int capvisible (CapState *cs, Capture *grp, Capture *ref) { 138static int capvisible (CapState *cs, Capture *grp, Capture *ref) {
134 Capture *cap = grp; 139 Capture *cap = grp;
135 while (cap-- > cs->ocap) { /* repeat until end of list */ 140 int i = MAXLOP; /* maximum distance for an 'open' */
141 while (i-- > 0 && cap-- > cs->ocap) {
136 if (isclosecap(cap)) 142 if (isclosecap(cap))
137 cap = findopen(cap); /* skip nested captures */ 143 return 1; /* can stop the search */
144 else if (grp->index - cap->index >= UCHAR_MAX)
145 return 1; /* can stop the search */
138 else if (capinside(cap, grp)) /* is 'grp' inside cap? */ 146 else if (capinside(cap, grp)) /* is 'grp' inside cap? */
139 return capinside(cap, ref); /* ok iff cap also contains 'ref' */ 147 return capinside(cap, ref); /* ok iff cap also contains 'ref' */
140 } 148 }
@@ -150,10 +158,10 @@ static Capture *findback (CapState *cs, Capture *ref) {
150 lua_State *L = cs->L; 158 lua_State *L = cs->L;
151 Capture *cap = ref; 159 Capture *cap = ref;
152 while (cap-- > cs->ocap) { /* repeat until end of list */ 160 while (cap-- > cs->ocap) { /* repeat until end of list */
153 if (isopencap(cap)) 161 if (isclosecap(cap))
154 continue; /* enclosing captures are not visible to 'ref' */
155 else if (isclosecap(cap))
156 cap = findopen(cap); /* skip nested captures */ 162 cap = findopen(cap); /* skip nested captures */
163 else if (capinside(cap, ref))
164 continue; /* enclosing captures are not visible to 'ref' */
157 if (captype(cap) == Cgroup && capvisible(cs, cap, ref)) { 165 if (captype(cap) == Cgroup && capvisible(cs, cap, ref)) {
158 getfromktable(cs, cap->idx); /* get group name */ 166 getfromktable(cs, cap->idx); /* get group name */
159 if (lp_equal(L, -2, -1)) { /* right group? */ 167 if (lp_equal(L, -2, -1)) { /* right group? */
diff --git a/lpcap.h b/lpcap.h
index 30f3714..abbd553 100644
--- a/lpcap.h
+++ b/lpcap.h
@@ -70,6 +70,12 @@ typedef struct CapState {
70 : (c2)->index < (c1)->index + (c1)->siz - 1) 70 : (c2)->index < (c1)->index + (c1)->siz - 1)
71 71
72 72
73/**
74** Maximum number of captures to visit when looking for an 'open'.
75*/
76#define MAXLOP 20
77
78
73 79
74int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); 80int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
75int getcaptures (lua_State *L, const char *s, const char *r, int ptop); 81int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
diff --git a/lpeg.html b/lpeg.html
index 9faa1c7..c9bd9f9 100644
--- a/lpeg.html
+++ b/lpeg.html
@@ -608,17 +608,17 @@ The following table summarizes the basic captures:
608<tr><td><a href="#cap-arg"><code>lpeg.Carg(n)</code></a></td> 608<tr><td><a href="#cap-arg"><code>lpeg.Carg(n)</code></a></td>
609 <td>the value of the n<sup>th</sup> extra argument to 609 <td>the value of the n<sup>th</sup> extra argument to
610 <code>lpeg.match</code> (matches the empty string)</td></tr> 610 <code>lpeg.match</code> (matches the empty string)</td></tr>
611<tr><td><a href="#cap-b"><code>lpeg.Cb(name)</code></a></td> 611<tr><td><a href="#cap-b"><code>lpeg.Cb(key)</code></a></td>
612 <td>the values produced by the previous 612 <td>the values produced by the previous
613 group capture named <code>name</code> 613 group capture named <code>key</code>
614 (matches the empty string)</td></tr> 614 (matches the empty string)</td></tr>
615<tr><td><a href="#cap-cc"><code>lpeg.Cc(values)</code></a></td> 615<tr><td><a href="#cap-cc"><code>lpeg.Cc(values)</code></a></td>
616 <td>the given values (matches the empty string)</td></tr> 616 <td>the given values (matches the empty string)</td></tr>
617<tr><td><a href="#cap-f"><code>lpeg.Cf(patt, func)</code></a></td> 617<tr><td><a href="#cap-f"><code>lpeg.Cf(patt, func)</code></a></td>
618 <td>a <em>folding</em> of the captures from <code>patt</code></td></tr> 618 <td>a <em>folding</em> of the captures from <code>patt</code></td></tr>
619<tr><td><a href="#cap-g"><code>lpeg.Cg(patt [, name])</code></a></td> 619<tr><td><a href="#cap-g"><code>lpeg.Cg(patt [, key])</code></a></td>
620 <td>the values produced by <code>patt</code>, 620 <td>the values produced by <code>patt</code>,
621 optionally tagged with <code>name</code></td></tr> 621 optionally tagged with <code>key</code></td></tr>
622<tr><td><a href="#cap-p"><code>lpeg.Cp()</code></a></td> 622<tr><td><a href="#cap-p"><code>lpeg.Cp()</code></a></td>
623 <td>the current position (matches the empty string)</td></tr> 623 <td>the current position (matches the empty string)</td></tr>
624<tr><td><a href="#cap-s"><code>lpeg.Cs(patt)</code></a></td> 624<tr><td><a href="#cap-s"><code>lpeg.Cs(patt)</code></a></td>
@@ -639,9 +639,10 @@ or no value when <code>number</code> is zero.</td></tr>
639 <td>the returns of <code>function</code> applied to the captures 639 <td>the returns of <code>function</code> applied to the captures
640 of <code>patt</code></td></tr> 640 of <code>patt</code></td></tr>
641<tr><td><a href="#cap-rep"><code>patt % function</code></a></td> 641<tr><td><a href="#cap-rep"><code>patt % function</code></a></td>
642 <td>the return of <code>function</code> applied to the previous 642 <td>produces no value;
643 capture plus the captures of <code>patt</code></td></tr>; 643 it <em>accummulates</em> the captures from <code>patt</code>
644 the returned value becomes the value of the previous capture 644 into the previous capture through <code>function</code>
645 </td></tr>
645<tr><td><a href="#matchtime"><code>lpeg.Cmt(patt, function)</code></a></td> 646<tr><td><a href="#matchtime"><code>lpeg.Cmt(patt, function)</code></a></td>
646 <td>the returns of <code>function</code> applied to the captures 647 <td>the returns of <code>function</code> applied to the captures
647 of <code>patt</code>; the application is done at match time</td></tr> 648 of <code>patt</code>; the application is done at match time</td></tr>
@@ -699,24 +700,25 @@ argument given in the call to <code>lpeg.match</code>.
699</p> 700</p>
700 701
701 702
702<h3><a name="cap-b"></a><code>lpeg.Cb (name)</code></h3> 703<h3><a name="cap-b"></a><code>lpeg.Cb (key)</code></h3>
703<p> 704<p>
704Creates a <em>back capture</em>. 705Creates a <em>back capture</em>.
705This pattern matches the empty string and 706This pattern matches the empty string and
706produces the values produced by the <em>most recent</em> 707produces the values produced by the <em>most recent</em>
707<a href="#cap-g">group capture</a> named <code>name</code> 708<a href="#cap-g">group capture</a> named <code>key</code>
708(where <code>name</code> can be any Lua value). 709(where <code>key</code> can be any Lua value).
709</p> 710</p>
710 711
711<p> 712<p>
712<em>Most recent</em> means the last 713<em>Most recent</em> means the last
713<em>complete</em> 714<em>complete</em>
714<em>outermost</em> 715<em>outermost</em>
715group capture with the given name. 716group capture with the given key.
716A <em>Complete</em> capture means that the entire pattern 717A <em>Complete</em> capture means that the entire pattern
717corresponding to the capture has matched. 718corresponding to the capture has matched;
719in other words, the back capture is not nested inside the group.
718An <em>Outermost</em> capture means that the capture is not inside 720An <em>Outermost</em> capture means that the capture is not inside
719another complete capture. 721another complete capture that does not contain the back capture itself.
720</p> 722</p>
721 723
722<p> 724<p>
@@ -785,13 +787,13 @@ print(sum:match("10,30,43")) --&gt; 83
785</pre> 787</pre>
786 788
787 789
788<h3><a name="cap-g"></a><code>lpeg.Cg (patt [, name])</code></h3> 790<h3><a name="cap-g"></a><code>lpeg.Cg (patt [, key])</code></h3>
789<p> 791<p>
790Creates a <em>group capture</em>. 792Creates a <em>group capture</em>.
791It groups all values returned by <code>patt</code> 793It groups all values returned by <code>patt</code>
792into a single capture. 794into a single capture.
793The group may be anonymous (if no name is given) 795The group may be anonymous (if no key is given)
794or named with the given name 796or named with the given key
795(which can be any non-nil Lua value). 797(which can be any non-nil Lua value).
796</p> 798</p>
797 799
@@ -837,7 +839,7 @@ starting at 1.
837Moreover, 839Moreover,
838for each named capture group created by <code>patt</code>, 840for each named capture group created by <code>patt</code>,
839the first value of the group is put into the table 841the first value of the group is put into the table
840with the group name as its key. 842with the group key as its key.
841The captured value is only the table. 843The captured value is only the table.
842</p> 844</p>
843 845
@@ -897,12 +899,14 @@ there is no captured value.
897<p> 899<p>
898Creates an <em>accumulator capture</em>. 900Creates an <em>accumulator capture</em>.
899This pattern behaves similarly to a 901This pattern behaves similarly to a
900<a href="cap-func">function capture</a>, 902<a href="#cap-func">function capture</a>,
901with the following differences: 903with the following differences:
902The last captured value is added as a first argument to 904The last captured value is added as a first argument to
903the call; 905the call;
904the return of the function is adjusted to one single value; 906the return of the function is adjusted to one single value;
905that value becomes the last captured value. 907that value replaces the last captured value.
908Note that the capture itself produces no values;
909it only changes the value of its previous capture.
906</p> 910</p>
907 911
908<p> 912<p>
@@ -929,6 +933,12 @@ changed to upper case;
929that value then becomes the first and only 933that value then becomes the first and only
930capture value created by the match. 934capture value created by the match.
931</p> 935</p>
936
937<p>
938As another example,
939let us consider the problem of adding a list of numbers.
940</p>
941<pre class="example">
932-- matches a numeral and captures its numerical value 942-- matches a numeral and captures its numerical value
933number = lpeg.R"09"^1 / tonumber 943number = lpeg.R"09"^1 / tonumber
934 944
@@ -944,11 +954,11 @@ print(sum:match("10,30,43")) --&gt; 83
944<p> 954<p>
945First, the initial <code>number</code> captures a number; 955First, the initial <code>number</code> captures a number;
946that first capture will play the role of an accumulator. 956that first capture will play the role of an accumulator.
947Then, each time <code>number</code> matches inside the loop 957Then, each time the sequence <code>comma-number</code>
948there is a accumulator capture: 958matches inside the loop there is an accumulator capture:
949It calls <code>add</code> with the current value of the accumulator 959It calls <code>add</code> with the current value of the accumulator
950and the value of the new number, 960and the value of the new number,
951and their sum replaces the value of the accumulator. 961and the result of the call (their sum) replaces the value of the accumulator.
952At the end of the match, 962At the end of the match,
953the accumulator with all sums is the final value. 963the accumulator with all sums is the final value.
954</p> 964</p>
@@ -956,9 +966,12 @@ the accumulator with all sums is the final value.
956<p> 966<p>
957Due to the nature of this capture, 967Due to the nature of this capture,
958you should avoid using it in places where it is not clear 968you should avoid using it in places where it is not clear
959what is its "previous" capture. 969what is its "previous" capture
970(e.g., directly nested in a <a href="#cap-string">string capture</a>
971or a <a href="#cap-num">numbered capture</a>).
960Due to implementation details, 972Due to implementation details,
961you should not use this capture inside a substitution capture. 973you should not use this capture directly nested in a
974<a href="#cap-s">substitution capture</a>.
962</p> 975</p>
963 976
964 977
@@ -1014,9 +1027,9 @@ local lpeg = require "lpeg"
1014-- matches a word followed by end-of-string 1027-- matches a word followed by end-of-string
1015p = lpeg.R"az"^1 * -1 1028p = lpeg.R"az"^1 * -1
1016 1029
1017print(p:match("hello")) --> 6 1030print(p:match("hello")) --&gt; 6
1018print(lpeg.match(p, "hello")) --> 6 1031print(lpeg.match(p, "hello")) --&gt; 6
1019print(p:match("1 hello")) --> nil 1032print(p:match("1 hello")) --&gt; nil
1020</pre> 1033</pre>
1021<p> 1034<p>
1022The pattern is simply a sequence of one or more lower-case letters 1035The pattern is simply a sequence of one or more lower-case letters
@@ -1043,7 +1056,7 @@ local name = lpeg.C(lpeg.alpha^1) * space
1043local sep = lpeg.S(",;") * space 1056local sep = lpeg.S(",;") * space
1044local pair = name * "=" * space * name * sep^-1 1057local pair = name * "=" * space * name * sep^-1
1045local list = lpeg.Ct("") * (pair % rawset)^0 1058local list = lpeg.Ct("") * (pair % rawset)^0
1046t = list:match("a=b, c = hi; next = pi") --> { a = "b", c = "hi", next = "pi" } 1059t = list:match("a=b, c = hi; next = pi") --&gt; { a = "b", c = "hi", next = "pi" }
1047</pre> 1060</pre>
1048<p> 1061<p>
1049Each pair has the format <code>name = name</code> followed by 1062Each pair has the format <code>name = name</code> followed by
@@ -1135,7 +1148,7 @@ function anywhere (p)
1135 return lpeg.P{ I * p * I + 1 * lpeg.V(1) } 1148 return lpeg.P{ I * p * I + 1 * lpeg.V(1) }
1136end 1149end
1137 1150
1138print(anywhere("world"):match("hello world!")) -> 7 12 1151print(anywhere("world"):match("hello world!")) --&gt; 7 12
1139</pre> 1152</pre>
1140 1153
1141<p> 1154<p>
@@ -1344,7 +1357,7 @@ function evalExp (s)
1344end 1357end
1345 1358
1346-- small example 1359-- small example
1347print(evalExp"3 + 5*9 / (1+1) - 12") --> 13.5 1360print(evalExp"3 + 5*9 / (1+1) - 12") --&gt; 13.5
1348</pre> 1361</pre>
1349 1362
1350<p> 1363<p>
@@ -1372,7 +1385,7 @@ G = lpeg.P{ "Exp",
1372} 1385}
1373 1386
1374-- small example 1387-- small example
1375print(lpeg.match(G, "3 + 5*9 / (1+1) - 12")) --> 13.5 1388print(lpeg.match(G, "3 + 5*9 / (1+1) - 12")) --&gt; 13.5
1376</pre> 1389</pre>
1377<p> 1390<p>
1378Note the use of the accumulator capture. 1391Note the use of the accumulator capture.
diff --git a/lpprint.c b/lpprint.c
index 6349ac2..da902e6 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -149,8 +149,8 @@ void printpatt (Instruction *p) {
149 149
150static void printcap (Capture *cap, int ident) { 150static void printcap (Capture *cap, int ident) {
151 while (ident--) printf(" "); 151 while (ident--) printf(" ");
152 printf("%s (idx: %d - size: %d) -> %lu\n", 152 printf("%s (idx: %d - size: %d) -> %lu (%p)\n",
153 capkind(cap->kind), cap->idx, cap->siz, (long)cap->index); 153 capkind(cap->kind), cap->idx, cap->siz, (long)cap->index, (void*)cap);
154} 154}
155 155
156 156
diff --git a/lpvm.c b/lpvm.c
index 5a30679..0a2fde4 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -198,11 +198,6 @@ static int removedyncap (lua_State *L, Capture *capture,
198} 198}
199 199
200 200
201/**
202** Maximum number of captures to visit when looking for an 'open'.
203*/
204#define MAXLOP 20
205
206/* 201/*
207** Find the corresponding 'open' capture before 'cap', when that capture 202** Find the corresponding 'open' capture before 'cap', when that capture
208** can become a full capture. If a full capture c1 is followed by an 203** can become a full capture. If a full capture c1 is followed by an
diff --git a/test.lua b/test.lua
index cd85b31..7e61603 100755
--- a/test.lua
+++ b/test.lua
@@ -1005,6 +1005,35 @@ p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
1005t = p:match("ab") 1005t = p:match("ab")
1006checkeq(t, {"a", "b"}) 1006checkeq(t, {"a", "b"})
1007 1007
1008
1009do
1010 -- some basic cases
1011 assert(m.match(m.Cg(m.Cc(3), "a") * m.Cb("a"), "a") == 3)
1012 assert(m.match(m.Cg(m.C(1), 133) * m.Cb(133), "X") == "X")
1013
1014 -- first reference to 'x' should not see the group enclosing it
1015 local p = m.Cg(m.Cb('x'), 'x') * m.Cb('x')
1016 checkerr("back reference 'x' not found", m.match, p, '')
1017
1018 local p = m.Cg(m.Cb('x') * m.C(1), 'x') * m.Cb('x')
1019 checkerr("back reference 'x' not found", m.match, p, 'abc')
1020
1021 -- reference to 'x' should not see the group enclosed in another capture
1022 local s = string.rep("a", 30)
1023 local p = (m.C(1)^-4 * m.Cg(m.C(1), 'x')) / {} * m.Cb('x')
1024 checkerr("back reference 'x' not found", m.match, p, s)
1025
1026 local p = (m.C(1)^-20 * m.Cg(m.C(1), 'x')) / {} * m.Cb('x')
1027 checkerr("back reference 'x' not found", m.match, p, s)
1028
1029 -- second reference 'k' should refer to 10 and first ref. 'k'
1030 p = m.Cg(m.Cc(20), 'k') * m.Cg(m.Cc(10) * m.Cb('k') * m.C(1), 'k')
1031 * (m.Cb('k') / function (a,b,c) return a*10 + b + tonumber(c) end)
1032 -- 10 * 10 (Cc) + 20 (Cb) + 7 (C) == 127
1033 assert(p:match("756") == 127)
1034
1035end
1036
1008p = m.P(true) 1037p = m.P(true)
1009for i = 1, 10 do p = p * m.Cg(1, i) end 1038for i = 1, 10 do p = p * m.Cg(1, i) end
1010for i = 1, 10 do 1039for i = 1, 10 do