diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-02-20 10:13:46 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2019-02-20 10:13:46 -0300 |
commit | e08e5df853560de6482d84066a7accc6a18de545 (patch) | |
tree | ee19686bb35da90709a32ed24bf7855de1a3946a /test.lua | |
download | lpeg-e08e5df853560de6482d84066a7accc6a18de545.tar.gz lpeg-e08e5df853560de6482d84066a7accc6a18de545.tar.bz2 lpeg-e08e5df853560de6482d84066a7accc6a18de545.zip |
First version of LPeg on GIT
LPeg repository is being moved to git. Past versions won't be moved;
they are still available in RCS.
Diffstat (limited to 'test.lua')
-rwxr-xr-x | test.lua | 1513 |
1 files changed, 1513 insertions, 0 deletions
diff --git a/test.lua b/test.lua new file mode 100755 index 0000000..51c5204 --- /dev/null +++ b/test.lua | |||
@@ -0,0 +1,1513 @@ | |||
1 | #!/usr/bin/env lua | ||
2 | |||
3 | -- $Id: test.lua,v 1.114 2018/06/04 16:21:19 roberto Exp $ | ||
4 | |||
5 | -- require"strict" -- just to be pedantic | ||
6 | |||
7 | local m = require"lpeg" | ||
8 | |||
9 | |||
10 | -- for general use | ||
11 | local a, b, c, d, e, f, g, p, t | ||
12 | |||
13 | |||
14 | -- compatibility with Lua 5.2 | ||
15 | local unpack = rawget(table, "unpack") or unpack | ||
16 | local loadstring = rawget(_G, "loadstring") or load | ||
17 | |||
18 | |||
19 | local any = m.P(1) | ||
20 | local space = m.S" \t\n"^0 | ||
21 | |||
-- Recursively check that 'x' and 'y' are structurally equal.
-- Non-table values are compared with '=='. Tables are compared key by
-- key in both directions, so a key present on only one side fails.
-- If 'p' is true, every pair being compared is printed (debug aid).
-- Raises an error on the first difference found; returns nothing.
local function checkeq (x, y, p)
  if p then print(x, y) end
  if type(x) ~= "table" then
    assert(x == y)
    return
  end
  -- Fail with a readable message instead of an obscure "attempt to
  -- index" / "bad argument to 'pairs'" error when only 'x' is a table.
  assert(type(y) == "table",
         "checkeq: expected a table, got " .. type(y))
  for k, v in pairs(x) do checkeq(v, y[k], p) end
  for k, v in pairs(y) do checkeq(v, x[k], p) end
end
30 | |||
31 | |||
32 | local mt = getmetatable(m.P(1)) | ||
33 | |||
34 | |||
-- 'allchar' is a string holding every byte value from 0 to 255, in
-- ascending order.
local allchar
do
  local bytes = {}
  for code = 0, 255 do bytes[#bytes + 1] = code end
  allchar = string.char(unpack(bytes))
end
assert(allchar:len() == 256)
39 | |||
-- Render 'c' as a string: scan 'allchar' keeping whatever 'c' matches
-- and replacing any other single character by the empty string.
local function cs2str (c)
  local drop = m.P(1) / ""
  local filter = m.Cs((c + drop)^0)
  return filter:match(allchar)
end
43 | |||
-- Assert that 'c1' and 'c2' produce the same 'cs2str' rendering, i.e.
-- that both accept the same characters.
local function eqcharset (c1, c2)
  local s1, s2 = cs2str(c1), cs2str(c2)
  assert(s1 == s2)
end
47 | |||
48 | |||
49 | print"General tests for LPeg library" | ||
50 | |||
51 | assert(type(m.version()) == "string") | ||
52 | print("version " .. m.version()) | ||
53 | assert(m.type("alo") ~= "pattern") | ||
54 | assert(m.type(io.input) ~= "pattern") | ||
55 | assert(m.type(m.P"alo") == "pattern") | ||
56 | |||
57 | -- tests for some basic optimizations | ||
58 | assert(m.match(m.P(false) + "a", "a") == 2) | ||
59 | assert(m.match(m.P(true) + "a", "a") == 1) | ||
60 | assert(m.match("a" + m.P(false), "b") == nil) | ||
61 | assert(m.match("a" + m.P(true), "b") == 1) | ||
62 | |||
63 | assert(m.match(m.P(false) * "a", "a") == nil) | ||
64 | assert(m.match(m.P(true) * "a", "a") == 2) | ||
65 | assert(m.match("a" * m.P(false), "a") == nil) | ||
66 | assert(m.match("a" * m.P(true), "a") == 2) | ||
67 | |||
68 | assert(m.match(#m.P(false) * "a", "a") == nil) | ||
69 | assert(m.match(#m.P(true) * "a", "a") == 2) | ||
70 | assert(m.match("a" * #m.P(false), "a") == nil) | ||
71 | assert(m.match("a" * #m.P(true), "a") == 2) | ||
72 | |||
73 | |||
74 | -- tests for locale | ||
do
  -- 'locale' must hand back the very table it was given
  assert(m.locale(m) == m)
  local target = {}
  assert(m.locale(target, m) == target)
  -- every entry of a fresh locale table must have a string name and
  -- accept the same characters as the entry of that name in 'm'
  local classes = m.locale()
  for name, class in next, classes do
    assert(type(name) == "string")
    eqcharset(class, m[name])
  end
end
85 | |||
86 | |||
87 | assert(m.match(3, "aaaa")) | ||
88 | assert(m.match(4, "aaaa")) | ||
89 | assert(not m.match(5, "aaaa")) | ||
90 | assert(m.match(-3, "aa")) | ||
91 | assert(not m.match(-3, "aaa")) | ||
92 | assert(not m.match(-3, "aaaa")) | ||
93 | assert(not m.match(-4, "aaaa")) | ||
94 | assert(m.P(-5):match"aaaa") | ||
95 | |||
96 | assert(m.match("a", "alo") == 2) | ||
97 | assert(m.match("al", "alo") == 3) | ||
98 | assert(not m.match("alu", "alo")) | ||
99 | assert(m.match(true, "") == 1) | ||
100 | |||
101 | local digit = m.S"0123456789" | ||
102 | local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ" | ||
103 | local lower = m.S"abcdefghijklmnopqrstuvwxyz" | ||
104 | local letter = m.S"" + upper + lower | ||
105 | local alpha = letter + digit + m.R() | ||
106 | |||
107 | eqcharset(m.S"", m.P(false)) | ||
108 | eqcharset(upper, m.R("AZ")) | ||
109 | eqcharset(lower, m.R("az")) | ||
110 | eqcharset(upper + lower, m.R("AZ", "az")) | ||
111 | eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90")) | ||
112 | eqcharset(digit, m.S"01234567" + "8" + "9") | ||
113 | eqcharset(upper, letter - lower) | ||
114 | eqcharset(m.S(""), m.R()) | ||
115 | assert(cs2str(m.S("")) == "") | ||
116 | |||
117 | eqcharset(m.S"\0", "\0") | ||
118 | eqcharset(m.S"\1\0\2", m.R"\0\2") | ||
119 | eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0") | ||
120 | eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2") | ||
121 | |||
122 | local word = alpha^1 * (1 - alpha)^0 | ||
123 | |||
124 | assert((word^0 * -1):match"alo alo") | ||
125 | assert(m.match(word^1 * -1, "alo alo")) | ||
126 | assert(m.match(word^2 * -1, "alo alo")) | ||
127 | assert(not m.match(word^3 * -1, "alo alo")) | ||
128 | |||
129 | assert(not m.match(word^-1 * -1, "alo alo")) | ||
130 | assert(m.match(word^-2 * -1, "alo alo")) | ||
131 | assert(m.match(word^-3 * -1, "alo alo")) | ||
132 | |||
133 | local eos = m.P(-1) | ||
134 | |||
135 | assert(m.match(digit^0 * letter * digit * eos, "1298a1")) | ||
136 | assert(not m.match(digit^0 * letter * eos, "1257a1")) | ||
137 | |||
138 | b = { | ||
139 | [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")" | ||
140 | } | ||
141 | |||
142 | assert(m.match(b, "(al())()")) | ||
143 | assert(not m.match(b * eos, "(al())()")) | ||
144 | assert(m.match(b * eos, "((al())()(é))")) | ||
145 | assert(not m.match(b, "(al()()")) | ||
146 | |||
147 | assert(not m.match(letter^1 - "for", "foreach")) | ||
148 | assert(m.match(letter^1 - ("for" * eos), "foreach")) | ||
149 | assert(not m.match(letter^1 - ("for" * eos), "for")) | ||
150 | |||
-- Build a grammar that looks for 'p' anywhere in the subject: its
-- single rule matches 'p' at the current position or else consumes
-- one character and calls itself again.
function basiclookfor (p)
  local skip = 1 * m.V(1)
  local grammar = { p + skip }
  return m.P(grammar)
end
156 | |||
-- Same search as 'basiclookfor', but with 'p' wrapped in a capture
-- (via its 'C' method) before the search grammar is built.
function caplookfor (p)
  local captured = p:C()
  return basiclookfor(captured)
end
160 | |||
161 | assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou") | ||
162 | a = {m.match(caplookfor(letter^1)^0, " two words, one more ")} | ||
163 | checkeq(a, {"two", "words", "one", "more"}) | ||
164 | |||
165 | assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7) | ||
166 | |||
167 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")} | ||
168 | checkeq(a, {"123", "d"}) | ||
169 | |||
170 | -- bug in LPeg 0.12 (nil value does not create a 'ktable') | ||
171 | assert(m.match(m.Cc(nil), "") == nil) | ||
172 | |||
173 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")} | ||
174 | checkeq(a, {"abcd", "l"}) | ||
175 | |||
176 | a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')} | ||
177 | checkeq(a, {10,20,30,2}) | ||
178 | a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')} | ||
179 | checkeq(a, {1,10,20,30,2}) | ||
180 | a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa') | ||
181 | checkeq(a, {1,10,20,30,2}) | ||
182 | a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa') | ||
183 | checkeq(a, {1,7,8,10,20,30,2}) | ||
184 | a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')} | ||
185 | checkeq(a, {1,2,3,4}) | ||
186 | |||
187 | a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")} | ||
188 | checkeq(a, {1, 5}) | ||
189 | |||
190 | |||
191 | t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")} | ||
192 | checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""}) | ||
193 | |||
194 | -- bug in 0.12 ('hascapture' did not check for captures inside a rule) | ||
195 | do | ||
196 | local pat = m.P{ | ||
197 | 'S'; | ||
198 | S1 = m.C('abc') + 3, | ||
199 | S = #m.V('S1') -- rule has capture, but '#' must ignore it | ||
200 | } | ||
201 | assert(pat:match'abc' == 1) | ||
202 | end | ||
203 | |||
204 | |||
205 | -- bug: loop in 'hascaptures' | ||
206 | do | ||
207 | local p = m.C(-m.P{m.P'x' * m.V(1) + m.P'y'}) | ||
208 | assert(p:match("xxx") == "") | ||
209 | end | ||
210 | |||
211 | |||
212 | |||
213 | -- test for small capture boundary | ||
214 | for i = 250,260 do | ||
215 | assert(#m.match(m.C(i), string.rep('a', i)) == i) | ||
216 | assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i) | ||
217 | end | ||
218 | |||
219 | -- tests for any*n and any*-n | ||
220 | for n = 1, 550, 13 do | ||
221 | local x_1 = string.rep('x', n - 1) | ||
222 | local x = x_1 .. 'a' | ||
223 | assert(not m.P(n):match(x_1)) | ||
224 | assert(m.P(n):match(x) == n + 1) | ||
225 | assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4) | ||
226 | assert(m.C(n):match(x) == x) | ||
227 | assert(m.C(m.C(n)):match(x) == x) | ||
228 | assert(m.P(-n):match(x_1) == 1) | ||
229 | assert(not m.P(-n):match(x)) | ||
230 | assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20) | ||
231 | local n3 = math.floor(n/3) | ||
232 | assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1) | ||
233 | end | ||
234 | |||
235 | -- true values | ||
236 | assert(m.P(0):match("x") == 1) | ||
237 | assert(m.P(0):match("") == 1) | ||
238 | assert(m.C(0):match("x") == "") | ||
239 | |||
240 | assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1) | ||
241 | assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0) | ||
242 | assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd") | ||
243 | p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4 | ||
244 | |||
245 | |||
246 | -- test for alternation optimization | ||
247 | assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2) | ||
248 | assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3) | ||
249 | assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1) | ||
250 | assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3) | ||
251 | assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3) | ||
252 | assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3) | ||
253 | assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3) | ||
254 | assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4) | ||
255 | assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3) | ||
256 | assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4) | ||
257 | assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4) | ||
258 | assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4) | ||
259 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3) | ||
260 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4) | ||
261 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4) | ||
262 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4) | ||
263 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3) | ||
264 | assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4) | ||
265 | assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4) | ||
266 | assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4) | ||
267 | assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3) | ||
268 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3) | ||
269 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4) | ||
270 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3) | ||
271 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1) | ||
272 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1) | ||
273 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3) | ||
274 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4) | ||
275 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3) | ||
276 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2) | ||
277 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1) | ||
278 | assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe")) | ||
279 | |||
280 | assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4) | ||
281 | |||
282 | |||
283 | -- bug in 0.12 (rc1) | ||
284 | assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4) | ||
285 | |||
286 | assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) == | ||
287 | 4*10 + 1) | ||
288 | |||
289 | -- optimizations with optional parts | ||
290 | assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1) | ||
291 | assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1) | ||
292 | assert(m.match(("ab" * m.B"c")^-1, "ab") == 1) | ||
293 | assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7) | ||
294 | |||
295 | assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3) | ||
296 | |||
297 | p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1 | ||
298 | assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21) | ||
299 | |||
300 | |||
301 | -- bug in 0.12.2 | ||
302 | -- p = { ('ab' ('c' 'ef'?)*)? } | ||
303 | p = m.C(('ab' * ('c' * m.P'ef'^-1)^0)^-1) | ||
304 | s = "abcefccefc" | ||
305 | assert(s == p:match(s)) | ||
306 | |||
307 | |||
308 | pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510" | ||
309 | assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) == | ||
310 | m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi)) | ||
311 | print"+" | ||
312 | |||
313 | |||
314 | -- tests for capture optimizations | ||
315 | assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5) | ||
316 | t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")} | ||
317 | checkeq(t, {3, 6}) | ||
318 | |||
319 | |||
320 | -- tests for numbered captures | ||
321 | p = m.C(1) | ||
322 | assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a") | ||
323 | assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef") | ||
324 | assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc") | ||
325 | assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7) | ||
326 | |||
327 | a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh") | ||
328 | assert(a == "a" and b == "efg" and c == "h") | ||
329 | |||
330 | -- test for table captures | ||
331 | t = m.match(m.Ct(letter^1), "alo") | ||
332 | checkeq(t, {}) | ||
333 | |||
334 | t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo") | ||
335 | assert(n == "t" and table.concat(t) == "alo") | ||
336 | |||
337 | t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo") | ||
338 | assert(table.concat(t, ";") == "alo;a;l;o") | ||
339 | |||
340 | t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo") | ||
341 | assert(table.concat(t, ";") == "alo;a;l;o") | ||
342 | |||
343 | t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo") | ||
344 | assert(table.concat(t[1], ";") == "1;2;2;3;3;4") | ||
345 | |||
346 | t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo") | ||
347 | checkeq(t, {"alo", "a", "o"}) | ||
348 | |||
349 | |||
350 | -- tests for groups | ||
351 | p = m.Cg(1) -- no capture | ||
352 | assert(p:match('x') == 'x') | ||
353 | p = m.Cg(m.P(true)/function () end * 1) -- no value | ||
354 | assert(p:match('x') == 'x') | ||
355 | p = m.Cg(m.Cg(m.Cg(m.C(1)))) | ||
356 | assert(p:match('x') == 'x') | ||
357 | p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2))) | ||
358 | t = {p:match'abc'} | ||
359 | checkeq(t, {'a', 'b', 'c', 1, 2}) | ||
360 | |||
361 | p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho")) | ||
362 | t = p:match'' | ||
363 | checkeq(t, {hi = 10, ho = 20}) | ||
364 | t = p:match'abc' | ||
365 | checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'}) | ||
366 | |||
367 | -- non-string group names | ||
368 | p = m.Ct(m.Cg(1, print) * m.Cg(1, 23.5) * m.Cg(1, io)) | ||
369 | t = p:match('abcdefghij') | ||
370 | assert(t[print] == 'a' and t[23.5] == 'b' and t[io] == 'c') | ||
371 | |||
372 | |||
373 | -- test for error messages | ||
-- Call 'f(...)' in protected mode and check that it raises an error
-- whose message contains the text 'msg' at any position.
-- Fails with a descriptive message (instead of a bare "assertion
-- failed!") when the call succeeds or when the error does not match.
local function checkerr (msg, f, ...)
  local st, err = pcall(f, ...)
  assert(not st, "expected error containing '" .. tostring(msg) ..
                 "', but the call succeeded")
  -- grammar: match 'msg' here, or skip one character and try again
  local found = m.match({ m.P(msg) + 1 * m.V(1) }, err)
  assert(found, "expected error containing '" .. tostring(msg) ..
                "', got: " .. tostring(err))
end
378 | |||
379 | checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") | ||
380 | checkerr("rule '1' used outside a grammar", m.match, m.V(1), "") | ||
381 | checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "") | ||
382 | checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "") | ||
383 | checkerr("undefined in given grammar", m.match, { m.V{} }, "") | ||
384 | |||
385 | checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} }) | ||
386 | checkerr("grammar has no initial rule", m.P, { [print] = {} }) | ||
387 | |||
388 | -- grammar with a long call chain before left recursion | ||
389 | p = {'a', | ||
390 | a = m.V'b' * m.V'c' * m.V'd' * m.V'a', | ||
391 | b = m.V'c', | ||
392 | c = m.V'd', | ||
393 | d = m.V'e', | ||
394 | e = m.V'f', | ||
395 | f = m.V'g', | ||
396 | g = m.P'' | ||
397 | } | ||
398 | checkerr("rule 'a' may be left recursive", m.match, p, "a") | ||
399 | |||
400 | -- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit) | ||
401 | -- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1 | ||
402 | -- that is optimized to ICommit L1 | ||
403 | |||
404 | p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' } | ||
405 | assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc') | ||
406 | |||
407 | |||
do
  -- large dynamic Cc
  local lim = 2^16 - 1
  local counter = 0
  -- Build a sequence of 'n' constant captures by recursive halving;
  -- each leaf captures the next value of 'counter'. The left half is
  -- always built before the right one, so values come out in order.
  local function seq (n)
    if n ~= 1 then
      local half = math.floor(n / 2)  -- named to avoid shadowing 'm'
      local left = seq(half)
      local right = seq(n - half)
      return left * right
    end
    counter = counter + 1
    return m.Cc(counter)
  end
  p = m.Ct(seq(lim))
  t = p:match('')
  assert(t[lim] == lim)
  checkerr("too many", function () p = p / print end)
  checkerr("too many", seq, lim + 1)
end
425 | |||
426 | |||
427 | -- tests for non-pattern as arguments to pattern functions | ||
428 | |||
429 | p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 } | ||
430 | assert(m.match(p, "aaabaac") == 7) | ||
431 | |||
432 | p = m.P'abc' * 2 * -5 * true * 'de' -- mix of numbers and strings and booleans | ||
433 | |||
434 | assert(p:match("abc01de") == 8) | ||
435 | assert(p:match("abc01de3456") == nil) | ||
436 | |||
437 | p = 'abc' * (2 * (-5 * (true * m.P'de'))) | ||
438 | |||
439 | assert(p:match("abc01de") == 8) | ||
440 | assert(p:match("abc01de3456") == nil) | ||
441 | |||
442 | p = { m.V(2), m.P"abc" } * | ||
443 | (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" }) | ||
444 | assert(p:match("abcaaaxx") == 7) | ||
445 | assert(p:match("abcxx") == 6) | ||
446 | |||
447 | |||
448 | -- a large table capture | ||
449 | t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000)) | ||
450 | assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a') | ||
451 | |||
452 | print('+') | ||
453 | |||
454 | |||
455 | -- bug in 0.10 (rechecking a grammar, after tail-call optimization) | ||
456 | m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } } | ||
457 | |||
458 | local V = m.V | ||
459 | |||
460 | local Space = m.S(" \n\t")^0 | ||
461 | local Number = m.C(m.R("09")^1) * Space | ||
462 | local FactorOp = m.C(m.S("+-")) * Space | ||
463 | local TermOp = m.C(m.S("*/")) * Space | ||
464 | local Open = "(" * Space | ||
465 | local Close = ")" * Space | ||
466 | |||
467 | |||
-- Combine 'v1' and 'v2' according to 'op': "+" adds, any other
-- operator subtracts. A fourth argument must never be supplied.
local function f_factor (v1, op, v2, d)
  assert(d == nil)
  if op ~= "+" then return v1 - v2 end
  return v1 + v2
end
474 | |||
475 | |||
-- Combine 'v1' and 'v2' according to 'op': "*" multiplies, any other
-- operator divides. A fourth argument must never be supplied.
local function f_term (v1, op, v2, d)
  assert(d == nil)
  local result
  if op == "*" then
    result = v1 * v2
  else
    result = v1 / v2
  end
  return result
end
482 | |||
483 | G = m.P{ "Exp", | ||
484 | Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor); | ||
485 | Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term); | ||
486 | Term = Number / tonumber + Open * V"Exp" * Close; | ||
487 | } | ||
488 | |||
489 | G = Space * G * -1 | ||
490 | |||
491 | for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do | ||
492 | assert(m.match(G, s) == loadstring("return "..s)()) | ||
493 | end | ||
494 | |||
495 | |||
496 | -- test for grammars (errors deep in calling non-terminals) | ||
497 | g = m.P{ | ||
498 | [1] = m.V(2) + "a", | ||
499 | [2] = "a" * m.V(3) * "x", | ||
500 | [3] = "b" * m.V(3) + "c" | ||
501 | } | ||
502 | |||
503 | assert(m.match(g, "abbbcx") == 7) | ||
504 | assert(m.match(g, "abbbbx") == 2) | ||
505 | |||
506 | |||
507 | -- tests for \0 | ||
508 | assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4) | ||
509 | assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5) | ||
510 | assert(m.match(m.P(1)^3, "\0\1\0a") == 5) | ||
511 | assert(not m.match(-4, "\0\1\0a")) | ||
512 | assert(m.match("\0\1\0a", "\0\1\0a") == 5) | ||
513 | assert(m.match("\0\0\0", "\0\0\0") == 4) | ||
514 | assert(not m.match("\0\0\0", "\0\0")) | ||
515 | |||
516 | |||
517 | -- tests for predicates | ||
518 | assert(not m.match(-m.P("a") * 2, "alo")) | ||
519 | assert(m.match(- -m.P("a") * 2, "alo") == 3) | ||
520 | assert(m.match(#m.P("a") * 2, "alo") == 3) | ||
521 | assert(m.match(##m.P("a") * 2, "alo") == 3) | ||
522 | assert(not m.match(##m.P("c") * 2, "alo")) | ||
523 | assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | ||
524 | assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | ||
525 | assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | ||
526 | assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | ||
527 | |||
528 | |||
529 | -- fixed length | ||
530 | do | ||
531 | -- 'and' predicate using fixed length | ||
532 | local p = m.C(#("a" * (m.P("bd") + "cd")) * 2) | ||
533 | assert(p:match("acd") == "ac") | ||
534 | |||
535 | p = #m.P{ "a" * m.V(2), m.P"b" } * 2 | ||
536 | assert(p:match("abc") == 3) | ||
537 | |||
538 | p = #(m.P"abc" * m.B"c") | ||
539 | assert(p:match("abc") == 1 and not p:match("ab")) | ||
540 | |||
541 | p = m.P{ "a" * m.V(2), m.P"b"^1 } | ||
542 | checkerr("pattern may not have fixed length", m.B, p) | ||
543 | |||
544 | p = "abc" * (m.P"b"^1 + m.P"a"^0) | ||
545 | checkerr("pattern may not have fixed length", m.B, p) | ||
546 | end | ||
547 | |||
548 | |||
549 | p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) | ||
550 | assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) | ||
551 | |||
552 | p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20) | ||
553 | assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10) | ||
554 | |||
555 | |||
556 | |||
557 | -- look-behind predicate | ||
558 | assert(not m.match(m.B'a', 'a')) | ||
559 | assert(m.match(1 * m.B'a', 'a') == 2) | ||
560 | assert(not m.match(m.B(1), 'a')) | ||
561 | assert(m.match(1 * m.B(1), 'a') == 2) | ||
562 | assert(m.match(-m.B(1), 'a') == 1) | ||
563 | assert(m.match(m.B(250), string.rep('a', 250)) == nil) | ||
564 | assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251) | ||
565 | |||
566 | -- look-behind with an open call | ||
567 | checkerr("pattern may not have fixed length", m.B, m.V'S1') | ||
568 | checkerr("too long to look behind", m.B, 260) | ||
569 | |||
570 | B = #letter * -m.B(letter) + -letter * m.B(letter) | ||
571 | x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) }) | ||
572 | checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10}) | ||
573 | checkeq(m.match(x, ' ar cal '), {2,4,5,8}) | ||
574 | checkeq(m.match(x, ' '), {}) | ||
575 | checkeq(m.match(x, 'aloalo'), {1,7}) | ||
576 | |||
577 | assert(m.match(B, "a") == 1) | ||
578 | assert(m.match(1 * B, "a") == 2) | ||
579 | assert(not m.B(1 - letter):match("")) | ||
580 | assert((-m.B(letter)):match("") == 1) | ||
581 | |||
582 | assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5) | ||
583 | assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa")) | ||
584 | assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5) | ||
585 | |||
586 | -- look-behind with grammars | ||
587 | assert(m.match('a' * m.B{'x', x = m.P(3)}, 'aaa') == nil) | ||
588 | assert(m.match('aa' * m.B{'x', x = m.P('aaa')}, 'aaaa') == nil) | ||
589 | assert(m.match('aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa') == 4) | ||
590 | |||
591 | |||
592 | |||
593 | -- bug in 0.9 | ||
594 | assert(m.match(('a' * #m.P'b'), "ab") == 2) | ||
595 | assert(not m.match(('a' * #m.P'b'), "a")) | ||
596 | |||
597 | assert(not m.match(#m.S'567', "")) | ||
598 | assert(m.match(#m.S'567' * 1, "6") == 2) | ||
599 | |||
600 | |||
601 | -- tests for Tail Calls | ||
602 | |||
603 | p = m.P{ 'a' * m.V(1) + '' } | ||
604 | assert(p:match(string.rep('a', 1000)) == 1001) | ||
605 | |||
606 | -- create a grammar for a simple DFA for even number of 0s and 1s | ||
607 | -- | ||
608 | -- ->1 <---0---> 2 | ||
609 | -- ^ ^ | ||
610 | -- | | | ||
611 | -- 1 1 | ||
612 | -- | | | ||
613 | -- V V | ||
614 | -- 3 <---0---> 4 | ||
615 | -- | ||
616 | -- this grammar should keep no backtracking information | ||
617 | |||
618 | p = m.P{ | ||
619 | [1] = '0' * m.V(2) + '1' * m.V(3) + -1, | ||
620 | [2] = '0' * m.V(1) + '1' * m.V(4), | ||
621 | [3] = '0' * m.V(4) + '1' * m.V(1), | ||
622 | [4] = '0' * m.V(3) + '1' * m.V(2), | ||
623 | } | ||
624 | |||
625 | assert(p:match(string.rep("00", 10000))) | ||
626 | assert(p:match(string.rep("01", 10000))) | ||
627 | assert(p:match(string.rep("011", 10000))) | ||
628 | assert(not p:match(string.rep("011", 10000) .. "1")) | ||
629 | assert(not p:match(string.rep("011", 10001))) | ||
630 | |||
631 | |||
632 | -- this grammar does need backtracking info. | ||
633 | local lim = 10000 | ||
634 | p = m.P{ '0' * m.V(1) + '0' } | ||
635 | checkerr("stack overflow", m.match, p, string.rep("0", lim)) | ||
636 | m.setmaxstack(2*lim) | ||
637 | checkerr("stack overflow", m.match, p, string.rep("0", lim)) | ||
638 | m.setmaxstack(2*lim + 4) | ||
639 | assert(m.match(p, string.rep("0", lim)) == lim + 1) | ||
640 | |||
641 | -- this repetition should not need stack space (only the call does) | ||
642 | p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' } | ||
643 | m.setmaxstack(200) | ||
644 | assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362) | ||
645 | |||
646 | m.setmaxstack(100) -- restore low limit | ||
647 | |||
648 | -- tests for optional start position | ||
649 | assert(m.match("a", "abc", 1)) | ||
650 | assert(m.match("b", "abc", 2)) | ||
651 | assert(m.match("c", "abc", 3)) | ||
652 | assert(not m.match(1, "abc", 4)) | ||
653 | assert(m.match("a", "abc", -3)) | ||
654 | assert(m.match("b", "abc", -2)) | ||
655 | assert(m.match("c", "abc", -1)) | ||
656 | assert(m.match("abc", "abc", -4)) -- truncate to position 1 | ||
657 | |||
658 | assert(m.match("", "abc", 10)) -- empty string is everywhere! | ||
659 | assert(m.match("", "", 10)) | ||
660 | assert(not m.match(1, "", 1)) | ||
661 | assert(not m.match(1, "", -1)) | ||
662 | assert(not m.match(1, "", 0)) | ||
663 | |||
664 | print("+") | ||
665 | |||
666 | |||
667 | -- tests for argument captures | ||
668 | checkerr("invalid argument", m.Carg, 0) | ||
669 | checkerr("invalid argument", m.Carg, -1) | ||
670 | checkerr("invalid argument", m.Carg, 2^18) | ||
671 | checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1) | ||
672 | assert(m.match(m.Carg(1), 'a', 1, print) == print) | ||
673 | x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)} | ||
674 | checkeq(x, {10, 20}) | ||
675 | |||
676 | assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") * | ||
677 | m.Cmt(m.Cb("a"), function (s,i,x) | ||
678 | assert(s == "a" and i == 1); | ||
679 | return i, x+1 | ||
680 | end) * | ||
681 | m.Carg(2), function (s,i,a,b,c) | ||
682 | assert(s == "a" and i == 1 and c == nil); | ||
683 | return i, 2*a + 3*b | ||
684 | end) * "a", | ||
685 | "a", 1, false, 100, 1000) == 2*1001 + 3*100) | ||
686 | |||
687 | |||
688 | -- tests for Lua functions | ||
689 | |||
690 | t = {} | ||
691 | s = "" | ||
692 | p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false | ||
693 | s = "hi, this is a test" | ||
694 | assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1) | ||
695 | assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3) | ||
696 | |||
697 | assert(not m.match(p, s)) | ||
698 | |||
699 | p = mt.__add(function (s, i) return i end, function (s, i) return nil end) | ||
700 | assert(m.match(p, "alo")) | ||
701 | |||
702 | p = mt.__mul(function (s, i) return i end, function (s, i) return nil end) | ||
703 | assert(not m.match(p, "alo")) | ||
704 | |||
705 | |||
706 | t = {} | ||
707 | p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end | ||
708 | s = "hi, this is a test" | ||
709 | assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1) | ||
710 | assert(#t == string.len(s) and t[1] == 2 and t[2] == 3) | ||
711 | |||
712 | t = {} | ||
713 | p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; | ||
714 | return i <= s1:len() and i end) * 1 | ||
715 | s = "hi, this is a test" | ||
716 | assert(m.match(p^0, s) == string.len(s) + 1) | ||
717 | assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2) | ||
718 | |||
719 | p = function (s1, i) return m.match(m.P"a"^1, s1, i) end | ||
720 | assert(m.match(p, "aaaa") == 5) | ||
721 | assert(m.match(p, "abaa") == 2) | ||
722 | assert(not m.match(p, "baaa")) | ||
723 | |||
724 | checkerr("invalid position", m.match, function () return 2^20 end, s) | ||
725 | checkerr("invalid position", m.match, function () return 0 end, s) | ||
726 | checkerr("invalid position", m.match, function (s, i) return i - 1 end, s) | ||
727 | checkerr("invalid position", m.match, | ||
728 | m.P(1)^0 * function (_, i) return i - 1 end, s) | ||
729 | assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s)) | ||
730 | checkerr("invalid position", m.match, | ||
731 | m.P(1)^0 * function (_, i) return i + 1 end, s) | ||
732 | assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s)) | ||
733 | checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s) | ||
734 | assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s)) | ||
735 | assert(m.match(m.P(1)^0 * function (_, i) return true end, s) == | ||
736 | string.len(s) + 1) | ||
737 | for i = 1, string.len(s) + 1 do | ||
738 | assert(m.match(function (_, _) return i end, s) == i) | ||
739 | end | ||
740 | |||
741 | p = (m.P(function (s, i) return i%2 == 0 and i end) * 1 | ||
742 | + m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0 | ||
743 | * -1 | ||
744 | assert(p:match(string.rep('a', 14000))) | ||
745 | |||
746 | -- tests for Function Replacements | ||
747 | f = function (a, ...) if a ~= "x" then return {a, ...} end end | ||
748 | |||
749 | t = m.match(m.C(1)^0/f, "abc") | ||
750 | checkeq(t, {"a", "b", "c"}) | ||
751 | |||
752 | t = m.match(m.C(1)^0/f/f, "abc") | ||
753 | checkeq(t, {{"a", "b", "c"}}) | ||
754 | |||
755 | t = m.match(m.P(1)^0/f/f, "abc") -- no capture | ||
756 | checkeq(t, {{"abc"}}) | ||
757 | |||
758 | t = m.match((m.P(1)^0/f * m.Cp())/f, "abc") | ||
759 | checkeq(t, {{"abc"}, 4}) | ||
760 | |||
761 | t = m.match((m.C(1)^0/f * m.Cp())/f, "abc") | ||
762 | checkeq(t, {{"a", "b", "c"}, 4}) | ||
763 | |||
764 | t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc") | ||
765 | checkeq(t, {4}) | ||
766 | |||
767 | t = m.match(m.C(m.C(1)^0)/f, "abc") | ||
768 | checkeq(t, {"abc", "a", "b", "c"}) | ||
769 | |||
770 | g = function (...) return 1, ... end | ||
771 | t = {m.match(m.C(1)^0/g/g, "abc")} | ||
772 | checkeq(t, {1, 1, "a", "b", "c"}) | ||
773 | |||
774 | t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")} | ||
775 | t1 = {1,1,nil,nil,4,nil,3,nil,nil} | ||
776 | for i=1,10 do assert(t[i] == t1[i]) end | ||
777 | |||
-- bug in 0.12.2: ktable with only nil could be eliminated when joining
-- with a pattern without ktable
do
  local patt = m.P"aaa" * m.Cc(nil)
  assert(patt:match"aaa" == nil)
end

-- function captures may produce several values, and arguments that are
-- not supplied arrive as nil
do
  local function withx (ch) return ch, ch .. "x" end
  local function swap (x, y) return y, x end

  t = {m.match((m.C(1) / withx)^0, "abc")}
  checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})

  t = m.match(m.Ct((m.C(1) / swap * m.Cc(1))^0), "abc")
  checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
end
787 | |||
788 | -- tests for Query Replacements | ||
789 | |||
790 | assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10) | ||
791 | assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10) | ||
792 | assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40) | ||
793 | t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc") | ||
794 | checkeq(t, {40}) | ||
795 | |||
796 | assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e") | ||
797 | assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde") | ||
798 | assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e") | ||
799 | assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.") | ||
800 | assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+") | ||
801 | assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde") | ||
802 | assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde") | ||
803 | assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde") | ||
804 | assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde") | ||
805 | assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx") | ||
806 | assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3") | ||
807 | assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3) | ||
808 | assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3) | ||
809 | |||
810 | assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0") | ||
811 | |||
812 | assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd") | ||
813 | assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d") | ||
814 | assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad") | ||
815 | assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a") | ||
816 | assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx") | ||
817 | assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") == | ||
818 | "411 - abc ") | ||
819 | |||
820 | assert(m.match(m.P(1)/"%0", "abc") == "a") | ||
821 | checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc") | ||
822 | checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc") | ||
823 | |||
824 | p = m.C(1) | ||
825 | p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1" | ||
826 | assert(p:match("1234567890") == "9 - 1") | ||
827 | |||
828 | assert(m.match(m.Cc(print), "") == print) | ||
829 | |||
830 | -- too many captures (just ignore extra ones) | ||
831 | p = m.C(1)^0 / "%2-%9-%0-%9" | ||
832 | assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8") | ||
833 | s = string.rep("12345678901234567890", 20) | ||
834 | assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3") | ||
835 | |||
836 | -- string captures with non-string subcaptures | ||
837 | p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1" | ||
838 | assert(p:match'x' == 'alo - x - alo') | ||
839 | |||
840 | checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a") | ||
841 | |||
-- long strings for string capture: three long runs of letters are
-- captured separately and emitted in reverse order by '%3%2%1'
do
  -- repetitions per letter; local so that it does not leak as a global
  local len = 10000
  s = string.rep('a', len) .. string.rep('b', len) .. string.rep('c', len)

  p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'

  assert(p:match(s) == string.rep('c', len) ..
                       string.rep('b', len) ..
                       string.rep('a', len))
end
851 | |||
852 | print"+" | ||
853 | |||
-- accumulator capture
-- the folding function ignores the folded value and just bumps the
-- accumulator, so the result counts the captures that were folded
f = function (acc) return acc + 1 end
assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)

-- a single initial capture: the expected result holds only its first
-- value (the match would raise if 'error' were ever called)
t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
checkeq(t, {1})

-- fold "key=value;" pairs into a fresh table through 'rawset'
do
  local word = m.C(m.R"az"^1)
  local pair = m.Cg(word * "=" * word * ";")
  p = m.Cf(m.Ct(true) * pair^0, rawset)
  t = p:match("a=b;c=du;xux=yuy;")
  checkeq(t, {a="b", c="du", xux="yuy"})
end
864 | |||
865 | |||
866 | -- errors in accumulator capture | ||
867 | |||
868 | -- no initial capture | ||
869 | checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa') | ||
870 | -- no initial capture (very long match forces fold to be a pair open-close) | ||
871 | checkerr("no initial value", m.match, m.Cf(m.P(500), print), | ||
872 | string.rep('a', 600)) | ||
873 | |||
874 | -- nested capture produces no initial value | ||
875 | checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo") | ||
876 | |||
877 | |||
878 | -- tests for loop checker | ||
879 | |||
-- Assert that applying '^0' to 'p' (converted with m.P) is rejected with
-- a "may accept empty string" error
local function isnullable (p)
  local function star (patt) return patt^0 end
  checkerr("may accept empty string", star, m.P(p))
end
883 | |||
-- every entry must be refused as the body of a '^0' loop
for _, patt in ipairs{
  m.P("x")^-4,
  "",
  m.P("x")^0,
  m.P("x")^-1,
  m.P("x") + 1 + 2 + m.P("a")^-1,
  -m.P("ab"),
  - -m.P("ab"),
  # #(m.P("ab") + "xy"),
  - #m.P("ab")^0,
  # -m.P("ab")^1,
  #m.V(3),
  m.V(3) + m.V(1) + m.P('a')^-1,
  {[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)},
} do
  isnullable(patt)
end

-- repetitions that are accepted, with the position where they stop
assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
       == 3)
assert(m.match(m.P""^-3, "a") == 1)
902 | |||
-- Match 's' against the pattern that 'basiclookfor' (defined earlier in
-- this file) builds from 'p'; returns whatever m.match returns
local function find (p, s)
  local searcher = basiclookfor(p)
  return m.match(searcher, s)
end
906 | |||
907 | |||
-- Assert that m.P refuses to build a pattern from grammar 'g'.
--   g        : grammar table handed to m.P
--   expected : optional text that 'find' must locate in the error message
-- Both assertions carry a message, so a failing case reports what went
-- wrong instead of a bare "assertion failed!".
local function badgrammar (g, expected)
  local stat, msg = pcall(m.P, g)
  assert(not stat, "grammar was accepted but should have been rejected")
  if expected then
    assert(find(expected, msg),
           "error message '" .. tostring(msg) ..
           "' does not mention '" .. tostring(expected) .. "'")
  end
end
913 | |||
-- each case: grammar that must be rejected, text expected in the error
for _, case in ipairs{
  {{[1] = m.V(1)}, "rule '1'"},
  {{[1] = m.V(2)}, "rule '2'"},                     -- invalid non-terminal
  {{[1] = m.V"x"}, "rule 'x'"},                     -- invalid non-terminal
  {{[1] = m.V{}}, "rule '(a table)'"},              -- invalid non-terminal
  {{[1] = #m.P("a") * m.V(1)}, "rule '1'"},         -- left-recursive
  {{[1] = -m.P("a") * m.V(1)}, "rule '1'"},         -- left-recursive
  {{[1] = -1 * m.V(1)}, "rule '1'"},                -- left-recursive
  {{[1] = -1 + m.V(1)}, "rule '1'"},                -- left-recursive
  {{[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'"},   -- left-recursive
  {{[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'"}, -- inf. loop
  {{ m.V(2), m.V(3)^0, m.P"" }, "rule '2'"},        -- inf. loop
  {{ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'"},  -- inf. loop
  {{"x", x = #(m.V(1) * 'a') }, "rule '1'"},        -- inf. loop
  {{ -(m.V(1) * 'a') }, "rule '1'"},                -- inf. loop
  {{"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'"},      -- left recursive
  {{"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'"},
} do
  badgrammar(case[1], case[2])
end

-- recursion under a negative predicate, after consuming input, is fine
assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
933 | |||
934 | |||
-- good x bad grammars
do
  -- grammars that m.P must accept
  local good = {
    { ('a' * m.V(1))^-1 },
    { -('a' * m.V(1)) },
    { ('abc' * m.V(1))^-1 },
    { -('abc' * m.V(1)) },
    { #('a' * m.V(1)) },
    { m.B{ m.P'abc' } * 'a' * m.V(1) },
  }
  -- grammars that m.P must reject
  local bad = {
    { #m.P('abc') * m.V(1) },
    { -('a' + m.V(1)) },
    { #('a' + m.V(1)) },
    { m.B{ m.P'abc' } * m.V(1) },
    { ('a' + m.P'bcd')^-1 * m.V(1) },
  }
  for _, grammar in ipairs(good) do m.P(grammar) end
  for _, grammar in ipairs(bad) do badgrammar(grammar) end
end
947 | |||
948 | |||
949 | -- simple tests for maximum sizes: | ||
950 | local p = m.P"a" | ||
951 | for i=1,14 do p = p * p end | ||
952 | |||
953 | p = {} | ||
954 | for i=1,100 do p[i] = m.P"a" end | ||
955 | p = m.P(p) | ||
956 | |||
957 | |||
958 | -- strange values for rule labels | ||
959 | |||
960 | p = m.P{ "print", | ||
961 | print = m.V(print), | ||
962 | [print] = m.V(_G), | ||
963 | [_G] = m.P"a", | ||
964 | } | ||
965 | |||
966 | assert(p:match("a")) | ||
967 | |||
968 | -- initial rule | ||
969 | g = {} | ||
970 | for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end | ||
971 | g.i11 = m.P"" | ||
972 | for i = 1, 10 do | ||
973 | g[1] = "i"..i | ||
974 | local p = m.P(g) | ||
975 | assert(p:match("aaaaaaaaaaa") == 11 - i + 1) | ||
976 | end | ||
977 | |||
978 | print"+" | ||
979 | |||
980 | |||
981 | -- tests for back references | ||
982 | checkerr("back reference 'x' not found", m.match, m.Cb('x'), '') | ||
983 | checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a') | ||
984 | |||
985 | p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k")) | ||
986 | t = p:match("ab") | ||
987 | checkeq(t, {"a", "b"}) | ||
988 | |||
989 | p = m.P(true) | ||
990 | for i = 1, 10 do p = p * m.Cg(1, i) end | ||
991 | for i = 1, 10 do | ||
992 | local p = p * m.Cb(i) | ||
993 | assert(p:match('abcdefghij') == string.sub('abcdefghij', i, i)) | ||
994 | end | ||
995 | |||
996 | |||
-- group "x" is redefined three times, each time from a back reference to
-- its previous definition; 'foo' appends an "x" and logs every argument
-- it receives in 't'
t = {}
function foo (p)
  t[#t + 1] = p
  return p .. "x"
end

p = m.Cg(m.C(2) / foo, "x") * m.Cb"x"
for _ = 1, 3 do
  p = p * m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
end

x = {p:match'ab'}
checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
-- the log shows the whole chain being recomputed for each back reference
checkeq(t, {'ab',
            'ab', 'abx',
            'ab', 'abx', 'abxx',
            'ab', 'abx', 'abxx', 'abxxx'})
1010 | |||
1011 | |||
1012 | |||
1013 | -- tests for match-time captures | ||
1014 | |||
1015 | p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end) | ||
1016 | + 'acd' | ||
1017 | |||
1018 | assert(p:match('abc') == 3) | ||
1019 | assert(p:match('acd') == 4) | ||
1020 | |||
-- Match-time function that ignores subject and position and returns true
-- followed by every extra value it was given, unchanged
local function id (_, _, ...) return true, ... end
1024 | |||
1025 | assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) + | ||
1026 | m.R'09'^1 / string.char + | ||
1027 | m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y") | ||
1028 | |||
1029 | p = m.P{'S', | ||
1030 | S = m.V'atom' * space | ||
1031 | + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id), | ||
1032 | atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id) | ||
1033 | } | ||
1034 | x = p:match"(a g () ((b) c) (d (e)))" | ||
1035 | checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}}); | ||
1036 | |||
1037 | x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))} | ||
1038 | assert(#x == 500) | ||
1039 | |||
-- Match-time function: for x == 'a' returns the position 'i' plus the
-- values 1, 3, 7; for anything else returns nil plus 2, 4, 6, 8
local function id (s, i, x)
  if x ~= 'a' then return nil, 2, 4, 6, 8 end
  return i, 1, 3, 7
end
1045 | |||
1046 | p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0 | ||
1047 | assert(table.concat{p:match('abababab')} == string.rep('137', 4)) | ||
1048 | |||
-- Match-time function: matches the captured text 'x', used as a pattern,
-- against 's' again, starting len(x) characters before position 'i'
local function ref (s, i, x)
  local start = i - x:len()
  return m.match(x, s, start)
end
1052 | |||
1053 | assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4) | ||
1054 | assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4) | ||
1055 | assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo')) | ||
1056 | |||
1057 | ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end | ||
1058 | |||
1059 | assert(m.Cmt(1, ref):match'2') | ||
1060 | assert(not m.Cmt(1, ref):match'1') | ||
1061 | assert(m.Cmt(m.P(1)^0, ref):match'03') | ||
1062 | |||
-- Match-time function: succeeds only when captures 'a' and 'b' are equal,
-- keeping position 'i' and producing 'a' in upper case; otherwise it
-- returns nothing
ref = function (s, i, a, b)
  if a ~= b then return end
  return i, a:upper()
end
1066 | |||
1067 | p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref) | ||
1068 | p = (any - p)^0 * p * any^0 * -1 | ||
1069 | |||
1070 | assert(p:match'abbbc-bc ddaa' == 'BC') | ||
1071 | |||
do   -- match-time captures cannot be optimized away
  -- 'touch' counts how many times the match-time function below has run
  local touch = 0
  f = m.P(function () touch = touch + 1; return true end)

  -- assert that 'f' ran exactly 'n' times (default 1) since the previous
  -- check, then reset the counter
  local function check(n) n = n or 1; assert(touch == n); touch = 0 end

  -- in every pattern below 'f' must still be called, even where the
  -- overall outcome does not depend on it
  assert(m.match(f * false + 'b', 'a') == nil); check()
  assert(m.match(f * false + 'b', '') == nil); check()
  assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
  assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
  assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
  assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
  assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
  assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
  assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
     check()
  assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
  -- 'f' appears in both alternatives: two calls when the first one fails
  -- after 'f', a single call when the first one succeeds
  assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
  assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
  -- 'f' under a negative predicate is still called once
  assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
  assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
end
1094 | |||
1095 | c = '[' * m.Cg(m.P'='^0, "init") * '[' * | ||
1096 | { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2) | ||
1097 | return s1 == s2 end) | ||
1098 | + 1 * m.V(1) } / 0 | ||
1099 | |||
1100 | assert(c:match'[==[]]====]]]]==]===[]' == 18) | ||
1101 | assert(c:match'[[]=]====]=]]]==]===[]' == 14) | ||
1102 | assert(not c:match'[[]=]====]=]=]==]===[]') | ||
1103 | |||
1104 | |||
-- old bug: optimization of concat with fail removed match-time capture
-- The match-time function overwrites the variable 'p' with the subject
-- and returns nothing; the pattern object itself is still the receiver of
-- the ':match' call below.  Afterwards 'p' holding the subject proves
-- that the function ran even though it is followed by m.P(false).
p = m.Cmt(0, function (s) p = s end) * m.P(false)
assert(not p:match('alo'))
assert(p == 'alo')
1109 | |||
1110 | |||
-- ensure that failed match-time captures are not kept on Lua stack
do
  -- 't' is its own metatable, with weak keys and weak values ("kv"), so
  -- it does not keep the tables stored in it alive
  local t = {__mode = "kv"}; setmetatable(t,t)
  local c = 0    -- number of calls to 'foo'

  -- match-time function: after a full collection the only key left in
  -- 't' must be "__mode", i.e. the table produced by the previous call
  -- is gone; then produce (and register in 't') a fresh table as capture
  local function foo (s,i)
    collectgarbage();
    assert(next(t) == "__mode" and next(t, "__mode") == nil)
    local x = {}
    t[x] = true
    c = c + 1
    return i, x
  end

  -- at each position: run 'foo' and then fail, advance one character and
  -- recurse, or finally match the empty string
  local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
  p:match(string.rep('1', 10))
  -- the subject has 10 characters; the expected count is 11 calls
  assert(c == 11)
end
1129 | |||
1130 | |||
-- Build a match-time capture over "a" whose function returns true
-- followed by the 'n' values n-1, n-2, ..., 0
local function manyCmt (n)
  local function produce ()
    local values = {}
    for i = 1, n do values[i] = n - i end
    return true, unpack(values)
  end
  return m.Cmt("a", produce)
end
1138 | |||
-- bug in 1.0: failed match-time that used previous match-time results
do
  local seen    -- how many arguments 'aux' received (written, never read)
  local function aux (...)
    seen = #{...}
    return false
  end
  -- the first alternative produces 20 values and is then failed by
  -- 'aux'; only the 10 values of the second alternative may remain
  local failing = m.Cmt(manyCmt(20), aux)
  local res = {m.match(failing + manyCmt(10), "a")}
  assert(#res == 10)
  assert(res[1] == 9)
  assert(res[10] == 0)
end
1146 | |||
1147 | |||
-- bug in 1.0: problems with match-time captures returning too many values
do
  local lim = 2^11 - 10
  -- 'lim' values are still accepted and arrive intact...
  local res = {m.match(manyCmt(lim), "a")}
  assert(#res == lim)
  assert(res[1] == lim - 1)
  assert(res[lim] == 0)
  -- ...while 2^15 values are refused with a "too many" error
  checkerr("too many", m.match, manyCmt(2^15), "a")
end
1155 | |||
1156 | p = (m.P(function () return true, "a" end) * 'a' | ||
1157 | + m.P(function (s, i) return i, "aa", 20 end) * 'b' | ||
1158 | + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 | ||
1159 | |||
1160 | t = {p:match('abacc')} | ||
1161 | checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'}) | ||
1162 | |||
1163 | |||
1164 | ------------------------------------------------------------------- | ||
1165 | -- Tests for 're' module | ||
1166 | ------------------------------------------------------------------- | ||
1167 | |||
1168 | local re = require "re" | ||
1169 | |||
1170 | local match, compile = re.match, re.compile | ||
1171 | |||
1172 | |||
1173 | |||
1174 | assert(match("a", ".") == 2) | ||
1175 | assert(match("a", "''") == 1) | ||
1176 | assert(match("", " ! . ") == 1) | ||
1177 | assert(not match("a", " ! . ")) | ||
1178 | assert(match("abcde", " ( . . ) * ") == 5) | ||
1179 | assert(match("abbcde", " [a-c] +") == 5) | ||
1180 | assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7) | ||
1181 | assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8) | ||
1182 | assert(match("abbc--", " [a-c] + +") == 5) | ||
1183 | assert(match("abbc--", " [ac-] +") == 2) | ||
1184 | assert(match("abbc--", " [-acb] + ") == 7) | ||
1185 | assert(not match("abbcde", " [b-z] + ")) | ||
1186 | assert(match("abb\"de", '"abb"["]"de"') == 7) | ||
1187 | assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee") | ||
1188 | assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8) | ||
1189 | |||
1190 | assert(re.match("aaand", "[a]^2") == 3) | ||
1191 | |||
1192 | local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")} | ||
1193 | checkeq(t, {4, 5, 7}) | ||
1194 | local t = {match("abceefe", "((&&'e' {})? .)*")} | ||
1195 | checkeq(t, {4, 5, 7}) | ||
1196 | local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")} | ||
1197 | checkeq(t, {4, 5, 7}) | ||
1198 | local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")} | ||
1199 | checkeq(t, {4, 5, 7}) | ||
1200 | |||
1201 | assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5) | ||
1202 | assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4) | ||
1203 | assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8) | ||
1204 | |||
1205 | assert(match("abc", "a <- (. a)?") == 4) | ||
1206 | b = "balanced <- '(' ([^()] / balanced)* ')'" | ||
1207 | assert(match("(abc)", b)) | ||
1208 | assert(match("(a(b)((c) (d)))", b)) | ||
1209 | assert(not match("(a(b ((c) (d)))", b)) | ||
1210 | |||
1211 | b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]] | ||
1212 | assert(b == m.P(b)) | ||
1213 | assert(b:match"((((a))(b)))") | ||
1214 | |||
1215 | local g = [[ | ||
1216 | S <- "0" B / "1" A / "" -- balanced strings | ||
1217 | A <- "0" S / "1" A A -- one more 0 | ||
1218 | B <- "1" S / "0" B B -- one more 1 | ||
1219 | ]] | ||
1220 | assert(match("00011011", g) == 9) | ||
1221 | |||
1222 | local g = [[ | ||
1223 | S <- ("0" B / "1" A)* | ||
1224 | A <- "0" / "1" A A | ||
1225 | B <- "1" / "0" B B | ||
1226 | ]] | ||
1227 | assert(match("00011011", g) == 9) | ||
1228 | assert(match("000110110", g) == 9) | ||
1229 | assert(match("011110110", g) == 3) | ||
1230 | assert(match("000110010", g) == 1) | ||
1231 | |||
1232 | s = "aaaaaaaaaaaaaaaaaaaaaaaa" | ||
1233 | assert(match(s, "'a'^3") == 4) | ||
1234 | assert(match(s, "'a'^0") == 1) | ||
1235 | assert(match(s, "'a'^+3") == s:len() + 1) | ||
1236 | assert(not match(s, "'a'^+30")) | ||
1237 | assert(match(s, "'a'^-30") == s:len() + 1) | ||
1238 | assert(match(s, "'a'^-5") == 6) | ||
1239 | for i = 1, s:len() do | ||
1240 | assert(match(s, string.format("'a'^+%d", i)) >= i + 1) | ||
1241 | assert(match(s, string.format("'a'^-%d", i)) <= i + 1) | ||
1242 | assert(match(s, string.format("'a'^%d", i)) == i + 1) | ||
1243 | end | ||
1244 | assert(match("01234567890123456789", "[0-9]^3+") == 19) | ||
1245 | |||
1246 | |||
1247 | assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123") | ||
1248 | t = match("0123456789", "{| {.}* |}") | ||
1249 | checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}) | ||
1250 | assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101") | ||
1251 | |||
1252 | assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c") | ||
1253 | assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d") | ||
1254 | assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6) | ||
1255 | |||
1256 | assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x")) | ||
1257 | assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x")) | ||
1258 | |||
-- character classes in 're': ']' is literal right after '[' (or '[^')
-- and '-' is literal at either end of the class.
-- Each case gives the class body and the equivalent LPeg set; the plain
-- and the negated class are both derived from it, so the two lists
-- cannot drift apart (the negated list used to repeat "[^]-]").
for _, case in ipairs{
  {"]",      "]"},
  {"][",     m.S"[]"},
  {"]-",     m.S"-]"},
  {"-",      m.S"-"},
  {"az-",    m.S"a-z"},
  {"-az",    m.S"a-z"},
  {"a-z",    m.R"az"},
  {"]['\"",  m.S"]['\""},
} do
  local body, set = case[1], case[2]
  eqcharset(compile("[" .. body .. "]"), set)
  eqcharset(compile("[^" .. body .. "]"), any - set)
end
1277 | |||
1278 | -- tests for comments in 're' | ||
1279 | e = compile[[ | ||
1280 | A <- _B -- \t \n %nl .<> <- -> -- | ||
1281 | _B <- 'x' --]] | ||
1282 | assert(e:match'xy' == 2) | ||
1283 | |||
1284 | -- tests for 're' with pre-definitions | ||
1285 | defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"} | ||
1286 | e = compile("%letters (%letters / %digits)*", defs) | ||
1287 | assert(e:match"x123" == 5) | ||
1288 | e = compile("%_", defs) | ||
1289 | assert(e:match"__" == 3) | ||
1290 | |||
1291 | e = compile([[ | ||
1292 | S <- A+ | ||
1293 | A <- %letters+ B | ||
1294 | B <- %digits+ | ||
1295 | ]], defs) | ||
1296 | |||
1297 | e = compile("{[0-9]+'.'?[0-9]*} -> sin", math) | ||
1298 | assert(e:match("2.34") == math.sin(2.34)) | ||
1299 | |||
1300 | |||
-- Returns whether its third and fourth arguments are equal; the first two
-- are ignored (presumably subject and position, as for a match-time
-- function -- TODO confirm).
-- NOTE(review): nothing in the rest of this file appears to reference
-- 'eq'; confirm before removing it.
function eq (_, _, a, b) return a == b end
1302 | |||
1303 | c = re.compile([[ | ||
1304 | longstring <- '[' {:init: '='* :} '[' close | ||
1305 | close <- ']' =init ']' / . close | ||
1306 | ]]) | ||
1307 | |||
1308 | assert(c:match'[==[]]===]]]]==]===[]' == 17) | ||
1309 | assert(c:match'[[]=]====]=]]]==]===[]' == 14) | ||
1310 | assert(not c:match'[[]=]====]=]=]==]===[]') | ||
1311 | |||
1312 | c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. " | ||
1313 | |||
1314 | assert(c:match'[==[]]===]]]]==]') | ||
1315 | assert(c:match'[[]=]====]=][]==]===[]]') | ||
1316 | assert(not c:match'[[]=]====]=]=]==]===[]') | ||
1317 | |||
1318 | assert(re.find("hi alalo", "{:x:..:} =x") == 4) | ||
1319 | assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4) | ||
1320 | assert(not re.find("hi alalo", "{:x:..:} =x", 5)) | ||
1321 | assert(re.find("hi alalo", "{'al'}", 5) == 6) | ||
1322 | assert(re.find("hi aloalolo", "{:x:..:} =x") == 8) | ||
1323 | assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11) | ||
1324 | |||
1325 | -- re.find discards any captures | ||
1326 | local a,b,c = re.find("alo", "{.}{'o'}") | ||
1327 | assert(a == 2 and b == 3 and c == nil) | ||
1328 | |||
-- Return the piece of 's' delimited by the two positions that re.find
-- reports for pattern 'p'; return nothing when re.find finds no match
local function match (s, p)
  local first, last = re.find(s, p)
  if not first then return end
  return s:sub(first, last)
end
1333 | assert(match("alo alo", '[a-z]+') == "alo") | ||
1334 | assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil) | ||
1335 | assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo") | ||
1336 | |||
1337 | assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo") | ||
1338 | assert(re.gsub("alo alo", "%w+", ".") == ". .") | ||
1339 | assert(re.gsub("hi, how are you", "[aeiou]", string.upper) == | ||
1340 | "hI, hOw ArE yOU") | ||
1341 | |||
1342 | s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]' | ||
1343 | c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' " | ||
1344 | assert(re.gsub(s, c, "%2") == 'hi and =]') | ||
1345 | assert(re.gsub(s, c, "%0") == s) | ||
1346 | assert(re.gsub('[=[hi]=]', c, "%2") == '=') | ||
1347 | |||
1348 | assert(re.find("", "!.") == 1) | ||
1349 | assert(re.find("alo", "!.") == 4) | ||
1350 | |||
-- Match-time function for the grammar below: stores 'tag' in field 'tag'
-- of table 't', then returns position 'i' and the table itself
function addtag (s, i, t, tag)
  t.tag = tag
  return i, t
end
1352 | |||
1353 | c = re.compile([[ | ||
1354 | doc <- block !. | ||
1355 | block <- (start {| (block / { [^<]+ })* |} end?) => addtag | ||
1356 | start <- '<' {:tag: [a-z]+ :} '>' | ||
1357 | end <- '</' { =tag } '>' | ||
1358 | ]], {addtag = addtag}) | ||
1359 | |||
1360 | x = c:match[[ | ||
1361 | <x>hi<b>hello</b>but<b>totheend</x>]] | ||
1362 | checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but', | ||
1363 | {'totheend'}}) | ||
1364 | |||
1365 | |||
1366 | -- test for folding captures | ||
1367 | c = re.compile([[ | ||
1368 | S <- (number (%s+ number)*) ~> add | ||
1369 | number <- %d+ -> tonumber | ||
1370 | ]], {tonumber = tonumber, add = function (a,b) return a + b end}) | ||
1371 | assert(c:match("3 401 50") == 3 + 401 + 50) | ||
1372 | |||
1373 | -- tests for look-ahead captures | ||
1374 | x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")} | ||
1375 | checkeq(x, {"", "alo", ""}) | ||
1376 | |||
1377 | assert(re.match("aloalo", | ||
1378 | "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}") | ||
1379 | == "AallooAalloo") | ||
1380 | |||
1381 | -- bug in 0.9 (and older versions), due to captures in look-aheads | ||
1382 | x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]] | ||
1383 | assert(x:match"alo alo" == "+ +") | ||
1384 | |||
1385 | -- valid capture in look-ahead (used inside the look-ahead itself) | ||
1386 | x = re.compile[[ | ||
1387 | S <- &({:two: .. :} . =two) {[a-z]+} / . S | ||
1388 | ]] | ||
1389 | assert(x:match("hello aloaLo aloalo xuxu") == "aloalo") | ||
1390 | |||
1391 | |||
1392 | p = re.compile[[ | ||
1393 | block <- {| {:ident:space*:} line | ||
1394 | ((=ident !space line) / &(=ident space) block)* |} | ||
1395 | line <- {[^%nl]*} %nl | ||
1396 | space <- '_' -- should be ' ', but '_' is simpler for editors | ||
1397 | ]] | ||
1398 | |||
1399 | t= p:match[[ | ||
1400 | 1 | ||
1401 | __1.1 | ||
1402 | __1.2 | ||
1403 | ____1.2.1 | ||
1404 | ____ | ||
1405 | 2 | ||
1406 | __2.1 | ||
1407 | ]] | ||
1408 | checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"}, | ||
1409 | "2", {"2.1", ident = "__"}, ident = ""}) | ||
1410 | |||
1411 | |||
1412 | -- nested grammars | ||
1413 | p = re.compile[[ | ||
1414 | s <- a b !. | ||
1415 | b <- ( x <- ('b' x)? ) | ||
1416 | a <- ( x <- 'a' x? ) | ||
1417 | ]] | ||
1418 | |||
1419 | assert(p:match'aaabbb') | ||
1420 | assert(p:match'aaa') | ||
1421 | assert(not p:match'bbb') | ||
1422 | assert(not p:match'aaabbba') | ||
1423 | |||
1424 | -- testing groups | ||
1425 | t = {re.match("abc", "{:S <- {:.:} {S} / '':}")} | ||
1426 | checkeq(t, {"a", "bc", "b", "c", "c", ""}) | ||
1427 | |||
1428 | t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}") | ||
1429 | checkeq(t, {a="1", b="2", c="4"}) | ||
1430 | t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}") | ||
1431 | checkeq(t, {a="1", b="2", c="4"}) | ||
1432 | t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}") | ||
1433 | checkeq(t, {"1", b="2", "4", "5"}) | ||
1434 | t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}") | ||
1435 | checkeq(t, {"1", "23", "4", "5"}) | ||
1436 | t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}") | ||
1437 | checkeq(t, {"1", "23", "4", "5"}) | ||
1438 | |||
1439 | |||
1440 | -- testing pre-defined names | ||
1441 | assert(os.setlocale("C") == "C") | ||
1442 | |||
-- Check that the 're' pattern 'p1', rendered as a string by 'cs2str',
-- equals 'allchar' with every character outside the Lua class 'p2'
-- removed
function eqlpeggsub (p1, p2)
  local fromre = cs2str(re.compile(p1))
  local fromlua = allchar:gsub("[^" .. p2 .. "]", "")
  -- if fromre ~= fromlua then print(#fromre, #fromlua) end
  assert(fromre == fromlua)
end
1449 | |||
1450 | |||
-- every predefined class must agree with the Lua class of the same name,
-- lower-case classes first, then their upper-case complements
for _, letters in ipairs{"walupdxsc", "WALUPDXSC"} do
  for class in letters:gmatch(".") do
    eqlpeggsub("%" .. class, "%" .. class)
  end
end

-- predefined classes used inside bracketed classes
for _, case in ipairs{
  {"[%w]",   "%w"},
  {"[_%w]",  "_%w"},
  {"[^%w]",  "%W"},
  {"[%W%S]", "%W%S"},
} do
  eqlpeggsub(case[1], case[2])
end
1475 | |||
1476 | re.updatelocale() | ||
1477 | |||
1478 | |||
1479 | -- testing nested substitutions x string captures | ||
1480 | |||
1481 | p = re.compile[[ | ||
1482 | text <- {~ item* ~} | ||
1483 | item <- macro / [^()] / '(' item* ')' | ||
1484 | arg <- ' '* {~ (!',' item)* ~} | ||
1485 | args <- '(' arg (',' arg)* ')' | ||
1486 | macro <- ('apply' args) -> '%1(%2)' | ||
1487 | / ('add' args) -> '%1 + %2' | ||
1488 | / ('mul' args) -> '%1 * %2' | ||
1489 | ]] | ||
1490 | |||
1491 | assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)") | ||
1492 | |||
1493 | rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']] | ||
1494 | |||
1495 | assert(rev:match"0123456789" == "9876543210") | ||
1496 | |||
1497 | |||
-- testing error messages in re

-- Assert, through 'checkerr', that compiling the 're' pattern 'p' raises
-- an error matching 'err'
local function errmsg (p, err)
  checkerr(err, re.compile, p)
end

-- each case: invalid pattern, text expected in the error
for _, case in ipairs{
  {'aaaa', "rule 'aaaa'"},
  {'a', 'outside'},
  {'b <- a', 'undefined'},
  {"x <- 'a' x <- 'b'", 'already defined'},
  {"'a' -", "near '-'"},
} do
  errmsg(case[1], case[2])
end
1509 | |||
1510 | |||
1511 | print"OK" | ||
1512 | |||
1513 | |||