about summary refs log tree commit diff
path: root/test.lua
diff options
context:
space:
mode:
Diffstat (limited to 'test.lua')
-rwxr-xr-x test.lua 1386
1 files changed, 1386 insertions, 0 deletions
diff --git a/test.lua b/test.lua
new file mode 100755
index 0000000..d486c03
--- /dev/null
+++ b/test.lua
@@ -0,0 +1,1386 @@
1#!/usr/bin/env lua5.1
2
3-- $Id: test.lua,v 1.101 2013/04/12 16:30:33 roberto Exp $
4
5-- require"strict" -- just to be pedantic
6
7local m = require"lpeglabel"
8
9
10-- for general use
11local a, b, c, d, e, f, g, p, t
12
13
14-- compatibility with Lua 5.2
15local unpack = rawget(table, "unpack") or unpack
16local loadstring = rawget(_G, "loadstring") or load
17
18
19-- most tests here do not need much stack space
20m.setmaxstack(5)
21
22local any = m.P(1)
23local space = m.S" \t\n"^0
24
-- Deep-equality assertion used throughout the tests.
--   x, y: values to compare; tables are compared key by key in both
--         directions, anything else must satisfy x == y.
--   p:    when true, prints each pair of values as it is visited.
-- Raises (via assert) on the first mismatch; returns nothing.
local function checkeq (x, y, p)
  if p then print(x, y) end
  if type(x) == "table" then
    -- every entry of x must match y, and y must have no extra entries
    for key, value in pairs(x) do checkeq(value, y[key], p) end
    for key, value in pairs(y) do checkeq(value, x[key], p) end
    return
  end
  assert(x == y)
end
33
34
35local mt = getmetatable(m.P(1))
36
37
-- string holding every byte value from 0 to 255, in ascending order
local allchar = {}
for code = 0, 255 do
  allchar[code + 1] = string.char(code)
end
allchar = table.concat(allchar)
assert(#allchar == 256)
42
-- Runs a substitution capture over 'allchar' in which text matched by
-- 'c' is kept and any other single character is replaced by the empty
-- string; returns the result of that match.
local function cs2str (c)
  local dropped = m.P(1) / ""
  local filter = m.Cs((c + dropped)^0)
  return m.match(filter, allchar)
end
46
-- Asserts that 'c1' and 'c2' produce the same string when passed
-- through cs2str.
local function eqcharset (c1, c2)
  local first, second = cs2str(c1), cs2str(c2)
  assert(first == second)
end
50
51
52print"General tests for LPeg library"
53
54assert(type(m.version()) == "string")
55print("version " .. m.version())
56assert(m.type("alo") ~= "pattern")
57assert(m.type(io.input) ~= "pattern")
58assert(m.type(m.P"alo") == "pattern")
59
60-- tests for some basic optimizations
61assert(m.match(m.P(false) + "a", "a") == 2)
62assert(m.match(m.P(true) + "a", "a") == 1)
63assert(m.match("a" + m.P(false), "b") == nil)
64assert(m.match("a" + m.P(true), "b") == 1)
65
66assert(m.match(m.P(false) * "a", "a") == nil)
67assert(m.match(m.P(true) * "a", "a") == 2)
68assert(m.match("a" * m.P(false), "a") == nil)
69assert(m.match("a" * m.P(true), "a") == 2)
70
71assert(m.match(#m.P(false) * "a", "a") == nil)
72assert(m.match(#m.P(true) * "a", "a") == 2)
73assert(m.match("a" * #m.P(false), "a") == nil)
74assert(m.match("a" * #m.P(true), "a") == 2)
75
76
77-- tests for locale
78do
79 assert(m.locale(m) == m)
80 local t = {}
81 assert(m.locale(t, m) == t)
82 local x = m.locale()
83 for n,v in pairs(x) do
84 assert(type(n) == "string")
85 eqcharset(v, m[n])
86 end
87end
88
89
90assert(m.match(3, "aaaa"))
91assert(m.match(4, "aaaa"))
92assert(not m.match(5, "aaaa"))
93assert(m.match(-3, "aa"))
94assert(not m.match(-3, "aaa"))
95assert(not m.match(-3, "aaaa"))
96assert(not m.match(-4, "aaaa"))
97assert(m.P(-5):match"aaaa")
98
99assert(m.match("a", "alo") == 2)
100assert(m.match("al", "alo") == 3)
101assert(not m.match("alu", "alo"))
102assert(m.match(true, "") == 1)
103
104local digit = m.S"0123456789"
105local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
106local lower = m.S"abcdefghijklmnopqrstuvwxyz"
107local letter = m.S"" + upper + lower
108local alpha = letter + digit + m.R()
109
110eqcharset(m.S"", m.P(false))
111eqcharset(upper, m.R("AZ"))
112eqcharset(lower, m.R("az"))
113eqcharset(upper + lower, m.R("AZ", "az"))
114eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90"))
115eqcharset(digit, m.S"01234567" + "8" + "9")
116eqcharset(upper, letter - lower)
117eqcharset(m.S(""), m.R())
118assert(cs2str(m.S("")) == "")
119
120eqcharset(m.S"\0", "\0")
121eqcharset(m.S"\1\0\2", m.R"\0\2")
122eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0")
123eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2")
124
125local word = alpha^1 * (1 - alpha)^0
126
127assert((word^0 * -1):match"alo alo")
128assert(m.match(word^1 * -1, "alo alo"))
129assert(m.match(word^2 * -1, "alo alo"))
130assert(not m.match(word^3 * -1, "alo alo"))
131
132assert(not m.match(word^-1 * -1, "alo alo"))
133assert(m.match(word^-2 * -1, "alo alo"))
134assert(m.match(word^-3 * -1, "alo alo"))
135
136local eos = m.P(-1)
137
138assert(m.match(digit^0 * letter * digit * eos, "1298a1"))
139assert(not m.match(digit^0 * letter * eos, "1257a1"))
140
141b = {
142 [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")"
143}
144
145assert(m.match(b, "(al())()"))
146assert(not m.match(b * eos, "(al())()"))
147assert(m.match(b * eos, "((al())()(é))"))
148assert(not m.match(b, "(al()()"))
149
150assert(not m.match(letter^1 - "for", "foreach"))
151assert(m.match(letter^1 - ("for" * eos), "foreach"))
152assert(not m.match(letter^1 - ("for" * eos), "for"))
153
-- Builds a one-rule grammar that tries 'p' at the current position and,
-- if that fails, consumes one character and tries the rule again.
function basiclookfor (p)
  local skip_one = 1 * m.V(1)
  local grammar = { p + skip_one }
  return m.P(grammar)
end
159
-- Same search as basiclookfor, but over 'p' wrapped in a simple
-- capture (p:C()).
function caplookfor (p)
  local captured = p:C()
  return basiclookfor(captured)
end
163
164assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou")
165a = {m.match(caplookfor(letter^1)^0, " two words, one more ")}
166checkeq(a, {"two", "words", "one", "more"})
167
168assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7)
169
170a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
171checkeq(a, {"123", "d"})
172
173a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")}
174checkeq(a, {"123"})
175
176a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
177checkeq(a, {"abcd", "l"})
178
179a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
180checkeq(a, {10,20,30,2})
181a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')}
182checkeq(a, {1,10,20,30,2})
183a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
184checkeq(a, {1,10,20,30,2})
185a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa')
186checkeq(a, {1,7,8,10,20,30,2})
187a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')}
188checkeq(a, {1,2,3,4})
189
190a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")}
191checkeq(a, {1, 5})
192
193
194t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
195checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
196
197
198-- test for small capture boundary
199for i = 250,260 do
200 assert(#m.match(m.C(i), string.rep('a', i)) == i)
201 assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
202end
203
204
205-- tests for any*n and any*-n
206for n = 1, 550 do
207 local x_1 = string.rep('x', n - 1)
208 local x = x_1 .. 'a'
209 assert(not m.P(n):match(x_1))
210 assert(m.P(n):match(x) == n + 1)
211 assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4)
212 assert(m.C(n):match(x) == x)
213 assert(m.C(m.C(n)):match(x) == x)
214 assert(m.P(-n):match(x_1) == 1)
215 assert(not m.P(-n):match(x))
216 assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20)
217 local n3 = math.floor(n/3)
218 assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1)
219end
220
221-- true values
222assert(m.P(0):match("x") == 1)
223assert(m.P(0):match("") == 1)
224assert(m.C(0):match("x") == "")
225
226assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1)
227assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0)
228assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd")
229p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4
230
231
232-- test for alternation optimization
233assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2)
234assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3)
235assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1)
236assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3)
237assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3)
238assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3)
239assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3)
240assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4)
241assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3)
242assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4)
243assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4)
244assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
245assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3)
246assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4)
247assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4)
248assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4)
249assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3)
250assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4)
251assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4)
252assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4)
253assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3)
254assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3)
255assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4)
256assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3)
257assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1)
258assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1)
259assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3)
260assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4)
261assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3)
262assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2)
263assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1)
264assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe"))
265
266assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4)
267
268
269-- bug in 0.12 (rc1)
270assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4)
271
272assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) ==
273 4*10 + 1)
274
275-- optimizations with optional parts
276assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1)
277assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1)
278assert(m.match(("ab" * m.B"c")^-1, "ab") == 1)
279assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7)
280
281assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)
282
283p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
284assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)
285
286
287pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
288assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) ==
289 m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi))
290print"+"
291
292
293-- tests for capture optimizations
294assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5)
295t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")}
296checkeq(t, {3, 6})
297
298
299-- tests for numbered captures
300p = m.C(1)
301assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a")
302assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef")
303assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc")
304assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7)
305
306a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh")
307assert(a == "a" and b == "efg" and c == "h")
308
309-- test for table captures
310t = m.match(m.Ct(letter^1), "alo")
311checkeq(t, {})
312
313t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo")
314assert(n == "t" and table.concat(t) == "alo")
315
316t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
317assert(table.concat(t, ";") == "alo;a;l;o")
318
319t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo")
320assert(table.concat(t, ";") == "alo;a;l;o")
321
322t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo")
323assert(table.concat(t[1], ";") == "1;2;2;3;3;4")
324
325t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo")
326checkeq(t, {"alo", "a", "o"})
327
328
329-- tests for groups
330p = m.Cg(1) -- no capture
331assert(p:match('x') == 'x')
332p = m.Cg(m.P(true)/function () end * 1) -- no value
333assert(p:match('x') == 'x')
334p = m.Cg(m.Cg(m.Cg(m.C(1))))
335assert(p:match('x') == 'x')
336p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2)))
337t = {p:match'abc'}
338checkeq(t, {'a', 'b', 'c', 1, 2})
339
340p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho"))
341t = p:match''
342checkeq(t, {hi = 10, ho = 20})
343t = p:match'abc'
344checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
345
346
347-- test for error messages
-- Checks that a call fails with an error message containing 'msg'.
--   msg: literal text that must occur somewhere in the error message.
--   ...: function to call followed by its arguments (forwarded to pcall).
-- The call must actually raise: a call that succeeds is rejected
-- outright, so a successful call that happens to return a string
-- containing 'msg' cannot satisfy the check.
local function checkerr (msg, ...)
  local ok, err = pcall(...)
  assert(not ok)
  -- search for 'msg' anywhere inside the error message
  assert(m.match({ m.P(msg) + 1 * m.V(1) }, err))
end
351
352checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
353checkerr("rule '1' used outside a grammar", m.match, m.V(1), "")
354checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "")
355checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "")
356checkerr("undefined in given grammar", m.match, { m.V{} }, "")
357
358checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} })
359checkerr("grammar has no initial rule", m.P, { [print] = {} })
360
361-- grammar with a long call chain before left recursion
362p = {'a',
363 a = m.V'b' * m.V'c' * m.V'd' * m.V'a',
364 b = m.V'c',
365 c = m.V'd',
366 d = m.V'e',
367 e = m.V'f',
368 f = m.V'g',
369 g = m.P''
370}
371checkerr("rule 'a' may be left recursive", m.match, p, "a")
372
373
374-- tests for non-pattern as arguments to pattern functions
375
376p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 }
377assert(m.match(p, "aaabaac") == 7)
378
379p = m.P'abc' * 2 * -5 * true * 'de' -- mix of numbers and strings and booleans
380
381assert(p:match("abc01de") == 8)
382assert(p:match("abc01de3456") == nil)
383
384p = 'abc' * (2 * (-5 * (true * m.P'de')))
385
386assert(p:match("abc01de") == 8)
387assert(p:match("abc01de3456") == nil)
388
389p = { m.V(2), m.P"abc" } *
390 (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" })
391assert(p:match("abcaaaxx") == 7)
392assert(p:match("abcxx") == 6)
393
394
395-- a large table capture
396t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000))
397assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a')
398
399print('+')
400
401
402-- bug in 0.10 (rechecking a grammar, after tail-call optimization)
403m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } }
404
405local V = m.V
406
407local Space = m.S(" \n\t")^0
408local Number = m.C(m.R("09")^1) * Space
409local FactorOp = m.C(m.S("+-")) * Space
410local TermOp = m.C(m.S("*/")) * Space
411local Open = "(" * Space
412local Close = ")" * Space
413
414
-- Fold step for additive expressions: returns v1 + v2 when 'op' is "+",
-- and v1 - v2 for any other operator.  Asserts that no fourth value
-- was passed.
local function f_factor (v1, op, v2, d)
  assert(d == nil)
  if op ~= "+" then
    return v1 - v2
  end
  return v1 + v2
end
421
422
-- Fold step for multiplicative expressions: returns v1 * v2 when 'op'
-- is "*", and v1 / v2 for any other operator.  Asserts that no fourth
-- value was passed.
local function f_term (v1, op, v2, d)
  assert(d == nil)
  if op ~= "*" then
    return v1 / v2
  end
  return v1 * v2
end
429
430G = m.P{ "Exp",
431 Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor);
432 Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term);
433 Term = Number / tonumber + Open * V"Exp" * Close;
434}
435
436G = Space * G * -1
437
438for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do
439 assert(m.match(G, s) == loadstring("return "..s)())
440end
441
442
443-- test for grammars (errors deep in calling non-terminals)
444g = m.P{
445 [1] = m.V(2) + "a",
446 [2] = "a" * m.V(3) * "x",
447 [3] = "b" * m.V(3) + "c"
448}
449
450assert(m.match(g, "abbbcx") == 7)
451assert(m.match(g, "abbbbx") == 2)
452
453
454-- tests for \0
455assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4)
456assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5)
457assert(m.match(m.P(1)^3, "\0\1\0a") == 5)
458assert(not m.match(-4, "\0\1\0a"))
459assert(m.match("\0\1\0a", "\0\1\0a") == 5)
460assert(m.match("\0\0\0", "\0\0\0") == 4)
461assert(not m.match("\0\0\0", "\0\0"))
462
463
464-- tests for predicates
465assert(not m.match(-m.P("a") * 2, "alo"))
466assert(m.match(- -m.P("a") * 2, "alo") == 3)
467assert(m.match(#m.P("a") * 2, "alo") == 3)
468assert(m.match(##m.P("a") * 2, "alo") == 3)
469assert(not m.match(##m.P("c") * 2, "alo"))
470assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
471assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
472assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
473assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.")
474
475p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3)
476assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1)
477
478p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20)
479assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10)
480
481
482
483-- look-behind predicate
484assert(not m.match(m.B'a', 'a'))
485assert(m.match(1 * m.B'a', 'a') == 2)
486assert(not m.match(m.B(1), 'a'))
487assert(m.match(1 * m.B(1), 'a') == 2)
488assert(m.match(-m.B(1), 'a') == 1)
489assert(m.match(m.B(250), string.rep('a', 250)) == nil)
490assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
491assert(not pcall(m.B, 260))
492
493B = #letter * -m.B(letter) + -letter * m.B(letter)
494x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
495checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10})
496checkeq(m.match(x, ' ar cal '), {2,4,5,8})
497checkeq(m.match(x, ' '), {})
498checkeq(m.match(x, 'aloalo'), {1,7})
499
500assert(m.match(B, "a") == 1)
501assert(m.match(1 * B, "a") == 2)
502assert(not m.B(1 - letter):match(""))
503assert((-m.B(letter)):match("") == 1)
504
505assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5)
506assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa"))
507assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5)
508
509-- look-behind with grammars
510assert(m.match('a' * m.B{'x', x = m.P(3)}, 'aaa') == nil)
511assert(m.match('aa' * m.B{'x', x = m.P('aaa')}, 'aaaa') == nil)
512assert(m.match('aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa') == 4)
513
514
515
516-- bug in 0.9
517assert(m.match(('a' * #m.P'b'), "ab") == 2)
518assert(not m.match(('a' * #m.P'b'), "a"))
519
520assert(not m.match(#m.S'567', ""))
521assert(m.match(#m.S'567' * 1, "6") == 2)
522
523
524-- tests for Tail Calls
525
526--labeled failure
527p = m.P{ 'a' * m.V(1) + '' }
528assert(p:match(string.rep('a', 1000)) == 1001)
529
530-- create a grammar for a simple DFA for even number of 0s and 1s
531--
532-- ->1 <---0---> 2
533-- ^ ^
534-- | |
535-- 1 1
536-- | |
537-- V V
538-- 3 <---0---> 4
539--
540-- this grammar should keep no backtracking information
541
542p = m.P{
543 [1] = '0' * m.V(2) + '1' * m.V(3) + -1,
544 [2] = '0' * m.V(1) + '1' * m.V(4),
545 [3] = '0' * m.V(4) + '1' * m.V(1),
546 [4] = '0' * m.V(3) + '1' * m.V(2),
547}
548
549-- labeled failure
550assert(p:match(string.rep("00", 10000)))
551assert(p:match(string.rep("01", 10000)))
552assert(p:match(string.rep("011", 10000)))
553assert(not p:match(string.rep("011", 10000) .. "1"))
554assert(not p:match(string.rep("011", 10001)))
555
556
557-- this grammar does need backtracking info.
558local lim = 10000
559p = m.P{ '0' * m.V(1) + '0' }
560assert(not pcall(m.match, p, string.rep("0", lim)))
561m.setmaxstack(2*lim)
562assert(not pcall(m.match, p, string.rep("0", lim)))
563m.setmaxstack(2*lim + 4)
564assert(pcall(m.match, p, string.rep("0", lim)))
565
566-- this repetition should not need stack space (only the call does)
567p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
568m.setmaxstack(200)
569-- labeled failure
570assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
571
572m.setmaxstack(5) -- restore original limit
573
574-- tests for optional start position
575assert(m.match("a", "abc", 1))
576assert(m.match("b", "abc", 2))
577assert(m.match("c", "abc", 3))
578assert(not m.match(1, "abc", 4))
579assert(m.match("a", "abc", -3))
580assert(m.match("b", "abc", -2))
581assert(m.match("c", "abc", -1))
582assert(m.match("abc", "abc", -4)) -- truncate to position 1
583
584assert(m.match("", "abc", 10)) -- empty string is everywhere!
585assert(m.match("", "", 10))
586assert(not m.match(1, "", 1))
587assert(not m.match(1, "", -1))
588assert(not m.match(1, "", 0))
589
590print("+")
591
592
593-- tests for argument captures
594assert(not pcall(m.Carg, 0))
595assert(not pcall(m.Carg, -1))
596assert(not pcall(m.Carg, 2^18))
597assert(not pcall(m.match, m.Carg(1), 'a', 1))
598assert(m.match(m.Carg(1), 'a', 1, print) == print)
599x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
600checkeq(x, {10, 20})
601
602assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
603 m.Cmt(m.Cb("a"), function (s,i,x)
604 assert(s == "a" and i == 1);
605 return i, x+1
606 end) *
607 m.Carg(2), function (s,i,a,b,c)
608 assert(s == "a" and i == 1 and c == nil);
609 return i, 2*a + 3*b
610 end) * "a",
611 "a", 1, false, 100, 1000) == 2*1001 + 3*100)
612
613
614-- tests for Lua functions
615
616t = {}
617s = ""
618p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false
619s = "hi, this is a test"
620assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1)
621assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3)
622
623assert(not m.match(p, s))
624
625p = mt.__add(function (s, i) return i end, function (s, i) return nil end)
626assert(m.match(p, "alo"))
627
628p = mt.__mul(function (s, i) return i end, function (s, i) return nil end)
629assert(not m.match(p, "alo"))
630
631
632t = {}
633p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end
634s = "hi, this is a test"
635assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1)
636assert(#t == string.len(s) and t[1] == 2 and t[2] == 3)
637
638t = {}
639p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i;
640 return i <= s1:len() and i end) * 1
641s = "hi, this is a test"
642assert(m.match(p^0, s) == string.len(s) + 1)
643assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2)
644
645p = function (s1, i) return m.match(m.P"a"^1, s1, i) end
646assert(m.match(p, "aaaa") == 5)
647assert(m.match(p, "abaa") == 2)
648assert(not m.match(p, "baaa"))
649
650assert(not pcall(m.match, function () return 2^20 end, s))
651assert(not pcall(m.match, function () return 0 end, s))
652assert(not pcall(m.match, function (s, i) return i - 1 end, s))
653assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s))
654assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
655assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s))
656assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
657assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s))
658assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
659assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
660 string.len(s) + 1)
661for i = 1, string.len(s) + 1 do
662 assert(m.match(function (_, _) return i end, s) == i)
663end
664
665p = (m.P(function (s, i) return i%2 == 0 and i end) * 1
666 + m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0
667 * -1
668assert(p:match(string.rep('a', 14000)))
669
670-- tests for Function Replacements
-- Packs all its arguments into a table, unless the first argument is
-- "x", in which case it returns no values at all.
f = function (a, ...)
  if a == "x" then return end
  return {a, ...}
end
672
673t = m.match(m.C(1)^0/f, "abc")
674checkeq(t, {"a", "b", "c"})
675
676t = m.match(m.C(1)^0/f/f, "abc")
677checkeq(t, {{"a", "b", "c"}})
678
679t = m.match(m.P(1)^0/f/f, "abc") -- no capture
680checkeq(t, {{"abc"}})
681
682t = m.match((m.P(1)^0/f * m.Cp())/f, "abc")
683checkeq(t, {{"abc"}, 4})
684
685t = m.match((m.C(1)^0/f * m.Cp())/f, "abc")
686checkeq(t, {{"a", "b", "c"}, 4})
687
688t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc")
689checkeq(t, {4})
690
691t = m.match(m.C(m.C(1)^0)/f, "abc")
692checkeq(t, {"abc", "a", "b", "c"})
693
-- Returns 1 followed by all of its arguments.
g = function (...)
  return 1, ...
end
695t = {m.match(m.C(1)^0/g/g, "abc")}
696checkeq(t, {1, 1, "a", "b", "c"})
697
698t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
699t1 = {1,1,nil,nil,4,nil,3,nil,nil}
700for i=1,10 do assert(t[i] == t1[i]) end
701
702t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
703checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
704
705t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc")
706checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1})
707
708-- tests for Query Replacements
709
710assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10)
711assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10)
712assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40)
713t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc")
714checkeq(t, {40})
715
716assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e")
717assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde")
718assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e")
719assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.")
720assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+")
721assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde")
722assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde")
723assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde")
724assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde")
725assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx")
726assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3")
727assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3)
728assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3)
729
730assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0")
731
732assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd")
733assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d")
734assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad")
735assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a")
736assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
737assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
738 "411 - abc ")
739
740assert(pcall(m.match, m.P(1)/"%0", "abc"))
741assert(not pcall(m.match, m.P(1)/"%1", "abc")) -- out of range
742assert(not pcall(m.match, m.P(1)/"%9", "abc")) -- out of range
743
744p = m.C(1)
745p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
746assert(p:match("1234567890") == "9 - 1")
747
748assert(m.match(m.Cc(print), "") == print)
749
750-- too many captures (just ignore extra ones)
751p = m.C(1)^0 / "%2-%9-%0-%9"
752assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8")
753s = string.rep("12345678901234567890", 20)
754assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
755
756-- string captures with non-string subcaptures
757p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
758assert(p:match'x' == 'alo - x - alo')
759
760assert(not pcall(m.match, m.Cc(true) / "%1", "a"))
761
762-- long strings for string capture
763l = 10000
764s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l)
765
766p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1'
767
768assert(p:match(s) == string.rep('c', l) ..
769 string.rep('b', l) ..
770 string.rep('a', l))
771
772print"+"
773
774-- accumulator capture
-- Returns its first argument plus one; any further arguments are
-- ignored.
function f (x)
  return x + 1
end
776assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7)
777
778t = {m.match(m.Cf(m.Cc(1,2,3), error), "")}
779checkeq(t, {1})
780p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0,
781 rawset)
782t = p:match("a=b;c=du;xux=yuy;")
783checkeq(t, {a="b", c="du", xux="yuy"})
784
785
786-- errors in accumulator capture
787
788-- very long match (forces fold to be a pair open-close) producing with
789-- no initial capture
790assert(not pcall(m.match, m.Cf(m.P(500), print), string.rep('a', 600)))
791
792-- nested capture produces no initial value
793assert(not pcall(m.match, m.Cf(m.P(1) / {}, print), "alo"))
794
795
796-- tests for loop checker
797
-- Asserts that applying ^0 to the pattern built from 'p' raises an
-- error.  The pattern itself is built outside the protected call, so
-- an error from m.P(p) is not swallowed.
local function haveloop (p)
  local pattern = m.P(p)
  local ok = pcall(function () return pattern^0 end)
  assert(not ok)
end
801
802haveloop(m.P("x")^-4)
803assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
804assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
805haveloop("")
806haveloop(m.P("x")^0)
807haveloop(m.P("x")^-1)
808haveloop(m.P("x") + 1 + 2 + m.P("a")^-1)
809haveloop(-m.P("ab"))
810haveloop(- -m.P("ab"))
811haveloop(# #(m.P("ab") + "xy"))
812haveloop(- #m.P("ab")^0)
813haveloop(# -m.P("ab")^1)
814haveloop(#m.V(3))
815haveloop(m.V(3) + m.V(1) + m.P('a')^-1)
816haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
817assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
818 == 3)
819assert(m.match(m.P""^-3, "a") == 1)
820
-- Matches basiclookfor(p) against 's' and returns whatever that match
-- returns.
local function find (p, s)
  local searcher = basiclookfor(p)
  return m.match(searcher, s)
end
824
825
-- Asserts that m.P rejects grammar 'g'.
--   expected: optional; when given, it must be found (via 'find')
--             inside the error message.
local function badgrammar (g, expected)
  local ok, err = pcall(m.P, g)
  assert(not ok)
  if not expected then return end
  assert(find(expected, err))
end
831
832badgrammar({[1] = m.V(1)}, "rule '1'")
833badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal
834badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal
835badgrammar({[1] = m.V{}}, "rule '(a table)'") -- invalid non-terminal
836badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
837badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive
838badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive
839badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive
840badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive
841badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'") -- inf. loop
842badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'") -- inf. loop
843badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'") -- inf. loop
844badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'") -- inf. loop
845badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop
846badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive
847badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'")
848
849assert(m.match({'a' * -m.V(1)}, "aaa") == 2)
850assert(m.match({'a' * -m.V(1)}, "aaaa") == nil)
851
852
853-- good x bad grammars
854m.P{ ('a' * m.V(1))^-1 }
855m.P{ -('a' * m.V(1)) }
856m.P{ ('abc' * m.V(1))^-1 }
857m.P{ -('abc' * m.V(1)) }
858badgrammar{ #m.P('abc') * m.V(1) }
859badgrammar{ -('a' + m.V(1)) }
860m.P{ #('a' * m.V(1)) }
861badgrammar{ #('a' + m.V(1)) }
862m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) }
863badgrammar{ m.B{ m.P'abc' } * m.V(1) }
864badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) }
865
866
867-- simple tests for maximum sizes:
868local p = m.P"a"
869for i=1,14 do p = p * p end
870
871p = {}
872for i=1,100 do p[i] = m.P"a" end
873p = m.P(p)
874
875
876-- strange values for rule labels
877
878p = m.P{ "print",
879 print = m.V(print),
880 [print] = m.V(_G),
881 [_G] = m.P"a",
882 }
883
884assert(p:match("a"))
885
886-- initial rule
887g = {}
888for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end
889g.i11 = m.P""
890for i = 1, 10 do
891 g[1] = "i"..i
892 local p = m.P(g)
893 assert(p:match("aaaaaaaaaaa") == 11 - i + 1)
894end
895
896print"+"
897
898
899-- tests for back references
900assert(not pcall(m.match, m.Cb('x'), ''))
901assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a'))
902
903p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
904t = p:match("ab")
905checkeq(t, {"a", "b"})
906
907
908t = {}
-- Appends 'p' to the table 't' (a record of every value seen) and
-- returns 'p' with "x" appended.
function foo (p)
  t[#t + 1] = p
  return p .. "x"
end
910
911p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" *
912 m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
913 m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" *
914 m.Cg(m.Cb('x') / foo, "x") * m.Cb"x"
915x = {p:match'ab'}
916checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'})
917checkeq(t, {'ab',
918 'ab', 'abx',
919 'ab', 'abx', 'abxx',
920 'ab', 'abx', 'abxx', 'abxxx'})
921
922
923
924-- tests for match-time captures
925
926p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end)
927 + 'acd'
928
929assert(p:match('abc') == 3)
930assert(p:match('acd') == 4)
931
-- Returns true followed by every argument after the first two
-- ('s' and 'i' are ignored).
local function id (s, i, ...)
  return true, ...
end
935
936assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) +
937 m.R'09'^1 / string.char +
938 m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y")
939
940p = m.P{'S',
941 S = m.V'atom' * space
942 + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id),
943 atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id)
944}
945x = p:match"(a g () ((b) c) (d (e)))"
946checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}});
947
948x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))}
949assert(#x == 500)
950
-- Match-time callback that accepts only when its first capture is 'a'.
--   s: subject string (unused)
--   i: position handed in by the match-time capture
--   x: first captured value
-- On 'a' it returns `i` plus the extra values 1, 3, 7.
-- Otherwise it returns nil followed by 2, 4, 6, 8; the test below expects
-- only 1, 3, 7 to show up in the results, so the values after nil are
-- presumably dropped by LPeg on failure.
local function id (s, i, x)
  if x == 'a' then
    return i, 1, 3, 7
  end
  return nil, 2, 4, 6, 8
end
956
957p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0
958assert(table.concat{p:match('abababab')} == string.rep('137', 4))
959
-- Match-time callback that re-matches the captured text against the subject.
--   s: subject string
--   i: current position (just after the text captured as x)
--   x: captured string, passed to m.match as its pattern argument
-- The nested match starts `#x` characters before `i`; every result of
-- m.match is returned as-is.
local function ref (s, i, x)
  local start = i - x:len()
  return m.match(x, s, start)
end
963
964assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4)
965assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4)
966assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo'))
967
-- Match-time callback: succeed only when the captured text, converted with
-- tonumber, equals the current position.
--   s: subject string (unused)
--   i: current position
--   x: captured string
-- First result is `i` on success and `false` otherwise; the second result
-- is always the string 'xuxu'.
ref = function (s, i, x)
  return i == tonumber(x) and i, 'xuxu'
end
969
970assert(m.Cmt(1, ref):match'2')
971assert(not m.Cmt(1, ref):match'1')
972assert(m.Cmt(m.P(1)^0, ref):match'03')
973
-- Match-time callback: succeed only when both captures are equal.
--   s: subject string (unused)
--   i: current position
--   a, b: the two captured strings
-- When a == b it returns `i` and `a` in upper case; otherwise it returns
-- nothing.
function ref (s, i, a, b)
  if a == b then
    return i, a:upper()
  end
end
977
978p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref)
979p = (any - p)^0 * p * any^0 * -1
980
981assert(p:match'abbbc-bc ddaa' == 'BC')
982
983do -- match-time captures cannot be optimized away
-- number of times the match-time function below has run since the last check
984 local touch = 0
-- f always succeeds (returns true) and bumps the counter as a side effect
985 f = m.P(function () touch = touch + 1; return true end)
986
-- check(n): assert f ran exactly n times (default 1) since the previous
-- check, then reset the counter
987 local function check(n) n = n or 1; assert(touch == n); touch = 0 end
988
-- each line asserts the match result and then how often f was invoked
989 assert(m.match(f * false + 'b', 'a') == nil); check()
990 assert(m.match(f * false + 'b', '') == nil); check()
991 assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check()
992 assert(m.match( (f * 'a')^0 * 'b', '') == nil); check()
993 assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check()
994 assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check()
995 assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check()
996 assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check()
997 assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil);
998 check()
999 assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check()
-- here the first alternative fails after calling f, so f runs in both
1000 assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2)
-- here the first alternative succeeds, so the second f is never reached
1001 assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1)
-- f is also expected to run once when it sits under a not-predicate
1002 assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1)
1003 assert(m.match(-f * 'a' + 'b', '') == nil); check(1)
1004end
1005
-- Matcher for Lua-style long brackets: '[' '='* '[' ... ']' '='* ']'.
-- The run of '=' in the opener is stored in the named group "init".
1006c = '[' * m.Cg(m.P'='^0, "init") * '[' *
-- grammar: either a closer whose '=' run equals "init" (compared at match
-- time through the back reference) ...
1007 { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2)
1008 return s1 == s2 end)
-- ... or skip one character and try again; "/ 0" drops all captures
1009 + 1 * m.V(1) } / 0
1010
-- with no captures left, match returns the position after the closer
1011assert(c:match'[==[]]====]]]]==]===[]' == 18)
1012assert(c:match'[[]=]====]=]]]==]===[]' == 14)
-- no closer with a matching '=' run exists in this subject
1013assert(not c:match'[[]=]====]=]=]==]===[]')
1014
1015
1016-- old bug: optimization of concat with fail removed match-time capture
-- the callback overwrites `p` with the subject as a side effect and returns
-- nothing; m.P(false) then makes the whole pattern fail
1017p = m.Cmt(0, function (s) p = s end) * m.P(false)
1018assert(not p:match('alo'))
-- `p` now holding the subject proves the callback ran despite the failure
1019assert(p == 'alo')
1020
1021
1022-- ensure that failed match-time captures are not kept on Lua stack
1023do
-- t is its own metatable with __mode = "kv", so its keys and values are weak
1024 local t = {__mode = "kv"}; setmetatable(t,t)
-- number of times foo has been called
1025 local c = 0
1026
-- match-time callback: after a full collection the only key left in t must
-- be "__mode", i.e. tables returned by earlier calls have been collected
1027 local function foo (s,i)
1028 collectgarbage();
1029 assert(next(t) == "__mode" and next(t, "__mode") == nil)
-- register a fresh table in the weak table and return it as a capture
1030 local x = {}
1031 t[x] = true
1032 c = c + 1
1033 return i, x
1034 end
1035
-- at every position foo runs and the alternative then fails on m.P(false);
-- the grammar advances one character and recurses, ending on the empty match
1036 local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" }
1037 p:match(string.rep('1', 10))
-- 10 characters give 11 positions, hence 11 calls
1038 assert(c == 11)
1039end
1040
1041p = (m.P(function () return true, "a" end) * 'a'
1042 + m.P(function (s, i) return i, "aa", 20 end) * 'b'
1043 + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0
1044
1045t = {p:match('abacc')}
1046checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'})
1047
1048
1049-------------------------------------------------------------------
1050-- Tests for 're' module
1051-------------------------------------------------------------------
1052
1053local re = require "re"
1054
1055local match, compile = re.match, re.compile
1056
1057assert(match("a", ".") == 2)
1058assert(match("a", "''") == 1)
1059assert(match("", " ! . ") == 1)
1060assert(not match("a", " ! . "))
1061assert(match("abcde", " ( . . ) * ") == 5)
1062assert(match("abbcde", " [a-c] +") == 5)
1063assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7)
1064assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8)
1065assert(match("abbc--", " [a-c] + +") == 5)
1066assert(match("abbc--", " [ac-] +") == 2)
1067assert(match("abbc--", " [-acb] + ") == 7)
1068assert(not match("abbcde", " [b-z] + "))
1069assert(match("abb\"de", '"abb"["]"de"') == 7)
1070assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee")
1071assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8)
1072local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")}
1073checkeq(t, {4, 5, 7})
1074local t = {match("abceefe", "((&&'e' {})? .)*")}
1075checkeq(t, {4, 5, 7})
1076local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")}
1077checkeq(t, {4, 5, 7})
1078local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")}
1079checkeq(t, {4, 5, 7})
1080
1081assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5)
1082assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4)
1083assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8)
1084
1085assert(match("abc", "a <- (. a)?") == 4)
1086b = "balanced <- '(' ([^()] / balanced)* ')'"
1087assert(match("(abc)", b))
1088assert(match("(a(b)((c) (d)))", b))
1089assert(not match("(a(b ((c) (d)))", b))
1090
1091b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]]
1092assert(b == m.P(b))
1093assert(b:match"((((a))(b)))")
1094
1095local g = [[
1096 S <- "0" B / "1" A / "" -- balanced strings
1097 A <- "0" S / "1" A A -- one more 0
1098 B <- "1" S / "0" B B -- one more 1
1099]]
1100assert(match("00011011", g) == 9)
1101
1102local g = [[
1103 S <- ("0" B / "1" A)*
1104 A <- "0" / "1" A A
1105 B <- "1" / "0" B B
1106]]
1107assert(match("00011011", g) == 9)
1108assert(match("000110110", g) == 9)
1109assert(match("011110110", g) == 3)
1110assert(match("000110010", g) == 1)
1111
1112s = "aaaaaaaaaaaaaaaaaaaaaaaa"
1113assert(match(s, "'a'^3") == 4)
1114assert(match(s, "'a'^0") == 1)
1115assert(match(s, "'a'^+3") == s:len() + 1)
1116assert(not match(s, "'a'^+30"))
1117assert(match(s, "'a'^-30") == s:len() + 1)
1118assert(match(s, "'a'^-5") == 6)
1119for i = 1, s:len() do
1120 assert(match(s, string.format("'a'^+%d", i)) >= i + 1)
1121 assert(match(s, string.format("'a'^-%d", i)) <= i + 1)
1122 assert(match(s, string.format("'a'^%d", i)) == i + 1)
1123end
1124assert(match("01234567890123456789", "[0-9]^3+") == 19)
1125
1126
1127assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123")
1128t = match("0123456789", "{| {.}* |}")
1129checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
1130assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101")
1131
1132assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c")
1133assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d")
1134assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6)
1135
1136assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1137assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x"))
1138
1139eqcharset(compile"[]]", "]")
1140eqcharset(compile"[][]", m.S"[]")
1141eqcharset(compile"[]-]", m.S"-]")
1142eqcharset(compile"[-]", m.S"-")
1143eqcharset(compile"[az-]", m.S"a-z")
1144eqcharset(compile"[-az]", m.S"a-z")
1145eqcharset(compile"[a-z]", m.R"az")
1146eqcharset(compile"[]['\"]", m.S[[]['"]])
1147
1148eqcharset(compile"[^]]", any - "]")
1149eqcharset(compile"[^][]", any - m.S"[]")
1150eqcharset(compile"[^]-]", any - m.S"-]")
1151eqcharset(compile"[^]-]", any - m.S"-]")
1152eqcharset(compile"[^-]", any - m.S"-")
1153eqcharset(compile"[^az-]", any - m.S"a-z")
1154eqcharset(compile"[^-az]", any - m.S"a-z")
1155eqcharset(compile"[^a-z]", any - m.R"az")
1156eqcharset(compile"[^]['\"]", any - m.S[[]['"]])
1157
1158-- tests for comments in 're'
1159e = compile[[
1160A <- _B -- \t \n %nl .<> <- -> --
1161_B <- 'x' --]]
1162assert(e:match'xy' == 2)
1163
1164-- tests for 're' with pre-definitions
1165defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"}
1166e = compile("%letters (%letters / %digits)*", defs)
1167assert(e:match"x123" == 5)
1168e = compile("%_", defs)
1169assert(e:match"__" == 3)
1170
1171e = compile([[
1172 S <- A+
1173 A <- %letters+ B
1174 B <- %digits+
1175]], defs)
1176
1177e = compile("{[0-9]+'.'?[0-9]*} -> sin", math)
1178assert(e:match("2.34") == math.sin(2.34))
1179
1180
-- Match-time style callback that compares two captured values.
-- The first two parameters (subject and position in the match-time calling
-- convention) are ignored.
--   a, b: values to compare
--   returns: true when a == b, false otherwise
-- No use of this function is visible in this part of the file.
function eq (_, _, a, b)
  return a == b
end
1182
1183c = re.compile([[
1184 longstring <- '[' {:init: '='* :} '[' close
1185 close <- ']' =init ']' / . close
1186]])
1187
1188assert(c:match'[==[]]===]]]]==]===[]' == 17)
1189assert(c:match'[[]=]====]=]]]==]===[]' == 14)
1190assert(not c:match'[[]=]====]=]=]==]===[]')
1191
1192c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. "
1193
1194assert(c:match'[==[]]===]]]]==]')
1195assert(c:match'[[]=]====]=][]==]===[]]')
1196assert(not c:match'[[]=]====]=]=]==]===[]')
1197
1198assert(re.find("hi alalo", "{:x:..:} =x") == 4)
1199assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4)
1200assert(not re.find("hi alalo", "{:x:..:} =x", 5))
1201assert(re.find("hi alalo", "{'al'}", 5) == 6)
1202assert(re.find("hi aloalolo", "{:x:..:} =x") == 8)
1203assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11)
1204
1205-- re.find discards any captures
1206local a,b,c = re.find("alo", "{.}{'o'}")
1207assert(a == 2 and b == 3 and c == nil)
1208
-- Return the substring of `s` found by re.find for pattern `p`.
--   s: subject string
--   p: pattern handed to re.find
--   returns: s:sub(first, last) for the positions reported by re.find, or
--            nothing when re.find reports no match
-- Shadows the `match` local defined earlier for the following tests.
local function match (s, p)
  local first, last = re.find(s, p)
  if first then
    return s:sub(first, last)
  end
end
1213assert(match("alo alo", '[a-z]+') == "alo")
1214assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil)
1215assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo")
1216
1217assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo")
1218assert(re.gsub("alo alo", "%w+", ".") == ". .")
1219assert(re.gsub("hi, how are you", "[aeiou]", string.upper) ==
1220 "hI, hOw ArE yOU")
1221
1222s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]'
1223c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' "
1224assert(re.gsub(s, c, "%2") == 'hi and =]')
1225assert(re.gsub(s, c, "%0") == s)
1226assert(re.gsub('[=[hi]=]', c, "%2") == '=')
1227
1228assert(re.find("", "!.") == 1)
1229assert(re.find("alo", "!.") == 4)
1230
-- Match-time callback used by the grammar below (as "=> addtag").
-- Stores `tag` in the captured table and returns that table as the capture.
--   s:   subject string (unused)
--   i:   current position, returned unchanged
--   t:   table capture to annotate (modified in place)
--   tag: value stored in t.tag; may be nil when no second capture is passed
--   returns: i, t
function addtag (s, i, t, tag)
  t.tag = tag
  return i, t
end
1232
1233c = re.compile([[
1234 doc <- block !.
1235 block <- (start {| (block / { [^<]+ })* |} end?) => addtag
1236 start <- '<' {:tag: [a-z]+ :} '>'
1237 end <- '</' { =tag } '>'
1238]], {addtag = addtag})
1239
1240x = c:match[[
1241<x>hi<b>hello</b>but<b>totheend</x>]]
1242checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but',
1243 {'totheend'}})
1244
1245
1246-- tests for look-ahead captures
1247x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")}
1248checkeq(x, {"", "alo", ""})
1249
1250assert(re.match("aloalo",
1251 "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}")
1252 == "AallooAalloo")
1253
1254-- bug in 0.9 (and older versions), due to captures in look-aheads
1255x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]]
1256assert(x:match"alo alo" == "+ +")
1257
1258-- valid capture in look-ahead (used inside the look-ahead itself)
1259x = re.compile[[
1260 S <- &({:two: .. :} . =two) {[a-z]+} / . S
1261]]
1262assert(x:match("hello aloaLo aloalo xuxu") == "aloalo")
1263
1264
1265p = re.compile[[
1266 block <- {| {:ident:space*:} line
1267 ((=ident !space line) / &(=ident space) block)* |}
1268 line <- {[^%nl]*} %nl
1269 space <- '_' -- should be ' ', but '_' is simpler for editors
1270]]
1271
1272t= p:match[[
12731
1274__1.1
1275__1.2
1276____1.2.1
1277____
12782
1279__2.1
1280]]
1281checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"},
1282 "2", {"2.1", ident = "__"}, ident = ""})
1283
1284
1285-- nested grammars
1286p = re.compile[[
1287 s <- a b !.
1288 b <- ( x <- ('b' x)? )
1289 a <- ( x <- 'a' x? )
1290]]
1291
1292assert(p:match'aaabbb')
1293assert(p:match'aaa')
1294assert(not p:match'bbb')
1295assert(not p:match'aaabbba')
1296
1297-- testing groups
1298t = {re.match("abc", "{:S <- {:.:} {S} / '':}")}
1299checkeq(t, {"a", "bc", "b", "c", "c", ""})
1300
1301t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}")
1302checkeq(t, {a="1", b="2", c="4"})
1303t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}")
1304checkeq(t, {a="1", b="2", c="4"})
1305t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}")
1306checkeq(t, {"1", b="2", "4", "5"})
1307t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}")
1308checkeq(t, {"1", "23", "4", "5"})
1309t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}")
1310checkeq(t, {"1", "23", "4", "5"})
1311
1312
1313-- testing pre-defined names
1314assert(os.setlocale("C") == "C")
1315
-- Check that an 're' character class selects the same characters as a Lua
-- character class.
--   p1: 're' pattern, compiled with re.compile and expanded with cs2str
--   p2: body of a Lua character class; characters of `allchar` NOT in
--       [p2] are removed with string.gsub
-- Raises an assertion error, naming both classes and the two result
-- lengths, when the selected characters differ.
function eqlpeggsub (p1, p2)
  local s1 = cs2str(re.compile(p1))
  -- string.gsub also returns a replacement count; keep only the string
  local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "")
  assert(s1 == s2, string.format(
    "re class %s and Lua class [%s] select different characters (%d vs %d)",
    p1, p2, #s1, #s2))
end
1322
1323
1324eqlpeggsub("%w", "%w")
1325eqlpeggsub("%a", "%a")
1326eqlpeggsub("%l", "%l")
1327eqlpeggsub("%u", "%u")
1328eqlpeggsub("%p", "%p")
1329eqlpeggsub("%d", "%d")
1330eqlpeggsub("%x", "%x")
1331eqlpeggsub("%s", "%s")
1332eqlpeggsub("%c", "%c")
1333
1334eqlpeggsub("%W", "%W")
1335eqlpeggsub("%A", "%A")
1336eqlpeggsub("%L", "%L")
1337eqlpeggsub("%U", "%U")
1338eqlpeggsub("%P", "%P")
1339eqlpeggsub("%D", "%D")
1340eqlpeggsub("%X", "%X")
1341eqlpeggsub("%S", "%S")
1342eqlpeggsub("%C", "%C")
1343
1344eqlpeggsub("[%w]", "%w")
1345eqlpeggsub("[_%w]", "_%w")
1346eqlpeggsub("[^%w]", "%W")
1347eqlpeggsub("[%W%S]", "%W%S")
1348
1349re.updatelocale()
1350
1351-- testing nested substitutions x string captures
1352
1353p = re.compile[[
1354 text <- {~ item* ~}
1355 item <- macro / [^()] / '(' item* ')'
1356 arg <- ' '* {~ (!',' item)* ~}
1357 args <- '(' arg (',' arg)* ')'
1358 macro <- ('apply' args) -> '%1(%2)'
1359 / ('add' args) -> '%1 + %2'
1360 / ('mul' args) -> '%1 * %2'
1361]]
1362
1363assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)")
1364
1365rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']]
1366
1367assert(rev:match"0123456789" == "9876543210")
1368
1369
1370-- testing error messages in re
1371
-- Assert that compiling `p` with re.compile fails with a message that
-- contains `err`.
--   p:   're' pattern expected to be rejected
--   err: text that must appear literally in the error message
-- The search is a plain-text find: `err` values such as "near '-'" contain
-- Lua pattern magic characters ('-' is a lazy repetition), and a pattern
-- search would accept messages that do not actually contain the text.
local function errmsg (p, err)
  local ok, msg = pcall(re.compile, p)
  assert(not ok, "pattern unexpectedly compiled")
  assert(string.find(tostring(msg), err, 1, true),
         "error message does not contain the expected text")
end
1376
1377errmsg('aaaa', "rule 'aaaa'")
1378errmsg('a', 'outside')
1379errmsg('b <- a', 'undefined')
1380errmsg("x <- 'a' x <- 'b'", 'already defined')
1381errmsg("'a' -", "near '-'")
1382
1383
1384print"OK"
1385
1386