diff options
Diffstat (limited to 'test.lua')
-rwxr-xr-x | test.lua | 142 |
1 files changed, 92 insertions, 50 deletions
@@ -1,6 +1,6 @@ | |||
1 | #!/usr/bin/env lua5.1 | 1 | #!/usr/bin/env lua5.1 |
2 | 2 | ||
3 | -- $Id: test.lua,v 1.101 2013/04/12 16:30:33 roberto Exp $ | 3 | -- $Id: test.lua,v 1.106 2015/03/04 17:31:33 roberto Exp $ |
4 | 4 | ||
5 | -- require"strict" -- just to be pedantic | 5 | -- require"strict" -- just to be pedantic |
6 | 6 | ||
@@ -170,8 +170,8 @@ assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7) | |||
170 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")} | 170 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")} |
171 | checkeq(a, {"123", "d"}) | 171 | checkeq(a, {"123", "d"}) |
172 | 172 | ||
173 | a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")} | 173 | -- bug in LPeg 0.12 (nil value does not create a 'ktable') |
174 | checkeq(a, {"123"}) | 174 | assert(m.match(m.Cc(nil), "") == nil) |
175 | 175 | ||
176 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")} | 176 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")} |
177 | checkeq(a, {"abcd", "l"}) | 177 | checkeq(a, {"abcd", "l"}) |
@@ -194,6 +194,16 @@ checkeq(a, {1, 5}) | |||
194 | t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")} | 194 | t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")} |
195 | checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""}) | 195 | checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""}) |
196 | 196 | ||
197 | -- bug in 0.12 ('hascapture' did not check for captures inside a rule) | ||
198 | do | ||
199 | local pat = m.P{ | ||
200 | 'S'; | ||
201 | S1 = m.C('abc') + 3, | ||
202 | S = #m.V('S1') -- rule has capture, but '#' must ignore it | ||
203 | } | ||
204 | assert(pat:match'abc' == 1) | ||
205 | end | ||
206 | |||
197 | 207 | ||
198 | -- test for small capture boundary | 208 | -- test for small capture boundary |
199 | for i = 250,260 do | 209 | for i = 250,260 do |
@@ -201,9 +211,8 @@ for i = 250,260 do | |||
201 | assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i) | 211 | assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i) |
202 | end | 212 | end |
203 | 213 | ||
204 | |||
205 | -- tests for any*n and any*-n | 214 | -- tests for any*n and any*-n |
206 | for n = 1, 550 do | 215 | for n = 1, 550, 13 do |
207 | local x_1 = string.rep('x', n - 1) | 216 | local x_1 = string.rep('x', n - 1) |
208 | local x = x_1 .. 'a' | 217 | local x = x_1 .. 'a' |
209 | assert(not m.P(n):match(x_1)) | 218 | assert(not m.P(n):match(x_1)) |
@@ -345,8 +354,9 @@ checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'}) | |||
345 | 354 | ||
346 | 355 | ||
347 | -- test for error messages | 356 | -- test for error messages |
348 | local function checkerr (msg, ...) | 357 | local function checkerr (msg, f, ...) |
349 | assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...)))) | 358 | local st, err = pcall(f, ...) |
359 | assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err)) | ||
350 | end | 360 | end |
351 | 361 | ||
352 | checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") | 362 | checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") |
@@ -370,6 +380,32 @@ p = {'a', | |||
370 | } | 380 | } |
371 | checkerr("rule 'a' may be left recursive", m.match, p, "a") | 381 | checkerr("rule 'a' may be left recursive", m.match, p, "a") |
372 | 382 | ||
383 | -- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit) | ||
384 | -- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1 | ||
385 | -- that is optimized to ICommit L1 | ||
386 | |||
387 | p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' } | ||
388 | assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc') | ||
389 | |||
390 | |||
391 | do | ||
392 | -- large dynamic Cc | ||
393 | local lim = 2^16 - 1 | ||
394 | local c = 0 | ||
395 | local function seq (n) | ||
396 | if n == 1 then c = c + 1; return m.Cc(c) | ||
397 | else | ||
398 | local m = math.floor(n / 2) | ||
399 | return seq(m) * seq(n - m) | ||
400 | end | ||
401 | end | ||
402 | p = m.Ct(seq(lim)) | ||
403 | t = p:match('') | ||
404 | assert(t[lim] == lim) | ||
405 | checkerr("too many", function () p = p / print end) | ||
406 | checkerr("too many", seq, lim + 1) | ||
407 | end | ||
408 | |||
373 | 409 | ||
374 | -- tests for non-pattern as arguments to pattern functions | 410 | -- tests for non-pattern as arguments to pattern functions |
375 | 411 | ||
@@ -488,7 +524,10 @@ assert(m.match(1 * m.B(1), 'a') == 2) | |||
488 | assert(m.match(-m.B(1), 'a') == 1) | 524 | assert(m.match(-m.B(1), 'a') == 1) |
489 | assert(m.match(m.B(250), string.rep('a', 250)) == nil) | 525 | assert(m.match(m.B(250), string.rep('a', 250)) == nil) |
490 | assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251) | 526 | assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251) |
491 | assert(not pcall(m.B, 260)) | 527 | |
528 | -- look-behind with an open call | ||
529 | checkerr("pattern may not have fixed length", m.B, m.V'S1') | ||
530 | checkerr("too long to look behind", m.B, 260) | ||
492 | 531 | ||
493 | B = #letter * -m.B(letter) + -letter * m.B(letter) | 532 | B = #letter * -m.B(letter) + -letter * m.B(letter) |
494 | x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) }) | 533 | x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) }) |
@@ -523,7 +562,6 @@ assert(m.match(#m.S'567' * 1, "6") == 2) | |||
523 | 562 | ||
524 | -- tests for Tail Calls | 563 | -- tests for Tail Calls |
525 | 564 | ||
526 | --labeled failure | ||
527 | p = m.P{ 'a' * m.V(1) + '' } | 565 | p = m.P{ 'a' * m.V(1) + '' } |
528 | assert(p:match(string.rep('a', 1000)) == 1001) | 566 | assert(p:match(string.rep('a', 1000)) == 1001) |
529 | 567 | ||
@@ -546,7 +584,6 @@ p = m.P{ | |||
546 | [4] = '0' * m.V(3) + '1' * m.V(2), | 584 | [4] = '0' * m.V(3) + '1' * m.V(2), |
547 | } | 585 | } |
548 | 586 | ||
549 | -- labeled failure | ||
550 | assert(p:match(string.rep("00", 10000))) | 587 | assert(p:match(string.rep("00", 10000))) |
551 | assert(p:match(string.rep("01", 10000))) | 588 | assert(p:match(string.rep("01", 10000))) |
552 | assert(p:match(string.rep("011", 10000))) | 589 | assert(p:match(string.rep("011", 10000))) |
@@ -557,16 +594,15 @@ assert(not p:match(string.rep("011", 10001))) | |||
557 | -- this grammar does need backtracking info. | 594 | -- this grammar does need backtracking info. |
558 | local lim = 10000 | 595 | local lim = 10000 |
559 | p = m.P{ '0' * m.V(1) + '0' } | 596 | p = m.P{ '0' * m.V(1) + '0' } |
560 | assert(not pcall(m.match, p, string.rep("0", lim))) | 597 | checkerr("too many pending", m.match, p, string.rep("0", lim)) |
561 | m.setmaxstack(2*lim) | 598 | m.setmaxstack(2*lim) |
562 | assert(not pcall(m.match, p, string.rep("0", lim))) | 599 | checkerr("too many pending", m.match, p, string.rep("0", lim)) |
563 | m.setmaxstack(2*lim + 4) | 600 | m.setmaxstack(2*lim + 4) |
564 | assert(pcall(m.match, p, string.rep("0", lim))) | 601 | assert(m.match(p, string.rep("0", lim)) == lim + 1) |
565 | 602 | ||
566 | -- this repetition should not need stack space (only the call does) | 603 | -- this repetition should not need stack space (only the call does) |
567 | p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' } | 604 | p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' } |
568 | m.setmaxstack(200) | 605 | m.setmaxstack(200) |
569 | -- labeled failure | ||
570 | assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362) | 606 | assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362) |
571 | 607 | ||
572 | m.setmaxstack(5) -- restore original limit | 608 | m.setmaxstack(5) -- restore original limit |
@@ -591,10 +627,10 @@ print("+") | |||
591 | 627 | ||
592 | 628 | ||
593 | -- tests for argument captures | 629 | -- tests for argument captures |
594 | assert(not pcall(m.Carg, 0)) | 630 | checkerr("invalid argument", m.Carg, 0) |
595 | assert(not pcall(m.Carg, -1)) | 631 | checkerr("invalid argument", m.Carg, -1) |
596 | assert(not pcall(m.Carg, 2^18)) | 632 | checkerr("invalid argument", m.Carg, 2^18) |
597 | assert(not pcall(m.match, m.Carg(1), 'a', 1)) | 633 | checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1) |
598 | assert(m.match(m.Carg(1), 'a', 1, print) == print) | 634 | assert(m.match(m.Carg(1), 'a', 1, print) == print) |
599 | x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)} | 635 | x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)} |
600 | checkeq(x, {10, 20}) | 636 | checkeq(x, {10, 20}) |
@@ -647,14 +683,16 @@ assert(m.match(p, "aaaa") == 5) | |||
647 | assert(m.match(p, "abaa") == 2) | 683 | assert(m.match(p, "abaa") == 2) |
648 | assert(not m.match(p, "baaa")) | 684 | assert(not m.match(p, "baaa")) |
649 | 685 | ||
650 | assert(not pcall(m.match, function () return 2^20 end, s)) | 686 | checkerr("invalid position", m.match, function () return 2^20 end, s) |
651 | assert(not pcall(m.match, function () return 0 end, s)) | 687 | checkerr("invalid position", m.match, function () return 0 end, s) |
652 | assert(not pcall(m.match, function (s, i) return i - 1 end, s)) | 688 | checkerr("invalid position", m.match, function (s, i) return i - 1 end, s) |
653 | assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s)) | 689 | checkerr("invalid position", m.match, |
690 | m.P(1)^0 * function (_, i) return i - 1 end, s) | ||
654 | assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s)) | 691 | assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s)) |
655 | assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s)) | 692 | checkerr("invalid position", m.match, |
693 | m.P(1)^0 * function (_, i) return i + 1 end, s) | ||
656 | assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s)) | 694 | assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s)) |
657 | assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)) | 695 | checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s) |
658 | assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s)) | 696 | assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s)) |
659 | assert(m.match(m.P(1)^0 * function (_, i) return true end, s) == | 697 | assert(m.match(m.P(1)^0 * function (_, i) return true end, s) == |
660 | string.len(s) + 1) | 698 | string.len(s) + 1) |
@@ -737,9 +775,9 @@ assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx") | |||
737 | assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") == | 775 | assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") == |
738 | "411 - abc ") | 776 | "411 - abc ") |
739 | 777 | ||
740 | assert(pcall(m.match, m.P(1)/"%0", "abc")) | 778 | assert(m.match(m.P(1)/"%0", "abc") == "a") |
741 | assert(not pcall(m.match, m.P(1)/"%1", "abc")) -- out of range | 779 | checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc") |
742 | assert(not pcall(m.match, m.P(1)/"%9", "abc")) -- out of range | 780 | checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc") |
743 | 781 | ||
744 | p = m.C(1) | 782 | p = m.C(1) |
745 | p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1" | 783 | p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1" |
@@ -757,7 +795,7 @@ assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3") | |||
757 | p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1" | 795 | p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1" |
758 | assert(p:match'x' == 'alo - x - alo') | 796 | assert(p:match'x' == 'alo - x - alo') |
759 | 797 | ||
760 | assert(not pcall(m.match, m.Cc(true) / "%1", "a")) | 798 | checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a") |
761 | 799 | ||
762 | -- long strings for string capture | 800 | -- long strings for string capture |
763 | l = 10000 | 801 | l = 10000 |
@@ -785,35 +823,37 @@ checkeq(t, {a="b", c="du", xux="yuy"}) | |||
785 | 823 | ||
786 | -- errors in accumulator capture | 824 | -- errors in accumulator capture |
787 | 825 | ||
788 | -- very long match (forces fold to be a pair open-close) producing with | ||
789 | -- no initial capture | 826 | -- no initial capture |
790 | assert(not pcall(m.match, m.Cf(m.P(500), print), string.rep('a', 600))) | 827 | checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa') |
828 | -- no initial capture (very long match forces fold to be a pair open-close) | ||
829 | checkerr("no initial value", m.match, m.Cf(m.P(500), print), | ||
830 | string.rep('a', 600)) | ||
791 | 831 | ||
792 | -- nested capture produces no initial value | 832 | -- nested capture produces no initial value |
793 | assert(not pcall(m.match, m.Cf(m.P(1) / {}, print), "alo")) | 833 | checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo") |
794 | 834 | ||
795 | 835 | ||
796 | -- tests for loop checker | 836 | -- tests for loop checker |
797 | 837 | ||
798 | local function haveloop (p) | 838 | local function isnullable (p) |
799 | assert(not pcall(function (p) return p^0 end, m.P(p))) | 839 | checkerr("may accept empty string", function (p) return p^0 end, m.P(p)) |
800 | end | 840 | end |
801 | 841 | ||
802 | haveloop(m.P("x")^-4) | 842 | isnullable(m.P("x")^-4) |
803 | assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3) | 843 | assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3) |
804 | assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3) | 844 | assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3) |
805 | haveloop("") | 845 | isnullable("") |
806 | haveloop(m.P("x")^0) | 846 | isnullable(m.P("x")^0) |
807 | haveloop(m.P("x")^-1) | 847 | isnullable(m.P("x")^-1) |
808 | haveloop(m.P("x") + 1 + 2 + m.P("a")^-1) | 848 | isnullable(m.P("x") + 1 + 2 + m.P("a")^-1) |
809 | haveloop(-m.P("ab")) | 849 | isnullable(-m.P("ab")) |
810 | haveloop(- -m.P("ab")) | 850 | isnullable(- -m.P("ab")) |
811 | haveloop(# #(m.P("ab") + "xy")) | 851 | isnullable(# #(m.P("ab") + "xy")) |
812 | haveloop(- #m.P("ab")^0) | 852 | isnullable(- #m.P("ab")^0) |
813 | haveloop(# -m.P("ab")^1) | 853 | isnullable(# -m.P("ab")^1) |
814 | haveloop(#m.V(3)) | 854 | isnullable(#m.V(3)) |
815 | haveloop(m.V(3) + m.V(1) + m.P('a')^-1) | 855 | isnullable(m.V(3) + m.V(1) + m.P('a')^-1) |
816 | haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)}) | 856 | isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)}) |
817 | assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc") | 857 | assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc") |
818 | == 3) | 858 | == 3) |
819 | assert(m.match(m.P""^-3, "a") == 1) | 859 | assert(m.match(m.P""^-3, "a") == 1) |
@@ -897,8 +937,8 @@ print"+" | |||
897 | 937 | ||
898 | 938 | ||
899 | -- tests for back references | 939 | -- tests for back references |
900 | assert(not pcall(m.match, m.Cb('x'), '')) | 940 | checkerr("back reference 'x' not found", m.match, m.Cb('x'), '') |
901 | assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')) | 941 | checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a') |
902 | 942 | ||
903 | p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k")) | 943 | p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k")) |
904 | t = p:match("ab") | 944 | t = p:match("ab") |
@@ -1054,6 +1094,8 @@ local re = require "re" | |||
1054 | 1094 | ||
1055 | local match, compile = re.match, re.compile | 1095 | local match, compile = re.match, re.compile |
1056 | 1096 | ||
1097 | |||
1098 | |||
1057 | assert(match("a", ".") == 2) | 1099 | assert(match("a", ".") == 2) |
1058 | assert(match("a", "''") == 1) | 1100 | assert(match("a", "''") == 1) |
1059 | assert(match("", " ! . ") == 1) | 1101 | assert(match("", " ! . ") == 1) |
@@ -1348,6 +1390,7 @@ eqlpeggsub("[%W%S]", "%W%S") | |||
1348 | 1390 | ||
1349 | re.updatelocale() | 1391 | re.updatelocale() |
1350 | 1392 | ||
1393 | |||
1351 | -- testing nested substitutions x string captures | 1394 | -- testing nested substitutions x string captures |
1352 | 1395 | ||
1353 | p = re.compile[[ | 1396 | p = re.compile[[ |
@@ -1370,8 +1413,7 @@ assert(rev:match"0123456789" == "9876543210") | |||
1370 | -- testing error messages in re | 1413 | -- testing error messages in re |
1371 | 1414 | ||
1372 | local function errmsg (p, err) | 1415 | local function errmsg (p, err) |
1373 | local s, msg = pcall(re.compile, p) | 1416 | checkerr(err, re.compile, p) |
1374 | assert(not s and string.find(msg, err)) | ||
1375 | end | 1417 | end |
1376 | 1418 | ||
1377 | errmsg('aaaa', "rule 'aaaa'") | 1419 | errmsg('aaaa', "rule 'aaaa'") |