diff options
-rw-r--r-- | examples/recovery.lua | 28 | ||||
-rw-r--r-- | examples/recoveryRe.lua | 34 |
2 files changed, 61 insertions, 1 deletions
diff --git a/examples/recovery.lua b/examples/recovery.lua index 7e039c7..7af8455 100644 --- a/examples/recovery.lua +++ b/examples/recovery.lua | |||
@@ -4,6 +4,9 @@ local R, S, P, V = lpeg.R, lpeg.S, lpeg.P, lpeg.V | |||
4 | local C, Cc, Ct, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cmt | 4 | local C, Cc, Ct, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cmt |
5 | local T, Lc = lpeg.T, lpeg.Lc | 5 | local T, Lc = lpeg.T, lpeg.Lc |
6 | 6 | ||
7 | -- The `labels` table contains the list of labels that we will be using | ||
8 | -- as well as the corresponding error message for each label, which will | ||
9 | -- be used in our error reporting later on. | ||
7 | local labels = { | 10 | local labels = { |
8 | {"NoExp", "no expression found"}, | 11 | {"NoExp", "no expression found"}, |
9 | {"Extra", "extra characters found after the expression"}, | 12 | {"Extra", "extra characters found after the expression"}, |
@@ -12,6 +15,9 @@ local labels = { | |||
12 | {"MisClose", "missing a closing ')' after the expression"}, | 15 | {"MisClose", "missing a closing ')' after the expression"}, |
13 | } | 16 | } |
14 | 17 | ||
18 | -- The `labelIndex` function gives us the index of a label in the | ||
19 | -- `labels` table, which serves as the integer representation of the label. | ||
20 | -- We need this because LPegLabel requires us to use integers for the labels. | ||
15 | local function labelIndex(labname) | 21 | local function labelIndex(labname) |
16 | for i, elem in ipairs(labels) do | 22 | for i, elem in ipairs(labels) do |
17 | if elem[1] == labname then | 23 | if elem[1] == labname then |
@@ -21,8 +27,13 @@ local function labelIndex(labname) | |||
21 | error("could not find label: " .. labname) | 27 | error("could not find label: " .. labname) |
22 | end | 28 | end |
23 | 29 | ||
30 | -- The `errors` table will hold the list of errors recorded during parsing | ||
24 | local errors = {} | 31 | local errors = {} |
25 | 32 | ||
33 | -- The `expect` function takes a pattern and a label and returns a pattern | ||
34 | -- that throws the specified label if the original pattern fails to match. | ||
35 | -- Before throwing the label, it records the label to be thrown along with | ||
36 | -- the position of the failure (index in input string) into the `errors` table. | ||
26 | local function expect(patt, labname) | 37 | local function expect(patt, labname) |
27 | local i = labelIndex(labname) | 38 | local i = labelIndex(labname) |
28 | function recordError(input, pos) | 39 | function recordError(input, pos) |
@@ -35,6 +46,9 @@ end | |||
35 | local num = R("09")^1 / tonumber | 46 | local num = R("09")^1 / tonumber |
36 | local op = S("+-*/") | 47 | local op = S("+-*/") |
37 | 48 | ||
49 | -- The `compute` function takes an alternating list of numbers and | ||
50 | -- operators and computes the result of applying the operations | ||
51 | -- to the numbers in a left to right order (no operator precedence). | ||
38 | local function compute(tokens) | 52 | local function compute(tokens) |
39 | local result = tokens[1] | 53 | local result = tokens[1] |
40 | for i = 2, #tokens, 2 do | 54 | for i = 2, #tokens, 2 do |
@@ -53,18 +67,32 @@ local function compute(tokens) | |||
53 | return result | 67 | return result |
54 | end | 68 | end |
55 | 69 | ||
70 | -- Our grammar is a simple arithmetic expression of integers that | ||
71 | -- does not take operator precedence into account but allows grouping | ||
72 | -- via parentheses. We have incorporated some error recovery strategies | ||
73 | -- into our grammar so that it may resume parsing even after encountering | ||
74 | -- an error, which allows us to report more errors. | ||
56 | local g = P { | 75 | local g = P { |
57 | "Exp", | 76 | "Exp", |
58 | Exp = Ct(V"Term" * (C(op) * V"OpRecov")^0) / compute; | 77 | Exp = Ct(V"Term" * (C(op) * V"OpRecov")^0) / compute; |
78 | -- `OpRecov` handles missing terms/operands by returning a dummy (zero). | ||
59 | OpRecov = Lc(V"Operand", Cc(0), labelIndex("ExpTerm")); | 79 | OpRecov = Lc(V"Operand", Cc(0), labelIndex("ExpTerm")); |
60 | Operand = expect(V"Term", "ExpTerm"); | 80 | Operand = expect(V"Term", "ExpTerm"); |
61 | Term = num + V"Group"; | 81 | Term = num + V"Group"; |
82 | -- `Group` handles a missing closing parenthesis by simply ignoring it. | ||
83 | -- Like all the others, the error is still recorded of course. | ||
62 | Group = "(" * V"InnerExp" * Lc(expect(")", "MisClose"), P"", labelIndex("MisClose")); | 84 | Group = "(" * V"InnerExp" * Lc(expect(")", "MisClose"), P"", labelIndex("MisClose")); |
85 | -- `InnerExp` handles missing expressions by skipping to the next closing | ||
86 | -- parenthesis. A dummy (zero) is returned in place of the expression. | ||
63 | InnerExp = Lc(expect(V"Exp", "ExpExp"), (P(1) - ")")^0 * Cc(0), labelIndex("ExpExp")); | 87 | InnerExp = Lc(expect(V"Exp", "ExpExp"), (P(1) - ")")^0 * Cc(0), labelIndex("ExpExp")); |
64 | } | 88 | } |
65 | 89 | ||
66 | g = expect(g, "NoExp") * expect(-P(1), "Extra") | 90 | g = expect(g, "NoExp") * expect(-P(1), "Extra") |
67 | 91 | ||
92 | -- The `eval` function takes an input string to match against the grammar | ||
93 | -- we've just defined. If the input string matches, then the result of the | ||
94 | -- computation is returned, otherwise we return the error messages and | ||
95 | -- positions of all the failures encountered. | ||
68 | local function eval(input) | 96 | local function eval(input) |
69 | local result, label, suffix = g:match(input) | 97 | local result, label, suffix = g:match(input) |
70 | if #errors == 0 then | 98 | if #errors == 0 then |
diff --git a/examples/recoveryRe.lua b/examples/recoveryRe.lua index 50ec53a..795e01d 100644 --- a/examples/recoveryRe.lua +++ b/examples/recoveryRe.lua | |||
@@ -1,5 +1,8 @@ | |||
1 | local re = require"relabel" | 1 | local re = require"relabel" |
2 | 2 | ||
3 | -- The `errinfo` table contains the list of labels that we will be using | ||
4 | -- as well as the corresponding error message for each label, which will | ||
5 | -- be used in our error reporting later on. | ||
3 | local errinfo = { | 6 | local errinfo = { |
4 | {"NoExp", "no expression found"}, | 7 | {"NoExp", "no expression found"}, |
5 | {"Extra", "extra characters found after the expression"}, | 8 | {"Extra", "extra characters found after the expression"}, |
@@ -8,6 +11,10 @@ local errinfo = { | |||
8 | {"MisClose", "missing a closing ')' after the expression"}, | 11 | {"MisClose", "missing a closing ')' after the expression"}, |
9 | } | 12 | } |
10 | 13 | ||
14 | -- We split the errinfo table into two tables: `labels` which is a | ||
15 | -- mapping from each label name to its integer representation, and | ||
16 | -- `errmsgs` which is a mapping from each label name to its | ||
17 | -- corresponding error message. | ||
11 | local labels = {} | 18 | local labels = {} |
12 | local errmsgs = {} | 19 | local errmsgs = {} |
13 | 20 | ||
@@ -16,15 +23,26 @@ for i, err in ipairs(errinfo) do | |||
16 | errmsgs[err[1]] = err[2] | 23 | errmsgs[err[1]] = err[2] |
17 | end | 24 | end |
18 | 25 | ||
26 | -- The `labels` table is especially useful for making our re grammar more | ||
27 | -- readable through the use of the `setlabels` function which allows us | ||
28 | -- to use the label names directly in the re grammar instead of the integers. | ||
19 | re.setlabels(labels) | 29 | re.setlabels(labels) |
20 | 30 | ||
31 | -- The `errors` table will hold the list of errors recorded during parsing | ||
21 | local errors = {} | 32 | local errors = {} |
22 | 33 | ||
34 | -- The `recordError` function simply records the label and position of | ||
35 | -- the failure (index in input string) into the `errors` table. | ||
36 | -- Note: The unused `input` parameter is necessary, as this will be called | ||
37 | -- by LPeg's match-time capture. | ||
23 | function recordError(input, pos, label) | 38 | function recordError(input, pos, label) |
24 | table.insert(errors, {label, pos}) | 39 | table.insert(errors, {label, pos}) |
25 | return true | 40 | return true |
26 | end | 41 | end |
27 | 42 | ||
43 | -- The `compute` function takes an alternating list of numbers and | ||
44 | -- operators and computes the result of applying the operations | ||
45 | -- to the numbers in a left to right order (no operator precedence). | ||
28 | local function compute(tokens) | 46 | local function compute(tokens) |
29 | local result = tokens[1] | 47 | local result = tokens[1] |
30 | for i = 2, #tokens, 2 do | 48 | for i = 2, #tokens, 2 do |
@@ -43,17 +61,27 @@ local function compute(tokens) | |||
43 | return result | 61 | return result |
44 | end | 62 | end |
45 | 63 | ||
64 | -- Our grammar is a simple arithmetic expression of integers that | ||
65 | -- does not take operator precedence into account but allows grouping | ||
66 | -- via parentheses. We have incorporated some error recovery strategies | ||
67 | -- into our grammar so that it may resume parsing even after encountering | ||
68 | -- an error, which allows us to report more errors. | ||
46 | local g = re.compile([[ | 69 | local g = re.compile([[ |
47 | S <- (Exp / ErrNoExp) (!. / ErrExtra) | 70 | S <- (Exp / ErrNoExp) (!. / ErrExtra) |
48 | Exp <- {| Term (op Operand)* |} -> compute | 71 | Exp <- {| Term (op Operand)* |} -> compute |
72 | -- If we encounter a missing term/operand, we return a dummy instead. | ||
49 | Operand <- Term / ErrExpTerm /{ExpTerm} dummy | 73 | Operand <- Term / ErrExpTerm /{ExpTerm} dummy |
50 | Term <- num / Group | 74 | Term <- num / Group |
75 | -- If we encounter a missing closing parenthesis, we ignore it. | ||
51 | Group <- "(" InnerExp (")" / ErrMisClose /{MisClose} "") | 76 | Group <- "(" InnerExp (")" / ErrMisClose /{MisClose} "") |
77 | -- If we encounter a missing inner expression, we skip to the next | ||
78 | -- closing parenthesis, and return a dummy in its place. | ||
52 | InnerExp <- Exp / ErrExpExp /{ExpExp} [^)]* dummy | 79 | InnerExp <- Exp / ErrExpExp /{ExpExp} [^)]* dummy |
53 | 80 | ||
54 | op <- {[-+*/]} | 81 | op <- {[-+*/]} |
55 | num <- [0-9]+ -> tonumber | 82 | num <- [0-9]+ -> tonumber |
56 | 83 | ||
84 | -- Before throwing an error, we make sure to record it first. | ||
57 | ErrNoExp <- ("" -> "NoExp" => recordError) %{NoExp} | 85 | ErrNoExp <- ("" -> "NoExp" => recordError) %{NoExp} |
58 | ErrExtra <- ("" -> "Extra" => recordError) %{Extra} | 86 | ErrExtra <- ("" -> "Extra" => recordError) %{Extra} |
59 | ErrExpTerm <- ("" -> "ExpTerm" => recordError) %{ExpTerm} | 87 | ErrExpTerm <- ("" -> "ExpTerm" => recordError) %{ExpTerm} |
@@ -67,6 +95,10 @@ local g = re.compile([[ | |||
67 | tonumber = tonumber; | 95 | tonumber = tonumber; |
68 | }) | 96 | }) |
69 | 97 | ||
98 | -- The `eval` function takes an input string to match against the grammar | ||
99 | -- we've just defined. If the input string matches, then the result of the | ||
100 | -- computation is returned, otherwise we return the error messages and | ||
101 | -- positions of all the failures encountered. | ||
70 | local function eval(input) | 102 | local function eval(input) |
71 | local result, label, suffix = g:match(input) | 103 | local result, label, suffix = g:match(input) |
72 | if #errors == 0 then | 104 | if #errors == 0 then |