diff options
| author | Undecidable Robot <undecidabot@gmail.com> | 2016-07-11 11:44:31 +0800 |
|---|---|---|
| committer | Undecidable Robot <undecidabot@gmail.com> | 2016-07-11 11:52:20 +0800 |
| commit | 6dcf7eb5bd2bf6aa35cb3f3d8b0394bf2f625093 (patch) | |
| tree | dcff90bcf780131a62273bd1ed4a7bfe6dff06df | |
| parent | 882cc2e5a99585250e03c7454aa2fcbc33b46a82 (diff) | |
| download | lpeglabel-6dcf7eb5bd2bf6aa35cb3f3d8b0394bf2f625093.tar.gz lpeglabel-6dcf7eb5bd2bf6aa35cb3f3d8b0394bf2f625093.tar.bz2 lpeglabel-6dcf7eb5bd2bf6aa35cb3f3d8b0394bf2f625093.zip | |
Adding comments to the recovery examples
| -rw-r--r-- | examples/recovery.lua | 28 | ||||
| -rw-r--r-- | examples/recoveryRe.lua | 34 |
2 files changed, 61 insertions, 1 deletions
diff --git a/examples/recovery.lua b/examples/recovery.lua index 7e039c7..7af8455 100644 --- a/examples/recovery.lua +++ b/examples/recovery.lua | |||
| @@ -4,6 +4,9 @@ local R, S, P, V = lpeg.R, lpeg.S, lpeg.P, lpeg.V | |||
| 4 | local C, Cc, Ct, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cmt | 4 | local C, Cc, Ct, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cmt |
| 5 | local T, Lc = lpeg.T, lpeg.Lc | 5 | local T, Lc = lpeg.T, lpeg.Lc |
| 6 | 6 | ||
| 7 | -- The `labels` table contains the list of labels that we will be using | ||
| 8 | -- as well as the corresponding error message for each label, which will | ||
| 9 | -- be used in our error reporting later on. | ||
| 7 | local labels = { | 10 | local labels = { |
| 8 | {"NoExp", "no expression found"}, | 11 | {"NoExp", "no expression found"}, |
| 9 | {"Extra", "extra characters found after the expression"}, | 12 | {"Extra", "extra characters found after the expression"}, |
| @@ -12,6 +15,9 @@ local labels = { | |||
| 12 | {"MisClose", "missing a closing ')' after the expression"}, | 15 | {"MisClose", "missing a closing ')' after the expression"}, |
| 13 | } | 16 | } |
| 14 | 17 | ||
| 18 | -- The `labelIndex` function gives us the index of a label in the | ||
| 19 | -- `labels` table, which serves as the integer representation of the label. | ||
| 20 | -- We need this because LPegLabel requires us to use integers for the labels. | ||
| 15 | local function labelIndex(labname) | 21 | local function labelIndex(labname) |
| 16 | for i, elem in ipairs(labels) do | 22 | for i, elem in ipairs(labels) do |
| 17 | if elem[1] == labname then | 23 | if elem[1] == labname then |
| @@ -21,8 +27,13 @@ local function labelIndex(labname) | |||
| 21 | error("could not find label: " .. labname) | 27 | error("could not find label: " .. labname) |
| 22 | end | 28 | end |
| 23 | 29 | ||
| 30 | -- The `errors` table will hold the list of errors recorded during parsing | ||
| 24 | local errors = {} | 31 | local errors = {} |
| 25 | 32 | ||
| 33 | -- The `expect` function takes a pattern and a label and returns a pattern | ||
| 34 | -- that throws the specified label if the original pattern fails to match. | ||
| 35 | -- Before throwing the label, it records the label to be thrown along with | ||
| 36 | -- the position of the failure (index in input string) into the `errors` table. | ||
| 26 | local function expect(patt, labname) | 37 | local function expect(patt, labname) |
| 27 | local i = labelIndex(labname) | 38 | local i = labelIndex(labname) |
| 28 | function recordError(input, pos) | 39 | function recordError(input, pos) |
| @@ -35,6 +46,9 @@ end | |||
| 35 | local num = R("09")^1 / tonumber | 46 | local num = R("09")^1 / tonumber |
| 36 | local op = S("+-*/") | 47 | local op = S("+-*/") |
| 37 | 48 | ||
| 49 | -- The `compute` function takes an alternating list of numbers and | ||
| 50 | -- operators and computes the result of applying the operations | ||
| 51 | -- to the numbers in a left to right order (no operator precedence). | ||
| 38 | local function compute(tokens) | 52 | local function compute(tokens) |
| 39 | local result = tokens[1] | 53 | local result = tokens[1] |
| 40 | for i = 2, #tokens, 2 do | 54 | for i = 2, #tokens, 2 do |
| @@ -53,18 +67,32 @@ local function compute(tokens) | |||
| 53 | return result | 67 | return result |
| 54 | end | 68 | end |
| 55 | 69 | ||
| 70 | -- Our grammar is a simple arithmetic expression of integers that | ||
| 71 | -- does not take operator precedence into account but allows grouping | ||
| 72 | -- via parentheses. We have incorporated some error recovery strategies | ||
| 73 | -- to our grammar so that it may resume parsing even after encountering | ||
| 74 | -- an error, which allows us to report more errors. | ||
| 56 | local g = P { | 75 | local g = P { |
| 57 | "Exp", | 76 | "Exp", |
| 58 | Exp = Ct(V"Term" * (C(op) * V"OpRecov")^0) / compute; | 77 | Exp = Ct(V"Term" * (C(op) * V"OpRecov")^0) / compute; |
| 78 | -- `OpRecov` handles missing terms/operands by returning a dummy (zero). | ||
| 59 | OpRecov = Lc(V"Operand", Cc(0), labelIndex("ExpTerm")); | 79 | OpRecov = Lc(V"Operand", Cc(0), labelIndex("ExpTerm")); |
| 60 | Operand = expect(V"Term", "ExpTerm"); | 80 | Operand = expect(V"Term", "ExpTerm"); |
| 61 | Term = num + V"Group"; | 81 | Term = num + V"Group"; |
| 82 | -- `Group` handles missing closing parenthesis by simply ignoring it. | ||
| 83 | -- Like all the others, the error is still recorded of course. | ||
| 62 | Group = "(" * V"InnerExp" * Lc(expect(")", "MisClose"), P"", labelIndex("MisClose")); | 84 | Group = "(" * V"InnerExp" * Lc(expect(")", "MisClose"), P"", labelIndex("MisClose")); |
| 85 | -- `InnerExp` handles missing expressions by skipping to the next closing | ||
| 86 | -- parenthesis. A dummy (zero) is returned in place of the expression. | ||
| 63 | InnerExp = Lc(expect(V"Exp", "ExpExp"), (P(1) - ")")^0 * Cc(0), labelIndex("ExpExp")); | 87 | InnerExp = Lc(expect(V"Exp", "ExpExp"), (P(1) - ")")^0 * Cc(0), labelIndex("ExpExp")); |
| 64 | } | 88 | } |
| 65 | 89 | ||
| 66 | g = expect(g, "NoExp") * expect(-P(1), "Extra") | 90 | g = expect(g, "NoExp") * expect(-P(1), "Extra") |
| 67 | 91 | ||
| 92 | -- The `eval` function takes an input string to match against the grammar | ||
| 93 | -- we've just defined. If the input string matches, then the result of the | ||
| 94 | -- computation is returned, otherwise we return the error messages and | ||
| 95 | -- positions of all the failures encountered. | ||
| 68 | local function eval(input) | 96 | local function eval(input) |
| 69 | local result, label, suffix = g:match(input) | 97 | local result, label, suffix = g:match(input) |
| 70 | if #errors == 0 then | 98 | if #errors == 0 then |
diff --git a/examples/recoveryRe.lua b/examples/recoveryRe.lua index 50ec53a..795e01d 100644 --- a/examples/recoveryRe.lua +++ b/examples/recoveryRe.lua | |||
| @@ -1,5 +1,8 @@ | |||
| 1 | local re = require"relabel" | 1 | local re = require"relabel" |
| 2 | 2 | ||
| 3 | -- The `errinfo` table contains the list of labels that we will be using | ||
| 4 | -- as well as the corresponding error message for each label, which will | ||
| 5 | -- be used in our error reporting later on. | ||
| 3 | local errinfo = { | 6 | local errinfo = { |
| 4 | {"NoExp", "no expression found"}, | 7 | {"NoExp", "no expression found"}, |
| 5 | {"Extra", "extra characters found after the expression"}, | 8 | {"Extra", "extra characters found after the expression"}, |
| @@ -8,6 +11,10 @@ local errinfo = { | |||
| 8 | {"MisClose", "missing a closing ')' after the expression"}, | 11 | {"MisClose", "missing a closing ')' after the expression"}, |
| 9 | } | 12 | } |
| 10 | 13 | ||
| 14 | -- We split the errinfo table into two tables: `labels` which is a | ||
| 15 | -- mapping from the label names to their integer representations, and | ||
| 16 | -- `errmsgs` which is a mapping from the label names to their | ||
| 17 | -- corresponding error messages. | ||
| 11 | local labels = {} | 18 | local labels = {} |
| 12 | local errmsgs = {} | 19 | local errmsgs = {} |
| 13 | 20 | ||
| @@ -16,15 +23,26 @@ for i, err in ipairs(errinfo) do | |||
| 16 | errmsgs[err[1]] = err[2] | 23 | errmsgs[err[1]] = err[2] |
| 17 | end | 24 | end |
| 18 | 25 | ||
| 26 | -- The `labels` table is especially useful for making our re grammar more | ||
| 27 | -- readable through the use of the `setlabels` function which allows us | ||
| 28 | -- to use the label names directly in the re grammar instead of the integers. | ||
| 19 | re.setlabels(labels) | 29 | re.setlabels(labels) |
| 20 | 30 | ||
| 31 | -- The `errors` table will hold the list of errors recorded during parsing | ||
| 21 | local errors = {} | 32 | local errors = {} |
| 22 | 33 | ||
| 34 | -- The `recordError` function simply records the label and position of | ||
| 35 | -- the failure (index in input string) into the `errors` table. | ||
| 36 | -- Note: The unused `input` parameter is necessary, as this will be called | ||
| 37 | -- by LPeg's match-time capture. | ||
| 23 | function recordError(input, pos, label) | 38 | function recordError(input, pos, label) |
| 24 | table.insert(errors, {label, pos}) | 39 | table.insert(errors, {label, pos}) |
| 25 | return true | 40 | return true |
| 26 | end | 41 | end |
| 27 | 42 | ||
| 43 | -- The `compute` function takes an alternating list of numbers and | ||
| 44 | -- operators and computes the result of applying the operations | ||
| 45 | -- to the numbers in a left to right order (no operator precedence). | ||
| 28 | local function compute(tokens) | 46 | local function compute(tokens) |
| 29 | local result = tokens[1] | 47 | local result = tokens[1] |
| 30 | for i = 2, #tokens, 2 do | 48 | for i = 2, #tokens, 2 do |
| @@ -43,17 +61,27 @@ local function compute(tokens) | |||
| 43 | return result | 61 | return result |
| 44 | end | 62 | end |
| 45 | 63 | ||
| 64 | -- Our grammar is a simple arithmetic expression of integers that | ||
| 65 | -- does not take operator precedence into account but allows grouping | ||
| 66 | -- via parentheses. We have incorporated some error recovery strategies | ||
| 67 | -- to our grammar so that it may resume parsing even after encountering | ||
| 68 | -- an error, which allows us to report more errors. | ||
| 46 | local g = re.compile([[ | 69 | local g = re.compile([[ |
| 47 | S <- (Exp / ErrNoExp) (!. / ErrExtra) | 70 | S <- (Exp / ErrNoExp) (!. / ErrExtra) |
| 48 | Exp <- {| Term (op Operand)* |} -> compute | 71 | Exp <- {| Term (op Operand)* |} -> compute |
| 72 | -- If we encounter a missing term/operand, we return a dummy instead. | ||
| 49 | Operand <- Term / ErrExpTerm /{ExpTerm} dummy | 73 | Operand <- Term / ErrExpTerm /{ExpTerm} dummy |
| 50 | Term <- num / Group | 74 | Term <- num / Group |
| 75 | -- If we encounter a missing closing parenthesis, we ignore it. | ||
| 51 | Group <- "(" InnerExp (")" / ErrMisClose /{MisClose} "") | 76 | Group <- "(" InnerExp (")" / ErrMisClose /{MisClose} "") |
| 77 | -- If we encounter a missing inner expression, we skip to the next | ||
| 78 | -- closing parenthesis, and return a dummy in its place. | ||
| 52 | InnerExp <- Exp / ErrExpExp /{ExpExp} [^)]* dummy | 79 | InnerExp <- Exp / ErrExpExp /{ExpExp} [^)]* dummy |
| 53 | 80 | ||
| 54 | op <- {[-+*/]} | 81 | op <- {[-+*/]} |
| 55 | num <- [0-9]+ -> tonumber | 82 | num <- [0-9]+ -> tonumber |
| 56 | 83 | ||
| 84 | -- Before throwing an error, we make sure to record it first. | ||
| 57 | ErrNoExp <- ("" -> "NoExp" => recordError) %{NoExp} | 85 | ErrNoExp <- ("" -> "NoExp" => recordError) %{NoExp} |
| 58 | ErrExtra <- ("" -> "Extra" => recordError) %{Extra} | 86 | ErrExtra <- ("" -> "Extra" => recordError) %{Extra} |
| 59 | ErrExpTerm <- ("" -> "ExpTerm" => recordError) %{ExpTerm} | 87 | ErrExpTerm <- ("" -> "ExpTerm" => recordError) %{ExpTerm} |
| @@ -67,6 +95,10 @@ local g = re.compile([[ | |||
| 67 | tonumber = tonumber; | 95 | tonumber = tonumber; |
| 68 | }) | 96 | }) |
| 69 | 97 | ||
| 98 | -- The `eval` function takes an input string to match against the grammar | ||
| 99 | -- we've just defined. If the input string matches, then the result of the | ||
| 100 | -- computation is returned, otherwise we return the error messages and | ||
| 101 | -- positions of all the failures encountered. | ||
| 70 | local function eval(input) | 102 | local function eval(input) |
| 71 | local result, label, suffix = g:match(input) | 103 | local result, label, suffix = g:match(input) |
| 72 | if #errors == 0 then | 104 | if #errors == 0 then |
