From 6dcf7eb5bd2bf6aa35cb3f3d8b0394bf2f625093 Mon Sep 17 00:00:00 2001 From: Undecidable Robot Date: Mon, 11 Jul 2016 11:44:31 +0800 Subject: Adding comments to the recovery examples --- examples/recovery.lua | 28 ++++++++++++++++++++++++++++ examples/recoveryRe.lua | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) (limited to 'examples') diff --git a/examples/recovery.lua b/examples/recovery.lua index 7e039c7..7af8455 100644 --- a/examples/recovery.lua +++ b/examples/recovery.lua @@ -4,6 +4,9 @@ local R, S, P, V = lpeg.R, lpeg.S, lpeg.P, lpeg.V local C, Cc, Ct, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cmt local T, Lc = lpeg.T, lpeg.Lc +-- The `labels` table contains the list of labels that we will be using +-- as well as the corresponding error message for each label, which will +-- be used in our error reporting later on. local labels = { {"NoExp", "no expression found"}, {"Extra", "extra characters found after the expression"}, @@ -12,6 +15,9 @@ local labels = { {"MisClose", "missing a closing ')' after the expression"}, } +-- The `labelIndex` function gives us the index of a label in the +-- `labels` table, which serves as the integer representation of the label. +-- We need this because LPegLabel requires us to use integers for the labels. local function labelIndex(labname) for i, elem in ipairs(labels) do if elem[1] == labname then @@ -21,8 +27,13 @@ local function labelIndex(labname) error("could not find label: " .. labname) end +-- The `errors` table will hold the list of errors recorded during parsing local errors = {} +-- The `expect` function takes a pattern and a label and returns a pattern +-- that throws the specified label if the original pattern fails to match. +-- Before throwing the label, it records the label to be thrown along with +-- the position of the failure (index in input string) into the `errors` table. 
local function expect(patt, labname) local i = labelIndex(labname) function recordError(input, pos) @@ -35,6 +46,9 @@ end local num = R("09")^1 / tonumber local op = S("+-*/") +-- The `compute` function takes an alternating list of numbers and +-- operators and computes the result of applying the operations +-- to the numbers in a left to right order (no operator precedence). local function compute(tokens) local result = tokens[1] for i = 2, #tokens, 2 do @@ -53,18 +67,32 @@ local function compute(tokens) return result end +-- Our grammar is a simple arithmetic expression of integers that +-- does not take operator precedence into account but allows grouping +-- via parentheses. We have incorporated some error recovery strategies +-- to our grammar so that it may resume parsing even after encountering +-- an error, which allows us to report more errors. local g = P { "Exp", Exp = Ct(V"Term" * (C(op) * V"OpRecov")^0) / compute; + -- `OpRecov` handles missing terms/operands by returning a dummy (zero). OpRecov = Lc(V"Operand", Cc(0), labelIndex("ExpTerm")); Operand = expect(V"Term", "ExpTerm"); Term = num + V"Group"; + -- `Group` handles missing closing parenthesis by simply ignoring it. + -- Like all the others, the error is still recorded of course. Group = "(" * V"InnerExp" * Lc(expect(")", "MisClose"), P"", labelIndex("MisClose")); + -- `InnerExp` handles missing expressions by skipping to the next closing + -- parenthesis. A dummy (zero) is returned in place of the expression. InnerExp = Lc(expect(V"Exp", "ExpExp"), (P(1) - ")")^0 * Cc(0), labelIndex("ExpExp")); } g = expect(g, "NoExp") * expect(-P(1), "Extra") +-- The `eval` function takes an input string to match against the grammar +-- we've just defined. If the input string matches, then the result of the +-- computation is returned, otherwise we return the error messages and +-- positions of all the failures encountered. 
local function eval(input) local result, label, suffix = g:match(input) if #errors == 0 then diff --git a/examples/recoveryRe.lua b/examples/recoveryRe.lua index 50ec53a..795e01d 100644 --- a/examples/recoveryRe.lua +++ b/examples/recoveryRe.lua @@ -1,5 +1,8 @@ local re = require"relabel" +-- The `errinfo` table contains the list of labels that we will be using +-- as well as the corresponding error message for each label, which will +-- be used in our error reporting later on. local errinfo = { {"NoExp", "no expression found"}, {"Extra", "extra characters found after the expression"}, @@ -8,6 +11,10 @@ local errinfo = { {"MisClose", "missing a closing ')' after the expression"}, } +-- We split the errinfo table into two tables: `labels` which is a +-- mapping from the label names to its integer representation, and +-- `errmsgs` which is a mapping from the label names to its +-- corresponding error message. local labels = {} local errmsgs = {} @@ -16,15 +23,26 @@ for i, err in ipairs(errinfo) do errmsgs[err[1]] = err[2] end +-- The `labels` table is especially useful for making our re grammar more +-- readable through the use of the `setlabels` function which allows us +-- to use the label names directly in the re grammar instead of the integers. re.setlabels(labels) +-- The `errors` table will hold the list of errors recorded during parsing local errors = {} +-- The `recordError` function simply records the label and position of +-- the failure (index in input string) into the `errors` table. +-- Note: The unused `input` parameter is necessary, as this will be called +-- by LPeg's match-time capture. function recordError(input, pos, label) table.insert(errors, {label, pos}) return true end +-- The `compute` function takes an alternating list of numbers and +-- operators and computes the result of applying the operations +-- to the numbers in a left to right order (no operator precedence). 
local function compute(tokens) local result = tokens[1] for i = 2, #tokens, 2 do @@ -43,17 +61,27 @@ local function compute(tokens) return result end +-- Our grammar is a simple arithmetic expression of integers that +-- does not take operator precedence into account but allows grouping +-- via parentheses. We have incorporated some error recovery strategies +-- to our grammar so that it may resume parsing even after encountering +-- an error, which allows us to report more errors. local g = re.compile([[ S <- (Exp / ErrNoExp) (!. / ErrExtra) Exp <- {| Term (op Operand)* |} -> compute + -- If we encounter a missing term/operand, we return a dummy instead. Operand <- Term / ErrExpTerm /{ExpTerm} dummy Term <- num / Group + -- If we encounter a missing closing parenthesis, we ignore it. Group <- "(" InnerExp (")" / ErrMisClose /{MisClose} "") + -- If we encounter a missing inner expression, we skip to the next + -- closing parenthesis, and return a dummy in its place. InnerExp <- Exp / ErrExpExp /{ExpExp} [^)]* dummy - op <- {[-+*/]} + op <- {[-+*/]} num <- [0-9]+ -> tonumber + -- Before throwing an error, we make sure to record it first. ErrNoExp <- ("" -> "NoExp" => recordError) %{NoExp} ErrExtra <- ("" -> "Extra" => recordError) %{Extra} ErrExpTerm <- ("" -> "ExpTerm" => recordError) %{ExpTerm} @@ -67,6 +95,10 @@ local g = re.compile([[ tonumber = tonumber; }) +-- The `eval` function takes an input string to match against the grammar +-- we've just defined. If the input string matches, then the result of the +-- computation is returned, otherwise we return the error messages and +-- positions of all the failures encountered. local function eval(input) local result, label, suffix = g:match(input) if #errors == 0 then -- cgit v1.2.3-55-g6feb