From 448762908fd822fbc101a4fe66fac9cd8aa913b5 Mon Sep 17 00:00:00 2001 From: Sergio Queiroz Date: Mon, 14 Nov 2016 17:15:27 -0300 Subject: Changing documentation and examples with recovery --- README.md | 296 ++++++++++++++++++++++++++++++++++++++--------- examples/listId1.lua | 12 +- examples/listId2.lua | 13 ++- examples/listId2Rec2.lua | 67 +++++++++++ lptree.c | 2 +- lpvm.c | 8 +- 6 files changed, 329 insertions(+), 69 deletions(-) create mode 100644 examples/listId2Rec2.lua diff --git a/README.md b/README.md index b882968..fe7c14d 100644 --- a/README.md +++ b/README.md @@ -10,43 +10,47 @@ LPegLabel is a conservative extension of the [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg) library that provides an implementation of Parsing Expression Grammars (PEGs) with labeled failures. -Labels can be used to signal different kinds of erros -and to specify which alternative in a labeled ordered -choice should handle a given label. Labels can also be -combined with the standard patterns of LPeg. +Labels can be used to signal different kinds of errors +and to specify which recovery pattern should handle a +given label. Labels can also be combined with the standard +patterns of LPeg. This document describes the new functions available in LpegLabel and presents some examples of usage. -For a more detailed discussion about PEGs with labeled failures -please see [A Parsing Machine for Parsing Expression -Grammars with Labeled Failures](https://docs.google.com/viewer?a=v&pid=sites&srcid=ZGVmYXVsdGRvbWFpbnxzcW1lZGVpcm9zfGd4OjMzZmE3YzM0Y2E2MGM5Y2M). - In LPegLabel, the result of an unsuccessful matching is a triple **nil, lab, sfail**, where **lab** is the label associated with the failure, and **sfail** is the suffix input being matched when -**lab** was thrown. Below there is a brief summary +**lab** was thrown. + +With labeled failures it is possible to distinguish +between a regular failure and an error. Usually, a +regular failure is produced when the matching of a +character fails, and it is caught by an ordered choice. +An error, by its turn, is produced by the throw operator +and may be caught by the recovery operator. + +Below there is a brief summary of the new functions provided by LpegLabel: - - - - + + + + - - - + - + - +
FunctionDescription
lpeglabel.T (l)Throws label l
lpeglabel.Lc (p1, p2, l1, ..., ln)Matches p1 and tries to match p2 - if the matching of p1 gives one of l1, ..., ln -
lpeglabelrec.T (l)Throws a label l to signal an error
lpeglabelrec.Rec (p1, p2, l1, [l2, ..., ln])Specifies a recovery pattern p2 for p1, + when the matching of p1 gives one of the labels l1, ..., ln.
%{l}Syntax of relabel module. Equivalent to lpeg.T(l) + Syntax of relabelrec module. Equivalent to lpeglabelrec.T(l)
p1 /{l1, ..., ln} p2Syntax of relabel module. Equivalent to lpeg.Lc(p1, p2, l1, ..., ln) +
p1 //{l1, ..., ln} p2Syntax of relabelrec module. Equivalent to lpeglabelrec.Rec(p1, p2, l1, ..., ln)
relabel.calcline(subject, i)
relabelrec.calcline(subject, i) Calculates line and column information regarding position i of the subject
relabel.setlabels (tlabel)
relabelrec.setlabels (tlabel) Allows to specicify a table with mnemonic labels.
@@ -55,55 +59,44 @@ of the new functions provided by LpegLabel: ### Functions -#### lpeglabel.T(l) +#### lpeglabelrec.T(l) Returns a pattern that throws the label `l`. -A label must be an integer between 0 and 255. - -The label 0 is equivalent to the regular failure of PEGs. +A label must be an integer between 1 and 255. -#### lpeglabel.Lc(p1, p2, l1, ..., ln)# +#### lpeglabelrec.Rec(p1, p2, l1, ..., ln) -Returns a pattern equivalent to a *labeled ordered choice*. +Returns a *recovery pattern*. If the matching of `p1` gives one of the labels `l1, ..., ln`, -then the matching of `p2` is tried from the same position. Otherwise, -the result of the matching of `p1` is the pattern's result. +then the matching of `p2` is tried from the failure position of `p1`. +Otherwise, the result of the matching of `p1` is the pattern's result. -The labeled ordered choice `lpeg.Lc(p1, p2, 0)` is equivalent to the -regular ordered choice `p1 / p2`. - -Although PEG's ordered choice is associative, the labeled ordered choice is not. -When using this function, the user should take care to build a left-associative -labeled ordered choice pattern. #### %{l} -Syntax of *relabel* module. Equivalent to `lpeg.T(l)`. +Syntax of *relabelrec* module. Equivalent to `lpeg.T(l)`. -#### p1 /{l1, ..., ln} p2 +#### p1 //{l1, ..., ln} p2 -Syntax of *relabel* module. Equivalent to `lpeg.Lc(p1, p2, l1, ..., ln)`. +Syntax of *relabelrec* module. Equivalent to `lpeglabelrec.Rec(p1, p2, l1, ..., ln)`. -The `/{}` operator is left-associative. +The `//{}` operator is left-associative. -A grammar can use both choice operators (`/` and `/{}`), -but a single choice can not mix them. That is, the parser of `relabel` -module will not recognize a pattern as `p1 / p2 /{l1} p3`. -#### relabel.calcline (subject, i) +#### relabelrec.calcline (subject, i) Returns line and column information regarding position i of the subject. -#### relabel.setlabels (tlabel) +#### relabelrec.setlabels (tlabel) Allows to specicify a table with labels. They keys of -`tlabel` must be integers between 0 and 255, +`tlabel` must be integers between 1 and 255, and the associated values should be strings. @@ -122,8 +115,8 @@ is thrown when there is an error matching an identifier or a comma: ```lua -local m = require'lpeglabel' -local re = require'relabel' +local m = require'lpeglabelrec' +local re = require'relabelrec' local g = m.P{ "S", @@ -160,7 +153,7 @@ In this example we could think about writing rule List as follows: List = ((m.V"Comma" + m.T(2)) * (m.V"Id" + m.T(1)))^0, ``` -but when matching this expression agains the end of input +but when matching this expression against the end of input we would get a failure whose associated label would be **2**, and this would cause the failure of the *whole* repetition. @@ -168,12 +161,12 @@ and this would cause the failure of the *whole* repetition. ##### Mnemonics instead of numbers In the previous example we could have created a table -with the error messages to improve the readbility of the PEG. +with the error messages to improve the readability of the PEG. Below we rewrite the previous grammar following this approach: ```lua -local m = require'lpeglabel' -local re = require'relabel' +local m = require'lpeglabelrec' +local re = require'relabelrec' local terror = {} @@ -210,14 +203,99 @@ print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expec print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' ``` +#### Error Recovery + +By using the recovery operator we can specify a recovery pattern that +should be matched when a label is thrown. After matching this pattern, +and possibly recording the error, the parser can continue parsing to +find more errors. + +Below we rewrite the previous example to illustrate a recovery strategy. +Grammar `g` remains the same, but we add a recovery grammar `grec` that +handles the labels thrown by `g`. + +arithmetic expression example and modify +the `expect` function to use the recovery operator for error recovery: + +```lua +local m = require'lpeglabelrec' +local re = require'relabelrec' + +local terror = {} + +local function newError(s) + table.insert(terror, s) + return #terror +end + +local errUndef = newError("undefined") +local errId = newError("expecting an identifier") +local errComma = newError("expecting ','") + +local id = m.R'az'^1 + +local g = m.P{ + "S", + S = m.V"Id" * m.V"List", + List = -m.P(1) + m.V"Comma" * m.V"Id" * m.V"List", + Id = m.V"Sp" * id + m.T(errId), + Comma = m.V"Sp" * "," + m.T(errComma), + Sp = m.S" \n\t"^0, +} + +local subject, errors + +function recorderror(pos, lab) + local line, col = re.calcline(subject, pos) + table.insert(errors, { line = line, col = col, msg = terror[lab] }) +end + +function record (lab) + return (m.Cp() * m.Cc(lab)) / recorderror +end + +function sync (p) + return (-p * m.P(1))^0 +end + +local grec = m.P{ + "S", + S = m.Rec(m.Rec(g, m.V"ErrComma", errComma), m.V"ErrId", errId), + ErrComma = record(errComma) * sync(-m.P(1) + id), + ErrId = record(errId) * sync(-m.P(1) + ",") +} + + +function mymatch (g, s) + errors = {} + subject = s + local r, e, sfail = g:match(s) + if #errors > 0 then + local out = {} + for i, err in ipairs(errors) do + local msg = "Error at line " .. err.line .. " (col " .. err.col .. "): " .. err.msg + table.insert(out, msg) + end + return nil, table.concat(out, "\n") + end + return r +end + +print(mymatch(grec, "one,two")) +print(mymatch(grec, "one two three")) +print(mymatch(grec, "1,\n two, \n3,")) +print(mymatch(grec, "one\n two123, \nthree,")) +``` + -##### *relabel* syntax + +##### *relabelrec* syntax Now we rewrite the previous example using the syntax -supported by *relabel*: +supported by *relabelrec*: ```lua -local re = require 'relabel' +local re = require 'relabelrec' local g = re.compile[[ S <- Id List @@ -252,7 +330,7 @@ With the help of function *setlabels* we can also rewrite the previous example t mnemonic labels instead of plain numbers: ```lua -local re = require 'relabel' +local re = require 'relabelrec' local errinfo = { {"errUndef", "undefined"}, @@ -293,6 +371,11 @@ print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expec print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' ``` + + + + + #### Arithmetic Expressions Here's an example of an LPegLabel grammar that make its own function called @@ -420,3 +503,108 @@ print(m.match(g, "one,two")) --> 8 print(m.match(g, "one two")) --> expecting ',' print(m.match(g, "one,\n two,\nthree,")) --> expecting an identifier ``` + +#### Error Recovery + +By using labeled ordered choice or the recovery operator, when a label +is thrown, the parser may record the error and still continue parsing +to find more errors. We can even record the error right away without +actually throwing a label (relying on the regular PEG failure instead). +Below we rewrite the arithmetic expression example and modify +the `expect` function to use the recovery operator for error recovery: + +```lua +local lpeg = require"lpeglabel" + +local R, S, P, V = lpeg.R, lpeg.S, lpeg.P, lpeg.V +local C, Cc, Ct, Cmt, Carg = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cmt, lpeg.Carg +local T, Lc, Rec = lpeg.T, lpeg.Lc, lpeg.Rec + +local labels = { + {"NoExp", "no expression found"}, + {"Extra", "extra characters found after the expression"}, + {"ExpTerm", "expected a term after the operator"}, + {"ExpExp", "expected an expression after the parenthesis"}, + {"MisClose", "missing a closing ')' after the expression"}, +} + +local function labelindex(labname) + for i, elem in ipairs(labels) do + if elem[1] == labname then + return i + end + end + error("could not find label: " .. labname) +end + +local function expect(patt, labname, recpatt) + local i = labelindex(labname) + local function recorderror(input, pos, errors) + table.insert(errors, {i, pos}) + return true + end + if not recpatt then recpatt = P"" end + return Rec(patt, Cmt(Carg(1), recorderror) * recpatt) +end + +local num = R("09")^1 / tonumber +local op = S("+-*/") + +local function compute(tokens) + local result = tokens[1] + for i = 2, #tokens, 2 do + if tokens[i] == '+' then + result = result + tokens[i+1] + elseif tokens[i] == '-' then + result = result - tokens[i+1] + elseif tokens[i] == '*' then + result = result * tokens[i+1] + elseif tokens[i] == '/' then + result = result / tokens[i+1] + else + error('unknown operation: ' .. tokens[i]) + end + end + return result +end + + +local g = P { + "Exp", + Exp = Ct(V"Term" * (C(op) * V"Operand")^0) / compute; + Operand = expect(V"Term", "ExpTerm", Cc(0)); + Term = num + V"Group"; + Group = "(" * V"InnerExp" * expect(")", "MisClose"); + InnerExp = expect(V"Exp", "ExpExp", (P(1) - ")")^0 * Cc(0)); +} + +g = expect(g, "NoExp", P(1)^0) * expect(-P(1), "Extra") + +local function eval(input) + local errors = {} + local result, label, suffix = g:match(input, 1, errors) + if #errors == 0 then + return result + else + local out = {} + for i, err in ipairs(errors) do + local pos = err[2] + local msg = labels[err[1]][2] + table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")") + end + return nil, table.concat(out, "\n") + end +end + +print(eval "98-76*(54/32)") +--> 37.125 + +print(eval "-1+(1-(1*2))/2") +--> syntax error: no expression found (at index 1) + +print(eval "(1+1-1*(2/2+)-():") +--> syntax error: expected a term after the operator (at index 13) +--> syntax error: expected an expression after the parenthesis (at index 16) +--> syntax error: missing a closing ')' after the expression (at index 17) +--> syntax error: extra characters found after the expression (at index 17) +``` diff --git a/examples/listId1.lua b/examples/listId1.lua index 8976f5f..dee46e9 100644 --- a/examples/listId1.lua +++ b/examples/listId1.lua @@ -1,12 +1,14 @@ -local m = require'lpeglabel' -local re = require'relabel' +local m = require'lpeglabelrec' +local re = require'relabelrec' + +local id = m.R'az'^1 local g = m.P{ "S", S = m.V"Id" * m.V"List", - List = -m.P(1) + (m.V"Comma" + m.T(2)) * (m.V"Id" + m.T(1)) * m.V"List", - Id = m.V"Sp" * m.R'az'^1, - Comma = m.V"Sp" * ",", + List = -m.P(1) + m.V"Comma" * m.V"Id" * m.V"List", + Id = m.V"Sp" * id + m.T(1), + Comma = m.V"Sp" * "," + m.T(2), Sp = m.S" \n\t"^0, } diff --git a/examples/listId2.lua b/examples/listId2.lua index 509fda4..46f0063 100644 --- a/examples/listId2.lua +++ b/examples/listId2.lua @@ -1,5 +1,5 @@ -local m = require'lpeglabel' -local re = require'relabel' +local m = require'lpeglabelrec' +local re = require'relabelrec' local terror = {} @@ -12,15 +12,18 @@ local errUndef = newError("undefined") local errId = newError("expecting an identifier") local errComma = newError("expecting ','") +local id = m.R'az'^1 + local g = m.P{ "S", S = m.V"Id" * m.V"List", - List = -m.P(1) + (m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)) * m.V"List", - Id = m.V"Sp" * m.R'az'^1, - Comma = m.V"Sp" * ",", + List = -m.P(1) + m.V"Comma" * m.V"Id" * m.V"List", + Id = m.V"Sp" * id + m.T(errId), + Comma = m.V"Sp" * "," + m.T(errComma), Sp = m.S" \n\t"^0, } + function mymatch (g, s) local r, e, sfail = g:match(s) if not r then diff --git a/examples/listId2Rec2.lua b/examples/listId2Rec2.lua new file mode 100644 index 0000000..a347a5b --- /dev/null +++ b/examples/listId2Rec2.lua @@ -0,0 +1,67 @@ +local m = require'lpeglabelrec' +local re = require'relabelrec' + +local terror = {} + +local function newError(s) + table.insert(terror, s) + return #terror +end + +local errUndef = newError("undefined") +local errId = newError("expecting an identifier") +local errComma = newError("expecting ','") + +local id = m.R'az'^1 + +local g = m.P{ + "S", + S = m.V"Id" * m.V"List", + List = -m.P(1) + m.V"Comma" * m.V"Id" * m.V"List", + Id = m.V"Sp" * id + m.T(errId), + Comma = m.V"Sp" * "," + m.T(errComma), + Sp = m.S" \n\t"^0, +} + +local subject, errors + +function recorderror(pos, lab) + local line, col = re.calcline(subject, pos) + table.insert(errors, { line = line, col = col, msg = terror[lab] }) +end + +function record (lab) + return (m.Cp() * m.Cc(lab)) / recorderror +end + +function sync (p) + return (-p * m.P(1))^0 +end + +local grec = m.P{ + "S", + S = m.Rec(m.Rec(g, m.V"ErrComma", errComma), m.V"ErrId", errId), + ErrComma = record(errComma) * sync(-m.P(1) + id), + ErrId = record(errId) * sync(-m.P(1) + ",") +} + + +function mymatch (g, s) + errors = {} + subject = s + local r, e, sfail = g:match(s) + if #errors > 0 then + local out = {} + for i, err in ipairs(errors) do + local msg = "Error at line " .. err.line .. " (col " .. err.col .. "): " .. err.msg + table.insert(out, msg) + end + return nil, table.concat(out, "\n") + end + return r +end + +print(mymatch(grec, "one,two")) +print(mymatch(grec, "one two three")) +print(mymatch(grec, "1,\n two, \n3,")) +print(mymatch(grec, "one\n two123, \nthree,")) diff --git a/lptree.c b/lptree.c index 9861cfe..e15198e 100644 --- a/lptree.c +++ b/lptree.c @@ -28,7 +28,7 @@ const byte numsiblings[] = { 0, 0, 2, 1, /* call, opencall, rule, grammar */ 1, /* behind */ 1, 1, /* capture, runtime capture */ - 0, 2, 2 /* labeled failure throw, labeled choice, recovery */ + 0, 2 /* labeled failure throw, recovery */ }; diff --git a/lpvm.c b/lpvm.c index 122c8f4..a934478 100644 --- a/lpvm.c +++ b/lpvm.c @@ -336,8 +336,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, do { /* remove pending calls */ assert(pstack > getstackbase(L, ptop)); auxlab = (--pstack)->ls; - } while (auxlab == NULL || (pstack->p != &giveup && labelf != LFAIL && !testlabel(pstack->ls->cs, *labelf))); - if (pstack->p == &giveup || pstack->s != NULL) { /* labeled failure: giveup or backtrack frame */ + } while (auxlab == NULL || !(pstack->p == &giveup || testlabel(pstack->ls->cs, *labelf))); + if (pstack->s != NULL) { /* labeled failure: giveup or backtrack frame */ stack = pstack; s = stack->s; if (ndyncap > 0) /* is there matchtime captures? */ @@ -349,7 +349,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, stack->s = NULL; stack->p = pk; /* save return address */ stack->ls = NULL; - stack->caplevel = captop; /* TODO: necessary?? */ + stack->caplevel = captop; /* TODO: really necessary?? */ stack++; } p = pstack->p; @@ -366,7 +366,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, res = resdyncaptures(L, fr, s - o, e - o); /* get result */ if (res == -1) { /* fail? */ *labelf = LFAIL; /* labeled failure */ - *sfail = (const char *) s; /* TODO: ??? */ + *sfail = (const char *) s; pk = NULL; goto fail; } -- cgit v1.2.3-55-g6feb