diff options
author | Sergio Queiroz <sqmedeiros@gmail.com> | 2016-12-13 13:53:49 -0300 |
---|---|---|
committer | Sergio Queiroz <sqmedeiros@gmail.com> | 2016-12-13 13:53:49 -0300 |
commit | 09fab0decb7df93528ab40fcfd99587e9074c64f (patch) | |
tree | ecd7a763c7a08712f122945bb5ce1ed7d7e5f077 /README.md | |
parent | d80821d79376671371c15ded562fbe1a9bebc635 (diff) | |
parent | 1322d612d72ac658f2aa443dca94954b819c0993 (diff) | |
download | lpeglabel-09fab0decb7df93528ab40fcfd99587e9074c64f.tar.gz lpeglabel-09fab0decb7df93528ab40fcfd99587e9074c64f.tar.bz2 lpeglabel-09fab0decb7df93528ab40fcfd99587e9074c64f.zip |
Merge branch 'recoveryresume'
Diffstat (limited to 'README.md')
-rw-r--r-- | README.md | 665 |
1 files changed, 465 insertions, 200 deletions
@@ -10,47 +10,46 @@ LPegLabel is a conservative extension of the | |||
10 | [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg) | 10 | [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg) |
11 | library that provides an implementation of Parsing | 11 | library that provides an implementation of Parsing |
12 | Expression Grammars (PEGs) with labeled failures. | 12 | Expression Grammars (PEGs) with labeled failures. |
13 | Labels can be used to signal different kinds of erros | 13 | Labels can be used to signal different kinds of errors |
14 | and to specify which alternative in a labeled ordered | 14 | and to specify which recovery pattern should handle a |
15 | choice should handle a given label. Labels can also be | 15 | given label. Labels can also be combined with the standard |
16 | combined with the standard patterns of LPeg. | 16 | patterns of LPeg. |
17 | 17 | ||
18 | This document describes the new functions available | 18 | This document describes the new functions available |
19 | in LpegLabel and presents some examples of usage. | 19 | in LpegLabel and presents some examples of usage. |
20 | For a more detailed discussion about PEGs with labeled failures | ||
21 | please see [A Parsing Machine for Parsing Expression | ||
22 | Grammars with Labeled Failures](https://docs.google.com/viewer?a=v&pid=sites&srcid=ZGVmYXVsdGRvbWFpbnxzcW1lZGVpcm9zfGd4OjMzZmE3YzM0Y2E2MGM5Y2M). | ||
23 | |||
24 | 20 | ||
25 | In LPegLabel, the result of an unsuccessful matching | 21 | In LPegLabel, the result of an unsuccessful matching |
26 | is a triple **nil, lab, sfail**, where **lab** | 22 | is a triple **nil, lab, sfail**, where **lab** |
27 | is the label associated with the failure, and | 23 | is the label associated with the failure, and |
28 | **sfail** is the suffix input being matched when | 24 | **sfail** is the suffix input being matched when |
29 | **lab** was thrown. Below there is a brief summary | 25 | **lab** was thrown. |
30 | of the new functions provided by LpegLabel: | 26 | |
27 | With labeled failures it is possible to distinguish | ||
28 | between a regular failure and an error. Usually, a | ||
29 | regular failure is produced when the matching of a | ||
30 | character fails, and it is caught by an ordered choice. | ||
| 31 | An error, in turn, is produced by the throw operator | ||
32 | and may be caught by the recovery operator. | ||
33 | |||
34 | Below there is a brief summary of the new functions provided by LpegLabel: | ||
31 | 35 | ||
32 | <table border="1"> | 36 | <table border="1"> |
33 | <tbody><tr><td><b>Function</b></td><td><b>Description</b></td></tr> | 37 | <tbody><tr><td><b>Function</b></td><td><b>Description</b></td></tr> |
34 | <tr><td><a href="#f-t"><code>lpeglabel.T (l)</code></a></td> | 38 | <tr><td><a href="#f-t"><code>lpeglabelrec.T (l)</code></a></td> |
35 | <td>Throws label <code>l</code></td></tr> | 39 | <td>Throws a label <code>l</code> to signal an error</td></tr> |
36 | <tr><td><a href="#f-lc"><code>lpeglabel.Lc (p1, p2, l1, ..., ln)</code></a></td> | 40 | <tr><td><a href="#f-rec"><code>lpeglabelrec.Rec (p1, p2, l1 [, l2, ..., ln])</code></a></td> |
37 | <td>Matches <code>p1</code> and tries to match <code>p2</code> | 41 | <td>Specifies a recovery pattern <code>p2</code> for <code>p1</code>, |
38 | if the matching of <code>p1</code> gives one of l<sub>1</sub>, ..., l<sub>n</sub> | 42 | when the matching of <code>p1</code> gives one of the labels l1, ..., ln.</td></tr> |
39 | </td></tr> | ||
40 | <tr><td><a href="#f-rec"><code>lpeglabel.Rec (p1, p2 [, l1, ..., ln])</code></a></td> | ||
41 | <td>Like <code>Lc</code> but does not reset the position of the parser | ||
42 | when trying <code>p2</code>. By default, it catches regular PEG failures | ||
43 | </td></tr> | ||
44 | <tr><td><a href="#re-t"><code>%{l}</code></a></td> | 43 | <tr><td><a href="#re-t"><code>%{l}</code></a></td> |
45 | <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeg.T(l)</code> | 44 | <td>Syntax of <em>relabelrec</em> module. Equivalent to <code>lpeglabelrec.T(l)</code> |
46 | </td></tr> | 45 | </td></tr> |
47 | <tr><td><a href="#re-lc"><code>p1 /{l1, ..., ln} p2</code></a></td> | 46 | <tr><td><a href="#re-rec"><code>p1 //{l1 [, l2, ..., ln]} p2</code></a></td> |
48 | <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeg.Lc(p1, p2, l1, ..., ln)</code> | 47 | <td>Syntax of <em>relabelrec</em> module. Equivalent to <code>lpeglabelrec.Rec(p1, p2, l1, ..., ln)</code> |
49 | </td></tr> | 48 | </td></tr> |
50 | <tr><td><a href="#re-line"><code>relabel.calcline(subject, i)</code></a></td> | 49 | <tr><td><a href="#re-line"><code>relabelrec.calcline(subject, i)</code></a></td> |
51 | <td>Calculates line and column information regarding position <i>i</i> of the subject | 50 | <td>Calculates line and column information regarding position <i>i</i> of the subject |
52 | </td></tr> | 51 | </td></tr> |
53 | <tr><td><a href="#re-setl"><code>relabel.setlabels (tlabel)</code></a></td> | 52 | <tr><td><a href="#re-setl"><code>relabelrec.setlabels (tlabel)</code></a></td> |
54 | <td>Allows to specify a table with mnemonic labels. | 53 | <td>Allows to specify a table with mnemonic labels. |
55 | </td></tr> | 54 | </td></tr> |
56 | </tbody></table> | 55 | </tbody></table> |
@@ -59,28 +58,20 @@ of the new functions provided by LpegLabel: | |||
59 | ### Functions | 58 | ### Functions |
60 | 59 | ||
61 | 60 | ||
62 | #### <a name="f-t"></a><code>lpeglabel.T(l)</code> | 61 | #### <a name="f-t"></a><code>lpeglabelrec.T(l)</code> |
63 | 62 | ||
64 | 63 | ||
65 | Returns a pattern that throws the label `l`. | 64 | Returns a pattern that throws the label `l`. |
66 | A label must be an integer between 0 and 255. | 65 | A label must be an integer between 1 and 255. |
67 | 66 | ||
68 | The label 0 is equivalent to the regular failure of PEGs. | ||
69 | 67 | ||
68 | #### <a name="f-rec"></a><code>lpeglabelrec.Rec(p1, p2, l1, ..., ln)</code> | ||
70 | 69 | ||
71 | #### <a name="f-lc"></a><code>lpeglabel.Lc(p1, p2, l1, ..., ln)</code> | 70 | Returns a *recovery pattern*. |
72 | |||
73 | Returns a pattern equivalent to a *labeled ordered choice*. | ||
74 | If the matching of `p1` gives one of the labels `l1, ..., ln`, | 71 | If the matching of `p1` gives one of the labels `l1, ..., ln`, |
75 | then the matching of `p2` is tried from the same position. Otherwise, | 72 | then the matching of `p2` is tried from the failure position of `p1`. |
76 | the result of the matching of `p1` is the pattern's result. | 73 | Otherwise, the result of the matching of `p1` is the pattern's result. |
77 | |||
78 | The labeled ordered choice `lpeg.Lc(p1, p2, 0)` is equivalent to the | ||
79 | regular ordered choice `p1 / p2`. | ||
80 | 74 | ||
81 | Although PEG's ordered choice is associative, the labeled ordered choice is not. | ||
82 | When using this function, the user should take care to build a left-associative | ||
83 | labeled ordered choice pattern. | ||
84 | 75 | ||
85 | 76 | ||
86 | #### <a name="f-rec"></a><code>lpeglabel.Rec(p1, p2 [, l1, ..., ln])</code> | 77 | #### <a name="f-rec"></a><code>lpeglabel.Rec(p1, p2 [, l1, ..., ln])</code> |
@@ -94,29 +85,26 @@ i.e. `lpeg.Rec(p1, p2)` is equivalent to `lpeg.Rec(p1, p2, 0)`. | |||
94 | 85 | ||
95 | #### <a name="re-t"></a><code>%{l}</code> | 86 | #### <a name="re-t"></a><code>%{l}</code> |
96 | 87 | ||
97 | Syntax of *relabel* module. Equivalent to `lpeg.T(l)`. | 88 | Syntax of *relabelrec* module. Equivalent to `lpeglabelrec.T(l)`. |
98 | 89 | ||
99 | 90 | ||
100 | #### <a name="re-lc"></a><code>p1 /{l1, ..., ln} p2</code> | 91 | #### <a name="re-lc"></a><code>p1 //{l1, ..., ln} p2</code> |
101 | 92 | ||
102 | Syntax of *relabel* module. Equivalent to `lpeg.Lc(p1, p2, l1, ..., ln)`. | 93 | Syntax of *relabelrec* module. Equivalent to `lpeglabelrec.Rec(p1, p2, l1, ..., ln)`. |
103 | 94 | ||
104 | The `/{}` operator is left-associative. | 95 | The `//{}` operator is left-associative. |
105 | 96 | ||
106 | A grammar can use both choice operators (`/` and `/{}`), | ||
107 | but a single choice can not mix them. That is, the parser of `relabel` | ||
108 | module will not recognize a pattern as `p1 / p2 /{l1} p3`. | ||
109 | 97 | ||
110 | 98 | ||
111 | #### <a name="re-line"></a><code>relabel.calcline (subject, i)</code> | 99 | #### <a name="re-line"></a><code>relabelrec.calcline (subject, i)</code> |
112 | 100 | ||
113 | Returns line and column information regarding position <i>i</i> of the subject. | 101 | Returns line and column information regarding position <i>i</i> of the subject. |
114 | 102 | ||
115 | 103 | ||
116 | #### <a name="re-setl"></a><code>relabel.setlabels (tlabel)</code> | 104 | #### <a name="re-setl"></a><code>relabelrec.setlabels (tlabel)</code> |
117 | 105 | ||
118 | Allows to specify a table with labels. The keys of | 106 | Allows to specify a table with labels. The keys of |
119 | `tlabel` must be integers between 0 and 255, | 107 | `tlabel` must be integers between 1 and 255, |
120 | and the associated values should be strings. | 108 | and the associated values should be strings. |
121 | 109 | ||
122 | 110 | ||
@@ -132,16 +120,31 @@ in the *examples* directory. | |||
132 | The following example defines a grammar that matches | 120 | The following example defines a grammar that matches |
133 | a list of identifiers separated by commas. A label | 121 | a list of identifiers separated by commas. A label |
134 | is thrown when there is an error matching an identifier | 122 | is thrown when there is an error matching an identifier |
135 | or a comma: | 123 | or a comma. |
124 | |||
125 | We use function `newError` to store error messages in a | ||
126 | table and to return the index associated with each error message. | ||
127 | |||
136 | 128 | ||
137 | ```lua | 129 | ```lua |
138 | local m = require'lpeglabel' | 130 | local m = require'lpeglabelrec' |
139 | local re = require'relabel' | 131 | local re = require'relabelrec' |
132 | |||
133 | local terror = {} | ||
134 | |||
135 | local function newError(s) | ||
136 | table.insert(terror, s) | ||
137 | return #terror | ||
138 | end | ||
139 | |||
140 | local errUndef = newError("undefined") | ||
141 | local errId = newError("expecting an identifier") | ||
142 | local errComma = newError("expecting ','") | ||
140 | 143 | ||
141 | local g = m.P{ | 144 | local g = m.P{ |
142 | "S", | 145 | "S", |
143 | S = m.V"Id" * m.V"List", | 146 | S = m.V"Id" * m.V"List", |
144 | List = -m.P(1) + (m.V"Comma" + m.T(2)) * (m.V"Id" + m.T(1)) * m.V"List", | 147 | List = -m.P(1) + (m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)) * m.V"List", |
145 | Id = m.V"Sp" * m.R'az'^1, | 148 | Id = m.V"Sp" * m.R'az'^1, |
146 | Comma = m.V"Sp" * ",", | 149 | Comma = m.V"Sp" * ",", |
147 | Sp = m.S" \n\t"^0, | 150 | Sp = m.S" \n\t"^0, |
@@ -151,18 +154,12 @@ function mymatch (g, s) | |||
151 | local r, e, sfail = g:match(s) | 154 | local r, e, sfail = g:match(s) |
152 | if not r then | 155 | if not r then |
153 | local line, col = re.calcline(s, #s - #sfail) | 156 | local line, col = re.calcline(s, #s - #sfail) |
154 | local msg = "Error at line " .. line .. " (col " .. col .. ")" | 157 | local msg = "Error at line " .. line .. " (col " .. col .. "): " |
155 | if e == 1 then | 158 | return r, msg .. terror[e] .. " before '" .. sfail .. "'" |
156 | return r, msg .. ": expecting an identifier before '" .. sfail .. "'" | ||
157 | elseif e == 2 then | ||
158 | return r, msg .. ": expecting ',' before '" .. sfail .. "'" | ||
159 | else | ||
160 | return r, msg | ||
161 | end | ||
162 | end | 159 | end |
163 | return r | 160 | return r |
164 | end | 161 | end |
165 | 162 | ||
166 | print(mymatch(g, "one,two")) --> 8 | 163 | print(mymatch(g, "one,two")) --> 8 |
167 | print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' | 164 | print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' |
168 | print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' | 165 | print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' |
@@ -170,23 +167,73 @@ print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expec | |||
170 | 167 | ||
171 | In this example we could think about writing rule <em>List</em> as follows: | 168 | In this example we could think about writing rule <em>List</em> as follows: |
172 | ```lua | 169 | ```lua |
173 | List = ((m.V"Comma" + m.T(2)) * (m.V"Id" + m.T(1)))^0, | 170 | List = ((m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)))^0, |
174 | ``` | 171 | ``` |
175 | 172 | ||
176 | but when matching this expression agains the end of input | 173 | but when matching this expression against the end of input |
177 | we would get a failure whose associated label would be **2**, | 174 | we would get a failure whose associated label would be **errComma**, |
178 | and this would cause the failure of the *whole* repetition. | 175 | and this would cause the failure of the *whole* repetition. |
179 | |||
180 | 176 | ||
181 | ##### Mnemonics instead of numbers | ||
182 | 177 | ||
183 | In the previous example we could have created a table | ||
184 | with the error messages to improve the readbility of the PEG. | ||
185 | Below we rewrite the previous grammar following this approach: | ||
186 | 178 | ||
179 | #### Error Recovery | ||
180 | |||
181 | By using the `Rec` function we can specify a recovery pattern that | ||
182 | should be matched when a label is thrown. After matching the recovery | ||
183 | pattern, and possibly recording the error, the parser will resume | ||
184 | the <em>regular</em> matching. For example, in the example below | ||
| 185 | we expect to match rule `A`, but when a failure occurs the label 42 | ||
186 | is thrown and then we will try to match the recovery pattern `recp`: | ||
187 | ```lua | 187 | ```lua |
188 | local m = require'lpeglabel' | 188 | local m = require'lpeglabelrec' |
189 | local re = require'relabel' | 189 | |
190 | local recp = m.P"oast" | ||
191 | |||
192 | local g = m.P{ | ||
193 | "S", | ||
194 | S = m.Rec(m.V"A", recp, 42) * ".", | ||
195 | A = m.P"t" * (m.P"est" + m.T(42)) | ||
196 | } | ||
197 | |||
198 | print(g:match("test.")) --> 6 | ||
199 | print(g:match("toast.")) --> 7 | ||
200 | print(g:match("oast.")) --> nil 0 oast. | ||
201 | print(g:match("toward.")) --> nil 0 ward. | ||
202 | ``` | ||
203 | When trying to match subject 'toast.', in rule `A` the first | ||
204 | 't' is matched, then the matching of `m.P"est"` fails and label 42 | ||
| 205 | is thrown, with the associated input suffix 'oast.'. In rule | ||
206 | `S` label 42 is caught and the recovery pattern matches 'oast', | ||
207 | so pattern `'.'` matches the rest of the input. | ||
208 | |||
209 | When matching subject 'oast.', pattern `m.P"t"` fails, and | ||
210 | the result of the matching is <b>nil, 0, oast.</b>. | ||
211 | |||
212 | When matching 'toward.', label 42 is thrown after matching 't', | ||
213 | with the associated input suffix 'oward.'. As the matching of the | ||
214 | recovery pattern fails, the result is <b>nil, 0, ward.</b>. | ||
215 | |||
216 | Usually, the recovery pattern is an expression that does not fail. | ||
217 | In the previous example, we could have used `(m.P(1) - m.P".")^0` | ||
218 | as the recovery pattern. | ||
219 | |||
220 | Below we rewrite the grammar that describes a list of identifiers | ||
221 | to use a recovery strategy. Grammar `g` remains the same, but we add a | ||
222 | recovery grammar `grec` that handles the labels thrown by `g`. | ||
223 | |||
224 | In grammar `grec` we use functions `record` and `sync`. | ||
225 | Function `record`, plus function `recorderror`, will help | ||
226 | us to save the input position where a label was thrown, | ||
227 | while function `sync` will give us a synchronization pattern, | ||
| 228 | that consumes the input while it is not possible to match a given | ||
229 | pattern `p`. | ||
230 | |||
| 231 | When the matching of an identifier fails, a default value ('NONE') | ||
232 | is provided. | ||
233 | |||
234 | ```lua | ||
235 | local m = require'lpeglabelrec' | ||
236 | local re = require'relabelrec' | ||
190 | 237 | ||
191 | local terror = {} | 238 | local terror = {} |
192 | 239 | ||
@@ -199,73 +246,88 @@ local errUndef = newError("undefined") | |||
199 | local errId = newError("expecting an identifier") | 246 | local errId = newError("expecting an identifier") |
200 | local errComma = newError("expecting ','") | 247 | local errComma = newError("expecting ','") |
201 | 248 | ||
249 | local id = m.R'az'^1 | ||
250 | |||
202 | local g = m.P{ | 251 | local g = m.P{ |
203 | "S", | 252 | "S", |
204 | S = m.V"Id" * m.V"List", | 253 | S = m.V"Id" * m.V"List", |
205 | List = -m.P(1) + (m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)) * m.V"List", | 254 | List = -m.P(1) + m.V"Comma" * m.V"Id" * m.V"List", |
206 | Id = m.V"Sp" * m.R'az'^1, | 255 | Id = m.V"Sp" * id + m.T(errId), |
207 | Comma = m.V"Sp" * ",", | 256 | Comma = m.V"Sp" * "," + m.T(errComma), |
208 | Sp = m.S" \n\t"^0, | 257 | Sp = m.S" \n\t"^0, |
209 | } | 258 | } |
210 | 259 | ||
211 | function mymatch (g, s) | 260 | local subject, errors |
212 | local r, e, sfail = g:match(s) | ||
213 | if not r then | ||
214 | local line, col = re.calcline(s, #s - #sfail) | ||
215 | local msg = "Error at line " .. line .. " (col " .. col .. "): " | ||
216 | return r, msg .. terror[e] .. " before '" .. sfail .. "'" | ||
217 | end | ||
218 | return r | ||
219 | end | ||
220 | |||
221 | print(mymatch(g, "one,two")) --> 8 | ||
222 | print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' | ||
223 | print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' | ||
224 | ``` | ||
225 | 261 | ||
262 | function recorderror(pos, lab) | ||
263 | local line, col = re.calcline(subject, pos) | ||
264 | table.insert(errors, { line = line, col = col, msg = terror[lab] }) | ||
265 | end | ||
226 | 266 | ||
227 | ##### *relabel* syntax | 267 | function record (lab) |
268 | return (m.Cp() * m.Cc(lab)) / recorderror | ||
269 | end | ||
228 | 270 | ||
229 | Now we rewrite the previous example using the syntax | 271 | function sync (p) |
230 | supported by *relabel*: | 272 | return (-p * m.P(1))^0 |
273 | end | ||
231 | 274 | ||
232 | ```lua | 275 | local grec = m.P{ |
233 | local re = require 'relabel' | 276 | "S", |
277 | S = m.Rec(m.Rec(g, m.V"ErrComma", errComma), m.V"ErrId", errId), | ||
278 | ErrComma = record(errComma) * sync(id), | ||
279 | ErrId = record(errId) * sync(m.P",") | ||
280 | } | ||
234 | 281 | ||
235 | local g = re.compile[[ | ||
236 | S <- Id List | ||
237 | List <- !. / (',' / %{2}) (Id / %{1}) List | ||
238 | Id <- Sp [a-z]+ | ||
239 | Comma <- Sp ',' | ||
240 | Sp <- %s* | ||
241 | ]] | ||
242 | 282 | ||
243 | function mymatch (g, s) | 283 | function mymatch (g, s) |
284 | errors = {} | ||
285 | subject = s | ||
244 | local r, e, sfail = g:match(s) | 286 | local r, e, sfail = g:match(s) |
245 | if not r then | 287 | if #errors > 0 then |
246 | local line, col = re.calcline(s, #s - #sfail) | 288 | local out = {} |
247 | local msg = "Error at line " .. line .. " (col " .. col .. ")" | 289 | for i, err in ipairs(errors) do |
248 | if e == 1 then | 290 | local msg = "Error at line " .. err.line .. " (col " .. err.col .. "): " .. err.msg |
249 | return r, msg .. ": expecting an identifier before '" .. sfail .. "'" | 291 | table.insert(out, msg) |
250 | elseif e == 2 then | ||
251 | return r, msg .. ": expecting ',' before '" .. sfail .. "'" | ||
252 | else | ||
253 | return r, msg | ||
254 | end | 292 | end |
293 | return nil, table.concat(out, "\n") .. "\n" | ||
255 | end | 294 | end |
256 | return r | 295 | return r |
257 | end | 296 | end |
258 | 297 | ||
259 | print(mymatch(g, "one,two")) --> 8 | 298 | print(mymatch(grec, "one,two")) |
260 | print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' | 299 | -- Captures (separated by ';'): one; two; |
261 | print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' | 300 | -- Syntactic errors found: 0 |
301 | |||
302 | print(mymatch(grec, "one two three")) | ||
303 | -- Captures (separated by ';'): one; two; three; | ||
304 | -- Syntactic errors found: 2 | ||
305 | -- Error at line 1 (col 4): expecting ',' | ||
306 | -- Error at line 1 (col 8): expecting ',' | ||
307 | |||
308 | print(mymatch(grec, "1,\n two, \n3,")) | ||
309 | -- Captures (separated by ';'): NONE; two; NONE; NONE; | ||
310 | -- Syntactic errors found: 3 | ||
311 | -- Error at line 1 (col 1): expecting an identifier | ||
312 | -- Error at line 2 (col 6): expecting an identifier | ||
313 | -- Error at line 3 (col 2): expecting an identifier | ||
314 | |||
315 | print(mymatch(grec, "one\n two123, \nthree,")) | ||
316 | -- Captures (separated by ';'): one; two; three; NONE; | ||
317 | -- Syntactic errors found: 3 | ||
318 | -- Error at line 2 (col 1): expecting ',' | ||
319 | -- Error at line 2 (col 5): expecting ',' | ||
320 | -- Error at line 3 (col 6): expecting an identifier | ||
262 | ``` | 321 | ``` |
263 | 322 | ||
264 | With the help of function *setlabels* we can also rewrite the previous example to use | 323 | ##### *relabelrec* syntax |
265 | mnemonic labels instead of plain numbers: | 324 | |
325 | Below we describe again a grammar that matches a list of identifiers, | ||
326 | now using the syntax supported by *relabelrec*, where `//{}` is the | ||
327 | recovery operator, and `%{}` is the throw operator: | ||
266 | 328 | ||
267 | ```lua | 329 | ```lua |
268 | local re = require 'relabel' | 330 | local re = require 'relabelrec' |
269 | 331 | ||
270 | local errinfo = { | 332 | local errinfo = { |
271 | {"errUndef", "undefined"}, | 333 | {"errUndef", "undefined"}, |
@@ -285,59 +347,124 @@ re.setlabels(labels) | |||
285 | 347 | ||
286 | local g = re.compile[[ | 348 | local g = re.compile[[ |
287 | S <- Id List | 349 | S <- Id List |
288 | List <- !. / (',' / %{errComma}) (Id / %{errId}) List | 350 | List <- !. / Comma Id List |
289 | Id <- Sp [a-z]+ | 351 | Id <- Sp {[a-z]+} / %{errId} |
290 | Comma <- Sp ',' | 352 | Comma <- Sp ',' / %{errComma} |
291 | Sp <- %s* | 353 | Sp <- %s* |
292 | ]] | 354 | ]] |
293 | 355 | ||
356 | local errors | ||
357 | |||
358 | function recorderror (subject, pos, label) | ||
359 | local line, col = re.calcline(subject, pos) | ||
360 | table.insert(errors, { line = line, col = col, msg = errmsgs[labels[label]] }) | ||
361 | return true | ||
362 | end | ||
363 | |||
364 | function sync (p) | ||
365 | return '( !(' .. p .. ') .)*' | ||
366 | end | ||
367 | |||
368 | local grec = re.compile( | ||
369 | "S <- %g //{errComma} ErrComma //{errId} ErrId" .. "\n" .. | ||
370 | "ErrComma <- ('' -> 'errComma' => recorderror) " .. sync('[a-z]+') .. "\n" .. | ||
371 | "ErrId <- ('' -> 'errId' => recorderror) " .. sync('","') .. "-> default" | ||
372 | , {g = g, recorderror = recorderror, default = "NONE"} | ||
373 | ) | ||
374 | |||
294 | function mymatch (g, s) | 375 | function mymatch (g, s) |
295 | local r, e, sfail = g:match(s) | 376 | errors = {} |
296 | if not r then | 377 | subject = s |
297 | local line, col = re.calcline(s, #s - #sfail) | 378 | io.write("Input: ", s, "\n") |
298 | local msg = "Error at line " .. line .. " (col " .. col .. "): " | 379 | local r = { g:match(s) } |
299 | return r, msg .. errmsgs[e] .. " before '" .. sfail .. "'" | 380 | io.write("Captures (separated by ';'): ") |
381 | for k, v in pairs(r) do | ||
382 | io.write(v .. "; ") | ||
300 | end | 383 | end |
384 | io.write("\nSyntactic errors found: " .. #errors) | ||
385 | if #errors > 0 then | ||
386 | io.write("\n") | ||
387 | local out = {} | ||
388 | for i, err in ipairs(errors) do | ||
389 | local msg = "Error at line " .. err.line .. " (col " .. err.col .. "): " .. err.msg | ||
390 | table.insert(out, msg) | ||
391 | end | ||
392 | io.write(table.concat(out, "\n")) | ||
393 | end | ||
394 | print("\n") | ||
301 | return r | 395 | return r |
302 | end | 396 | end |
303 | 397 | ||
304 | print(mymatch(g, "one,two")) --> 8 | 398 | print(mymatch(grec, "one,two")) |
305 | print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' | 399 | -- Captures (separated by ';'): one; two; |
306 | print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' | 400 | -- Syntactic errors found: 0 |
401 | |||
402 | print(mymatch(grec, "one two three")) | ||
403 | -- Captures (separated by ';'): one; two; three; | ||
404 | -- Syntactic errors found: 2 | ||
405 | -- Error at line 1 (col 4): expecting ',' | ||
406 | -- Error at line 1 (col 8): expecting ',' | ||
407 | |||
408 | print(mymatch(grec, "1,\n two, \n3,")) | ||
409 | -- Captures (separated by ';'): NONE; two; NONE; NONE; | ||
410 | -- Syntactic errors found: 3 | ||
411 | -- Error at line 1 (col 1): expecting an identifier | ||
412 | -- Error at line 2 (col 6): expecting an identifier | ||
413 | -- Error at line 3 (col 2): expecting an identifier | ||
414 | |||
415 | print(mymatch(grec, "one\n two123, \nthree,")) | ||
416 | -- Captures (separated by ';'): one; two; three; NONE; | ||
417 | -- Syntactic errors found: 3 | ||
418 | -- Error at line 2 (col 1): expecting ',' | ||
419 | -- Error at line 2 (col 5): expecting ',' | ||
420 | -- Error at line 3 (col 6): expecting an identifier | ||
307 | ``` | 421 | ``` |
308 | 422 | ||
423 | |||
309 | #### Arithmetic Expressions | 424 | #### Arithmetic Expressions |
310 | 425 | ||
311 | Here's an example of an LPegLabel grammar that make its own function called | 426 | Here's an example of an LPegLabel grammar that matches an expression. |
312 | 'expect', which takes a pattern and a label as parameters and throws the label | 427 | We have used a function `expect`, that takes a pattern `patt` and a label as |
313 | if the pattern fails to be matched. This function can be extended later on to | 428 | parameters and builds a new pattern that throws this label when `patt` |
314 | record all errors encountered once error recovery is implemented. | 429 | fails. |
315 | 430 | ||
316 | ```lua | 431 | When a subexpression is syntactically invalid, a default value of 1000 |
317 | local lpeg = require"lpeglabel" | 432 | is provided by the recovery pattern, so the evaluation of an expression |
433 | should always produce a numeric value. | ||
318 | 434 | ||
319 | local R, S, P, V, C, Ct, T = lpeg.R, lpeg.S, lpeg.P, lpeg.V, lpeg.C, lpeg.Ct, lpeg.T | 435 | In this example, we can see that it may be a tedious and error prone |
436 | task to build manually the recovery grammar `grec`. In the next example | ||
437 | we will show how to build the recovery grammar in a more automatic way. | ||
438 | |||
439 | ```lua | ||
440 | local m = require"lpeglabelrec" | ||
441 | local re = require"relabelrec" | ||
320 | 442 | ||
321 | local labels = { | 443 | local labels = { |
322 | {"NoExp", "no expression found"}, | 444 | {"ExpTermFirst", "expected an expression"}, |
323 | {"Extra", "extra characters found after the expression"}, | 445 | {"ExpTermOp", "expected a term after the operator"}, |
324 | {"ExpTerm", "expected a term after the operator"}, | ||
325 | {"ExpExp", "expected an expression after the parenthesis"}, | ||
326 | {"MisClose", "missing a closing ')' after the expression"}, | 446 | {"MisClose", "missing a closing ')' after the expression"}, |
327 | } | 447 | } |
328 | 448 | ||
329 | local function expect(patt, labname) | 449 | local function labelindex(labname) |
330 | for i, elem in ipairs(labels) do | 450 | for i, elem in ipairs(labels) do |
331 | if elem[1] == labname then | 451 | if elem[1] == labname then |
332 | return patt + T(i) | 452 | return i |
333 | end | 453 | end |
334 | end | 454 | end |
335 | |||
336 | error("could not find label: " .. labname) | 455 | error("could not find label: " .. labname) |
337 | end | 456 | end |
338 | 457 | ||
339 | local num = R("09")^1 / tonumber | 458 | local errors, subject |
340 | local op = S("+-*/") | 459 | |
460 | local function expect(patt, labname) | ||
461 | local i = labelindex(labname) | ||
462 | return patt + m.T(i) | ||
463 | end | ||
464 | |||
465 | |||
466 | local num = m.R("09")^1 / tonumber | ||
467 | local op = m.S("+-") | ||
341 | 468 | ||
342 | local function compute(tokens) | 469 | local function compute(tokens) |
343 | local result = tokens[1] | 470 | local result = tokens[1] |
@@ -346,10 +473,6 @@ local function compute(tokens) | |||
346 | result = result + tokens[i+1] | 473 | result = result + tokens[i+1] |
347 | elseif tokens[i] == '-' then | 474 | elseif tokens[i] == '-' then |
348 | result = result - tokens[i+1] | 475 | result = result - tokens[i+1] |
349 | elseif tokens[i] == '*' then | ||
350 | result = result * tokens[i+1] | ||
351 | elseif tokens[i] == '/' then | ||
352 | result = result / tokens[i+1] | ||
353 | else | 476 | else |
354 | error('unknown operation: ' .. tokens[i]) | 477 | error('unknown operation: ' .. tokens[i]) |
355 | end | 478 | end |
@@ -357,81 +480,223 @@ local function compute(tokens) | |||
357 | return result | 480 | return result |
358 | end | 481 | end |
359 | 482 | ||
360 | local g = P { | 483 | local g = m.P { |
361 | "Exp", | 484 | "Exp", |
362 | Exp = Ct(V"Term" * (C(op) * expect(V"Term", "ExpTerm"))^0) / compute; | 485 | Exp = m.Ct(m.V"OperandFirst" * (m.C(op) * m.V"Operand")^0) / compute, |
363 | Term = num + V"Group"; | 486 | OperandFirst = expect(m.V"Term", "ExpTermFirst"), |
364 | Group = "(" * expect(V"Exp", "ExpExp") * expect(")", "MisClose"); | 487 | Operand = expect(m.V"Term", "ExpTermOp"), |
488 | Term = num + m.V"Group", | ||
489 | Group = "(" * m.V"Exp" * expect(")", "MisClose"), | ||
365 | } | 490 | } |
366 | 491 | ||
367 | g = expect(g, "NoExp") * expect(-P(1), "Extra") | 492 | function recorderror(pos, lab) |
493 | local line, col = re.calcline(subject, pos) | ||
494 | table.insert(errors, { line = line, col = col, msg = labels[lab][2] }) | ||
495 | end | ||
368 | 496 | ||
497 | function record (labname) | ||
498 | return (m.Cp() * m.Cc(labelindex(labname))) / recorderror | ||
499 | end | ||
500 | |||
501 | function sync (p) | ||
502 | return (-p * m.P(1))^0 | ||
503 | end | ||
504 | |||
505 | function defaultValue (p) | ||
506 | return p or m.Cc(1000) | ||
507 | end | ||
508 | |||
509 | local grec = m.P { | ||
510 | "S", | ||
511 | S = m.Rec(m.V"A", m.V"ErrExpTermFirst", labelindex("ExpTermFirst")), | ||
512 | A = m.Rec(m.V"Sg", m.V"ErrExpTermOp", labelindex("ExpTermOp")), | ||
513 | Sg = m.Rec(g, m.V"ErrMisClose", labelindex("MisClose")), | ||
514 | ErrExpTermFirst = record("ExpTermFirst") * sync(op + ")") * defaultValue(), | ||
515 | ErrExpTermOp = record("ExpTermOp") * sync(op + ")") * defaultValue(), | ||
516 | ErrMisClose = record("MisClose") * sync(m.P")") * defaultValue(m.P""), | ||
517 | } | ||
518 | |||
369 | local function eval(input) | 519 | local function eval(input) |
370 | local result, label, suffix = g:match(input) | 520 | errors = {} |
371 | if result ~= nil then | 521 | io.write("Input: ", input, "\n") |
372 | return result | 522 | subject = input |
373 | else | 523 | local result, label, suffix = grec:match(input) |
374 | local pos = input:len() - suffix:len() + 1 | 524 | io.write("Syntactic errors found: " .. #errors, "\n") |
375 | local msg = labels[label][2] | 525 | if #errors > 0 then |
376 | return nil, "syntax error: " .. msg .. " (at index " .. pos .. ")" | 526 | local out = {} |
527 | for i, err in ipairs(errors) do | ||
528 | local pos = err.col | ||
529 | local msg = err.msg | ||
530 | table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")") | ||
531 | end | ||
532 | print(table.concat(out, "\n")) | ||
377 | end | 533 | end |
534 | io.write("Result = ") | ||
535 | return result | ||
378 | end | 536 | end |
379 | 537 | ||
380 | print(eval "98-76*(54/32)") | 538 | print(eval "90-70-(5)+3") |
381 | --> 37.125 | 539 | -- Syntactic errors found: 0 |
540 | -- Result = 18 | ||
541 | |||
542 | print(eval "15+") | ||
543 | -- Syntactic errors found: 1 | ||
544 | -- syntax error: expected a term after the operator (at index 3) | ||
545 | -- Result = 1015 | ||
546 | |||
547 | print(eval "-2") | ||
548 | -- Syntactic errors found: 1 | ||
549 | -- syntax error: expected an expression (at index 1) | ||
550 | -- Result = 998 | ||
551 | |||
552 | print(eval "1+()+") | ||
553 | -- Syntactic errors found: 2 | ||
554 | -- syntax error: expected an expression (at index 4) | ||
555 | -- syntax error: expected a term after the operator (at index 5) | ||
556 | -- Result = 2001 | ||
557 | |||
558 | print(eval "1+(") | ||
559 | -- Syntactic errors found: 2 | ||
560 | -- syntax error: expected an expression (at index 3) | ||
561 | -- syntax error: missing a closing ')' after the expression (at index 3) | ||
562 | -- Result = 1001 | ||
563 | |||
564 | print(eval "3)") | ||
565 | -- Syntactic errors found: 0 | ||
566 | -- Result = 3 | ||
567 | ``` | ||
382 | 568 | ||
383 | print(eval "(1+1-1*2/2") | 569 | #### Automatically Building the Recovery Grammar |
384 | --> syntax error: missing a closing ')' after the expression (at index 11) | ||
385 | 570 | ||
386 | print(eval "(1+)-1*(2/2)") | 571 | Below we rewrite the previous example to automatically |
387 | --> syntax error: expected a term after the operator (at index 4) | 572 | build the recovery grammar based on information provided |
573 | by the user for each label (error message, recovery pattern, etc). | ||
574 | In the example below we also throw an error when the grammar | ||
575 | does not match the whole subject. | ||
388 | 576 | ||
389 | print(eval "(1+1)-1*(/2)") | 577 | ```lua |
390 | --> syntax error: expected an expression after the parenthesis (at index 10) | 578 | local m = require"lpeglabelrec" |
579 | local re = require"relabelrec" | ||
391 | 580 | ||
392 | print(eval "1+(1-(1*2))/2x") | 581 | local num = m.R("09")^1 / tonumber |
393 | --> syntax error: extra characters found after the expression (at index 14) | 582 | local op = m.S("+-") |
394 | 583 | ||
395 | print(eval "-1+(1-(1*2))/2") | 584 | local labels = {} |
396 | --> syntax error: no expression found (at index 1) | 585 | local nlabels = 0 |
397 | ``` | ||
398 | 586 | ||
399 | #### Catching labels | 587 | local function newError(lab, msg, psync, pcap) |
588 | nlabels = nlabels + 1 | ||
589 | psync = psync or m.P(-1) | ||
590 | pcap = pcap or m.P"" | ||
591 | labels[lab] = { id = nlabels, msg = msg, psync = psync, pcap = pcap } | ||
592 | end | ||
400 | 593 | ||
401 | When a label is thrown, the grammar itself can handle this label | 594 | newError("ExpTermFirst", "expected an expression", op + ")", m.Cc(1000)) |
402 | by using the labeled ordered choice. Below we rewrite the example | 595 | newError("ExpTermOp", "expected a term after the operator", op + ")", m.Cc(1000)) |
403 | of the list of identifiers to show this feature: | 596 | newError("MisClose", "missing a closing ')' after the expression", m.P")") |
597 | newError("Extra", "extra characters found after the expression") | ||
404 | 598 | ||
599 | local errors, subject | ||
405 | 600 | ||
406 | ```lua | 601 | local function expect(patt, labname) |
407 | local m = require'lpeglabel' | 602 | local i = labels[labname].id |
603 | return patt + m.T(i) | ||
604 | end | ||
408 | 605 | ||
409 | local terror = {} | 606 | local function compute(tokens) |
607 | local result = tokens[1] | ||
608 | for i = 2, #tokens, 2 do | ||
609 | if tokens[i] == '+' then | ||
610 | result = result + tokens[i+1] | ||
611 | elseif tokens[i] == '-' then | ||
612 | result = result - tokens[i+1] | ||
613 | else | ||
614 | error('unknown operation: ' .. tokens[i]) | ||
615 | end | ||
616 | end | ||
617 | return result | ||
618 | end | ||
410 | 619 | ||
411 | local function newError(s) | 620 | local g = m.P { |
412 | table.insert(terror, s) | 621 | "Exp", |
413 | return #terror | 622 | Exp = m.Ct(m.V"OperandFirst" * (m.C(op) * m.V"Operand")^0) / compute, |
623 | OperandFirst = expect(m.V"Term", "ExpTermFirst"), | ||
624 | Operand = expect(m.V"Term", "ExpTermOp"), | ||
625 | Term = num + m.V"Group", | ||
626 | Group = "(" * m.V"Exp" * expect(")", "MisClose"), | ||
627 | } | ||
628 | |||
629 | function recorderror(pos, lab) | ||
630 | local line, col = re.calcline(subject, pos) | ||
631 | table.insert(errors, { line = line, col = col, msg = labels[lab].msg }) | ||
414 | end | 632 | end |
415 | 633 | ||
416 | local errUndef = newError("undefined") | 634 | function record (labname) |
417 | local errId = newError("expecting an identifier") | 635 | return (m.Cp() * m.Cc(labname)) / recorderror |
418 | local errComma = newError("expecting ','") | 636 | end |
419 | 637 | ||
420 | local g = m.P{ | 638 | function sync (p) |
421 | "S", | 639 | return (-p * m.P(1))^0 |
422 | S = m.Lc(m.Lc(m.V"Id" * m.V"List", m.V"ErrId", errId), | 640 | end |
423 | m.V"ErrComma", errComma), | 641 | |
424 | List = -m.P(1) + (m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)) * m.V"List", | 642 | function defaultValue (p) |
425 | Id = m.V"Sp" * m.R'az'^1, | 643 | return p or m.Cc(1000) |
426 | Comma = m.V"Sp" * ",", | 644 | end |
427 | Sp = m.S" \n\t"^0, | 645 | |
428 | ErrId = m.Cc(errId) / terror, | 646 | local grec = g * expect(m.P(-1), "Extra") |
429 | ErrComma = m.Cc(errComma) / terror | 647 | for k, v in pairs(labels) do |
430 | } | 648 | grec = m.Rec(grec, record(k) * sync(v.psync) * v.pcap, v.id) |
649 | end | ||
650 | |||
651 | local function eval(input) | ||
652 | errors = {} | ||
653 | io.write("Input: ", input, "\n") | ||
654 | subject = input | ||
655 | local result, label, suffix = grec:match(input) | ||
656 | io.write("Syntactic errors found: " .. #errors, "\n") | ||
657 | if #errors > 0 then | ||
658 | local out = {} | ||
659 | for i, err in ipairs(errors) do | ||
660 | local pos = err.col | ||
661 | local msg = err.msg | ||
662 | table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")") | ||
663 | end | ||
664 | print(table.concat(out, "\n")) | ||
665 | end | ||
666 | io.write("Result = ") | ||
667 | return result | ||
668 | end | ||
431 | 669 | ||
432 | print(m.match(g, "one,two")) --> 8 | 670 | print(eval "90-70-(5)+3") |
433 | print(m.match(g, "one two")) --> expecting ',' | 671 | -- Syntactic errors found: 0 |
434 | print(m.match(g, "one,\n two,\nthree,")) --> expecting an identifier | 672 | -- Result = 18 |
673 | |||
674 | print(eval "15+") | ||
675 | -- Syntactic errors found: 1 | ||
676 | -- syntax error: expected a term after the operator (at index 3) | ||
677 | -- Result = 1015 | ||
678 | |||
679 | print(eval "-2") | ||
680 | -- Syntactic errors found: 1 | ||
681 | -- syntax error: expected an expression (at index 1) | ||
682 | -- Result = 998 | ||
683 | |||
684 | print(eval "1+()+") | ||
685 | -- Syntactic errors found: 2 | ||
686 | -- syntax error: expected an expression (at index 4) | ||
687 | -- syntax error: expected a term after the operator (at index 5) | ||
688 | -- Result = 2001 | ||
689 | |||
690 | print(eval "1+(") | ||
691 | -- Syntactic errors found: 2 | ||
692 | -- syntax error: expected an expression (at index 3) | ||
693 | -- syntax error: missing a closing ')' after the expression (at index 3) | ||
694 | -- Result = 1001 | ||
695 | |||
696 | print(eval "3)") | ||
697 | -- Syntactic errors found: 1 | ||
698 | -- syntax error: extra characters found after the expression (at index 2) | ||
699 | -- Result = 3 | ||
435 | ``` | 700 | ``` |
436 | 701 | ||
437 | #### Error Recovery | 702 | #### Error Recovery |