aboutsummaryrefslogtreecommitdiff
path: root/README.md
diff options
context:
space:
mode:
authorSergio Queiroz <sqmedeiros@gmail.com>2016-12-13 13:53:49 -0300
committerSergio Queiroz <sqmedeiros@gmail.com>2016-12-13 13:53:49 -0300
commit09fab0decb7df93528ab40fcfd99587e9074c64f (patch)
treeecd7a763c7a08712f122945bb5ce1ed7d7e5f077 /README.md
parentd80821d79376671371c15ded562fbe1a9bebc635 (diff)
parent1322d612d72ac658f2aa443dca94954b819c0993 (diff)
downloadlpeglabel-09fab0decb7df93528ab40fcfd99587e9074c64f.tar.gz
lpeglabel-09fab0decb7df93528ab40fcfd99587e9074c64f.tar.bz2
lpeglabel-09fab0decb7df93528ab40fcfd99587e9074c64f.zip
Merge branch 'recoveryresume'
Diffstat (limited to 'README.md')
-rw-r--r--README.md665
1 files changed, 465 insertions, 200 deletions
diff --git a/README.md b/README.md
index 1f1bdff..9484b3d 100644
--- a/README.md
+++ b/README.md
@@ -10,47 +10,46 @@ LPegLabel is a conservative extension of the
10[LPeg](http://www.inf.puc-rio.br/~roberto/lpeg) 10[LPeg](http://www.inf.puc-rio.br/~roberto/lpeg)
11library that provides an implementation of Parsing 11library that provides an implementation of Parsing
12Expression Grammars (PEGs) with labeled failures. 12Expression Grammars (PEGs) with labeled failures.
13Labels can be used to signal different kinds of erros 13Labels can be used to signal different kinds of errors
14and to specify which alternative in a labeled ordered 14and to specify which recovery pattern should handle a
15choice should handle a given label. Labels can also be 15given label. Labels can also be combined with the standard
16combined with the standard patterns of LPeg. 16patterns of LPeg.
17 17
18This document describes the new functions available 18This document describes the new functions available
19in LpegLabel and presents some examples of usage. 19in LpegLabel and presents some examples of usage.
20For a more detailed discussion about PEGs with labeled failures
21please see [A Parsing Machine for Parsing Expression
22Grammars with Labeled Failures](https://docs.google.com/viewer?a=v&pid=sites&srcid=ZGVmYXVsdGRvbWFpbnxzcW1lZGVpcm9zfGd4OjMzZmE3YzM0Y2E2MGM5Y2M).
23
24 20
25In LPegLabel, the result of an unsuccessful matching 21In LPegLabel, the result of an unsuccessful matching
26is a triple **nil, lab, sfail**, where **lab** 22is a triple **nil, lab, sfail**, where **lab**
27is the label associated with the failure, and 23is the label associated with the failure, and
28**sfail** is the suffix input being matched when 24**sfail** is the suffix input being matched when
29**lab** was thrown. Below there is a brief summary 25**lab** was thrown.
30of the new functions provided by LpegLabel: 26
27With labeled failures it is possible to distinguish
28between a regular failure and an error. Usually, a
29regular failure is produced when the matching of a
30character fails, and it is caught by an ordered choice.
31An error, in turn, is produced by the throw operator
32and may be caught by the recovery operator.
33
34Below there is a brief summary of the new functions provided by LpegLabel:
31 35
32<table border="1"> 36<table border="1">
33<tbody><tr><td><b>Function</b></td><td><b>Description</b></td></tr> 37<tbody><tr><td><b>Function</b></td><td><b>Description</b></td></tr>
34<tr><td><a href="#f-t"><code>lpeglabel.T (l)</code></a></td> 38<tr><td><a href="#f-t"><code>lpeglabelrec.T (l)</code></a></td>
35 <td>Throws label <code>l</code></td></tr> 39 <td>Throws a label <code>l</code> to signal an error</td></tr>
36<tr><td><a href="#f-lc"><code>lpeglabel.Lc (p1, p2, l1, ..., ln)</code></a></td> 40<tr><td><a href="#f-rec"><code>lpeglabelrec.Rec (p1, p2, l1 [, l2, ..., ln])</code></a></td>
37 <td>Matches <code>p1</code> and tries to match <code>p2</code> 41 <td>Specifies a recovery pattern <code>p2</code> for <code>p1</code>,
38 if the matching of <code>p1</code> gives one of l<sub>1</sub>, ..., l<sub>n</sub> 42 when the matching of <code>p1</code> gives one of the labels l1, ..., ln.</td></tr>
39 </td></tr>
40<tr><td><a href="#f-rec"><code>lpeglabel.Rec (p1, p2 [, l1, ..., ln])</code></a></td>
41 <td>Like <code>Lc</code> but does not reset the position of the parser
42 when trying <code>p2</code>. By default, it catches regular PEG failures
43 </td></tr>
44<tr><td><a href="#re-t"><code>%{l}</code></a></td> 43<tr><td><a href="#re-t"><code>%{l}</code></a></td>
45 <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeg.T(l)</code> 44 <td>Syntax of <em>relabelrec</em> module. Equivalent to <code>lpeglabelrec.T(l)</code>
46 </td></tr> 45 </td></tr>
47<tr><td><a href="#re-lc"><code>p1 /{l1, ..., ln} p2</code></a></td> 46<tr><td><a href="#re-rec"><code>p1 //{l1 [, l2, ..., ln]} p2</code></a></td>
48 <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeg.Lc(p1, p2, l1, ..., ln)</code> 47 <td>Syntax of <em>relabelrec</em> module. Equivalent to <code>lpeglabelrec.Rec(p1, p2, l1, ..., ln)</code>
49 </td></tr> 48 </td></tr>
50<tr><td><a href="#re-line"><code>relabel.calcline(subject, i)</code></a></td> 49<tr><td><a href="#re-line"><code>relabelrec.calcline(subject, i)</code></a></td>
51 <td>Calculates line and column information regarding position <i>i</i> of the subject</code> 50 <td>Calculates line and column information regarding position <i>i</i> of the subject</code>
52 </td></tr> 51 </td></tr>
53<tr><td><a href="#re-setl"><code>relabel.setlabels (tlabel)</code></a></td> 52<tr><td><a href="#re-setl"><code>relabelrec.setlabels (tlabel)</code></a></td>
54 <td>Allows to specify a table with mnemonic labels. 53 <td>Allows to specify a table with mnemonic labels.
55 </td></tr> 54 </td></tr>
56</tbody></table> 55</tbody></table>
@@ -59,28 +58,20 @@ of the new functions provided by LpegLabel:
59### Functions 58### Functions
60 59
61 60
62#### <a name="f-t"></a><code>lpeglabel.T(l)</code> 61#### <a name="f-t"></a><code>lpeglabelrec.T(l)</code>
63 62
64 63
65Returns a pattern that throws the label `l`. 64Returns a pattern that throws the label `l`.
66A label must be an integer between 0 and 255. 65A label must be an integer between 1 and 255.
67 66
68The label 0 is equivalent to the regular failure of PEGs.
69 67
68#### <a name="f-rec"></a><code>lpeglabelrec.Rec(p1, p2, l1, ..., ln)</code>
70 69
71#### <a name="f-lc"></a><code>lpeglabel.Lc(p1, p2, l1, ..., ln)</code> 70Returns a *recovery pattern*.
72
73Returns a pattern equivalent to a *labeled ordered choice*.
74If the matching of `p1` gives one of the labels `l1, ..., ln`, 71If the matching of `p1` gives one of the labels `l1, ..., ln`,
75then the matching of `p2` is tried from the same position. Otherwise, 72then the matching of `p2` is tried from the failure position of `p1`.
76the result of the matching of `p1` is the pattern's result. 73Otherwise, the result of the matching of `p1` is the pattern's result.
77
78The labeled ordered choice `lpeg.Lc(p1, p2, 0)` is equivalent to the
79regular ordered choice `p1 / p2`.
80 74
81Although PEG's ordered choice is associative, the labeled ordered choice is not.
82When using this function, the user should take care to build a left-associative
83labeled ordered choice pattern.
84 75
85 76
86#### <a name="f-rec"></a><code>lpeglabel.Rec(p1, p2 [, l1, ..., ln])</code> 77#### <a name="f-rec"></a><code>lpeglabel.Rec(p1, p2 [, l1, ..., ln])</code>
@@ -94,29 +85,26 @@ i.e. `lpeg.Rec(p1, p2)` is equivalent to `lpeg.Rec(p1, p2, 0)`.
94 85
95#### <a name="re-t"></a><code>%{l}</code> 86#### <a name="re-t"></a><code>%{l}</code>
96 87
97Syntax of *relabel* module. Equivalent to `lpeg.T(l)`. 88Syntax of *relabelrec* module. Equivalent to `lpeglabelrec.T(l)`.
98 89
99 90
100#### <a name="re-lc"></a><code>p1 /{l1, ..., ln} p2</code> 91#### <a name="re-lc"></a><code>p1 //{l1, ..., ln} p2</code>
101 92
102Syntax of *relabel* module. Equivalent to `lpeg.Lc(p1, p2, l1, ..., ln)`. 93Syntax of *relabelrec* module. Equivalent to `lpeglabelrec.Rec(p1, p2, l1, ..., ln)`.
103 94
104The `/{}` operator is left-associative. 95The `//{}` operator is left-associative.
105 96
106A grammar can use both choice operators (`/` and `/{}`),
107but a single choice can not mix them. That is, the parser of `relabel`
108module will not recognize a pattern as `p1 / p2 /{l1} p3`.
109 97
110 98
111#### <a name="re-line"></a><code>relabel.calcline (subject, i)</code> 99#### <a name="re-line"></a><code>relabelrec.calcline (subject, i)</code>
112 100
113Returns line and column information regarding position <i>i</i> of the subject. 101Returns line and column information regarding position <i>i</i> of the subject.
114 102
115 103
116#### <a name="re-setl"></a><code>relabel.setlabels (tlabel)</code> 104#### <a name="re-setl"></a><code>relabelrec.setlabels (tlabel)</code>
117 105
118Allows to specify a table with labels. The keys of 106Allows to specify a table with labels. The keys of
119`tlabel` must be integers between 0 and 255, 107`tlabel` must be integers between 1 and 255,
120and the associated values should be strings. 108and the associated values should be strings.
121 109
122 110
@@ -132,16 +120,31 @@ in the *examples* directory.
132The following example defines a grammar that matches 120The following example defines a grammar that matches
133a list of identifiers separated by commas. A label 121a list of identifiers separated by commas. A label
134is thrown when there is an error matching an identifier 122is thrown when there is an error matching an identifier
135or a comma: 123or a comma.
124
125We use function `newError` to store error messages in a
126table and to return the index associated with each error message.
127
136 128
137```lua 129```lua
138local m = require'lpeglabel' 130local m = require'lpeglabelrec'
139local re = require'relabel' 131local re = require'relabelrec'
132
133local terror = {}
134
135local function newError(s)
136 table.insert(terror, s)
137 return #terror
138end
139
140local errUndef = newError("undefined")
141local errId = newError("expecting an identifier")
142local errComma = newError("expecting ','")
140 143
141local g = m.P{ 144local g = m.P{
142 "S", 145 "S",
143 S = m.V"Id" * m.V"List", 146 S = m.V"Id" * m.V"List",
144 List = -m.P(1) + (m.V"Comma" + m.T(2)) * (m.V"Id" + m.T(1)) * m.V"List", 147 List = -m.P(1) + (m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)) * m.V"List",
145 Id = m.V"Sp" * m.R'az'^1, 148 Id = m.V"Sp" * m.R'az'^1,
146 Comma = m.V"Sp" * ",", 149 Comma = m.V"Sp" * ",",
147 Sp = m.S" \n\t"^0, 150 Sp = m.S" \n\t"^0,
@@ -151,18 +154,12 @@ function mymatch (g, s)
151 local r, e, sfail = g:match(s) 154 local r, e, sfail = g:match(s)
152 if not r then 155 if not r then
153 local line, col = re.calcline(s, #s - #sfail) 156 local line, col = re.calcline(s, #s - #sfail)
154 local msg = "Error at line " .. line .. " (col " .. col .. ")" 157 local msg = "Error at line " .. line .. " (col " .. col .. "): "
155 if e == 1 then 158 return r, msg .. terror[e] .. " before '" .. sfail .. "'"
156 return r, msg .. ": expecting an identifier before '" .. sfail .. "'"
157 elseif e == 2 then
158 return r, msg .. ": expecting ',' before '" .. sfail .. "'"
159 else
160 return r, msg
161 end
162 end 159 end
163 return r 160 return r
164end 161end
165 162
166print(mymatch(g, "one,two")) --> 8 163print(mymatch(g, "one,two")) --> 8
167print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' 164print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two'
168print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' 165print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before ''
@@ -170,23 +167,73 @@ print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expec
170 167
171In this example we could think about writing rule <em>List</em> as follows: 168In this example we could think about writing rule <em>List</em> as follows:
172```lua 169```lua
173List = ((m.V"Comma" + m.T(2)) * (m.V"Id" + m.T(1)))^0, 170List = ((m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)))^0,
174``` 171```
175 172
176but when matching this expression agains the end of input 173but when matching this expression against the end of input
177we would get a failure whose associated label would be **2**, 174we would get a failure whose associated label would be **errComma**,
178and this would cause the failure of the *whole* repetition. 175and this would cause the failure of the *whole* repetition.
179
180 176
181##### Mnemonics instead of numbers
182 177
183In the previous example we could have created a table
184with the error messages to improve the readbility of the PEG.
185Below we rewrite the previous grammar following this approach:
186 178
179#### Error Recovery
180
181By using the `Rec` function we can specify a recovery pattern that
182should be matched when a label is thrown. After matching the recovery
183pattern, and possibly recording the error, the parser will resume
184the <em>regular</em> matching. For example, in the example below
185we expect to match rule `A`, but when a failure occur the label 42
186is thrown and then we will try to match the recovery pattern `recp`:
187```lua 187```lua
188local m = require'lpeglabel' 188local m = require'lpeglabelrec'
189local re = require'relabel' 189
190local recp = m.P"oast"
191
192local g = m.P{
193 "S",
194 S = m.Rec(m.V"A", recp, 42) * ".",
195 A = m.P"t" * (m.P"est" + m.T(42))
196}
197
198print(g:match("test.")) --> 6
199print(g:match("toast.")) --> 7
200print(g:match("oast.")) --> nil 0 oast.
201print(g:match("toward.")) --> nil 0 ward.
202```
203When trying to match subject 'toast.', in rule `A` the first
204't' is matched, then the matching of `m.P"est"` fails and label 42
205is thrown, with the associated inpux suffix 'oast.'. In rule
206`S` label 42 is caught and the recovery pattern matches 'oast',
207so pattern `'.'` matches the rest of the input.
208
209When matching subject 'oast.', pattern `m.P"t"` fails, and
210the result of the matching is <b>nil, 0, oast.</b>.
211
212When matching 'toward.', label 42 is thrown after matching 't',
213with the associated input suffix 'oward.'. As the matching of the
214recovery pattern fails, the result is <b>nil, 0, ward.</b>.
215
216Usually, the recovery pattern is an expression that does not fail.
217In the previous example, we could have used `(m.P(1) - m.P".")^0`
218as the recovery pattern.
219
220Below we rewrite the grammar that describes a list of identifiers
221to use a recovery strategy. Grammar `g` remains the same, but we add a
222recovery grammar `grec` that handles the labels thrown by `g`.
223
224In grammar `grec` we use functions `record` and `sync`.
225Function `record`, plus function `recorderror`, will help
226us to save the input position where a label was thrown,
227while function `sync` will give us a synchronization pattern,
228that consumes the input while is not possible to match a given
229pattern `p`.
230
231When the matching of an identifier fails, a defaul value ('NONE')
232is provided.
233
234```lua
235local m = require'lpeglabelrec'
236local re = require'relabelrec'
190 237
191local terror = {} 238local terror = {}
192 239
@@ -199,73 +246,88 @@ local errUndef = newError("undefined")
199local errId = newError("expecting an identifier") 246local errId = newError("expecting an identifier")
200local errComma = newError("expecting ','") 247local errComma = newError("expecting ','")
201 248
249local id = m.R'az'^1
250
202local g = m.P{ 251local g = m.P{
203 "S", 252 "S",
204 S = m.V"Id" * m.V"List", 253 S = m.V"Id" * m.V"List",
205 List = -m.P(1) + (m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)) * m.V"List", 254 List = -m.P(1) + m.V"Comma" * m.V"Id" * m.V"List",
206 Id = m.V"Sp" * m.R'az'^1, 255 Id = m.V"Sp" * id + m.T(errId),
207 Comma = m.V"Sp" * ",", 256 Comma = m.V"Sp" * "," + m.T(errComma),
208 Sp = m.S" \n\t"^0, 257 Sp = m.S" \n\t"^0,
209} 258}
210 259
211function mymatch (g, s) 260local subject, errors
212 local r, e, sfail = g:match(s)
213 if not r then
214 local line, col = re.calcline(s, #s - #sfail)
215 local msg = "Error at line " .. line .. " (col " .. col .. "): "
216 return r, msg .. terror[e] .. " before '" .. sfail .. "'"
217 end
218 return r
219end
220
221print(mymatch(g, "one,two")) --> 8
222print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two'
223print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before ''
224```
225 261
262function recorderror(pos, lab)
263 local line, col = re.calcline(subject, pos)
264 table.insert(errors, { line = line, col = col, msg = terror[lab] })
265end
226 266
227##### *relabel* syntax 267function record (lab)
268 return (m.Cp() * m.Cc(lab)) / recorderror
269end
228 270
229Now we rewrite the previous example using the syntax 271function sync (p)
230supported by *relabel*: 272 return (-p * m.P(1))^0
273end
231 274
232```lua 275local grec = m.P{
233local re = require 'relabel' 276 "S",
277 S = m.Rec(m.Rec(g, m.V"ErrComma", errComma), m.V"ErrId", errId),
278 ErrComma = record(errComma) * sync(id),
279 ErrId = record(errId) * sync(m.P",")
280}
234 281
235local g = re.compile[[
236 S <- Id List
237 List <- !. / (',' / %{2}) (Id / %{1}) List
238 Id <- Sp [a-z]+
239 Comma <- Sp ','
240 Sp <- %s*
241]]
242 282
243function mymatch (g, s) 283function mymatch (g, s)
284 errors = {}
285 subject = s
244 local r, e, sfail = g:match(s) 286 local r, e, sfail = g:match(s)
245 if not r then 287 if #errors > 0 then
246 local line, col = re.calcline(s, #s - #sfail) 288 local out = {}
247 local msg = "Error at line " .. line .. " (col " .. col .. ")" 289 for i, err in ipairs(errors) do
248 if e == 1 then 290 local msg = "Error at line " .. err.line .. " (col " .. err.col .. "): " .. err.msg
249 return r, msg .. ": expecting an identifier before '" .. sfail .. "'" 291 table.insert(out, msg)
250 elseif e == 2 then
251 return r, msg .. ": expecting ',' before '" .. sfail .. "'"
252 else
253 return r, msg
254 end 292 end
293 return nil, table.concat(out, "\n") .. "\n"
255 end 294 end
256 return r 295 return r
257end 296end
258 297
259print(mymatch(g, "one,two")) --> 8 298print(mymatch(grec, "one,two"))
260print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' 299-- Captures (separated by ';'): one; two;
261print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' 300-- Syntactic errors found: 0
301
302print(mymatch(grec, "one two three"))
303-- Captures (separated by ';'): one; two; three;
304-- Syntactic errors found: 2
305-- Error at line 1 (col 4): expecting ','
306-- Error at line 1 (col 8): expecting ','
307
308print(mymatch(grec, "1,\n two, \n3,"))
309-- Captures (separated by ';'): NONE; two; NONE; NONE;
310-- Syntactic errors found: 3
311-- Error at line 1 (col 1): expecting an identifier
312-- Error at line 2 (col 6): expecting an identifier
313-- Error at line 3 (col 2): expecting an identifier
314
315print(mymatch(grec, "one\n two123, \nthree,"))
316-- Captures (separated by ';'): one; two; three; NONE;
317-- Syntactic errors found: 3
318-- Error at line 2 (col 1): expecting ','
319-- Error at line 2 (col 5): expecting ','
320-- Error at line 3 (col 6): expecting an identifier
262``` 321```
263 322
264With the help of function *setlabels* we can also rewrite the previous example to use 323##### *relabelrec* syntax
265mnemonic labels instead of plain numbers: 324
325Below we describe again a grammar that matches a list of identifiers,
326now using the syntax supported by *relabelrec*, where `//{}` is the
327recovery operator, and `%{}` is the throw operator:
266 328
267```lua 329```lua
268local re = require 'relabel' 330local re = require 'relabelrec'
269 331
270local errinfo = { 332local errinfo = {
271 {"errUndef", "undefined"}, 333 {"errUndef", "undefined"},
@@ -285,59 +347,124 @@ re.setlabels(labels)
285 347
286local g = re.compile[[ 348local g = re.compile[[
287 S <- Id List 349 S <- Id List
288 List <- !. / (',' / %{errComma}) (Id / %{errId}) List 350 List <- !. / Comma Id List
289 Id <- Sp [a-z]+ 351 Id <- Sp {[a-z]+} / %{errId}
290 Comma <- Sp ',' 352 Comma <- Sp ',' / %{errComma}
291 Sp <- %s* 353 Sp <- %s*
292]] 354]]
293 355
356local errors
357
358function recorderror (subject, pos, label)
359 local line, col = re.calcline(subject, pos)
360 table.insert(errors, { line = line, col = col, msg = errmsgs[labels[label]] })
361 return true
362end
363
364function sync (p)
365 return '( !(' .. p .. ') .)*'
366end
367
368local grec = re.compile(
369 "S <- %g //{errComma} ErrComma //{errId} ErrId" .. "\n" ..
370 "ErrComma <- ('' -> 'errComma' => recorderror) " .. sync('[a-z]+') .. "\n" ..
371 "ErrId <- ('' -> 'errId' => recorderror) " .. sync('","') .. "-> default"
372 , {g = g, recorderror = recorderror, default = "NONE"}
373)
374
294function mymatch (g, s) 375function mymatch (g, s)
295 local r, e, sfail = g:match(s) 376 errors = {}
296 if not r then 377 subject = s
297 local line, col = re.calcline(s, #s - #sfail) 378 io.write("Input: ", s, "\n")
298 local msg = "Error at line " .. line .. " (col " .. col .. "): " 379 local r = { g:match(s) }
299 return r, msg .. errmsgs[e] .. " before '" .. sfail .. "'" 380 io.write("Captures (separated by ';'): ")
381 for k, v in pairs(r) do
382 io.write(v .. "; ")
300 end 383 end
384 io.write("\nSyntactic errors found: " .. #errors)
385 if #errors > 0 then
386 io.write("\n")
387 local out = {}
388 for i, err in ipairs(errors) do
389 local msg = "Error at line " .. err.line .. " (col " .. err.col .. "): " .. err.msg
390 table.insert(out, msg)
391 end
392 io.write(table.concat(out, "\n"))
393 end
394 print("\n")
301 return r 395 return r
302end 396end
303 397
304print(mymatch(g, "one,two")) --> 8 398print(mymatch(grec, "one,two"))
305print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' 399-- Captures (separated by ';'): one; two;
306print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' 400-- Syntactic errors found: 0
401
402print(mymatch(grec, "one two three"))
403-- Captures (separated by ';'): one; two; three;
404-- Syntactic errors found: 2
405-- Error at line 1 (col 4): expecting ','
406-- Error at line 1 (col 8): expecting ','
407
408print(mymatch(grec, "1,\n two, \n3,"))
409-- Captures (separated by ';'): NONE; two; NONE; NONE;
410-- Syntactic errors found: 3
411-- Error at line 1 (col 1): expecting an identifier
412-- Error at line 2 (col 6): expecting an identifier
413-- Error at line 3 (col 2): expecting an identifier
414
415print(mymatch(grec, "one\n two123, \nthree,"))
416-- Captures (separated by ';'): one; two; three; NONE;
417-- Syntactic errors found: 3
418-- Error at line 2 (col 1): expecting ','
419-- Error at line 2 (col 5): expecting ','
420-- Error at line 3 (col 6): expecting an identifier
307``` 421```
308 422
423
309#### Arithmetic Expressions 424#### Arithmetic Expressions
310 425
311Here's an example of an LPegLabel grammar that make its own function called 426Here's an example of an LPegLabel grammar that matches an expression.
312'expect', which takes a pattern and a label as parameters and throws the label 427We have used a function `expect`, that takes a pattern `patt` and a label as
313if the pattern fails to be matched. This function can be extended later on to 428parameters and builds a new pattern that throws this label when `patt`
314record all errors encountered once error recovery is implemented. 429fails.
315 430
316```lua 431When a subexpression is syntactically invalid, a default value of 1000
317local lpeg = require"lpeglabel" 432is provided by the recovery pattern, so the evaluation of an expression
433should always produce a numeric value.
318 434
319local R, S, P, V, C, Ct, T = lpeg.R, lpeg.S, lpeg.P, lpeg.V, lpeg.C, lpeg.Ct, lpeg.T 435In this example, we can see that it may be a tedious and error prone
436task to build manually the recovery grammar `grec`. In the next example
437we will show how to build the recovery grammar in a more automatic way.
438
439```lua
440local m = require"lpeglabelrec"
441local re = require"relabelrec"
320 442
321local labels = { 443local labels = {
322 {"NoExp", "no expression found"}, 444 {"ExpTermFirst", "expected an expression"},
323 {"Extra", "extra characters found after the expression"}, 445 {"ExpTermOp", "expected a term after the operator"},
324 {"ExpTerm", "expected a term after the operator"},
325 {"ExpExp", "expected an expression after the parenthesis"},
326 {"MisClose", "missing a closing ')' after the expression"}, 446 {"MisClose", "missing a closing ')' after the expression"},
327} 447}
328 448
329local function expect(patt, labname) 449local function labelindex(labname)
330 for i, elem in ipairs(labels) do 450 for i, elem in ipairs(labels) do
331 if elem[1] == labname then 451 if elem[1] == labname then
332 return patt + T(i) 452 return i
333 end 453 end
334 end 454 end
335
336 error("could not find label: " .. labname) 455 error("could not find label: " .. labname)
337end 456end
338 457
339local num = R("09")^1 / tonumber 458local errors, subject
340local op = S("+-*/") 459
460local function expect(patt, labname)
461 local i = labelindex(labname)
462 return patt + m.T(i)
463end
464
465
466local num = m.R("09")^1 / tonumber
467local op = m.S("+-")
341 468
342local function compute(tokens) 469local function compute(tokens)
343 local result = tokens[1] 470 local result = tokens[1]
@@ -346,10 +473,6 @@ local function compute(tokens)
346 result = result + tokens[i+1] 473 result = result + tokens[i+1]
347 elseif tokens[i] == '-' then 474 elseif tokens[i] == '-' then
348 result = result - tokens[i+1] 475 result = result - tokens[i+1]
349 elseif tokens[i] == '*' then
350 result = result * tokens[i+1]
351 elseif tokens[i] == '/' then
352 result = result / tokens[i+1]
353 else 476 else
354 error('unknown operation: ' .. tokens[i]) 477 error('unknown operation: ' .. tokens[i])
355 end 478 end
@@ -357,81 +480,223 @@ local function compute(tokens)
357 return result 480 return result
358end 481end
359 482
360local g = P { 483local g = m.P {
361 "Exp", 484 "Exp",
362 Exp = Ct(V"Term" * (C(op) * expect(V"Term", "ExpTerm"))^0) / compute; 485 Exp = m.Ct(m.V"OperandFirst" * (m.C(op) * m.V"Operand")^0) / compute,
363 Term = num + V"Group"; 486 OperandFirst = expect(m.V"Term", "ExpTermFirst"),
364 Group = "(" * expect(V"Exp", "ExpExp") * expect(")", "MisClose"); 487 Operand = expect(m.V"Term", "ExpTermOp"),
488 Term = num + m.V"Group",
489 Group = "(" * m.V"Exp" * expect(")", "MisClose"),
365} 490}
366 491
367g = expect(g, "NoExp") * expect(-P(1), "Extra") 492function recorderror(pos, lab)
493 local line, col = re.calcline(subject, pos)
494 table.insert(errors, { line = line, col = col, msg = labels[lab][2] })
495end
368 496
497function record (labname)
498 return (m.Cp() * m.Cc(labelindex(labname))) / recorderror
499end
500
501function sync (p)
502 return (-p * m.P(1))^0
503end
504
505function defaultValue (p)
506 return p or m.Cc(1000)
507end
508
509local grec = m.P {
510 "S",
511 S = m.Rec(m.V"A", m.V"ErrExpTermFirst", labelindex("ExpTermFirst")),
512 A = m.Rec(m.V"Sg", m.V"ErrExpTermOp", labelindex("ExpTermOp")),
513 Sg = m.Rec(g, m.V"ErrMisClose", labelindex("MisClose")),
514 ErrExpTermFirst = record("ExpTermFirst") * sync(op + ")") * defaultValue(),
515 ErrExpTermOp = record("ExpTermOp") * sync(op + ")") * defaultValue(),
516 ErrMisClose = record("MisClose") * sync(m.P")") * defaultValue(m.P""),
517}
518
369local function eval(input) 519local function eval(input)
370 local result, label, suffix = g:match(input) 520 errors = {}
371 if result ~= nil then 521 io.write("Input: ", input, "\n")
372 return result 522 subject = input
373 else 523 local result, label, suffix = grec:match(input)
374 local pos = input:len() - suffix:len() + 1 524 io.write("Syntactic errors found: " .. #errors, "\n")
375 local msg = labels[label][2] 525 if #errors > 0 then
376 return nil, "syntax error: " .. msg .. " (at index " .. pos .. ")" 526 local out = {}
527 for i, err in ipairs(errors) do
528 local pos = err.col
529 local msg = err.msg
530 table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")")
531 end
532 print(table.concat(out, "\n"))
377 end 533 end
534 io.write("Result = ")
535 return result
378end 536end
379 537
380print(eval "98-76*(54/32)") 538print(eval "90-70-(5)+3")
381--> 37.125 539-- Syntactic errors found: 0
540-- Result = 18
541
542print(eval "15+")
543-- Syntactic errors found: 1
544-- syntax error: expected a term after the operator (at index 3)
545-- Result = 1015
546
547print(eval "-2")
548-- Syntactic errors found: 1
549-- syntax error: expected an expression (at index 1)
550-- Result = 998
551
552print(eval "1+()+")
553-- Syntactic errors found: 2
554-- syntax error: expected an expression (at index 4)
555-- syntax error: expected a term after the operator (at index 5)
556-- Result = 2001
557
558print(eval "1+(")
559-- Syntactic errors found: 2
560-- syntax error: expected an expression (at index 3)
561-- syntax error: missing a closing ')' after the expression (at index 3)
562-- Result = 1001
563
564print(eval "3)")
565-- Syntactic errors found: 0
566-- Result = 3
567```
382 568
383print(eval "(1+1-1*2/2") 569#### Automatically Building the Recovery Grammar
384--> syntax error: missing a closing ')' after the expression (at index 11)
385 570
386print(eval "(1+)-1*(2/2)") 571Below we rewrite the previous example to automatically
387--> syntax error: expected a term after the operator (at index 4) 572build the recovery grammar based on information provided
573by the user for each label (error message, recovery pattern, etc).
574In the example below we also throw an error when the grammar
575does not match the whole subject.
388 576
389print(eval "(1+1)-1*(/2)") 577```lua
390--> syntax error: expected an expression after the parenthesis (at index 10) 578local m = require"lpeglabelrec"
579local re = require"relabelrec"
391 580
392print(eval "1+(1-(1*2))/2x") 581local num = m.R("09")^1 / tonumber
393--> syntax error: extra chracters found after the expression (at index 14) 582local op = m.S("+-")
394 583
395print(eval "-1+(1-(1*2))/2") 584local labels = {}
396--> syntax error: no expression found (at index 1) 585local nlabels = 0
397```
398 586
399#### Catching labels 587local function newError(lab, msg, psync, pcap)
588 nlabels = nlabels + 1
589 psync = psync or m.P(-1)
590 pcap = pcap or m.P""
591 labels[lab] = { id = nlabels, msg = msg, psync = psync, pcap = pcap }
592end
400 593
401When a label is thrown, the grammar itself can handle this label 594newError("ExpTermFirst", "expected an expression", op + ")", m.Cc(1000))
402by using the labeled ordered choice. Below we rewrite the example 595newError("ExpTermOp", "expected a term after the operator", op + ")", m.Cc(1000))
403of the list of identifiers to show this feature: 596newError("MisClose", "missing a closing ')' after the expression", m.P")")
597newError("Extra", "extra characters found after the expression")
404 598
599local errors, subject
405 600
406```lua 601local function expect(patt, labname)
407local m = require'lpeglabel' 602 local i = labels[labname].id
603 return patt + m.T(i)
604end
408 605
409local terror = {} 606local function compute(tokens)
607 local result = tokens[1]
608 for i = 2, #tokens, 2 do
609 if tokens[i] == '+' then
610 result = result + tokens[i+1]
611 elseif tokens[i] == '-' then
612 result = result - tokens[i+1]
613 else
614 error('unknown operation: ' .. tokens[i])
615 end
616 end
617 return result
618end
410 619
411local function newError(s) 620local g = m.P {
412 table.insert(terror, s) 621 "Exp",
413 return #terror 622 Exp = m.Ct(m.V"OperandFirst" * (m.C(op) * m.V"Operand")^0) / compute,
623 OperandFirst = expect(m.V"Term", "ExpTermFirst"),
624 Operand = expect(m.V"Term", "ExpTermOp"),
625 Term = num + m.V"Group",
626 Group = "(" * m.V"Exp" * expect(")", "MisClose"),
627}
628
629function recorderror(pos, lab)
630 local line, col = re.calcline(subject, pos)
631 table.insert(errors, { line = line, col = col, msg = labels[lab].msg })
414end 632end
415 633
416local errUndef = newError("undefined") 634function record (labname)
417local errId = newError("expecting an identifier") 635 return (m.Cp() * m.Cc(labname)) / recorderror
418local errComma = newError("expecting ','") 636end
419 637
420local g = m.P{ 638function sync (p)
421 "S", 639 return (-p * m.P(1))^0
422 S = m.Lc(m.Lc(m.V"Id" * m.V"List", m.V"ErrId", errId), 640end
423 m.V"ErrComma", errComma), 641
424 List = -m.P(1) + (m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)) * m.V"List", 642function defaultValue (p)
425 Id = m.V"Sp" * m.R'az'^1, 643 return p or m.Cc(1000)
426 Comma = m.V"Sp" * ",", 644end
427 Sp = m.S" \n\t"^0, 645
428 ErrId = m.Cc(errId) / terror, 646local grec = g * expect(m.P(-1), "Extra")
429 ErrComma = m.Cc(errComma) / terror 647for k, v in pairs(labels) do
430} 648 grec = m.Rec(grec, record(k) * sync(v.psync) * v.pcap, v.id)
649end
650
651local function eval(input)
652 errors = {}
653 io.write("Input: ", input, "\n")
654 subject = input
655 local result, label, suffix = grec:match(input)
656 io.write("Syntactic errors found: " .. #errors, "\n")
657 if #errors > 0 then
658 local out = {}
659 for i, err in ipairs(errors) do
660 local pos = err.col
661 local msg = err.msg
662 table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")")
663 end
664 print(table.concat(out, "\n"))
665 end
666 io.write("Result = ")
667 return result
668end
431 669
432print(m.match(g, "one,two")) --> 8 670print(eval "90-70-(5)+3")
433print(m.match(g, "one two")) --> expecting ',' 671-- Syntactic errors found: 0
434print(m.match(g, "one,\n two,\nthree,")) --> expecting an identifier 672-- Result = 18
673
674print(eval "15+")
675-- Syntactic errors found: 1
676-- syntax error: expected a term after the operator (at index 3)
677-- Result = 1015
678
679print(eval "-2")
680-- Syntactic errors found: 1
681-- syntax error: expected an expression (at index 1)
682-- Result = 998
683
684print(eval "1+()+")
685-- Syntactic errors found: 2
686-- syntax error: expected an expression (at index 4)
687-- syntax error: expected a term after the operator (at index 5)
688-- Result = 2001
689
690print(eval "1+(")
691-- Syntactic errors found: 2
692-- syntax error: expected an expression (at index 3)
693-- syntax error: missing a closing ')' after the expression (at index 3)
694-- Result = 1001
695
696print(eval "3)")
697-- Syntactic errors found: 1
698-- syntax error: extra characters found after the expression (at index 2)
699-- Result = 3
435``` 700```
436 701
437#### Error Recovery 702#### Error Recovery