aboutsummaryrefslogtreecommitdiff
path: root/README.md
diff options
context:
space:
mode:
Diffstat (limited to 'README.md')
-rw-r--r--README.md766
1 files changed, 241 insertions, 525 deletions
diff --git a/README.md b/README.md
index b8595ad..88b4fc3 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ patterns of LPeg.
17 17
18Besides that, LPegLabel also reports the farthest 18Besides that, LPegLabel also reports the farthest
19failure position in case of an ordinary failure 19failure position in case of an ordinary failure
20(which is represented by label **0**). 20(which is represented by label **fail**).
21 21
22This document describes the new functions available 22This document describes the new functions available
23in LpegLabel and presents some examples of usage. 23in LpegLabel and presents some examples of usage.
@@ -27,13 +27,13 @@ between an ordinary failure and an error. Usually, an
27ordinary failure is produced when the matching of a 27ordinary failure is produced when the matching of a
28character fails, and this failure is caught by ordered choice. 28character fails, and this failure is caught by ordered choice.
29An error (a non-ordinary failure), by its turn, is produced 29An error (a non-ordinary failure), by its turn, is produced
30by the throw operator and may be caught by the recovery operator. 30by the throw operator and may be caught by a recovery rule.
31 31
32In LPegLabel, the result of an unsuccessful matching 32In LPegLabel, the result of an unsuccessful matching
33is a triple **nil, lab, errpos**, where **lab** 33is a triple **nil, lab, errpos**, where **lab**
34is the label associated with the failure, and 34is the label associated with the failure (a string or
35**errpos** is the input position being matched when 35an integer), and **errpos** is the input position being
36**lab** was thrown. 36matched when **lab** was thrown.
37 37
38When **lab** is an ordinary failure and no error was thrown before, 38When **lab** is an ordinary failure and no error was thrown before,
39**errpos** is the farthest position where an ordinary failure occurred. 39**errpos** is the farthest position where an ordinary failure occurred.
@@ -47,31 +47,15 @@ Below there is a brief summary of the new functions provided by LpegLabel:
47<tbody><tr><td><b>Function</b></td><td><b>Description</b></td></tr> 47<tbody><tr><td><b>Function</b></td><td><b>Description</b></td></tr>
48<tr><td><a href="#f-t"><code>lpeglabel.T (l)</code></a></td> 48<tr><td><a href="#f-t"><code>lpeglabel.T (l)</code></a></td>
49 <td>Throws a label <code>l</code> to signal an error</td></tr> 49 <td>Throws a label <code>l</code> to signal an error</td></tr>
50<tr><td><a href="#f-rec"><code>lpeglabel.Rec (p1, p2, l1 [, l2, ..., ln])</code></a></td>
51 <td>Specifies a recovery pattern <code>p2</code> for <code>p1</code>,
52 when the matching of <code>p1</code> gives one of the labels l1, ..., ln.</td></tr>
53<tr><td><a href="#f-lc"><code>lpeglabel.Lc (p1, p2, l1, ..., ln)</code></a></td>
54 <td>Matches <code>p1</code> and tries to match <code>p2</code>
55 if the matching of <code>p1</code> gives one of l<sub>1</sub>, ..., l<sub>n</sub>
56 </td></tr>
57<tr><td><a href="#re-t"><code>%{l}</code></a></td> 50<tr><td><a href="#re-t"><code>%{l}</code></a></td>
58 <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeglabel.T(l)</code> 51 <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeglabel.T(l)</code>
59 </td></tr> 52 </td></tr>
60<tr><td><a href="#re-pow"><code>p^l</code></a></td> 53<tr><td><a href="#re-pow"><code>p^l</code></a></td>
61 <td>Syntax sugar available at <em>relabel</em> for <code>p / %{l}</code> 54 <td>Syntax sugar available at <em>relabel</em> for <code>p / %{l}</code>
62 </td></tr> 55 </td></tr>
63<tr><td><a href="#re-rec"><code>p1 //{l1 [, l2, ..., ln} p2</code></a></td>
64 <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeglabel.Rec(p1, p2, l1, ..., ln)</code>
65 </td></tr>
66<tr><td><a href="#re-lc"><code>p1 /{l1, ..., ln} p2</code></a></td>
67 <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeg.Lc(p1, p2, l1, ..., ln)</code>
68 </td></tr>
69<tr><td><a href="#re-line"><code>relabel.calcline(subject, i)</code></a></td> 56<tr><td><a href="#re-line"><code>relabel.calcline(subject, i)</code></a></td>
70 <td>Calculates line and column information regarding position <i>i</i> of the subject</code> 57 <td>Calculates line and column information regarding position <i>i</i> of the subject</code>
71 </td></tr> 58 </td></tr>
72<tr><td><a href="#re-setl"><code>relabel.setlabels (tlabel)</code></a></td>
73 <td>Allows to specicify a table with mnemonic labels.
74 </td></tr>
75</tbody></table> 59</tbody></table>
76 60
77 61
@@ -80,60 +64,39 @@ Below there is a brief summary of the new functions provided by LpegLabel:
80 64
81#### <a name="f-t"></a><code>lpeglabel.T(l)</code> 65#### <a name="f-t"></a><code>lpeglabel.T(l)</code>
82 66
83Returns a pattern that throws the label `l`. 67Returns a pattern that throws the label `l`, which
84A label must be an integer between 1 and 255. 68can be an integer or a string.
85
86This pattern always causes a failure, whose associated
87position will be used to set **errpos**, no matter
88whether this is the farthest failure position or not.
89
90
91#### <a name="f-rec"></a><code>lpeglabel.Rec(p1, p2, l1, ..., ln)</code>
92
93Returns a *recovery pattern*.
94If the matching of `p1` gives one of the labels `l1, ..., ln`,
95then the matching of `p2` is tried from the failure position of `p1`.
96Otherwise, the result of the matching of `p1` is the pattern's result.
97
98
99#### <a name="f-lc"></a><code>lpeglabel.Lc(p1, p2, l1, ..., ln)</code>
100
101Returns a pattern equivalent to a *labeled ordered choice*.
102If the matching of `p1` gives one of the labels `l1, ..., ln`,
103then the matching of `p2` is tried from the same position. Otherwise,
104the result of the matching of `p1` is the pattern's result.
105 69
106<!--- 70When a label is thrown, the current subject position
107The labeled ordered choice `lpeg.Lc(p1, p2, 0)` is equivalent to the 71is used to set **errpos**, no matter whether it is the
108regular ordered choice `p1 / p2`. 72farthest failure position or not.
109-->
110 73
111Although PEG's ordered choice is associative, the labeled ordered choice is not. 74In case the PEG grammar has a rule `l`, after a label is thrown
112When using this function, the user should take care to build a left-associative 75this rule will be used as a recovery rule, otherwise the whole
113labeled ordered choice pattern. 76matching fails.
77
78The recovery rule will try to match the input from the subject
79position where `l` was thrown. In case the matching of the recovery
80rule succeeds, the regular matching is resumed. Otherwise, the
81result of the recovery rule is the matching result.
82
83When we have a predicate such as `-p` or `#p` and a label `l` is thrown
84during the matching of `p`, this causes the failure of `p`, but does
85not propagate `l`, nor call its associated recovery rule.
114 86
115 87
116#### <a name="re-t"></a><code>%{l}</code> 88#### <a name="re-t"></a><code>%{l}</code>
117 89
118Syntax of *relabel* module. Equivalent to `lpeg.T(l)`. 90Syntax of *relabel* module. Equivalent to `lpeg.T(l)`.
119 91
92Label `l` must be a valid identifier name.
120 93
121#### <a name="re-rec"></a><code>p1 //{l1, ..., ln} p2</code> 94#### <a name="re-pow"></a><code>p^l</code>
122
123Syntax of *relabel* module. Equivalent to `lpeglabel.Rec(p1, p2, l1, ..., ln)`.
124
125The `//{}` operator is left-associative.
126
127 95
128#### <a name="re-lc"></a><code>p1 /{l1, ..., ln} p2</code> 96Syntax of *relabel* module. The pattern `p^l` is equivalent
97to `p / lpeglabel.T(l)`.
129 98
130Syntax of *relabel* module. Equivalent to `lpeg.Lc(p1, p2, l1, ..., ln)`. 99Label `l` must be a valid identifier name.
131
132The `/{}` operator is left-associative.
133
134A grammar can use both choice operators (`/` and `/{}`),
135but a single choice can not mix them. That is, the parser of `relabel`
136module will not recognize a pattern as `p1 / p2 /{l1} p3`.
137 100
138 101
139#### <a name="re-line"></a><code>relabel.calcline (subject, i)</code> 102#### <a name="re-line"></a><code>relabel.calcline (subject, i)</code>
@@ -141,12 +104,6 @@ module will not recognize a pattern as `p1 / p2 /{l1} p3`.
141Returns line and column information regarding position <i>i</i> of the subject. 104Returns line and column information regarding position <i>i</i> of the subject.
142 105
143 106
144#### <a name="re-setl"></a><code>relabel.setlabels (tlabel)</code>
145
146Allows to specicify a table with labels. They keys of
147`tlabel` must be strings and the associated values must
148be integers between 1 and 255.
149
150 107
151### Examples 108### Examples
152 109
@@ -159,9 +116,9 @@ in the *examples* directory.
159 116
160This example illustrates the new values returned 117This example illustrates the new values returned
161by the *match* function in case of an unsuccessful 118by the *match* function in case of an unsuccessful
162matching. As no error is thrown, when the matching 119matching. As no error is thrown in this example,
163fails *errpos* represents the farthest suffix where 120when the matching fails *errpos* represents the
164an ordinary failure occurred. 121farthest suffix where an ordinary failure occurred.
165 122
166```lua 123```lua
167local m = require'lpeglabel' 124local m = require'lpeglabel'
@@ -172,124 +129,165 @@ function matchPrint(p, s)
172end 129end
173 130
174local p = m.P"a"^0 * m.P"b" + m.P"c" 131local p = m.P"a"^0 * m.P"b" + m.P"c"
175matchPrint(p, "abc") --> r: 3 lab: nil errpos: nil 132matchPrint(p, "abc") --> r: 3 lab: nil errpos: nil
176matchPrint(p, "c") --> r: 2 lab: nil errpos: nil 133matchPrint(p, "c") --> r: 2 lab: nil errpos: nil
177matchPrint(p, "aac") --> r: nil lab: 0 errpos: 3 134matchPrint(p, "aac") --> r: nil lab: fail errpos: 3
178matchPrint(p, "xxc") --> r: nil lab: 0 errpos: 1 135matchPrint(p, "xxc") --> r: nil lab: fail errpos: 1
179``` 136```
180 137
181 138
182#### Matching a list of identifiers separated by commas 139#### Matching a list of identifiers separated by commas
183 140
184The following example defines a grammar that matches 141The following example defines a grammar that matches
185a list of identifiers separated by commas. A label 142a (possibly empty) list of identifiers separated by commas.
186is thrown when there is an error matching an identifier 143A label is thrown when there is no identifier after a comma,
187or a comma. 144or when the whole input is not matched.
188
189We use function `newError` to store error messages in a
190table and to return the index associated with each error message.
191 145
192 146
193```lua 147```lua
194local m = require'lpeglabel' 148local m = require'lpeglabel'
195local re = require'relabel' 149local re = require'relabel'
196 150
197local terror = {} 151local terror = {
198 152 ErrId = "expecting an identifier",
199local function newError(s) 153 ErrEnd = "expecting EOF",
200 table.insert(terror, s) 154 fail = "undefined"
201 return #terror 155}
202end
203 156
204local errUndef = newError("undefined") 157local id = m.R'az'^1
205local errId = newError("expecting an identifier")
206local errComma = newError("expecting ','")
207 158
208local g = m.P{ 159local g = m.P{
209 "S", 160 'S',
210 S = m.V"Id" * m.V"List", 161 S = m.V'List' * (-m.P(1) + m.T'ErrEnd'),
211 List = -m.P(1) + (m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)) * m.V"List", 162 List = m.V'Id' * (m.V'Comma' * (m.V'Id' + m.T'ErrId'))^0,
212 Id = m.V"Sp" * m.R'az'^1, 163 Id = m.V'Sp' * id,
213 Comma = m.V"Sp" * ",", 164 Comma = m.V'Sp' * ',',
214 Sp = m.S" \n\t"^0, 165 Sp = m.S' \n\t'^0,
215} 166}
216 167
168
217function mymatch (g, s) 169function mymatch (g, s)
218 local r, e, sfail = g:match(s) 170 local r, e, pos = g:match(s)
219 if not r then 171 if not r then
220 local line, col = re.calcline(s, #s - #sfail) 172 local line, col = re.calcline(s, pos)
221 local msg = "Error at line " .. line .. " (col " .. col .. "): " 173 local msg = "Error at line " .. line .. " (col " .. col .. "): "
222 return r, msg .. terror[e] .. " before '" .. sfail .. "'" 174 return r, msg .. terror[e] .. " before '" .. s:sub(pos) .. "'"
223 end 175 end
224 return r 176 return r
225end 177end
226 178
227print(mymatch(g, "one,two")) --> 8 179print(mymatch(g, "one,two"))
228print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' 180print(mymatch(g, "one two"))
229print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' 181print(mymatch(g, "one,\n two,\nthree,4"))
230``` 182```
231 183
232In this example we could think about writing rule <em>List</em> as follows: 184In this example we could think about writing rule <em>List</em> as follows:
233```lua 185```lua
234List = ((m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)))^0, 186List = m.V'Id' * ((m.V'Comma' + m.T'ErrComma') * (m.V'Id' + m.T'ErrId'))^0,
235``` 187```
236 188
237but when matching this expression against the end of input 189but when matching <code>m.V'Comma' + m.T'ErrComma'</code> against the end of input
238we would get a failure whose associated label would be **errComma**, 190we would get a failure whose associated label would be **ErrComma**,
239and this would cause the failure of the *whole* repetition. 191and this would cause the failure of the *whole* repetition.
240 192
193Below we rewrite the previous grammar to indicate an error when there is no
194comma after an identifier. Before trying to match a comma, we check if
195we have reached the end of input:
196
197```lua
198local m = require'lpeglabel'
199local re = require'relabel'
200
201local terror = {
202 ErrId = "expecting an identifier",
203 ErrComma = "expecting ','",
204 fail = "undefined"
205}
206
207local id = m.R'az'^1
208
209local g = m.P{
210 'S',
211 S = m.V'List',
212 List = m.V'Id' * (#m.P(1) * m.V'Comma' * (m.V'Id' + m.T'ErrId'))^0,
213 Id = m.V'Sp' * id,
214 Comma = m.V'Sp' * ',' + m.T'ErrComma',
215 Sp = m.S' \n\t'^0,
216}
217
218
219function mymatch (g, s)
220 local r, e, pos = g:match(s)
221 if not r then
222 local line, col = re.calcline(s, pos)
223 local msg = "Error at line " .. line .. " (col " .. col .. "): "
224 return r, msg .. terror[e] .. " before '" .. s:sub(pos) .. "'"
225 end
226 return r
227end
228
229print(mymatch(g, "one,two"))
230print(mymatch(g, "one two"))
231print(mymatch(g, "one,\n two,\nthree,4"))
232print(mymatch(g, " 1,2"))
233
234```
241 235
242 236
243#### Error Recovery 237#### Error Recovery
244 238
245By using the `Rec` function we can specify a recovery pattern that 239We can specify a recovery rule that should
246should be matched when a label is thrown. After matching the recovery 240be matched when a label is thrown. After matching
247pattern, and possibly recording the error, the parser will resume 241the recovery rule, and possibly recording the error,
248the <em>regular</em> matching. For example, in the example below 242the parser will resume the <em>regular</em> matching.
249we expect to match rule `A`, but when a failure occur the label 42 243The recovery rule must have the same name (or number)
250is thrown and then we will try to match the recovery pattern `recp`: 244as the label that was thrown.
245
246
247For example, in the example below we expect to match rule *A*,
248but when a failure occurs the label `Err` is thrown and then we
249will try to match rule *Err*:
251```lua 250```lua
252local m = require'lpeglabel' 251local m = require'lpeglabel'
253 252
254local recp = m.P"oast" 253local recp = m.P"oast"
255 254
256local g = m.P{ 255local g = m.P{
257 "S", 256 'S',
258 S = m.Rec(m.V"A", recp, 42) * ".", 257 S = m.V'A' * '.',
259 A = m.P"t" * (m.P"est" + m.T(42)) 258 A = m.P't' * (m.P'est' + m.T'Err'),
259 Err = m.P'oast'
260} 260}
261 261
262print(g:match("test.")) --> 6 262print(g:match("test.")) --> 6
263print(g:match("toast.")) --> 7 263print(g:match("toast.")) --> 7
264print(g:match("oast.")) --> nil 0 oast. 264print(g:match("oast.")) --> nil fail oast.
265print(g:match("toward.")) --> nil 0 ward. 265print(g:match("toward.")) --> nil fail ward.
266``` 266```
267When trying to match subject 'toast.', in rule `A` the first 267When trying to match subject 'toast.', in rule *A* the first
268't' is matched, then the matching of `m.P"est"` fails and label 42 268't' is matched, then the matching of `m.P"est"` fails and label
269is thrown, with the associated input suffix 'oast.'. In rule 269`Err` is thrown, with the associated input suffix 'oast.'.
270`S` label 42 is caught and the recovery pattern matches 'oast', 270The recovery rule *Err* successfully matches 'oast', so
271so pattern `'.'` matches the rest of the input. 271the regular matching continues, and pattern `'.'` matches
272the rest of the input.
272 273
273When matching subject 'oast.', pattern `m.P"t"` fails, and 274When matching subject 'oast.', pattern `m.P"t"` fails, and
274the result of the matching is <b>nil, 0, oast.</b>. 275the result of the matching is <b>nil, fail, 1</b>.
275 276
276When matching 'toward.', label 42 is thrown after matching 't', 277When matching 'toward.', label `Err` is thrown after matching 't',
277with the associated input suffix 'oward.'. As the matching of the 278with the associated input suffix 'oward.'. As the matching of the
278recovery pattern fails, the result is <b>nil, 0, ward.</b>. 279recovery pattern fails, the result is <b>nil, fail, 3</b>.
279 280
280Usually, the recovery pattern is an expression that does not fail. 281Usually, the recovery pattern is an expression that does not fail.
281In the previous example, we could have used `(m.P(1) - m.P".")^0` 282In the previous example, we could have used `(m.P(1) - m.P".")^0`
282as the recovery pattern. 283as the recovery pattern.
283 284
284Below we rewrite the grammar that describes a list of identifiers 285Below we rewrite the grammar that describes a list of identifiers
285to use a recovery strategy. Grammar `g` remains the same, but we add a 286to use a recovery strategy, with the help of some auxiliary functions.
286recovery grammar `grec` that handles the labels thrown by `g`.
287
288In grammar `grec` we use functions `record` and `sync`.
289Function `record`, plus function `recorderror`, will help 287Function `record`, plus function `recorderror`, will help
290us to save the input position where a label was thrown, 288us to save the input position where a label was thrown,
291while function `sync` will give us a synchronization pattern, 289while function `sync` will give us a synchronization pattern,
292that consumes the input while is not possible to match a given 290that consumes the input while it is not possible to match a given
293pattern `p`. 291pattern `p`.
294 292
295When the matching of an identifier fails, a default value ('NONE') 293When the matching of an identifier fails, a default value ('NONE')
@@ -299,26 +297,11 @@ is provided.
299local m = require'lpeglabel' 297local m = require'lpeglabel'
300local re = require'relabel' 298local re = require'relabel'
301 299
302local terror = {} 300local terror = {
303 301 ErrId = "expecting an identifier",
304local function newError(s) 302 ErrComma = "expecting ','",
305 table.insert(terror, s) 303 ErrList = "expecting a list of identifiers",
306 return #terror 304 fail = "undefined"
307end
308
309local errUndef = newError("undefined")
310local errId = newError("expecting an identifier")
311local errComma = newError("expecting ','")
312
313local id = m.R'az'^1
314
315local g = m.P{
316 "S",
317 S = m.V"Id" * m.V"List",
318 List = -m.P(1) + m.V"Comma" * m.V"Id" * m.V"List",
319 Id = m.V"Sp" * m.C(id) + m.T(errId),
320 Comma = m.V"Sp" * "," + m.T(errComma),
321 Sp = m.S" \n\t"^0,
322} 305}
323 306
324local subject, errors 307local subject, errors
@@ -340,14 +323,20 @@ function defaultValue ()
340 return m.Cc"NONE" 323 return m.Cc"NONE"
341end 324end
342 325
343local grec = m.P{ 326local id = m.R'az'^1
327
328local g = m.P{
344 "S", 329 "S",
345 S = m.Rec(m.Rec(g, m.V"ErrComma", errComma), m.V"ErrId", errId), 330 S = m.V"List" + (m.P(1) * m.T'ErrList'),
346 ErrComma = record(errComma) * sync(id), 331 List = m.V'Id' * (#m.P(1) * m.V'Comma' * (m.V'Id' + m.T'ErrId'))^0,
347 ErrId = record(errId) * sync(m.P",") * defaultValue(), 332 Id = m.V'Sp' * m.C(id),
333 Comma = m.V'Sp' * ',' + m.T'ErrComma',
334 Sp = m.S' \n\t'^0,
335 ErrId = record'ErrId' * sync(m.P",") * defaultValue(),
336 ErrComma = record'ErrComma' * sync(id),
337 ErrList = record'ErrList' * sync(m.P(-1)) * defaultValue()
348} 338}
349 339
350
351function mymatch (g, s) 340function mymatch (g, s)
352 errors = {} 341 errors = {}
353 subject = s 342 subject = s
@@ -371,387 +360,114 @@ function mymatch (g, s)
371 return r 360 return r
372end 361end
373 362
374mymatch(grec, "one,two") 363mymatch(g, "one,two")
375mymatch(grec, "one two three") 364mymatch(g, "one two three")
376mymatch(grec, "1,\n two, \n3,") 365mymatch(g, "1,\n two, \n3,")
377mymatch(grec, "one\n two123, \nthree,") 366mymatch(g, "one\n two123, \nthree,")
378``` 367```
379 368
380##### *relabel* syntax 369##### *relabel* syntax
381 370
382Below we describe again a grammar that matches a list of identifiers, 371Below we write a grammar for a simple programming language
383now using the syntax supported by *relabel*, where `//{}` is the 372using the syntax supported by *relabel*, where `%{}` is the throw
384recovery operator, and `%{}` is the throw operator: 373operator, and the syntax `p^l` is syntactic sugar for
374`p / %{l}` (given that *l* is a valid identifier name):
385 375
386```lua 376```lua
387local re = require 'relabel' 377local re = require 'relabel'
388 378
389local errinfo = { 379local terror = {
390 {"errUndef", "undefined"}, 380 cmdSeq = "Missing ';' in CmdSeq",
391 {"errId", "expecting an identifier"}, 381 ifExp = "Error in expresion of 'if'",
392 {"errComma", "expecting ','"}, 382 ifThen = "Error matching 'then' keyword",
383 ifThenCmdSeq = "Error matching CmdSeq of 'then' branch",
384 ifElseCmdSeq = "Error matching CmdSeq of 'else' branch",
385 ifEnd = "Error matching 'end' keyword of 'if'",
386 repeatCmdSeq = "Error matching CmdSeq of 'repeat'",
387 repeatUntil = "Error matching 'until' keyword",
388 repeatExp = "Error matching expression of 'until'",
389 assignOp = "Error matching ':='",
390 assignExp = "Error matching expression of assignment",
391 readName = "Error matching 'NAME' after 'read'",
392 writeExp = "Error matching expression after 'write'",
393 simpleExp = "Error matching 'SimpleExp'",
394 term = "Error matching 'Term'",
395 factor = "Error matching 'Factor'",
396 openParExp = "Error matching expression after '('",
397 closePar = "Error matching ')'",
398 eof = "Error, expecting EOF",
399 undefined = "Undefined Error"
393} 400}
394 401
395local errmsgs = {} 402g = re.compile([[
396local labels = {} 403 Tiny <- CmdSeq (!. / %{eof})
397 404 CmdSeq <- (Cmd SEMICOLON^cmdSeq) (Cmd SEMICOLON^cmdSeq)*
398for i, err in ipairs(errinfo) do 405 Cmd <- IfCmd / RepeatCmd / ReadCmd / WriteCmd / AssignCmd
399 errmsgs[i] = err[2] 406 IfCmd <- IF Exp^ifExp THEN^ifThen CmdSeq^ifThenCmdSeq (ELSE CmdSeq^ifElseCmdSeq / '') END^ifEnd
400 labels[err[1]] = i 407 RepeatCmd <- REPEAT CmdSeq^repeatCmdSeq UNTIL^repeatUntil Exp^repeatExp
401end 408 AssignCmd <- NAME ASSIGNMENT^assignOp Exp^assignExp
402 409 ReadCmd <- READ NAME^readName
403re.setlabels(labels) 410 WriteCmd <- WRITE Exp^writeExp
404 411 Exp <- SimpleExp ((LESS / EQUAL) SimpleExp^simpleExp / '')
405local g = re.compile[[ 412 SimpleExp <- Term ((ADD / SUB) Term^term)*
406 S <- Id List 413 Term <- Factor ((MUL / DIV) Factor^factor)*
407 List <- !. / Comma Id List 414 Factor <- OPENPAR Exp^openParExp CLOSEPAR^closePar / NUMBER / NAME
408 Id <- Sp {[a-z]+} / %{errId} 415 ADD <- Sp '+'
409 Comma <- Sp ',' / %{errComma} 416 ASSIGNMENT <- Sp ':='
410 Sp <- %s* 417 CLOSEPAR <- Sp ')'
411]] 418 DIV <- Sp '/'
412 419 IF <- Sp 'if'
413local errors 420 ELSE <- Sp 'else'
414 421 END <- Sp 'end'
415function recorderror (subject, pos, label) 422 EQUAL <- Sp '='
416 local line, col = re.calcline(subject, pos) 423 LESS <- Sp '<'
417 table.insert(errors, { line = line, col = col, msg = errmsgs[labels[label]] }) 424 MUL <- Sp '*'
418 return true 425 NAME <- !RESERVED Sp [a-z]+
419end 426 NUMBER <- Sp [0-9]+
420 427 OPENPAR <- Sp '('
421function sync (p) 428 READ <- Sp 'read'
422 return '( !(' .. p .. ') .)*' 429 REPEAT <- Sp 'repeat'
423end 430 SEMICOLON <- Sp ';'
424 431 SUB <- Sp '-'
425local grec = re.compile( 432 THEN <- Sp 'then'
426 "S <- %g //{errComma} ErrComma //{errId} ErrId" .. "\n" .. 433 UNTIL <- Sp 'until'
427 "ErrComma <- ('' -> 'errComma' => recorderror) " .. sync('[a-z]+') .. "\n" .. 434 WRITE <- Sp 'write'
428 "ErrId <- ('' -> 'errId' => recorderror) " .. sync('","') .. "-> default" 435 RESERVED <- (IF / ELSE / END / READ / REPEAT / THEN / UNTIL / WRITE) ![a-z]+
429 , {g = g, recorderror = recorderror, default = "NONE"} 436 Sp <- (%s / %nl)*
430) 437]], terror)
431 438
432function mymatch (g, s) 439
433 errors = {} 440local function mymatch(g, s)
434 subject = s 441 local r, e, pos = g:match(s)
435 io.write("Input: ", s, "\n") 442 if not r then
436 local r = { g:match(s) } 443 local line, col = re.calcline(s, pos)
437 io.write("Captures (separated by ';'): ") 444 local msg = "Error at line " .. line .. " (col " .. col .. "): "
438 for k, v in pairs(r) do 445 return r, msg .. terror[e]
439 io.write(v .. "; ") 446 end
440 end 447 return r
441 io.write("\nSyntactic errors found: " .. #errors)
442 if #errors > 0 then
443 io.write("\n")
444 local out = {}
445 for i, err in ipairs(errors) do
446 local msg = "Error at line " .. err.line .. " (col " .. err.col .. "): " .. err.msg
447 table.insert(out, msg)
448 end
449 io.write(table.concat(out, "\n"))
450 end
451 print("\n")
452 return r
453end
454
455print(mymatch(grec, "one,two"))
456-- Captures (separated by ';'): one; two;
457-- Syntactic errors found: 0
458
459print(mymatch(grec, "one two three"))
460-- Captures (separated by ';'): one; two; three;
461-- Syntactic errors found: 2
462-- Error at line 1 (col 4): expecting ','
463-- Error at line 1 (col 8): expecting ','
464
465print(mymatch(grec, "1,\n two, \n3,"))
466-- Captures (separated by ';'): NONE; two; NONE; NONE;
467-- Syntactic errors found: 3
468-- Error at line 1 (col 1): expecting an identifier
469-- Error at line 2 (col 6): expecting an identifier
470-- Error at line 3 (col 2): expecting an identifier
471
472print(mymatch(grec, "one\n two123, \nthree,"))
473-- Captures (separated by ';'): one; two; three; NONE;
474-- Syntactic errors found: 3
475-- Error at line 2 (col 1): expecting ','
476-- Error at line 2 (col 5): expecting ','
477-- Error at line 3 (col 6): expecting an identifier
478```
479
480
481#### Arithmetic Expressions
482
483Here's an example of an LPegLabel grammar that matches an expression.
484We have used a function `expect`, that takes a pattern `patt` and a label as
485parameters and builds a new pattern that throws this label when `patt`
486fails.
487
488When a subexpression is syntactically invalid, a default value of 1000
489is provided by the recovery pattern, so the evaluation of an expression
490should always produce a numeric value.
491
492In this example, we can see that it may be a tedious and error prone
493task to build manually the recovery grammar `grec`. In the next example
494we will show how to build the recovery grammar in a more automatic way.
495
496```lua
497local m = require"lpeglabel"
498local re = require"relabel"
499
500local labels = {
501 {"ExpTermFirst", "expected an expression"},
502 {"ExpTermOp", "expected a term after the operator"},
503 {"MisClose", "missing a closing ')' after the expression"},
504}
505
506local function labelindex(labname)
507 for i, elem in ipairs(labels) do
508 if elem[1] == labname then
509 return i
510 end
511 end
512 error("could not find label: " .. labname)
513end
514
515local errors, subject
516
517local function expect(patt, labname)
518 local i = labelindex(labname)
519 return patt + m.T(i)
520end
521
522
523local num = m.R("09")^1 / tonumber
524local op = m.S("+-")
525
526local function compute(tokens)
527 local result = tokens[1]
528 for i = 2, #tokens, 2 do
529 if tokens[i] == '+' then
530 result = result + tokens[i+1]
531 elseif tokens[i] == '-' then
532 result = result - tokens[i+1]
533 else
534 error('unknown operation: ' .. tokens[i])
535 end
536 end
537 return result
538end
539
540local g = m.P {
541 "Exp",
542 Exp = m.Ct(m.V"OperandFirst" * (m.C(op) * m.V"Operand")^0) / compute,
543 OperandFirst = expect(m.V"Term", "ExpTermFirst"),
544 Operand = expect(m.V"Term", "ExpTermOp"),
545 Term = num + m.V"Group",
546 Group = "(" * m.V"Exp" * expect(")", "MisClose"),
547}
548
549function recorderror(pos, lab)
550 local line, col = re.calcline(subject, pos)
551 table.insert(errors, { line = line, col = col, msg = labels[lab][2] })
552end
553
554function record (labname)
555 return (m.Cp() * m.Cc(labelindex(labname))) / recorderror
556end
557
558function sync (p)
559 return (-p * m.P(1))^0
560end
561
562function defaultValue (p)
563 return p or m.Cc(1000)
564end
565
566local grec = m.P {
567 "S",
568 S = m.Rec(m.V"A", m.V"ErrExpTermFirst", labelindex("ExpTermFirst")),
569 A = m.Rec(m.V"Sg", m.V"ErrExpTermOp", labelindex("ExpTermOp")),
570 Sg = m.Rec(g, m.V"ErrMisClose", labelindex("MisClose")),
571 ErrExpTermFirst = record("ExpTermFirst") * sync(op + ")") * defaultValue(),
572 ErrExpTermOp = record("ExpTermOp") * sync(op + ")") * defaultValue(),
573 ErrMisClose = record("MisClose") * sync(m.P")") * defaultValue(m.P""),
574}
575
576local function eval(input)
577 errors = {}
578 io.write("Input: ", input, "\n")
579 subject = input
580 local result, label, suffix = grec:match(input)
581 io.write("Syntactic errors found: " .. #errors, "\n")
582 if #errors > 0 then
583 local out = {}
584 for i, err in ipairs(errors) do
585 local pos = err.col
586 local msg = err.msg
587 table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")")
588 end
589 print(table.concat(out, "\n"))
590 end
591 io.write("Result = ")
592 return result
593end 448end
594 449
595print(eval "90-70-(5)+3") 450local s = [[
596-- Syntactic errors found: 0 451n := 5;
597-- Result = 18 452f := 1;
598 453repeat
599print(eval "15+") 454 f := f + n;
600-- Syntactic errors found: 1 455 n := n - 1
601-- syntax error: expected a term after the operator (at index 3) 456until (n < 1);
602-- Result = 1015 457write f;]]
603 458print(mymatch(g, s))
604print(eval "-2") 459
605-- Syntactic errors found: 1 460print(mymatch(g, "a : 2"))
606-- syntax error: expected an expression (at index 1) 461print(mymatch(g, "a := 2; 6"))
607-- Result = 998
608
609print(eval "1+()+")
610-- Syntactic errors found: 2
611-- syntax error: expected an expression (at index 4)
612-- syntax error: expected a term after the operator (at index 5)
613-- Result = 2001
614
615print(eval "1+(")
616-- Syntactic errors found: 2
617-- syntax error: expected an expression (at index 3)
618-- syntax error: missing a closing ')' after the expression (at index 3)
619-- Result = 1001
620
621print(eval "3)")
622-- Syntactic errors found: 0
623-- Result = 3
624``` 462```
625 463
626#### Automatically Building the Recovery Grammar 464### Caveats
627 465
628Below we rewrite the previous example to automatically 466Do not use the number **1** to specify a recovery rule,
629build the recovery grammar based on information provided 467since this index is used to indicate the first rule
630by the user for each label (error message, recovery pattern, etc). 468of a grammar.
631In the example below we also throw an error when the grammar
632does not match the whole subject.
633 469
634```lua 470In case your grammar has many regular and recovery rules,
635local m = require"lpeglabel" 471you may get an error message such as grammar: <em>has too many rules</em>.
636local re = require"relabel" 472In this case, we need to change *MAXRULES* in `lptypes.h`.
637 473
638local num = m.R("09")^1 / tonumber
639local op = m.S("+-")
640
641local labels = {}
642local nlabels = 0
643
644local function newError(lab, msg, psync, pcap)
645 nlabels = nlabels + 1
646 psync = psync or m.P(-1)
647 pcap = pcap or m.P""
648 labels[lab] = { id = nlabels, msg = msg, psync = psync, pcap = pcap }
649end
650
651newError("ExpTermFirst", "expected an expression", op + ")", m.Cc(1000))
652newError("ExpTermOp", "expected a term after the operator", op + ")", m.Cc(1000))
653newError("MisClose", "missing a closing ')' after the expression", m.P")")
654newError("Extra", "extra characters found after the expression")
655
656local errors, subject
657
658local function expect(patt, labname)
659 local i = labels[labname].id
660 return patt + m.T(i)
661end
662
663local function compute(tokens)
664 local result = tokens[1]
665 for i = 2, #tokens, 2 do
666 if tokens[i] == '+' then
667 result = result + tokens[i+1]
668 elseif tokens[i] == '-' then
669 result = result - tokens[i+1]
670 else
671 error('unknown operation: ' .. tokens[i])
672 end
673 end
674 return result
675end
676
677local g = m.P {
678 "Exp",
679 Exp = m.Ct(m.V"OperandFirst" * (m.C(op) * m.V"Operand")^0) / compute,
680 OperandFirst = expect(m.V"Term", "ExpTermFirst"),
681 Operand = expect(m.V"Term", "ExpTermOp"),
682 Term = num + m.V"Group",
683 Group = "(" * m.V"Exp" * expect(")", "MisClose"),
684}
685
686function recorderror(pos, lab)
687 local line, col = re.calcline(subject, pos)
688 table.insert(errors, { line = line, col = col, msg = labels[lab].msg })
689end
690
691function record (labname)
692 return (m.Cp() * m.Cc(labname)) / recorderror
693end
694
695function sync (p)
696 return (-p * m.P(1))^0
697end
698
699function defaultValue (p)
700 return p or m.Cc(1000)
701end
702
703local grec = g * expect(m.P(-1), "Extra")
704for k, v in pairs(labels) do
705 grec = m.Rec(grec, record(k) * sync(v.psync) * v.pcap, v.id)
706end
707
708local function eval(input)
709 errors = {}
710 io.write("Input: ", input, "\n")
711 subject = input
712 local result, label, suffix = grec:match(input)
713 io.write("Syntactic errors found: " .. #errors, "\n")
714 if #errors > 0 then
715 local out = {}
716 for i, err in ipairs(errors) do
717 local pos = err.col
718 local msg = err.msg
719 table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")")
720 end
721 print(table.concat(out, "\n"))
722 end
723 io.write("Result = ")
724 return result
725end
726
727print(eval "90-70-(5)+3")
728-- Syntactic errors found: 0
729-- Result = 18
730
731print(eval "15+")
732-- Syntactic errors found: 1
733-- syntax error: expected a term after the operator (at index 3)
734-- Result = 1015
735
736print(eval "-2")
737-- Syntactic errors found: 1
738-- syntax error: expected an expression (at index 1)
739-- Result = 998
740
741print(eval "1+()+")
742-- Syntactic errors found: 2
743-- syntax error: expected an expression (at index 4)
744-- syntax error: expected a term after the operator (at index 5)
745-- Result = 2001
746
747print(eval "1+(")
748-- Syntactic errors found: 2
749-- syntax error: expected an expression (at index 3)
750-- syntax error: missing a closing ')' after the expression (at index 3)
751-- Result = 1001
752
753print(eval "3)")
754-- Syntactic errors found: 1
755-- syntax error: extra characters found after the expression (at index 2)
756-- Result = 3
757```