diff options
Diffstat (limited to 'README.md')
-rw-r--r-- | README.md | 766 |
1 files changed, 241 insertions, 525 deletions
@@ -17,7 +17,7 @@ patterns of LPeg. | |||
17 | 17 | ||
18 | Besides that, LPegLabel also reports the farthest | 18 | Besides that, LPegLabel also reports the farthest |
19 | failure position in case of an ordinary failure | 19 | failure position in case of an ordinary failure |
20 | (which is represented by label **0**). | 20 | (which is represented by label **fail**). |
21 | 21 | ||
22 | This document describes the new functions available | 22 | This document describes the new functions available |
23 | in LpegLabel and presents some examples of usage. | 23 | in LpegLabel and presents some examples of usage. |
@@ -27,13 +27,13 @@ between an ordinary failure and an error. Usually, an | |||
27 | ordinary failure is produced when the matching of a | 27 | ordinary failure is produced when the matching of a |
28 | character fails, and this failure is caught by ordered choice. | 28 | character fails, and this failure is caught by ordered choice. |
29 | An error (a non-ordinary failure), by its turn, is produced | 29 | An error (a non-ordinary failure), by its turn, is produced |
30 | by the throw operator and may be caught by the recovery operator. | 30 | by the throw operator and may be caught by a recovery rule. |
31 | 31 | ||
32 | In LPegLabel, the result of an unsuccessful matching | 32 | In LPegLabel, the result of an unsuccessful matching |
33 | is a triple **nil, lab, errpos**, where **lab** | 33 | is a triple **nil, lab, errpos**, where **lab** |
34 | is the label associated with the failure, and | 34 | is the label associated with the failure (a string or |
35 | **errpos** is the input position being matched when | 35 | an integer), and **errpos** is the input position being |
36 | **lab** was thrown. | 36 | matched when **lab** was thrown. |
37 | 37 | ||
38 | When **lab** is an ordinary failure and no error was thrown before, | 38 | When **lab** is an ordinary failure and no error was thrown before, |
39 | **errpos** is the farthest position where an ordinary failure occurred. | 39 | **errpos** is the farthest position where an ordinary failure occurred. |
@@ -47,31 +47,15 @@ Below there is a brief summary of the new functions provided by LpegLabel: | |||
47 | <tbody><tr><td><b>Function</b></td><td><b>Description</b></td></tr> | 47 | <tbody><tr><td><b>Function</b></td><td><b>Description</b></td></tr> |
48 | <tr><td><a href="#f-t"><code>lpeglabel.T (l)</code></a></td> | 48 | <tr><td><a href="#f-t"><code>lpeglabel.T (l)</code></a></td> |
49 | <td>Throws a label <code>l</code> to signal an error</td></tr> | 49 | <td>Throws a label <code>l</code> to signal an error</td></tr> |
50 | <tr><td><a href="#f-rec"><code>lpeglabel.Rec (p1, p2, l1 [, l2, ..., ln])</code></a></td> | ||
51 | <td>Specifies a recovery pattern <code>p2</code> for <code>p1</code>, | ||
52 | when the matching of <code>p1</code> gives one of the labels l1, ..., ln.</td></tr> | ||
53 | <tr><td><a href="#f-lc"><code>lpeglabel.Lc (p1, p2, l1, ..., ln)</code></a></td> | ||
54 | <td>Matches <code>p1</code> and tries to match <code>p2</code> | ||
55 | if the matching of <code>p1</code> gives one of l<sub>1</sub>, ..., l<sub>n</sub> | ||
56 | </td></tr> | ||
57 | <tr><td><a href="#re-t"><code>%{l}</code></a></td> | 50 | <tr><td><a href="#re-t"><code>%{l}</code></a></td> |
58 | <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeglabel.T(l)</code> | 51 | <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeglabel.T(l)</code> |
59 | </td></tr> | 52 | </td></tr> |
60 | <tr><td><a href="#re-pow"><code>p^l</code></a></td> | 53 | <tr><td><a href="#re-pow"><code>p^l</code></a></td> |
61 | <td>Syntax sugar available at <em>relabel</em> for <code>p / %{l}</code> | 54 | <td>Syntax sugar available at <em>relabel</em> for <code>p / %{l}</code> |
62 | </td></tr> | 55 | </td></tr> |
63 | <tr><td><a href="#re-rec"><code>p1 //{l1 [, l2, ..., ln} p2</code></a></td> | ||
64 | <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeglabel.Rec(p1, p2, l1, ..., ln)</code> | ||
65 | </td></tr> | ||
66 | <tr><td><a href="#re-lc"><code>p1 /{l1, ..., ln} p2</code></a></td> | ||
67 | <td>Syntax of <em>relabel</em> module. Equivalent to <code>lpeg.Lc(p1, p2, l1, ..., ln)</code> | ||
68 | </td></tr> | ||
69 | <tr><td><a href="#re-line"><code>relabel.calcline(subject, i)</code></a></td> | 56 | <tr><td><a href="#re-line"><code>relabel.calcline(subject, i)</code></a></td> |
70 | <td>Calculates line and column information regarding position <i>i</i> of the subject | 57 | <td>Calculates line and column information regarding position <i>i</i> of the subject |
71 | </td></tr> | 58 | </td></tr> |
72 | <tr><td><a href="#re-setl"><code>relabel.setlabels (tlabel)</code></a></td> | ||
73 | <td>Allows to specicify a table with mnemonic labels. | ||
74 | </td></tr> | ||
75 | </tbody></table> | 59 | </tbody></table> |
76 | 60 | ||
77 | 61 | ||
@@ -80,60 +64,39 @@ Below there is a brief summary of the new functions provided by LpegLabel: | |||
80 | 64 | ||
81 | #### <a name="f-t"></a><code>lpeglabel.T(l)</code> | 65 | #### <a name="f-t"></a><code>lpeglabel.T(l)</code> |
82 | 66 | ||
83 | Returns a pattern that throws the label `l`. | 67 | Returns a pattern that throws the label `l`, which |
84 | A label must be an integer between 1 and 255. | 68 | can be an integer or a string. |
85 | |||
86 | This pattern always causes a failure, whose associated | ||
87 | position will be used to set **errpos**, no matter | ||
88 | whether this is the farthest failure position or not. | ||
89 | |||
90 | |||
91 | #### <a name="f-rec"></a><code>lpeglabel.Rec(p1, p2, l1, ..., ln)</code> | ||
92 | |||
93 | Returns a *recovery pattern*. | ||
94 | If the matching of `p1` gives one of the labels `l1, ..., ln`, | ||
95 | then the matching of `p2` is tried from the failure position of `p1`. | ||
96 | Otherwise, the result of the matching of `p1` is the pattern's result. | ||
97 | |||
98 | |||
99 | #### <a name="f-lc"></a><code>lpeglabel.Lc(p1, p2, l1, ..., ln)</code> | ||
100 | |||
101 | Returns a pattern equivalent to a *labeled ordered choice*. | ||
102 | If the matching of `p1` gives one of the labels `l1, ..., ln`, | ||
103 | then the matching of `p2` is tried from the same position. Otherwise, | ||
104 | the result of the matching of `p1` is the pattern's result. | ||
105 | 69 | ||
106 | <!--- | 70 | When a label is thrown, the current subject position |
107 | The labeled ordered choice `lpeg.Lc(p1, p2, 0)` is equivalent to the | 71 | is used to set **errpos**, no matter whether it is the |
108 | regular ordered choice `p1 / p2`. | 72 | farthest failure position or not. |
109 | --> | ||
110 | 73 | ||
111 | Although PEG's ordered choice is associative, the labeled ordered choice is not. | 74 | In case the PEG grammar has a rule `l`, after a label is thrown |
112 | When using this function, the user should take care to build a left-associative | 75 | this rule will be used as a recovery rule, otherwise the whole |
113 | labeled ordered choice pattern. | 76 | matching fails. |
77 | |||
78 | The recovery rule will try to match the input from the subject | ||
79 | position where `l` was thrown. In case the matching of the recovery | ||
80 | rule succeeds, the regular matching is resumed. Otherwise, the | ||
81 | result of the recovery rule is the matching result. | ||
82 | |||
83 | When we have a predicate such as `-p` or `#p` and a label `l` is thrown | ||
84 | during the matching of `p`, this causes the failure of `p`, but does | ||
85 | not propagate `l`, nor call its associated recovery rule. |
114 | 86 | ||
115 | 87 | ||
116 | #### <a name="re-t"></a><code>%{l}</code> | 88 | #### <a name="re-t"></a><code>%{l}</code> |
117 | 89 | ||
118 | Syntax of *relabel* module. Equivalent to `lpeglabel.T(l)`. | 90 | Syntax of *relabel* module. Equivalent to `lpeglabel.T(l)`. |
119 | 91 | ||
92 | Label `l` must be a valid identifier name. | ||
120 | 93 | ||
121 | #### <a name="re-rec"></a><code>p1 //{l1, ..., ln} p2</code> | 94 | #### <a name="re-pow"></a><code>p^l</code> |
122 | |||
123 | Syntax of *relabel* module. Equivalent to `lpeglabel.Rec(p1, p2, l1, ..., ln)`. | ||
124 | |||
125 | The `//{}` operator is left-associative. | ||
126 | |||
127 | 95 | ||
128 | #### <a name="re-lc"></a><code>p1 /{l1, ..., ln} p2</code> | 96 | Syntax of *relabel* module. The pattern `p^l` is equivalent |
97 | to `p / lpeglabel.T(l)`. | ||
129 | 98 | ||
130 | Syntax of *relabel* module. Equivalent to `lpeg.Lc(p1, p2, l1, ..., ln)`. | 99 | Label `l` must be a valid identifier name. |
131 | |||
132 | The `/{}` operator is left-associative. | ||
133 | |||
134 | A grammar can use both choice operators (`/` and `/{}`), | ||
135 | but a single choice can not mix them. That is, the parser of `relabel` | ||
136 | module will not recognize a pattern as `p1 / p2 /{l1} p3`. | ||
137 | 100 | ||
138 | 101 | ||
139 | #### <a name="re-line"></a><code>relabel.calcline (subject, i)</code> | 102 | #### <a name="re-line"></a><code>relabel.calcline (subject, i)</code> |
@@ -141,12 +104,6 @@ module will not recognize a pattern as `p1 / p2 /{l1} p3`. | |||
141 | Returns line and column information regarding position <i>i</i> of the subject. | 104 | Returns line and column information regarding position <i>i</i> of the subject. |
142 | 105 | ||
143 | 106 | ||
144 | #### <a name="re-setl"></a><code>relabel.setlabels (tlabel)</code> | ||
145 | |||
146 | Allows to specicify a table with labels. They keys of | ||
147 | `tlabel` must be strings and the associated values must | ||
148 | be integers between 1 and 255. | ||
149 | |||
150 | 107 | ||
151 | ### Examples | 108 | ### Examples |
152 | 109 | ||
@@ -159,9 +116,9 @@ in the *examples* directory. | |||
159 | 116 | ||
160 | This example illustrates the new values returned | 117 | This example illustrates the new values returned |
161 | by the *match* function in case of an unsuccessful | 118 | by the *match* function in case of an unsuccessful |
162 | matching. As no error is thrown, when the matching | 119 | matching. As no error is thrown in this example, |
163 | fails *errpos* represents the farthest suffix where | 120 | when the matching fails *errpos* represents the |
164 | an ordinary failure occurred. | 121 | farthest suffix where an ordinary failure occurred. |
165 | 122 | ||
166 | ```lua | 123 | ```lua |
167 | local m = require'lpeglabel' | 124 | local m = require'lpeglabel' |
@@ -172,124 +129,165 @@ function matchPrint(p, s) | |||
172 | end | 129 | end |
173 | 130 | ||
174 | local p = m.P"a"^0 * m.P"b" + m.P"c" | 131 | local p = m.P"a"^0 * m.P"b" + m.P"c" |
175 | matchPrint(p, "abc") --> r: 3 lab: nil errpos: nil | 132 | matchPrint(p, "abc") --> r: 3 lab: nil errpos: nil |
176 | matchPrint(p, "c") --> r: 2 lab: nil errpos: nil | 133 | matchPrint(p, "c") --> r: 2 lab: nil errpos: nil |
177 | matchPrint(p, "aac") --> r: nil lab: 0 errpos: 3 | 134 | matchPrint(p, "aac") --> r: nil lab: fail errpos: 3 |
178 | matchPrint(p, "xxc") --> r: nil lab: 0 errpos: 1 | 135 | matchPrint(p, "xxc") --> r: nil lab: fail errpos: 1 |
179 | ``` | 136 | ``` |
180 | 137 | ||
181 | 138 | ||
182 | #### Matching a list of identifiers separated by commas | 139 | #### Matching a list of identifiers separated by commas |
183 | 140 | ||
184 | The following example defines a grammar that matches | 141 | The following example defines a grammar that matches |
185 | a list of identifiers separated by commas. A label | 142 | a (possibly empty) list of identifiers separated by commas. |
186 | is thrown when there is an error matching an identifier | 143 | A label is thrown when there is no identifier after a comma, |
187 | or a comma. | 144 | or when the whole input is not matched. |
188 | |||
189 | We use function `newError` to store error messages in a | ||
190 | table and to return the index associated with each error message. | ||
191 | 145 | ||
192 | 146 | ||
193 | ```lua | 147 | ```lua |
194 | local m = require'lpeglabel' | 148 | local m = require'lpeglabel' |
195 | local re = require'relabel' | 149 | local re = require'relabel' |
196 | 150 | ||
197 | local terror = {} | 151 | local terror = { |
198 | 152 | ErrId = "expecting an identifier", | |
199 | local function newError(s) | 153 | ErrEnd = "expecting EOF", |
200 | table.insert(terror, s) | 154 | fail = "undefined" |
201 | return #terror | 155 | } |
202 | end | ||
203 | 156 | ||
204 | local errUndef = newError("undefined") | 157 | local id = m.R'az'^1 |
205 | local errId = newError("expecting an identifier") | ||
206 | local errComma = newError("expecting ','") | ||
207 | 158 | ||
208 | local g = m.P{ | 159 | local g = m.P{ |
209 | "S", | 160 | 'S', |
210 | S = m.V"Id" * m.V"List", | 161 | S = m.V'List' * (-m.P(1) + m.T'ErrEnd'), |
211 | List = -m.P(1) + (m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)) * m.V"List", | 162 | List = m.V'Id' * (m.V'Comma' * (m.V'Id' + m.T'ErrId'))^0, |
212 | Id = m.V"Sp" * m.R'az'^1, | 163 | Id = m.V'Sp' * id, |
213 | Comma = m.V"Sp" * ",", | 164 | Comma = m.V'Sp' * ',', |
214 | Sp = m.S" \n\t"^0, | 165 | Sp = m.S' \n\t'^0, |
215 | } | 166 | } |
216 | 167 | ||
168 | |||
217 | function mymatch (g, s) | 169 | function mymatch (g, s) |
218 | local r, e, sfail = g:match(s) | 170 | local r, e, pos = g:match(s) |
219 | if not r then | 171 | if not r then |
220 | local line, col = re.calcline(s, #s - #sfail) | 172 | local line, col = re.calcline(s, pos) |
221 | local msg = "Error at line " .. line .. " (col " .. col .. "): " | 173 | local msg = "Error at line " .. line .. " (col " .. col .. "): " |
222 | return r, msg .. terror[e] .. " before '" .. sfail .. "'" | 174 | return r, msg .. terror[e] .. " before '" .. s:sub(pos) .. "'" |
223 | end | 175 | end |
224 | return r | 176 | return r |
225 | end | 177 | end |
226 | 178 | ||
227 | print(mymatch(g, "one,two")) --> 8 | 179 | print(mymatch(g, "one,two")) |
228 | print(mymatch(g, "one two")) --> nil Error at line 1 (col 3): expecting ',' before ' two' | 180 | print(mymatch(g, "one two")) |
229 | print(mymatch(g, "one,\n two,\nthree,")) --> nil Error at line 3 (col 6): expecting an identifier before '' | 181 | print(mymatch(g, "one,\n two,\nthree,4")) |
230 | ``` | 182 | ``` |
231 | 183 | ||
232 | In this example we could think about writing rule <em>List</em> as follows: | 184 | In this example we could think about writing rule <em>List</em> as follows: |
233 | ```lua | 185 | ```lua |
234 | List = ((m.V"Comma" + m.T(errComma)) * (m.V"Id" + m.T(errId)))^0, | 186 | List = m.V'Id' * ((m.V'Comma' + m.T'ErrComma') * (m.V'Id' + m.T'ErrId'))^0, |
235 | ``` | 187 | ``` |
236 | 188 | ||
237 | but when matching this expression against the end of input | 189 | but when matching <code>m.V'Comma' + m.T'ErrComma'</code> against the end of input |
238 | we would get a failure whose associated label would be **errComma**, | 190 | we would get a failure whose associated label would be **errComma**, |
239 | and this would cause the failure of the *whole* repetition. | 191 | and this would cause the failure of the *whole* repetition. |
240 | 192 | ||
193 | Below we rewrite the previous grammar to indicate an error when there is no | ||
194 | comma after an identifier. Before trying to match a comma, we check if |
195 | we have reached the end of input: | ||
196 | |||
197 | ```lua | ||
198 | local m = require'lpeglabel' | ||
199 | local re = require'relabel' | ||
200 | |||
201 | local terror = { | ||
202 | ErrId = "expecting an identifier", | ||
203 | ErrComma = "expecting ','", | ||
204 | fail = "undefined" | ||
205 | } | ||
206 | |||
207 | local id = m.R'az'^1 | ||
208 | |||
209 | local g = m.P{ | ||
210 | 'S', | ||
211 | S = m.V'List', | ||
212 | List = m.V'Id' * (#m.P(1) * m.V'Comma' * (m.V'Id' + m.T'ErrId'))^0, | ||
213 | Id = m.V'Sp' * id, | ||
214 | Comma = m.V'Sp' * ',' + m.T'ErrComma', | ||
215 | Sp = m.S' \n\t'^0, | ||
216 | } | ||
217 | |||
218 | |||
219 | function mymatch (g, s) | ||
220 | local r, e, pos = g:match(s) | ||
221 | if not r then | ||
222 | local line, col = re.calcline(s, pos) | ||
223 | local msg = "Error at line " .. line .. " (col " .. col .. "): " | ||
224 | return r, msg .. terror[e] .. " before '" .. s:sub(pos) .. "'" | ||
225 | end | ||
226 | return r | ||
227 | end | ||
228 | |||
229 | print(mymatch(g, "one,two")) | ||
230 | print(mymatch(g, "one two")) | ||
231 | print(mymatch(g, "one,\n two,\nthree,4")) | ||
232 | print(mymatch(g, " 1,2")) | ||
233 | |||
234 | ``` | ||
241 | 235 | ||
242 | 236 | ||
243 | #### Error Recovery | 237 | #### Error Recovery |
244 | 238 | ||
245 | By using the `Rec` function we can specify a recovery pattern that | 239 | We can specify a recovery rule that should |
246 | should be matched when a label is thrown. After matching the recovery | 240 | be matched when a label is thrown. After matching |
247 | pattern, and possibly recording the error, the parser will resume | 241 | the recovery rule, and possibly recording the error, |
248 | the <em>regular</em> matching. For example, in the example below | 242 | the parser will resume the <em>regular</em> matching. |
249 | we expect to match rule `A`, but when a failure occur the label 42 | 243 | The recovery rule must have the same name (or number) |
250 | is thrown and then we will try to match the recovery pattern `recp`: | 244 | as the label that was thrown. |
245 | |||
246 | |||
247 | For example, in the example below we expect to match rule *A*, | ||
248 | but when a failure occurs the label `Err` is thrown and then we |
249 | will try to match rule *Err*: | ||
251 | ```lua | 250 | ```lua |
252 | local m = require'lpeglabel' | 251 | local m = require'lpeglabel' |
253 | 252 | ||
254 | local recp = m.P"oast" | 253 | local recp = m.P"oast" |
255 | 254 | ||
256 | local g = m.P{ | 255 | local g = m.P{ |
257 | "S", | 256 | 'S', |
258 | S = m.Rec(m.V"A", recp, 42) * ".", | 257 | S = m.V'A' * '.', |
259 | A = m.P"t" * (m.P"est" + m.T(42)) | 258 | A = m.P't' * (m.P'est' + m.T'Err'), |
259 | Err = m.P'oast' | ||
260 | } | 260 | } |
261 | 261 | ||
262 | print(g:match("test.")) --> 6 | 262 | print(g:match("test.")) --> 6 |
263 | print(g:match("toast.")) --> 7 | 263 | print(g:match("toast.")) --> 7 |
264 | print(g:match("oast.")) --> nil 0 oast. | 264 | print(g:match("oast.")) --> nil fail oast. |
265 | print(g:match("toward.")) --> nil 0 ward. | 265 | print(g:match("toward.")) --> nil fail ward. |
266 | ``` | 266 | ``` |
267 | When trying to match subject 'toast.', in rule `A` the first | 267 | When trying to match subject 'toast.', in rule *A* the first |
268 | 't' is matched, then the matching of `m.P"est"` fails and label 42 | 268 | 't' is matched, then the matching of `m.P"est"` fails and label |
269 | is thrown, with the associated inpux suffix 'oast.'. In rule | 269 | `Err` is thrown, with the associated input suffix 'oast.'. |
270 | `S` label 42 is caught and the recovery pattern matches 'oast', | 270 | The recovery rule *Err* successfully matches 'oast', so |
271 | so pattern `'.'` matches the rest of the input. | 271 | the regular matching continues, and pattern `'.'` matches |
272 | the rest of the input. | ||
272 | 273 | ||
273 | When matching subject 'oast.', pattern `m.P"t"` fails, and | 274 | When matching subject 'oast.', pattern `m.P"t"` fails, and |
274 | the result of the matching is <b>nil, 0, oast.</b>. | 275 | the result of the matching is <b>nil, fail, 1</b>. |
275 | 276 | ||
276 | When matching 'toward.', label 42 is thrown after matching 't', | 277 | When matching 'toward.', label `Err` is thrown after matching 't', |
277 | with the associated input suffix 'oward.'. As the matching of the | 278 | with the associated input suffix 'oward.'. As the matching of the |
278 | recovery pattern fails, the result is <b>nil, 0, ward.</b>. | 279 | recovery pattern fails, the result is <b>nil, fail, 3</b>. |
279 | 280 | ||
280 | Usually, the recovery pattern is an expression that does not fail. | 281 | Usually, the recovery pattern is an expression that does not fail. |
281 | In the previous example, we could have used `(m.P(1) - m.P".")^0` | 282 | In the previous example, we could have used `(m.P(1) - m.P".")^0` |
282 | as the recovery pattern. | 283 | as the recovery pattern. |
283 | 284 | ||
284 | Below we rewrite the grammar that describes a list of identifiers | 285 | Below we rewrite the grammar that describes a list of identifiers |
285 | to use a recovery strategy. Grammar `g` remains the same, but we add a | 286 | to use a recovery strategy, with the help of some auxiliary functions. |
286 | recovery grammar `grec` that handles the labels thrown by `g`. | ||
287 | |||
288 | In grammar `grec` we use functions `record` and `sync`. | ||
289 | Function `record`, plus function `recorderror`, will help | 287 | Function `record`, plus function `recorderror`, will help |
290 | us to save the input position where a label was thrown, | 288 | us to save the input position where a label was thrown, |
291 | while function `sync` will give us a synchronization pattern, | 289 | while function `sync` will give us a synchronization pattern, |
292 | that consumes the input while is not possible to match a given | 290 | that consumes the input while it is not possible to match a given |
293 | pattern `p`. | 291 | pattern `p`. |
294 | 292 | ||
295 | When the matching of an identifier fails, a default value ('NONE') | 293 | When the matching of an identifier fails, a default value ('NONE') |
@@ -299,26 +297,11 @@ is provided. | |||
299 | local m = require'lpeglabel' | 297 | local m = require'lpeglabel' |
300 | local re = require'relabel' | 298 | local re = require'relabel' |
301 | 299 | ||
302 | local terror = {} | 300 | local terror = { |
303 | 301 | ErrId = "expecting an identifier", | |
304 | local function newError(s) | 302 | ErrComma = "expecting ','", |
305 | table.insert(terror, s) | 303 | ErrList = "expecting a list of identifiers", |
306 | return #terror | 304 | fail = "undefined" |
307 | end | ||
308 | |||
309 | local errUndef = newError("undefined") | ||
310 | local errId = newError("expecting an identifier") | ||
311 | local errComma = newError("expecting ','") | ||
312 | |||
313 | local id = m.R'az'^1 | ||
314 | |||
315 | local g = m.P{ | ||
316 | "S", | ||
317 | S = m.V"Id" * m.V"List", | ||
318 | List = -m.P(1) + m.V"Comma" * m.V"Id" * m.V"List", | ||
319 | Id = m.V"Sp" * m.C(id) + m.T(errId), | ||
320 | Comma = m.V"Sp" * "," + m.T(errComma), | ||
321 | Sp = m.S" \n\t"^0, | ||
322 | } | 305 | } |
323 | 306 | ||
324 | local subject, errors | 307 | local subject, errors |
@@ -340,14 +323,20 @@ function defaultValue () | |||
340 | return m.Cc"NONE" | 323 | return m.Cc"NONE" |
341 | end | 324 | end |
342 | 325 | ||
343 | local grec = m.P{ | 326 | local id = m.R'az'^1 |
327 | |||
328 | local g = m.P{ | ||
344 | "S", | 329 | "S", |
345 | S = m.Rec(m.Rec(g, m.V"ErrComma", errComma), m.V"ErrId", errId), | 330 | S = m.V"List" + (m.P(1) * m.T'ErrList'), |
346 | ErrComma = record(errComma) * sync(id), | 331 | List = m.V'Id' * (#m.P(1) * m.V'Comma' * (m.V'Id' + m.T'ErrId'))^0, |
347 | ErrId = record(errId) * sync(m.P",") * defaultValue(), | 332 | Id = m.V'Sp' * m.C(id), |
333 | Comma = m.V'Sp' * ',' + m.T'ErrComma', | ||
334 | Sp = m.S' \n\t'^0, | ||
335 | ErrId = record'ErrId' * sync(m.P",") * defaultValue(), | ||
336 | ErrComma = record'ErrComma' * sync(id), | ||
337 | ErrList = record'ErrList' * sync(m.P(-1)) * defaultValue() | ||
348 | } | 338 | } |
349 | 339 | ||
350 | |||
351 | function mymatch (g, s) | 340 | function mymatch (g, s) |
352 | errors = {} | 341 | errors = {} |
353 | subject = s | 342 | subject = s |
@@ -371,387 +360,114 @@ function mymatch (g, s) | |||
371 | return r | 360 | return r |
372 | end | 361 | end |
373 | 362 | ||
374 | mymatch(grec, "one,two") | 363 | mymatch(g, "one,two") |
375 | mymatch(grec, "one two three") | 364 | mymatch(g, "one two three") |
376 | mymatch(grec, "1,\n two, \n3,") | 365 | mymatch(g, "1,\n two, \n3,") |
377 | mymatch(grec, "one\n two123, \nthree,") | 366 | mymatch(g, "one\n two123, \nthree,") |
378 | ``` | 367 | ``` |
379 | 368 | ||
380 | ##### *relabel* syntax | 369 | ##### *relabel* syntax |
381 | 370 | ||
382 | Below we describe again a grammar that matches a list of identifiers, | 371 | Below we write a grammar for a simple programming language |
383 | now using the syntax supported by *relabel*, where `//{}` is the | 372 | using the syntax supported by *relabel*, where `%{}` is the throw |
384 | recovery operator, and `%{}` is the throw operator: | 373 | operator, and the syntax `p^l` is syntactic sugar for |
374 | `p / %{l}` (given that *l* is a valid identifier name): | ||
385 | 375 | ||
386 | ```lua | 376 | ```lua |
387 | local re = require 'relabel' | 377 | local re = require 'relabel' |
388 | 378 | ||
389 | local errinfo = { | 379 | local terror = { |
390 | {"errUndef", "undefined"}, | 380 | cmdSeq = "Missing ';' in CmdSeq", |
391 | {"errId", "expecting an identifier"}, | 381 | ifExp = "Error in expresion of 'if'", |
392 | {"errComma", "expecting ','"}, | 382 | ifThen = "Error matching 'then' keyword", |
383 | ifThenCmdSeq = "Error matching CmdSeq of 'then' branch", | ||
384 | ifElseCmdSeq = "Error matching CmdSeq of 'else' branch", | ||
385 | ifEnd = "Error matching 'end' keyword of 'if'", | ||
386 | repeatCmdSeq = "Error matching CmdSeq of 'repeat'", | ||
387 | repeatUntil = "Error matching 'until' keyword", | ||
388 | repeatExp = "Error matching expression of 'until'", | ||
389 | assignOp = "Error matching ':='", | ||
390 | assignExp = "Error matching expression of assignment", | ||
391 | readName = "Error matching 'NAME' after 'read'", | ||
392 | writeExp = "Error matching expression after 'write'", | ||
393 | simpleExp = "Error matching 'SimpleExp'", | ||
394 | term = "Error matching 'Term'", | ||
395 | factor = "Error matching 'Factor'", | ||
396 | openParExp = "Error matching expression after '('", | ||
397 | closePar = "Error matching ')'", | ||
398 | eof = "Error, expecting EOF", | ||
399 | undefined = "Undefined Error" | ||
393 | } | 400 | } |
394 | 401 | ||
395 | local errmsgs = {} | 402 | g = re.compile([[ |
396 | local labels = {} | 403 | Tiny <- CmdSeq (!. / %{eof}) |
397 | 404 | CmdSeq <- (Cmd SEMICOLON^cmdSeq) (Cmd SEMICOLON^cmdSeq)* | |
398 | for i, err in ipairs(errinfo) do | 405 | Cmd <- IfCmd / RepeatCmd / ReadCmd / WriteCmd / AssignCmd |
399 | errmsgs[i] = err[2] | 406 | IfCmd <- IF Exp^ifExp THEN^ifThen CmdSeq^ifThenCmdSeq (ELSE CmdSeq^ifElseCmdSeq / '') END^ifEnd |
400 | labels[err[1]] = i | 407 | RepeatCmd <- REPEAT CmdSeq^repeatCmdSeq UNTIL^repeatUntil Exp^repeatExp |
401 | end | 408 | AssignCmd <- NAME ASSIGNMENT^assignOp Exp^assignExp |
402 | 409 | ReadCmd <- READ NAME^readName | |
403 | re.setlabels(labels) | 410 | WriteCmd <- WRITE Exp^writeExp |
404 | 411 | Exp <- SimpleExp ((LESS / EQUAL) SimpleExp^simpleExp / '') | |
405 | local g = re.compile[[ | 412 | SimpleExp <- Term ((ADD / SUB) Term^term)* |
406 | S <- Id List | 413 | Term <- Factor ((MUL / DIV) Factor^factor)* |
407 | List <- !. / Comma Id List | 414 | Factor <- OPENPAR Exp^openParExp CLOSEPAR^closePar / NUMBER / NAME |
408 | Id <- Sp {[a-z]+} / %{errId} | 415 | ADD <- Sp '+' |
409 | Comma <- Sp ',' / %{errComma} | 416 | ASSIGNMENT <- Sp ':=' |
410 | Sp <- %s* | 417 | CLOSEPAR <- Sp ')' |
411 | ]] | 418 | DIV <- Sp '/' |
412 | 419 | IF <- Sp 'if' | |
413 | local errors | 420 | ELSE <- Sp 'else' |
414 | 421 | END <- Sp 'end' | |
415 | function recorderror (subject, pos, label) | 422 | EQUAL <- Sp '=' |
416 | local line, col = re.calcline(subject, pos) | 423 | LESS <- Sp '<' |
417 | table.insert(errors, { line = line, col = col, msg = errmsgs[labels[label]] }) | 424 | MUL <- Sp '*' |
418 | return true | 425 | NAME <- !RESERVED Sp [a-z]+ |
419 | end | 426 | NUMBER <- Sp [0-9]+ |
420 | 427 | OPENPAR <- Sp '(' | |
421 | function sync (p) | 428 | READ <- Sp 'read' |
422 | return '( !(' .. p .. ') .)*' | 429 | REPEAT <- Sp 'repeat' |
423 | end | 430 | SEMICOLON <- Sp ';' |
424 | 431 | SUB <- Sp '-' | |
425 | local grec = re.compile( | 432 | THEN <- Sp 'then' |
426 | "S <- %g //{errComma} ErrComma //{errId} ErrId" .. "\n" .. | 433 | UNTIL <- Sp 'until' |
427 | "ErrComma <- ('' -> 'errComma' => recorderror) " .. sync('[a-z]+') .. "\n" .. | 434 | WRITE <- Sp 'write' |
428 | "ErrId <- ('' -> 'errId' => recorderror) " .. sync('","') .. "-> default" | 435 | RESERVED <- (IF / ELSE / END / READ / REPEAT / THEN / UNTIL / WRITE) ![a-z]+ |
429 | , {g = g, recorderror = recorderror, default = "NONE"} | 436 | Sp <- (%s / %nl)* |
430 | ) | 437 | ]], terror) |
431 | 438 | ||
432 | function mymatch (g, s) | 439 | |
433 | errors = {} | 440 | local function mymatch(g, s) |
434 | subject = s | 441 | local r, e, pos = g:match(s) |
435 | io.write("Input: ", s, "\n") | 442 | if not r then |
436 | local r = { g:match(s) } | 443 | local line, col = re.calcline(s, pos) |
437 | io.write("Captures (separated by ';'): ") | 444 | local msg = "Error at line " .. line .. " (col " .. col .. "): " |
438 | for k, v in pairs(r) do | 445 | return r, msg .. terror[e] |
439 | io.write(v .. "; ") | 446 | end |
440 | end | 447 | return r |
441 | io.write("\nSyntactic errors found: " .. #errors) | ||
442 | if #errors > 0 then | ||
443 | io.write("\n") | ||
444 | local out = {} | ||
445 | for i, err in ipairs(errors) do | ||
446 | local msg = "Error at line " .. err.line .. " (col " .. err.col .. "): " .. err.msg | ||
447 | table.insert(out, msg) | ||
448 | end | ||
449 | io.write(table.concat(out, "\n")) | ||
450 | end | ||
451 | print("\n") | ||
452 | return r | ||
453 | end | ||
454 | |||
455 | print(mymatch(grec, "one,two")) | ||
456 | -- Captures (separated by ';'): one; two; | ||
457 | -- Syntactic errors found: 0 | ||
458 | |||
459 | print(mymatch(grec, "one two three")) | ||
460 | -- Captures (separated by ';'): one; two; three; | ||
461 | -- Syntactic errors found: 2 | ||
462 | -- Error at line 1 (col 4): expecting ',' | ||
463 | -- Error at line 1 (col 8): expecting ',' | ||
464 | |||
465 | print(mymatch(grec, "1,\n two, \n3,")) | ||
466 | -- Captures (separated by ';'): NONE; two; NONE; NONE; | ||
467 | -- Syntactic errors found: 3 | ||
468 | -- Error at line 1 (col 1): expecting an identifier | ||
469 | -- Error at line 2 (col 6): expecting an identifier | ||
470 | -- Error at line 3 (col 2): expecting an identifier | ||
471 | |||
472 | print(mymatch(grec, "one\n two123, \nthree,")) | ||
473 | -- Captures (separated by ';'): one; two; three; NONE; | ||
474 | -- Syntactic errors found: 3 | ||
475 | -- Error at line 2 (col 1): expecting ',' | ||
476 | -- Error at line 2 (col 5): expecting ',' | ||
477 | -- Error at line 3 (col 6): expecting an identifier | ||
478 | ``` | ||
479 | |||
480 | |||
481 | #### Arithmetic Expressions | ||
482 | |||
483 | Here's an example of an LPegLabel grammar that matches an expression. | ||
484 | We have used a function `expect`, that takes a pattern `patt` and a label as | ||
485 | parameters and builds a new pattern that throws this label when `patt` | ||
486 | fails. | ||
487 | |||
488 | When a subexpression is syntactically invalid, a default value of 1000 | ||
489 | is provided by the recovery pattern, so the evaluation of an expression | ||
490 | should always produce a numeric value. | ||
491 | |||
492 | In this example, we can see that it may be a tedious and error prone | ||
493 | task to build manually the recovery grammar `grec`. In the next example | ||
494 | we will show how to build the recovery grammar in a more automatic way. | ||
495 | |||
496 | ```lua | ||
497 | local m = require"lpeglabel" | ||
498 | local re = require"relabel" | ||
499 | |||
500 | local labels = { | ||
501 | {"ExpTermFirst", "expected an expression"}, | ||
502 | {"ExpTermOp", "expected a term after the operator"}, | ||
503 | {"MisClose", "missing a closing ')' after the expression"}, | ||
504 | } | ||
505 | |||
506 | local function labelindex(labname) | ||
507 | for i, elem in ipairs(labels) do | ||
508 | if elem[1] == labname then | ||
509 | return i | ||
510 | end | ||
511 | end | ||
512 | error("could not find label: " .. labname) | ||
513 | end | ||
514 | |||
515 | local errors, subject | ||
516 | |||
517 | local function expect(patt, labname) | ||
518 | local i = labelindex(labname) | ||
519 | return patt + m.T(i) | ||
520 | end | ||
521 | |||
522 | |||
523 | local num = m.R("09")^1 / tonumber | ||
524 | local op = m.S("+-") | ||
525 | |||
526 | local function compute(tokens) | ||
527 | local result = tokens[1] | ||
528 | for i = 2, #tokens, 2 do | ||
529 | if tokens[i] == '+' then | ||
530 | result = result + tokens[i+1] | ||
531 | elseif tokens[i] == '-' then | ||
532 | result = result - tokens[i+1] | ||
533 | else | ||
534 | error('unknown operation: ' .. tokens[i]) | ||
535 | end | ||
536 | end | ||
537 | return result | ||
538 | end | ||
539 | |||
540 | local g = m.P { | ||
541 | "Exp", | ||
542 | Exp = m.Ct(m.V"OperandFirst" * (m.C(op) * m.V"Operand")^0) / compute, | ||
543 | OperandFirst = expect(m.V"Term", "ExpTermFirst"), | ||
544 | Operand = expect(m.V"Term", "ExpTermOp"), | ||
545 | Term = num + m.V"Group", | ||
546 | Group = "(" * m.V"Exp" * expect(")", "MisClose"), | ||
547 | } | ||
548 | |||
549 | function recorderror(pos, lab) | ||
550 | local line, col = re.calcline(subject, pos) | ||
551 | table.insert(errors, { line = line, col = col, msg = labels[lab][2] }) | ||
552 | end | ||
553 | |||
554 | function record (labname) | ||
555 | return (m.Cp() * m.Cc(labelindex(labname))) / recorderror | ||
556 | end | ||
557 | |||
558 | function sync (p) | ||
559 | return (-p * m.P(1))^0 | ||
560 | end | ||
561 | |||
562 | function defaultValue (p) | ||
563 | return p or m.Cc(1000) | ||
564 | end | ||
565 | |||
566 | local grec = m.P { | ||
567 | "S", | ||
568 | S = m.Rec(m.V"A", m.V"ErrExpTermFirst", labelindex("ExpTermFirst")), | ||
569 | A = m.Rec(m.V"Sg", m.V"ErrExpTermOp", labelindex("ExpTermOp")), | ||
570 | Sg = m.Rec(g, m.V"ErrMisClose", labelindex("MisClose")), | ||
571 | ErrExpTermFirst = record("ExpTermFirst") * sync(op + ")") * defaultValue(), | ||
572 | ErrExpTermOp = record("ExpTermOp") * sync(op + ")") * defaultValue(), | ||
573 | ErrMisClose = record("MisClose") * sync(m.P")") * defaultValue(m.P""), | ||
574 | } | ||
575 | |||
576 | local function eval(input) | ||
577 | errors = {} | ||
578 | io.write("Input: ", input, "\n") | ||
579 | subject = input | ||
580 | local result, label, suffix = grec:match(input) | ||
581 | io.write("Syntactic errors found: " .. #errors, "\n") | ||
582 | if #errors > 0 then | ||
583 | local out = {} | ||
584 | for i, err in ipairs(errors) do | ||
585 | local pos = err.col | ||
586 | local msg = err.msg | ||
587 | table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")") | ||
588 | end | ||
589 | print(table.concat(out, "\n")) | ||
590 | end | ||
591 | io.write("Result = ") | ||
592 | return result | ||
593 | end | 448 | end |
594 | 449 | ||
595 | print(eval "90-70-(5)+3") | 450 | local s = [[ |
596 | -- Syntactic errors found: 0 | 451 | n := 5; |
597 | -- Result = 18 | 452 | f := 1; |
598 | 453 | repeat | |
599 | print(eval "15+") | 454 | f := f + n; |
600 | -- Syntactic errors found: 1 | 455 | n := n - 1 |
601 | -- syntax error: expected a term after the operator (at index 3) | 456 | until (n < 1); |
602 | -- Result = 1015 | 457 | write f;]] |
603 | 458 | print(mymatch(g, s)) | |
604 | print(eval "-2") | 459 | |
605 | -- Syntactic errors found: 1 | 460 | print(mymatch(g, "a : 2")) |
606 | -- syntax error: expected an expression (at index 1) | 461 | print(mymatch(g, "a := 2; 6")) |
607 | -- Result = 998 | ||
608 | |||
609 | print(eval "1+()+") | ||
610 | -- Syntactic errors found: 2 | ||
611 | -- syntax error: expected an expression (at index 4) | ||
612 | -- syntax error: expected a term after the operator (at index 5) | ||
613 | -- Result = 2001 | ||
614 | |||
615 | print(eval "1+(") | ||
616 | -- Syntactic errors found: 2 | ||
617 | -- syntax error: expected an expression (at index 3) | ||
618 | -- syntax error: missing a closing ')' after the expression (at index 3) | ||
619 | -- Result = 1001 | ||
620 | |||
621 | print(eval "3)") | ||
622 | -- Syntactic errors found: 0 | ||
623 | -- Result = 3 | ||
624 | ``` | 462 | ``` |
625 | 463 | ||
626 | #### Automatically Building the Recovery Grammar | 464 | ### Caveats |
627 | 465 | ||
628 | Below we rewrite the previous example to automatically | 466 | Do not use the number **1** to specify a recovery rule, |
629 | build the recovery grammar based on information provided | 467 | since this index is used to indicate the first rule |
630 | by the user for each label (error message, recovery pattern, etc). | 468 | of a grammar. |
631 | In the example below we also throw an error when the grammar | ||
632 | does not match the whole subject. | ||
633 | 469 | ||
634 | ```lua | 470 | In case your grammar has many regular and recovery rules, |
635 | local m = require"lpeglabel" | 471 | you may get an error message such as *grammar has too many rules*. |
636 | local re = require"relabel" | 472 | In this case, you need to change *MAXRULES* in `lptypes.h`. |
637 | 473 | ||
638 | local num = m.R("09")^1 / tonumber | ||
639 | local op = m.S("+-") | ||
640 | |||
641 | local labels = {} | ||
642 | local nlabels = 0 | ||
643 | |||
644 | local function newError(lab, msg, psync, pcap) | ||
645 | nlabels = nlabels + 1 | ||
646 | psync = psync or m.P(-1) | ||
647 | pcap = pcap or m.P"" | ||
648 | labels[lab] = { id = nlabels, msg = msg, psync = psync, pcap = pcap } | ||
649 | end | ||
650 | |||
651 | newError("ExpTermFirst", "expected an expression", op + ")", m.Cc(1000)) | ||
652 | newError("ExpTermOp", "expected a term after the operator", op + ")", m.Cc(1000)) | ||
653 | newError("MisClose", "missing a closing ')' after the expression", m.P")") | ||
654 | newError("Extra", "extra characters found after the expression") | ||
655 | |||
656 | local errors, subject | ||
657 | |||
658 | local function expect(patt, labname) | ||
659 | local i = labels[labname].id | ||
660 | return patt + m.T(i) | ||
661 | end | ||
662 | |||
663 | local function compute(tokens) | ||
664 | local result = tokens[1] | ||
665 | for i = 2, #tokens, 2 do | ||
666 | if tokens[i] == '+' then | ||
667 | result = result + tokens[i+1] | ||
668 | elseif tokens[i] == '-' then | ||
669 | result = result - tokens[i+1] | ||
670 | else | ||
671 | error('unknown operation: ' .. tokens[i]) | ||
672 | end | ||
673 | end | ||
674 | return result | ||
675 | end | ||
676 | |||
677 | local g = m.P { | ||
678 | "Exp", | ||
679 | Exp = m.Ct(m.V"OperandFirst" * (m.C(op) * m.V"Operand")^0) / compute, | ||
680 | OperandFirst = expect(m.V"Term", "ExpTermFirst"), | ||
681 | Operand = expect(m.V"Term", "ExpTermOp"), | ||
682 | Term = num + m.V"Group", | ||
683 | Group = "(" * m.V"Exp" * expect(")", "MisClose"), | ||
684 | } | ||
685 | |||
686 | function recorderror(pos, lab) | ||
687 | local line, col = re.calcline(subject, pos) | ||
688 | table.insert(errors, { line = line, col = col, msg = labels[lab].msg }) | ||
689 | end | ||
690 | |||
691 | function record (labname) | ||
692 | return (m.Cp() * m.Cc(labname)) / recorderror | ||
693 | end | ||
694 | |||
695 | function sync (p) | ||
696 | return (-p * m.P(1))^0 | ||
697 | end | ||
698 | |||
699 | function defaultValue (p) | ||
700 | return p or m.Cc(1000) | ||
701 | end | ||
702 | |||
703 | local grec = g * expect(m.P(-1), "Extra") | ||
704 | for k, v in pairs(labels) do | ||
705 | grec = m.Rec(grec, record(k) * sync(v.psync) * v.pcap, v.id) | ||
706 | end | ||
707 | |||
708 | local function eval(input) | ||
709 | errors = {} | ||
710 | io.write("Input: ", input, "\n") | ||
711 | subject = input | ||
712 | local result, label, suffix = grec:match(input) | ||
713 | io.write("Syntactic errors found: " .. #errors, "\n") | ||
714 | if #errors > 0 then | ||
715 | local out = {} | ||
716 | for i, err in ipairs(errors) do | ||
717 | local pos = err.col | ||
718 | local msg = err.msg | ||
719 | table.insert(out, "syntax error: " .. msg .. " (at index " .. pos .. ")") | ||
720 | end | ||
721 | print(table.concat(out, "\n")) | ||
722 | end | ||
723 | io.write("Result = ") | ||
724 | return result | ||
725 | end | ||
726 | |||
727 | print(eval "90-70-(5)+3") | ||
728 | -- Syntactic errors found: 0 | ||
729 | -- Result = 18 | ||
730 | |||
731 | print(eval "15+") | ||
732 | -- Syntactic errors found: 1 | ||
733 | -- syntax error: expected a term after the operator (at index 3) | ||
734 | -- Result = 1015 | ||
735 | |||
736 | print(eval "-2") | ||
737 | -- Syntactic errors found: 1 | ||
738 | -- syntax error: expected an expression (at index 1) | ||
739 | -- Result = 998 | ||
740 | |||
741 | print(eval "1+()+") | ||
742 | -- Syntactic errors found: 2 | ||
743 | -- syntax error: expected an expression (at index 4) | ||
744 | -- syntax error: expected a term after the operator (at index 5) | ||
745 | -- Result = 2001 | ||
746 | |||
747 | print(eval "1+(") | ||
748 | -- Syntactic errors found: 2 | ||
749 | -- syntax error: expected an expression (at index 3) | ||
750 | -- syntax error: missing a closing ')' after the expression (at index 3) | ||
751 | -- Result = 1001 | ||
752 | |||
753 | print(eval "3)") | ||
754 | -- Syntactic errors found: 1 | ||
755 | -- syntax error: extra characters found after the expression (at index 2) | ||
756 | -- Result = 3 | ||
757 | ``` | ||