diff options
Diffstat (limited to 'relabel.lua')
-rw-r--r-- | relabel.lua | 313 |
1 files changed, 234 insertions, 79 deletions
diff --git a/relabel.lua b/relabel.lua index b66fc2e..76846ff 100644 --- a/relabel.lua +++ b/relabel.lua | |||
@@ -1,9 +1,10 @@ | |||
1 | -- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ | 1 | -- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ |
2 | 2 | ||
3 | -- imported functions and modules | 3 | -- imported functions and modules |
4 | local tonumber, type, print, error = tonumber, type, print, error | 4 | local tonumber, type, print, error, ipairs = tonumber, type, print, error, ipairs |
5 | local setmetatable = setmetatable | 5 | local setmetatable = setmetatable |
6 | local unpack = table.unpack or unpack | 6 | local unpack, tinsert, concat = table.unpack or unpack, table.insert, table.concat |
7 | local rep = string.rep | ||
7 | local m = require"lpeglabel" | 8 | local m = require"lpeglabel" |
8 | 9 | ||
9 | -- 'm' will be used to parse expressions, and 'mm' will be used to | 10 | -- 'm' will be used to parse expressions, and 'mm' will be used to |
@@ -22,8 +23,89 @@ if version == "Lua 5.2" then _ENV = nil end | |||
22 | 23 | ||
23 | 24 | ||
24 | local any = m.P(1) | 25 | local any = m.P(1) |
26 | local dummy = mm.P(false) | ||
25 | 27 | ||
26 | 28 | ||
29 | local errinfo = { | ||
30 | {"NoPatt", "no pattern found"}, | ||
31 | {"ExtraChars", "unexpected characters after the pattern"}, | ||
32 | |||
33 | {"ExpPatt1", "expected a pattern after '/' or the label(s)"}, | ||
34 | |||
35 | {"ExpPatt2", "expected a pattern after '&'"}, | ||
36 | {"ExpPatt3", "expected a pattern after '!'"}, | ||
37 | |||
38 | {"ExpPatt4", "expected a pattern after '('"}, | ||
39 | {"ExpPatt5", "expected a pattern after ':'"}, | ||
40 | {"ExpPatt6", "expected a pattern after '{~'"}, | ||
41 | {"ExpPatt7", "expected a pattern after '{|'"}, | ||
42 | |||
43 | {"ExpPatt8", "expected a pattern after '<-'"}, | ||
44 | |||
45 | {"ExpPattOrClose", "expected a pattern or closing '}' after '{'"}, | ||
46 | |||
47 | {"ExpNum", "expected a number after '^', '+' or '-' (no space)"}, | ||
48 | {"ExpCap", "expected a string, number, '{}' or name after '->'"}, | ||
49 | |||
50 | {"ExpName1", "expected the name of a rule after '=>'"}, | ||
51 | {"ExpName2", "expected the name of a rule after '=' (no space)"}, | ||
52 | {"ExpName3", "expected the name of a rule after '<' (no space)"}, | ||
53 | |||
54 | {"ExpLab1", "expected at least one label after '{'"}, | ||
55 | {"ExpLab2", "expected a label after the comma"}, | ||
56 | |||
57 | {"ExpNameOrLab", "expected a name or label after '%' (no space)"}, | ||
58 | |||
59 | {"ExpItem", "expected at least one item after '[' or '^'"}, | ||
60 | |||
61 | {"MisClose1", "missing closing ')'"}, | ||
62 | {"MisClose2", "missing closing ':}'"}, | ||
63 | {"MisClose3", "missing closing '~}'"}, | ||
64 | {"MisClose4", "missing closing '|}'"}, | ||
65 | {"MisClose5", "missing closing '}'"}, -- for the captures | ||
66 | |||
67 | {"MisClose6", "missing closing '>'"}, | ||
68 | {"MisClose7", "missing closing '}'"}, -- for the labels | ||
69 | |||
70 | {"MisClose8", "missing closing ']'"}, | ||
71 | |||
72 | {"MisTerm1", "missing terminating single quote"}, | ||
73 | {"MisTerm2", "missing terminating double quote"}, | ||
74 | } | ||
75 | |||
76 | local errmsgs = {} | ||
77 | local labels = {} | ||
78 | |||
79 | for i, err in ipairs(errinfo) do | ||
80 | errmsgs[i] = err[2] | ||
81 | labels[err[1]] = i | ||
82 | end | ||
83 | |||
84 | local errfound = {} | ||
85 | |||
86 | local function expect (pattern, labelname) | ||
87 | local label = labels[labelname] | ||
88 | local record = function (input, pos) | ||
89 | tinsert(errfound, {label, pos}) | ||
90 | return true | ||
91 | end | ||
92 | return pattern + m.Cmt("", record) * m.T(label) | ||
93 | end | ||
94 | |||
95 | local ignore = m.Cmt(any, function (input, pos) | ||
96 | return errfound[#errfound][2], dummy | ||
97 | end) | ||
98 | |||
99 | local pointAtStart = m.Cmt(any, function (input, pos) | ||
100 | local ret = errfound[#errfound][2] | ||
101 | errfound[#errfound][2] = pos-1 | ||
102 | return ret, dummy | ||
103 | end) | ||
104 | |||
105 | local function adderror (message) | ||
106 | tinsert(errfound, {message}) | ||
107 | end | ||
108 | |||
27 | -- Pre-defined names | 109 | -- Pre-defined names |
28 | local Predef = { nl = m.P"\n" } | 110 | local Predef = { nl = m.P"\n" } |
29 | local tlabels = {} | 111 | local tlabels = {} |
@@ -75,18 +157,14 @@ local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) | |||
75 | 157 | ||
76 | local function getdef (id, defs) | 158 | local function getdef (id, defs) |
77 | local c = defs and defs[id] | 159 | local c = defs and defs[id] |
78 | if not c then error("undefined name: " .. id) end | 160 | if not c then |
161 | adderror("undefined name: " .. id) | ||
162 | return nil | ||
163 | end | ||
79 | return c | 164 | return c |
80 | end | 165 | end |
81 | 166 | ||
82 | 167 | ||
83 | local function patt_error (s, i) | ||
84 | local msg = (#s < i + 20) and s:sub(i) | ||
85 | or s:sub(i,i+20) .. "..." | ||
86 | msg = ("pattern error near '%s'"):format(msg) | ||
87 | error(msg, 2) | ||
88 | end | ||
89 | |||
90 | local function mult (p, n) | 168 | local function mult (p, n) |
91 | local np = mm.P(true) | 169 | local np = mm.P(true) |
92 | while n >= 1 do | 170 | while n >= 1 do |
@@ -106,44 +184,42 @@ end | |||
106 | 184 | ||
107 | local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 | 185 | local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 |
108 | 186 | ||
109 | local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0 | 187 | local name = m.C(m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0) |
110 | 188 | ||
111 | local arrow = S * "<-" | 189 | local arrow = S * "<-" |
112 | 190 | ||
113 | local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1 | ||
114 | |||
115 | name = m.C(name) | ||
116 | |||
117 | |||
118 | -- a defined name only have meaning in a given environment | 191 | -- a defined name only have meaning in a given environment |
119 | local Def = name * m.Carg(1) | 192 | local Def = name * m.Carg(1) |
120 | 193 | ||
121 | local num = m.C(m.R"09"^1) * S / tonumber | 194 | local num = m.C(m.R"09"^1) * S / tonumber |
122 | 195 | ||
123 | local String = "'" * m.C((any - "'")^0) * "'" + | 196 | local String = "'" * m.C((any - "'" - m.P"\n")^0) * expect("'", "MisTerm1") |
124 | '"' * m.C((any - '"')^0) * '"' | 197 | + '"' * m.C((any - '"' - m.P"\n")^0) * expect('"', "MisTerm2") |
125 | 198 | ||
126 | 199 | ||
127 | local defined = "%" * Def / function (c,Defs) | 200 | local defined = "%" * Def / function (c,Defs) |
128 | local cat = Defs and Defs[c] or Predef[c] | 201 | local cat = Defs and Defs[c] or Predef[c] |
129 | if not cat then error ("name '" .. c .. "' undefined") end | 202 | if not cat then |
203 | adderror ("name '" .. c .. "' undefined") | ||
204 | return dummy | ||
205 | end | ||
130 | return cat | 206 | return cat |
131 | end | 207 | end |
132 | 208 | ||
133 | local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R | 209 | local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R |
134 | 210 | ||
135 | local item = defined + Range + m.C(any) | 211 | local item = defined + Range + m.C(any - m.P"\n") |
136 | 212 | ||
137 | local Class = | 213 | local Class = |
138 | "[" | 214 | "[" |
139 | * (m.C(m.P"^"^-1)) -- optional complement symbol | 215 | * (m.C(m.P"^"^-1)) -- optional complement symbol |
140 | * m.Cf(item * (item - "]")^0, mt.__add) / | 216 | * m.Cf(expect(item, "ExpItem") * (item - "]")^0, mt.__add) |
141 | function (c, p) return c == "^" and any - p or p end | 217 | / function (c, p) return c == "^" and any - p or p end |
142 | * "]" | 218 | * expect("]", "MisClose8") |
143 | 219 | ||
144 | local function adddef (t, k, exp) | 220 | local function adddef (t, k, exp) |
145 | if t[k] then | 221 | if t[k] then |
146 | error("'"..k.."' already defined as a rule") | 222 | adderror("'"..k.."' already defined as a rule") |
147 | else | 223 | else |
148 | t[k] = exp | 224 | t[k] = exp |
149 | end | 225 | end |
@@ -155,77 +231,156 @@ local function firstdef (n, r) return adddef({n}, n, r) end | |||
155 | 231 | ||
156 | local function NT (n, b) | 232 | local function NT (n, b) |
157 | if not b then | 233 | if not b then |
158 | error("rule '"..n.."' used outside a grammar") | 234 | adderror("rule '"..n.."' used outside a grammar") |
235 | return dummy | ||
159 | else return mm.V(n) | 236 | else return mm.V(n) |
160 | end | 237 | end |
161 | end | 238 | end |
162 | 239 | ||
163 | local function labchoice (...) | 240 | local function labchoice (...) |
164 | local t = { ... } | 241 | local t = { ... } |
165 | local n = #t | 242 | local n = #t |
166 | local p = t[1] | 243 | local p = t[1] |
167 | local i = 2 | 244 | local i = 2 |
168 | while i + 1 <= n do | 245 | while i + 1 <= n do |
169 | p = mm.Lc(p, t[i+1], unpack(t[i])) | 246 | p = t[i] and mm.Lc(p, t[i+1], unpack(t[i])) or mt.__add(p, t[i+1]) |
170 | i = i + 2 | 247 | i = i + 2 |
171 | end | 248 | end |
172 | 249 | ||
173 | return p | 250 | return p |
174 | end | 251 | end |
175 | 252 | ||
253 | local function labify (labelnames) | ||
254 | for i, l in ipairs(labelnames) do | ||
255 | labelnames[i] = labels[l] | ||
256 | end | ||
257 | return labelnames | ||
258 | end | ||
259 | |||
260 | local labelset1 = labify { | ||
261 | "ExpPatt2", "ExpPatt3", | ||
262 | "ExpPatt4", "ExpPatt5", "ExpPatt6", "ExpPatt7", | ||
263 | "ExpPatt8", "ExpPattOrClose", | ||
264 | "ExpNum", "ExpCap", | ||
265 | "ExpName1", "ExpName2", "ExpName3", | ||
266 | "ExpNameOrLab", "ExpItem", | ||
267 | "MisClose6" | ||
268 | } | ||
269 | |||
270 | local labelset2 = labify { | ||
271 | "MisClose1", "MisClose2", "MisClose3", "MisClose4", "MisClose5" | ||
272 | } | ||
273 | |||
274 | local labelset3 = labify { | ||
275 | "ExpPatt1", "ExpLab1", "ExpLab2", "MisClose7" | ||
276 | } | ||
176 | 277 | ||
177 | local exp = m.P{ "Exp", | 278 | local exp = m.P{ "Exp", |
178 | Exp = S * ( m.V"Grammar" | 279 | Exp = S * ( m.V"Grammar" |
179 | + (m.V"Seq") * ("/" * m.V"Labels" * S * m.V"Seq")^1 / labchoice | 280 | + (m.V"RecovSeq" * (S * "/" * m.Lc((m.Ct(m.V"Labels") + m.Cc(nil)) |
180 | + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) ); | 281 | * expect(S * m.V"RecovSeq", |
181 | Labels = m.Ct(m.P"{" * S * m.V"Label" * (S * "," * S * m.V"Label")^0 * S * "}"); | 282 | "ExpPatt1"), |
182 | Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul) | 283 | m.Cc(nil) * m.V"Skip", |
183 | * (#seq_follow + patt_error); | 284 | unpack(labelset3)) |
184 | Prefix = "&" * S * m.V"Prefix" / mt.__len | 285 | )^0 |
185 | + "!" * S * m.V"Prefix" / mt.__unm | 286 | ) / labchoice); |
287 | Labels = m.P"{" * expect(S * m.V"Label", "ExpLab1") | ||
288 | * (S * "," * expect(S * m.V"Label", "ExpLab2"))^0 | ||
289 | * expect(S * "}", "MisClose7"); | ||
290 | Skip = (-m.P"/" * -m.P(name * arrow) * m.V"Ignored")^0 * m.Cc(dummy); | ||
291 | Ignored = m.V"Group" + any; | ||
292 | Group = "(" * (-m.P")" * m.V"Ignored")^0 * ")" | ||
293 | + "{" * (-m.P"}" * m.V"Ignored")^0 * "}" | ||
294 | + "[" * (-m.P"]" * m.V"Ignored")^0 * "]" | ||
295 | + "'" * (-m.P"'" * m.V"Ignored")^0 * "'" | ||
296 | + '"' * (-m.P'"' * m.V"Ignored")^0 * '"'; | ||
297 | RecovSeq = m.Lc(m.V"Seq", m.V"Skip", unpack(labelset1)); | ||
298 | Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix" * (S * m.V"Prefix")^0, mt.__mul); | ||
299 | Prefix = "&" * expect(S * m.V"Prefix", "ExpPatt2") / mt.__len | ||
300 | + "!" * expect(S * m.V"Prefix", "ExpPatt3") / mt.__unm | ||
186 | + m.V"Suffix"; | 301 | + m.V"Suffix"; |
187 | Suffix = m.Cf(m.V"Primary" * S * | 302 | Suffix = m.Cf(m.V"RecovPrimary" * |
188 | ( ( m.P"+" * m.Cc(1, mt.__pow) | 303 | ( S * ( m.P"+" * m.Cc(1, mt.__pow) |
189 | + m.P"*" * m.Cc(0, mt.__pow) | 304 | + m.P"*" * m.Cc(0, mt.__pow) |
190 | + m.P"?" * m.Cc(-1, mt.__pow) | 305 | + m.P"?" * m.Cc(-1, mt.__pow) |
191 | + "^" * ( m.Cg(num * m.Cc(mult)) | 306 | + "^" * expect( m.Cg(num * m.Cc(mult)) |
192 | + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow)) | 307 | + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow) |
193 | ) | 308 | ), |
194 | + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div)) | 309 | "ExpNum") |
195 | + m.P"{}" * m.Cc(nil, m.Ct) | 310 | + "->" * expect(S * ( m.Cg((String + num) * m.Cc(mt.__div)) |
196 | + m.Cg(Def / getdef * m.Cc(mt.__div)) | 311 | + m.P"{}" * m.Cc(nil, m.Ct) |
197 | ) | 312 | + m.Cg(Def / getdef * m.Cc(mt.__div)) |
198 | + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt)) | 313 | ), |
199 | ) * S | 314 | "ExpCap") |
315 | + "=>" * expect(S * m.Cg(Def / getdef * m.Cc(m.Cmt)), | ||
316 | "ExpName1") | ||
317 | ) | ||
200 | )^0, function (a,b,f) return f(a,b) end ); | 318 | )^0, function (a,b,f) return f(a,b) end ); |
201 | Primary = "(" * m.V"Exp" * ")" | 319 | RecovPrimary = m.Lc(m.V"Primary", ignore, unpack(labelset2)); |
202 | + String / mm.P | 320 | Primary = "(" * expect(m.V"Exp", "ExpPatt4") * expect(S * ")", "MisClose1") |
203 | + Class | 321 | + m.Lc(String / mm.P, pointAtStart, |
204 | + defined | 322 | labels["MisTerm1"], labels["MisTerm2"]) |
205 | + "%{" * S * m.V"Label" * (S * "," * S * m.V"Label")^0 * S * "}" / mm.T | 323 | + m.Lc(Class, pointAtStart, labels["MisClose8"]) |
206 | + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" / | 324 | + defined |
207 | function (n, p) return mm.Cg(p, n) end | 325 | + "%" * expect(m.V"Labels", "ExpNameOrLab") / mm.T |
208 | + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end | 326 | + "{:" * (name * ":" + m.Cc(nil)) * expect(m.V"Exp", "ExpPatt5") |
209 | + m.P"{}" / mm.Cp | 327 | * expect(S * ":}", "MisClose2") |
210 | + "{~" * m.V"Exp" * "~}" / mm.Cs | 328 | / function (n, p) return mm.Cg(p, n) end |
211 | + "{|" * m.V"Exp" * "|}" / mm.Ct | 329 | + "=" * expect(name, "ExpName2") |
212 | + "{" * m.V"Exp" * "}" / mm.C | 330 | / function (n) return mm.Cmt(mm.Cb(n), equalcap) end |
213 | + m.P"." * m.Cc(any) | 331 | + m.P"{}" / mm.Cp |
214 | + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT; | 332 | + "{~" * expect(m.V"Exp", "ExpPatt6") |
215 | Label = num + name / function (f) return tlabels[f] end; | 333 | * expect(S * "~}", "MisClose3") / mm.Cs |
216 | Definition = name * arrow * m.V"Exp"; | 334 | + "{|" * expect(m.V"Exp", "ExpPatt7") |
217 | Grammar = m.Cg(m.Cc(true), "G") * | 335 | * expect(S * "|}", "MisClose4") / mm.Ct |
218 | m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0, | 336 | + "{" * expect(m.V"Exp", "ExpPattOrClose") |
219 | adddef) / mm.P | 337 | * expect(S * "}", "MisClose5") / mm.C |
338 | + m.P"." * m.Cc(any) | ||
339 | + (name * -arrow + "<" * expect(name, "ExpName3") | ||
340 | * expect(">", "MisClose6")) * m.Cb("G") / NT; | ||
341 | Label = num + name / function (f) return tlabels[f] end; | ||
342 | Definition = name * arrow * expect(m.V"Exp", "ExpPatt8"); | ||
343 | Grammar = m.Cg(m.Cc(true), "G") | ||
344 | * m.Cf(m.V"Definition" / firstdef * (S * m.Cg(m.V"Definition"))^0, | ||
345 | adddef) / mm.P; | ||
220 | } | 346 | } |
221 | 347 | ||
222 | local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error) | 348 | local pattern = S * m.Cg(m.Cc(false), "G") * expect(exp, "NoPatt") / mm.P |
349 | * S * expect(-any, "ExtraChars") | ||
223 | 350 | ||
351 | local function lineno (s, i) | ||
352 | if i == 1 then return 1, 1 end | ||
353 | local adjust = 0 | ||
354 | if s:sub(i,i) == '\n' then | ||
355 | i = i-1 | ||
356 | adjust = 1 | ||
357 | end | ||
358 | local rest, num = s:sub(1,i):gsub("[^\n]*\n", "") | ||
359 | local r = #rest | ||
360 | return 1 + num, (r ~= 0 and r or 1) + adjust | ||
361 | end | ||
224 | 362 | ||
225 | local function compile (p, defs) | 363 | local function compile (p, defs) |
226 | if mm.type(p) == "pattern" then return p end -- already compiled | 364 | if mm.type(p) == "pattern" then return p end -- already compiled |
227 | local cp = pattern:match(p, 1, defs) | 365 | p = p .. " " -- for better reporting of column numbers in errors when at EOF |
228 | if not cp then error("incorrect pattern", 3) end | 366 | local cp, label, suffix = pattern:match(p, 1, defs) |
367 | if #errfound > 0 then | ||
368 | local lines = {} | ||
369 | for line in p:gmatch("[^\r\n]+") do tinsert(lines, line) end | ||
370 | local errors = {} | ||
371 | for i, err in ipairs(errfound) do | ||
372 | if #err == 1 then | ||
373 | tinsert(errors, err[1]) | ||
374 | else | ||
375 | local line, col = lineno(p, err[2]) | ||
376 | tinsert(errors, "L" .. line .. ":C" .. col .. ": " .. errmsgs[err[1]]) | ||
377 | tinsert(errors, lines[line]) | ||
378 | tinsert(errors, rep(" ", col-1) .. "^") | ||
379 | end | ||
380 | end | ||
381 | errfound = {} | ||
382 | error(concat(errors, "\n")) | ||
383 | end | ||
229 | return cp | 384 | return cp |
230 | end | 385 | end |
231 | 386 | ||
@@ -264,7 +419,7 @@ local function gsub (s, p, rep) | |||
264 | end | 419 | end |
265 | 420 | ||
266 | local function setlabels (t) | 421 | local function setlabels (t) |
267 | tlabels = t | 422 | tlabels = t |
268 | end | 423 | end |
269 | 424 | ||
270 | -- exported names | 425 | -- exported names |
@@ -274,7 +429,7 @@ local re = { | |||
274 | find = find, | 429 | find = find, |
275 | gsub = gsub, | 430 | gsub = gsub, |
276 | updatelocale = updatelocale, | 431 | updatelocale = updatelocale, |
277 | setlabels = setlabels | 432 | setlabels = setlabels |
278 | } | 433 | } |
279 | 434 | ||
280 | if version == "Lua 5.1" then _G.re = re end | 435 | if version == "Lua 5.1" then _G.re = re end |