diff options
Diffstat (limited to 'relabel.lua')
-rw-r--r-- | relabel.lua | 320 |
1 files changed, 243 insertions, 77 deletions
diff --git a/relabel.lua b/relabel.lua index b66fc2e..6fdbb7c 100644 --- a/relabel.lua +++ b/relabel.lua | |||
@@ -1,9 +1,11 @@ | |||
1 | -- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ | 1 | -- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ |
2 | 2 | ||
3 | -- imported functions and modules | 3 | -- imported functions and modules |
4 | local tonumber, type, print, error = tonumber, type, print, error | 4 | local tonumber, type, print, error, ipairs = tonumber, type, print, error, ipairs |
5 | local pcall = pcall | ||
5 | local setmetatable = setmetatable | 6 | local setmetatable = setmetatable |
6 | local unpack = table.unpack or unpack | 7 | local unpack, tinsert, concat = table.unpack or unpack, table.insert, table.concat |
8 | local rep = string.rep | ||
7 | local m = require"lpeglabel" | 9 | local m = require"lpeglabel" |
8 | 10 | ||
9 | -- 'm' will be used to parse expressions, and 'mm' will be used to | 11 | -- 'm' will be used to parse expressions, and 'mm' will be used to |
@@ -22,6 +24,74 @@ if version == "Lua 5.2" then _ENV = nil end | |||
22 | 24 | ||
23 | 25 | ||
24 | local any = m.P(1) | 26 | local any = m.P(1) |
27 | local dummy = mm.P(false) | ||
28 | |||
29 | |||
30 | local errinfo = { | ||
31 | {"NoPatt", "no pattern found"}, | ||
32 | {"ExtraChars", "unexpected characters after the pattern"}, | ||
33 | |||
34 | {"ExpPatt1", "expected a pattern after '/' or the label(s)"}, | ||
35 | |||
36 | {"ExpPatt2", "expected a pattern after '&'"}, | ||
37 | {"ExpPatt3", "expected a pattern after '!'"}, | ||
38 | |||
39 | {"ExpPatt4", "expected a pattern after '('"}, | ||
40 | {"ExpPatt5", "expected a pattern after ':'"}, | ||
41 | {"ExpPatt6", "expected a pattern after '{~'"}, | ||
42 | {"ExpPatt7", "expected a pattern after '{|'"}, | ||
43 | |||
44 | {"ExpPatt8", "expected a pattern after '<-'"}, | ||
45 | |||
46 | {"ExpPattOrClose", "expected a pattern or closing '}' after '{'"}, | ||
47 | |||
48 | {"ExpNum", "expected a number after '^', '+' or '-' (no space)"}, | ||
49 | {"ExpCap", "expected a string, number, '{}' or name after '->'"}, | ||
50 | |||
51 | {"ExpName1", "expected the name of a rule after '=>'"}, | ||
52 | {"ExpName2", "expected the name of a rule after '=' (no space)"}, | ||
53 | {"ExpName3", "expected the name of a rule after '<' (no space)"}, | ||
54 | |||
55 | {"ExpLab1", "expected at least one label after '{'"}, | ||
56 | {"ExpLab2", "expected a label after the comma"}, | ||
57 | |||
58 | {"ExpNameOrLab", "expected a name or label after '%' (no space)"}, | ||
59 | |||
60 | {"ExpItem", "expected at least one item after '[' or '^'"}, | ||
61 | |||
62 | {"MisClose1", "missing closing ')'"}, | ||
63 | {"MisClose2", "missing closing ':}'"}, | ||
64 | {"MisClose3", "missing closing '~}'"}, | ||
65 | {"MisClose4", "missing closing '|}'"}, | ||
66 | {"MisClose5", "missing closing '}'"}, -- for the captures | ||
67 | |||
68 | {"MisClose6", "missing closing '>'"}, | ||
69 | {"MisClose7", "missing closing '}'"}, -- for the labels | ||
70 | |||
71 | {"MisClose8", "missing closing ']'"}, | ||
72 | |||
73 | {"MisTerm1", "missing terminating single quote"}, | ||
74 | {"MisTerm2", "missing terminating double quote"}, | ||
75 | } | ||
76 | |||
77 | local errmsgs = {} | ||
78 | local labels = {} | ||
79 | |||
80 | for i, err in ipairs(errinfo) do | ||
81 | errmsgs[i] = err[2] | ||
82 | labels[err[1]] = i | ||
83 | end | ||
84 | |||
85 | local syntaxerrs = {} | ||
86 | |||
87 | local function expect (pattern, labelname) | ||
88 | local label = labels[labelname] | ||
89 | local record = function (input, pos) | ||
90 | tinsert(syntaxerrs, { label = label, pos = pos }) | ||
91 | return true | ||
92 | end | ||
93 | return pattern + m.Cmt("", record) * m.T(label) | ||
94 | end | ||
25 | 95 | ||
26 | 96 | ||
27 | -- Pre-defined names | 97 | -- Pre-defined names |
@@ -75,18 +145,13 @@ local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) | |||
75 | 145 | ||
76 | local function getdef (id, defs) | 146 | local function getdef (id, defs) |
77 | local c = defs and defs[id] | 147 | local c = defs and defs[id] |
78 | if not c then error("undefined name: " .. id) end | 148 | if not c then |
149 | error("undefined name: " .. id) | ||
150 | end | ||
79 | return c | 151 | return c |
80 | end | 152 | end |
81 | 153 | ||
82 | 154 | ||
83 | local function patt_error (s, i) | ||
84 | local msg = (#s < i + 20) and s:sub(i) | ||
85 | or s:sub(i,i+20) .. "..." | ||
86 | msg = ("pattern error near '%s'"):format(msg) | ||
87 | error(msg, 2) | ||
88 | end | ||
89 | |||
90 | local function mult (p, n) | 155 | local function mult (p, n) |
91 | local np = mm.P(true) | 156 | local np = mm.P(true) |
92 | while n >= 1 do | 157 | while n >= 1 do |
@@ -106,40 +171,37 @@ end | |||
106 | 171 | ||
107 | local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 | 172 | local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 |
108 | 173 | ||
109 | local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0 | 174 | local name = m.C(m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0) |
110 | 175 | ||
111 | local arrow = S * "<-" | 176 | local arrow = S * "<-" |
112 | 177 | ||
113 | local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1 | ||
114 | |||
115 | name = m.C(name) | ||
116 | |||
117 | |||
118 | -- a defined name only have meaning in a given environment | 178 | -- a defined name only have meaning in a given environment |
119 | local Def = name * m.Carg(1) | 179 | local Def = name * m.Carg(1) |
120 | 180 | ||
121 | local num = m.C(m.R"09"^1) * S / tonumber | 181 | local num = m.C(m.R"09"^1) * S / tonumber |
122 | 182 | ||
123 | local String = "'" * m.C((any - "'")^0) * "'" + | 183 | local String = "'" * m.C((any - "'" - m.P"\n")^0) * expect("'", "MisTerm1") |
124 | '"' * m.C((any - '"')^0) * '"' | 184 | + '"' * m.C((any - '"' - m.P"\n")^0) * expect('"', "MisTerm2") |
125 | 185 | ||
126 | 186 | ||
127 | local defined = "%" * Def / function (c,Defs) | 187 | local defined = "%" * Def / function (c,Defs) |
128 | local cat = Defs and Defs[c] or Predef[c] | 188 | local cat = Defs and Defs[c] or Predef[c] |
129 | if not cat then error ("name '" .. c .. "' undefined") end | 189 | if not cat then |
190 | error("name '" .. c .. "' undefined") | ||
191 | end | ||
130 | return cat | 192 | return cat |
131 | end | 193 | end |
132 | 194 | ||
133 | local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R | 195 | local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R |
134 | 196 | ||
135 | local item = defined + Range + m.C(any) | 197 | local item = defined + Range + m.C(any - m.P"\n") |
136 | 198 | ||
137 | local Class = | 199 | local Class = |
138 | "[" | 200 | "[" |
139 | * (m.C(m.P"^"^-1)) -- optional complement symbol | 201 | * (m.C(m.P"^"^-1)) -- optional complement symbol |
140 | * m.Cf(item * (item - "]")^0, mt.__add) / | 202 | * m.Cf(expect(item, "ExpItem") * (item - "]")^0, mt.__add) |
141 | function (c, p) return c == "^" and any - p or p end | 203 | / function (c, p) return c == "^" and any - p or p end |
142 | * "]" | 204 | * expect("]", "MisClose8") |
143 | 205 | ||
144 | local function adddef (t, k, exp) | 206 | local function adddef (t, k, exp) |
145 | if t[k] then | 207 | if t[k] then |
@@ -161,71 +223,175 @@ local function NT (n, b) | |||
161 | end | 223 | end |
162 | 224 | ||
163 | local function labchoice (...) | 225 | local function labchoice (...) |
164 | local t = { ... } | 226 | local t = { ... } |
165 | local n = #t | 227 | local n = #t |
166 | local p = t[1] | 228 | local p = t[1] |
167 | local i = 2 | 229 | local i = 2 |
168 | while i + 1 <= n do | 230 | while i + 1 <= n do |
169 | p = mm.Lc(p, t[i+1], unpack(t[i])) | 231 | -- t[i] == nil when there are no labels |
170 | i = i + 2 | 232 | p = t[i] and mm.Lc(p, t[i+1], unpack(t[i])) or mt.__add(p, t[i+1]) |
171 | end | 233 | i = i + 2 |
172 | 234 | end | |
173 | return p | 235 | |
236 | return p | ||
237 | end | ||
238 | |||
239 | -- error recovery | ||
240 | local skip = m.P { "Skip", | ||
241 | Skip = (-m.P"/" * -m.P(name * arrow) * m.V"Ignored")^0 * m.Cc(dummy); | ||
242 | Ignored = m.V"Group" + any; | ||
243 | Group = "(" * (-m.P")" * m.V"Ignored")^0 * ")" | ||
244 | + "{" * (-m.P"}" * m.V"Ignored")^0 * "}" | ||
245 | + "[" * (-m.P"]" * m.V"Ignored")^0 * "]" | ||
246 | + "'" * (-m.P"'" * m.V"Ignored")^0 * "'" | ||
247 | + '"' * (-m.P'"' * m.V"Ignored")^0 * '"'; | ||
248 | } | ||
249 | |||
250 | local ignore = m.Cmt(any, function (input, pos) | ||
251 | return syntaxerrs[#syntaxerrs].pos, dummy | ||
252 | end) | ||
253 | |||
254 | local pointAtStart = m.Cmt(any, function (input, pos) | ||
255 | -- like ignore but makes the last syntax error point at the start | ||
256 | local ret = syntaxerrs[#syntaxerrs].pos | ||
257 | syntaxerrs[#syntaxerrs].pos = pos-1 | ||
258 | return ret, dummy | ||
259 | end) | ||
260 | |||
261 | |||
262 | local function labify (labelnames) | ||
263 | for i, l in ipairs(labelnames) do | ||
264 | labelnames[i] = labels[l] | ||
265 | end | ||
266 | return labelnames | ||
174 | end | 267 | end |
175 | 268 | ||
269 | local labelset1 = labify { | ||
270 | "ExpPatt2", "ExpPatt3", | ||
271 | "ExpPatt4", "ExpPatt5", "ExpPatt6", "ExpPatt7", | ||
272 | "ExpPatt8", "ExpPattOrClose", | ||
273 | "ExpNum", "ExpCap", | ||
274 | "ExpName1", "ExpName2", "ExpName3", | ||
275 | "ExpNameOrLab", "ExpItem", | ||
276 | "MisClose6", "MisClose7" | ||
277 | } | ||
278 | |||
279 | local labelset2 = labify { | ||
280 | "MisClose1", "MisClose2", "MisClose3", "MisClose4", "MisClose5" | ||
281 | } | ||
282 | |||
283 | local labelset3 = labify { | ||
284 | "ExpPatt1", "ExpLab1", "ExpLab2", "MisClose7" | ||
285 | } | ||
176 | 286 | ||
177 | local exp = m.P{ "Exp", | 287 | local exp = m.P{ "Exp", |
178 | Exp = S * ( m.V"Grammar" | 288 | Exp = S * ( m.V"Grammar" |
179 | + (m.V"Seq") * ("/" * m.V"Labels" * S * m.V"Seq")^1 / labchoice | 289 | + (m.V"RecovSeq" * (S * "/" * m.Lc((m.Ct(m.V"Labels") + m.Cc(nil)) |
180 | + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) ); | 290 | * expect(S * m.V"RecovSeq", |
181 | Labels = m.Ct(m.P"{" * S * m.V"Label" * (S * "," * S * m.V"Label")^0 * S * "}"); | 291 | "ExpPatt1"), |
182 | Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul) | 292 | m.Cc(nil) * skip, |
183 | * (#seq_follow + patt_error); | 293 | unpack(labelset3)) |
184 | Prefix = "&" * S * m.V"Prefix" / mt.__len | 294 | )^0 |
185 | + "!" * S * m.V"Prefix" / mt.__unm | 295 | ) / labchoice); |
296 | Labels = m.P"{" * expect(S * m.V"Label", "ExpLab1") | ||
297 | * (S * "," * expect(S * m.V"Label", "ExpLab2"))^0 | ||
298 | * expect(S * "}", "MisClose7"); | ||
299 | RecovSeq = m.Lc(m.V"Seq", skip, unpack(labelset1)); | ||
300 | Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix" * (S * m.V"Prefix")^0, mt.__mul); | ||
301 | Prefix = "&" * expect(S * m.V"Prefix", "ExpPatt2") / mt.__len | ||
302 | + "!" * expect(S * m.V"Prefix", "ExpPatt3") / mt.__unm | ||
186 | + m.V"Suffix"; | 303 | + m.V"Suffix"; |
187 | Suffix = m.Cf(m.V"Primary" * S * | 304 | Suffix = m.Cf(m.V"RecovPrimary" * |
188 | ( ( m.P"+" * m.Cc(1, mt.__pow) | 305 | ( S * ( m.P"+" * m.Cc(1, mt.__pow) |
189 | + m.P"*" * m.Cc(0, mt.__pow) | 306 | + m.P"*" * m.Cc(0, mt.__pow) |
190 | + m.P"?" * m.Cc(-1, mt.__pow) | 307 | + m.P"?" * m.Cc(-1, mt.__pow) |
191 | + "^" * ( m.Cg(num * m.Cc(mult)) | 308 | + "^" * expect( m.Cg(num * m.Cc(mult)) |
192 | + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow)) | 309 | + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow) |
193 | ) | 310 | ), |
194 | + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div)) | 311 | "ExpNum") |
195 | + m.P"{}" * m.Cc(nil, m.Ct) | 312 | + "->" * expect(S * ( m.Cg((String + num) * m.Cc(mt.__div)) |
196 | + m.Cg(Def / getdef * m.Cc(mt.__div)) | 313 | + m.P"{}" * m.Cc(nil, m.Ct) |
197 | ) | 314 | + m.Cg(Def / getdef * m.Cc(mt.__div)) |
198 | + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt)) | 315 | ), |
199 | ) * S | 316 | "ExpCap") |
317 | + "=>" * expect(S * m.Cg(Def / getdef * m.Cc(m.Cmt)), | ||
318 | "ExpName1") | ||
319 | ) | ||
200 | )^0, function (a,b,f) return f(a,b) end ); | 320 | )^0, function (a,b,f) return f(a,b) end ); |
201 | Primary = "(" * m.V"Exp" * ")" | 321 | RecovPrimary = m.Lc(m.V"Primary", ignore, unpack(labelset2)); |
202 | + String / mm.P | 322 | Primary = "(" * expect(m.V"Exp", "ExpPatt4") * expect(S * ")", "MisClose1") |
203 | + Class | 323 | + m.Lc(String / mm.P, pointAtStart, |
204 | + defined | 324 | labels["MisTerm1"], labels["MisTerm2"]) |
205 | + "%{" * S * m.V"Label" * (S * "," * S * m.V"Label")^0 * S * "}" / mm.T | 325 | + m.Lc(Class, pointAtStart, labels["MisClose8"]) |
206 | + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" / | 326 | + defined |
207 | function (n, p) return mm.Cg(p, n) end | 327 | + "%" * expect(m.V"Labels", "ExpNameOrLab") / mm.T |
208 | + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end | 328 | + "{:" * (name * ":" + m.Cc(nil)) * expect(m.V"Exp", "ExpPatt5") |
209 | + m.P"{}" / mm.Cp | 329 | * expect(S * ":}", "MisClose2") |
210 | + "{~" * m.V"Exp" * "~}" / mm.Cs | 330 | / function (n, p) return mm.Cg(p, n) end |
211 | + "{|" * m.V"Exp" * "|}" / mm.Ct | 331 | + "=" * expect(name, "ExpName2") |
212 | + "{" * m.V"Exp" * "}" / mm.C | 332 | / function (n) return mm.Cmt(mm.Cb(n), equalcap) end |
213 | + m.P"." * m.Cc(any) | 333 | + m.P"{}" / mm.Cp |
214 | + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT; | 334 | + "{~" * expect(m.V"Exp", "ExpPatt6") |
215 | Label = num + name / function (f) return tlabels[f] end; | 335 | * expect(S * "~}", "MisClose3") / mm.Cs |
216 | Definition = name * arrow * m.V"Exp"; | 336 | + "{|" * expect(m.V"Exp", "ExpPatt7") |
217 | Grammar = m.Cg(m.Cc(true), "G") * | 337 | * expect(S * "|}", "MisClose4") / mm.Ct |
218 | m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0, | 338 | + "{" * expect(m.V"Exp", "ExpPattOrClose") |
219 | adddef) / mm.P | 339 | * expect(S * "}", "MisClose5") / mm.C |
340 | + m.P"." * m.Cc(any) | ||
341 | + (name * -arrow + "<" * expect(name, "ExpName3") | ||
342 | * expect(">", "MisClose6")) * m.Cb("G") / NT; | ||
343 | Label = num + name / function (f) return tlabels[f] end; | ||
344 | Definition = name * arrow * expect(m.V"Exp", "ExpPatt8"); | ||
345 | Grammar = m.Cg(m.Cc(true), "G") | ||
346 | * m.Cf(m.V"Definition" / firstdef * (S * m.Cg(m.V"Definition"))^0, | ||
347 | adddef) / mm.P; | ||
220 | } | 348 | } |
221 | 349 | ||
222 | local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error) | 350 | local pattern = S * m.Cg(m.Cc(false), "G") * expect(exp, "NoPatt") / mm.P |
351 | * S * expect(-any, "ExtraChars") | ||
223 | 352 | ||
353 | local function lineno (s, i) | ||
354 | if i == 1 then return 1, 1 end | ||
355 | local adjustment = 0 | ||
356 | -- report the current line if at end of line, not the next | ||
357 | if s:sub(i,i) == '\n' then | ||
358 | i = i-1 | ||
359 | adjustment = 1 | ||
360 | end | ||
361 | local rest, num = s:sub(1,i):gsub("[^\n]*\n", "") | ||
362 | local r = #rest | ||
363 | return 1 + num, (r ~= 0 and r or 1) + adjustment | ||
364 | end | ||
365 | |||
366 | local function splitlines(str) | ||
367 | local t = {} | ||
368 | local function helper(line) tinsert(t, line) return "" end | ||
369 | helper((str:gsub("(.-)\r?\n", helper))) | ||
370 | return t | ||
371 | end | ||
224 | 372 | ||
225 | local function compile (p, defs) | 373 | local function compile (p, defs) |
226 | if mm.type(p) == "pattern" then return p end -- already compiled | 374 | if mm.type(p) == "pattern" then return p end -- already compiled |
227 | local cp = pattern:match(p, 1, defs) | 375 | p = p .. " " -- for better reporting of column numbers in errors when at EOF |
228 | if not cp then error("incorrect pattern", 3) end | 376 | local ok, cp, label, suffix = pcall(function() return pattern:match(p, 1, defs) end) |
377 | if not ok and #syntaxerrs == 0 then | ||
378 | if type(cp) == "string" then | ||
379 | cp = cp:gsub("^[^:]+:[^:]+: ", "") | ||
380 | end | ||
381 | error(cp) | ||
382 | end | ||
383 | if #syntaxerrs > 0 then | ||
384 | local lines = splitlines(p) | ||
385 | local errors = {} | ||
386 | for i, err in ipairs(syntaxerrs) do | ||
387 | local line, col = lineno(p, err.pos) | ||
388 | tinsert(errors, "L" .. line .. ":C" .. col .. ": " .. errmsgs[err.label]) | ||
389 | tinsert(errors, lines[line]) | ||
390 | tinsert(errors, rep(" ", col-1) .. "^") | ||
391 | end | ||
392 | syntaxerrs = {} | ||
393 | error("syntax error(s) in pattern\n" .. concat(errors, "\n")) | ||
394 | end | ||
229 | return cp | 395 | return cp |
230 | end | 396 | end |
231 | 397 | ||
@@ -264,7 +430,7 @@ local function gsub (s, p, rep) | |||
264 | end | 430 | end |
265 | 431 | ||
266 | local function setlabels (t) | 432 | local function setlabels (t) |
267 | tlabels = t | 433 | tlabels = t |
268 | end | 434 | end |
269 | 435 | ||
270 | -- exported names | 436 | -- exported names |
@@ -274,7 +440,7 @@ local re = { | |||
274 | find = find, | 440 | find = find, |
275 | gsub = gsub, | 441 | gsub = gsub, |
276 | updatelocale = updatelocale, | 442 | updatelocale = updatelocale, |
277 | setlabels = setlabels | 443 | setlabels = setlabels |
278 | } | 444 | } |
279 | 445 | ||
280 | if version == "Lua 5.1" then _G.re = re end | 446 | if version == "Lua 5.1" then _G.re = re end |