diff options
Diffstat (limited to 'src/yuescript/yue_parser.cpp')
-rw-r--r-- | src/yuescript/yue_parser.cpp | 719 |
1 files changed, 719 insertions, 0 deletions
diff --git a/src/yuescript/yue_parser.cpp b/src/yuescript/yue_parser.cpp new file mode 100644 index 0000000..cf3da01 --- /dev/null +++ b/src/yuescript/yue_parser.cpp | |||
@@ -0,0 +1,719 @@ | |||
1 | /* Copyright (c) 2021 Jin Li, http://www.luvfight.me | ||
2 | |||
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: | ||
4 | |||
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. | ||
6 | |||
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ | ||
8 | |||
9 | #include "yuescript/yue_parser.h" | ||
10 | |||
11 | namespace pl = parserlib; | ||
12 | |||
13 | namespace yue { | ||
14 | using namespace std::string_view_literals; | ||
15 | |||
// Reserved words of Lua itself. The grammar consults this set when validating
// label names and when recognising a Lua keyword used as a method name.
std::unordered_set<std::string> LuaKeywords = {
	"and", "break", "do", "else",
	"elseif", "end", "false", "for",
	"function", "goto", "if", "in",
	"local", "nil", "not", "or",
	"repeat", "return", "then", "true",
	"until", "while"
};
23 | |||
// Every word that may not be used as a plain variable name: the Lua reserved
// words followed by the keywords Yuescript adds on top of them.
std::unordered_set<std::string> Keywords = {
	// Lua keywords
	"and", "break", "do", "else",
	"elseif", "end", "false", "for",
	"function", "goto", "if", "in",
	"local", "nil", "not", "or",
	"repeat", "return", "then", "true",
	"until", "while",
	// Yue keywords
	"as", "class", "continue", "export",
	"extends", "from", "global", "import",
	"macro", "switch", "unless", "using",
	"when", "with"
};
34 | |||
// Constructs the parser by assigning every grammar rule member of YueParser.
// Each rule is an expression built from parserlib combinators and other rules;
// several rules are wrapped in pl::user(...) so that a callback can read or
// update the per-parse State object reached through item.user_data.
// NOTE(review): the meaning of the combinators (>>, |, *, +, unary -, and_, not_)
// comes from parserlib and is not visible in this file - presumably sequence,
// ordered choice, repetition, optional and lookahead. Confirm before relying on it.
35 | YueParser::YueParser() { | ||
// --- Lexical primitives: blanks, line breaks, comments, names, numbers ---
36 | plain_space = *set(" \t"); | ||
37 | Break = nl(-expr('\r') >> '\n'); | ||
38 | Any = Break | any(); | ||
39 | Stop = Break | eof(); | ||
40 | Indent = plain_space; | ||
// Line comments start with "--"; block comments are delimited by "--[[" and "]]".
41 | Comment = "--" >> *(not_(set("\r\n")) >> Any) >> and_(Stop); | ||
42 | multi_line_open = expr("--[["); | ||
43 | multi_line_close = expr("]]"); | ||
44 | multi_line_content = *(not_(multi_line_close) >> Any); | ||
45 | MultiLineComment = multi_line_open >> multi_line_content >> multi_line_close; | ||
// A backslash followed only by blanks/comments and a line break.
46 | EscapeNewLine = expr('\\') >> *(set(" \t") | MultiLineComment) >> -Comment >> Break; | ||
47 | space_one = set(" \t") | and_(set("-\\")) >> (MultiLineComment | EscapeNewLine); | ||
48 | Space = *space_one >> -Comment; | ||
49 | SpaceBreak = Space >> Break; | ||
50 | White = Space >> *(Break >> Space); | ||
51 | EmptyLine = SpaceBreak; | ||
52 | AlphaNum = range('a', 'z') | range('A', 'Z') | range('0', '9') | '_'; | ||
53 | Name = (range('a', 'z') | range('A', 'Z') | '_') >> *AlphaNum; | ||
// Numeric literal alternatives, in order: "0x" hex digits with an optional
// [uU] l/L l/L suffix; decimal digits with a mandatory l/L l/L suffix; or a
// decimal/fractional number with an optional exponent part.
54 | Num = ( | ||
55 | "0x" >> | ||
56 | +(range('0', '9') | range('a', 'f') | range('A', 'F')) >> | ||
57 | -(-set("uU") >> set("lL") >> set("lL")) | ||
58 | ) | ( | ||
59 | +range('0', '9') >> -set("uU") >> set("lL") >> set("lL") | ||
60 | ) | ( | ||
61 | ( | ||
62 | +range('0', '9') >> -('.' >> +range('0', '9')) | ||
63 | ) | ( | ||
64 | '.' >> +range('0', '9') | ||
65 | ) | ||
66 | ) >> -(set("eE") >> -expr('-') >> +range('0', '9')); | ||
67 | |||
// Cut is false_() and is used as the tail of the second branch of the
// ensure/disable_* macros below; Seperator is true_().
68 | Cut = false_(); | ||
69 | Seperator = true_(); | ||
70 | |||
// Helper macros used by the rules below:
//   sym(str)   - str preceded by Space
//   symx(str)  - str with no leading Space
//   ensure(patt, finally) - patt then finally, otherwise finally then Cut
//   key(str)   - str preceded by Space and followed by not_(AlphaNum)
//   disable_do / disable_chain / disable_do_chain - same shape as ensure,
//     bracketing patt with the matching Disable*/Enable* rules
//   plain_body_with(str) - InBlock with an optional key(str), or key(str) then Statement
//   plain_body - InBlock or Statement
// NOTE(review): presumably the "finally >> Cut" branch exists so the state pushed
// before patt is popped even when patt fails, after which the branch itself
// fails - confirm against parserlib's false_() semantics.
71 | #define sym(str) (Space >> str) | ||
72 | #define symx(str) expr(str) | ||
73 | #define ensure(patt, finally) ((patt) >> (finally) | (finally) >> Cut) | ||
74 | #define key(str) (Space >> str >> not_(AlphaNum)) | ||
75 | #define disable_do(patt) (DisableDo >> ((patt) >> EnableDo | EnableDo >> Cut)) | ||
76 | #define disable_chain(patt) (DisableChain >> ((patt) >> EnableChain | EnableChain >> Cut)) | ||
77 | #define disable_do_chain(patt) (DisableDoChain >> ((patt) >> EnableDoChain | EnableDoChain >> Cut)) | ||
78 | #define plain_body_with(str) (-key(str) >> InBlock | key(str) >> Statement) | ||
79 | #define plain_body (InBlock | Statement) | ||
80 | |||
// Variable: a Name whose callback returns true only when the text is not in
// Keywords. When such a name equals st->moduleName, moduleFix is incremented
// and moduleName is regenerated as "_module_" + moduleFix. st->buffer is used
// as scratch space and cleared before returning.
81 | Variable = pl::user(Name, [](const item_t& item) { | ||
82 | State* st = reinterpret_cast<State*>(item.user_data); | ||
83 | for (auto it = item.begin; it != item.end; ++it) st->buffer += static_cast<char>(*it); | ||
84 | auto isValid = Keywords.find(st->buffer) == Keywords.end(); | ||
85 | if (isValid) { | ||
86 | if (st->buffer == st->moduleName) { | ||
87 | st->moduleFix++; | ||
88 | st->moduleName = std::string("_module_"sv) + std::to_string(st->moduleFix); | ||
89 | } | ||
90 | } | ||
91 | st->buffer.clear(); | ||
92 | return isValid; | ||
93 | }); | ||
94 | |||
// LabelName: a Name whose callback returns true only when the text is not in LuaKeywords.
95 | LabelName = pl::user(Name, [](const item_t& item) { | ||
96 | State* st = reinterpret_cast<State*>(item.user_data); | ||
97 | for (auto it = item.begin; it != item.end; ++it) st->buffer += static_cast<char>(*it); | ||
98 | auto isValid = LuaKeywords.find(st->buffer) == LuaKeywords.end(); | ||
99 | st->buffer.clear(); | ||
100 | return isValid; | ||
101 | }); | ||
102 | |||
// LuaKeyword: a Name whose callback returns true only when the text is in LuaKeywords.
103 | LuaKeyword = pl::user(Name, [](const item_t& item) { | ||
104 | State* st = reinterpret_cast<State*>(item.user_data); | ||
105 | for (auto it = item.begin; it != item.end; ++it) st->buffer += static_cast<char>(*it); | ||
106 | auto it = LuaKeywords.find(st->buffer); | ||
107 | st->buffer.clear(); | ||
108 | return it != LuaKeywords.end(); | ||
109 | }); | ||
110 | |||
// '@' / '@name' and '@@' / '@@name' forms, combined into SelfName.
111 | self = expr('@'); | ||
112 | self_name = '@' >> Name; | ||
113 | self_class = expr("@@"); | ||
114 | self_class_name = "@@" >> Name; | ||
115 | |||
116 | SelfName = self_class_name | self_class | self_name | self; | ||
117 | KeyName = Space >> (SelfName | Name); | ||
118 | VarArg = expr("..."); | ||
119 | |||
// --- Indentation tracking ---
// The callbacks below measure the matched Indent text, counting a space as 1
// and a tab as 4, and compare it with or push it onto st->indents.
// check_indent: true when the measured width equals the top of st->indents.
120 | check_indent = pl::user(Indent, [](const item_t& item) { | ||
121 | int indent = 0; | ||
122 | for (input_it i = item.begin; i != item.end; ++i) { | ||
123 | switch (*i) { | ||
124 | case ' ': indent++; break; | ||
125 | case '\t': indent += 4; break; | ||
126 | } | ||
127 | } | ||
128 | State* st = reinterpret_cast<State*>(item.user_data); | ||
129 | return st->indents.top() == indent; | ||
130 | }); | ||
131 | CheckIndent = and_(check_indent); | ||
132 | |||
// advance: pushes the measured width and returns true only when the current top
// is not -1 (the value PreventIndent pushes) and the new width is greater.
133 | advance = pl::user(Indent, [](const item_t& item) { | ||
134 | int indent = 0; | ||
135 | for (input_it i = item.begin; i != item.end; ++i) { | ||
136 | switch (*i) { | ||
137 | case ' ': indent++; break; | ||
138 | case '\t': indent += 4; break; | ||
139 | } | ||
140 | } | ||
141 | State* st = reinterpret_cast<State*>(item.user_data); | ||
142 | int top = st->indents.top(); | ||
143 | if (top != -1 && indent > top) { | ||
144 | st->indents.push(indent); | ||
145 | return true; | ||
146 | } | ||
147 | return false; | ||
148 | }); | ||
149 | Advance = and_(advance); | ||
150 | |||
// push_indent: unconditionally pushes the measured width.
151 | push_indent = pl::user(Indent, [](const item_t& item) { | ||
152 | int indent = 0; | ||
153 | for (input_it i = item.begin; i != item.end; ++i) { | ||
154 | switch (*i) { | ||
155 | case ' ': indent++; break; | ||
156 | case '\t': indent += 4; break; | ||
157 | } | ||
158 | } | ||
159 | State* st = reinterpret_cast<State*>(item.user_data); | ||
160 | st->indents.push(indent); | ||
161 | return true; | ||
162 | }); | ||
163 | PushIndent = and_(push_indent); | ||
164 | |||
// PreventIndent pushes the sentinel -1, which makes `advance` return false
// until the entry is popped again.
165 | PreventIndent = pl::user(true_(), [](const item_t& item) { | ||
166 | State* st = reinterpret_cast<State*>(item.user_data); | ||
167 | st->indents.push(-1); | ||
168 | return true; | ||
169 | }); | ||
170 | |||
// PopIndent removes the top entry of st->indents.
171 | PopIndent = pl::user(true_(), [](const item_t& item) { | ||
172 | State* st = reinterpret_cast<State*>(item.user_data); | ||
173 | st->indents.pop(); | ||
174 | return true; | ||
175 | }); | ||
176 | |||
177 | InBlock = +SpaceBreak >> Advance >> ensure(Block, PopIndent); | ||
178 | |||
// --- local declarations ---
179 | local_flag = expr('*') | expr('^'); | ||
180 | local_values = NameList >> -(sym('=') >> (TableBlock | ExpListLow)); | ||
181 | Local = key("local") >> (Space >> local_flag | local_values); | ||
182 | |||
// LocalAttrib: the lookahead requires a non-keyword Name, a NameList and a
// single '=' (not "=="); the real match is then Name, NameList and Assign.
183 | LocalAttrib = and_(key(pl::user(Name, [](const item_t& item) { | ||
184 | State* st = reinterpret_cast<State*>(item.user_data); | ||
185 | for (auto it = item.begin; it != item.end; ++it) st->buffer += static_cast<char>(*it); | ||
186 | auto it = Keywords.find(st->buffer); | ||
187 | st->buffer.clear(); | ||
188 | return it == Keywords.end(); | ||
189 | })) >> NameList >> sym('=') >> not_('=')) >> Space >> Name >> NameList >> Assign; | ||
190 | |||
// --- import statement ---
191 | colon_import_name = sym('\\') >> Space >> Variable; | ||
192 | ImportName = colon_import_name | Space >> Variable; | ||
193 | ImportNameList = Seperator >> *SpaceBreak >> ImportName >> *((+SpaceBreak | sym(',') >> *SpaceBreak) >> ImportName); | ||
194 | ImportFrom = ImportNameList >> *SpaceBreak >> key("from") >> Exp; | ||
195 | |||
196 | import_literal_inner = (range('a', 'z') | range('A', 'Z') | set("_-")) >> *(AlphaNum | '-'); | ||
197 | import_literal_chain = Seperator >> import_literal_inner >> *(expr('.') >> import_literal_inner); | ||
198 | ImportLiteral = sym('\'') >> import_literal_chain >> symx('\'') | sym('"') >> import_literal_chain >> symx('"'); | ||
199 | |||
200 | macro_name_pair = Space >> MacroName >> Space >> symx(':') >> Space >> MacroName; | ||
201 | import_all_macro = expr('$'); | ||
202 | ImportTabItem = variable_pair | normal_pair | sym(':') >> MacroName | macro_name_pair | Space >> import_all_macro; | ||
203 | ImportTabList = ImportTabItem >> *(sym(',') >> ImportTabItem); | ||
204 | ImportTabLine = ( | ||
205 | PushIndent >> (ImportTabList >> PopIndent | PopIndent) | ||
206 | ) | Space; | ||
207 | import_tab_lines = SpaceBreak >> ImportTabLine >> *(-sym(',') >> SpaceBreak >> ImportTabLine) >> -sym(','); | ||
208 | ImportTabLit = | ||
209 | sym('{') >> Seperator >> | ||
210 | -ImportTabList >> | ||
211 | -sym(',') >> | ||
212 | -import_tab_lines >> | ||
213 | White >> sym('}'); | ||
214 | |||
215 | ImportAs = ImportLiteral >> -(key("as") >> (Space >> Variable | ImportTabLit)); | ||
216 | |||
217 | Import = key("import") >> (ImportAs | ImportFrom); | ||
218 | |||
// --- labels, goto, break/continue, return ---
219 | Label = Space >> expr("::") >> LabelName >> expr("::"); | ||
220 | |||
221 | Goto = key("goto") >> Space >> LabelName; | ||
222 | |||
223 | BreakLoop = (expr("break") | expr("continue")) >> not_(AlphaNum); | ||
224 | |||
225 | Return = key("return") >> -ExpListLow; | ||
226 | |||
// --- with / switch ---
227 | WithExp = ExpList >> -Assign; | ||
228 | |||
229 | With = key("with") >> -existential_op >> disable_do_chain(WithExp) >> plain_body_with("do"); | ||
230 | SwitchCase = key("when") >> disable_chain(ExpList) >> plain_body_with("then"); | ||
231 | SwitchElse = key("else") >> plain_body; | ||
232 | |||
233 | SwitchBlock = *EmptyLine >> | ||
234 | Advance >> Seperator >> | ||
235 | SwitchCase >> | ||
236 | *(+SpaceBreak >> SwitchCase) >> | ||
237 | -(+SpaceBreak >> SwitchElse) >> | ||
238 | PopIndent; | ||
239 | |||
240 | Switch = key("switch") >> disable_do(Exp) >> -key("do") | ||
241 | >> -Space >> Break >> SwitchBlock; | ||
242 | |||
// --- if / unless ---
243 | IfCond = disable_chain(Exp >> -Assign); | ||
244 | IfElseIf = -(Break >> *EmptyLine >> CheckIndent) >> key("elseif") >> IfCond >> plain_body_with("then"); | ||
245 | IfElse = -(Break >> *EmptyLine >> CheckIndent) >> key("else") >> plain_body; | ||
246 | If = key("if") >> Seperator >> IfCond >> plain_body_with("then") >> *IfElseIf >> -IfElse; | ||
247 | Unless = key("unless") >> Seperator >> IfCond >> plain_body_with("then") >> *IfElseIf >> -IfElse; | ||
248 | |||
// --- loops ---
249 | While = key("while") >> disable_do_chain(Exp) >> plain_body_with("do"); | ||
250 | Repeat = key("repeat") >> Body >> Break >> *EmptyLine >> CheckIndent >> key("until") >> Exp; | ||
251 | |||
252 | for_step_value = sym(',') >> Exp; | ||
253 | for_args = Space >> Variable >> sym('=') >> Exp >> sym(',') >> Exp >> -for_step_value; | ||
254 | |||
255 | For = key("for") >> disable_do_chain(for_args) >> plain_body_with("do"); | ||
256 | |||
257 | for_in = star_exp | ExpList; | ||
258 | |||
259 | ForEach = key("for") >> AssignableNameList >> key("in") >> | ||
260 | disable_do_chain(for_in) >> plain_body_with("do"); | ||
261 | |||
// Do: the callback returns true only when st->doStack is empty or its top is
// true; DisableDo / DisableDoChain push false, EnableDo / EnableDoChain pop.
262 | Do = pl::user(key("do"), [](const item_t& item) { | ||
263 | State* st = reinterpret_cast<State*>(item.user_data); | ||
264 | return st->doStack.empty() || st->doStack.top(); | ||
265 | }) >> Body; | ||
266 | |||
267 | DisableDo = pl::user(true_(), [](const item_t& item) { | ||
268 | State* st = reinterpret_cast<State*>(item.user_data); | ||
269 | st->doStack.push(false); | ||
270 | return true; | ||
271 | }); | ||
272 | |||
273 | EnableDo = pl::user(true_(), [](const item_t& item) { | ||
274 | State* st = reinterpret_cast<State*>(item.user_data); | ||
275 | st->doStack.pop(); | ||
276 | return true; | ||
277 | }); | ||
278 | |||
// The *DoChain variants update doStack and chainBlockStack together.
279 | DisableDoChain = pl::user(true_(), [](const item_t& item) { | ||
280 | State* st = reinterpret_cast<State*>(item.user_data); | ||
281 | st->doStack.push(false); | ||
282 | st->chainBlockStack.push(false); | ||
283 | return true; | ||
284 | }); | ||
285 | |||
286 | EnableDoChain = pl::user(true_(), [](const item_t& item) { | ||
287 | State* st = reinterpret_cast<State*>(item.user_data); | ||
288 | st->doStack.pop(); | ||
289 | st->chainBlockStack.pop(); | ||
290 | return true; | ||
291 | }); | ||
292 | |||
// --- comprehensions ---
293 | Comprehension = sym('[') >> Exp >> CompInner >> sym(']'); | ||
294 | comp_value = sym(',') >> Exp; | ||
295 | TblComprehension = sym('{') >> Exp >> -comp_value >> CompInner >> sym('}'); | ||
296 | |||
297 | CompInner = Seperator >> (CompForEach | CompFor) >> *CompClause; | ||
298 | star_exp = sym('*') >> Exp; | ||
299 | CompForEach = key("for") >> AssignableNameList >> key("in") >> (star_exp | Exp); | ||
300 | CompFor = key("for") >> Space >> Variable >> sym('=') >> Exp >> sym(',') >> Exp >> -for_step_value; | ||
301 | CompClause = CompFor | CompForEach | key("when") >> Exp; | ||
302 | |||
// --- assignment and compound update ---
303 | Assign = sym('=') >> Seperator >> (With | If | Switch | TableBlock | Exp >> *(Space >> set(",;") >> Exp)); | ||
304 | |||
305 | update_op = | ||
306 | expr("..") | | ||
307 | expr("+") | | ||
308 | expr("-") | | ||
309 | expr("*") | | ||
310 | expr("/") | | ||
311 | expr("%") | | ||
312 | expr("or") | | ||
313 | expr("and") | | ||
314 | expr("&") | | ||
315 | expr("|") | | ||
316 | expr(">>") | | ||
317 | expr("<<"); | ||
318 | |||
319 | Update = Space >> update_op >> expr("=") >> Exp; | ||
320 | |||
321 | Assignable = AssignableChain | Space >> Variable | Space >> SelfName; | ||
322 | |||
// --- expressions: unary, exponent, pipe ("|>") and binary operator layers ---
323 | unary_value = unary_operator >> *(Space >> unary_operator) >> Value; | ||
324 | |||
325 | ExponentialOperator = expr('^'); | ||
326 | expo_value = Space >> ExponentialOperator >> *SpaceBreak >> Value; | ||
327 | expo_exp = Value >> *expo_value; | ||
328 | |||
329 | unary_operator = | ||
330 | expr('-') >> not_(set(">=") | space_one) | | ||
331 | expr('#') | | ||
332 | expr('~') >> not_(expr('=') | space_one) | | ||
333 | expr("not") >> not_(AlphaNum); | ||
334 | unary_exp = *(Space >> unary_operator) >> expo_exp; | ||
335 | |||
336 | BackcallOperator = expr("|>"); | ||
337 | backcall_value = Space >> BackcallOperator >> *SpaceBreak >> unary_exp; | ||
338 | backcall_exp = unary_exp >> *backcall_value; | ||
339 | |||
340 | BinaryOperator = | ||
341 | (expr("or") >> not_(AlphaNum)) | | ||
342 | (expr("and") >> not_(AlphaNum)) | | ||
343 | expr("<=") | | ||
344 | expr(">=") | | ||
345 | expr("~=") | | ||
346 | expr("!=") | | ||
347 | expr("==") | | ||
348 | expr("..") | | ||
349 | expr("<<") | | ||
350 | expr(">>") | | ||
351 | expr("//") | | ||
352 | set("+-*/%><|&~"); | ||
353 | exp_op_value = Space >> BinaryOperator >> *SpaceBreak >> backcall_exp; | ||
354 | Exp = Seperator >> backcall_exp >> *exp_op_value; | ||
355 | |||
// DisableChain pushes false onto st->chainBlockStack; EnableChain pops it.
356 | DisableChain = pl::user(true_(), [](const item_t& item) { | ||
357 | State* st = reinterpret_cast<State*>(item.user_data); | ||
358 | st->chainBlockStack.push(false); | ||
359 | return true; | ||
360 | }); | ||
361 | |||
362 | EnableChain = pl::user(true_(), [](const item_t& item) { | ||
363 | State* st = reinterpret_cast<State*>(item.user_data); | ||
364 | st->chainBlockStack.pop(); | ||
365 | return true; | ||
366 | }); | ||
367 | |||
// chain_block: its leading callback returns true only when st->chainBlockStack
// is empty or its top is true; the rest is one or more chain_line entries at a
// deeper indent.
368 | chain_line = CheckIndent >> (chain_item | Space >> (chain_dot_chain | ColonChain)) >> -InvokeArgs; | ||
369 | chain_block = pl::user(true_(), [](const item_t& item) { | ||
370 | State* st = reinterpret_cast<State*>(item.user_data); | ||
371 | return st->chainBlockStack.empty() || st->chainBlockStack.top(); | ||
372 | }) >> +SpaceBreak >> Advance >> ensure( | ||
373 | chain_line >> *(+SpaceBreak >> chain_line), PopIndent); | ||
374 | ChainValue = Seperator >> (Chain | Callable) >> -existential_op >> -(InvokeArgs | chain_block); | ||
375 | |||
376 | simple_table = Seperator >> KeyValue >> *(sym(',') >> KeyValue); | ||
377 | Value = SimpleValue | simple_table | ChainValue | String; | ||
378 | |||
// --- string literals; "#{" ... "}" is matched by interp inside double-quoted strings ---
379 | single_string_inner = expr("\\'") | "\\\\" | not_(expr('\'')) >> Any; | ||
380 | SingleString = symx('\'') >> *single_string_inner >> symx('\''); | ||
381 | interp = symx("#{") >> Exp >> sym('}'); | ||
382 | double_string_plain = expr("\\\"") | "\\\\" | not_(expr('"')) >> Any; | ||
383 | double_string_inner = +(not_(interp) >> double_string_plain); | ||
384 | double_string_content = double_string_inner | interp; | ||
385 | DoubleString = symx('"') >> Seperator >> *double_string_content >> symx('"'); | ||
386 | String = Space >> (DoubleString | SingleString | LuaString); | ||
387 | |||
388 | lua_string_open = '[' >> *expr('=') >> '['; | ||
389 | lua_string_close = ']' >> *expr('=') >> ']'; | ||
390 | |||
// LuaStringOpen stores the length of the matched opener in st->stringOpen;
// LuaStringClose returns true only for a closer of exactly the same length.
391 | LuaStringOpen = pl::user(lua_string_open, [](const item_t& item) { | ||
392 | size_t count = std::distance(item.begin, item.end); | ||
393 | State* st = reinterpret_cast<State*>(item.user_data); | ||
394 | st->stringOpen = count; | ||
395 | return true; | ||
396 | }); | ||
397 | |||
398 | LuaStringClose = pl::user(lua_string_close, [](const item_t& item) { | ||
399 | size_t count = std::distance(item.begin, item.end); | ||
400 | State* st = reinterpret_cast<State*>(item.user_data); | ||
401 | return st->stringOpen == count; | ||
402 | }); | ||
403 | |||
404 | LuaStringContent = *(not_(LuaStringClose) >> Any); | ||
405 | |||
406 | LuaString = LuaStringOpen >> -Break >> LuaStringContent >> LuaStringClose; | ||
407 | |||
// --- callables, call arguments and access chains ---
408 | Parens = symx('(') >> *SpaceBreak >> Exp >> *SpaceBreak >> sym(')'); | ||
409 | Callable = Space >> (Variable | SelfName | MacroName | VarArg | Parens); | ||
410 | FnArgsExpList = Exp >> *((Break | sym(',')) >> White >> Exp); | ||
411 | |||
412 | FnArgs = (symx('(') >> *SpaceBreak >> -FnArgsExpList >> *SpaceBreak >> sym(')')) | | ||
413 | (sym('!') >> not_(expr('='))); | ||
414 | |||
415 | existential_op = expr('?'); | ||
416 | chain_call = (Callable | String) >> -existential_op >> ChainItems; | ||
417 | chain_item = and_(set(".\\")) >> ChainItems; | ||
418 | chain_dot_chain = DotChainItem >> -existential_op >> -ChainItems; | ||
419 | |||
420 | Chain = chain_call | chain_item | | ||
421 | Space >> (chain_dot_chain | ColonChain); | ||
422 | |||
423 | AssignableChain = Seperator >> Chain; | ||
424 | |||
425 | chain_with_colon = +ChainItem >> -ColonChain; | ||
426 | ChainItems = chain_with_colon | ColonChain; | ||
427 | |||
428 | Index = symx('[') >> Exp >> sym(']'); | ||
429 | ChainItem = Invoke >> -existential_op | DotChainItem >> -existential_op | Slice | Index >> -existential_op; | ||
430 | DotChainItem = symx('.') >> Name; | ||
// After a backslash, a Lua keyword is accepted as well as a plain Name.
431 | ColonChainItem = symx('\\') >> (LuaKeyword | Name); | ||
432 | invoke_chain = Invoke >> -existential_op >> -ChainItems; | ||
433 | ColonChain = ColonChainItem >> -existential_op >> -invoke_chain; | ||
434 | |||
// default_value is true_(); Slice uses it wherever a bound expression may be omitted.
435 | default_value = true_(); | ||
436 | Slice = | ||
437 | symx('[') >> | ||
438 | (Exp | default_value) >> | ||
439 | sym(',') >> | ||
440 | (Exp | default_value) >> | ||
441 | (sym(',') >> Exp | default_value) >> | ||
442 | sym(']'); | ||
443 | |||
444 | Invoke = Seperator >> ( | ||
445 | FnArgs | | ||
446 | SingleString | | ||
447 | DoubleString | | ||
448 | and_(expr('[')) >> LuaString | | ||
449 | and_(expr('{')) >> TableLit); | ||
450 | |||
// --- table literals and indentation-based table blocks ---
451 | TableValue = KeyValue | Exp; | ||
452 | |||
453 | table_lit_lines = SpaceBreak >> TableLitLine >> *(-sym(',') >> SpaceBreak >> TableLitLine) >> -sym(','); | ||
454 | |||
455 | TableLit = | ||
456 | sym('{') >> Seperator >> | ||
457 | -TableValueList >> | ||
458 | -sym(',') >> | ||
459 | -table_lit_lines >> | ||
460 | White >> sym('}'); | ||
461 | |||
462 | TableValueList = TableValue >> *(sym(',') >> TableValue); | ||
463 | |||
464 | TableLitLine = ( | ||
465 | PushIndent >> (TableValueList >> PopIndent | PopIndent) | ||
466 | ) | ( | ||
467 | Space | ||
468 | ); | ||
469 | |||
470 | TableBlockInner = Seperator >> KeyValueLine >> *(+SpaceBreak >> KeyValueLine); | ||
471 | TableBlock = +SpaceBreak >> Advance >> ensure(TableBlockInner, PopIndent); | ||
472 | TableBlockIndent = sym('*') >> Seperator >> KeyValueList >> -sym(',') >> | ||
473 | -(+SpaceBreak >> Advance >> ensure(KeyValueList >> -sym(',') >> *(+SpaceBreak >> KeyValueLine), PopIndent)); | ||
474 | |||
// --- class declarations ---
475 | class_member_list = Seperator >> KeyValue >> *(sym(',') >> KeyValue); | ||
476 | ClassLine = CheckIndent >> (class_member_list | Statement) >> -sym(','); | ||
477 | ClassBlock = +SpaceBreak >> Advance >> Seperator >> ClassLine >> *(+SpaceBreak >> ClassLine) >> PopIndent; | ||
478 | |||
479 | ClassDecl = | ||
480 | key("class") >> not_(expr(':')) >> | ||
481 | -Assignable >> | ||
482 | -(key("extends") >> PreventIndent >> ensure(Exp, PopIndent)) >> | ||
483 | -ClassBlock; | ||
484 | |||
// --- global / export ---
485 | global_values = NameList >> -(sym('=') >> (TableBlock | ExpListLow)); | ||
486 | global_op = expr('*') | expr('^'); | ||
487 | Global = key("global") >> (ClassDecl | (Space >> global_op) | global_values); | ||
488 | |||
489 | export_default = key("default"); | ||
490 | |||
// Export: the first callback increments st->exportCount for every "export".
// The "default" branch returns true only when no default export was seen before
// and this is the first export (exportCount == 1), and always sets
// st->exportDefault. The plain branch returns false once a default export
// exists and exportCount > 1.
491 | Export = pl::user(key("export"), [](const item_t& item) { | ||
492 | State* st = reinterpret_cast<State*>(item.user_data); | ||
493 | st->exportCount++; | ||
494 | return true; | ||
495 | }) >> ((pl::user(export_default, [](const item_t& item) { | ||
496 | State* st = reinterpret_cast<State*>(item.user_data); | ||
497 | bool isValid = !st->exportDefault && st->exportCount == 1; | ||
498 | st->exportDefault = true; | ||
499 | return isValid; | ||
500 | }) >> Exp) | ||
501 | | (pl::user(true_(), [](const item_t& item) { | ||
502 | State* st = reinterpret_cast<State*>(item.user_data); | ||
503 | if (st->exportDefault && st->exportCount > 1) { | ||
504 | return false; | ||
505 | } else { | ||
506 | return true; | ||
507 | } | ||
508 | }) >> ExpList >> -Assign) | ||
509 | | Macro) >> not_(Space >> statement_appendix); | ||
510 | |||
// --- key/value pairs used by tables, classes and import lists ---
511 | variable_pair = sym(':') >> Variable; | ||
512 | |||
513 | normal_pair = ( | ||
514 | KeyName | | ||
515 | sym('[') >> Exp >> sym(']') | | ||
516 | Space >> DoubleString | | ||
517 | Space >> SingleString | | ||
518 | Space >> LuaString | ||
519 | ) >> | ||
520 | symx(':') >> | ||
521 | (Exp | TableBlock | +(SpaceBreak) >> Exp); | ||
522 | |||
523 | KeyValue = variable_pair | normal_pair; | ||
524 | |||
525 | KeyValueList = KeyValue >> *(sym(',') >> KeyValue); | ||
526 | KeyValueLine = CheckIndent >> (KeyValueList >> -sym(',') | TableBlockIndent | Space >> expr('*') >> (Exp | TableBlock)); | ||
527 | |||
// --- function literals, macros and name lists ---
528 | FnArgDef = (Variable | SelfName) >> -(sym('=') >> Space >> Exp); | ||
529 | |||
530 | FnArgDefList = Space >> Seperator >> ( | ||
531 | ( | ||
532 | FnArgDef >> | ||
533 | *((sym(',') | Break) >> White >> FnArgDef) >> | ||
534 | -((sym(',') | Break) >> White >> VarArg) | ||
535 | ) | ( | ||
536 | VarArg | ||
537 | ) | ||
538 | ); | ||
539 | |||
540 | outer_var_shadow = key("using") >> (NameList | Space >> expr("nil")); | ||
541 | |||
542 | FnArgsDef = sym('(') >> White >> -FnArgDefList >> -outer_var_shadow >> White >> sym(')'); | ||
543 | fn_arrow = expr("->") | expr("=>"); | ||
544 | FunLit = -FnArgsDef >> Space >> fn_arrow >> -Body; | ||
545 | |||
546 | MacroName = expr('$') >> -Name; | ||
547 | macro_args_def = sym('(') >> White >> -FnArgDefList >> White >> sym(')'); | ||
548 | MacroLit = -macro_args_def >> Space >> expr("->") >> Body; | ||
549 | Macro = key("macro") >> Space >> Name >> sym('=') >> MacroLit; | ||
550 | |||
551 | NameList = Seperator >> Space >> Variable >> *(sym(',') >> Space >> Variable); | ||
552 | NameOrDestructure = Space >> Variable | TableLit; | ||
553 | AssignableNameList = Seperator >> NameOrDestructure >> *(sym(',') >> NameOrDestructure); | ||
554 | |||
555 | fn_arrow_back = expr('<') >> set("-="); | ||
556 | Backcall = -FnArgsDef >> Space >> fn_arrow_back >> Space >> ChainValue; | ||
557 | |||
558 | BackcallBody = Seperator >> Space >> BackcallOperator >> unary_exp >> *(+SpaceBreak >> CheckIndent >> Space >> BackcallOperator >> unary_exp); | ||
559 | |||
// --- expression lists and call arguments written without parentheses ---
560 | ExpList = Seperator >> Exp >> *(sym(',') >> Exp); | ||
561 | ExpListLow = Seperator >> Exp >> *(Space >> set(",;") >> Exp); | ||
562 | |||
563 | ArgLine = CheckIndent >> Exp >> *(sym(',') >> Exp); | ||
564 | ArgBlock = ArgLine >> *(sym(',') >> SpaceBreak >> ArgLine) >> PopIndent; | ||
565 | |||
566 | invoke_args_with_table = | ||
567 | sym(',') >> | ||
568 | ( | ||
569 | TableBlock | | ||
570 | SpaceBreak >> Advance >> ArgBlock >> -TableBlock | ||
571 | ); | ||
572 | |||
573 | InvokeArgs = | ||
574 | not_(set("-~")) >> Seperator >> | ||
575 | ( | ||
576 | Exp >> *(sym(',') >> Exp) >> -(invoke_args_with_table | TableBlock) | | ||
577 | TableBlock | ||
578 | ); | ||
579 | |||
580 | const_value = (expr("nil") | expr("true") | expr("false")) >> not_(AlphaNum); | ||
581 | |||
582 | SimpleValue = | ||
583 | (Space >> const_value) | | ||
584 | If | Unless | Switch | With | ClassDecl | ForEach | For | While | Do | | ||
585 | (Space >> unary_value) | | ||
586 | TblComprehension | TableLit | Comprehension | FunLit | | ||
587 | (Space >> Num); | ||
588 | |||
// --- statements, lines, blocks and the File entry rule ---
589 | ExpListAssign = ExpList >> -(Update | Assign); | ||
590 | |||
591 | if_line = key("if") >> Exp >> -Assign; | ||
592 | unless_line = key("unless") >> Exp; | ||
593 | |||
// statement_appendix: a trailing "if", "unless" or comprehension clause after a statement.
594 | statement_appendix = (if_line | unless_line | CompInner) >> Space; | ||
595 | statement_sep = and_(*SpaceBreak >> CheckIndent >> Space >> (set("($'\"") | expr("[[") | expr("[="))); | ||
596 | Statement = ( | ||
597 | Import | While | Repeat | For | ForEach | | ||
598 | Return | Local | Global | Export | Macro | | ||
599 | Space >> BreakLoop | Label | Goto | Backcall | | ||
600 | LocalAttrib | BackcallBody | ExpListAssign | ||
601 | ) >> Space >> | ||
602 | -statement_appendix >> -statement_sep; | ||
603 | |||
604 | Body = InBlock | Statement; | ||
605 | |||
606 | empty_line_stop = Space >> and_(Stop); | ||
// Line: a Statement at the current indent that does not begin with "|>", or a
// more deeply indented Statement that does begin with "|>", or a blank line.
607 | Line = and_(check_indent >> Space >> not_(BackcallOperator)) >> Statement | Advance >> ensure(and_(Space >> BackcallOperator) >> Statement, PopIndent) | empty_line_stop; | ||
608 | Block = Seperator >> Line >> *(+Break >> Line); | ||
609 | |||
610 | Shebang = expr("#!") >> *(not_(Stop) >> Any); | ||
611 | File = White >> -Shebang >> Block >> eof(); | ||
612 | } | ||
613 | |||
614 | ParseInfo YueParser::parse(std::string_view codes, rule& r) { | ||
615 | ParseInfo res; | ||
616 | try { | ||
617 | res.codes = std::make_unique<input>(); | ||
618 | *(res.codes) = _converter.from_bytes(&codes.front(), &codes.back() + 1); | ||
619 | } catch (const std::range_error&) { | ||
620 | res.error = "Invalid text encoding."sv; | ||
621 | return res; | ||
622 | } | ||
623 | error_list errors; | ||
624 | try { | ||
625 | State state; | ||
626 | res.node.set(pl::parse(*(res.codes), r, errors, &state)); | ||
627 | if (state.exportCount > 0) { | ||
628 | res.moduleName = std::move(state.moduleName); | ||
629 | res.exportDefault = state.exportDefault; | ||
630 | } | ||
631 | } catch (const std::logic_error& err) { | ||
632 | res.error = err.what(); | ||
633 | return res; | ||
634 | } | ||
635 | if (!errors.empty()) { | ||
636 | std::ostringstream buf; | ||
637 | for (error_list::iterator it = errors.begin(); it != errors.end(); ++it) { | ||
638 | const error& err = *it; | ||
639 | switch (err.m_type) { | ||
640 | case ERROR_TYPE::ERROR_SYNTAX_ERROR: | ||
641 | buf << res.errorMessage("syntax error"sv, &err); | ||
642 | break; | ||
643 | case ERROR_TYPE::ERROR_INVALID_EOF: | ||
644 | buf << res.errorMessage("invalid EOF"sv, &err); | ||
645 | break; | ||
646 | } | ||
647 | } | ||
648 | res.error = buf.str(); | ||
649 | } | ||
650 | return res; | ||
651 | } | ||
652 | |||
653 | std::string YueParser::toString(ast_node* node) { | ||
654 | return _converter.to_bytes(std::wstring(node->m_begin.m_it, node->m_end.m_it)); | ||
655 | } | ||
656 | |||
657 | std::string YueParser::toString(input::iterator begin, input::iterator end) { | ||
658 | return _converter.to_bytes(std::wstring(begin, end)); | ||
659 | } | ||
660 | |||
661 | input YueParser::encode(std::string_view codes) { | ||
662 | return _converter.from_bytes(&codes.front(), &codes.back() + 1); | ||
663 | } | ||
664 | |||
665 | std::string YueParser::decode(const input& codes) { | ||
666 | return _converter.to_bytes(codes); | ||
667 | } | ||
668 | |||
namespace Utils {
// Replaces every occurrence of `from` in `str` with `to`, in place.
// Text inserted by a replacement is not scanned again, so a `to` that contains
// `from` cannot cause runaway growth. An empty `from` leaves `str` unchanged.
void replace(std::string& str, std::string_view from, std::string_view to) {
	// An empty pattern matches at every position, so without this guard the
	// loop below would never terminate.
	if (from.empty()) return;
	size_t start_pos = 0;
	while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
		str.replace(start_pos, from.size(), to);
		// Resume the search after the text that was just inserted.
		start_pos += to.size();
	}
}

// Strips leading and trailing spaces, tabs and '\n' characters from `str`, in
// place. A string made up only of those characters becomes empty.
void trim(std::string& str) {
	if (str.empty()) return;
	// find_first_not_of returns npos when nothing else is present, in which
	// case this erase clears the whole string.
	str.erase(0, str.find_first_not_of(" \t\n"));
	// On a now-empty string find_last_not_of returns npos, and npos + 1 wraps
	// to 0, so the second erase is a no-op.
	str.erase(str.find_last_not_of(" \t\n") + 1);
}
}
684 | |||
685 | std::string ParseInfo::errorMessage(std::string_view msg, const input_range* loc) const { | ||
686 | const int ASCII = 255; | ||
687 | int length = loc->m_begin.m_line; | ||
688 | auto begin = codes->begin(); | ||
689 | auto end = codes->end(); | ||
690 | int count = 0; | ||
691 | for (auto it = codes->begin(); it != codes->end(); ++it) { | ||
692 | if (*it == '\n') { | ||
693 | if (count + 1 == length) { | ||
694 | end = it; | ||
695 | break; | ||
696 | } else { | ||
697 | begin = it + 1; | ||
698 | } | ||
699 | count++; | ||
700 | } | ||
701 | } | ||
702 | int oldCol = loc->m_begin.m_col; | ||
703 | int col = std::max(0, oldCol - 1); | ||
704 | auto it = begin; | ||
705 | for (int i = 0; i < oldCol && it != end; ++i) { | ||
706 | if (*it > ASCII) { | ||
707 | ++col; | ||
708 | } | ||
709 | ++it; | ||
710 | } | ||
711 | auto line = Converter{}.to_bytes(std::wstring(begin, end)); | ||
712 | Utils::replace(line, "\t"sv, " "sv); | ||
713 | std::ostringstream buf; | ||
714 | buf << loc->m_begin.m_line << ": "sv << msg << | ||
715 | '\n' << line << '\n' << std::string(col, ' ') << "^"sv; | ||
716 | return buf.str(); | ||
717 | } | ||
718 | |||
719 | } // namespace yue | ||