diff options
author | Li Jin <dragon-fly@qq.com> | 2020-01-10 16:30:34 +0800 |
---|---|---|
committer | Li Jin <dragon-fly@qq.com> | 2020-01-10 16:30:34 +0800 |
commit | 52a6536103f46c26a3ba9b149b0fe7b40d524d8c (patch) | |
tree | 67e4759f8e1ea922079d0e162d84ecba5e558261 /src/MoonP/parser.hpp | |
parent | 975167856ed0b11c2ede03c6eb750ca4e4a6a7fc (diff) | |
download | yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.tar.gz yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.tar.bz2 yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.zip |
update.
Diffstat (limited to 'src/MoonP/parser.hpp')
-rw-r--r-- | src/MoonP/parser.hpp | 435 |
1 files changed, 435 insertions, 0 deletions
diff --git a/src/MoonP/parser.hpp b/src/MoonP/parser.hpp new file mode 100644 index 0000000..9739465 --- /dev/null +++ b/src/MoonP/parser.hpp | |||
@@ -0,0 +1,435 @@ | |||
1 | /* Copyright (c) 2012, Achilleas Margaritis, modified by Jin Li | ||
2 | All rights reserved. | ||
3 | |||
4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: | ||
5 | |||
6 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. | ||
7 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. | ||
8 | |||
9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ | ||
10 | |||
11 | #pragma once | ||
12 | |||
13 | |||
14 | //gcc chokes without rule::rule(const rule &), | ||
15 | //msvc complains when rule::rule(const rule &) is defined. | ||
16 | #ifdef _MSC_VER | ||
17 | #pragma warning (disable: 4521) | ||
18 | #endif | ||
19 | |||
20 | |||
21 | #include <vector> | ||
22 | #include <string> | ||
23 | #include <list> | ||
24 | #include <functional> | ||
25 | #include <codecvt> | ||
26 | #include <locale> | ||
27 | |||
28 | namespace parserlib { | ||
29 | |||
30 | // const str hash helper functions: hash(input) is the constexpr recursive form for null-terminated strings (5381 at the terminator, otherwise current char + 33 * hash of the rest); hash(input, size, index) applies the same recurrence at run time to input[index] .. input[size - 1], one recursive call per character; the _id literal suffix returns hash(s) and ignores the length argument, so hashing stops at the first null character | ||
31 | inline constexpr size_t hash(char const* input) | ||
32 | { | ||
33 | return *input ? *input + 33ull * hash(input + 1) : 5381; | ||
34 | } | ||
35 | inline size_t hash(const char* input, int size, int index) | ||
36 | { | ||
37 | return index < size ? input[index] + 33ull * hash(input, size, index + 1) : 5381; | ||
38 | } | ||
39 | inline size_t constexpr operator"" _id(const char* s, size_t) | ||
40 | { | ||
41 | return hash(s); | ||
42 | } | ||
43 | |||
44 | ///type of the parser's input: a wide-character string; input_it is its mutable iterator and Converter is a std::wstring_convert using std::codecvt_utf8 over the same character type. | ||
45 | typedef std::basic_string<wchar_t> input; | ||
46 | typedef input::iterator input_it; | ||
47 | typedef std::wstring_convert<std::codecvt_utf8<input::value_type>> Converter; | ||
48 | |||
49 | class _private; | ||
50 | class _expr; | ||
51 | class _context; | ||
52 | class rule; | ||
53 | |||
54 | |||
55 | /* value passed to a user_handler: two input iterators plus an opaque user_data pointer (presumably the span is [begin, end) -- TODO confirm against the implementation) */ struct item_t | ||
56 | { | ||
57 | input_it begin; | ||
58 | input_it end; | ||
59 | void* user_data; | ||
60 | }; | ||
61 | /* callback type accepted by user(): receives a const item_t & and returns bool */ typedef std::function<bool(const item_t&)> user_handler; | ||
62 | |||
63 | |||
64 | ///position into the input: an iterator together with a line and a column number. | ||
65 | class pos { | ||
66 | public: | ||
67 | ///iterator into the input. | ||
68 | input::iterator m_it; | ||
69 | |||
70 | ///line. | ||
71 | int m_line; | ||
72 | |||
73 | ///column. | ||
74 | int m_col; | ||
75 | |||
76 | ///null constructor: sets the line to -1 and the column to 0; m_it is not explicitly initialized. | ||
77 | pos():m_line(-1),m_col(0) {} | ||
78 | |||
79 | /** constructor from input. | ||
80 | @param i input (taken by non-const reference). | ||
81 | */ | ||
82 | pos(input &i); | ||
83 | }; | ||
84 | |||
85 | |||
86 | /** a grammar expression: wraps a pointer to an internal _expr and can be built from a character, a null-terminated string or a rule. | ||
87 | */ | ||
88 | class expr { | ||
89 | public: | ||
90 | /** character terminal constructor (not explicit, so a char converts implicitly). | ||
91 | @param c character. | ||
92 | */ | ||
93 | expr(char c); | ||
94 | |||
95 | /** null-terminated string terminal constructor (not explicit). | ||
96 | @param s null-terminated string. | ||
97 | */ | ||
98 | expr(const char *s); | ||
99 | |||
100 | /** rule reference constructor (not explicit). | ||
101 | @param r rule, taken by non-const reference. | ||
102 | */ | ||
103 | expr(rule &r); | ||
104 | |||
105 | /** creates a zero-or-more loop out of this expression (unary prefix *). | ||
106 | @return a zero-or-more loop expression. | ||
107 | */ | ||
108 | expr operator *() const; | ||
109 | |||
110 | /** creates a one-or-more loop out of this expression (unary prefix +). | ||
111 | @return a one-or-more loop expression. | ||
112 | */ | ||
113 | expr operator +() const; | ||
114 | |||
115 | /** creates an optional out of this expression (unary prefix -). | ||
116 | @return an optional expression. | ||
117 | */ | ||
118 | expr operator -() const; | ||
119 | |||
120 | /** creates an AND-expression; this overloads unary &, so applying & to an expr yields an expr, not a pointer. | ||
121 | @return an AND-expression. | ||
122 | */ | ||
123 | expr operator &() const; | ||
124 | |||
125 | /** creates a NOT-expression (unary prefix !). | ||
126 | @return a NOT-expression. | ||
127 | */ | ||
128 | expr operator !() const; | ||
129 | |||
130 | private: | ||
131 | //internal expression (raw pointer; who owns it is not visible from this declaration) | ||
132 | _expr *m_expr; | ||
133 | |||
134 | //internal constructor from internal expression; stores the pointer as is | ||
135 | expr(_expr *e) : m_expr(e) {} | ||
136 | |||
137 | //assignment not allowed: the operator is declared private | ||
138 | expr &operator = (expr &); | ||
139 | |||
140 | friend class _private; | ||
141 | }; | ||
142 | |||
143 | |||
144 | /** type of procedure to invoke when a rule is successfully parsed; attached to a rule through rule::set_parse_proc. | ||
145 | @param b begin position of input. | ||
146 | @param e end position of input. | ||
147 | @param d pointer to user data. | ||
148 | */ | ||
149 | typedef void (*parse_proc)(const pos &b, const pos &e, void *d); | ||
150 | |||
151 | |||
152 | ///input range: a begin/end pair of positions; has a virtual destructor and is the base class of error. | ||
153 | class input_range { | ||
154 | public: | ||
155 | virtual ~input_range() {} | ||
156 | |||
157 | ///begin position. | ||
158 | pos m_begin; | ||
159 | |||
160 | ///end position (the stream operator << in this header stops before it). | ||
161 | pos m_end; | ||
162 | |||
163 | ///empty constructor: both positions are default-constructed. | ||
164 | input_range() {} | ||
165 | |||
166 | /** constructor. | ||
167 | @param b begin position. | ||
168 | @param e end position. | ||
169 | */ | ||
170 | input_range(const pos &b, const pos &e); | ||
171 | }; | ||
172 | |||
173 | |||
174 | ///enum with error types (error::m_type is a plain int; presumably it holds one of these values or a user code -- TODO confirm). | ||
175 | enum ERROR_TYPE { | ||
176 | ///syntax error | ||
177 | ERROR_SYNTAX_ERROR = 1, | ||
178 | |||
179 | ///invalid end of file (implicitly 2, one more than ERROR_SYNTAX_ERROR) | ||
180 | ERROR_INVALID_EOF, | ||
181 | |||
182 | ///first user error (presumably user-defined error types count up from this value -- TODO confirm) | ||
183 | ERROR_USER = 100 | ||
184 | }; | ||
185 | |||
186 | |||
187 | ///error: an input range tagged with an integer error type. | ||
188 | class error : public input_range { | ||
189 | public: | ||
190 | ///type (stored as int; presumably an ERROR_TYPE value or a user-defined code -- TODO confirm) | ||
191 | int m_type; | ||
192 | |||
193 | /** constructor. | ||
194 | @param b begin position. | ||
195 | @param e end position. | ||
196 | @param t type. | ||
197 | */ | ||
198 | error(const pos &b, const pos &e, int t); | ||
199 | |||
200 | /** compare on begin position. | ||
201 | @param e the other error to compare this with. | ||
202 | @return true if this comes before the other error e, false otherwise. | ||
203 | */ | ||
204 | bool operator < (const error &e) const; | ||
205 | }; | ||
206 | |||
207 | |||
208 | ///type of error list: a std::list of error objects; parse() takes one by reference. | ||
209 | typedef std::list<error> error_list; | ||
210 | |||
211 | |||
212 | /** represents a rule: holds an internal expression, an associated parse procedure and a parse state. | ||
213 | */ | ||
214 | class rule { | ||
215 | public: | ||
216 | /** character terminal constructor. | ||
217 | @param c character. | ||
218 | */ | ||
219 | rule(char c); | ||
220 | |||
221 | /** null-terminated string terminal constructor. | ||
222 | @param s null-terminated string. | ||
223 | */ | ||
224 | rule(const char *s); | ||
225 | |||
226 | /** constructor from expression. | ||
227 | @param e expression. | ||
228 | */ | ||
229 | rule(const expr &e); | ||
230 | |||
231 | /** constructor from rule. | ||
232 | @param r rule, taken by non-const reference. | ||
233 | */ | ||
234 | rule(rule &r); | ||
235 | |||
236 | /** invalid constructor from rule (required by gcc). | ||
237 | @param r rule. | ||
238 | @exception std::logic_error always thrown. | ||
239 | */ | ||
240 | rule(const rule &r); | ||
241 | |||
242 | /** deletes the internal object that represents the expression. | ||
243 | */ | ||
244 | ~rule(); | ||
245 | |||
246 | /** creates a zero-or-more loop out of this rule. | ||
247 | @return a zero-or-more loop expression (the return type is expr, not rule). | ||
248 | */ | ||
249 | expr operator *(); | ||
250 | |||
251 | /** creates a one-or-more loop out of this rule. | ||
252 | @return a one-or-more loop expression. | ||
253 | */ | ||
254 | expr operator +(); | ||
255 | |||
256 | /** creates an optional out of this rule. | ||
257 | @return an optional expression. | ||
258 | */ | ||
259 | expr operator -(); | ||
260 | |||
261 | /** creates an AND-expression out of this rule; this overloads unary &, see this_ptr() for the address. | ||
262 | @return an AND-expression out of this rule. | ||
263 | */ | ||
264 | expr operator &(); | ||
265 | |||
266 | /** creates a NOT-expression out of this rule. | ||
267 | @return a NOT-expression out of this rule. | ||
268 | */ | ||
269 | expr operator !(); | ||
270 | |||
271 | /** sets the parse procedure. | ||
272 | @param p procedure. | ||
273 | */ | ||
274 | void set_parse_proc(parse_proc p); | ||
275 | |||
276 | /** returns the address of this rule (needed because unary operator & is overloaded). | ||
277 | @return pointer to this. | ||
278 | */ | ||
279 | rule *this_ptr() { return this; } | ||
280 | |||
281 | private: | ||
282 | //mode | ||
283 | enum _MODE { | ||
284 | _PARSE, | ||
285 | _REJECT, | ||
286 | _ACCEPT | ||
287 | }; | ||
288 | |||
289 | //state | ||
290 | struct _state { | ||
291 | //position in source code, relative to start | ||
292 | size_t m_pos; | ||
293 | |||
294 | //mode | ||
295 | _MODE m_mode; | ||
296 | |||
297 | //constructor; the default pos of -1 converts to the largest size_t value, the default mode is _PARSE | ||
298 | _state(size_t pos = -1, _MODE mode = _PARSE) : | ||
299 | m_pos(pos), m_mode(mode) {} | ||
300 | }; | ||
301 | |||
302 | //internal expression | ||
303 | _expr *m_expr; | ||
304 | |||
305 | //associated parse procedure. | ||
306 | parse_proc m_parse_proc; | ||
307 | |||
308 | //state | ||
309 | _state m_state; | ||
310 | |||
311 | //assignment not allowed: the operator is declared private | ||
312 | rule &operator = (rule &); | ||
313 | |||
314 | friend class _private; | ||
315 | friend class _context; | ||
316 | }; | ||
317 | |||
318 | |||
319 | /** creates a sequence of expressions; both operands are const expr &, so expr's converting constructors (char, const char *, rule &) apply. | ||
320 | @param left left operand. | ||
321 | @param right right operand. | ||
322 | @return an expression which parses a sequence built from left and right. | ||
323 | */ | ||
324 | expr operator >> (const expr &left, const expr &right); | ||
325 | |||
326 | |||
327 | /** creates a choice of expressions; both operands are const expr &, so expr's converting constructors (char, const char *, rule &) apply. | ||
328 | @param left left operand. | ||
329 | @param right right operand. | ||
330 | @return an expression which parses a choice between left and right. | ||
331 | */ | ||
332 | expr operator | (const expr &left, const expr &right); | ||
333 | |||
334 | |||
335 | /** converts a parser expression into a terminal. | ||
336 | @param e expression to convert. | ||
337 | @return an expression which parses e as a terminal. | ||
338 | */ | ||
339 | expr term(const expr &e); | ||
340 | |||
341 | |||
342 | /** creates a set expression from a null-terminated string. | ||
343 | @param s null-terminated string with characters of the set. | ||
344 | @return an expression which parses a single character that belongs to the set. | ||
345 | */ | ||
346 | expr set(const char *s); | ||
347 | |||
348 | |||
349 | /** creates a range expression. | ||
350 | @param min min character (passed as int). | ||
351 | @param max max character (passed as int). | ||
352 | @return an expression which parses a single character taken from the range min..max (whether the bounds are inclusive is not visible here -- TODO confirm). | ||
353 | */ | ||
354 | expr range(int min, int max); | ||
355 | |||
356 | |||
357 | /** creates an expression which increments the line counter | ||
358 | and resets the column counter when the given expression | ||
359 | is parsed successfully; used for newline characters. | ||
360 | @param e expression to wrap into a newline parser. | ||
361 | @return an expression that parses e and performs the line/column bookkeeping described above. | ||
362 | */ | ||
363 | expr nl(const expr &e); | ||
364 | |||
365 | |||
366 | /** creates an expression which tests for the end of input. | ||
367 | @return an expression that tests for the end of input. | ||
368 | */ | ||
369 | expr eof(); | ||
370 | |||
371 | |||
372 | /** creates a not expression (presumably the function form of expr::operator ! -- TODO confirm). | ||
373 | @param e expression the not is applied to. | ||
374 | @return the resulting not expression. | ||
375 | */ | ||
376 | expr not_(const expr &e); | ||
377 | |||
378 | |||
379 | /** creates an and expression (presumably the function form of expr::operator & -- TODO confirm). | ||
380 | @param e expression the and is applied to. | ||
381 | @return the resulting and expression. | ||
382 | */ | ||
383 | expr and_(const expr &e); | ||
384 | |||
385 | |||
386 | /** creates an expression that parses any character. | ||
387 | @return an expression that parses any character. | ||
388 | */ | ||
389 | expr any(); | ||
390 | |||
391 | |||
392 | /** creates an expression for which parsing succeeds without consuming any input. | ||
393 | @return the always-succeeding expression. */ | ||
394 | expr true_(); | ||
395 | |||
396 | |||
397 | /** creates an expression for which parsing fails without consuming any input. | ||
398 | @return the always-failing expression. */ | ||
399 | expr false_(); | ||
400 | |||
401 | |||
402 | /** parse with target expression and let user handle result. | ||
403 | @param e target expression. @param handler callback of type user_handler, i.e. std::function<bool(const item_t&)>. @return the wrapping expression. */ | ||
404 | expr user(const expr &e, const user_handler& handler); | ||
405 | |||
406 | |||
407 | /** parses the given input. | ||
408 | The parse procedures of each rule parsed are executed | ||
409 | before this function returns, if parsing succeeds. | ||
410 | @param i input. | ||
411 | @param g root rule of grammar. | ||
412 | @param el list of errors. | ||
413 | @param d user data, passed to the parse procedures. @param ud second opaque pointer, not described by the original comment; presumably handed to user handlers as item_t::user_data -- TODO confirm. | ||
414 | @return true on parsing success, false on failure. | ||
415 | */ | ||
416 | bool parse(input &i, rule &g, error_list &el, void *d, void* ud); | ||
417 | |||
418 | |||
419 | /** output the specific input range to the specific stream, one character at a time from m_begin.m_it up to (not including) m_end.m_it; each character is cast to T::char_type, no encoding conversion is performed. | ||
420 | @param stream stream; T must provide a char_type member type. | ||
421 | @param ir input range. | ||
422 | @return the stream. | ||
423 | */ | ||
424 | template <class T> T &operator << (T &stream, const input_range &ir) { | ||
425 | for(input::const_iterator it = ir.m_begin.m_it; | ||
426 | it != ir.m_end.m_it; | ||
427 | ++it) | ||
428 | { | ||
429 | stream << (typename T::char_type)*it; | ||
430 | } | ||
431 | return stream; | ||
432 | } | ||
433 | |||
434 | |||
435 | } //namespace parserlib | ||