diff options
author | Li Jin <dragon-fly@qq.com> | 2020-01-10 16:30:34 +0800 |
---|---|---|
committer | Li Jin <dragon-fly@qq.com> | 2020-01-10 16:30:34 +0800 |
commit | 52a6536103f46c26a3ba9b149b0fe7b40d524d8c (patch) | |
tree | 67e4759f8e1ea922079d0e162d84ecba5e558261 /MoonParser/parser.hpp | |
parent | 975167856ed0b11c2ede03c6eb750ca4e4a6a7fc (diff) | |
download | yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.tar.gz yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.tar.bz2 yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.zip |
update.
Diffstat (limited to 'MoonParser/parser.hpp')
-rw-r--r-- | MoonParser/parser.hpp | 425 |
1 files changed, 0 insertions, 425 deletions
diff --git a/MoonParser/parser.hpp b/MoonParser/parser.hpp deleted file mode 100644 index cbf0168..0000000 --- a/MoonParser/parser.hpp +++ /dev/null | |||
@@ -1,425 +0,0 @@ | |||
1 | #pragma once | ||
2 | |||
3 | |||
4 | //gcc chokes without rule::rule(const rule &), | ||
5 | //msvc complains when rule::rule(const rule &) is defined. | ||
6 | #ifdef _MSC_VER | ||
7 | #pragma warning (disable: 4521) | ||
8 | #endif | ||
9 | |||
10 | |||
11 | #include <vector> | ||
12 | #include <string> | ||
13 | #include <list> | ||
14 | #include <functional> | ||
15 | #include <codecvt> | ||
16 | #include <locale> | ||
17 | |||
18 | namespace parserlib { | ||
19 | |||
20 | // const str hash helper functions | ||
21 | inline constexpr size_t hash(char const* input) | ||
22 | { | ||
23 | return *input ? *input + 33ull * hash(input + 1) : 5381; | ||
24 | } | ||
25 | inline size_t hash(const char* input, int size, int index) | ||
26 | { | ||
27 | return index < size ? input[index] + 33ull * hash(input, size, index + 1) : 5381; | ||
28 | } | ||
29 | inline size_t constexpr operator"" _id(const char* s, size_t) | ||
30 | { | ||
31 | return hash(s); | ||
32 | } | ||
33 | |||
34 | ///type of the parser's input. | ||
35 | typedef std::basic_string<wchar_t> input; | ||
36 | typedef input::iterator input_it; | ||
37 | typedef std::wstring_convert<std::codecvt_utf8<input::value_type>> Converter; | ||
38 | |||
39 | class _private; | ||
40 | class _expr; | ||
41 | class _context; | ||
42 | class rule; | ||
43 | |||
44 | |||
45 | struct item_t | ||
46 | { | ||
47 | input_it begin; | ||
48 | input_it end; | ||
49 | void* user_data; | ||
50 | }; | ||
51 | typedef std::function<bool(const item_t&)> user_handler; | ||
52 | |||
53 | |||
54 | ///position into the input. | ||
55 | class pos { | ||
56 | public: | ||
57 | ///iterator into the input. | ||
58 | input::iterator m_it; | ||
59 | |||
60 | ///line. | ||
61 | int m_line; | ||
62 | |||
63 | ///column. | ||
64 | int m_col; | ||
65 | |||
66 | ///null constructor. | ||
67 | pos():m_line(-1),m_col(0) {} | ||
68 | |||
69 | /** constructor from input. | ||
70 | @param i input. | ||
71 | */ | ||
72 | pos(input &i); | ||
73 | }; | ||
74 | |||
75 | |||
76 | /** a grammar expression. | ||
77 | */ | ||
78 | class expr { | ||
79 | public: | ||
80 | /** character terminal constructor. | ||
81 | @param c character. | ||
82 | */ | ||
83 | expr(char c); | ||
84 | |||
85 | /** null-terminated string terminal constructor. | ||
86 | @param s null-terminated string. | ||
87 | */ | ||
88 | expr(const char *s); | ||
89 | |||
90 | /** rule reference constructor. | ||
91 | @param r rule. | ||
92 | */ | ||
93 | expr(rule &r); | ||
94 | |||
95 | /** creates a zero-or-more loop out of this expression. | ||
96 | @return a zero-or-more loop expression. | ||
97 | */ | ||
98 | expr operator *() const; | ||
99 | |||
100 | /** creates a one-or-more loop out of this expression. | ||
101 | @return a one-or-more loop expression. | ||
102 | */ | ||
103 | expr operator +() const; | ||
104 | |||
105 | /** creates an optional out of this expression. | ||
106 | @return an optional expression. | ||
107 | */ | ||
108 | expr operator -() const; | ||
109 | |||
110 | /** creates an AND-expression. | ||
111 | @return an AND-expression. | ||
112 | */ | ||
113 | expr operator &() const; | ||
114 | |||
115 | /** creates a NOT-expression. | ||
116 | @return a NOT-expression. | ||
117 | */ | ||
118 | expr operator !() const; | ||
119 | |||
120 | private: | ||
121 | //internal expression | ||
122 | _expr *m_expr; | ||
123 | |||
124 | //internal constructor from internal expression | ||
125 | expr(_expr *e) : m_expr(e) {} | ||
126 | |||
127 | //assignment not allowed | ||
128 | expr &operator = (expr &); | ||
129 | |||
130 | friend class _private; | ||
131 | }; | ||
132 | |||
133 | |||
134 | /** type of procedure to invoke when a rule is successfully parsed. | ||
135 | @param b begin position of input. | ||
136 | @param e end position of input. | ||
137 | @param d pointer to user data. | ||
138 | */ | ||
139 | typedef void (*parse_proc)(const pos &b, const pos &e, void *d); | ||
140 | |||
141 | |||
142 | ///input range. | ||
143 | class input_range { | ||
144 | public: | ||
145 | virtual ~input_range() {} | ||
146 | |||
147 | ///begin position. | ||
148 | pos m_begin; | ||
149 | |||
150 | ///end position. | ||
151 | pos m_end; | ||
152 | |||
153 | ///empty constructor. | ||
154 | input_range() {} | ||
155 | |||
156 | /** constructor. | ||
157 | @param b begin position. | ||
158 | @param e end position. | ||
159 | */ | ||
160 | input_range(const pos &b, const pos &e); | ||
161 | }; | ||
162 | |||
163 | |||
164 | ///enum with error types. | ||
165 | enum ERROR_TYPE { | ||
166 | ///syntax error | ||
167 | ERROR_SYNTAX_ERROR = 1, | ||
168 | |||
169 | ///invalid end of file | ||
170 | ERROR_INVALID_EOF, | ||
171 | |||
172 | ///first user error | ||
173 | ERROR_USER = 100 | ||
174 | }; | ||
175 | |||
176 | |||
177 | ///error. | ||
178 | class error : public input_range { | ||
179 | public: | ||
180 | ///type | ||
181 | int m_type; | ||
182 | |||
183 | /** constructor. | ||
184 | @param b begin position. | ||
185 | @param e end position. | ||
186 | @param t type. | ||
187 | */ | ||
188 | error(const pos &b, const pos &e, int t); | ||
189 | |||
190 | /** compare on begin position. | ||
191 | @param e the other error to compare this with. | ||
192 | @return true if this error begins before the other error e, false otherwise. | ||
193 | */ | ||
194 | bool operator < (const error &e) const; | ||
195 | }; | ||
196 | |||
197 | |||
198 | ///type of error list. | ||
199 | typedef std::list<error> error_list; | ||
200 | |||
201 | |||
202 | /** represents a rule. | ||
203 | */ | ||
204 | class rule { | ||
205 | public: | ||
206 | /** character terminal constructor. | ||
207 | @param c character. | ||
208 | */ | ||
209 | rule(char c); | ||
210 | |||
211 | /** null-terminated string terminal constructor. | ||
212 | @param s null-terminated string. | ||
213 | */ | ||
214 | rule(const char *s); | ||
215 | |||
216 | /** constructor from expression. | ||
217 | @param e expression. | ||
218 | */ | ||
219 | rule(const expr &e); | ||
220 | |||
221 | /** constructor from rule. | ||
222 | @param r rule. | ||
223 | */ | ||
224 | rule(rule &r); | ||
225 | |||
226 | /** invalid constructor from rule (required by gcc). | ||
227 | @param r rule. | ||
228 | @exception std::logic_error always thrown. | ||
229 | */ | ||
230 | rule(const rule &r); | ||
231 | |||
232 | /** deletes the internal object that represents the expression. | ||
233 | */ | ||
234 | ~rule(); | ||
235 | |||
236 | /** creates a zero-or-more loop out of this rule. | ||
237 | @return a zero-or-more loop expression. | ||
238 | */ | ||
239 | expr operator *(); | ||
240 | |||
241 | /** creates a one-or-more loop out of this rule. | ||
242 | @return a one-or-more loop expression. | ||
243 | */ | ||
244 | expr operator +(); | ||
245 | |||
246 | /** creates an optional out of this rule. | ||
247 | @return an optional expression. | ||
248 | */ | ||
249 | expr operator -(); | ||
250 | |||
251 | /** creates an AND-expression out of this rule. | ||
252 | @return an AND-expression out of this rule. | ||
253 | */ | ||
254 | expr operator &(); | ||
255 | |||
256 | /** creates a NOT-expression out of this rule. | ||
257 | @return a NOT-expression out of this rule. | ||
258 | */ | ||
259 | expr operator !(); | ||
260 | |||
261 | /** sets the parse procedure. | ||
262 | @param p procedure. | ||
263 | */ | ||
264 | void set_parse_proc(parse_proc p); | ||
265 | |||
266 | /** returns the address of this rule (needed because operator & is overloaded). | ||
267 | @return pointer to this. | ||
268 | */ | ||
269 | rule *this_ptr() { return this; } | ||
270 | |||
271 | private: | ||
272 | //mode | ||
273 | enum _MODE { | ||
274 | _PARSE, | ||
275 | _REJECT, | ||
276 | _ACCEPT | ||
277 | }; | ||
278 | |||
279 | //state | ||
280 | struct _state { | ||
281 | //position in source code, relative to start | ||
282 | size_t m_pos; | ||
283 | |||
284 | //mode | ||
285 | _MODE m_mode; | ||
286 | |||
287 | //constructor | ||
288 | _state(size_t pos = -1, _MODE mode = _PARSE) : | ||
289 | m_pos(pos), m_mode(mode) {} | ||
290 | }; | ||
291 | |||
292 | //internal expression | ||
293 | _expr *m_expr; | ||
294 | |||
295 | //associated parse procedure. | ||
296 | parse_proc m_parse_proc; | ||
297 | |||
298 | //state | ||
299 | _state m_state; | ||
300 | |||
301 | //assignment not allowed | ||
302 | rule &operator = (rule &); | ||
303 | |||
304 | friend class _private; | ||
305 | friend class _context; | ||
306 | }; | ||
307 | |||
308 | |||
309 | /** creates a sequence of expressions. | ||
310 | @param left left operand. | ||
311 | @param right right operand. | ||
312 | @return an expression which parses a sequence. | ||
313 | */ | ||
314 | expr operator >> (const expr &left, const expr &right); | ||
315 | |||
316 | |||
317 | /** creates a choice of expressions. | ||
318 | @param left left operand. | ||
319 | @param right right operand. | ||
320 | @return an expression which parses a choice. | ||
321 | */ | ||
322 | expr operator | (const expr &left, const expr &right); | ||
323 | |||
324 | |||
325 | /** converts a parser expression into a terminal. | ||
326 | @param e expression. | ||
327 | @return an expression which parses a terminal. | ||
328 | */ | ||
329 | expr term(const expr &e); | ||
330 | |||
331 | |||
332 | /** creates a set expression from a null-terminated string. | ||
333 | @param s null-terminated string with characters of the set. | ||
334 | @return an expression which parses a single character out of a set. | ||
335 | */ | ||
336 | expr set(const char *s); | ||
337 | |||
338 | |||
339 | /** creates a range expression. | ||
340 | @param min min character. | ||
341 | @param max max character. | ||
342 | @return an expression which parses a single character within the range from min to max. | ||
343 | */ | ||
344 | expr range(int min, int max); | ||
345 | |||
346 | |||
347 | /** creates an expression which increments the line counter | ||
348 | and resets the column counter when the given expression | ||
349 | is parsed successfully; used for newline characters. | ||
350 | @param e expression to wrap into a newline parser. | ||
351 | @return an expression that handles newlines. | ||
352 | */ | ||
353 | expr nl(const expr &e); | ||
354 | |||
355 | |||
356 | /** creates an expression which tests for the end of input. | ||
357 | @return an expression that handles the end of input. | ||
358 | */ | ||
359 | expr eof(); | ||
360 | |||
361 | |||
362 | /** creates a not expression. | ||
363 | @param e expression. | ||
364 | @return the appropriate expression. | ||
365 | */ | ||
366 | expr not_(const expr &e); | ||
367 | |||
368 | |||
369 | /** creates an and expression. | ||
370 | @param e expression. | ||
371 | @return the appropriate expression. | ||
372 | */ | ||
373 | expr and_(const expr &e); | ||
374 | |||
375 | |||
376 | /** creates an expression that parses any character. | ||
377 | @return the appropriate expression. | ||
378 | */ | ||
379 | expr any(); | ||
380 | |||
381 | |||
382 | /** parsing succeeds without consuming any input. | ||
383 | */ | ||
384 | expr true_(); | ||
385 | |||
386 | |||
387 | /** parsing fails without consuming any input. | ||
388 | */ | ||
389 | expr false_(); | ||
390 | |||
391 | |||
392 | /** parse with target expression and let user handle result. | ||
393 | */ | ||
394 | expr user(const expr &e, const user_handler& handler); | ||
395 | |||
396 | |||
397 | /** parses the given input. | ||
398 | The parse procedures of each rule parsed are executed | ||
399 | before this function returns, if parsing succeeds. | ||
400 | @param i input. | ||
401 | @param g root rule of grammar. | ||
402 | @param el list of errors. | ||
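403 | @param d user data, passed to the parse procedures. @param ud second user-data pointer (presumably what user handlers receive as item_t::user_data; confirm against the implementation). | ||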
404 | @return true on parsing success, false on failure. | ||
405 | */ | ||
406 | bool parse(input &i, rule &g, error_list &el, void *d, void* ud); | ||
407 | |||
408 | |||
409 | /** output the specific input range to the specific stream. | ||
410 | @param stream stream. | ||
411 | @param ir input range. | ||
412 | @return the stream. | ||
413 | */ | ||
414 | template <class T> T &operator << (T &stream, const input_range &ir) { | ||
415 | for(input::const_iterator it = ir.m_begin.m_it; | ||
416 | it != ir.m_end.m_it; | ||
417 | ++it) | ||
418 | { | ||
419 | stream << (typename T::char_type)*it; | ||
420 | } | ||
421 | return stream; | ||
422 | } | ||
423 | |||
424 | |||
425 | } //namespace parserlib | ||