diff options
author | Li Jin <dragon-fly@qq.com> | 2017-07-13 16:03:11 +0800 |
---|---|---|
committer | Li Jin <dragon-fly@qq.com> | 2017-07-13 16:03:11 +0800 |
commit | cb906e739f27931e9798510cd83725131ed55209 (patch) | |
tree | 52b465c5eb2250dec3ed3d5f02b86db79653b838 /MoonParser/parser.hpp | |
parent | 975c3c7dfa032229272c3b225de1127f1605e2d2 (diff) | |
download | yuescript-cb906e739f27931e9798510cd83725131ed55209.tar.gz yuescript-cb906e739f27931e9798510cd83725131ed55209.tar.bz2 yuescript-cb906e739f27931e9798510cd83725131ed55209.zip |
rewrite parsing codes with parserlib.
Diffstat (limited to 'MoonParser/parser.hpp')
-rw-r--r-- | MoonParser/parser.hpp | 428 |
1 files changed, 428 insertions, 0 deletions
diff --git a/MoonParser/parser.hpp b/MoonParser/parser.hpp new file mode 100644 index 0000000..540a51c --- /dev/null +++ b/MoonParser/parser.hpp | |||
@@ -0,0 +1,428 @@ | |||
1 | #ifndef PARSER_HPP | ||
2 | #define PARSER_HPP | ||
3 | |||
4 | |||
5 | //gcc chokes without rule::rule(const rule &), | ||
6 | //msvc complains when rule::rule(const rule &) is defined. | ||
7 | #ifdef _MSC_VER | ||
8 | #pragma warning (disable: 4521) | ||
9 | #endif | ||
10 | |||
11 | |||
12 | #include <vector> | ||
13 | #include <string> | ||
14 | #include <list> | ||
15 | #include <functional> | ||
16 | |||
17 | |||
18 | namespace parserlib { | ||
19 | |||
20 | |||
21 | class _private; | ||
22 | class _expr; | ||
23 | class _context; | ||
24 | class rule; | ||
25 | |||
26 | |||
27 | ///type of the parser's input. | ||
28 | typedef std::vector<int> input; | ||
29 | typedef input::iterator input_it; | ||
30 | struct item_t | ||
31 | { | ||
32 | input_it begin; | ||
33 | input_it end; | ||
34 | void* user_data; | ||
35 | }; | ||
36 | typedef std::function<bool(const item_t&)> user_handler; | ||
37 | |||
38 | |||
39 | ///position into the input: an iterator plus line and column counters. | |
40 | class pos { | |
41 | public: | |
42 | ///iterator into the input. | |
43 | input::iterator m_it; | |
44 | |||
45 | ///line. | |
46 | int m_line; | |
47 | |||
48 | ///column. | |
49 | int m_col; | |
50 | |||
51 | ///default constructor; m_line and m_col are left uninitialized. | |
52 | pos() {} | |
53 | |||
54 | /** constructor from input. | |
55 | @param i input. | |
56 | */ | |
57 | pos(input &i); | |
58 | }; | |
59 | |||
60 | |||
61 | /** a grammar expression. | ||
62 | */ | ||
63 | class expr { | ||
64 | public: | ||
65 | /** character terminal constructor. | ||
66 | @param c character. | ||
67 | */ | ||
68 | expr(int c); | ||
69 | |||
70 | /** null-terminated string terminal constructor. | ||
71 | @param s null-terminated string. | ||
72 | */ | ||
73 | expr(const char *s); | ||
74 | |||
75 | /** null-terminated wide string terminal constructor. | ||
76 | @param s null-terminated string. | ||
77 | */ | ||
78 | expr(const wchar_t *s); | ||
79 | |||
80 | /** rule reference constructor. | ||
81 | @param r rule. | ||
82 | */ | ||
83 | expr(rule &r); | ||
84 | |||
85 | /** creates a zero-or-more loop out of this expression. | ||
86 | @return a zero-or-more loop expression. | ||
87 | */ | ||
88 | expr operator *() const; | ||
89 | |||
90 | /** creates a one-or-more loop out of this expression. | ||
91 | @return a one-or-more loop expression. | ||
92 | */ | ||
93 | expr operator +() const; | ||
94 | |||
95 | /** creates an optional out of this expression. | ||
96 | @return an optional expression. | ||
97 | */ | ||
98 | expr operator -() const; | ||
99 | |||
100 | /** creates an AND-expression. | ||
101 | @return an AND-expression. | ||
102 | */ | ||
103 | expr operator &() const; | ||
104 | |||
105 | /** creates a NOT-expression. | ||
106 | @return a NOT-expression. | ||
107 | */ | ||
108 | expr operator !() const; | ||
109 | |||
110 | private: | ||
111 | //internal expression | ||
112 | _expr *m_expr; | ||
113 | |||
114 | //internal constructor from internal expression | ||
115 | expr(_expr *e) : m_expr(e) {} | ||
116 | |||
117 | //assignment not allowed | ||
118 | expr &operator = (expr &); | ||
119 | |||
120 | friend class _private; | ||
121 | }; | ||
122 | |||
123 | |||
124 | /** type of procedure to invoke when a rule is successfully parsed. | ||
125 | @param b begin position of input. | ||
126 | @param e end position of input. | ||
127 | @param d pointer to user data. | ||
128 | */ | ||
129 | typedef void (*parse_proc)(const pos &b, const pos &e, void *d); | ||
130 | |||
131 | |||
132 | ///input range. | ||
133 | class input_range { | ||
134 | public: | ||
135 | ///begin position. | ||
136 | pos m_begin; | ||
137 | |||
138 | ///end position. | ||
139 | pos m_end; | ||
140 | |||
141 | ///empty constructor. | ||
142 | input_range() {} | ||
143 | |||
144 | /** constructor. | ||
145 | @param b begin position. | ||
146 | @param e end position. | ||
147 | */ | ||
148 | input_range(const pos &b, const pos &e); | ||
149 | }; | ||
150 | |||
151 | |||
152 | ///enum with error types. | ||
153 | enum ERROR_TYPE { | ||
154 | ///syntax error | ||
155 | ERROR_SYNTAX_ERROR = 1, | ||
156 | |||
157 | ///invalid end of file | ||
158 | ERROR_INVALID_EOF, | ||
159 | |||
160 | ///first user error | ||
161 | ERROR_USER = 100 | ||
162 | }; | ||
163 | |||
164 | |||
165 | ///error: an input range tagged with an integer error type. | |
166 | class error : public input_range { | |
167 | public: | |
168 | ///type (an int; presumably an ERROR_TYPE value or a user code from ERROR_USER upward - TODO confirm). | |
169 | int m_type; | |
170 | |||
171 | /** constructor. | |
172 | @param b begin position. | |
173 | @param e end position. | |
174 | @param t type. | |
175 | */ | |
176 | error(const pos &b, const pos &e, int t); | |
177 | |||
178 | /** compare on begin position. | |
179 | @param e the other error to compare this with. | |
180 | @return true if this error comes before the other error e, false otherwise. | |
181 | */ | |
182 | bool operator < (const error &e) const; | |
183 | }; | |
184 | |||
185 | |||
186 | ///type of error list. | ||
187 | typedef std::list<error> error_list; | ||
188 | |||
189 | |||
190 | /** represents a rule. | ||
191 | */ | ||
192 | class rule { | ||
193 | public: | ||
194 | /** character terminal constructor. | ||
195 | @param c character. | ||
196 | */ | ||
197 | rule(int c); | ||
198 | |||
199 | /** null-terminated string terminal constructor. | ||
200 | @param s null-terminated string. | ||
201 | */ | ||
202 | rule(const char *s); | ||
203 | |||
204 | /** null-terminated wide string terminal constructor. | ||
205 | @param s null-terminated string. | ||
206 | */ | ||
207 | rule(const wchar_t *s); | ||
208 | |||
209 | /** constructor from expression. | ||
210 | @param e expression. | ||
211 | */ | ||
212 | rule(const expr &e); | ||
213 | |||
214 | /** constructor from rule. | ||
215 | @param r rule. | ||
216 | */ | ||
217 | rule(rule &r); | ||
218 | |||
219 | /** invalid constructor from rule (required by gcc). | ||
220 | @param r rule. | ||
221 | @exception std::logic_error always thrown. | ||
222 | */ | ||
223 | rule(const rule &r); | ||
224 | |||
225 | /** deletes the internal object that represents the expression. | ||
226 | */ | ||
227 | ~rule(); | ||
228 | |||
229 | /** creates a zero-or-more loop out of this rule. | ||
230 | @return a zero-or-more loop rule. | ||
231 | */ | ||
232 | expr operator *(); | ||
233 | |||
234 | /** creates a one-or-more loop out of this rule. | ||
235 | @return a one-or-more loop rule. | ||
236 | */ | ||
237 | expr operator +(); | ||
238 | |||
239 | /** creates an optional out of this rule. | ||
240 | @return an optional rule. | ||
241 | */ | ||
242 | expr operator -(); | ||
243 | |||
244 | /** creates an AND-expression out of this rule. | ||
245 | @return an AND-expression out of this rule. | ||
246 | */ | ||
247 | expr operator &(); | ||
248 | |||
249 | /** creates a NOT-expression out of this rule. | ||
250 | @return a NOT-expression out of this rule. | ||
251 | */ | ||
252 | expr operator !(); | ||
253 | |||
254 | /** sets the parse procedure. | ||
255 | @param p procedure. | ||
256 | */ | ||
257 | void set_parse_proc(parse_proc p); | ||
258 | |||
259 | /** returns a pointer to this rule (needed because unary operator & is overloaded). | |
260 | @return pointer to this. | ||
261 | */ | ||
262 | rule *this_ptr() { return this; } | ||
263 | |||
264 | private: | ||
265 | //mode | ||
266 | enum _MODE { | ||
267 | _PARSE, | ||
268 | _REJECT, | ||
269 | _ACCEPT | ||
270 | }; | ||
271 | |||
272 | //state | ||
273 | struct _state { | ||
274 | //position in source code, relative to start | ||
275 | size_t m_pos; | ||
276 | |||
277 | //mode | ||
278 | _MODE m_mode; | ||
279 | |||
280 | //constructor; the default pos of -1 converts to the largest size_t value | |
281 | _state(size_t pos = -1, _MODE mode = _PARSE) : | ||
282 | m_pos(pos), m_mode(mode) {} | ||
283 | }; | ||
284 | |||
285 | //internal expression | ||
286 | _expr *m_expr; | ||
287 | |||
288 | //associated parse procedure. | ||
289 | parse_proc m_parse_proc; | ||
290 | |||
291 | //state | ||
292 | _state m_state; | ||
293 | |||
294 | //assignment not allowed | ||
295 | rule &operator = (rule &); | ||
296 | |||
297 | friend class _private; | ||
298 | friend class _context; | ||
299 | }; | ||
300 | |||
301 | |||
302 | /** creates a sequence of expressions. | ||
303 | @param left left operand. | ||
304 | @param right right operand. | ||
305 | @return an expression which parses a sequence. | ||
306 | */ | ||
307 | expr operator >> (const expr &left, const expr &right); | ||
308 | |||
309 | |||
310 | /** creates a choice of expressions. | ||
311 | @param left left operand. | ||
312 | @param right right operand. | ||
313 | @return an expression which parses a choice. | ||
314 | */ | ||
315 | expr operator | (const expr &left, const expr &right); | ||
316 | |||
317 | |||
318 | /** converts a parser expression into a terminal. | ||
319 | @param e expression. | ||
320 | @return an expression which parses a terminal. | ||
321 | */ | ||
322 | expr term(const expr &e); | ||
323 | |||
324 | |||
325 | /** creates a set expression from a null-terminated string. | ||
326 | @param s null-terminated string with characters of the set. | ||
327 | @return an expression which parses a single character out of a set. | ||
328 | */ | ||
329 | expr set(const char *s); | ||
330 | |||
331 | |||
332 | /** creates a set expression from a null-terminated wide string. | ||
333 | @param s null-terminated string with characters of the set. | ||
334 | @return an expression which parses a single character out of a set. | ||
335 | */ | ||
336 | expr set(const wchar_t *s); | ||
337 | |||
338 | |||
339 | /** creates a range expression. | ||
340 | @param min min character. | ||
341 | @param max max character. | ||
342 | @return an expression which parses a single character out of range. | ||
343 | */ | ||
344 | expr range(int min, int max); | ||
345 | |||
346 | |||
347 | /** creates an expression which increments the line counter | ||
348 | and resets the column counter when the given expression | ||
349 | is parsed successfully; used for newline characters. | ||
350 | @param e expression to wrap into a newline parser. | ||
351 | @return an expression that handles newlines. | ||
352 | */ | ||
353 | expr nl(const expr &e); | ||
354 | |||
355 | |||
356 | /** creates an expression which tests for the end of input. | ||
357 | @return an expression that handles the end of input. | ||
358 | */ | ||
359 | expr eof(); | ||
360 | |||
361 | |||
362 | /** creates a not expression. | ||
363 | @param e expression. | ||
364 | @return the appropriate expression. | ||
365 | */ | ||
366 | expr not_(const expr &e); | ||
367 | |||
368 | |||
369 | /** creates an and expression. | ||
370 | @param e expression. | ||
371 | @return the appropriate expression. | ||
372 | */ | ||
373 | expr and_(const expr &e); | ||
374 | |||
375 | |||
376 | /** creates an expression that parses any character. | ||
377 | @return the appropriate expression. | ||
378 | */ | ||
379 | expr any(); | ||
380 | |||
381 | |||
382 | /** parsing succeeds without consuming any input. | ||
383 | */ | ||
384 | expr true_(); | ||
385 | |||
386 | |||
387 | /** parsing fails without consuming any input. | ||
388 | */ | ||
389 | expr false_(); | ||
390 | |||
391 | |||
392 | /** parse with target expression and let user handle result. | ||
393 | */ | ||
394 | expr user(const expr &e, const user_handler& handler); | ||
395 | |||
396 | |||
397 | /** parses the given input. | |
398 | The parse procedures of each rule parsed are executed | |
399 | before this function returns, if parsing succeeds. | |
400 | @param i input. | |
401 | @param g root rule of grammar. | |
402 | @param el list of errors. | |
403 | @param d user data, passed to the parse procedures. NOTE(review): ud is a second user-data pointer, presumably exposed to user() handlers as item_t::user_data - TODO confirm. | |
404 | @return true on parsing success, false on failure. | |
405 | */ | |
406 | bool parse(input &i, rule &g, error_list &el, void *d, void* ud); | |
407 | |||
408 | |||
409 | /** writes each element of the given input range to the given stream, cast to T::char_type. | |
410 | @param stream destination stream; T must provide a char_type member type. | |
411 | @param ir input range; elements from m_begin.m_it up to, but not including, m_end.m_it are written. | |
412 | @return the stream. | |
413 | */ | |
414 | template <class T> T &operator << (T &stream, const input_range &ir) { | |
415 | for(input::const_iterator it = ir.m_begin.m_it; | |
416 | it != ir.m_end.m_it; | |
417 | ++it) | |
418 | { | |
419 | stream << (typename T::char_type)*it; | |
420 | } | |
421 | return stream; | |
422 | } | |
423 | |||
424 | |||
425 | } //namespace parserlib | ||
426 | |||
427 | |||
428 | #endif //PARSER_HPP | ||