aboutsummaryrefslogtreecommitdiff
path: root/MoonParser/parser.hpp
diff options
context:
space:
mode:
authorLi Jin <dragon-fly@qq.com>2017-07-13 16:03:11 +0800
committerLi Jin <dragon-fly@qq.com>2017-07-13 16:03:11 +0800
commitcb906e739f27931e9798510cd83725131ed55209 (patch)
tree52b465c5eb2250dec3ed3d5f02b86db79653b838 /MoonParser/parser.hpp
parent975c3c7dfa032229272c3b225de1127f1605e2d2 (diff)
downloadyuescript-cb906e739f27931e9798510cd83725131ed55209.tar.gz
yuescript-cb906e739f27931e9798510cd83725131ed55209.tar.bz2
yuescript-cb906e739f27931e9798510cd83725131ed55209.zip
rewrite parsing codes with parserlib.
Diffstat (limited to 'MoonParser/parser.hpp')
-rw-r--r--MoonParser/parser.hpp428
1 files changed, 428 insertions, 0 deletions
diff --git a/MoonParser/parser.hpp b/MoonParser/parser.hpp
new file mode 100644
index 0000000..540a51c
--- /dev/null
+++ b/MoonParser/parser.hpp
@@ -0,0 +1,428 @@
1#ifndef PARSER_HPP
2#define PARSER_HPP
3
4
5//gcc fails to compile without rule::rule(const rule &), while
6//msvc warns (C4521, multiple copy constructors) when it is declared next to rule::rule(rule &); the pragma below silences that warning.
7#ifdef _MSC_VER
8#pragma warning (disable: 4521)
9#endif
10
11
12#include <vector>
13#include <string>
14#include <list>
15#include <functional>
16
17
18namespace parserlib {
19
20
21class _private;
22class _expr;
23class _context;
24class rule;
25
26
27///type of the parser's input: a vector of int-valued elements.
28typedef std::vector<int> input;
29typedef input::iterator input_it; ///< mutable iterator into an input.
30struct item_t ///< argument handed to a user_handler: an iterator pair into the input plus an opaque pointer.
31{
32 input_it begin; ///< presumably the start of the input span the handler is asked about -- TODO confirm in the parser implementation.
33 input_it end; ///< presumably the end of that span -- TODO confirm in the parser implementation.
34 void* user_data; ///< opaque pointer; presumably the `ud` argument of parse() -- TODO confirm.
35};
36typedef std::function<bool(const item_t&)> user_handler; ///< callback type accepted by user(); the meaning of the returned bool is not visible in this header.
37
38
39///position into the input: an iterator together with a line and a column number.
40class pos {
41public:
42 ///iterator into the input.
43 input::iterator m_it;
44
45 ///line.
46 int m_line;
47
48 ///column.
49 int m_col;
50
51 ///null constructor. NOTE(review): no member is initialized here, so m_line and m_col hold indeterminate values until assigned.
52 pos() {}
53
54 /** constructor from input (defined out of line).
55 @param i input; presumably the new position refers to the start of i -- TODO confirm in the implementation.
56 */
57 pos(input &i);
58};
59
60
61/** a grammar expression; wraps a pointer to an internal _expr object.
62 None of the public constructors is explicit, so an int, a string literal or a rule converts implicitly to expr. */
63class expr {
64public:
65 /** character terminal constructor.
66 @param c character.
67 */
68 expr(int c);
69
70 /** null-terminated string terminal constructor.
71 @param s null-terminated string.
72 */
73 expr(const char *s);
74
75 /** null-terminated wide string terminal constructor.
76 @param s null-terminated wide string.
77 */
78 expr(const wchar_t *s);
79
80 /** rule reference constructor.
81 @param r rule (taken by non-const reference).
82 */
83 expr(rule &r);
84
85 /** creates a zero-or-more loop out of this expression (unary prefix operator, written *e).
86 @return a zero-or-more loop expression.
87 */
88 expr operator *() const;
89
90 /** creates a one-or-more loop out of this expression (unary prefix operator, written +e).
91 @return a one-or-more loop expression.
92 */
93 expr operator +() const;
94
95 /** creates an optional out of this expression (unary prefix operator, written -e).
96 @return an optional expression.
97 */
98 expr operator -() const;
99
100 /** creates an AND-expression. NOTE: this overloads unary &, so &e builds an expression instead of taking the address of e.
101 @return an AND-expression.
102 */
103 expr operator &() const;
104
105 /** creates a NOT-expression (unary prefix operator, written !e).
106 @return a NOT-expression.
107 */
108 expr operator !() const;
109
110private:
111 //internal expression
112 _expr *m_expr;
113
114 //internal constructor from internal expression; stores the pointer as given
115 expr(_expr *e) : m_expr(e) {}
116
117 //assignment not allowed: declared private, and no definition appears in this header
118 expr &operator = (expr &);
119
120 friend class _private; //_private (forward-declared above) may use m_expr and the private constructor
121};
122
123
124/** type of procedure (a plain function pointer) to invoke when a rule is successfully parsed; attached to a rule with rule::set_parse_proc.
125 @param b begin position of input.
126 @param e end position of input.
127 @param d pointer to user data (parse() documents its own d argument as being passed to the parse procedures).
128 */
129typedef void (*parse_proc)(const pos &b, const pos &e, void *d);
130
131
132///input range: a pair of positions delimiting part of the input.
133class input_range {
134public:
135 ///begin position.
136 pos m_begin;
137
138 ///end position.
139 pos m_end;
140
141 ///empty constructor; m_begin and m_end are built with pos(), which leaves their line and column unset.
142 input_range() {}
143
144 /** constructor (defined out of line).
145 @param b begin position.
146 @param e end position.
147 */
148 input_range(const pos &b, const pos &e);
149};
150
151
152///enum with error types; error::m_type stores these as a plain int.
153enum ERROR_TYPE {
154 ///syntax error
155 ERROR_SYNTAX_ERROR = 1,
156
157 ///invalid end of file (implicitly 2, one more than ERROR_SYNTAX_ERROR)
158 ERROR_INVALID_EOF,
159
160 ///first user error; presumably user-defined error codes start at this value -- TODO confirm
161 ERROR_USER = 100
162};
163
164
165///error: an input_range (begin and end positions) tagged with an integer error type.
166class error : public input_range {
167public:
168 ///type; an int rather than ERROR_TYPE, presumably so that codes from ERROR_USER upward also fit -- TODO confirm.
169 int m_type;
170
171 /** constructor (defined out of line).
172 @param b begin position.
173 @param e end position.
174 @param t type.
175 */
176 error(const pos &b, const pos &e, int t);
177
178 /** compare on begin position.
179 @param e the other error to compare this with.
180 @return true if this error comes before e, false otherwise.
181 */
182 bool operator < (const error &e) const;
183};
184
185
186///type of error list; parse() takes one by non-const reference as its el parameter.
187typedef std::list<error> error_list;
188
189
190/** represents a rule: an internal expression plus an optional parse procedure and some parser state.
191 Unlike the corresponding expr operators, the unary operators below are non-const members. */
192class rule {
193public:
194 /** character terminal constructor.
195 @param c character.
196 */
197 rule(int c);
198
199 /** null-terminated string terminal constructor.
200 @param s null-terminated string.
201 */
202 rule(const char *s);
203
204 /** null-terminated wide string terminal constructor.
205 @param s null-terminated wide string.
206 */
207 rule(const wchar_t *s);
208
209 /** constructor from expression.
210 @param e expression.
211 */
212 rule(const expr &e);
213
214 /** constructor from a non-const rule.
215 @param r rule; presumably the new rule refers to r rather than copying it -- TODO confirm in the implementation.
216 */
217 rule(rule &r);
218
219 /** invalid constructor from a const rule (required by gcc); it exists only so that the code compiles.
220 @param r rule.
221 @exception std::logic_error always thrown.
222 */
223 rule(const rule &r);
224
225 /** deletes the internal object that represents the expression.
226 */
227 ~rule();
228
229 /** creates a zero-or-more loop out of this rule (unary prefix operator, written *r).
230 @return a zero-or-more loop expression.
231 */
232 expr operator *();
233
234 /** creates a one-or-more loop out of this rule (unary prefix operator, written +r).
235 @return a one-or-more loop expression.
236 */
237 expr operator +();
238
239 /** creates an optional out of this rule (unary prefix operator, written -r).
240 @return an optional expression.
241 */
242 expr operator -();
243
244 /** creates an AND-expression out of this rule; because unary & is overloaded, use this_ptr() to get the address of a rule.
245 @return an AND-expression out of this rule.
246 */
247 expr operator &();
248
249 /** creates a NOT-expression out of this rule (unary prefix operator, written !r).
250 @return a NOT-expression out of this rule.
251 */
252 expr operator !();
253
254 /** sets the parse procedure.
255 @param p procedure (a parse_proc function pointer).
256 */
257 void set_parse_proc(parse_proc p);
258
259 /** get the this ptr (since operator & is overloaded).
260 @return pointer to this.
261 */
262 rule *this_ptr() { return this; }
263
264private:
265 //mode stored in _state; the meaning of each value is defined by the parser implementation, not by this header
266 enum _MODE {
267 _PARSE,
268 _REJECT,
269 _ACCEPT
270 };
271
272 //state: a position paired with a mode
273 struct _state {
274 //position in source code, relative to start
275 size_t m_pos;
276
277 //mode
278 _MODE m_mode;
279
280 //constructor; the default pos of -1 converts to the largest size_t value (presumably a "no position" marker -- TODO confirm)
281 _state(size_t pos = -1, _MODE mode = _PARSE) :
282 m_pos(pos), m_mode(mode) {}
283 };
284
285 //internal expression
286 _expr *m_expr;
287
288 //associated parse procedure.
289 parse_proc m_parse_proc;
290
291 //state
292 _state m_state;
293
294 //assignment not allowed: declared private, and no definition appears in this header
295 rule &operator = (rule &);
296
297 friend class _private; //_private and _context (forward-declared above) may access the private members
298 friend class _context;
299};
300
301
302/** creates a sequence out of two expressions; operands convert implicitly through expr's constructors.
303 @param left left operand.
304 @param right right operand.
305 @return an expression which parses a sequence.
306 */
307expr operator >> (const expr &left, const expr &right);
308
309
310/** creates a choice between two expressions; operands convert implicitly through expr's constructors.
311 @param left left operand.
312 @param right right operand.
313 @return an expression which parses a choice.
314 */
315expr operator | (const expr &left, const expr &right);
316
317
318/** converts a parser expression into a terminal.
319 @param e expression to wrap.
320 @return an expression which parses e as a terminal.
321 */
322expr term(const expr &e);
323
324
325/** creates a set expression from a null-terminated string.
326 @param s null-terminated string with characters of the set.
327 @return an expression which parses a single character that belongs to the set.
328 */
329expr set(const char *s);
330
331
332/** creates a set expression from a null-terminated wide string.
333 @param s null-terminated wide string with characters of the set.
334 @return an expression which parses a single character that belongs to the set.
335 */
336expr set(const wchar_t *s);
337
338
339/** creates a range expression.
340 @param min min character.
341 @param max max character.
342 @return an expression which parses a single character taken from the range min..max (whether the bounds are inclusive is not visible in this header).
343 */
344expr range(int min, int max);
345
346
347/** creates an expression which increments the line counter
348 and resets the column counter when the given expression
349 is parsed successfully; used for newline characters.
350 @param e expression that recognizes a newline.
351 @return an expression that parses e and handles the newline bookkeeping.
352 */
353expr nl(const expr &e);
354
355
356/** creates an expression which tests for the end of input.
357 @return an expression that tests for the end of input.
358 */
359expr eof();
360
361
362/** creates a NOT-expression from e; function-call spelling, presumably equivalent to expr::operator ! -- TODO confirm.
363 @param e expression.
364 @return the NOT-expression.
365 */
366expr not_(const expr &e);
367
368
369/** creates an AND-expression from e; function-call spelling, presumably equivalent to expr::operator & -- TODO confirm.
370 @param e expression.
371 @return the AND-expression.
372 */
373expr and_(const expr &e);
374
375
376/** creates an expression that parses any character.
377 @return an expression that parses any character.
378 */
379expr any();
380
381
382/** creates an expression whose parsing succeeds without consuming any input.
383 @return the always-succeeding expression. */
384expr true_();
385
386
387/** creates an expression whose parsing fails without consuming any input.
388 @return the always-failing expression. */
389expr false_();
390
391
392/** parse with target expression e and let the user-supplied handler (a user_handler callback taking an item_t) handle the result.
393 How the bool returned by the handler is used is not visible in this header -- TODO confirm in the implementation. */
394expr user(const expr &e, const user_handler& handler);
395
396
397/** parses the given input.
398 The parse procedures of each rule parsed are executed
399 before this function returns, if parsing succeeds.
400 @param i input.
401 @param g root rule of grammar.
402 @param el list of errors.
403 @param d user data, passed to the parse procedures.
404 @param ud second opaque pointer, previously undocumented; presumably surfaced to user() handlers as item_t::user_data -- TODO confirm.
405 @return true on parsing success, false on failure. */
406bool parse(input &i, rule &g, error_list &el, void *d, void* ud);
407
408
409/** output the specific input range to the specific stream, one element at a time.
410 @param stream stream; its type T must provide a nested char_type and accept operator << for a value of that type.
411 @param ir input range; elements from ir.m_begin.m_it up to, but not including, ir.m_end.m_it are written.
412 @return the stream.
413 */
414template <class T> T &operator << (T &stream, const input_range &ir) {
415 for(input::const_iterator it = ir.m_begin.m_it;
416 it != ir.m_end.m_it;
417 ++it)
418 {
419 stream << (typename T::char_type)*it; //each int element is cast to the stream's character type; values that do not fit are narrowed by the cast
420 }
421 return stream;
422}
423
424
425} //namespace parserlib
426
427
428#endif //PARSER_HPP