aboutsummaryrefslogtreecommitdiff
path: root/src/MoonP/parser.hpp
diff options
context:
space:
mode:
authorLi Jin <dragon-fly@qq.com>2020-01-10 16:30:34 +0800
committerLi Jin <dragon-fly@qq.com>2020-01-10 16:30:34 +0800
commit52a6536103f46c26a3ba9b149b0fe7b40d524d8c (patch)
tree67e4759f8e1ea922079d0e162d84ecba5e558261 /src/MoonP/parser.hpp
parent975167856ed0b11c2ede03c6eb750ca4e4a6a7fc (diff)
downloadyuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.tar.gz
yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.tar.bz2
yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.zip
update.
Diffstat (limited to 'src/MoonP/parser.hpp')
-rw-r--r--src/MoonP/parser.hpp435
1 files changed, 435 insertions, 0 deletions
diff --git a/src/MoonP/parser.hpp b/src/MoonP/parser.hpp
new file mode 100644
index 0000000..9739465
--- /dev/null
+++ b/src/MoonP/parser.hpp
@@ -0,0 +1,435 @@
1/* Copyright (c) 2012, Achilleas Margaritis, modified by Jin Li
2All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
6 Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8
9THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
10
11#pragma once
12
13
14//gcc chokes without rule::rule(const rule &),
15//msvc complains when rule::rule(const rule &) is defined.
16#ifdef _MSC_VER
17#pragma warning (disable: 4521)
18#endif
19
20
21#include <vector>
22#include <string>
23#include <list>
24#include <functional>
25#include <codecvt>
26#include <locale>
27
28namespace parserlib {
29
// const str hash helper functions
// All three helpers compute the same value for the same characters:
// start from 5381 and fold the string from its last character towards its
// first as  h = c + 33 * h.

/** hashes a null-terminated string; usable in constant expressions.
	@param input null-terminated string.
	@return 5381 for the empty string, otherwise
		first character + 33 * hash(remaining characters).
 */
inline constexpr size_t hash(char const* input)
{
	// deliberately a single recursive return statement: that is the only
	// function body form C++11 allows for constexpr functions
	return *input ? *input + 33ull * hash(input + 1) : 5381;
}

/** hashes the characters input[index] .. input[size - 1] at run time.
	Produces the same value as the null-terminated overload would for the
	same characters (provided none of them is '\0'), but runs as a loop so
	that long inputs cannot exhaust the call stack.
	@param input character buffer; does not need to be null-terminated.
	@param size number of characters in the buffer.
	@param index position of the first character to hash.
	@return 5381 when index >= size, otherwise the hash of the range.
 */
inline size_t hash(const char* input, int size, int index)
{
	size_t result = 5381;
	// walk backwards: the last character is the innermost term of the
	// recursive definition, so it has to be folded in first
	for (int i = size; i > index; --i)
	{
		result = static_cast<size_t>(input[i - 1] + 33ull * result);
	}
	return result;
}

/** user-defined literal: "text"_id is the compile-time hash of "text".
	The length argument supplied by the compiler is ignored; hashing stops at
	the first '\0' of the literal.
	@param s the literal's characters.
	@return hash(s).
 */
inline constexpr size_t operator""_id(const char* s, size_t)
{
	return hash(s);
}
43
///character sequence consumed by the parser: a string of wchar_t.
using input = std::basic_string<wchar_t>;

///mutable iterator over the parser's input.
using input_it = input::iterator;

///converter between UTF-8 encoded byte strings and the parser's wide input.
using Converter = std::wstring_convert<std::codecvt_utf8<input::value_type>>;

//forward declarations of the types referred to by the declarations below
class _private;
class _expr;
class _context;
class rule;
53
54
55struct item_t
56{
57 input_it begin;
58 input_it end;
59 void* user_data;
60};
61typedef std::function<bool(const item_t&)> user_handler;
62
63
64///position into the input.
65class pos {
66public:
67 ///interator into the input.
68 input::iterator m_it;
69
70 ///line.
71 int m_line;
72
73 ///column.
74 int m_col;
75
76 ///null constructor.
77 pos():m_line(-1),m_col(0) {}
78
79 /** constructor from input.
80 @param i input.
81 */
82 pos(input &i);
83};
84
85
86/** a grammar expression.
87 */
88class expr {
89public:
90 /** character terminal constructor.
91 @param c character.
92 */
93 expr(char c);
94
95 /** null-terminated string terminal constructor.
96 @param s null-terminated string.
97 */
98 expr(const char *s);
99
100 /** rule reference constructor.
101 @param r rule.
102 */
103 expr(rule &r);
104
105 /** creates a zero-or-more loop out of this expression.
106 @return a zero-or-more loop expression.
107 */
108 expr operator *() const;
109
110 /** creates a one-or-more loop out of this expression.
111 @return a one-or-more loop expression.
112 */
113 expr operator +() const;
114
115 /** creates an optional out of this expression.
116 @return an optional expression.
117 */
118 expr operator -() const;
119
120 /** creates an AND-expression.
121 @return an AND-expression.
122 */
123 expr operator &() const;
124
125 /** creates a NOT-expression.
126 @return a NOT-expression.
127 */
128 expr operator !() const;
129
130private:
131 //internal expression
132 _expr *m_expr;
133
134 //internal constructor from internal expression
135 expr(_expr *e) : m_expr(e) {}
136
137 //assignment not allowed
138 expr &operator = (expr &);
139
140 friend class _private;
141};
142
143
144/** type of procedure to invoke when a rule is successfully parsed.
145 @param b begin position of input.
146 @param e end position of input.
147 @param d pointer to user data.
148 */
149typedef void (*parse_proc)(const pos &b, const pos &e, void *d);
150
151
152///input range.
153class input_range {
154public:
155 virtual ~input_range() {}
156
157 ///begin position.
158 pos m_begin;
159
160 ///end position.
161 pos m_end;
162
163 ///empty constructor.
164 input_range() {}
165
166 /** constructor.
167 @param b begin position.
168 @param e end position.
169 */
170 input_range(const pos &b, const pos &e);
171};
172
173
///error type codes.
enum ERROR_TYPE {
	///syntax error
	ERROR_SYNTAX_ERROR = 1,

	///invalid end of file
	ERROR_INVALID_EOF = 2,

	///first user error; values from here on are left to the user
	ERROR_USER = 100
};
185
186
187///error.
188class error : public input_range {
189public:
190 ///type
191 int m_type;
192
193 /** constructor.
194 @param b begin position.
195 @param e end position.
196 @param t type.
197 */
198 error(const pos &b, const pos &e, int t);
199
200 /** compare on begin position.
201 @param e the other error to compare this with.
202 @return true if this comes before the previous error, false otherwise.
203 */
204 bool operator < (const error &e) const;
205};
206
207
208///type of error list.
209typedef std::list<error> error_list;
210
211
212/** represents a rule.
213 */
214class rule {
215public:
216 /** character terminal constructor.
217 @param c character.
218 */
219 rule(char c);
220
221 /** null-terminated string terminal constructor.
222 @param s null-terminated string.
223 */
224 rule(const char *s);
225
226 /** constructor from expression.
227 @param e expression.
228 */
229 rule(const expr &e);
230
231 /** constructor from rule.
232 @param r rule.
233 */
234 rule(rule &r);
235
236 /** invalid constructor from rule (required by gcc).
237 @param r rule.
238 @exception std::logic_error always thrown.
239 */
240 rule(const rule &r);
241
242 /** deletes the internal object that represents the expression.
243 */
244 ~rule();
245
246 /** creates a zero-or-more loop out of this rule.
247 @return a zero-or-more loop rule.
248 */
249 expr operator *();
250
251 /** creates a one-or-more loop out of this rule.
252 @return a one-or-more loop rule.
253 */
254 expr operator +();
255
256 /** creates an optional out of this rule.
257 @return an optional rule.
258 */
259 expr operator -();
260
261 /** creates an AND-expression out of this rule.
262 @return an AND-expression out of this rule.
263 */
264 expr operator &();
265
266 /** creates a NOT-expression out of this rule.
267 @return a NOT-expression out of this rule.
268 */
269 expr operator !();
270
271 /** sets the parse procedure.
272 @param p procedure.
273 */
274 void set_parse_proc(parse_proc p);
275
276 /** get the this ptr (since operator & is overloaded).
277 @return pointer to this.
278 */
279 rule *this_ptr() { return this; }
280
281private:
282 //mode
283 enum _MODE {
284 _PARSE,
285 _REJECT,
286 _ACCEPT
287 };
288
289 //state
290 struct _state {
291 //position in source code, relative to start
292 size_t m_pos;
293
294 //mode
295 _MODE m_mode;
296
297 //constructor
298 _state(size_t pos = -1, _MODE mode = _PARSE) :
299 m_pos(pos), m_mode(mode) {}
300 };
301
302 //internal expression
303 _expr *m_expr;
304
305 //associated parse procedure.
306 parse_proc m_parse_proc;
307
308 //state
309 _state m_state;
310
311 //assignment not allowed
312 rule &operator = (rule &);
313
314 friend class _private;
315 friend class _context;
316};
317
318
319/** creates a sequence of expressions.
320 @param left left operand.
321 @param right right operand.
322 @return an expression which parses a sequence.
323 */
324expr operator >> (const expr &left, const expr &right);
325
326
327/** creates a choice of expressions.
328 @param left left operand.
329 @param right right operand.
330 @return an expression which parses a choice.
331 */
332expr operator | (const expr &left, const expr &right);
333
334
335/** converts a parser expression into a terminal.
336 @param e expression.
337 @return an expression which parses a terminal.
338 */
339expr term(const expr &e);
340
341
342/** creates a set expression from a null-terminated string.
343 @param s null-terminated string with characters of the set.
344 @return an expression which parses a single character out of a set.
345 */
346expr set(const char *s);
347
348
349/** creates a range expression.
350 @param min min character.
351 @param max max character.
352 @return an expression which parses a single character out of range.
353 */
354expr range(int min, int max);
355
356
357/** creates an expression which increments the line counter
358 and resets the column counter when the given expression
359 is parsed successfully; used for newline characters.
360 @param e expression to wrap into a newline parser.
361 @return an expression that handles newlines.
362 */
363expr nl(const expr &e);
364
365
366/** creates an expression which tests for the end of input.
367 @return an expression that handles the end of input.
368 */
369expr eof();
370
371
372/** creates a not expression.
373 @param e expression.
374 @return the appropriate expression.
375 */
376expr not_(const expr &e);
377
378
379/** creates an and expression.
380 @param e expression.
381 @return the appropriate expression.
382 */
383expr and_(const expr &e);
384
385
386/** creates an expression that parses any character.
387 @return the appropriate expression.
388 */
389expr any();
390
391
392/** parsing succeeds without consuming any input.
393 */
394expr true_();
395
396
397/** parsing fails without consuming any input.
398*/
399expr false_();
400
401
402/** parse with target expression and let user handle result.
403*/
404expr user(const expr &e, const user_handler& handler);
405
406
407/** parses the given input.
408 The parse procedures of each rule parsed are executed
409 before this function returns, if parsing succeeds.
410 @param i input.
411 @param g root rule of grammar.
412 @param el list of errors.
413 @param d user data, passed to the parse procedures.
414 @return true on parsing success, false on failure.
415 */
416bool parse(input &i, rule &g, error_list &el, void *d, void* ud);
417
418
419/** output the specific input range to the specific stream.
420 @param stream stream.
421 @param ir input range.
422 @return the stream.
423 */
424template <class T> T &operator << (T &stream, const input_range &ir) {
425 for(input::const_iterator it = ir.m_begin.m_it;
426 it != ir.m_end.m_it;
427 ++it)
428 {
429 stream << (typename T::char_type)*it;
430 }
431 return stream;
432}
433
434
435} //namespace parserlib