aboutsummaryrefslogtreecommitdiff
path: root/MoonParser/parser.cpp
diff options
context:
space:
mode:
authorLi Jin <dragon-fly@qq.com>2020-01-10 16:30:34 +0800
committerLi Jin <dragon-fly@qq.com>2020-01-10 16:30:34 +0800
commit52a6536103f46c26a3ba9b149b0fe7b40d524d8c (patch)
tree67e4759f8e1ea922079d0e162d84ecba5e558261 /MoonParser/parser.cpp
parent975167856ed0b11c2ede03c6eb750ca4e4a6a7fc (diff)
downloadyuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.tar.gz
yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.tar.bz2
yuescript-52a6536103f46c26a3ba9b149b0fe7b40d524d8c.zip
update.
Diffstat (limited to 'MoonParser/parser.cpp')
-rw-r--r--MoonParser/parser.cpp1430
1 files changed, 0 insertions, 1430 deletions
diff --git a/MoonParser/parser.cpp b/MoonParser/parser.cpp
deleted file mode 100644
index 03857c6..0000000
--- a/MoonParser/parser.cpp
+++ /dev/null
@@ -1,1430 +0,0 @@
1#include <cstdlib>
2#include <cstring>
3#include <cassert>
4#include <stdexcept>
5#include <unordered_map>
6#include <unordered_set>
7
8#include "parser.hpp"
9
10
11namespace parserlib {
12
13
14//internal map from rules to parse procs
15typedef std::unordered_map<rule *, parse_proc> _parse_proc_map_t;
16
17//on exit, it deletes the parse proc map
18static _parse_proc_map_t& _get_parse_proc_map() {
19 static _parse_proc_map_t _parse_proc_map;
20 return _parse_proc_map;
21}
22
23
24//get the parse proc from the map
25static parse_proc _get_parse_proc(rule *r) {
26 _parse_proc_map_t& _parse_proc_map = _get_parse_proc_map();
27 _parse_proc_map_t::iterator it = _parse_proc_map.find(r);
28 if (it == _parse_proc_map.end()) return 0;
29 return it->second;
30}
31
32
33//internal private class that manages access to the public classes' internals.
34class _private {
35public:
36 //get the internal expression object from the expression.
37 static _expr *get_expr(const expr &e) {
38 return e.m_expr;
39 }
40
41 //create new expression from given expression
42 static expr construct_expr(_expr *e) {
43 return e;
44 }
45
46 //get the internal expression object from the rule.
47 static _expr *get_expr(rule &r) {
48 return r.m_expr;
49 }
50
51 //get the internal parse proc from the rule.
52 static parse_proc get_parse_proc(rule &r) {
53 return r.m_parse_proc;
54 }
55};
56
57
58class _context;
59
60
61//parser state
62class _state {
63public:
64 //position
65 pos m_pos;
66
67 //size of match vector
68 size_t m_matches;
69
70 //constructor
71 _state(_context &con);
72};
73
74
75//match
76class _match {
77public:
78 //rule matched
79 rule *m_rule;
80
81 //begin position
82 pos m_begin;
83
84 //end position
85 pos m_end;
86
87 //null constructor
88 _match() {}
89
90 //constructor from parameters
91 _match(rule *r, const pos &b, const pos &e) :
92 m_rule(r),
93 m_begin(b),
94 m_end(e)
95 {
96 }
97};
98
99
100//match vector
101typedef std::vector<_match> _match_vector;
102
103
104//parsing context
105class _context {
106public:
107 //user data
108 void* m_user_data;
109
110 //current position
111 pos m_pos;
112
113 //error position
114 pos m_error_pos;
115
116 //input begin
117 input::iterator m_begin;
118
119 //input end
120 input::iterator m_end;
121
122 //matches
123 _match_vector m_matches;
124
125 //constructor
126 _context(input &i, void* ud) :
127 m_user_data(ud),
128 m_pos(i),
129 m_error_pos(i),
130 m_begin(i.begin()),
131 m_end(i.end())
132 {
133 }
134
135 //check if the end is reached
136 bool end() const {
137 return m_pos.m_it == m_end;
138 }
139
140 //get the current symbol
141 input::value_type symbol() const {
142 assert(!end());
143 return *m_pos.m_it;
144 }
145
146 //set the longest possible error
147 void set_error_pos() {
148 if (m_pos.m_it > m_error_pos.m_it) {
149 m_error_pos = m_pos;
150 }
151 }
152
153 //next column
154 void next_col() {
155 ++m_pos.m_it;
156 ++m_pos.m_col;
157 }
158
159 //next line
160 void next_line() {
161 ++m_pos.m_line;
162 m_pos.m_col = 1;
163 }
164
165 //restore the state
166 void restore(const _state &st) {
167 m_pos = st.m_pos;
168 m_matches.resize(st.m_matches);
169 }
170
171 //parse non-term rule.
172 bool parse_non_term(rule &r);
173
174 //parse term rule.
175 bool parse_term(rule &r);
176
177 //execute all the parse procs
178 void do_parse_procs(void *d) const {
179 for(_match_vector::const_iterator it = m_matches.begin();
180 it != m_matches.end();
181 ++it)
182 {
183 const _match &m = *it;
184 parse_proc p = _private::get_parse_proc(*m.m_rule);
185 p(m.m_begin, m.m_end, d);
186 }
187 }
188
189private:
190 //parse non-term rule.
191 bool _parse_non_term(rule &r);
192
193 //parse term rule.
194 bool _parse_term(rule &r);
195};
196
197
198//base class for expressions
199class _expr {
200public:
201 //destructor.
202 virtual ~_expr() {
203 }
204
205 //parse with whitespace
206 virtual bool parse_non_term(_context &con) const = 0;
207
208 //parse terminal
209 virtual bool parse_term(_context &con) const = 0;
210};
211
212
213//single character expression.
214class _char : public _expr {
215public:
216 //constructor.
217 _char(char c) :
218 m_char(c)
219 {
220 }
221
222 //parse with whitespace
223 virtual bool parse_non_term(_context &con) const {
224 return _parse(con);
225 }
226
227 //parse terminal
228 virtual bool parse_term(_context &con) const {
229 return _parse(con);
230 }
231
232private:
233 //character
234 input::value_type m_char;
235
236 //internal parse
237 bool _parse(_context &con) const {
238 if (!con.end()) {
239 input::value_type ch = con.symbol();
240 if (ch == m_char) {
241 con.next_col();
242 return true;
243 }
244 }
245 con.set_error_pos();
246 return false;
247 }
248};
249
250
251//string expression.
252class _string : public _expr {
253public:
254 //constructor from ansi string.
255 _string(const char *s) :
256 m_string(Converter{}.from_bytes(s))
257 {
258 }
259
260 //parse with whitespace
261 virtual bool parse_non_term(_context &con) const {
262 return _parse(con);
263 }
264
265 //parse terminal
266 virtual bool parse_term(_context &con) const {
267 return _parse(con);
268 }
269
270private:
271 //string
272 input m_string;
273
274 //parse the string
275 bool _parse(_context &con) const {
276 for(input::const_iterator it = m_string.begin(),
277 end = m_string.end();;)
278 {
279 if (it == end) return true;
280 if (con.end()) break;
281 if (con.symbol() != *it) break;
282 ++it;
283 con.next_col();
284 }
285 con.set_error_pos();
286 return false;
287 }
288};
289
290
291//set expression.
292class _set : public _expr {
293public:
294 //constructor from ansi string.
295 _set(const char *s) {
296 auto str = Converter{}.from_bytes(s);
297 for (auto ch : str) {
298 _add(ch);
299 }
300 }
301
302 //constructor from range.
303 _set(int min, int max) {
304 assert(min >= 0);
305 assert(min <= max);
306 m_quick_set.resize((size_t)max + 1U);
307 for(; min <= max; ++min) {
308 m_quick_set[(size_t)min] = true;
309 }
310 }
311
312 //parse with whitespace
313 virtual bool parse_non_term(_context &con) const {
314 return _parse(con);
315 }
316
317 //parse terminal
318 virtual bool parse_term(_context &con) const {
319 return _parse(con);
320 }
321
322private:
323 //set is kept as an array of flags, for quick access
324 std::vector<bool> m_quick_set;
325 std::unordered_set<size_t> m_large_set;
326
327 //add character
328 void _add(size_t i) {
329 if (i <= m_quick_set.size() || i <= 255) {
330 if (i >= m_quick_set.size()) {
331 m_quick_set.resize(i + 1);
332 }
333 m_quick_set[i] = true;
334 } else {
335 m_large_set.insert(i);
336 }
337 }
338
339 //internal parse
340 bool _parse(_context &con) const {
341 if (!con.end()) {
342 size_t ch = con.symbol();
343 if (ch < m_quick_set.size()) {
344 if (m_quick_set[ch]) {
345 con.next_col();
346 return true;
347 }
348 } else if (m_large_set.find(ch) != m_large_set.end()) {
349 con.next_col();
350 return true;
351 }
352 }
353 con.set_error_pos();
354 return false;
355 }
356};
357
358
359//base class for unary expressions
360class _unary : public _expr {
361public:
362 //constructor.
363 _unary(_expr *e) :
364 m_expr(e)
365 {
366 }
367
368 //destructor.
369 virtual ~_unary() {
370 delete m_expr;
371 }
372
373protected:
374 //expression
375 _expr *m_expr;
376};
377
378
379//terminal
380class _term : public _unary {
381public:
382 //constructor.
383 _term(_expr *e) :
384 _unary(e)
385 {
386 }
387
388 //parse with whitespace
389 virtual bool parse_non_term(_context &con) const {
390 return m_expr->parse_term(con);
391 }
392
393 //parse terminal
394 virtual bool parse_term(_context &con) const {
395 return m_expr->parse_term(con);
396 }
397};
398
399
400//user
401class _user : public _unary {
402public:
403 //constructor.
404 _user(_expr *e, const user_handler &callback) :
405 _unary(e),
406 m_handler(callback)
407 {
408 }
409
410 //parse with whitespace
411 virtual bool parse_non_term(_context &con) const {
412 pos pos = con.m_pos;
413 if (m_expr->parse_non_term(con)) {
414 item_t item = {pos.m_it, con.m_pos.m_it, con.m_user_data};
415 return m_handler(item);
416 }
417 return false;
418 }
419
420 //parse terminal
421 virtual bool parse_term(_context &con) const {
422 pos pos = con.m_pos;
423 if (m_expr->parse_term(con)) {
424 item_t item = {pos.m_it, con.m_pos.m_it, con.m_user_data};
425 return m_handler(item);
426 }
427 return false;
428 }
429private:
430 user_handler m_handler;
431};
432
433
434//loop 0
435class _loop0 : public _unary {
436public:
437 //constructor.
438 _loop0(_expr *e) :
439 _unary(e)
440 {
441 }
442
443 //parse with whitespace
444 virtual bool parse_non_term(_context &con) const {
445 //if parsing of the first fails, restore the context and stop
446 _state st(con);
447 if (!m_expr->parse_non_term(con)) {
448 con.restore(st);
449 return true;
450 }
451
452 //parse the rest
453 for(;;) {
454 _state st(con);
455 if (!m_expr->parse_non_term(con)) {
456 con.restore(st);
457 break;
458 }
459 }
460
461 return true;
462 }
463
464 //parse terminal
465 virtual bool parse_term(_context &con) const {
466 //if parsing of the first fails, restore the context and stop
467 _state st(con);
468 if (!m_expr->parse_term(con)) {
469 con.restore(st);
470 return true;
471 }
472
473 //parse the rest until no more parsing is possible
474 for(;;) {
475 _state st(con);
476 if (!m_expr->parse_term(con)) {
477 con.restore(st);
478 break;
479 }
480 }
481
482 return true;
483 }
484};
485
486
487//loop 1
488class _loop1 : public _unary {
489public:
490 //constructor.
491 _loop1(_expr *e) :
492 _unary(e)
493 {
494 }
495
496 //parse with whitespace
497 virtual bool parse_non_term(_context &con) const {
498 //parse the first; if the first fails, stop
499 if (!m_expr->parse_non_term(con)) return false;
500
501 //parse the rest until no more parsing is possible
502 for(;;) {
503 _state st(con);
504 if (!m_expr->parse_non_term(con)) {
505 con.restore(st);
506 break;
507 }
508 }
509
510 return true;
511 }
512
513 //parse terminal
514 virtual bool parse_term(_context &con) const {
515 //parse the first; if the first fails, stop
516 if (!m_expr->parse_term(con)) return false;
517
518 //parse the rest until no more parsing is possible
519 for(;;) {
520 _state st(con);
521 if (!m_expr->parse_term(con)) {
522 con.restore(st);
523 break;
524 }
525 }
526
527 return true;
528 }
529};
530
531
532//optional
533class _optional : public _unary {
534public:
535 //constructor.
536 _optional(_expr *e) :
537 _unary(e)
538 {
539 }
540
541 //parse with whitespace
542 virtual bool parse_non_term(_context &con) const {
543 _state st(con);
544 if (!m_expr->parse_non_term(con)) con.restore(st);
545 return true;
546 }
547
548 //parse terminal
549 virtual bool parse_term(_context &con) const {
550 _state st(con);
551 if (!m_expr->parse_term(con)) con.restore(st);
552 return true;
553 }
554};
555
556
557//and
558class _and : public _unary {
559public:
560 //constructor.
561 _and(_expr *e) :
562 _unary(e)
563 {
564 }
565
566 //parse with whitespace
567 virtual bool parse_non_term(_context &con) const {
568 _state st(con);
569 bool ok = m_expr->parse_non_term(con);
570 con.restore(st);
571 return ok;
572 }
573
574 //parse terminal
575 virtual bool parse_term(_context &con) const {
576 _state st(con);
577 bool ok = m_expr->parse_term(con);
578 con.restore(st);
579 return ok;
580 }
581};
582
583
584//not
585class _not : public _unary {
586public:
587 //constructor.
588 _not(_expr *e) :
589 _unary(e)
590 {
591 }
592
593 //parse with whitespace
594 virtual bool parse_non_term(_context &con) const {
595 _state st(con);
596 bool ok = !m_expr->parse_non_term(con);
597 con.restore(st);
598 return ok;
599 }
600
601 //parse terminal
602 virtual bool parse_term(_context &con) const {
603 _state st(con);
604 bool ok = !m_expr->parse_term(con);
605 con.restore(st);
606 return ok;
607 }
608};
609
610
611//newline
612class _nl : public _unary {
613public:
614 //constructor.
615 _nl(_expr *e) :
616 _unary(e)
617 {
618 }
619
620 //parse with whitespace
621 virtual bool parse_non_term(_context &con) const {
622 if (!m_expr->parse_non_term(con)) return false;
623 con.next_line();
624 return true;
625 }
626
627 //parse terminal
628 virtual bool parse_term(_context &con) const {
629 if (!m_expr->parse_term(con)) return false;
630 con.next_line();
631 return true;
632 }
633};
634
635
636//base class for binary expressions
637class _binary : public _expr {
638public:
639 //constructor.
640 _binary(_expr *left, _expr *right) :
641 m_left(left), m_right(right)
642 {
643 }
644
645 //destructor.
646 virtual ~_binary() {
647 delete m_left;
648 delete m_right;
649 }
650
651protected:
652 //left and right expressions
653 _expr *m_left, *m_right;
654};
655
656
657//sequence
658class _seq : public _binary {
659public:
660 //constructor.
661 _seq(_expr *left, _expr *right) :
662 _binary(left, right)
663 {
664 }
665
666 //parse with whitespace
667 virtual bool parse_non_term(_context &con) const {
668 if (!m_left->parse_non_term(con)) return false;
669 return m_right->parse_non_term(con);
670 }
671
672 //parse terminal
673 virtual bool parse_term(_context &con) const {
674 if (!m_left->parse_term(con)) return false;
675 return m_right->parse_term(con);
676 }
677};
678
679
680//choice
681class _choice : public _binary {
682public:
683 //constructor.
684 _choice(_expr *left, _expr *right) :
685 _binary(left, right)
686 {
687 }
688
689 //parse with whitespace
690 virtual bool parse_non_term(_context &con) const {
691 _state st(con);
692 if (m_left->parse_non_term(con)) return true;
693 con.restore(st);
694 return m_right->parse_non_term(con);
695 }
696
697 //parse terminal
698 virtual bool parse_term(_context &con) const {
699 _state st(con);
700 if (m_left->parse_term(con)) return true;
701 con.restore(st);
702 return m_right->parse_term(con);
703 }
704};
705
706
707//reference to rule
708class _ref : public _expr {
709public:
710 //constructor.
711 _ref(rule &r) :
712 m_rule(r)
713 {
714 }
715
716 //parse with whitespace
717 virtual bool parse_non_term(_context &con) const {
718 return con.parse_non_term(m_rule);
719 }
720
721 //parse terminal
722 virtual bool parse_term(_context &con) const {
723 return con.parse_term(m_rule);
724 }
725
726private:
727 //reference
728 rule &m_rule;
729};
730
731
732//eof
733class _eof : public _expr {
734public:
735 //parse with whitespace
736 virtual bool parse_non_term(_context &con) const {
737 return parse_term(con);
738 }
739
740 //parse terminal
741 virtual bool parse_term(_context &con) const {
742 return con.end();
743 }
744};
745
746
747//any
748class _any : public _expr {
749public:
750 //parse with whitespace
751 virtual bool parse_non_term(_context &con) const {
752 return parse_term(con);
753 }
754
755 //parse terminal
756 virtual bool parse_term(_context &con) const {
757 if (!con.end()) {
758 con.next_col();
759 return true;
760 }
761 con.set_error_pos();
762 return false;
763 }
764};
765
766
767//true
768class _true : public _expr {
769public:
770 //parse with whitespace
771 virtual bool parse_non_term(_context &con) const {
772 return true;
773 }
774
775 //parse terminal
776 virtual bool parse_term(_context &con) const {
777 return true;
778 }
779};
780
781
782//false
783class _false: public _expr {
784public:
785 //parse with whitespace
786 virtual bool parse_non_term(_context &con) const {
787 return false;
788 }
789
790 //parse terminal
791 virtual bool parse_term(_context &con) const {
792 return false;
793 }
794};
795
796//exception thrown when left recursion terminates successfully
797struct _lr_ok {
798 rule *m_rule;
799 _lr_ok(rule *r) : m_rule(r) {}
800};
801
802
803//constructor
804_state::_state(_context &con) :
805 m_pos(con.m_pos),
806 m_matches(con.m_matches.size())
807{
808}
809
810
811//parse non-term rule.
812bool _context::parse_non_term(rule &r) {
813 //save the state of the rule
814 rule::_state old_state = r.m_state;
815
816 //success/failure result
817 bool ok;
818
819 //compute the new position
820 size_t new_pos = m_pos.m_it - m_begin;
821
822 //check if we have left recursion
823 bool lr = new_pos == r.m_state.m_pos;
824
825 //update the rule's state
826 r.m_state.m_pos = new_pos;
827
828 //handle the mode of the rule
829 switch (r.m_state.m_mode) {
830 //normal parse
831 case rule::_PARSE:
832 if (lr) {
833 //first try to parse the rule by rejecting it, so alternative branches are examined
834 r.m_state.m_mode = rule::_REJECT;
835 ok = _parse_non_term(r);
836
837 //if the first try is successful, try accepting the rule,
838 //so other elements of the sequence are parsed
839 if (ok) {
840 r.m_state.m_mode = rule::_ACCEPT;
841
842 //loop until no more parsing can be done
843 for(;;) {
844 //store the correct state, in order to backtrack if the call fails
845 _state st(*this);
846
847 //update the rule position to the current position,
848 //because at this state the rule is resolving the left recursion
849 r.m_state.m_pos = m_pos.m_it - m_begin;
850
851 //if parsing fails, restore the last good state and stop
852 if (!_parse_non_term(r)) {
853 restore(st);
854 break;
855 }
856 }
857
858 //since the left recursion was resolved successfully,
859 //return via a non-local exit
860 r.m_state = old_state;
861 throw _lr_ok(r.this_ptr());
862 }
863 }
864 else {
865 try {
866 ok = _parse_non_term(r);
867 }
868 catch (const _lr_ok &ex) {
869 //since left recursions may be mutual, we must test which rule's left recursion
870 //was ended successfully
871 if (ex.m_rule == r.this_ptr()) {
872 ok = true;
873 }
874 else {
875 r.m_state = old_state;
876 throw;
877 }
878 }
879 }
880 break;
881
882 //reject the left recursive rule
883 case rule::_REJECT:
884 if (lr) {
885 ok = false;
886 }
887 else {
888 r.m_state.m_mode = rule::_PARSE;
889 ok = _parse_non_term(r);
890 r.m_state.m_mode = rule::_REJECT;
891 }
892 break;
893
894 //accept the left recursive rule
895 case rule::_ACCEPT:
896 if (lr) {
897 ok = true;
898 }
899 else {
900 r.m_state.m_mode = rule::_PARSE;
901 ok = _parse_non_term(r);
902 r.m_state.m_mode = rule::_ACCEPT;
903 }
904 break;
905 }
906
907 //restore the rule's state
908 r.m_state = old_state;
909
910 return ok;
911}
912
913
914//parse term rule.
915bool _context::parse_term(rule &r) {
916 //save the state of the rule
917 rule::_state old_state = r.m_state;
918
919 //success/failure result
920 bool ok;
921
922 //compute the new position
923 size_t new_pos = m_pos.m_it - m_begin;
924
925 //check if we have left recursion
926 bool lr = new_pos == r.m_state.m_pos;
927
928 //update the rule's state
929 r.m_state.m_pos = new_pos;
930
931 //handle the mode of the rule
932 switch (r.m_state.m_mode) {
933 //normal parse
934 case rule::_PARSE:
935 if (lr) {
936 //first try to parse the rule by rejecting it, so alternative branches are examined
937 r.m_state.m_mode = rule::_REJECT;
938 ok = _parse_term(r);
939
940 //if the first try is successful, try accepting the rule,
941 //so other elements of the sequence are parsed
942 if (ok) {
943 r.m_state.m_mode = rule::_ACCEPT;
944
945 //loop until no more parsing can be done
946 for(;;) {
947 //store the correct state, in order to backtrack if the call fails
948 _state st(*this);
949
950 //update the rule position to the current position,
951 //because at this state the rule is resolving the left recursion
952 r.m_state.m_pos = m_pos.m_it - m_begin;
953
954 //if parsing fails, restore the last good state and stop
955 if (!_parse_term(r)) {
956 restore(st);
957 break;
958 }
959 }
960
961 //since the left recursion was resolved successfully,
962 //return via a non-local exit
963 r.m_state = old_state;
964 throw _lr_ok(r.this_ptr());
965 }
966 }
967 else {
968 try {
969 ok = _parse_term(r);
970 }
971 catch (const _lr_ok &ex) {
972 //since left recursions may be mutual, we must test which rule's left recursion
973 //was ended successfully
974 if (ex.m_rule == r.this_ptr()) {
975 ok = true;
976 }
977 else {
978 r.m_state = old_state;
979 throw;
980 }
981 }
982 }
983 break;
984
985 //reject the left recursive rule
986 case rule::_REJECT:
987 if (lr) {
988 ok = false;
989 }
990 else {
991 r.m_state.m_mode = rule::_PARSE;
992 ok = _parse_term(r);
993 r.m_state.m_mode = rule::_REJECT;
994 }
995 break;
996
997 //accept the left recursive rule
998 case rule::_ACCEPT:
999 if (lr) {
1000 ok = true;
1001 }
1002 else {
1003 r.m_state.m_mode = rule::_PARSE;
1004 ok = _parse_term(r);
1005 r.m_state.m_mode = rule::_ACCEPT;
1006 }
1007 break;
1008 }
1009
1010 //restore the rule's state
1011 r.m_state = old_state;
1012
1013 return ok;
1014}
1015
1016
1017//parse non-term rule internal.
1018bool _context::_parse_non_term(rule &r) {
1019 bool ok;
1020 if (_private::get_parse_proc(r)) {
1021 pos b = m_pos;
1022 ok = _private::get_expr(r)->parse_non_term(*this);
1023 if (ok) {
1024 m_matches.push_back(_match(r.this_ptr(), b, m_pos));
1025 }
1026 }
1027 else {
1028 ok = _private::get_expr(r)->parse_non_term(*this);
1029 }
1030 return ok;
1031}
1032
1033
1034//parse term rule internal.
1035bool _context::_parse_term(rule &r) {
1036 bool ok;
1037 if (_private::get_parse_proc(r)) {
1038 pos b = m_pos;
1039 ok = _private::get_expr(r)->parse_term(*this);
1040 if (ok) {
1041 m_matches.push_back(_match(r.this_ptr(), b, m_pos));
1042 }
1043 }
1044 else {
1045 ok = _private::get_expr(r)->parse_term(*this);
1046 }
1047 return ok;
1048}
1049
1050
1051//get syntax error
1052static error _syntax_error(_context &con) {
1053 return error(con.m_error_pos, con.m_error_pos, ERROR_SYNTAX_ERROR);
1054}
1055
1056
1057//get eof error
1058static error _eof_error(_context &con) {
1059 return error(con.m_error_pos, con.m_error_pos, ERROR_INVALID_EOF);
1060}
1061
1062
1063/** constructor from input.
1064 @param i input.
1065 */
1066pos::pos(input &i) :
1067 m_it(i.begin()),
1068 m_line(1),
1069 m_col(1)
1070{
1071}
1072
1073
1074/** character terminal constructor.
1075 @param c character.
1076 */
1077expr::expr(char c) :
1078 m_expr(new _char(c))
1079{
1080}
1081
1082
1083/** null-terminated string terminal constructor.
1084 @param s null-terminated string.
1085 */
1086expr::expr(const char *s) :
1087 m_expr(new _string(s))
1088{
1089}
1090
1091
1092/** rule reference constructor.
1093 @param r rule.
1094 */
1095expr::expr(rule &r) :
1096 m_expr(new _ref(r))
1097{
1098}
1099
1100
1101/** creates a zero-or-more loop out of this expression.
1102 @return a zero-or-more loop expression.
1103 */
1104expr expr::operator *() const {
1105 return _private::construct_expr(new _loop0(m_expr));
1106}
1107
1108
1109/** creates a one-or-more loop out of this expression.
1110 @return a one-or-more loop expression.
1111 */
1112expr expr::operator +() const {
1113 return _private::construct_expr(new _loop1(m_expr));
1114}
1115
1116
1117/** creates an optional out of this expression.
1118 @return an optional expression.
1119 */
1120expr expr::operator -() const {
1121 return _private::construct_expr(new _optional(m_expr));
1122}
1123
1124
1125/** creates an AND-expression.
1126 @return an AND-expression.
1127 */
1128expr expr::operator &() const {
1129 return _private::construct_expr((new _and(m_expr)));
1130}
1131
1132
1133/** creates a NOT-expression.
1134 @return a NOT-expression.
1135 */
1136expr expr::operator !() const {
1137 return _private::construct_expr(new _not(m_expr));
1138}
1139
1140
1141/** constructor.
1142 @param b begin position.
1143 @param e end position.
1144 */
1145input_range::input_range(const pos &b, const pos &e) :
1146m_begin(b),
1147m_end(e) {}
1148
1149
1150/** constructor.
1151 @param b begin position.
1152 @param e end position.
1153 @param t error type.
1154 */
1155error::error(const pos &b, const pos &e, int t) :
1156input_range(b, e),
1157m_type(t) {}
1158
1159
1160/** compare on begin position.
1161 @param e the other error to compare this with.
1162 @return true if this comes before the previous error, false otherwise.
1163 */
1164bool error::operator < (const error &e) const {
1165 return m_begin.m_it < e.m_begin.m_it;
1166}
1167
1168
1169/** character terminal constructor.
1170 @param c character.
1171 */
1172rule::rule(char c) :
1173 m_expr(new _char(c))
1174{
1175 m_parse_proc = _get_parse_proc(this);
1176}
1177
1178
1179/** null-terminated string terminal constructor.
1180 @param s null-terminated string.
1181 */
1182rule::rule(const char *s) :
1183 m_expr(new _string(s))
1184{
1185 m_parse_proc = _get_parse_proc(this);
1186}
1187
1188
1189/** constructor from expression.
1190 @param e expression.
1191 */
1192rule::rule(const expr &e) :
1193 m_expr(_private::get_expr(e))
1194{
1195 m_parse_proc = _get_parse_proc(this);
1196}
1197
1198
1199/** constructor from rule.
1200 @param r rule.
1201 */
1202rule::rule(rule &r) :
1203 m_expr(new _ref(r)),
1204 m_parse_proc(0)
1205{
1206 m_parse_proc = _get_parse_proc(this);
1207}
1208
1209
1210/** invalid constructor from rule (required by gcc).
1211 @param r rule.
1212 @exception std::logic_error always thrown.
1213 */
1214rule::rule(const rule &r) {
1215 throw std::logic_error("invalid operation");
1216}
1217
1218
1219/** deletes the internal object that represents the expression.
1220 */
1221rule::~rule() {
1222 delete m_expr;
1223}
1224
1225
1226/** creates a zero-or-more loop out of this rule.
1227 @return a zero-or-more loop rule.
1228 */
1229expr rule::operator *() {
1230 return _private::construct_expr(new _loop0(new _ref(*this)));
1231}
1232
1233
1234/** creates a one-or-more loop out of this rule.
1235 @return a one-or-more loop rule.
1236 */
1237expr rule::operator +() {
1238 return _private::construct_expr(new _loop1(new _ref(*this)));
1239}
1240
1241
1242/** creates an optional out of this rule.
1243 @return an optional rule.
1244 */
1245expr rule::operator -() {
1246 return _private::construct_expr(new _optional(new _ref(*this)));
1247}
1248
1249
1250/** creates an AND-expression out of this rule.
1251 @return an AND-expression out of this rule.
1252 */
1253expr rule::operator &() {
1254 return _private::construct_expr(new _and(new _ref(*this)));
1255}
1256
1257
1258/** creates a NOT-expression out of this rule.
1259 @return a NOT-expression out of this rule.
1260 */
1261expr rule::operator !() {
1262 return _private::construct_expr(new _not(new _ref(*this)));
1263}
1264
1265
1266/** sets the parse procedure.
1267 @param p procedure.
1268 */
1269void rule::set_parse_proc(parse_proc p) {
1270 assert(p);
1271 m_parse_proc = p;
1272 _parse_proc_map_t& _parse_proc_map = _get_parse_proc_map();
1273 _parse_proc_map[this] = p;
1274}
1275
1276
1277/** creates a sequence of expressions.
1278 @param left left operand.
1279 @param right right operand.
1280 @return an expression which parses a sequence.
1281 */
1282expr operator >> (const expr &left, const expr &right) {
1283 return _private::construct_expr(
1284 new _seq(_private::get_expr(left), _private::get_expr(right)));
1285}
1286
1287
1288/** creates a choice of expressions.
1289 @param left left operand.
1290 @param right right operand.
1291 @return an expression which parses a choice.
1292 */
1293expr operator | (const expr &left, const expr &right) {
1294 return _private::construct_expr(
1295 new _choice(_private::get_expr(left), _private::get_expr(right)));
1296}
1297
1298
1299/** converts a parser expression into a terminal.
1300 @param e expression.
1301 @return an expression which parses a terminal.
1302 */
1303expr term(const expr &e) {
1304 return _private::construct_expr(
1305 new _term(_private::get_expr(e)));
1306}
1307
1308
1309/** creates a set expression from a null-terminated string.
1310 @param s null-terminated string with characters of the set.
1311 @return an expression which parses a single character out of a set.
1312 */
1313expr set(const char *s) {
1314 return _private::construct_expr(new _set(s));
1315}
1316
1317
1318/** creates a range expression.
1319 @param min min character.
1320 @param max max character.
1321 @return an expression which parses a single character out of range.
1322 */
1323expr range(int min, int max) {
1324 return _private::construct_expr(new _set(min, max));
1325}
1326
1327
1328/** creates an expression which increments the line counter
1329 and resets the column counter when the given expression
1330 is parsed successfully; used for newline characters.
1331 @param e expression to wrap into a newline parser.
1332 @return an expression that handles newlines.
1333 */
1334expr nl(const expr &e) {
1335 return _private::construct_expr(new _nl(_private::get_expr(e)));
1336}
1337
1338
1339/** creates an expression which tests for the end of input.
1340 @return an expression that handles the end of input.
1341 */
1342expr eof() {
1343 return _private::construct_expr(new _eof());
1344}
1345
1346
1347/** creates a not expression.
1348 @param e expression.
1349 @return the appropriate expression.
1350 */
1351expr not_(const expr &e) {
1352 return !e;
1353}
1354
1355
1356/** creates an and expression.
1357 @param e expression.
1358 @return the appropriate expression.
1359 */
1360expr and_(const expr &e) {
1361 return &e;
1362}
1363
1364
1365/** creates an expression that parses any character.
1366 @return the appropriate expression.
1367 */
1368expr any() {
1369 return _private::construct_expr(new _any());
1370}
1371
1372
1373/** parsing succeeds without consuming any input.
1374*/
1375expr true_() {
1376 return _private::construct_expr(new _true());
1377}
1378
1379
1380/** parsing fails without consuming any input.
1381*/
1382expr false_() {
1383 return _private::construct_expr(new _false());
1384}
1385
1386
1387/** parse with target expression and let user handle result.
1388*/
1389expr user(const expr &e, const user_handler& handler) {
1390 return _private::construct_expr(new _user(_private::get_expr(e), handler));
1391}
1392
1393
1394/** parses the given input.
1395 The parse procedures of each rule parsed are executed
1396 before this function returns, if parsing succeeds.
1397 @param i input.
1398 @param g root rule of grammar.
1399 @param el list of errors.
1400 @param d user data, passed to the parse procedures.
1401 @return true on parsing success, false on failure.
1402 */
1403bool parse(input &i, rule &g, error_list &el, void *d, void* ud) {
1404 //prepare context
1405 _context con(i, ud);
1406
1407 //parse grammar
1408 if (!con.parse_non_term(g)) {
1409 el.push_back(_syntax_error(con));
1410 return false;
1411 }
1412
1413 //if end is not reached, there was an error
1414 if (!con.end()) {
1415 if (con.m_error_pos.m_it < con.m_end) {
1416 el.push_back(_syntax_error(con));
1417 }
1418 else {
1419 el.push_back(_eof_error(con));
1420 }
1421 return false;
1422 }
1423
1424 //success; execute the parse procedures
1425 con.do_parse_procs(d);
1426 return true;
1427}
1428
1429
1430} //namespace parserlib