aboutsummaryrefslogtreecommitdiff
path: root/MoonParser/parser.cpp
diff options
context:
space:
mode:
authorLi Jin <dragon-fly@qq.com>2017-07-13 16:03:11 +0800
committerLi Jin <dragon-fly@qq.com>2017-07-13 16:03:11 +0800
commitcb906e739f27931e9798510cd83725131ed55209 (patch)
tree52b465c5eb2250dec3ed3d5f02b86db79653b838 /MoonParser/parser.cpp
parent975c3c7dfa032229272c3b225de1127f1605e2d2 (diff)
downloadyuescript-cb906e739f27931e9798510cd83725131ed55209.tar.gz
yuescript-cb906e739f27931e9798510cd83725131ed55209.tar.bz2
yuescript-cb906e739f27931e9798510cd83725131ed55209.zip
rewrite parsing codes with parserlib.
Diffstat (limited to 'MoonParser/parser.cpp')
-rw-r--r--MoonParser/parser.cpp1462
1 files changed, 1462 insertions, 0 deletions
diff --git a/MoonParser/parser.cpp b/MoonParser/parser.cpp
new file mode 100644
index 0000000..5a7da73
--- /dev/null
+++ b/MoonParser/parser.cpp
@@ -0,0 +1,1462 @@
1#include <cstdlib>
2#include <cstring>
3#include <cassert>
4#include <stdexcept>
5#include <unordered_map>
6#include "parser.hpp"
7
8
9namespace parserlib {
10
11
12//internal map from rules to parse procs
13typedef std::unordered_map<rule *, parse_proc> _parse_proc_map_t;
14
15//on exit, it deletes the parse proc map
16static _parse_proc_map_t& _get_parse_proc_map() {
17 static _parse_proc_map_t _parse_proc_map;
18 return _parse_proc_map;
19}
20
21
22//get the parse proc from the map
23static parse_proc _get_parse_proc(rule *r) {
24 _parse_proc_map_t& _parse_proc_map = _get_parse_proc_map();
25 _parse_proc_map_t::iterator it = _parse_proc_map.find(r);
26 if (it == _parse_proc_map.end()) return 0;
27 return it->second;
28}
29
30
31//internal private class that manages access to the public classes' internals.
32class _private {
33public:
34 //get the internal expression object from the expression.
35 static _expr *get_expr(const expr &e) {
36 return e.m_expr;
37 }
38
39 //create new expression from given expression
40 static expr construct_expr(_expr *e) {
41 return e;
42 }
43
44 //get the internal expression object from the rule.
45 static _expr *get_expr(rule &r) {
46 return r.m_expr;
47 }
48
49 //get the internal parse proc from the rule.
50 static parse_proc get_parse_proc(rule &r) {
51 return r.m_parse_proc;
52 }
53};
54
55
56class _context;
57
58
59//parser state
60class _state {
61public:
62 //position
63 pos m_pos;
64
65 //size of match vector
66 size_t m_matches;
67
68 //constructor
69 _state(_context &con);
70};
71
72
73//match
74class _match {
75public:
76 //rule matched
77 rule *m_rule;
78
79 //begin position
80 pos m_begin;
81
82 //end position
83 pos m_end;
84
85 //null constructor
86 _match() {}
87
88 //constructor from parameters
89 _match(rule *r, const pos &b, const pos &e) :
90 m_rule(r),
91 m_begin(b),
92 m_end(e)
93 {
94 }
95};
96
97
98//match vector
99typedef std::vector<_match> _match_vector;
100
101
102//parsing context
103class _context {
104public:
105 //user data
106 void* m_user_data;
107
108 //current position
109 pos m_pos;
110
111 //error position
112 pos m_error_pos;
113
114 //input begin
115 input::iterator m_begin;
116
117 //input end
118 input::iterator m_end;
119
120 //matches
121 _match_vector m_matches;
122
123 //constructor
124 _context(input &i, void* ud) :
125 m_user_data(ud),
126 m_pos(i),
127 m_error_pos(i),
128 m_begin(i.begin()),
129 m_end(i.end())
130 {
131 }
132
133 //check if the end is reached
134 bool end() const {
135 return m_pos.m_it == m_end;
136 }
137
138 //get the current symbol
139 int symbol() const {
140 assert(!end());
141 return *m_pos.m_it;
142 }
143
144 //set the longest possible error
145 void set_error_pos() {
146 if (m_pos.m_it > m_error_pos.m_it) {
147 m_error_pos = m_pos;
148 }
149 }
150
151 //next column
152 void next_col() {
153 ++m_pos.m_it;
154 ++m_pos.m_col;
155 }
156
157 //next line
158 void next_line() {
159 ++m_pos.m_line;
160 m_pos.m_col = 1;
161 }
162
163 //restore the state
164 void restore(const _state &st) {
165 m_pos = st.m_pos;
166 m_matches.resize(st.m_matches);
167 }
168
169 //parse non-term rule.
170 bool parse_non_term(rule &r);
171
172 //parse term rule.
173 bool parse_term(rule &r);
174
175 //execute all the parse procs
176 void do_parse_procs(void *d) const {
177 for(_match_vector::const_iterator it = m_matches.begin();
178 it != m_matches.end();
179 ++it)
180 {
181 const _match &m = *it;
182 parse_proc p = _private::get_parse_proc(*m.m_rule);
183 p(m.m_begin, m.m_end, d);
184 }
185 }
186
187private:
188 //parse non-term rule.
189 bool _parse_non_term(rule &r);
190
191 //parse term rule.
192 bool _parse_term(rule &r);
193};
194
195
196//base class for expressions
197class _expr {
198public:
199 //destructor.
200 virtual ~_expr() {
201 }
202
203 //parse with whitespace
204 virtual bool parse_non_term(_context &con) const = 0;
205
206 //parse terminal
207 virtual bool parse_term(_context &con) const = 0;
208};
209
210
211//single character expression.
212class _char : public _expr {
213public:
214 //constructor.
215 _char(int c) :
216 m_char(c)
217 {
218 }
219
220 //parse with whitespace
221 virtual bool parse_non_term(_context &con) const {
222 return _parse(con);
223 }
224
225 //parse terminal
226 virtual bool parse_term(_context &con) const {
227 return _parse(con);
228 }
229
230private:
231 //character
232 int m_char;
233
234 //internal parse
235 bool _parse(_context &con) const {
236 if (!con.end()) {
237 int ch = con.symbol();
238 if (ch == m_char) {
239 con.next_col();
240 return true;
241 }
242 }
243 con.set_error_pos();
244 return false;
245 }
246};
247
248
249//string expression.
250class _string : public _expr {
251public:
252 //constructor from ansi string.
253 _string(const char *s) :
254 m_string(s, s + strlen(s))
255 {
256 }
257
258 //constructor from wide string.
259 _string(const wchar_t *s) :
260 m_string(s, s + wcslen(s))
261 {
262 }
263
264 //parse with whitespace
265 virtual bool parse_non_term(_context &con) const {
266 return _parse(con);
267 }
268
269 //parse terminal
270 virtual bool parse_term(_context &con) const {
271 return _parse(con);
272 }
273
274private:
275 //string
276 std::vector<int> m_string;
277
278 //parse the string
279 bool _parse(_context &con) const {
280 for(std::vector<int>::const_iterator it = m_string.begin(),
281 end = m_string.end();;)
282 {
283 if (it == end) return true;
284 if (con.end()) break;
285 if (con.symbol() != *it) break;
286 ++it;
287 con.next_col();
288 }
289 con.set_error_pos();
290 return false;
291 }
292};
293
294
295//set expression.
296class _set : public _expr {
297public:
298 //constructor from ansi string.
299 _set(const char *s) {
300 for(; *s; ++s) {
301 _add(*s);
302 }
303 }
304
305 //constructor from wide string.
306 _set(const wchar_t *s) {
307 for(; *s; ++s) {
308 _add(*s);
309 }
310 }
311
312 //constructor from range.
313 _set(int min, int max) {
314 assert(min >= 0);
315 assert(min <= max);
316 m_set.resize((size_t)max + 1U);
317 for(; min <= max; ++min) {
318 m_set[(size_t)min] = true;
319 }
320 }
321
322 //parse with whitespace
323 virtual bool parse_non_term(_context &con) const {
324 return _parse(con);
325 }
326
327 //parse terminal
328 virtual bool parse_term(_context &con) const {
329 return _parse(con);
330 }
331
332private:
333 //set is kept as an array of flags, for quick access
334 std::vector<bool> m_set;
335
336 //add character
337 void _add(size_t i) {
338 if (i >= m_set.size()) {
339 m_set.resize(i + 1);
340 }
341 m_set[i] = true;
342 }
343
344 //internal parse
345 bool _parse(_context &con) const {
346 if (!con.end()) {
347 size_t ch = con.symbol();
348 if (ch < m_set.size() && m_set[ch]) {
349 con.next_col();
350 return true;
351 }
352 }
353 con.set_error_pos();
354 return false;
355 }
356};
357
358
359//base class for unary expressions
360class _unary : public _expr {
361public:
362 //constructor.
363 _unary(_expr *e) :
364 m_expr(e)
365 {
366 }
367
368 //destructor.
369 virtual ~_unary() {
370 delete m_expr;
371 }
372
373protected:
374 //expression
375 _expr *m_expr;
376};
377
378
379//terminal
380class _term : public _unary {
381public:
382 //constructor.
383 _term(_expr *e) :
384 _unary(e)
385 {
386 }
387
388 //parse with whitespace
389 virtual bool parse_non_term(_context &con) const {
390 return m_expr->parse_term(con);
391 }
392
393 //parse terminal
394 virtual bool parse_term(_context &con) const {
395 return m_expr->parse_term(con);
396 }
397};
398
399
400//user
401class _user : public _unary {
402public:
403 //constructor.
404 _user(_expr *e, const user_handler &callback) :
405 _unary(e),
406 m_handler(callback)
407 {
408 }
409
410 //parse with whitespace
411 virtual bool parse_non_term(_context &con) const {
412 pos pos = con.m_pos;
413 if (m_expr->parse_non_term(con)) {
414 item_t item = {pos.m_it, con.m_pos.m_it, con.m_user_data};
415 return m_handler(item);
416 }
417 return false;
418 }
419
420 //parse terminal
421 virtual bool parse_term(_context &con) const {
422 pos pos = con.m_pos;
423 if (m_expr->parse_term(con)) {
424 item_t item = {pos.m_it, con.m_pos.m_it, con.m_user_data};
425 return m_handler(item);
426 }
427 return false;
428 }
429private:
430 user_handler m_handler;
431};
432
433
434//loop 0
435class _loop0 : public _unary {
436public:
437 //constructor.
438 _loop0(_expr *e) :
439 _unary(e)
440 {
441 }
442
443 //parse with whitespace
444 virtual bool parse_non_term(_context &con) const {
445 //if parsing of the first fails, restore the context and stop
446 _state st(con);
447 if (!m_expr->parse_non_term(con)) {
448 con.restore(st);
449 return true;
450 }
451
452 //parse the rest
453 for(;;) {
454 _state st(con);
455 if (!m_expr->parse_non_term(con)) {
456 con.restore(st);
457 break;
458 }
459 }
460
461 return true;
462 }
463
464 //parse terminal
465 virtual bool parse_term(_context &con) const {
466 //if parsing of the first fails, restore the context and stop
467 _state st(con);
468 if (!m_expr->parse_term(con)) {
469 con.restore(st);
470 return true;
471 }
472
473 //parse the rest until no more parsing is possible
474 for(;;) {
475 _state st(con);
476 if (!m_expr->parse_term(con)) {
477 con.restore(st);
478 break;
479 }
480 }
481
482 return true;
483 }
484};
485
486
487//loop 1
488class _loop1 : public _unary {
489public:
490 //constructor.
491 _loop1(_expr *e) :
492 _unary(e)
493 {
494 }
495
496 //parse with whitespace
497 virtual bool parse_non_term(_context &con) const {
498 //parse the first; if the first fails, stop
499 if (!m_expr->parse_non_term(con)) return false;
500
501 //parse the rest until no more parsing is possible
502 for(;;) {
503 _state st(con);
504 if (!m_expr->parse_non_term(con)) {
505 con.restore(st);
506 break;
507 }
508 }
509
510 return true;
511 }
512
513 //parse terminal
514 virtual bool parse_term(_context &con) const {
515 //parse the first; if the first fails, stop
516 if (!m_expr->parse_term(con)) return false;
517
518 //parse the rest until no more parsing is possible
519 for(;;) {
520 _state st(con);
521 if (!m_expr->parse_term(con)) {
522 con.restore(st);
523 break;
524 }
525 }
526
527 return true;
528 }
529};
530
531
532//optional
533class _optional : public _unary {
534public:
535 //constructor.
536 _optional(_expr *e) :
537 _unary(e)
538 {
539 }
540
541 //parse with whitespace
542 virtual bool parse_non_term(_context &con) const {
543 _state st(con);
544 if (!m_expr->parse_non_term(con)) con.restore(st);
545 return true;
546 }
547
548 //parse terminal
549 virtual bool parse_term(_context &con) const {
550 _state st(con);
551 if (!m_expr->parse_term(con)) con.restore(st);
552 return true;
553 }
554};
555
556
557//and
558class _and : public _unary {
559public:
560 //constructor.
561 _and(_expr *e) :
562 _unary(e)
563 {
564 }
565
566 //parse with whitespace
567 virtual bool parse_non_term(_context &con) const {
568 _state st(con);
569 bool ok = m_expr->parse_non_term(con);
570 con.restore(st);
571 return ok;
572 }
573
574 //parse terminal
575 virtual bool parse_term(_context &con) const {
576 _state st(con);
577 bool ok = m_expr->parse_term(con);
578 con.restore(st);
579 return ok;
580 }
581};
582
583
584//not
585class _not : public _unary {
586public:
587 //constructor.
588 _not(_expr *e) :
589 _unary(e)
590 {
591 }
592
593 //parse with whitespace
594 virtual bool parse_non_term(_context &con) const {
595 _state st(con);
596 bool ok = !m_expr->parse_non_term(con);
597 con.restore(st);
598 return ok;
599 }
600
601 //parse terminal
602 virtual bool parse_term(_context &con) const {
603 _state st(con);
604 bool ok = !m_expr->parse_term(con);
605 con.restore(st);
606 return ok;
607 }
608};
609
610
611//newline
612class _nl : public _unary {
613public:
614 //constructor.
615 _nl(_expr *e) :
616 _unary(e)
617 {
618 }
619
620 //parse with whitespace
621 virtual bool parse_non_term(_context &con) const {
622 if (!m_expr->parse_non_term(con)) return false;
623 con.next_line();
624 return true;
625 }
626
627 //parse terminal
628 virtual bool parse_term(_context &con) const {
629 if (!m_expr->parse_term(con)) return false;
630 con.next_line();
631 return true;
632 }
633};
634
635
636//base class for binary expressions
637class _binary : public _expr {
638public:
639 //constructor.
640 _binary(_expr *left, _expr *right) :
641 m_left(left), m_right(right)
642 {
643 }
644
645 //destructor.
646 virtual ~_binary() {
647 delete m_left;
648 delete m_right;
649 }
650
651protected:
652 //left and right expressions
653 _expr *m_left, *m_right;
654};
655
656
657//sequence
658class _seq : public _binary {
659public:
660 //constructor.
661 _seq(_expr *left, _expr *right) :
662 _binary(left, right)
663 {
664 }
665
666 //parse with whitespace
667 virtual bool parse_non_term(_context &con) const {
668 if (!m_left->parse_non_term(con)) return false;
669 return m_right->parse_non_term(con);
670 }
671
672 //parse terminal
673 virtual bool parse_term(_context &con) const {
674 if (!m_left->parse_term(con)) return false;
675 return m_right->parse_term(con);
676 }
677};
678
679
680//choice
681class _choice : public _binary {
682public:
683 //constructor.
684 _choice(_expr *left, _expr *right) :
685 _binary(left, right)
686 {
687 }
688
689 //parse with whitespace
690 virtual bool parse_non_term(_context &con) const {
691 _state st(con);
692 if (m_left->parse_non_term(con)) return true;
693 con.restore(st);
694 return m_right->parse_non_term(con);
695 }
696
697 //parse terminal
698 virtual bool parse_term(_context &con) const {
699 _state st(con);
700 if (m_left->parse_term(con)) return true;
701 con.restore(st);
702 return m_right->parse_term(con);
703 }
704};
705
706
707//reference to rule
708class _ref : public _expr {
709public:
710 //constructor.
711 _ref(rule &r) :
712 m_rule(r)
713 {
714 }
715
716 //parse with whitespace
717 virtual bool parse_non_term(_context &con) const {
718 return con.parse_non_term(m_rule);
719 }
720
721 //parse terminal
722 virtual bool parse_term(_context &con) const {
723 return con.parse_term(m_rule);
724 }
725
726private:
727 //reference
728 rule &m_rule;
729};
730
731
732//eof
733class _eof : public _expr {
734public:
735 //parse with whitespace
736 virtual bool parse_non_term(_context &con) const {
737 return parse_term(con);
738 }
739
740 //parse terminal
741 virtual bool parse_term(_context &con) const {
742 return con.end();
743 }
744};
745
746
747//any
748class _any : public _expr {
749public:
750 //parse with whitespace
751 virtual bool parse_non_term(_context &con) const {
752 return parse_term(con);
753 }
754
755 //parse terminal
756 virtual bool parse_term(_context &con) const {
757 if (!con.end()) {
758 con.next_col();
759 return true;
760 }
761 con.set_error_pos();
762 return false;
763 }
764};
765
766
767//true
768class _true : public _expr {
769public:
770 //parse with whitespace
771 virtual bool parse_non_term(_context &con) const {
772 return true;
773 }
774
775 //parse terminal
776 virtual bool parse_term(_context &con) const {
777 return true;
778 }
779};
780
781
782//false
783class _false: public _expr {
784public:
785 //parse with whitespace
786 virtual bool parse_non_term(_context &con) const {
787 return false;
788 }
789
790 //parse terminal
791 virtual bool parse_term(_context &con) const {
792 return false;
793 }
794};
795
796//exception thrown when left recursion terminates successfully
797struct _lr_ok {
798 rule *m_rule;
799 _lr_ok(rule *r) : m_rule(r) {}
800};
801
802
803//constructor
804_state::_state(_context &con) :
805 m_pos(con.m_pos),
806 m_matches(con.m_matches.size())
807{
808}
809
810
811//parse non-term rule.
812bool _context::parse_non_term(rule &r) {
813 //save the state of the rule
814 rule::_state old_state = r.m_state;
815
816 //success/failure result
817 bool ok;
818
819 //compute the new position
820 size_t new_pos = m_pos.m_it - m_begin;
821
822 //check if we have left recursion
823 bool lr = new_pos == r.m_state.m_pos;
824
825 //update the rule's state
826 r.m_state.m_pos = new_pos;
827
828 //handle the mode of the rule
829 switch (r.m_state.m_mode) {
830 //normal parse
831 case rule::_PARSE:
832 if (lr) {
833 //first try to parse the rule by rejecting it, so alternative branches are examined
834 r.m_state.m_mode = rule::_REJECT;
835 ok = _parse_non_term(r);
836
837 //if the first try is successful, try accepting the rule,
838 //so other elements of the sequence are parsed
839 if (ok) {
840 r.m_state.m_mode = rule::_ACCEPT;
841
842 //loop until no more parsing can be done
843 for(;;) {
844 //store the correct state, in order to backtrack if the call fails
845 _state st(*this);
846
847 //update the rule position to the current position,
848 //because at this state the rule is resolving the left recursion
849 r.m_state.m_pos = m_pos.m_it - m_begin;
850
851 //if parsing fails, restore the last good state and stop
852 if (!_parse_non_term(r)) {
853 restore(st);
854 break;
855 }
856 }
857
858 //since the left recursion was resolved successfully,
859 //return via a non-local exit
860 r.m_state = old_state;
861 throw _lr_ok(r.this_ptr());
862 }
863 }
864 else {
865 try {
866 ok = _parse_non_term(r);
867 }
868 catch (const _lr_ok &ex) {
869 //since left recursions may be mutual, we must test which rule's left recursion
870 //was ended successfully
871 if (ex.m_rule == r.this_ptr()) {
872 ok = true;
873 }
874 else {
875 r.m_state = old_state;
876 throw;
877 }
878 }
879 }
880 break;
881
882 //reject the left recursive rule
883 case rule::_REJECT:
884 if (lr) {
885 ok = false;
886 }
887 else {
888 r.m_state.m_mode = rule::_PARSE;
889 ok = _parse_non_term(r);
890 r.m_state.m_mode = rule::_REJECT;
891 }
892 break;
893
894 //accept the left recursive rule
895 case rule::_ACCEPT:
896 if (lr) {
897 ok = true;
898 }
899 else {
900 r.m_state.m_mode = rule::_PARSE;
901 ok = _parse_non_term(r);
902 r.m_state.m_mode = rule::_ACCEPT;
903 }
904 break;
905 }
906
907 //restore the rule's state
908 r.m_state = old_state;
909
910 return ok;
911}
912
913
914//parse term rule.
915bool _context::parse_term(rule &r) {
916 //save the state of the rule
917 rule::_state old_state = r.m_state;
918
919 //success/failure result
920 bool ok;
921
922 //compute the new position
923 size_t new_pos = m_pos.m_it - m_begin;
924
925 //check if we have left recursion
926 bool lr = new_pos == r.m_state.m_pos;
927
928 //update the rule's state
929 r.m_state.m_pos = new_pos;
930
931 //handle the mode of the rule
932 switch (r.m_state.m_mode) {
933 //normal parse
934 case rule::_PARSE:
935 if (lr) {
936 //first try to parse the rule by rejecting it, so alternative branches are examined
937 r.m_state.m_mode = rule::_REJECT;
938 ok = _parse_term(r);
939
940 //if the first try is successful, try accepting the rule,
941 //so other elements of the sequence are parsed
942 if (ok) {
943 r.m_state.m_mode = rule::_ACCEPT;
944
945 //loop until no more parsing can be done
946 for(;;) {
947 //store the correct state, in order to backtrack if the call fails
948 _state st(*this);
949
950 //update the rule position to the current position,
951 //because at this state the rule is resolving the left recursion
952 r.m_state.m_pos = m_pos.m_it - m_begin;
953
954 //if parsing fails, restore the last good state and stop
955 if (!_parse_term(r)) {
956 restore(st);
957 break;
958 }
959 }
960
961 //since the left recursion was resolved successfully,
962 //return via a non-local exit
963 r.m_state = old_state;
964 throw _lr_ok(r.this_ptr());
965 }
966 }
967 else {
968 try {
969 ok = _parse_term(r);
970 }
971 catch (const _lr_ok &ex) {
972 //since left recursions may be mutual, we must test which rule's left recursion
973 //was ended successfully
974 if (ex.m_rule == r.this_ptr()) {
975 ok = true;
976 }
977 else {
978 r.m_state = old_state;
979 throw;
980 }
981 }
982 }
983 break;
984
985 //reject the left recursive rule
986 case rule::_REJECT:
987 if (lr) {
988 ok = false;
989 }
990 else {
991 r.m_state.m_mode = rule::_PARSE;
992 ok = _parse_term(r);
993 r.m_state.m_mode = rule::_REJECT;
994 }
995 break;
996
997 //accept the left recursive rule
998 case rule::_ACCEPT:
999 if (lr) {
1000 ok = true;
1001 }
1002 else {
1003 r.m_state.m_mode = rule::_PARSE;
1004 ok = _parse_term(r);
1005 r.m_state.m_mode = rule::_ACCEPT;
1006 }
1007 break;
1008 }
1009
1010 //restore the rule's state
1011 r.m_state = old_state;
1012
1013 return ok;
1014}
1015
1016
1017//parse non-term rule internal.
1018bool _context::_parse_non_term(rule &r) {
1019 bool ok;
1020 if (_private::get_parse_proc(r)) {
1021 pos b = m_pos;
1022 ok = _private::get_expr(r)->parse_non_term(*this);
1023 if (ok) {
1024 m_matches.push_back(_match(r.this_ptr(), b, m_pos));
1025 }
1026 }
1027 else {
1028 ok = _private::get_expr(r)->parse_non_term(*this);
1029 }
1030 return ok;
1031}
1032
1033
1034//parse term rule internal.
1035bool _context::_parse_term(rule &r) {
1036 bool ok;
1037 if (_private::get_parse_proc(r)) {
1038 pos b = m_pos;
1039 ok = _private::get_expr(r)->parse_term(*this);
1040 if (ok) {
1041 m_matches.push_back(_match(r.this_ptr(), b, m_pos));
1042 }
1043 }
1044 else {
1045 ok = _private::get_expr(r)->parse_term(*this);
1046 }
1047 return ok;
1048}
1049
1050
1051//get syntax error
1052static error _syntax_error(_context &con) {
1053 return error(con.m_error_pos, con.m_error_pos, ERROR_SYNTAX_ERROR);
1054}
1055
1056
1057//get eof error
1058static error _eof_error(_context &con) {
1059 return error(con.m_error_pos, con.m_error_pos, ERROR_INVALID_EOF);
1060}
1061
1062
1063/** constructor from input.
1064 @param i input.
1065 */
1066pos::pos(input &i) :
1067 m_it(i.begin()),
1068 m_line(1),
1069 m_col(1)
1070{
1071}
1072
1073
1074/** character terminal constructor.
1075 @param c character.
1076 */
1077expr::expr(int c) :
1078 m_expr(new _char(c))
1079{
1080}
1081
1082
1083/** null-terminated string terminal constructor.
1084 @param s null-terminated string.
1085 */
1086expr::expr(const char *s) :
1087 m_expr(new _string(s))
1088{
1089}
1090
1091
1092/** null-terminated wide string terminal constructor.
1093 @param s null-terminated string.
1094 */
1095expr::expr(const wchar_t *s) :
1096 m_expr(new _string(s))
1097{
1098}
1099
1100
1101/** rule reference constructor.
1102 @param r rule.
1103 */
1104expr::expr(rule &r) :
1105 m_expr(new _ref(r))
1106{
1107}
1108
1109
1110/** creates a zero-or-more loop out of this expression.
1111 @return a zero-or-more loop expression.
1112 */
1113expr expr::operator *() const {
1114 return _private::construct_expr(new _loop0(m_expr));
1115}
1116
1117
1118/** creates a one-or-more loop out of this expression.
1119 @return a one-or-more loop expression.
1120 */
1121expr expr::operator +() const {
1122 return _private::construct_expr(new _loop1(m_expr));
1123}
1124
1125
1126/** creates an optional out of this expression.
1127 @return an optional expression.
1128 */
1129expr expr::operator -() const {
1130 return _private::construct_expr(new _optional(m_expr));
1131}
1132
1133
1134/** creates an AND-expression.
1135 @return an AND-expression.
1136 */
1137expr expr::operator &() const {
1138 return _private::construct_expr((new _and(m_expr)));
1139}
1140
1141
1142/** creates a NOT-expression.
1143 @return a NOT-expression.
1144 */
1145expr expr::operator !() const {
1146 return _private::construct_expr(new _not(m_expr));
1147}
1148
1149
1150/** constructor.
1151 @param b begin position.
1152 @param e end position.
1153 */
1154input_range::input_range(const pos &b, const pos &e) :
1155 m_begin(b),
1156 m_end(e)
1157{
1158}
1159
1160
1161/** constructor.
1162 @param b begin position.
1163 @param e end position.
1164 @param t error type.
1165 */
1166error::error(const pos &b, const pos &e, int t) :
1167 input_range(b, e),
1168 m_type(t)
1169{
1170}
1171
1172
1173/** compare on begin position.
1174 @param e the other error to compare this with.
1175 @return true if this comes before the previous error, false otherwise.
1176 */
1177bool error::operator < (const error &e) const {
1178 return m_begin.m_it < e.m_begin.m_it;
1179}
1180
1181
1182/** character terminal constructor.
1183 @param c character.
1184 */
1185rule::rule(int c) :
1186 m_expr(new _char(c))
1187{
1188 m_parse_proc = _get_parse_proc(this);
1189}
1190
1191
1192/** null-terminated string terminal constructor.
1193 @param s null-terminated string.
1194 */
1195rule::rule(const char *s) :
1196 m_expr(new _string(s))
1197{
1198 m_parse_proc = _get_parse_proc(this);
1199}
1200
1201
1202/** null-terminated wide string terminal constructor.
1203 @param s null-terminated string.
1204 */
1205rule::rule(const wchar_t *s) :
1206 m_expr(new _string(s))
1207{
1208 m_parse_proc = _get_parse_proc(this);
1209}
1210
1211
1212/** constructor from expression.
1213 @param e expression.
1214 */
1215rule::rule(const expr &e) :
1216 m_expr(_private::get_expr(e))
1217{
1218 m_parse_proc = _get_parse_proc(this);
1219}
1220
1221
1222/** constructor from rule.
1223 @param r rule.
1224 */
1225rule::rule(rule &r) :
1226 m_expr(new _ref(r)),
1227 m_parse_proc(0)
1228{
1229 m_parse_proc = _get_parse_proc(this);
1230}
1231
1232
1233/** invalid constructor from rule (required by gcc).
1234 @param r rule.
1235 @exception std::logic_error always thrown.
1236 */
1237rule::rule(const rule &r) {
1238 throw std::logic_error("invalid operation");
1239}
1240
1241
1242/** deletes the internal object that represents the expression.
1243 */
1244rule::~rule() {
1245 delete m_expr;
1246}
1247
1248
1249/** creates a zero-or-more loop out of this rule.
1250 @return a zero-or-more loop rule.
1251 */
1252expr rule::operator *() {
1253 return _private::construct_expr(new _loop0(new _ref(*this)));
1254}
1255
1256
1257/** creates a one-or-more loop out of this rule.
1258 @return a one-or-more loop rule.
1259 */
1260expr rule::operator +() {
1261 return _private::construct_expr(new _loop1(new _ref(*this)));
1262}
1263
1264
1265/** creates an optional out of this rule.
1266 @return an optional rule.
1267 */
1268expr rule::operator -() {
1269 return _private::construct_expr(new _optional(new _ref(*this)));
1270}
1271
1272
1273/** creates an AND-expression out of this rule.
1274 @return an AND-expression out of this rule.
1275 */
1276expr rule::operator &() {
1277 return _private::construct_expr(new _and(new _ref(*this)));
1278}
1279
1280
1281/** creates a NOT-expression out of this rule.
1282 @return a NOT-expression out of this rule.
1283 */
1284expr rule::operator !() {
1285 return _private::construct_expr(new _not(new _ref(*this)));
1286}
1287
1288
1289/** sets the parse procedure.
1290 @param p procedure.
1291 */
1292void rule::set_parse_proc(parse_proc p) {
1293 assert(p);
1294 m_parse_proc = p;
1295 _parse_proc_map_t& _parse_proc_map = _get_parse_proc_map();
1296 _parse_proc_map[this] = p;
1297}
1298
1299
1300/** creates a sequence of expressions.
1301 @param left left operand.
1302 @param right right operand.
1303 @return an expression which parses a sequence.
1304 */
1305expr operator >> (const expr &left, const expr &right) {
1306 return _private::construct_expr(
1307 new _seq(_private::get_expr(left), _private::get_expr(right)));
1308}
1309
1310
1311/** creates a choice of expressions.
1312 @param left left operand.
1313 @param right right operand.
1314 @return an expression which parses a choice.
1315 */
1316expr operator | (const expr &left, const expr &right) {
1317 return _private::construct_expr(
1318 new _choice(_private::get_expr(left), _private::get_expr(right)));
1319}
1320
1321
1322/** converts a parser expression into a terminal.
1323 @param e expression.
1324 @return an expression which parses a terminal.
1325 */
1326expr term(const expr &e) {
1327 return _private::construct_expr(
1328 new _term(_private::get_expr(e)));
1329}
1330
1331
1332/** creates a set expression from a null-terminated string.
1333 @param s null-terminated string with characters of the set.
1334 @return an expression which parses a single character out of a set.
1335 */
1336expr set(const char *s) {
1337 return _private::construct_expr(new _set(s));
1338}
1339
1340
1341/** creates a set expression from a null-terminated wide string.
1342 @param s null-terminated string with characters of the set.
1343 @return an expression which parses a single character out of a set.
1344 */
1345expr set(const wchar_t *s) {
1346 return _private::construct_expr(new _set(s));
1347}
1348
1349
1350/** creates a range expression.
1351 @param min min character.
1352 @param max max character.
1353 @return an expression which parses a single character out of range.
1354 */
1355expr range(int min, int max) {
1356 return _private::construct_expr(new _set(min, max));
1357}
1358
1359
1360/** creates an expression which increments the line counter
1361 and resets the column counter when the given expression
1362 is parsed successfully; used for newline characters.
1363 @param e expression to wrap into a newline parser.
1364 @return an expression that handles newlines.
1365 */
1366expr nl(const expr &e) {
1367 return _private::construct_expr(new _nl(_private::get_expr(e)));
1368}
1369
1370
1371/** creates an expression which tests for the end of input.
1372 @return an expression that handles the end of input.
1373 */
1374expr eof() {
1375 return _private::construct_expr(new _eof());
1376}
1377
1378
1379/** creates a not expression.
1380 @param e expression.
1381 @return the appropriate expression.
1382 */
1383expr not_(const expr &e) {
1384 return !e;
1385}
1386
1387
1388/** creates an and expression.
1389 @param e expression.
1390 @return the appropriate expression.
1391 */
1392expr and_(const expr &e) {
1393 return &e;
1394}
1395
1396
1397/** creates an expression that parses any character.
1398 @return the appropriate expression.
1399 */
1400expr any() {
1401 return _private::construct_expr(new _any());
1402}
1403
1404
1405/** parsing succeeds without consuming any input.
1406*/
1407expr true_() {
1408 return _private::construct_expr(new _true());
1409}
1410
1411
1412/** parsing fails without consuming any input.
1413*/
1414expr false_() {
1415 return _private::construct_expr(new _false());
1416}
1417
1418
1419/** parse with target expression and let user handle result.
1420*/
1421expr user(const expr &e, const user_handler& handler) {
1422 return _private::construct_expr(new _user(_private::get_expr(e), handler));
1423}
1424
1425
1426/** parses the given input.
1427 The parse procedures of each rule parsed are executed
1428 before this function returns, if parsing succeeds.
1429 @param i input.
1430 @param g root rule of grammar.
1431 @param el list of errors.
1432 @param d user data, passed to the parse procedures.
1433 @return true on parsing success, false on failure.
1434 */
1435bool parse(input &i, rule &g, error_list &el, void *d, void* ud) {
1436 //prepare context
1437 _context con(i, ud);
1438
1439 //parse grammar
1440 if (!con.parse_non_term(g)) {
1441 el.push_back(_syntax_error(con));
1442 return false;
1443 }
1444
1445 //if end is not reached, there was an error
1446 if (!con.end()) {
1447 if (con.m_error_pos.m_it < con.m_end) {
1448 el.push_back(_syntax_error(con));
1449 }
1450 else {
1451 el.push_back(_eof_error(con));
1452 }
1453 return false;
1454 }
1455
1456 //success; execute the parse procedures
1457 con.do_parse_procs(d);
1458 return true;
1459}
1460
1461
1462} //namespace parserlib