aboutsummaryrefslogtreecommitdiff
path: root/src/MoonP/parser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/MoonP/parser.cpp')
-rw-r--r--src/MoonP/parser.cpp1439
1 files changed, 1439 insertions, 0 deletions
diff --git a/src/MoonP/parser.cpp b/src/MoonP/parser.cpp
new file mode 100644
index 0000000..19c0068
--- /dev/null
+++ b/src/MoonP/parser.cpp
@@ -0,0 +1,1439 @@
1/* Copyright (c) 2012, Achilleas Margaritis, modified by Jin Li
2All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
6 Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8
9THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
10
11#include <cstdlib>
12#include <cstring>
13#include <cassert>
14#include <stdexcept>
15#include <unordered_map>
16#include <unordered_set>
17
18#include "MoonP/parser.hpp"
19
20
21namespace parserlib {
22
23
24//internal map from rules to parse procs
25typedef std::unordered_map<rule *, parse_proc> _parse_proc_map_t;
26
27//on exit, it deletes the parse proc map
28static _parse_proc_map_t& _get_parse_proc_map() {
29 static _parse_proc_map_t _parse_proc_map;
30 return _parse_proc_map;
31}
32
33
34//get the parse proc from the map
35static parse_proc _get_parse_proc(rule *r) {
36 _parse_proc_map_t& _parse_proc_map = _get_parse_proc_map();
37 _parse_proc_map_t::iterator it = _parse_proc_map.find(r);
38 if (it == _parse_proc_map.end()) return 0;
39 return it->second;
40}
41
42
43//internal private class that manages access to the public classes' internals.
44class _private {
45public:
46 //get the internal expression object from the expression.
47 static _expr *get_expr(const expr &e) {
48 return e.m_expr;
49 }
50
51 //create new expression from given expression
52 static expr construct_expr(_expr *e) {
53 return e;
54 }
55
56 //get the internal expression object from the rule.
57 static _expr *get_expr(rule &r) {
58 return r.m_expr;
59 }
60
61 //get the internal parse proc from the rule.
62 static parse_proc get_parse_proc(rule &r) {
63 return r.m_parse_proc;
64 }
65};
66
67
68class _context;
69
70
71//parser state
72class _state {
73public:
74 //position
75 pos m_pos;
76
77 //size of match vector
78 size_t m_matches;
79
80 //constructor
81 _state(_context &con);
82};
83
84
85//match
86class _match {
87public:
88 //rule matched
89 rule *m_rule;
90
91 //begin position
92 pos m_begin;
93
94 //end position
95 pos m_end;
96
97 //null constructor
98 _match() {}
99
100 //constructor from parameters
101 _match(rule *r, const pos &b, const pos &e) :
102 m_rule(r),
103 m_begin(b),
104 m_end(e)
105 {
106 }
107};
108
109
110//match vector
111typedef std::vector<_match> _match_vector;
112
113
114//parsing context
115class _context {
116public:
117 //user data
118 void* m_user_data;
119
120 //current position
121 pos m_pos;
122
123 //error position
124 pos m_error_pos;
125
126 //input begin
127 input::iterator m_begin;
128
129 //input end
130 input::iterator m_end;
131
132 //matches
133 _match_vector m_matches;
134
135 //constructor
136 _context(input &i, void* ud) :
137 m_user_data(ud),
138 m_pos(i),
139 m_error_pos(i),
140 m_begin(i.begin()),
141 m_end(i.end())
142 {
143 }
144
145 //check if the end is reached
146 bool end() const {
147 return m_pos.m_it == m_end;
148 }
149
150 //get the current symbol
151 input::value_type symbol() const {
152 assert(!end());
153 return *m_pos.m_it;
154 }
155
156 //set the longest possible error
157 void set_error_pos() {
158 if (m_pos.m_it > m_error_pos.m_it) {
159 m_error_pos = m_pos;
160 }
161 }
162
163 //next column
164 void next_col() {
165 ++m_pos.m_it;
166 ++m_pos.m_col;
167 }
168
169 //next line
170 void next_line() {
171 ++m_pos.m_line;
172 m_pos.m_col = 1;
173 }
174
175 //restore the state
176 void restore(const _state &st) {
177 m_pos = st.m_pos;
178 m_matches.resize(st.m_matches);
179 }
180
181 //parse non-term rule.
182 bool parse_non_term(rule &r);
183
184 //parse term rule.
185 bool parse_term(rule &r);
186
187 //execute all the parse procs
188 void do_parse_procs(void *d) const {
189 for(_match_vector::const_iterator it = m_matches.begin();
190 it != m_matches.end();
191 ++it)
192 {
193 const _match &m = *it;
194 parse_proc p = _private::get_parse_proc(*m.m_rule);
195 p(m.m_begin, m.m_end, d);
196 }
197 }
198
199private:
200 //parse non-term rule.
201 bool _parse_non_term(rule &r);
202
203 //parse term rule.
204 bool _parse_term(rule &r);
205};
206
207
208//base class for expressions
209class _expr {
210public:
211 //destructor.
212 virtual ~_expr() {
213 }
214
215 //parse with whitespace
216 virtual bool parse_non_term(_context &con) const = 0;
217
218 //parse terminal
219 virtual bool parse_term(_context &con) const = 0;
220};
221
222
223//single character expression.
224class _char : public _expr {
225public:
226 //constructor.
227 _char(char c) :
228 m_char(c)
229 {
230 }
231
232 //parse with whitespace
233 virtual bool parse_non_term(_context &con) const {
234 return _parse(con);
235 }
236
237 //parse terminal
238 virtual bool parse_term(_context &con) const {
239 return _parse(con);
240 }
241
242private:
243 //character
244 input::value_type m_char;
245
246 //internal parse
247 bool _parse(_context &con) const {
248 if (!con.end()) {
249 input::value_type ch = con.symbol();
250 if (ch == m_char) {
251 con.next_col();
252 return true;
253 }
254 }
255 con.set_error_pos();
256 return false;
257 }
258};
259
260
261//string expression.
262class _string : public _expr {
263public:
264 //constructor from ansi string.
265 _string(const char *s) :
266 m_string(Converter{}.from_bytes(s))
267 {
268 }
269
270 //parse with whitespace
271 virtual bool parse_non_term(_context &con) const {
272 return _parse(con);
273 }
274
275 //parse terminal
276 virtual bool parse_term(_context &con) const {
277 return _parse(con);
278 }
279
280private:
281 //string
282 input m_string;
283
284 //parse the string
285 bool _parse(_context &con) const {
286 for(input::const_iterator it = m_string.begin(),
287 end = m_string.end();;)
288 {
289 if (it == end) return true;
290 if (con.end()) break;
291 if (con.symbol() != *it) break;
292 ++it;
293 con.next_col();
294 }
295 con.set_error_pos();
296 return false;
297 }
298};
299
300
301//set expression.
302class _set : public _expr {
303public:
304 //constructor from ansi string.
305 _set(const char *s) {
306 auto str = Converter{}.from_bytes(s);
307 for (auto ch : str) {
308 _add(ch);
309 }
310 }
311
312 //constructor from range.
313 _set(int min, int max) {
314 assert(min >= 0);
315 assert(min <= max);
316 m_quick_set.resize((size_t)max + 1U);
317 for(; min <= max; ++min) {
318 m_quick_set[(size_t)min] = true;
319 }
320 }
321
322 //parse with whitespace
323 virtual bool parse_non_term(_context &con) const {
324 return _parse(con);
325 }
326
327 //parse terminal
328 virtual bool parse_term(_context &con) const {
329 return _parse(con);
330 }
331
332private:
333 //set is kept as an array of flags, for quick access
334 std::vector<bool> m_quick_set;
335 std::unordered_set<size_t> m_large_set;
336
337 //add character
338 void _add(size_t i) {
339 if (i <= m_quick_set.size() || i <= 255) {
340 if (i >= m_quick_set.size()) {
341 m_quick_set.resize(i + 1);
342 }
343 m_quick_set[i] = true;
344 } else {
345 m_large_set.insert(i);
346 }
347 }
348
349 //internal parse
350 bool _parse(_context &con) const {
351 if (!con.end()) {
352 size_t ch = con.symbol();
353 if (ch < m_quick_set.size()) {
354 if (m_quick_set[ch]) {
355 con.next_col();
356 return true;
357 }
358 } else if (m_large_set.find(ch) != m_large_set.end()) {
359 con.next_col();
360 return true;
361 }
362 }
363 con.set_error_pos();
364 return false;
365 }
366};
367
368
369//base class for unary expressions
370class _unary : public _expr {
371public:
372 //constructor.
373 _unary(_expr *e) :
374 m_expr(e)
375 {
376 }
377
378 //destructor.
379 virtual ~_unary() {
380 delete m_expr;
381 }
382
383protected:
384 //expression
385 _expr *m_expr;
386};
387
388
389//terminal
390class _term : public _unary {
391public:
392 //constructor.
393 _term(_expr *e) :
394 _unary(e)
395 {
396 }
397
398 //parse with whitespace
399 virtual bool parse_non_term(_context &con) const {
400 return m_expr->parse_term(con);
401 }
402
403 //parse terminal
404 virtual bool parse_term(_context &con) const {
405 return m_expr->parse_term(con);
406 }
407};
408
409
410//user
411class _user : public _unary {
412public:
413 //constructor.
414 _user(_expr *e, const user_handler &callback) :
415 _unary(e),
416 m_handler(callback)
417 {
418 }
419
420 //parse with whitespace
421 virtual bool parse_non_term(_context &con) const {
422 pos pos = con.m_pos;
423 if (m_expr->parse_non_term(con)) {
424 item_t item = {pos.m_it, con.m_pos.m_it, con.m_user_data};
425 return m_handler(item);
426 }
427 return false;
428 }
429
430 //parse terminal
431 virtual bool parse_term(_context &con) const {
432 pos pos = con.m_pos;
433 if (m_expr->parse_term(con)) {
434 item_t item = {pos.m_it, con.m_pos.m_it, con.m_user_data};
435 return m_handler(item);
436 }
437 return false;
438 }
439private:
440 user_handler m_handler;
441};
442
443
444//loop 0
445class _loop0 : public _unary {
446public:
447 //constructor.
448 _loop0(_expr *e) :
449 _unary(e)
450 {
451 }
452
453 //parse with whitespace
454 virtual bool parse_non_term(_context &con) const {
455 //if parsing of the first fails, restore the context and stop
456 _state st(con);
457 if (!m_expr->parse_non_term(con)) {
458 con.restore(st);
459 return true;
460 }
461
462 //parse the rest
463 for(;;) {
464 _state st(con);
465 if (!m_expr->parse_non_term(con)) {
466 con.restore(st);
467 break;
468 }
469 }
470
471 return true;
472 }
473
474 //parse terminal
475 virtual bool parse_term(_context &con) const {
476 //if parsing of the first fails, restore the context and stop
477 _state st(con);
478 if (!m_expr->parse_term(con)) {
479 con.restore(st);
480 return true;
481 }
482
483 //parse the rest until no more parsing is possible
484 for(;;) {
485 _state st(con);
486 if (!m_expr->parse_term(con)) {
487 con.restore(st);
488 break;
489 }
490 }
491
492 return true;
493 }
494};
495
496
497//loop 1
498class _loop1 : public _unary {
499public:
500 //constructor.
501 _loop1(_expr *e) :
502 _unary(e)
503 {
504 }
505
506 //parse with whitespace
507 virtual bool parse_non_term(_context &con) const {
508 //parse the first; if the first fails, stop
509 if (!m_expr->parse_non_term(con)) return false;
510
511 //parse the rest until no more parsing is possible
512 for(;;) {
513 _state st(con);
514 if (!m_expr->parse_non_term(con)) {
515 con.restore(st);
516 break;
517 }
518 }
519
520 return true;
521 }
522
523 //parse terminal
524 virtual bool parse_term(_context &con) const {
525 //parse the first; if the first fails, stop
526 if (!m_expr->parse_term(con)) return false;
527
528 //parse the rest until no more parsing is possible
529 for(;;) {
530 _state st(con);
531 if (!m_expr->parse_term(con)) {
532 con.restore(st);
533 break;
534 }
535 }
536
537 return true;
538 }
539};
540
541
542//optional
543class _optional : public _unary {
544public:
545 //constructor.
546 _optional(_expr *e) :
547 _unary(e)
548 {
549 }
550
551 //parse with whitespace
552 virtual bool parse_non_term(_context &con) const {
553 _state st(con);
554 if (!m_expr->parse_non_term(con)) con.restore(st);
555 return true;
556 }
557
558 //parse terminal
559 virtual bool parse_term(_context &con) const {
560 _state st(con);
561 if (!m_expr->parse_term(con)) con.restore(st);
562 return true;
563 }
564};
565
566
567//and
568class _and : public _unary {
569public:
570 //constructor.
571 _and(_expr *e) :
572 _unary(e)
573 {
574 }
575
576 //parse with whitespace
577 virtual bool parse_non_term(_context &con) const {
578 _state st(con);
579 bool ok = m_expr->parse_non_term(con);
580 con.restore(st);
581 return ok;
582 }
583
584 //parse terminal
585 virtual bool parse_term(_context &con) const {
586 _state st(con);
587 bool ok = m_expr->parse_term(con);
588 con.restore(st);
589 return ok;
590 }
591};
592
593
594//not
595class _not : public _unary {
596public:
597 //constructor.
598 _not(_expr *e) :
599 _unary(e)
600 {
601 }
602
603 //parse with whitespace
604 virtual bool parse_non_term(_context &con) const {
605 _state st(con);
606 bool ok = !m_expr->parse_non_term(con);
607 con.restore(st);
608 return ok;
609 }
610
611 //parse terminal
612 virtual bool parse_term(_context &con) const {
613 _state st(con);
614 bool ok = !m_expr->parse_term(con);
615 con.restore(st);
616 return ok;
617 }
618};
619
620
621//newline
622class _nl : public _unary {
623public:
624 //constructor.
625 _nl(_expr *e) :
626 _unary(e)
627 {
628 }
629
630 //parse with whitespace
631 virtual bool parse_non_term(_context &con) const {
632 if (!m_expr->parse_non_term(con)) return false;
633 con.next_line();
634 return true;
635 }
636
637 //parse terminal
638 virtual bool parse_term(_context &con) const {
639 if (!m_expr->parse_term(con)) return false;
640 con.next_line();
641 return true;
642 }
643};
644
645
646//base class for binary expressions
647class _binary : public _expr {
648public:
649 //constructor.
650 _binary(_expr *left, _expr *right) :
651 m_left(left), m_right(right)
652 {
653 }
654
655 //destructor.
656 virtual ~_binary() {
657 delete m_left;
658 delete m_right;
659 }
660
661protected:
662 //left and right expressions
663 _expr *m_left, *m_right;
664};
665
666
667//sequence
668class _seq : public _binary {
669public:
670 //constructor.
671 _seq(_expr *left, _expr *right) :
672 _binary(left, right)
673 {
674 }
675
676 //parse with whitespace
677 virtual bool parse_non_term(_context &con) const {
678 if (!m_left->parse_non_term(con)) return false;
679 return m_right->parse_non_term(con);
680 }
681
682 //parse terminal
683 virtual bool parse_term(_context &con) const {
684 if (!m_left->parse_term(con)) return false;
685 return m_right->parse_term(con);
686 }
687};
688
689
690//choice
691class _choice : public _binary {
692public:
693 //constructor.
694 _choice(_expr *left, _expr *right) :
695 _binary(left, right)
696 {
697 }
698
699 //parse with whitespace
700 virtual bool parse_non_term(_context &con) const {
701 _state st(con);
702 if (m_left->parse_non_term(con)) return true;
703 con.restore(st);
704 return m_right->parse_non_term(con);
705 }
706
707 //parse terminal
708 virtual bool parse_term(_context &con) const {
709 _state st(con);
710 if (m_left->parse_term(con)) return true;
711 con.restore(st);
712 return m_right->parse_term(con);
713 }
714};
715
716
717//reference to rule
718class _ref : public _expr {
719public:
720 //constructor.
721 _ref(rule &r) :
722 m_rule(r)
723 {
724 }
725
726 //parse with whitespace
727 virtual bool parse_non_term(_context &con) const {
728 return con.parse_non_term(m_rule);
729 }
730
731 //parse terminal
732 virtual bool parse_term(_context &con) const {
733 return con.parse_term(m_rule);
734 }
735
736private:
737 //reference
738 rule &m_rule;
739};
740
741
742//eof
743class _eof : public _expr {
744public:
745 //parse with whitespace
746 virtual bool parse_non_term(_context &con) const {
747 return parse_term(con);
748 }
749
750 //parse terminal
751 virtual bool parse_term(_context &con) const {
752 return con.end();
753 }
754};
755
756
757//any
758class _any : public _expr {
759public:
760 //parse with whitespace
761 virtual bool parse_non_term(_context &con) const {
762 return parse_term(con);
763 }
764
765 //parse terminal
766 virtual bool parse_term(_context &con) const {
767 if (!con.end()) {
768 con.next_col();
769 return true;
770 }
771 con.set_error_pos();
772 return false;
773 }
774};
775
776
777//true
778class _true : public _expr {
779public:
780 //parse with whitespace
781 virtual bool parse_non_term(_context &) const {
782 return true;
783 }
784
785 //parse terminal
786 virtual bool parse_term(_context &) const {
787 return true;
788 }
789};
790
791
792//false
793class _false: public _expr {
794public:
795 //parse with whitespace
796 virtual bool parse_non_term(_context &) const {
797 return false;
798 }
799
800 //parse terminal
801 virtual bool parse_term(_context &) const {
802 return false;
803 }
804};
805
806//exception thrown when left recursion terminates successfully
807struct _lr_ok {
808 rule *m_rule;
809 _lr_ok(rule *r) : m_rule(r) {}
810};
811
812
813//constructor
814_state::_state(_context &con) :
815 m_pos(con.m_pos),
816 m_matches(con.m_matches.size())
817{
818}
819
820
821//parse non-term rule.
822bool _context::parse_non_term(rule &r) {
823 //save the state of the rule
824 rule::_state old_state = r.m_state;
825
826 //success/failure result
827 bool ok;
828
829 //compute the new position
830 size_t new_pos = m_pos.m_it - m_begin;
831
832 //check if we have left recursion
833 bool lr = new_pos == r.m_state.m_pos;
834
835 //update the rule's state
836 r.m_state.m_pos = new_pos;
837
838 //handle the mode of the rule
839 switch (r.m_state.m_mode) {
840 //normal parse
841 case rule::_PARSE:
842 if (lr) {
843 //first try to parse the rule by rejecting it, so alternative branches are examined
844 r.m_state.m_mode = rule::_REJECT;
845 ok = _parse_non_term(r);
846
847 //if the first try is successful, try accepting the rule,
848 //so other elements of the sequence are parsed
849 if (ok) {
850 r.m_state.m_mode = rule::_ACCEPT;
851
852 //loop until no more parsing can be done
853 for(;;) {
854 //store the correct state, in order to backtrack if the call fails
855 _state st(*this);
856
857 //update the rule position to the current position,
858 //because at this state the rule is resolving the left recursion
859 r.m_state.m_pos = m_pos.m_it - m_begin;
860
861 //if parsing fails, restore the last good state and stop
862 if (!_parse_non_term(r)) {
863 restore(st);
864 break;
865 }
866 }
867
868 //since the left recursion was resolved successfully,
869 //return via a non-local exit
870 r.m_state = old_state;
871 throw _lr_ok(r.this_ptr());
872 }
873 }
874 else {
875 try {
876 ok = _parse_non_term(r);
877 }
878 catch (const _lr_ok &ex) {
879 //since left recursions may be mutual, we must test which rule's left recursion
880 //was ended successfully
881 if (ex.m_rule == r.this_ptr()) {
882 ok = true;
883 }
884 else {
885 r.m_state = old_state;
886 throw;
887 }
888 }
889 }
890 break;
891
892 //reject the left recursive rule
893 case rule::_REJECT:
894 if (lr) {
895 ok = false;
896 }
897 else {
898 r.m_state.m_mode = rule::_PARSE;
899 ok = _parse_non_term(r);
900 r.m_state.m_mode = rule::_REJECT;
901 }
902 break;
903
904 //accept the left recursive rule
905 case rule::_ACCEPT:
906 if (lr) {
907 ok = true;
908 }
909 else {
910 r.m_state.m_mode = rule::_PARSE;
911 ok = _parse_non_term(r);
912 r.m_state.m_mode = rule::_ACCEPT;
913 }
914 break;
915 }
916
917 //restore the rule's state
918 r.m_state = old_state;
919
920 return ok;
921}
922
923
924//parse term rule.
925bool _context::parse_term(rule &r) {
926 //save the state of the rule
927 rule::_state old_state = r.m_state;
928
929 //success/failure result
930 bool ok;
931
932 //compute the new position
933 size_t new_pos = m_pos.m_it - m_begin;
934
935 //check if we have left recursion
936 bool lr = new_pos == r.m_state.m_pos;
937
938 //update the rule's state
939 r.m_state.m_pos = new_pos;
940
941 //handle the mode of the rule
942 switch (r.m_state.m_mode) {
943 //normal parse
944 case rule::_PARSE:
945 if (lr) {
946 //first try to parse the rule by rejecting it, so alternative branches are examined
947 r.m_state.m_mode = rule::_REJECT;
948 ok = _parse_term(r);
949
950 //if the first try is successful, try accepting the rule,
951 //so other elements of the sequence are parsed
952 if (ok) {
953 r.m_state.m_mode = rule::_ACCEPT;
954
955 //loop until no more parsing can be done
956 for(;;) {
957 //store the correct state, in order to backtrack if the call fails
958 _state st(*this);
959
960 //update the rule position to the current position,
961 //because at this state the rule is resolving the left recursion
962 r.m_state.m_pos = m_pos.m_it - m_begin;
963
964 //if parsing fails, restore the last good state and stop
965 if (!_parse_term(r)) {
966 restore(st);
967 break;
968 }
969 }
970
971 //since the left recursion was resolved successfully,
972 //return via a non-local exit
973 r.m_state = old_state;
974 throw _lr_ok(r.this_ptr());
975 }
976 }
977 else {
978 try {
979 ok = _parse_term(r);
980 }
981 catch (const _lr_ok &ex) {
982 //since left recursions may be mutual, we must test which rule's left recursion
983 //was ended successfully
984 if (ex.m_rule == r.this_ptr()) {
985 ok = true;
986 }
987 else {
988 r.m_state = old_state;
989 throw;
990 }
991 }
992 }
993 break;
994
995 //reject the left recursive rule
996 case rule::_REJECT:
997 if (lr) {
998 ok = false;
999 }
1000 else {
1001 r.m_state.m_mode = rule::_PARSE;
1002 ok = _parse_term(r);
1003 r.m_state.m_mode = rule::_REJECT;
1004 }
1005 break;
1006
1007 //accept the left recursive rule
1008 case rule::_ACCEPT:
1009 if (lr) {
1010 ok = true;
1011 }
1012 else {
1013 r.m_state.m_mode = rule::_PARSE;
1014 ok = _parse_term(r);
1015 r.m_state.m_mode = rule::_ACCEPT;
1016 }
1017 break;
1018 }
1019
1020 //restore the rule's state
1021 r.m_state = old_state;
1022
1023 return ok;
1024}
1025
1026
1027//parse non-term rule internal.
1028bool _context::_parse_non_term(rule &r) {
1029 bool ok;
1030 if (_private::get_parse_proc(r)) {
1031 pos b = m_pos;
1032 ok = _private::get_expr(r)->parse_non_term(*this);
1033 if (ok) {
1034 m_matches.push_back(_match(r.this_ptr(), b, m_pos));
1035 }
1036 }
1037 else {
1038 ok = _private::get_expr(r)->parse_non_term(*this);
1039 }
1040 return ok;
1041}
1042
1043
1044//parse term rule internal.
1045bool _context::_parse_term(rule &r) {
1046 bool ok;
1047 if (_private::get_parse_proc(r)) {
1048 pos b = m_pos;
1049 ok = _private::get_expr(r)->parse_term(*this);
1050 if (ok) {
1051 m_matches.push_back(_match(r.this_ptr(), b, m_pos));
1052 }
1053 }
1054 else {
1055 ok = _private::get_expr(r)->parse_term(*this);
1056 }
1057 return ok;
1058}
1059
1060
1061//get syntax error
1062static error _syntax_error(_context &con) {
1063 return error(con.m_error_pos, con.m_error_pos, ERROR_SYNTAX_ERROR);
1064}
1065
1066
1067//get eof error
1068static error _eof_error(_context &con) {
1069 return error(con.m_error_pos, con.m_error_pos, ERROR_INVALID_EOF);
1070}
1071
1072
1073/** constructor from input.
1074 @param i input.
1075 */
1076pos::pos(input &i) :
1077 m_it(i.begin()),
1078 m_line(1),
1079 m_col(1)
1080{
1081}
1082
1083
1084/** character terminal constructor.
1085 @param c character.
1086 */
1087expr::expr(char c) :
1088 m_expr(new _char(c))
1089{
1090}
1091
1092
1093/** null-terminated string terminal constructor.
1094 @param s null-terminated string.
1095 */
1096expr::expr(const char *s) :
1097 m_expr(new _string(s))
1098{
1099}
1100
1101
1102/** rule reference constructor.
1103 @param r rule.
1104 */
1105expr::expr(rule &r) :
1106 m_expr(new _ref(r))
1107{
1108}
1109
1110
1111/** creates a zero-or-more loop out of this expression.
1112 @return a zero-or-more loop expression.
1113 */
1114expr expr::operator *() const {
1115 return _private::construct_expr(new _loop0(m_expr));
1116}
1117
1118
1119/** creates a one-or-more loop out of this expression.
1120 @return a one-or-more loop expression.
1121 */
1122expr expr::operator +() const {
1123 return _private::construct_expr(new _loop1(m_expr));
1124}
1125
1126
1127/** creates an optional out of this expression.
1128 @return an optional expression.
1129 */
1130expr expr::operator -() const {
1131 return _private::construct_expr(new _optional(m_expr));
1132}
1133
1134
1135/** creates an AND-expression.
1136 @return an AND-expression.
1137 */
1138expr expr::operator &() const {
1139 return _private::construct_expr((new _and(m_expr)));
1140}
1141
1142
1143/** creates a NOT-expression.
1144 @return a NOT-expression.
1145 */
1146expr expr::operator !() const {
1147 return _private::construct_expr(new _not(m_expr));
1148}
1149
1150
1151/** constructor.
1152 @param b begin position.
1153 @param e end position.
1154 */
1155input_range::input_range(const pos &b, const pos &e) :
1156m_begin(b),
1157m_end(e) {}
1158
1159
1160/** constructor.
1161 @param b begin position.
1162 @param e end position.
1163 @param t error type.
1164 */
1165error::error(const pos &b, const pos &e, int t) :
1166input_range(b, e),
1167m_type(t) {}
1168
1169
1170/** compare on begin position.
1171 @param e the other error to compare this with.
1172 @return true if this comes before the previous error, false otherwise.
1173 */
1174bool error::operator < (const error &e) const {
1175 return m_begin.m_it < e.m_begin.m_it;
1176}
1177
1178
1179/** character terminal constructor.
1180 @param c character.
1181 */
1182rule::rule(char c) :
1183 m_expr(new _char(c))
1184{
1185 m_parse_proc = _get_parse_proc(this);
1186}
1187
1188
1189/** null-terminated string terminal constructor.
1190 @param s null-terminated string.
1191 */
1192rule::rule(const char *s) :
1193 m_expr(new _string(s))
1194{
1195 m_parse_proc = _get_parse_proc(this);
1196}
1197
1198
1199/** constructor from expression.
1200 @param e expression.
1201 */
1202rule::rule(const expr &e) :
1203 m_expr(_private::get_expr(e))
1204{
1205 m_parse_proc = _get_parse_proc(this);
1206}
1207
1208
1209/** constructor from rule.
1210 @param r rule.
1211 */
1212rule::rule(rule &r) :
1213 m_expr(new _ref(r)),
1214 m_parse_proc(0)
1215{
1216 m_parse_proc = _get_parse_proc(this);
1217}
1218
1219
1220/** invalid constructor from rule (required by gcc).
1221 @exception std::logic_error always thrown.
1222 */
1223rule::rule(const rule &) {
1224 throw std::logic_error("invalid operation");
1225}
1226
1227
1228/** deletes the internal object that represents the expression.
1229 */
1230rule::~rule() {
1231 delete m_expr;
1232}
1233
1234
1235/** creates a zero-or-more loop out of this rule.
1236 @return a zero-or-more loop rule.
1237 */
1238expr rule::operator *() {
1239 return _private::construct_expr(new _loop0(new _ref(*this)));
1240}
1241
1242
1243/** creates a one-or-more loop out of this rule.
1244 @return a one-or-more loop rule.
1245 */
1246expr rule::operator +() {
1247 return _private::construct_expr(new _loop1(new _ref(*this)));
1248}
1249
1250
1251/** creates an optional out of this rule.
1252 @return an optional rule.
1253 */
1254expr rule::operator -() {
1255 return _private::construct_expr(new _optional(new _ref(*this)));
1256}
1257
1258
1259/** creates an AND-expression out of this rule.
1260 @return an AND-expression out of this rule.
1261 */
1262expr rule::operator &() {
1263 return _private::construct_expr(new _and(new _ref(*this)));
1264}
1265
1266
1267/** creates a NOT-expression out of this rule.
1268 @return a NOT-expression out of this rule.
1269 */
1270expr rule::operator !() {
1271 return _private::construct_expr(new _not(new _ref(*this)));
1272}
1273
1274
1275/** sets the parse procedure.
1276 @param p procedure.
1277 */
1278void rule::set_parse_proc(parse_proc p) {
1279 assert(p);
1280 m_parse_proc = p;
1281 _parse_proc_map_t& _parse_proc_map = _get_parse_proc_map();
1282 _parse_proc_map[this] = p;
1283}
1284
1285
1286/** creates a sequence of expressions.
1287 @param left left operand.
1288 @param right right operand.
1289 @return an expression which parses a sequence.
1290 */
1291expr operator >> (const expr &left, const expr &right) {
1292 return _private::construct_expr(
1293 new _seq(_private::get_expr(left), _private::get_expr(right)));
1294}
1295
1296
1297/** creates a choice of expressions.
1298 @param left left operand.
1299 @param right right operand.
1300 @return an expression which parses a choice.
1301 */
1302expr operator | (const expr &left, const expr &right) {
1303 return _private::construct_expr(
1304 new _choice(_private::get_expr(left), _private::get_expr(right)));
1305}
1306
1307
1308/** converts a parser expression into a terminal.
1309 @param e expression.
1310 @return an expression which parses a terminal.
1311 */
1312expr term(const expr &e) {
1313 return _private::construct_expr(
1314 new _term(_private::get_expr(e)));
1315}
1316
1317
1318/** creates a set expression from a null-terminated string.
1319 @param s null-terminated string with characters of the set.
1320 @return an expression which parses a single character out of a set.
1321 */
1322expr set(const char *s) {
1323 return _private::construct_expr(new _set(s));
1324}
1325
1326
1327/** creates a range expression.
1328 @param min min character.
1329 @param max max character.
1330 @return an expression which parses a single character out of range.
1331 */
1332expr range(int min, int max) {
1333 return _private::construct_expr(new _set(min, max));
1334}
1335
1336
1337/** creates an expression which increments the line counter
1338 and resets the column counter when the given expression
1339 is parsed successfully; used for newline characters.
1340 @param e expression to wrap into a newline parser.
1341 @return an expression that handles newlines.
1342 */
1343expr nl(const expr &e) {
1344 return _private::construct_expr(new _nl(_private::get_expr(e)));
1345}
1346
1347
1348/** creates an expression which tests for the end of input.
1349 @return an expression that handles the end of input.
1350 */
1351expr eof() {
1352 return _private::construct_expr(new _eof());
1353}
1354
1355
1356/** creates a not expression.
1357 @param e expression.
1358 @return the appropriate expression.
1359 */
1360expr not_(const expr &e) {
1361 return !e;
1362}
1363
1364
1365/** creates an and expression.
1366 @param e expression.
1367 @return the appropriate expression.
1368 */
1369expr and_(const expr &e) {
1370 return &e;
1371}
1372
1373
1374/** creates an expression that parses any character.
1375 @return the appropriate expression.
1376 */
1377expr any() {
1378 return _private::construct_expr(new _any());
1379}
1380
1381
1382/** parsing succeeds without consuming any input.
1383*/
1384expr true_() {
1385 return _private::construct_expr(new _true());
1386}
1387
1388
1389/** parsing fails without consuming any input.
1390*/
1391expr false_() {
1392 return _private::construct_expr(new _false());
1393}
1394
1395
1396/** parse with target expression and let user handle result.
1397*/
1398expr user(const expr &e, const user_handler& handler) {
1399 return _private::construct_expr(new _user(_private::get_expr(e), handler));
1400}
1401
1402
1403/** parses the given input.
1404 The parse procedures of each rule parsed are executed
1405 before this function returns, if parsing succeeds.
1406 @param i input.
1407 @param g root rule of grammar.
1408 @param el list of errors.
1409 @param d user data, passed to the parse procedures.
1410 @return true on parsing success, false on failure.
1411 */
1412bool parse(input &i, rule &g, error_list &el, void *d, void* ud) {
1413 //prepare context
1414 _context con(i, ud);
1415
1416 //parse grammar
1417 if (!con.parse_non_term(g)) {
1418 el.push_back(_syntax_error(con));
1419 return false;
1420 }
1421
1422 //if end is not reached, there was an error
1423 if (!con.end()) {
1424 if (con.m_error_pos.m_it < con.m_end) {
1425 el.push_back(_syntax_error(con));
1426 }
1427 else {
1428 el.push_back(_eof_error(con));
1429 }
1430 return false;
1431 }
1432
1433 //success; execute the parse procedures
1434 con.do_parse_procs(d);
1435 return true;
1436}
1437
1438
1439} //namespace parserlib