aboutsummaryrefslogtreecommitdiff
path: root/src/MoonP/parser.cpp
diff options
context:
space:
mode:
author: Li Jin <dragon-fly@qq.com> 2021-02-17 11:22:07 +0800
committer: Li Jin <dragon-fly@qq.com> 2021-02-17 11:22:07 +0800
commit: 7066392d1c974065181d95d93274136dcd625d43 (patch)
tree: cf51eafc2c52cbc12246a306bca172d799193d30 /src/MoonP/parser.cpp
parent: 90cd12ad9ef465f3e435e1bd034dcfbe4e19d016 (diff)
downloadyuescript-7066392d1c974065181d95d93274136dcd625d43.tar.gz
yuescript-7066392d1c974065181d95d93274136dcd625d43.tar.bz2
yuescript-7066392d1c974065181d95d93274136dcd625d43.zip
stop reusing variables, rename project.
Diffstat (limited to 'src/MoonP/parser.cpp')
-rw-r--r-- src/MoonP/parser.cpp 1412
1 files changed, 0 insertions, 1412 deletions
diff --git a/src/MoonP/parser.cpp b/src/MoonP/parser.cpp
deleted file mode 100644
index 8dc2ff9..0000000
--- a/src/MoonP/parser.cpp
+++ /dev/null
@@ -1,1412 +0,0 @@
1/* Copyright (c) 2012, Achilleas Margaritis, modified by Jin Li
2All rights reserved.
3
4Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
6Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
8Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9
10THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
11
12#include <cstdlib>
13#include <cstring>
14#include <cassert>
15#include <stdexcept>
16#include <unordered_map>
17#include <unordered_set>
18
19#include "MoonP/parser.hpp"
20
21
22namespace parserlib {
23
24
25//internal private class that manages access to the public classes' internals.
26class _private {
27public:
28 //get the internal expression object from the expression.
29 static _expr* get_expr(const expr& e) {
30 return e.m_expr;
31 }
32
33 //create new expression from given expression
34 static expr construct_expr(_expr* e) {
35 return e;
36 }
37
38 //get the internal expression object from the rule.
39 static _expr* get_expr(rule& r) {
40 return r.m_expr;
41 }
42
43 //get the internal parse proc from the rule.
44 static parse_proc get_parse_proc(rule& r) {
45 return r.m_parse_proc;
46 }
47};
48
49
50class _context;
51
52
53//parser state
54class _state {
55public:
56 //position
57 pos m_pos;
58
59 //size of match vector
60 size_t m_matches;
61
62 //constructor
63 _state(_context& con);
64};
65
66
67//match
68class _match {
69public:
70 //rule matched
71 rule* m_rule;
72
73 //begin position
74 pos m_begin;
75
76 //end position
77 pos m_end;
78
79 //null constructor
80 _match() {}
81
82 //constructor from parameters
83 _match(rule* r, const pos& b, const pos& e) :
84 m_rule(r),
85 m_begin(b),
86 m_end(e)
87 {
88 }
89};
90
91
92//match vector
93typedef std::vector<_match> _match_vector;
94
95
96//parsing context
97class _context {
98public:
99 //user data
100 void* m_user_data;
101
102 //current position
103 pos m_pos;
104
105 //error position
106 pos m_error_pos;
107
108 //input begin
109 input::iterator m_begin;
110
111 //input end
112 input::iterator m_end;
113
114 //matches
115 _match_vector m_matches;
116
117 //constructor
118 _context(input& i, void* ud) :
119 m_user_data(ud),
120 m_pos(i),
121 m_error_pos(i),
122 m_begin(i.begin()),
123 m_end(i.end())
124 {
125 }
126
127 //check if the end is reached
128 bool end() const {
129 return m_pos.m_it == m_end;
130 }
131
132 //get the current symbol
133 input::value_type symbol() const {
134 assert(!end());
135 return *m_pos.m_it;
136 }
137
138 //set the longest possible error
139 void set_error_pos() {
140 if (m_pos.m_it > m_error_pos.m_it) {
141 m_error_pos = m_pos;
142 }
143 }
144
145 //next column
146 void next_col() {
147 ++m_pos.m_it;
148 ++m_pos.m_col;
149 }
150
151 //next line
152 void next_line() {
153 ++m_pos.m_line;
154 m_pos.m_col = 1;
155 }
156
157 //restore the state
158 void restore(const _state& st) {
159 m_pos = st.m_pos;
160 m_matches.resize(st.m_matches);
161 }
162
163 //parse non-term rule.
164 bool parse_non_term(rule& r);
165
166 //parse term rule.
167 bool parse_term(rule& r);
168
169 //execute all the parse procs
170 void do_parse_procs(void* d) const {
171 for(_match_vector::const_iterator it = m_matches.begin();
172 it != m_matches.end();
173 ++it) {
174 const _match &m = *it;
175 parse_proc p = _private::get_parse_proc(*m.m_rule);
176 p(m.m_begin, m.m_end, d);
177 }
178 }
179
180private:
181 //parse non-term rule.
182 bool _parse_non_term(rule& r);
183
184 //parse term rule.
185 bool _parse_term(rule& r);
186};
187
188
189//base class for expressions
190class _expr {
191public:
192 //destructor.
193 virtual ~_expr() {
194 }
195
196 //parse with whitespace
197 virtual bool parse_non_term(_context& con) const = 0;
198
199 //parse terminal
200 virtual bool parse_term(_context& con) const = 0;
201};
202
203
204//single character expression.
205class _char : public _expr {
206public:
207 //constructor.
208 _char(char c) :
209 m_char(c)
210 {
211 }
212
213 //parse with whitespace
214 virtual bool parse_non_term(_context& con) const {
215 return _parse(con);
216 }
217
218 //parse terminal
219 virtual bool parse_term(_context& con) const {
220 return _parse(con);
221 }
222
223private:
224 //character
225 input::value_type m_char;
226
227 //internal parse
228 bool _parse(_context& con) const {
229 if (!con.end()) {
230 input::value_type ch = con.symbol();
231 if (ch == m_char) {
232 con.next_col();
233 return true;
234 }
235 }
236 con.set_error_pos();
237 return false;
238 }
239};
240
241
242//string expression.
243class _string : public _expr {
244public:
245 //constructor from ansi string.
246 _string(const char* s) :
247 m_string(Converter{}.from_bytes(s))
248 {
249 }
250
251 //parse with whitespace
252 virtual bool parse_non_term(_context& con) const {
253 return _parse(con);
254 }
255
256 //parse terminal
257 virtual bool parse_term(_context& con) const {
258 return _parse(con);
259 }
260
261private:
262 //string
263 input m_string;
264
265 //parse the string
266 bool _parse(_context& con) const {
267 for(input::const_iterator it = m_string.begin(),
268 end = m_string.end();;) {
269 if (it == end) return true;
270 if (con.end()) break;
271 if (con.symbol() != *it) break;
272 ++it;
273 con.next_col();
274 }
275 con.set_error_pos();
276 return false;
277 }
278};
279
280
281//set expression.
282class _set : public _expr {
283public:
284 //constructor from ansi string.
285 _set(const char* s) {
286 auto str = Converter{}.from_bytes(s);
287 for (auto ch : str) {
288 _add(ch);
289 }
290 }
291
292 //constructor from range.
293 _set(int min, int max) {
294 assert(min >= 0);
295 assert(min <= max);
296 m_quick_set.resize((size_t)max + 1U);
297 for(; min <= max; ++min) {
298 m_quick_set[(size_t)min] = true;
299 }
300 }
301
302 //parse with whitespace
303 virtual bool parse_non_term(_context& con) const {
304 return _parse(con);
305 }
306
307 //parse terminal
308 virtual bool parse_term(_context& con) const {
309 return _parse(con);
310 }
311
312private:
313 //set is kept as an array of flags, for quick access
314 std::vector<bool> m_quick_set;
315 std::unordered_set<size_t> m_large_set;
316
317 //add character
318 void _add(size_t i) {
319 if (i <= m_quick_set.size() || i <= 255) {
320 if (i >= m_quick_set.size()) {
321 m_quick_set.resize(i + 1);
322 }
323 m_quick_set[i] = true;
324 } else {
325 m_large_set.insert(i);
326 }
327 }
328
329 //internal parse
330 bool _parse(_context& con) const {
331 if (!con.end()) {
332 size_t ch = con.symbol();
333 if (ch < m_quick_set.size()) {
334 if (m_quick_set[ch]) {
335 con.next_col();
336 return true;
337 }
338 } else if (m_large_set.find(ch) != m_large_set.end()) {
339 con.next_col();
340 return true;
341 }
342 }
343 con.set_error_pos();
344 return false;
345 }
346};
347
348
349//base class for unary expressions
350class _unary : public _expr {
351public:
352 //constructor.
353 _unary(_expr* e) :
354 m_expr(e)
355 {
356 }
357
358 //destructor.
359 virtual ~_unary() {
360 delete m_expr;
361 }
362
363protected:
364 //expression
365 _expr *m_expr;
366};
367
368
369//terminal
370class _term : public _unary {
371public:
372 //constructor.
373 _term(_expr* e) :
374 _unary(e)
375 {
376 }
377
378 //parse with whitespace
379 virtual bool parse_non_term(_context& con) const {
380 return m_expr->parse_term(con);
381 }
382
383 //parse terminal
384 virtual bool parse_term(_context& con) const {
385 return m_expr->parse_term(con);
386 }
387};
388
389
390//user
391class _user : public _unary {
392public:
393 //constructor.
394 _user(_expr* e, const user_handler& callback) :
395 _unary(e),
396 m_handler(callback)
397 {
398 }
399
400 //parse with whitespace
401 virtual bool parse_non_term(_context& con) const {
402 pos pos = con.m_pos;
403 if (m_expr->parse_non_term(con)) {
404 item_t item = {pos.m_it, con.m_pos.m_it, con.m_user_data};
405 return m_handler(item);
406 }
407 return false;
408 }
409
410 //parse terminal
411 virtual bool parse_term(_context& con) const {
412 pos pos = con.m_pos;
413 if (m_expr->parse_term(con)) {
414 item_t item = {pos.m_it, con.m_pos.m_it, con.m_user_data};
415 return m_handler(item);
416 }
417 return false;
418 }
419private:
420 user_handler m_handler;
421};
422
423
424//loop 0
425class _loop0 : public _unary {
426public:
427 //constructor.
428 _loop0(_expr* e) :
429 _unary(e)
430 {
431 }
432
433 //parse with whitespace
434 virtual bool parse_non_term(_context& con) const {
435 //if parsing of the first fails, restore the context and stop
436 _state st(con);
437 if (!m_expr->parse_non_term(con)) {
438 con.restore(st);
439 return true;
440 }
441
442 //parse the rest
443 for(;;) {
444 _state st(con);
445 if (!m_expr->parse_non_term(con)) {
446 con.restore(st);
447 break;
448 }
449 }
450
451 return true;
452 }
453
454 //parse terminal
455 virtual bool parse_term(_context& con) const {
456 //if parsing of the first fails, restore the context and stop
457 _state st(con);
458 if (!m_expr->parse_term(con)) {
459 con.restore(st);
460 return true;
461 }
462
463 //parse the rest until no more parsing is possible
464 for(;;) {
465 _state st(con);
466 if (!m_expr->parse_term(con)) {
467 con.restore(st);
468 break;
469 }
470 }
471
472 return true;
473 }
474};
475
476
477//loop 1
478class _loop1 : public _unary {
479public:
480 //constructor.
481 _loop1(_expr* e) :
482 _unary(e)
483 {
484 }
485
486 //parse with whitespace
487 virtual bool parse_non_term(_context& con) const {
488 //parse the first; if the first fails, stop
489 if (!m_expr->parse_non_term(con)) return false;
490
491 //parse the rest until no more parsing is possible
492 for(;;) {
493 _state st(con);
494 if (!m_expr->parse_non_term(con)) {
495 con.restore(st);
496 break;
497 }
498 }
499
500 return true;
501 }
502
503 //parse terminal
504 virtual bool parse_term(_context& con) const {
505 //parse the first; if the first fails, stop
506 if (!m_expr->parse_term(con)) return false;
507
508 //parse the rest until no more parsing is possible
509 for(;;) {
510 _state st(con);
511 if (!m_expr->parse_term(con)) {
512 con.restore(st);
513 break;
514 }
515 }
516
517 return true;
518 }
519};
520
521
522//optional
523class _optional : public _unary {
524public:
525 //constructor.
526 _optional(_expr* e) :
527 _unary(e)
528 {
529 }
530
531 //parse with whitespace
532 virtual bool parse_non_term(_context& con) const {
533 _state st(con);
534 if (!m_expr->parse_non_term(con)) con.restore(st);
535 return true;
536 }
537
538 //parse terminal
539 virtual bool parse_term(_context& con) const {
540 _state st(con);
541 if (!m_expr->parse_term(con)) con.restore(st);
542 return true;
543 }
544};
545
546
547//and
548class _and : public _unary {
549public:
550 //constructor.
551 _and(_expr* e) :
552 _unary(e)
553 {
554 }
555
556 //parse with whitespace
557 virtual bool parse_non_term(_context& con) const {
558 _state st(con);
559 bool ok = m_expr->parse_non_term(con);
560 con.restore(st);
561 return ok;
562 }
563
564 //parse terminal
565 virtual bool parse_term(_context& con) const {
566 _state st(con);
567 bool ok = m_expr->parse_term(con);
568 con.restore(st);
569 return ok;
570 }
571};
572
573
574//not
575class _not : public _unary {
576public:
577 //constructor.
578 _not(_expr* e) :
579 _unary(e)
580 {
581 }
582
583 //parse with whitespace
584 virtual bool parse_non_term(_context& con) const {
585 _state st(con);
586 bool ok = !m_expr->parse_non_term(con);
587 con.restore(st);
588 return ok;
589 }
590
591 //parse terminal
592 virtual bool parse_term(_context& con) const {
593 _state st(con);
594 bool ok = !m_expr->parse_term(con);
595 con.restore(st);
596 return ok;
597 }
598};
599
600
601//newline
602class _nl : public _unary {
603public:
604 //constructor.
605 _nl(_expr* e) :
606 _unary(e)
607 {
608 }
609
610 //parse with whitespace
611 virtual bool parse_non_term(_context& con) const {
612 if (!m_expr->parse_non_term(con)) return false;
613 con.next_line();
614 return true;
615 }
616
617 //parse terminal
618 virtual bool parse_term(_context& con) const {
619 if (!m_expr->parse_term(con)) return false;
620 con.next_line();
621 return true;
622 }
623};
624
625
626//base class for binary expressions
627class _binary : public _expr {
628public:
629 //constructor.
630 _binary(_expr* left, _expr* right) :
631 m_left(left), m_right(right)
632 {
633 }
634
635 //destructor.
636 virtual ~_binary() {
637 delete m_left;
638 delete m_right;
639 }
640
641protected:
642 //left and right expressions
643 _expr* m_left, *m_right;
644};
645
646
647//sequence
648class _seq : public _binary {
649public:
650 //constructor.
651 _seq(_expr* left, _expr* right) :
652 _binary(left, right)
653 {
654 }
655
656 //parse with whitespace
657 virtual bool parse_non_term(_context& con) const {
658 if (!m_left->parse_non_term(con)) return false;
659 return m_right->parse_non_term(con);
660 }
661
662 //parse terminal
663 virtual bool parse_term(_context& con) const {
664 if (!m_left->parse_term(con)) return false;
665 return m_right->parse_term(con);
666 }
667};
668
669
670//choice
671class _choice : public _binary {
672public:
673 //constructor.
674 _choice(_expr* left, _expr* right) :
675 _binary(left, right)
676 {
677 }
678
679 //parse with whitespace
680 virtual bool parse_non_term(_context& con) const {
681 _state st(con);
682 if (m_left->parse_non_term(con)) return true;
683 con.restore(st);
684 return m_right->parse_non_term(con);
685 }
686
687 //parse terminal
688 virtual bool parse_term(_context& con) const {
689 _state st(con);
690 if (m_left->parse_term(con)) return true;
691 con.restore(st);
692 return m_right->parse_term(con);
693 }
694};
695
696
697//reference to rule
698class _ref : public _expr {
699public:
700 //constructor.
701 _ref(rule& r) :
702 m_rule(r)
703 {
704 }
705
706 //parse with whitespace
707 virtual bool parse_non_term(_context& con) const {
708 return con.parse_non_term(m_rule);
709 }
710
711 //parse terminal
712 virtual bool parse_term(_context& con) const {
713 return con.parse_term(m_rule);
714 }
715
716private:
717 //reference
718 rule &m_rule;
719};
720
721
722//eof
723class _eof : public _expr {
724public:
725 //parse with whitespace
726 virtual bool parse_non_term(_context& con) const {
727 return parse_term(con);
728 }
729
730 //parse terminal
731 virtual bool parse_term(_context& con) const {
732 return con.end();
733 }
734};
735
736
737//any
738class _any : public _expr {
739public:
740 //parse with whitespace
741 virtual bool parse_non_term(_context& con) const {
742 return parse_term(con);
743 }
744
745 //parse terminal
746 virtual bool parse_term(_context& con) const {
747 if (!con.end()) {
748 con.next_col();
749 return true;
750 }
751 con.set_error_pos();
752 return false;
753 }
754};
755
756
757//true
758class _true : public _expr {
759public:
760 //parse with whitespace
761 virtual bool parse_non_term(_context&) const {
762 return true;
763 }
764
765 //parse terminal
766 virtual bool parse_term(_context&) const {
767 return true;
768 }
769};
770
771
772//false
773class _false: public _expr {
774public:
775 //parse with whitespace
776 virtual bool parse_non_term(_context&) const {
777 return false;
778 }
779
780 //parse terminal
781 virtual bool parse_term(_context&) const {
782 return false;
783 }
784};
785
786//exception thrown when left recursion terminates successfully
787struct _lr_ok {
788 rule* m_rule;
789 _lr_ok(rule* r) : m_rule(r) {}
790};
791
792
793//constructor
794_state::_state(_context& con) :
795 m_pos(con.m_pos),
796 m_matches(con.m_matches.size())
797{
798}
799
800
801//parse non-term rule.
802bool _context::parse_non_term(rule& r) {
803 //save the state of the rule
804 rule::_state old_state = r.m_state;
805
806 //success/failure result
807 bool ok = false;
808
809 //compute the new position
810 size_t new_pos = m_pos.m_it - m_begin;
811
812 //check if we have left recursion
813 bool lr = new_pos == r.m_state.m_pos;
814
815 //update the rule's state
816 r.m_state.m_pos = new_pos;
817
818 //handle the mode of the rule
819 switch (r.m_state.m_mode) {
820 //normal parse
821 case rule::_PARSE:
822 if (lr) {
823 //first try to parse the rule by rejecting it, so alternative branches are examined
824 r.m_state.m_mode = rule::_REJECT;
825 ok = _parse_non_term(r);
826
827 //if the first try is successful, try accepting the rule,
828 //so other elements of the sequence are parsed
829 if (ok) {
830 r.m_state.m_mode = rule::_ACCEPT;
831
832 //loop until no more parsing can be done
833 for(;;) {
834 //store the correct state, in order to backtrack if the call fails
835 _state st(*this);
836
837 //update the rule position to the current position,
838 //because at this state the rule is resolving the left recursion
839 r.m_state.m_pos = m_pos.m_it - m_begin;
840
841 //if parsing fails, restore the last good state and stop
842 if (!_parse_non_term(r)) {
843 restore(st);
844 break;
845 }
846 }
847
848 //since the left recursion was resolved successfully,
849 //return via a non-local exit
850 r.m_state = old_state;
851 throw _lr_ok(r.this_ptr());
852 }
853 }
854 else {
855 try {
856 ok = _parse_non_term(r);
857 }
858 catch (const _lr_ok &ex) {
859 //since left recursions may be mutual, we must test which rule's left recursion
860 //was ended successfully
861 if (ex.m_rule == r.this_ptr()) {
862 ok = true;
863 }
864 else {
865 r.m_state = old_state;
866 throw;
867 }
868 }
869 }
870 break;
871
872 //reject the left recursive rule
873 case rule::_REJECT:
874 if (lr) {
875 ok = false;
876 }
877 else {
878 r.m_state.m_mode = rule::_PARSE;
879 ok = _parse_non_term(r);
880 r.m_state.m_mode = rule::_REJECT;
881 }
882 break;
883
884 //accept the left recursive rule
885 case rule::_ACCEPT:
886 if (lr) {
887 ok = true;
888 }
889 else {
890 r.m_state.m_mode = rule::_PARSE;
891 ok = _parse_non_term(r);
892 r.m_state.m_mode = rule::_ACCEPT;
893 }
894 break;
895 }
896
897 //restore the rule's state
898 r.m_state = old_state;
899
900 return ok;
901}
902
903
904//parse term rule.
905bool _context::parse_term(rule& r) {
906 //save the state of the rule
907 rule::_state old_state = r.m_state;
908
909 //success/failure result
910 bool ok = false;
911
912 //compute the new position
913 size_t new_pos = m_pos.m_it - m_begin;
914
915 //check if we have left recursion
916 bool lr = new_pos == r.m_state.m_pos;
917
918 //update the rule's state
919 r.m_state.m_pos = new_pos;
920
921 //handle the mode of the rule
922 switch (r.m_state.m_mode) {
923 //normal parse
924 case rule::_PARSE:
925 if (lr) {
926 //first try to parse the rule by rejecting it, so alternative branches are examined
927 r.m_state.m_mode = rule::_REJECT;
928 ok = _parse_term(r);
929
930 //if the first try is successful, try accepting the rule,
931 //so other elements of the sequence are parsed
932 if (ok) {
933 r.m_state.m_mode = rule::_ACCEPT;
934
935 //loop until no more parsing can be done
936 for(;;) {
937 //store the correct state, in order to backtrack if the call fails
938 _state st(*this);
939
940 //update the rule position to the current position,
941 //because at this state the rule is resolving the left recursion
942 r.m_state.m_pos = m_pos.m_it - m_begin;
943
944 //if parsing fails, restore the last good state and stop
945 if (!_parse_term(r)) {
946 restore(st);
947 break;
948 }
949 }
950
951 //since the left recursion was resolved successfully,
952 //return via a non-local exit
953 r.m_state = old_state;
954 throw _lr_ok(r.this_ptr());
955 }
956 } else {
957 try {
958 ok = _parse_term(r);
959 }
960 catch (const _lr_ok& ex) {
961 //since left recursions may be mutual, we must test which rule's left recursion
962 //was ended successfully
963 if (ex.m_rule == r.this_ptr()) {
964 ok = true;
965 }
966 else {
967 r.m_state = old_state;
968 throw;
969 }
970 }
971 }
972 break;
973
974 //reject the left recursive rule
975 case rule::_REJECT:
976 if (lr) {
977 ok = false;
978 } else {
979 r.m_state.m_mode = rule::_PARSE;
980 ok = _parse_term(r);
981 r.m_state.m_mode = rule::_REJECT;
982 }
983 break;
984
985 //accept the left recursive rule
986 case rule::_ACCEPT:
987 if (lr) {
988 ok = true;
989 } else {
990 r.m_state.m_mode = rule::_PARSE;
991 ok = _parse_term(r);
992 r.m_state.m_mode = rule::_ACCEPT;
993 }
994 break;
995 }
996
997 //restore the rule's state
998 r.m_state = old_state;
999
1000 return ok;
1001}
1002
1003
1004//parse non-term rule internal.
1005bool _context::_parse_non_term(rule& r) {
1006 bool ok = false;
1007 if (_private::get_parse_proc(r)) {
1008 pos b = m_pos;
1009 ok = _private::get_expr(r)->parse_non_term(*this);
1010 if (ok) {
1011 m_matches.push_back(_match(r.this_ptr(), b, m_pos));
1012 }
1013 } else {
1014 ok = _private::get_expr(r)->parse_non_term(*this);
1015 }
1016 return ok;
1017}
1018
1019
1020//parse term rule internal.
1021bool _context::_parse_term(rule& r) {
1022 bool ok = false;
1023 if (_private::get_parse_proc(r)) {
1024 pos b = m_pos;
1025 ok = _private::get_expr(r)->parse_term(*this);
1026 if (ok) {
1027 m_matches.push_back(_match(r.this_ptr(), b, m_pos));
1028 }
1029 } else {
1030 ok = _private::get_expr(r)->parse_term(*this);
1031 }
1032 return ok;
1033}
1034
1035
1036//get syntax error
1037static error _syntax_error(_context& con) {
1038 return error(con.m_error_pos, con.m_error_pos, ERROR_SYNTAX_ERROR);
1039}
1040
1041
1042//get eof error
1043static error _eof_error(_context& con) {
1044 return error(con.m_error_pos, con.m_error_pos, ERROR_INVALID_EOF);
1045}
1046
1047
1048/** constructor from input.
1049 @param i input.
1050*/
1051pos::pos(input& i) :
1052 m_it(i.begin()),
1053 m_line(1),
1054 m_col(1)
1055{
1056}
1057
1058
1059/** character terminal constructor.
1060 @param c character.
1061*/
1062expr::expr(char c) :
1063 m_expr(new _char(c))
1064{
1065}
1066
1067
1068/** null-terminated string terminal constructor.
1069 @param s null-terminated string.
1070*/
1071expr::expr(const char* s) :
1072 m_expr(new _string(s))
1073{
1074}
1075
1076
1077/** rule reference constructor.
1078 @param r rule.
1079*/
1080expr::expr(rule& r) :
1081 m_expr(new _ref(r))
1082{
1083}
1084
1085
1086/** creates a zero-or-more loop out of this expression.
1087 @return a zero-or-more loop expression.
1088*/
1089expr expr::operator*() const {
1090 return _private::construct_expr(new _loop0(m_expr));
1091}
1092
1093
1094/** creates a one-or-more loop out of this expression.
1095 @return a one-or-more loop expression.
1096*/
1097expr expr::operator+() const {
1098 return _private::construct_expr(new _loop1(m_expr));
1099}
1100
1101
1102/** creates an optional out of this expression.
1103 @return an optional expression.
1104*/
1105expr expr::operator-() const {
1106 return _private::construct_expr(new _optional(m_expr));
1107}
1108
1109
1110/** creates an AND-expression.
1111 @return an AND-expression.
1112*/
1113expr expr::operator&() const {
1114 return _private::construct_expr((new _and(m_expr)));
1115}
1116
1117
1118/** creates a NOT-expression.
1119 @return a NOT-expression.
1120*/
1121expr expr::operator!() const {
1122 return _private::construct_expr(new _not(m_expr));
1123}
1124
1125
1126/** constructor.
1127 @param b begin position.
1128 @param e end position.
1129*/
1130input_range::input_range(const pos& b, const pos& e) :
1131m_begin(b),
1132m_end(e) {}
1133
1134
1135/** constructor.
1136 @param b begin position.
1137 @param e end position.
1138 @param t error type.
1139*/
1140error::error(const pos& b, const pos& e, int t) :
1141input_range(b, e),
1142m_type(t) {}
1143
1144
1145/** compare on begin position.
1146 @param e the other error to compare this with.
1147 @return true if this comes before the previous error, false otherwise.
1148*/
1149bool error::operator<(const error& e) const {
1150 return m_begin.m_it < e.m_begin.m_it;
1151}
1152
1153rule::rule() :
1154m_expr(nullptr),
1155m_parse_proc(nullptr) {}
1156
1157/** character terminal constructor.
1158 @param c character.
1159*/
1160rule::rule(char c) :
1161m_expr(new _char(c)),
1162m_parse_proc(nullptr) {}
1163
1164/** null-terminated string terminal constructor.
1165 @param s null-terminated string.
1166*/
1167rule::rule(const char* s) :
1168m_expr(new _string(s)),
1169m_parse_proc(nullptr) {}
1170
1171/** constructor from expression.
1172 @param e expression.
1173*/
1174rule::rule(const expr& e) :
1175m_expr(_private::get_expr(e)),
1176m_parse_proc(nullptr) {}
1177
1178
1179/** constructor from rule.
1180 @param r rule.
1181*/
1182rule::rule(rule& r) :
1183m_expr(new _ref(r)),
1184m_parse_proc(nullptr) {}
1185
1186rule& rule::operator=(rule& r) {
1187 m_expr = new _ref(r);
1188 return *this;
1189}
1190
1191rule &rule::operator=(const expr& e) {
1192 m_expr = _private::get_expr(e);
1193 return *this;
1194}
1195
1196/** invalid constructor from rule (required by gcc).
1197 @exception std::logic_error always thrown.
1198*/
1199rule::rule(const rule&) {
1200 throw std::logic_error("invalid operation");
1201}
1202
1203
1204/** deletes the internal object that represents the expression.
1205*/
1206rule::~rule() {
1207 delete m_expr;
1208}
1209
1210
1211/** creates a zero-or-more loop out of this rule.
1212 @return a zero-or-more loop rule.
1213*/
1214expr rule::operator*() {
1215 return _private::construct_expr(new _loop0(new _ref(*this)));
1216}
1217
1218
1219/** creates a one-or-more loop out of this rule.
1220 @return a one-or-more loop rule.
1221*/
1222expr rule::operator+() {
1223 return _private::construct_expr(new _loop1(new _ref(*this)));
1224}
1225
1226
1227/** creates an optional out of this rule.
1228 @return an optional rule.
1229*/
1230expr rule::operator-() {
1231 return _private::construct_expr(new _optional(new _ref(*this)));
1232}
1233
1234
1235/** creates an AND-expression out of this rule.
1236 @return an AND-expression out of this rule.
1237*/
1238expr rule::operator&() {
1239 return _private::construct_expr(new _and(new _ref(*this)));
1240}
1241
1242
1243/** creates a NOT-expression out of this rule.
1244 @return a NOT-expression out of this rule.
1245*/
1246expr rule::operator!() {
1247 return _private::construct_expr(new _not(new _ref(*this)));
1248}
1249
1250
1251/** sets the parse procedure.
1252 @param p procedure.
1253*/
1254void rule::set_parse_proc(parse_proc p) {
1255 assert(p);
1256 m_parse_proc = p;
1257}
1258
1259
1260/** creates a sequence of expressions.
1261 @param left left operand.
1262 @param right right operand.
1263 @return an expression which parses a sequence.
1264*/
1265expr operator >> (const expr& left, const expr& right) {
1266 return _private::construct_expr(
1267 new _seq(_private::get_expr(left), _private::get_expr(right)));
1268}
1269
1270
1271/** creates a choice of expressions.
1272 @param left left operand.
1273 @param right right operand.
1274 @return an expression which parses a choice.
1275*/
1276expr operator | (const expr& left, const expr& right) {
1277 return _private::construct_expr(
1278 new _choice(_private::get_expr(left), _private::get_expr(right)));
1279}
1280
1281
1282/** converts a parser expression into a terminal.
1283 @param e expression.
1284 @return an expression which parses a terminal.
1285*/
1286expr term(const expr& e) {
1287 return _private::construct_expr(
1288 new _term(_private::get_expr(e)));
1289}
1290
1291
1292/** creates a set expression from a null-terminated string.
1293 @param s null-terminated string with characters of the set.
1294 @return an expression which parses a single character out of a set.
1295*/
1296expr set(const char* s) {
1297 return _private::construct_expr(new _set(s));
1298}
1299
1300
1301/** creates a range expression.
1302 @param min min character.
1303 @param max max character.
1304 @return an expression which parses a single character out of range.
1305*/
1306expr range(int min, int max) {
1307 return _private::construct_expr(new _set(min, max));
1308}
1309
1310
1311/** creates an expression which increments the line counter
1312 and resets the column counter when the given expression
1313 is parsed successfully; used for newline characters.
1314 @param e expression to wrap into a newline parser.
1315 @return an expression that handles newlines.
1316*/
1317expr nl(const expr& e) {
1318 return _private::construct_expr(new _nl(_private::get_expr(e)));
1319}
1320
1321
1322/** creates an expression which tests for the end of input.
1323 @return an expression that handles the end of input.
1324*/
1325expr eof() {
1326 return _private::construct_expr(new _eof());
1327}
1328
1329
1330/** creates a not expression.
1331 @param e expression.
1332 @return the appropriate expression.
1333*/
1334expr not_(const expr& e) {
1335 return !e;
1336}
1337
1338
1339/** creates an and expression.
1340 @param e expression.
1341 @return the appropriate expression.
1342*/
1343expr and_(const expr& e) {
1344 return &e;
1345}
1346
1347
1348/** creates an expression that parses any character.
1349 @return the appropriate expression.
1350*/
1351expr any() {
1352 return _private::construct_expr(new _any());
1353}
1354
1355
1356/** parsing succeeds without consuming any input.
1357*/
1358expr true_() {
1359 return _private::construct_expr(new _true());
1360}
1361
1362
1363/** parsing fails without consuming any input.
1364*/
1365expr false_() {
1366 return _private::construct_expr(new _false());
1367}
1368
1369
1370/** parse with target expression and let user handle result.
1371*/
1372expr user(const expr& e, const user_handler& handler) {
1373 return _private::construct_expr(new _user(_private::get_expr(e), handler));
1374}
1375
1376
1377/** parses the given input.
1378 The parse procedures of each rule parsed are executed
1379 before this function returns, if parsing succeeds.
1380 @param i input.
1381 @param g root rule of grammar.
1382 @param el list of errors.
1383 @param d user data, passed to the parse procedures.
1384 @return true on parsing success, false on failure.
1385*/
1386bool parse(input& i, rule& g, error_list& el, void* d, void* ud) {
1387 //prepare context
1388 _context con(i, ud);
1389
1390 //parse grammar
1391 if (!con.parse_non_term(g)) {
1392 el.push_back(_syntax_error(con));
1393 return false;
1394 }
1395
1396 //if end is not reached, there was an error
1397 if (!con.end()) {
1398 if (con.m_error_pos.m_it < con.m_end) {
1399 el.push_back(_syntax_error(con));
1400 } else {
1401 el.push_back(_eof_error(con));
1402 }
1403 return false;
1404 }
1405
1406 //success; execute the parse procedures
1407 con.do_parse_procs(d);
1408 return true;
1409}
1410
1411
1412} //namespace parserlib