From 525591758ce178e44da6aa3a11d557fd75b232e7 Mon Sep 17 00:00:00 2001 From: Li Jin Date: Mon, 5 Mar 2018 14:18:35 +0800 Subject: refactoring some codes. --- MoonParser.xcodeproj/project.pbxproj | 14 +++++- MoonParser/ast.cpp | 29 +++++++++++ MoonParser/ast.hpp | 98 ++++++++++++++++++++++-------------- MoonParser/moon_ast.cpp | 38 +++++++++----- MoonParser/moon_ast.h | 21 +++----- MoonParser/moon_parser.cpp | 2 +- MoonParser/parser.cpp | 96 +++++++++++++---------------------- MoonParser/parser.hpp | 39 +++++++------- 8 files changed, 185 insertions(+), 152 deletions(-) diff --git a/MoonParser.xcodeproj/project.pbxproj b/MoonParser.xcodeproj/project.pbxproj index fe4dd22..1216f9c 100644 --- a/MoonParser.xcodeproj/project.pbxproj +++ b/MoonParser.xcodeproj/project.pbxproj @@ -107,7 +107,7 @@ 3C0F0F641EF3781E000EADDB /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 0820; + LastUpgradeCheck = 0920; ORGANIZATIONNAME = "Li Jin"; TargetAttributes = { 3C0F0F6B1EF3781E000EADDB = { @@ -157,7 +157,9 @@ CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_DOCUMENTATION_COMMENTS = YES; @@ -165,7 +167,11 @@ CLANG_WARN_ENUM_CONVERSION = YES; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; @@ -204,7 +210,9 @@ CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_DOCUMENTATION_COMMENTS = YES; @@ -212,7 +220,11 @@ CLANG_WARN_ENUM_CONVERSION = YES; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; diff --git a/MoonParser/ast.cpp b/MoonParser/ast.cpp index 090f5ee..739e02c 100644 --- a/MoonParser/ast.cpp +++ b/MoonParser/ast.cpp @@ -11,6 +11,13 @@ static ast_container *_current = 0; int ast_type_id = 0; +bool ast_node::visit(const std::function& begin, + const std::function& end) +{ + return begin(this) || end(this); +} + + /** sets the container under construction to be this. */ ast_container::ast_container() { @@ -41,6 +48,28 @@ void ast_container::construct(ast_stack &st) { } } +bool ast_container::visit(const std::function& begin, + const std::function& end) +{ + bool result = begin(this); + if (result) return true; + const auto& members = this->members(); + for (auto member : members) { + if (_ast_ptr* ptr = ast_cast<_ast_ptr>(member)) { + if (ptr->get() && ptr->get()->visit(begin, end)) { + return true; + } + } else if (_ast_list* list = ast_cast<_ast_list>(member)) { + for (auto obj : list->objects()) { + if (obj->visit(begin, end)) { + return true; + } + } + } + } + return end(this); +} + //register the AST member to the current container. void ast_member::_init() { diff --git a/MoonParser/ast.hpp b/MoonParser/ast.hpp index 955cdc0..1d05779 100644 --- a/MoonParser/ast.hpp +++ b/MoonParser/ast.hpp @@ -36,12 +36,12 @@ int ast_type() class ast_node : public input_range { public: ///constructor. - ast_node() : m_parent(0) {} + ast_node() : m_parent(nullptr) {} /** copy constructor. @param n source object. */ - ast_node(const ast_node &n) : m_parent(0) {} + ast_node(const ast_node &n) : m_parent(nullptr) {} ///destructor. virtual ~ast_node() {} @@ -64,9 +64,11 @@ public: virtual void construct(ast_stack &st) {} /** interface for visiting AST tree use. - @param user_data vector for storing user data. */ - virtual void visit(void* user_data) {} + virtual bool visit(const std::function& begin, + const std::function& end); + + virtual const char* getName() const { return "ast_node"; } virtual int get_type() { return ast_type(); } private: @@ -96,7 +98,6 @@ bool ast_is(ast_node* node) { class ast_member; - /** type of ast member vector. */ typedef std::vector ast_member_vector; @@ -137,8 +138,12 @@ public: from a node stack. @param st stack. */ - virtual void construct(ast_stack &st); + virtual void construct(ast_stack &st) override; + virtual bool visit(const std::function& begin, + const std::function& end) override; + + virtual const char* getName() const override { return "ast_container"; } private: ast_member_vector m_members; @@ -177,6 +182,7 @@ public: */ virtual void construct(ast_stack &st) = 0; + virtual int get_type() { return ast_type(); } private: //the container this belongs to. ast_container *m_container; @@ -185,6 +191,25 @@ private: void _init(); }; +template +T* ast_cast(ast_member *member) { + return member && ast_type() == member->get_type() ? static_cast(member) : nullptr; +} + +class _ast_ptr : public ast_member { +public: + _ast_ptr(ast_node *node): m_ptr(node) {} + + ast_node* get() const { + return m_ptr; + } + + virtual int get_type() override { + return ast_type<_ast_ptr>(); + } +protected: + ast_node *m_ptr; +}; /** pointer to an AST object. It assumes ownership of the object. @@ -192,12 +217,12 @@ private: @tparam T type of object to control. @tparam OPT if true, the object becomes optional. */ -template class ast_ptr : public ast_member { +template class ast_ptr : public _ast_ptr { public: /** the default constructor. @param obj object. */ - ast_ptr(T *obj = 0) : m_ptr(obj) { + ast_ptr(T *obj = nullptr) : _ast_ptr(obj) { _set_parent(); } @@ -206,7 +231,7 @@ public: @param src source object. */ ast_ptr(const ast_ptr &src) : - m_ptr(src.m_ptr ? new T(*src.m_ptr) : 0) + _ast_ptr(src.m_ptr ? new T(*src.m_ptr) : nullptr) { _set_parent(); } @@ -224,7 +249,7 @@ public: */ ast_ptr &operator = (const T *obj) { delete m_ptr; - m_ptr = obj ? new T(*obj) : 0; + m_ptr = obj ? new T(*obj) : nullptr; _set_parent(); return *this; } @@ -236,7 +261,7 @@ public: */ ast_ptr &operator = (const ast_ptr &src) { delete m_ptr; - m_ptr = src.m_ptr ? new T(*src.m_ptr) : 0; + m_ptr = src.m_ptr ? new T(*src.m_ptr) : nullptr; _set_parent(); return *this; } @@ -245,14 +270,14 @@ public: @return the underlying ptr value. */ T *get() const { - return m_ptr; + return static_cast(m_ptr); } /** auto conversion to the underlying object ptr. @return the underlying ptr value. */ operator T *() const { - return m_ptr; + return static_cast(m_ptr); } /** member access. @@ -299,25 +324,21 @@ public: m_ptr = obj; _set_parent(); } - private: - //ptr - T *m_ptr; - //set parent of object void _set_parent() { if (m_ptr) m_ptr->m_parent = container(); } }; -template class ast_choice : public ast_member { +template class ast_choice : public _ast_ptr { public: - ast_choice(ast_node *obj = 0) : m_ptr(obj) { + ast_choice(ast_node *obj = nullptr) : _ast_ptr(obj) { _set_parent(); } ast_choice(const ast_choice &src) : - m_ptr(src.m_ptr ? new ast_node(*src.m_ptr) : 0) + _ast_ptr(src.m_ptr ? new ast_node(*src.m_ptr) : nullptr) { _set_parent(); } @@ -328,22 +349,18 @@ public: ast_choice &operator = (const ast_node *obj) { delete m_ptr; - m_ptr = obj ? new ast_node(*obj) : 0; + m_ptr = obj ? new ast_node(*obj) : nullptr; _set_parent(); return *this; } ast_choice &operator = (const ast_choice &src) { delete m_ptr; - m_ptr = src.m_ptr ? new ast_node(*src.m_ptr) : 0; + m_ptr = src.m_ptr ? new ast_node(*src.m_ptr) : nullptr; _set_parent(); return *this; } - ast_node *get() const { - return m_ptr; - } - operator ast_node *() const { return m_ptr; } @@ -372,26 +389,33 @@ public: m_ptr = obj; _set_parent(); } - private: - //ptr - ast_node *m_ptr; - void _set_parent() { if (m_ptr) m_ptr->m_parent = container(); } }; +class _ast_list : public ast_member { +public: + ///list type. + typedef std::list container; + + virtual int get_type() override { return ast_type<_ast_list>(); } + + const container &objects() const { + return m_objects; + } +protected: + container m_objects; +}; + /** A list of objects. It pops objects of the given type from the ast stack, until no more objects can be popped. It assumes ownership of objects. @tparam T type of object to control. */ -template class ast_list : public ast_member { +template class ast_list : public _ast_list { public: - ///list type. - typedef std::list container; - ///the default constructor. ast_list() {} @@ -430,7 +454,7 @@ public: /** Pops objects of type T from the stack until no more objects can be popped. @param st stack. */ - virtual void construct(ast_stack &st) { + virtual void construct(ast_stack &st) override { for(;;) { //if the stack is empty if (st.empty()) break; @@ -455,11 +479,7 @@ public: obj->m_parent = ast_member::container(); } } - private: - //objects - container m_objects; - //deletes the objects of this list. void _clear() { while (!m_objects.empty()) { diff --git a/MoonParser/moon_ast.cpp b/MoonParser/moon_ast.cpp index d8a0db9..9627eab 100644 --- a/MoonParser/moon_ast.cpp +++ b/MoonParser/moon_ast.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -7,28 +6,24 @@ #include #include "moon_ast.h" -std::string& trim(std::string& s) +input& trim(input& s) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](input::value_type ch) { return !std::isspace(ch); })); - s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) + s.erase(std::find_if(s.rbegin(), s.rend(), [](input::value_type ch) { return !std::isspace(ch); }).base(), s.end()); return s; } -const std::string& AstLeaf::getValue() +const input& AstLeaf::getValue() { if (_value.empty()) { - for (auto it = m_begin.m_it; it != m_end.m_it; ++it) - { - char ch = static_cast(*it); - _value.append(&ch, 1); - } + _value.assign(m_begin.m_it, m_end.m_it); return trim(_value); } return _value; @@ -151,9 +146,11 @@ AST_IMPL(BlockEnd) int main() { - std::wstring_convert>, char32_t> conv; - std::string s = R"TestCodesHere()TestCodesHere"; - input i = conv.from_bytes(s); + std::string s = R"TestCodesHere( +thing = { var: 10, hello: "world", func: => @var } +import hello, \func from thing +)TestCodesHere"; + input i = Converter{}.from_bytes(s); error_list el; BlockEnd_t* root = nullptr; @@ -161,6 +158,21 @@ int main() if (parse(i, BlockEnd, el, root, &st)) { std::cout << "matched!\n"; + root->visit([](ast_node* node) + { + if (std::string("Seperator") != node->getName()) + { + std::cout << "{" << node->getName(); + } + return false; + }, [](ast_node* node) + { + if (std::string("Seperator") != node->getName()) + { + std::cout << "}" ; + } + return false; + }); } else { diff --git a/MoonParser/moon_ast.h b/MoonParser/moon_ast.h index 7a0e805..4511b1b 100644 --- a/MoonParser/moon_ast.h +++ b/MoonParser/moon_ast.h @@ -2,23 +2,14 @@ #include "moon_parser.h" -template -struct deletable_facet : Facet -{ - template - deletable_facet(Args&& ...args): Facet(std::forward(args)...) {} - ~deletable_facet() {} -}; -typedef std::wstring_convert>, char32_t> Converter; - -std::string& trim(std::string& s); +input& trim(input& s); class AstLeaf : public ast_node { public: - const std::string& getValue(); + const input& getValue(); private: - std::string _value; + input _value; }; #define AST_LEAF(type) \ @@ -26,14 +17,16 @@ extern rule type; \ class type##_t : public AstLeaf \ { \ public: \ - virtual int get_type() override { return ast_type(); } + virtual int get_type() override { return ast_type(); } \ + virtual const char* getName() const override { return #type; } #define AST_NODE(type) \ extern rule type; \ class type##_t : public ast_container \ { \ public: \ - virtual int get_type() override { return ast_type(); } + virtual int get_type() override { return ast_type(); } \ + virtual const char* getName() const override { return #type; } #define AST_END(type) \ }; diff --git a/MoonParser/moon_parser.cpp b/MoonParser/moon_parser.cpp index 3069659..7731513 100644 --- a/MoonParser/moon_parser.cpp +++ b/MoonParser/moon_parser.cpp @@ -1,8 +1,8 @@ #include "moon_parser.h" -rule Any = any(); rule plain_space = *set(" \t"); rule Break = nl(-expr('\r') >> '\n'); +rule Any = Break | any(); rule White = *(set(" \t") | Break); rule Stop = Break | eof(); rule Comment = "--" >> *(not_(set("\r\n")) >> Any) >> and_(Stop); diff --git a/MoonParser/parser.cpp b/MoonParser/parser.cpp index 6778368..2378abe 100644 --- a/MoonParser/parser.cpp +++ b/MoonParser/parser.cpp @@ -3,6 +3,8 @@ #include #include #include +#include + #include "parser.hpp" @@ -136,7 +138,7 @@ public: } //get the current symbol - int symbol() const { + input::value_type symbol() const { assert(!end()); return *m_pos.m_it; } @@ -212,7 +214,7 @@ public: class _char : public _expr { public: //constructor. - _char(int c) : + _char(char c) : m_char(c) { } @@ -229,12 +231,12 @@ public: private: //character - int m_char; + input::value_type m_char; //internal parse bool _parse(_context &con) const { if (!con.end()) { - int ch = con.symbol(); + input::value_type ch = con.symbol(); if (ch == m_char) { con.next_col(); return true; @@ -251,13 +253,7 @@ class _string : public _expr { public: //constructor from ansi string. _string(const char *s) : - m_string(s, s + strlen(s)) - { - } - - //constructor from wide string. - _string(const wchar_t *s) : - m_string(s, s + wcslen(s)) + m_string(Converter{}.from_bytes(s)) { } @@ -273,11 +269,11 @@ public: private: //string - std::vector m_string; + input m_string; //parse the string bool _parse(_context &con) const { - for(std::vector::const_iterator it = m_string.begin(), + for(input::const_iterator it = m_string.begin(), end = m_string.end();;) { if (it == end) return true; @@ -297,25 +293,19 @@ class _set : public _expr { public: //constructor from ansi string. _set(const char *s) { - for(; *s; ++s) { - _add(*s); - } - } - - //constructor from wide string. - _set(const wchar_t *s) { - for(; *s; ++s) { - _add(*s); - } + auto str = Converter{}.from_bytes(s); + for (auto ch : str) { + _add(ch); + } } //constructor from range. _set(int min, int max) { assert(min >= 0); assert(min <= max); - m_set.resize((size_t)max + 1U); + m_quick_set.resize((size_t)max + 1U); for(; min <= max; ++min) { - m_set[(size_t)min] = true; + m_quick_set[(size_t)min] = true; } } @@ -331,25 +321,35 @@ public: private: //set is kept as an array of flags, for quick access - std::vector m_set; + std::vector m_quick_set; + std::unordered_set m_large_set; //add character void _add(size_t i) { - if (i >= m_set.size()) { - m_set.resize(i + 1); - } - m_set[i] = true; + if (i <= m_quick_set.size() || i <= 255) { + if (i >= m_quick_set.size()) { + m_quick_set.resize(i + 1); + } + m_quick_set[i] = true; + } else { + m_large_set.insert(i); + } } //internal parse bool _parse(_context &con) const { if (!con.end()) { size_t ch = con.symbol(); - if (ch < m_set.size() && m_set[ch]) { + if (ch < m_quick_set.size()) { + if (m_quick_set[ch]) { + con.next_col(); + return true; + } + } else if (m_large_set.find(ch) != m_large_set.end()) { con.next_col(); return true; } - } + } con.set_error_pos(); return false; } @@ -1074,7 +1074,7 @@ pos::pos(input &i) : /** character terminal constructor. @param c character. */ -expr::expr(int c) : +expr::expr(char c) : m_expr(new _char(c)) { } @@ -1089,15 +1089,6 @@ expr::expr(const char *s) : } -/** null-terminated wide string terminal constructor. - @param s null-terminated string. - */ -expr::expr(const wchar_t *s) : - m_expr(new _string(s)) -{ -} - - /** rule reference constructor. @param r rule. */ @@ -1182,7 +1173,7 @@ bool error::operator < (const error &e) const { /** character terminal constructor. @param c character. */ -rule::rule(int c) : +rule::rule(char c) : m_expr(new _char(c)) { m_parse_proc = _get_parse_proc(this); @@ -1199,16 +1190,6 @@ rule::rule(const char *s) : } -/** null-terminated wide string terminal constructor. - @param s null-terminated string. - */ -rule::rule(const wchar_t *s) : - m_expr(new _string(s)) -{ - m_parse_proc = _get_parse_proc(this); -} - - /** constructor from expression. @param e expression. */ @@ -1338,15 +1319,6 @@ expr set(const char *s) { } -/** creates a set expression from a null-terminated wide string. - @param s null-terminated string with characters of the set. - @return an expression which parses a single character out of a set. - */ -expr set(const wchar_t *s) { - return _private::construct_expr(new _set(s)); -} - - /** creates a range expression. @param min min character. @param max max character. diff --git a/MoonParser/parser.hpp b/MoonParser/parser.hpp index b280155..66d6067 100644 --- a/MoonParser/parser.hpp +++ b/MoonParser/parser.hpp @@ -13,8 +13,23 @@ #include #include #include +#include +#include +///type of the parser's input. +typedef std::basic_string input; +typedef input::iterator input_it; + +template +struct deletable_facet : Facet +{ + template + deletable_facet(Args&& ...args): Facet(std::forward(args)...) {} + ~deletable_facet() {} +}; +typedef std::wstring_convert>, input::value_type> Converter; + namespace parserlib { @@ -24,9 +39,6 @@ class _context; class rule; -///type of the parser's input. -typedef std::u32string input; -typedef input::iterator input_it; struct item_t { input_it begin; @@ -65,18 +77,13 @@ public: /** character terminal constructor. @param c character. */ - expr(int c); + expr(char c); /** null-terminated string terminal constructor. @param s null-terminated string. */ expr(const char *s); - /** null-terminated wide string terminal constructor. - @param s null-terminated string. - */ - expr(const wchar_t *s); - /** rule reference constructor. @param r rule. */ @@ -195,18 +202,13 @@ public: /** character terminal constructor. @param c character. */ - rule(int c); + rule(char c); /** null-terminated string terminal constructor. @param s null-terminated string. */ rule(const char *s); - /** null-terminated wide string terminal constructor. - @param s null-terminated string. - */ - rule(const wchar_t *s); - /** constructor from expression. @param e expression. */ @@ -330,13 +332,6 @@ expr term(const expr &e); expr set(const char *s); -/** creates a set expression from a null-terminated wide string. - @param s null-terminated string with characters of the set. - @return an expression which parses a single character out of a set. - */ -expr set(const wchar_t *s); - - /** creates a range expression. @param min min character. @param max max character. -- cgit v1.2.3-55-g6feb