diff options
Diffstat (limited to 'MoonParser')
-rw-r--r-- | MoonParser/ast.cpp | 29 | ||||
-rw-r--r-- | MoonParser/ast.hpp | 98 | ||||
-rw-r--r-- | MoonParser/moon_ast.cpp | 38 | ||||
-rw-r--r-- | MoonParser/moon_ast.h | 21 | ||||
-rw-r--r-- | MoonParser/moon_parser.cpp | 2 | ||||
-rw-r--r-- | MoonParser/parser.cpp | 96 | ||||
-rw-r--r-- | MoonParser/parser.hpp | 39 |
7 files changed, 172 insertions, 151 deletions
diff --git a/MoonParser/ast.cpp b/MoonParser/ast.cpp index 090f5ee..739e02c 100644 --- a/MoonParser/ast.cpp +++ b/MoonParser/ast.cpp | |||
@@ -11,6 +11,13 @@ static ast_container *_current = 0; | |||
11 | int ast_type_id = 0; | 11 | int ast_type_id = 0; |
12 | 12 | ||
13 | 13 | ||
14 | bool ast_node::visit(const std::function<bool (ast_node*)>& begin, | ||
15 | const std::function<bool (ast_node*)>& end) | ||
16 | { | ||
17 | return begin(this) || end(this); | ||
18 | } | ||
19 | |||
20 | |||
14 | /** sets the container under construction to be this. | 21 | /** sets the container under construction to be this. |
15 | */ | 22 | */ |
16 | ast_container::ast_container() { | 23 | ast_container::ast_container() { |
@@ -41,6 +48,28 @@ void ast_container::construct(ast_stack &st) { | |||
41 | } | 48 | } |
42 | } | 49 | } |
43 | 50 | ||
51 | bool ast_container::visit(const std::function<bool (ast_node*)>& begin, | ||
52 | const std::function<bool (ast_node*)>& end) | ||
53 | { | ||
54 | bool result = begin(this); | ||
55 | if (result) return true; | ||
56 | const auto& members = this->members(); | ||
57 | for (auto member : members) { | ||
58 | if (_ast_ptr* ptr = ast_cast<_ast_ptr>(member)) { | ||
59 | if (ptr->get() && ptr->get()->visit(begin, end)) { | ||
60 | return true; | ||
61 | } | ||
62 | } else if (_ast_list* list = ast_cast<_ast_list>(member)) { | ||
63 | for (auto obj : list->objects()) { | ||
64 | if (obj->visit(begin, end)) { | ||
65 | return true; | ||
66 | } | ||
67 | } | ||
68 | } | ||
69 | } | ||
70 | return end(this); | ||
71 | } | ||
72 | |||
44 | 73 | ||
45 | //register the AST member to the current container. | 74 | //register the AST member to the current container. |
46 | void ast_member::_init() { | 75 | void ast_member::_init() { |
diff --git a/MoonParser/ast.hpp b/MoonParser/ast.hpp index 955cdc0..1d05779 100644 --- a/MoonParser/ast.hpp +++ b/MoonParser/ast.hpp | |||
@@ -36,12 +36,12 @@ int ast_type() | |||
36 | class ast_node : public input_range { | 36 | class ast_node : public input_range { |
37 | public: | 37 | public: |
38 | ///constructor. | 38 | ///constructor. |
39 | ast_node() : m_parent(0) {} | 39 | ast_node() : m_parent(nullptr) {} |
40 | 40 | ||
41 | /** copy constructor. | 41 | /** copy constructor. |
42 | @param n source object. | 42 | @param n source object. |
43 | */ | 43 | */ |
44 | ast_node(const ast_node &n) : m_parent(0) {} | 44 | ast_node(const ast_node &n) : m_parent(nullptr) {} |
45 | 45 | ||
46 | ///destructor. | 46 | ///destructor. |
47 | virtual ~ast_node() {} | 47 | virtual ~ast_node() {} |
@@ -64,9 +64,11 @@ public: | |||
64 | virtual void construct(ast_stack &st) {} | 64 | virtual void construct(ast_stack &st) {} |
65 | 65 | ||
66 | /** interface for visiting AST tree use. | 66 | /** interface for visiting AST tree use. |
67 | @param user_data vector for storing user data. | ||
68 | */ | 67 | */ |
69 | virtual void visit(void* user_data) {} | 68 | virtual bool visit(const std::function<bool (ast_node*)>& begin, |
69 | const std::function<bool (ast_node*)>& end); | ||
70 | |||
71 | virtual const char* getName() const { return "ast_node"; } | ||
70 | 72 | ||
71 | virtual int get_type() { return ast_type<ast_node>(); } | 73 | virtual int get_type() { return ast_type<ast_node>(); } |
72 | private: | 74 | private: |
@@ -96,7 +98,6 @@ bool ast_is(ast_node* node) { | |||
96 | 98 | ||
97 | class ast_member; | 99 | class ast_member; |
98 | 100 | ||
99 | |||
100 | /** type of ast member vector. | 101 | /** type of ast member vector. |
101 | */ | 102 | */ |
102 | typedef std::vector<ast_member *> ast_member_vector; | 103 | typedef std::vector<ast_member *> ast_member_vector; |
@@ -137,8 +138,12 @@ public: | |||
137 | from a node stack. | 138 | from a node stack. |
138 | @param st stack. | 139 | @param st stack. |
139 | */ | 140 | */ |
140 | virtual void construct(ast_stack &st); | 141 | virtual void construct(ast_stack &st) override; |
141 | 142 | ||
143 | virtual bool visit(const std::function<bool (ast_node*)>& begin, | ||
144 | const std::function<bool (ast_node*)>& end) override; | ||
145 | |||
146 | virtual const char* getName() const override { return "ast_container"; } | ||
142 | private: | 147 | private: |
143 | ast_member_vector m_members; | 148 | ast_member_vector m_members; |
144 | 149 | ||
@@ -177,6 +182,7 @@ public: | |||
177 | */ | 182 | */ |
178 | virtual void construct(ast_stack &st) = 0; | 183 | virtual void construct(ast_stack &st) = 0; |
179 | 184 | ||
185 | virtual int get_type() { return ast_type<ast_member>(); } | ||
180 | private: | 186 | private: |
181 | //the container this belongs to. | 187 | //the container this belongs to. |
182 | ast_container *m_container; | 188 | ast_container *m_container; |
@@ -185,6 +191,25 @@ private: | |||
185 | void _init(); | 191 | void _init(); |
186 | }; | 192 | }; |
187 | 193 | ||
194 | template<class T> | ||
195 | T* ast_cast(ast_member *member) { | ||
196 | return member && ast_type<T>() == member->get_type() ? static_cast<T*>(member) : nullptr; | ||
197 | } | ||
198 | |||
199 | class _ast_ptr : public ast_member { | ||
200 | public: | ||
201 | _ast_ptr(ast_node *node): m_ptr(node) {} | ||
202 | |||
203 | ast_node* get() const { | ||
204 | return m_ptr; | ||
205 | } | ||
206 | |||
207 | virtual int get_type() override { | ||
208 | return ast_type<_ast_ptr>(); | ||
209 | } | ||
210 | protected: | ||
211 | ast_node *m_ptr; | ||
212 | }; | ||
188 | 213 | ||
189 | /** pointer to an AST object. | 214 | /** pointer to an AST object. |
190 | It assumes ownership of the object. | 215 | It assumes ownership of the object. |
@@ -192,12 +217,12 @@ private: | |||
192 | @tparam T type of object to control. | 217 | @tparam T type of object to control. |
193 | @tparam OPT if true, the object becomes optional. | 218 | @tparam OPT if true, the object becomes optional. |
194 | */ | 219 | */ |
195 | template <class T, bool OPT = false> class ast_ptr : public ast_member { | 220 | template <class T, bool OPT = false> class ast_ptr : public _ast_ptr { |
196 | public: | 221 | public: |
197 | /** the default constructor. | 222 | /** the default constructor. |
198 | @param obj object. | 223 | @param obj object. |
199 | */ | 224 | */ |
200 | ast_ptr(T *obj = 0) : m_ptr(obj) { | 225 | ast_ptr(T *obj = nullptr) : _ast_ptr(obj) { |
201 | _set_parent(); | 226 | _set_parent(); |
202 | } | 227 | } |
203 | 228 | ||
@@ -206,7 +231,7 @@ public: | |||
206 | @param src source object. | 231 | @param src source object. |
207 | */ | 232 | */ |
208 | ast_ptr(const ast_ptr<T, OPT> &src) : | 233 | ast_ptr(const ast_ptr<T, OPT> &src) : |
209 | m_ptr(src.m_ptr ? new T(*src.m_ptr) : 0) | 234 | _ast_ptr(src.m_ptr ? new T(*src.m_ptr) : nullptr) |
210 | { | 235 | { |
211 | _set_parent(); | 236 | _set_parent(); |
212 | } | 237 | } |
@@ -224,7 +249,7 @@ public: | |||
224 | */ | 249 | */ |
225 | ast_ptr<T, OPT> &operator = (const T *obj) { | 250 | ast_ptr<T, OPT> &operator = (const T *obj) { |
226 | delete m_ptr; | 251 | delete m_ptr; |
227 | m_ptr = obj ? new T(*obj) : 0; | 252 | m_ptr = obj ? new T(*obj) : nullptr; |
228 | _set_parent(); | 253 | _set_parent(); |
229 | return *this; | 254 | return *this; |
230 | } | 255 | } |
@@ -236,7 +261,7 @@ public: | |||
236 | */ | 261 | */ |
237 | ast_ptr<T, OPT> &operator = (const ast_ptr<T, OPT> &src) { | 262 | ast_ptr<T, OPT> &operator = (const ast_ptr<T, OPT> &src) { |
238 | delete m_ptr; | 263 | delete m_ptr; |
239 | m_ptr = src.m_ptr ? new T(*src.m_ptr) : 0; | 264 | m_ptr = src.m_ptr ? new T(*src.m_ptr) : nullptr; |
240 | _set_parent(); | 265 | _set_parent(); |
241 | return *this; | 266 | return *this; |
242 | } | 267 | } |
@@ -245,14 +270,14 @@ public: | |||
245 | @return the underlying ptr value. | 270 | @return the underlying ptr value. |
246 | */ | 271 | */ |
247 | T *get() const { | 272 | T *get() const { |
248 | return m_ptr; | 273 | return static_cast<T*>(m_ptr); |
249 | } | 274 | } |
250 | 275 | ||
251 | /** auto conversion to the underlying object ptr. | 276 | /** auto conversion to the underlying object ptr. |
252 | @return the underlying ptr value. | 277 | @return the underlying ptr value. |
253 | */ | 278 | */ |
254 | operator T *() const { | 279 | operator T *() const { |
255 | return m_ptr; | 280 | return static_cast<T*>(m_ptr); |
256 | } | 281 | } |
257 | 282 | ||
258 | /** member access. | 283 | /** member access. |
@@ -299,25 +324,21 @@ public: | |||
299 | m_ptr = obj; | 324 | m_ptr = obj; |
300 | _set_parent(); | 325 | _set_parent(); |
301 | } | 326 | } |
302 | |||
303 | private: | 327 | private: |
304 | //ptr | ||
305 | T *m_ptr; | ||
306 | |||
307 | //set parent of object | 328 | //set parent of object |
308 | void _set_parent() { | 329 | void _set_parent() { |
309 | if (m_ptr) m_ptr->m_parent = container(); | 330 | if (m_ptr) m_ptr->m_parent = container(); |
310 | } | 331 | } |
311 | }; | 332 | }; |
312 | 333 | ||
313 | template <class ...Args> class ast_choice : public ast_member { | 334 | template <class ...Args> class ast_choice : public _ast_ptr { |
314 | public: | 335 | public: |
315 | ast_choice(ast_node *obj = 0) : m_ptr(obj) { | 336 | ast_choice(ast_node *obj = nullptr) : _ast_ptr(obj) { |
316 | _set_parent(); | 337 | _set_parent(); |
317 | } | 338 | } |
318 | 339 | ||
319 | ast_choice(const ast_choice<Args...> &src) : | 340 | ast_choice(const ast_choice<Args...> &src) : |
320 | m_ptr(src.m_ptr ? new ast_node(*src.m_ptr) : 0) | 341 | _ast_ptr(src.m_ptr ? new ast_node(*src.m_ptr) : nullptr) |
321 | { | 342 | { |
322 | _set_parent(); | 343 | _set_parent(); |
323 | } | 344 | } |
@@ -328,22 +349,18 @@ public: | |||
328 | 349 | ||
329 | ast_choice<Args...> &operator = (const ast_node *obj) { | 350 | ast_choice<Args...> &operator = (const ast_node *obj) { |
330 | delete m_ptr; | 351 | delete m_ptr; |
331 | m_ptr = obj ? new ast_node(*obj) : 0; | 352 | m_ptr = obj ? new ast_node(*obj) : nullptr; |
332 | _set_parent(); | 353 | _set_parent(); |
333 | return *this; | 354 | return *this; |
334 | } | 355 | } |
335 | 356 | ||
336 | ast_choice<Args...> &operator = (const ast_choice<Args...> &src) { | 357 | ast_choice<Args...> &operator = (const ast_choice<Args...> &src) { |
337 | delete m_ptr; | 358 | delete m_ptr; |
338 | m_ptr = src.m_ptr ? new ast_node(*src.m_ptr) : 0; | 359 | m_ptr = src.m_ptr ? new ast_node(*src.m_ptr) : nullptr; |
339 | _set_parent(); | 360 | _set_parent(); |
340 | return *this; | 361 | return *this; |
341 | } | 362 | } |
342 | 363 | ||
343 | ast_node *get() const { | ||
344 | return m_ptr; | ||
345 | } | ||
346 | |||
347 | operator ast_node *() const { | 364 | operator ast_node *() const { |
348 | return m_ptr; | 365 | return m_ptr; |
349 | } | 366 | } |
@@ -372,26 +389,33 @@ public: | |||
372 | m_ptr = obj; | 389 | m_ptr = obj; |
373 | _set_parent(); | 390 | _set_parent(); |
374 | } | 391 | } |
375 | |||
376 | private: | 392 | private: |
377 | //ptr | ||
378 | ast_node *m_ptr; | ||
379 | |||
380 | void _set_parent() { | 393 | void _set_parent() { |
381 | if (m_ptr) m_ptr->m_parent = container(); | 394 | if (m_ptr) m_ptr->m_parent = container(); |
382 | } | 395 | } |
383 | }; | 396 | }; |
384 | 397 | ||
398 | class _ast_list : public ast_member { | ||
399 | public: | ||
400 | ///list type. | ||
401 | typedef std::list<ast_node *> container; | ||
402 | |||
403 | virtual int get_type() override { return ast_type<_ast_list>(); } | ||
404 | |||
405 | const container &objects() const { | ||
406 | return m_objects; | ||
407 | } | ||
408 | protected: | ||
409 | container m_objects; | ||
410 | }; | ||
411 | |||
385 | /** A list of objects. | 412 | /** A list of objects. |
386 | It pops objects of the given type from the ast stack, until no more objects can be popped. | 413 | It pops objects of the given type from the ast stack, until no more objects can be popped. |
387 | It assumes ownership of objects. | 414 | It assumes ownership of objects. |
388 | @tparam T type of object to control. | 415 | @tparam T type of object to control. |
389 | */ | 416 | */ |
390 | template <class T> class ast_list : public ast_member { | 417 | template <class T> class ast_list : public _ast_list { |
391 | public: | 418 | public: |
392 | ///list type. | ||
393 | typedef std::list<T *> container; | ||
394 | |||
395 | ///the default constructor. | 419 | ///the default constructor. |
396 | ast_list() {} | 420 | ast_list() {} |
397 | 421 | ||
@@ -430,7 +454,7 @@ public: | |||
430 | /** Pops objects of type T from the stack until no more objects can be popped. | 454 | /** Pops objects of type T from the stack until no more objects can be popped. |
431 | @param st stack. | 455 | @param st stack. |
432 | */ | 456 | */ |
433 | virtual void construct(ast_stack &st) { | 457 | virtual void construct(ast_stack &st) override { |
434 | for(;;) { | 458 | for(;;) { |
435 | //if the stack is empty | 459 | //if the stack is empty |
436 | if (st.empty()) break; | 460 | if (st.empty()) break; |
@@ -455,11 +479,7 @@ public: | |||
455 | obj->m_parent = ast_member::container(); | 479 | obj->m_parent = ast_member::container(); |
456 | } | 480 | } |
457 | } | 481 | } |
458 | |||
459 | private: | 482 | private: |
460 | //objects | ||
461 | container m_objects; | ||
462 | |||
463 | //deletes the objects of this list. | 483 | //deletes the objects of this list. |
464 | void _clear() { | 484 | void _clear() { |
465 | while (!m_objects.empty()) { | 485 | while (!m_objects.empty()) { |
diff --git a/MoonParser/moon_ast.cpp b/MoonParser/moon_ast.cpp index d8a0db9..9627eab 100644 --- a/MoonParser/moon_ast.cpp +++ b/MoonParser/moon_ast.cpp | |||
@@ -1,5 +1,4 @@ | |||
1 | #include <string> | 1 | #include <string> |
2 | #include <codecvt> | ||
3 | #include <unordered_set> | 2 | #include <unordered_set> |
4 | #include <stack> | 3 | #include <stack> |
5 | #include <algorithm> | 4 | #include <algorithm> |
@@ -7,28 +6,24 @@ | |||
7 | #include <vector> | 6 | #include <vector> |
8 | #include "moon_ast.h" | 7 | #include "moon_ast.h" |
9 | 8 | ||
10 | std::string& trim(std::string& s) | 9 | input& trim(input& s) |
11 | { | 10 | { |
12 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) | 11 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](input::value_type ch) |
13 | { | 12 | { |
14 | return !std::isspace(ch); | 13 | return !std::isspace(ch); |
15 | })); | 14 | })); |
16 | s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) | 15 | s.erase(std::find_if(s.rbegin(), s.rend(), [](input::value_type ch) |
17 | { | 16 | { |
18 | return !std::isspace(ch); | 17 | return !std::isspace(ch); |
19 | }).base(), s.end()); | 18 | }).base(), s.end()); |
20 | return s; | 19 | return s; |
21 | } | 20 | } |
22 | 21 | ||
23 | const std::string& AstLeaf::getValue() | 22 | const input& AstLeaf::getValue() |
24 | { | 23 | { |
25 | if (_value.empty()) | 24 | if (_value.empty()) |
26 | { | 25 | { |
27 | for (auto it = m_begin.m_it; it != m_end.m_it; ++it) | 26 | _value.assign(m_begin.m_it, m_end.m_it); |
28 | { | ||
29 | char ch = static_cast<char>(*it); | ||
30 | _value.append(&ch, 1); | ||
31 | } | ||
32 | return trim(_value); | 27 | return trim(_value); |
33 | } | 28 | } |
34 | return _value; | 29 | return _value; |
@@ -151,9 +146,11 @@ AST_IMPL(BlockEnd) | |||
151 | 146 | ||
152 | int main() | 147 | int main() |
153 | { | 148 | { |
154 | std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv; | 149 | std::string s = R"TestCodesHere( |
155 | std::string s = R"TestCodesHere()TestCodesHere"; | 150 | thing = { var: 10, hello: "world", func: => @var } |
156 | input i = conv.from_bytes(s); | 151 | import hello, \func from thing |
152 | )TestCodesHere"; | ||
153 | input i = Converter{}.from_bytes(s); | ||
157 | 154 | ||
158 | error_list el; | 155 | error_list el; |
159 | BlockEnd_t* root = nullptr; | 156 | BlockEnd_t* root = nullptr; |
@@ -161,6 +158,21 @@ int main() | |||
161 | if (parse(i, BlockEnd, el, root, &st)) | 158 | if (parse(i, BlockEnd, el, root, &st)) |
162 | { | 159 | { |
163 | std::cout << "matched!\n"; | 160 | std::cout << "matched!\n"; |
161 | root->visit([](ast_node* node) | ||
162 | { | ||
163 | if (std::string("Seperator") != node->getName()) | ||
164 | { | ||
165 | std::cout << "{" << node->getName(); | ||
166 | } | ||
167 | return false; | ||
168 | }, [](ast_node* node) | ||
169 | { | ||
170 | if (std::string("Seperator") != node->getName()) | ||
171 | { | ||
172 | std::cout << "}" ; | ||
173 | } | ||
174 | return false; | ||
175 | }); | ||
164 | } | 176 | } |
165 | else | 177 | else |
166 | { | 178 | { |
diff --git a/MoonParser/moon_ast.h b/MoonParser/moon_ast.h index 7a0e805..4511b1b 100644 --- a/MoonParser/moon_ast.h +++ b/MoonParser/moon_ast.h | |||
@@ -2,23 +2,14 @@ | |||
2 | 2 | ||
3 | #include "moon_parser.h" | 3 | #include "moon_parser.h" |
4 | 4 | ||
5 | template<class Facet> | 5 | input& trim(input& s); |
6 | struct deletable_facet : Facet | ||
7 | { | ||
8 | template<class ...Args> | ||
9 | deletable_facet(Args&& ...args): Facet(std::forward<Args>(args)...) {} | ||
10 | ~deletable_facet() {} | ||
11 | }; | ||
12 | typedef std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> Converter; | ||
13 | |||
14 | std::string& trim(std::string& s); | ||
15 | 6 | ||
16 | class AstLeaf : public ast_node | 7 | class AstLeaf : public ast_node |
17 | { | 8 | { |
18 | public: | 9 | public: |
19 | const std::string& getValue(); | 10 | const input& getValue(); |
20 | private: | 11 | private: |
21 | std::string _value; | 12 | input _value; |
22 | }; | 13 | }; |
23 | 14 | ||
24 | #define AST_LEAF(type) \ | 15 | #define AST_LEAF(type) \ |
@@ -26,14 +17,16 @@ extern rule type; \ | |||
26 | class type##_t : public AstLeaf \ | 17 | class type##_t : public AstLeaf \ |
27 | { \ | 18 | { \ |
28 | public: \ | 19 | public: \ |
29 | virtual int get_type() override { return ast_type<type##_t>(); } | 20 | virtual int get_type() override { return ast_type<type##_t>(); } \ |
21 | virtual const char* getName() const override { return #type; } | ||
30 | 22 | ||
31 | #define AST_NODE(type) \ | 23 | #define AST_NODE(type) \ |
32 | extern rule type; \ | 24 | extern rule type; \ |
33 | class type##_t : public ast_container \ | 25 | class type##_t : public ast_container \ |
34 | { \ | 26 | { \ |
35 | public: \ | 27 | public: \ |
36 | virtual int get_type() override { return ast_type<type##_t>(); } | 28 | virtual int get_type() override { return ast_type<type##_t>(); } \ |
29 | virtual const char* getName() const override { return #type; } | ||
37 | 30 | ||
38 | #define AST_END(type) \ | 31 | #define AST_END(type) \ |
39 | }; | 32 | }; |
diff --git a/MoonParser/moon_parser.cpp b/MoonParser/moon_parser.cpp index 3069659..7731513 100644 --- a/MoonParser/moon_parser.cpp +++ b/MoonParser/moon_parser.cpp | |||
@@ -1,8 +1,8 @@ | |||
1 | #include "moon_parser.h" | 1 | #include "moon_parser.h" |
2 | 2 | ||
3 | rule Any = any(); | ||
4 | rule plain_space = *set(" \t"); | 3 | rule plain_space = *set(" \t"); |
5 | rule Break = nl(-expr('\r') >> '\n'); | 4 | rule Break = nl(-expr('\r') >> '\n'); |
5 | rule Any = Break | any(); | ||
6 | rule White = *(set(" \t") | Break); | 6 | rule White = *(set(" \t") | Break); |
7 | rule Stop = Break | eof(); | 7 | rule Stop = Break | eof(); |
8 | rule Comment = "--" >> *(not_(set("\r\n")) >> Any) >> and_(Stop); | 8 | rule Comment = "--" >> *(not_(set("\r\n")) >> Any) >> and_(Stop); |
diff --git a/MoonParser/parser.cpp b/MoonParser/parser.cpp index 6778368..2378abe 100644 --- a/MoonParser/parser.cpp +++ b/MoonParser/parser.cpp | |||
@@ -3,6 +3,8 @@ | |||
3 | #include <cassert> | 3 | #include <cassert> |
4 | #include <stdexcept> | 4 | #include <stdexcept> |
5 | #include <unordered_map> | 5 | #include <unordered_map> |
6 | #include <unordered_set> | ||
7 | |||
6 | #include "parser.hpp" | 8 | #include "parser.hpp" |
7 | 9 | ||
8 | 10 | ||
@@ -136,7 +138,7 @@ public: | |||
136 | } | 138 | } |
137 | 139 | ||
138 | //get the current symbol | 140 | //get the current symbol |
139 | int symbol() const { | 141 | input::value_type symbol() const { |
140 | assert(!end()); | 142 | assert(!end()); |
141 | return *m_pos.m_it; | 143 | return *m_pos.m_it; |
142 | } | 144 | } |
@@ -212,7 +214,7 @@ public: | |||
212 | class _char : public _expr { | 214 | class _char : public _expr { |
213 | public: | 215 | public: |
214 | //constructor. | 216 | //constructor. |
215 | _char(int c) : | 217 | _char(char c) : |
216 | m_char(c) | 218 | m_char(c) |
217 | { | 219 | { |
218 | } | 220 | } |
@@ -229,12 +231,12 @@ public: | |||
229 | 231 | ||
230 | private: | 232 | private: |
231 | //character | 233 | //character |
232 | int m_char; | 234 | input::value_type m_char; |
233 | 235 | ||
234 | //internal parse | 236 | //internal parse |
235 | bool _parse(_context &con) const { | 237 | bool _parse(_context &con) const { |
236 | if (!con.end()) { | 238 | if (!con.end()) { |
237 | int ch = con.symbol(); | 239 | input::value_type ch = con.symbol(); |
238 | if (ch == m_char) { | 240 | if (ch == m_char) { |
239 | con.next_col(); | 241 | con.next_col(); |
240 | return true; | 242 | return true; |
@@ -251,13 +253,7 @@ class _string : public _expr { | |||
251 | public: | 253 | public: |
252 | //constructor from ansi string. | 254 | //constructor from ansi string. |
253 | _string(const char *s) : | 255 | _string(const char *s) : |
254 | m_string(s, s + strlen(s)) | 256 | m_string(Converter{}.from_bytes(s)) |
255 | { | ||
256 | } | ||
257 | |||
258 | //constructor from wide string. | ||
259 | _string(const wchar_t *s) : | ||
260 | m_string(s, s + wcslen(s)) | ||
261 | { | 257 | { |
262 | } | 258 | } |
263 | 259 | ||
@@ -273,11 +269,11 @@ public: | |||
273 | 269 | ||
274 | private: | 270 | private: |
275 | //string | 271 | //string |
276 | std::vector<char32_t> m_string; | 272 | input m_string; |
277 | 273 | ||
278 | //parse the string | 274 | //parse the string |
279 | bool _parse(_context &con) const { | 275 | bool _parse(_context &con) const { |
280 | for(std::vector<char32_t>::const_iterator it = m_string.begin(), | 276 | for(input::const_iterator it = m_string.begin(), |
281 | end = m_string.end();;) | 277 | end = m_string.end();;) |
282 | { | 278 | { |
283 | if (it == end) return true; | 279 | if (it == end) return true; |
@@ -297,25 +293,19 @@ class _set : public _expr { | |||
297 | public: | 293 | public: |
298 | //constructor from ansi string. | 294 | //constructor from ansi string. |
299 | _set(const char *s) { | 295 | _set(const char *s) { |
300 | for(; *s; ++s) { | 296 | auto str = Converter{}.from_bytes(s); |
301 | _add(*s); | 297 | for (auto ch : str) { |
302 | } | 298 | _add(ch); |
303 | } | 299 | } |
304 | |||
305 | //constructor from wide string. | ||
306 | _set(const wchar_t *s) { | ||
307 | for(; *s; ++s) { | ||
308 | _add(*s); | ||
309 | } | ||
310 | } | 300 | } |
311 | 301 | ||
312 | //constructor from range. | 302 | //constructor from range. |
313 | _set(int min, int max) { | 303 | _set(int min, int max) { |
314 | assert(min >= 0); | 304 | assert(min >= 0); |
315 | assert(min <= max); | 305 | assert(min <= max); |
316 | m_set.resize((size_t)max + 1U); | 306 | m_quick_set.resize((size_t)max + 1U); |
317 | for(; min <= max; ++min) { | 307 | for(; min <= max; ++min) { |
318 | m_set[(size_t)min] = true; | 308 | m_quick_set[(size_t)min] = true; |
319 | } | 309 | } |
320 | } | 310 | } |
321 | 311 | ||
@@ -331,25 +321,35 @@ public: | |||
331 | 321 | ||
332 | private: | 322 | private: |
333 | //set is kept as an array of flags, for quick access | 323 | //set is kept as an array of flags, for quick access |
334 | std::vector<bool> m_set; | 324 | std::vector<bool> m_quick_set; |
325 | std::unordered_set<size_t> m_large_set; | ||
335 | 326 | ||
336 | //add character | 327 | //add character |
337 | void _add(size_t i) { | 328 | void _add(size_t i) { |
338 | if (i >= m_set.size()) { | 329 | if (i <= m_quick_set.size() || i <= 255) { |
339 | m_set.resize(i + 1); | 330 | if (i >= m_quick_set.size()) { |
340 | } | 331 | m_quick_set.resize(i + 1); |
341 | m_set[i] = true; | 332 | } |
333 | m_quick_set[i] = true; | ||
334 | } else { | ||
335 | m_large_set.insert(i); | ||
336 | } | ||
342 | } | 337 | } |
343 | 338 | ||
344 | //internal parse | 339 | //internal parse |
345 | bool _parse(_context &con) const { | 340 | bool _parse(_context &con) const { |
346 | if (!con.end()) { | 341 | if (!con.end()) { |
347 | size_t ch = con.symbol(); | 342 | size_t ch = con.symbol(); |
348 | if (ch < m_set.size() && m_set[ch]) { | 343 | if (ch < m_quick_set.size()) { |
344 | if (m_quick_set[ch]) { | ||
345 | con.next_col(); | ||
346 | return true; | ||
347 | } | ||
348 | } else if (m_large_set.find(ch) != m_large_set.end()) { | ||
349 | con.next_col(); | 349 | con.next_col(); |
350 | return true; | 350 | return true; |
351 | } | 351 | } |
352 | } | 352 | } |
353 | con.set_error_pos(); | 353 | con.set_error_pos(); |
354 | return false; | 354 | return false; |
355 | } | 355 | } |
@@ -1074,7 +1074,7 @@ pos::pos(input &i) : | |||
1074 | /** character terminal constructor. | 1074 | /** character terminal constructor. |
1075 | @param c character. | 1075 | @param c character. |
1076 | */ | 1076 | */ |
1077 | expr::expr(int c) : | 1077 | expr::expr(char c) : |
1078 | m_expr(new _char(c)) | 1078 | m_expr(new _char(c)) |
1079 | { | 1079 | { |
1080 | } | 1080 | } |
@@ -1089,15 +1089,6 @@ expr::expr(const char *s) : | |||
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | 1091 | ||
1092 | /** null-terminated wide string terminal constructor. | ||
1093 | @param s null-terminated string. | ||
1094 | */ | ||
1095 | expr::expr(const wchar_t *s) : | ||
1096 | m_expr(new _string(s)) | ||
1097 | { | ||
1098 | } | ||
1099 | |||
1100 | |||
1101 | /** rule reference constructor. | 1092 | /** rule reference constructor. |
1102 | @param r rule. | 1093 | @param r rule. |
1103 | */ | 1094 | */ |
@@ -1182,7 +1173,7 @@ bool error::operator < (const error &e) const { | |||
1182 | /** character terminal constructor. | 1173 | /** character terminal constructor. |
1183 | @param c character. | 1174 | @param c character. |
1184 | */ | 1175 | */ |
1185 | rule::rule(int c) : | 1176 | rule::rule(char c) : |
1186 | m_expr(new _char(c)) | 1177 | m_expr(new _char(c)) |
1187 | { | 1178 | { |
1188 | m_parse_proc = _get_parse_proc(this); | 1179 | m_parse_proc = _get_parse_proc(this); |
@@ -1199,16 +1190,6 @@ rule::rule(const char *s) : | |||
1199 | } | 1190 | } |
1200 | 1191 | ||
1201 | 1192 | ||
1202 | /** null-terminated wide string terminal constructor. | ||
1203 | @param s null-terminated string. | ||
1204 | */ | ||
1205 | rule::rule(const wchar_t *s) : | ||
1206 | m_expr(new _string(s)) | ||
1207 | { | ||
1208 | m_parse_proc = _get_parse_proc(this); | ||
1209 | } | ||
1210 | |||
1211 | |||
1212 | /** constructor from expression. | 1193 | /** constructor from expression. |
1213 | @param e expression. | 1194 | @param e expression. |
1214 | */ | 1195 | */ |
@@ -1338,15 +1319,6 @@ expr set(const char *s) { | |||
1338 | } | 1319 | } |
1339 | 1320 | ||
1340 | 1321 | ||
1341 | /** creates a set expression from a null-terminated wide string. | ||
1342 | @param s null-terminated string with characters of the set. | ||
1343 | @return an expression which parses a single character out of a set. | ||
1344 | */ | ||
1345 | expr set(const wchar_t *s) { | ||
1346 | return _private::construct_expr(new _set(s)); | ||
1347 | } | ||
1348 | |||
1349 | |||
1350 | /** creates a range expression. | 1322 | /** creates a range expression. |
1351 | @param min min character. | 1323 | @param min min character. |
1352 | @param max max character. | 1324 | @param max max character. |
diff --git a/MoonParser/parser.hpp b/MoonParser/parser.hpp index b280155..66d6067 100644 --- a/MoonParser/parser.hpp +++ b/MoonParser/parser.hpp | |||
@@ -13,8 +13,23 @@ | |||
13 | #include <string> | 13 | #include <string> |
14 | #include <list> | 14 | #include <list> |
15 | #include <functional> | 15 | #include <functional> |
16 | #include <codecvt> | ||
17 | #include <locale> | ||
16 | 18 | ||
17 | 19 | ||
20 | ///type of the parser's input. | ||
21 | typedef std::basic_string<char32_t> input; | ||
22 | typedef input::iterator input_it; | ||
23 | |||
24 | template<class Facet> | ||
25 | struct deletable_facet : Facet | ||
26 | { | ||
27 | template<class ...Args> | ||
28 | deletable_facet(Args&& ...args): Facet(std::forward<Args>(args)...) {} | ||
29 | ~deletable_facet() {} | ||
30 | }; | ||
31 | typedef std::wstring_convert<deletable_facet<std::codecvt<input::value_type, char, std::mbstate_t>>, input::value_type> Converter; | ||
32 | |||
18 | namespace parserlib { | 33 | namespace parserlib { |
19 | 34 | ||
20 | 35 | ||
@@ -24,9 +39,6 @@ class _context; | |||
24 | class rule; | 39 | class rule; |
25 | 40 | ||
26 | 41 | ||
27 | ///type of the parser's input. | ||
28 | typedef std::u32string input; | ||
29 | typedef input::iterator input_it; | ||
30 | struct item_t | 42 | struct item_t |
31 | { | 43 | { |
32 | input_it begin; | 44 | input_it begin; |
@@ -65,18 +77,13 @@ public: | |||
65 | /** character terminal constructor. | 77 | /** character terminal constructor. |
66 | @param c character. | 78 | @param c character. |
67 | */ | 79 | */ |
68 | expr(int c); | 80 | expr(char c); |
69 | 81 | ||
70 | /** null-terminated string terminal constructor. | 82 | /** null-terminated string terminal constructor. |
71 | @param s null-terminated string. | 83 | @param s null-terminated string. |
72 | */ | 84 | */ |
73 | expr(const char *s); | 85 | expr(const char *s); |
74 | 86 | ||
75 | /** null-terminated wide string terminal constructor. | ||
76 | @param s null-terminated string. | ||
77 | */ | ||
78 | expr(const wchar_t *s); | ||
79 | |||
80 | /** rule reference constructor. | 87 | /** rule reference constructor. |
81 | @param r rule. | 88 | @param r rule. |
82 | */ | 89 | */ |
@@ -195,18 +202,13 @@ public: | |||
195 | /** character terminal constructor. | 202 | /** character terminal constructor. |
196 | @param c character. | 203 | @param c character. |
197 | */ | 204 | */ |
198 | rule(int c); | 205 | rule(char c); |
199 | 206 | ||
200 | /** null-terminated string terminal constructor. | 207 | /** null-terminated string terminal constructor. |
201 | @param s null-terminated string. | 208 | @param s null-terminated string. |
202 | */ | 209 | */ |
203 | rule(const char *s); | 210 | rule(const char *s); |
204 | 211 | ||
205 | /** null-terminated wide string terminal constructor. | ||
206 | @param s null-terminated string. | ||
207 | */ | ||
208 | rule(const wchar_t *s); | ||
209 | |||
210 | /** constructor from expression. | 212 | /** constructor from expression. |
211 | @param e expression. | 213 | @param e expression. |
212 | */ | 214 | */ |
@@ -330,13 +332,6 @@ expr term(const expr &e); | |||
330 | expr set(const char *s); | 332 | expr set(const char *s); |
331 | 333 | ||
332 | 334 | ||
333 | /** creates a set expression from a null-terminated wide string. | ||
334 | @param s null-terminated string with characters of the set. | ||
335 | @return an expression which parses a single character out of a set. | ||
336 | */ | ||
337 | expr set(const wchar_t *s); | ||
338 | |||
339 | |||
340 | /** creates a range expression. | 335 | /** creates a range expression. |
341 | @param min min character. | 336 | @param min min character. |
342 | @param max max character. | 337 | @param max max character. |