aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MoonParser.xcodeproj/project.pbxproj14
-rw-r--r--MoonParser/ast.cpp29
-rw-r--r--MoonParser/ast.hpp98
-rw-r--r--MoonParser/moon_ast.cpp38
-rw-r--r--MoonParser/moon_ast.h21
-rw-r--r--MoonParser/moon_parser.cpp2
-rw-r--r--MoonParser/parser.cpp96
-rw-r--r--MoonParser/parser.hpp39
8 files changed, 185 insertions, 152 deletions
diff --git a/MoonParser.xcodeproj/project.pbxproj b/MoonParser.xcodeproj/project.pbxproj
index fe4dd22..1216f9c 100644
--- a/MoonParser.xcodeproj/project.pbxproj
+++ b/MoonParser.xcodeproj/project.pbxproj
@@ -107,7 +107,7 @@
107 3C0F0F641EF3781E000EADDB /* Project object */ = { 107 3C0F0F641EF3781E000EADDB /* Project object */ = {
108 isa = PBXProject; 108 isa = PBXProject;
109 attributes = { 109 attributes = {
110 LastUpgradeCheck = 0820; 110 LastUpgradeCheck = 0920;
111 ORGANIZATIONNAME = "Li Jin"; 111 ORGANIZATIONNAME = "Li Jin";
112 TargetAttributes = { 112 TargetAttributes = {
113 3C0F0F6B1EF3781E000EADDB = { 113 3C0F0F6B1EF3781E000EADDB = {
@@ -157,7 +157,9 @@
157 CLANG_CXX_LIBRARY = "libc++"; 157 CLANG_CXX_LIBRARY = "libc++";
158 CLANG_ENABLE_MODULES = YES; 158 CLANG_ENABLE_MODULES = YES;
159 CLANG_ENABLE_OBJC_ARC = YES; 159 CLANG_ENABLE_OBJC_ARC = YES;
160 CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
160 CLANG_WARN_BOOL_CONVERSION = YES; 161 CLANG_WARN_BOOL_CONVERSION = YES;
162 CLANG_WARN_COMMA = YES;
161 CLANG_WARN_CONSTANT_CONVERSION = YES; 163 CLANG_WARN_CONSTANT_CONVERSION = YES;
162 CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 164 CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
163 CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 165 CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
@@ -165,7 +167,11 @@
165 CLANG_WARN_ENUM_CONVERSION = YES; 167 CLANG_WARN_ENUM_CONVERSION = YES;
166 CLANG_WARN_INFINITE_RECURSION = YES; 168 CLANG_WARN_INFINITE_RECURSION = YES;
167 CLANG_WARN_INT_CONVERSION = YES; 169 CLANG_WARN_INT_CONVERSION = YES;
170 CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
171 CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
168 CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 172 CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
173 CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
174 CLANG_WARN_STRICT_PROTOTYPES = YES;
169 CLANG_WARN_SUSPICIOUS_MOVE = YES; 175 CLANG_WARN_SUSPICIOUS_MOVE = YES;
170 CLANG_WARN_UNREACHABLE_CODE = YES; 176 CLANG_WARN_UNREACHABLE_CODE = YES;
171 CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 177 CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
@@ -204,7 +210,9 @@
204 CLANG_CXX_LIBRARY = "libc++"; 210 CLANG_CXX_LIBRARY = "libc++";
205 CLANG_ENABLE_MODULES = YES; 211 CLANG_ENABLE_MODULES = YES;
206 CLANG_ENABLE_OBJC_ARC = YES; 212 CLANG_ENABLE_OBJC_ARC = YES;
213 CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
207 CLANG_WARN_BOOL_CONVERSION = YES; 214 CLANG_WARN_BOOL_CONVERSION = YES;
215 CLANG_WARN_COMMA = YES;
208 CLANG_WARN_CONSTANT_CONVERSION = YES; 216 CLANG_WARN_CONSTANT_CONVERSION = YES;
209 CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 217 CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
210 CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 218 CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
@@ -212,7 +220,11 @@
212 CLANG_WARN_ENUM_CONVERSION = YES; 220 CLANG_WARN_ENUM_CONVERSION = YES;
213 CLANG_WARN_INFINITE_RECURSION = YES; 221 CLANG_WARN_INFINITE_RECURSION = YES;
214 CLANG_WARN_INT_CONVERSION = YES; 222 CLANG_WARN_INT_CONVERSION = YES;
223 CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
224 CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
215 CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 225 CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
226 CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
227 CLANG_WARN_STRICT_PROTOTYPES = YES;
216 CLANG_WARN_SUSPICIOUS_MOVE = YES; 228 CLANG_WARN_SUSPICIOUS_MOVE = YES;
217 CLANG_WARN_UNREACHABLE_CODE = YES; 229 CLANG_WARN_UNREACHABLE_CODE = YES;
218 CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 230 CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
diff --git a/MoonParser/ast.cpp b/MoonParser/ast.cpp
index 090f5ee..739e02c 100644
--- a/MoonParser/ast.cpp
+++ b/MoonParser/ast.cpp
@@ -11,6 +11,13 @@ static ast_container *_current = 0;
11int ast_type_id = 0; 11int ast_type_id = 0;
12 12
13 13
14bool ast_node::visit(const std::function<bool (ast_node*)>& begin,
15 const std::function<bool (ast_node*)>& end)
16{
17 return begin(this) || end(this);
18}
19
20
14/** sets the container under construction to be this. 21/** sets the container under construction to be this.
15 */ 22 */
16ast_container::ast_container() { 23ast_container::ast_container() {
@@ -41,6 +48,28 @@ void ast_container::construct(ast_stack &st) {
41 } 48 }
42} 49}
43 50
51bool ast_container::visit(const std::function<bool (ast_node*)>& begin,
52 const std::function<bool (ast_node*)>& end)
53{
54 bool result = begin(this);
55 if (result) return true;
56 const auto& members = this->members();
57 for (auto member : members) {
58 if (_ast_ptr* ptr = ast_cast<_ast_ptr>(member)) {
59 if (ptr->get() && ptr->get()->visit(begin, end)) {
60 return true;
61 }
62 } else if (_ast_list* list = ast_cast<_ast_list>(member)) {
63 for (auto obj : list->objects()) {
64 if (obj->visit(begin, end)) {
65 return true;
66 }
67 }
68 }
69 }
70 return end(this);
71}
72
44 73
45//register the AST member to the current container. 74//register the AST member to the current container.
46void ast_member::_init() { 75void ast_member::_init() {
diff --git a/MoonParser/ast.hpp b/MoonParser/ast.hpp
index 955cdc0..1d05779 100644
--- a/MoonParser/ast.hpp
+++ b/MoonParser/ast.hpp
@@ -36,12 +36,12 @@ int ast_type()
36class ast_node : public input_range { 36class ast_node : public input_range {
37public: 37public:
38 ///constructor. 38 ///constructor.
39 ast_node() : m_parent(0) {} 39 ast_node() : m_parent(nullptr) {}
40 40
41 /** copy constructor. 41 /** copy constructor.
42 @param n source object. 42 @param n source object.
43 */ 43 */
44 ast_node(const ast_node &n) : m_parent(0) {} 44 ast_node(const ast_node &n) : m_parent(nullptr) {}
45 45
46 ///destructor. 46 ///destructor.
47 virtual ~ast_node() {} 47 virtual ~ast_node() {}
@@ -64,9 +64,11 @@ public:
64 virtual void construct(ast_stack &st) {} 64 virtual void construct(ast_stack &st) {}
65 65
66 /** interface for visiting AST tree use. 66 /** interface for visiting AST tree use.
67 @param user_data vector for storing user data.
68 */ 67 */
69 virtual void visit(void* user_data) {} 68 virtual bool visit(const std::function<bool (ast_node*)>& begin,
69 const std::function<bool (ast_node*)>& end);
70
71 virtual const char* getName() const { return "ast_node"; }
70 72
71 virtual int get_type() { return ast_type<ast_node>(); } 73 virtual int get_type() { return ast_type<ast_node>(); }
72private: 74private:
@@ -96,7 +98,6 @@ bool ast_is(ast_node* node) {
96 98
97class ast_member; 99class ast_member;
98 100
99
100/** type of ast member vector. 101/** type of ast member vector.
101 */ 102 */
102typedef std::vector<ast_member *> ast_member_vector; 103typedef std::vector<ast_member *> ast_member_vector;
@@ -137,8 +138,12 @@ public:
137 from a node stack. 138 from a node stack.
138 @param st stack. 139 @param st stack.
139 */ 140 */
140 virtual void construct(ast_stack &st); 141 virtual void construct(ast_stack &st) override;
141 142
143 virtual bool visit(const std::function<bool (ast_node*)>& begin,
144 const std::function<bool (ast_node*)>& end) override;
145
146 virtual const char* getName() const override { return "ast_container"; }
142private: 147private:
143 ast_member_vector m_members; 148 ast_member_vector m_members;
144 149
@@ -177,6 +182,7 @@ public:
177 */ 182 */
178 virtual void construct(ast_stack &st) = 0; 183 virtual void construct(ast_stack &st) = 0;
179 184
185 virtual int get_type() { return ast_type<ast_member>(); }
180private: 186private:
181 //the container this belongs to. 187 //the container this belongs to.
182 ast_container *m_container; 188 ast_container *m_container;
@@ -185,6 +191,25 @@ private:
185 void _init(); 191 void _init();
186}; 192};
187 193
194template<class T>
195T* ast_cast(ast_member *member) {
196 return member && ast_type<T>() == member->get_type() ? static_cast<T*>(member) : nullptr;
197}
198
199class _ast_ptr : public ast_member {
200public:
201 _ast_ptr(ast_node *node): m_ptr(node) {}
202
203 ast_node* get() const {
204 return m_ptr;
205 }
206
207 virtual int get_type() override {
208 return ast_type<_ast_ptr>();
209 }
210protected:
211 ast_node *m_ptr;
212};
188 213
189/** pointer to an AST object. 214/** pointer to an AST object.
190 It assumes ownership of the object. 215 It assumes ownership of the object.
@@ -192,12 +217,12 @@ private:
192 @tparam T type of object to control. 217 @tparam T type of object to control.
193 @tparam OPT if true, the object becomes optional. 218 @tparam OPT if true, the object becomes optional.
194 */ 219 */
195template <class T, bool OPT = false> class ast_ptr : public ast_member { 220template <class T, bool OPT = false> class ast_ptr : public _ast_ptr {
196public: 221public:
197 /** the default constructor. 222 /** the default constructor.
198 @param obj object. 223 @param obj object.
199 */ 224 */
200 ast_ptr(T *obj = 0) : m_ptr(obj) { 225 ast_ptr(T *obj = nullptr) : _ast_ptr(obj) {
201 _set_parent(); 226 _set_parent();
202 } 227 }
203 228
@@ -206,7 +231,7 @@ public:
206 @param src source object. 231 @param src source object.
207 */ 232 */
208 ast_ptr(const ast_ptr<T, OPT> &src) : 233 ast_ptr(const ast_ptr<T, OPT> &src) :
209 m_ptr(src.m_ptr ? new T(*src.m_ptr) : 0) 234 _ast_ptr(src.m_ptr ? new T(*src.m_ptr) : nullptr)
210 { 235 {
211 _set_parent(); 236 _set_parent();
212 } 237 }
@@ -224,7 +249,7 @@ public:
224 */ 249 */
225 ast_ptr<T, OPT> &operator = (const T *obj) { 250 ast_ptr<T, OPT> &operator = (const T *obj) {
226 delete m_ptr; 251 delete m_ptr;
227 m_ptr = obj ? new T(*obj) : 0; 252 m_ptr = obj ? new T(*obj) : nullptr;
228 _set_parent(); 253 _set_parent();
229 return *this; 254 return *this;
230 } 255 }
@@ -236,7 +261,7 @@ public:
236 */ 261 */
237 ast_ptr<T, OPT> &operator = (const ast_ptr<T, OPT> &src) { 262 ast_ptr<T, OPT> &operator = (const ast_ptr<T, OPT> &src) {
238 delete m_ptr; 263 delete m_ptr;
239 m_ptr = src.m_ptr ? new T(*src.m_ptr) : 0; 264 m_ptr = src.m_ptr ? new T(*src.m_ptr) : nullptr;
240 _set_parent(); 265 _set_parent();
241 return *this; 266 return *this;
242 } 267 }
@@ -245,14 +270,14 @@ public:
245 @return the underlying ptr value. 270 @return the underlying ptr value.
246 */ 271 */
247 T *get() const { 272 T *get() const {
248 return m_ptr; 273 return static_cast<T*>(m_ptr);
249 } 274 }
250 275
251 /** auto conversion to the underlying object ptr. 276 /** auto conversion to the underlying object ptr.
252 @return the underlying ptr value. 277 @return the underlying ptr value.
253 */ 278 */
254 operator T *() const { 279 operator T *() const {
255 return m_ptr; 280 return static_cast<T*>(m_ptr);
256 } 281 }
257 282
258 /** member access. 283 /** member access.
@@ -299,25 +324,21 @@ public:
299 m_ptr = obj; 324 m_ptr = obj;
300 _set_parent(); 325 _set_parent();
301 } 326 }
302
303private: 327private:
304 //ptr
305 T *m_ptr;
306
307 //set parent of object 328 //set parent of object
308 void _set_parent() { 329 void _set_parent() {
309 if (m_ptr) m_ptr->m_parent = container(); 330 if (m_ptr) m_ptr->m_parent = container();
310 } 331 }
311}; 332};
312 333
313template <class ...Args> class ast_choice : public ast_member { 334template <class ...Args> class ast_choice : public _ast_ptr {
314public: 335public:
315 ast_choice(ast_node *obj = 0) : m_ptr(obj) { 336 ast_choice(ast_node *obj = nullptr) : _ast_ptr(obj) {
316 _set_parent(); 337 _set_parent();
317 } 338 }
318 339
319 ast_choice(const ast_choice<Args...> &src) : 340 ast_choice(const ast_choice<Args...> &src) :
320 m_ptr(src.m_ptr ? new ast_node(*src.m_ptr) : 0) 341 _ast_ptr(src.m_ptr ? new ast_node(*src.m_ptr) : nullptr)
321 { 342 {
322 _set_parent(); 343 _set_parent();
323 } 344 }
@@ -328,22 +349,18 @@ public:
328 349
329 ast_choice<Args...> &operator = (const ast_node *obj) { 350 ast_choice<Args...> &operator = (const ast_node *obj) {
330 delete m_ptr; 351 delete m_ptr;
331 m_ptr = obj ? new ast_node(*obj) : 0; 352 m_ptr = obj ? new ast_node(*obj) : nullptr;
332 _set_parent(); 353 _set_parent();
333 return *this; 354 return *this;
334 } 355 }
335 356
336 ast_choice<Args...> &operator = (const ast_choice<Args...> &src) { 357 ast_choice<Args...> &operator = (const ast_choice<Args...> &src) {
337 delete m_ptr; 358 delete m_ptr;
338 m_ptr = src.m_ptr ? new ast_node(*src.m_ptr) : 0; 359 m_ptr = src.m_ptr ? new ast_node(*src.m_ptr) : nullptr;
339 _set_parent(); 360 _set_parent();
340 return *this; 361 return *this;
341 } 362 }
342 363
343 ast_node *get() const {
344 return m_ptr;
345 }
346
347 operator ast_node *() const { 364 operator ast_node *() const {
348 return m_ptr; 365 return m_ptr;
349 } 366 }
@@ -372,26 +389,33 @@ public:
372 m_ptr = obj; 389 m_ptr = obj;
373 _set_parent(); 390 _set_parent();
374 } 391 }
375
376private: 392private:
377 //ptr
378 ast_node *m_ptr;
379
380 void _set_parent() { 393 void _set_parent() {
381 if (m_ptr) m_ptr->m_parent = container(); 394 if (m_ptr) m_ptr->m_parent = container();
382 } 395 }
383}; 396};
384 397
398class _ast_list : public ast_member {
399public:
400 ///list type.
401 typedef std::list<ast_node *> container;
402
403 virtual int get_type() override { return ast_type<_ast_list>(); }
404
405 const container &objects() const {
406 return m_objects;
407 }
408protected:
409 container m_objects;
410};
411
385/** A list of objects. 412/** A list of objects.
386 It pops objects of the given type from the ast stack, until no more objects can be popped. 413 It pops objects of the given type from the ast stack, until no more objects can be popped.
387 It assumes ownership of objects. 414 It assumes ownership of objects.
388 @tparam T type of object to control. 415 @tparam T type of object to control.
389 */ 416 */
390template <class T> class ast_list : public ast_member { 417template <class T> class ast_list : public _ast_list {
391public: 418public:
392 ///list type.
393 typedef std::list<T *> container;
394
395 ///the default constructor. 419 ///the default constructor.
396 ast_list() {} 420 ast_list() {}
397 421
@@ -430,7 +454,7 @@ public:
430 /** Pops objects of type T from the stack until no more objects can be popped. 454 /** Pops objects of type T from the stack until no more objects can be popped.
431 @param st stack. 455 @param st stack.
432 */ 456 */
433 virtual void construct(ast_stack &st) { 457 virtual void construct(ast_stack &st) override {
434 for(;;) { 458 for(;;) {
435 //if the stack is empty 459 //if the stack is empty
436 if (st.empty()) break; 460 if (st.empty()) break;
@@ -455,11 +479,7 @@ public:
455 obj->m_parent = ast_member::container(); 479 obj->m_parent = ast_member::container();
456 } 480 }
457 } 481 }
458
459private: 482private:
460 //objects
461 container m_objects;
462
463 //deletes the objects of this list. 483 //deletes the objects of this list.
464 void _clear() { 484 void _clear() {
465 while (!m_objects.empty()) { 485 while (!m_objects.empty()) {
diff --git a/MoonParser/moon_ast.cpp b/MoonParser/moon_ast.cpp
index d8a0db9..9627eab 100644
--- a/MoonParser/moon_ast.cpp
+++ b/MoonParser/moon_ast.cpp
@@ -1,5 +1,4 @@
1#include <string> 1#include <string>
2#include <codecvt>
3#include <unordered_set> 2#include <unordered_set>
4#include <stack> 3#include <stack>
5#include <algorithm> 4#include <algorithm>
@@ -7,28 +6,24 @@
7#include <vector> 6#include <vector>
8#include "moon_ast.h" 7#include "moon_ast.h"
9 8
10std::string& trim(std::string& s) 9input& trim(input& s)
11{ 10{
12 s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) 11 s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](input::value_type ch)
13 { 12 {
14 return !std::isspace(ch); 13 return !std::isspace(ch);
15 })); 14 }));
16 s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) 15 s.erase(std::find_if(s.rbegin(), s.rend(), [](input::value_type ch)
17 { 16 {
18 return !std::isspace(ch); 17 return !std::isspace(ch);
19 }).base(), s.end()); 18 }).base(), s.end());
20 return s; 19 return s;
21} 20}
22 21
23const std::string& AstLeaf::getValue() 22const input& AstLeaf::getValue()
24{ 23{
25 if (_value.empty()) 24 if (_value.empty())
26 { 25 {
27 for (auto it = m_begin.m_it; it != m_end.m_it; ++it) 26 _value.assign(m_begin.m_it, m_end.m_it);
28 {
29 char ch = static_cast<char>(*it);
30 _value.append(&ch, 1);
31 }
32 return trim(_value); 27 return trim(_value);
33 } 28 }
34 return _value; 29 return _value;
@@ -151,9 +146,11 @@ AST_IMPL(BlockEnd)
151 146
152int main() 147int main()
153{ 148{
154 std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> conv; 149 std::string s = R"TestCodesHere(
155 std::string s = R"TestCodesHere()TestCodesHere"; 150thing = { var: 10, hello: "world", func: => @var }
156 input i = conv.from_bytes(s); 151import hello, \func from thing
152)TestCodesHere";
153 input i = Converter{}.from_bytes(s);
157 154
158 error_list el; 155 error_list el;
159 BlockEnd_t* root = nullptr; 156 BlockEnd_t* root = nullptr;
@@ -161,6 +158,21 @@ int main()
161 if (parse(i, BlockEnd, el, root, &st)) 158 if (parse(i, BlockEnd, el, root, &st))
162 { 159 {
163 std::cout << "matched!\n"; 160 std::cout << "matched!\n";
161 root->visit([](ast_node* node)
162 {
163 if (std::string("Seperator") != node->getName())
164 {
165 std::cout << "{" << node->getName();
166 }
167 return false;
168 }, [](ast_node* node)
169 {
170 if (std::string("Seperator") != node->getName())
171 {
172 std::cout << "}" ;
173 }
174 return false;
175 });
164 } 176 }
165 else 177 else
166 { 178 {
diff --git a/MoonParser/moon_ast.h b/MoonParser/moon_ast.h
index 7a0e805..4511b1b 100644
--- a/MoonParser/moon_ast.h
+++ b/MoonParser/moon_ast.h
@@ -2,23 +2,14 @@
2 2
3#include "moon_parser.h" 3#include "moon_parser.h"
4 4
5template<class Facet> 5input& trim(input& s);
6struct deletable_facet : Facet
7{
8 template<class ...Args>
9 deletable_facet(Args&& ...args): Facet(std::forward<Args>(args)...) {}
10 ~deletable_facet() {}
11};
12typedef std::wstring_convert<deletable_facet<std::codecvt<char32_t, char, std::mbstate_t>>, char32_t> Converter;
13
14std::string& trim(std::string& s);
15 6
16class AstLeaf : public ast_node 7class AstLeaf : public ast_node
17{ 8{
18public: 9public:
19 const std::string& getValue(); 10 const input& getValue();
20private: 11private:
21 std::string _value; 12 input _value;
22}; 13};
23 14
24#define AST_LEAF(type) \ 15#define AST_LEAF(type) \
@@ -26,14 +17,16 @@ extern rule type; \
26class type##_t : public AstLeaf \ 17class type##_t : public AstLeaf \
27{ \ 18{ \
28public: \ 19public: \
29 virtual int get_type() override { return ast_type<type##_t>(); } 20 virtual int get_type() override { return ast_type<type##_t>(); } \
21 virtual const char* getName() const override { return #type; }
30 22
31#define AST_NODE(type) \ 23#define AST_NODE(type) \
32extern rule type; \ 24extern rule type; \
33class type##_t : public ast_container \ 25class type##_t : public ast_container \
34{ \ 26{ \
35public: \ 27public: \
36 virtual int get_type() override { return ast_type<type##_t>(); } 28 virtual int get_type() override { return ast_type<type##_t>(); } \
29 virtual const char* getName() const override { return #type; }
37 30
38#define AST_END(type) \ 31#define AST_END(type) \
39}; 32};
diff --git a/MoonParser/moon_parser.cpp b/MoonParser/moon_parser.cpp
index 3069659..7731513 100644
--- a/MoonParser/moon_parser.cpp
+++ b/MoonParser/moon_parser.cpp
@@ -1,8 +1,8 @@
1#include "moon_parser.h" 1#include "moon_parser.h"
2 2
3rule Any = any();
4rule plain_space = *set(" \t"); 3rule plain_space = *set(" \t");
5rule Break = nl(-expr('\r') >> '\n'); 4rule Break = nl(-expr('\r') >> '\n');
5rule Any = Break | any();
6rule White = *(set(" \t") | Break); 6rule White = *(set(" \t") | Break);
7rule Stop = Break | eof(); 7rule Stop = Break | eof();
8rule Comment = "--" >> *(not_(set("\r\n")) >> Any) >> and_(Stop); 8rule Comment = "--" >> *(not_(set("\r\n")) >> Any) >> and_(Stop);
diff --git a/MoonParser/parser.cpp b/MoonParser/parser.cpp
index 6778368..2378abe 100644
--- a/MoonParser/parser.cpp
+++ b/MoonParser/parser.cpp
@@ -3,6 +3,8 @@
3#include <cassert> 3#include <cassert>
4#include <stdexcept> 4#include <stdexcept>
5#include <unordered_map> 5#include <unordered_map>
6#include <unordered_set>
7
6#include "parser.hpp" 8#include "parser.hpp"
7 9
8 10
@@ -136,7 +138,7 @@ public:
136 } 138 }
137 139
138 //get the current symbol 140 //get the current symbol
139 int symbol() const { 141 input::value_type symbol() const {
140 assert(!end()); 142 assert(!end());
141 return *m_pos.m_it; 143 return *m_pos.m_it;
142 } 144 }
@@ -212,7 +214,7 @@ public:
212class _char : public _expr { 214class _char : public _expr {
213public: 215public:
214 //constructor. 216 //constructor.
215 _char(int c) : 217 _char(char c) :
216 m_char(c) 218 m_char(c)
217 { 219 {
218 } 220 }
@@ -229,12 +231,12 @@ public:
229 231
230private: 232private:
231 //character 233 //character
232 int m_char; 234 input::value_type m_char;
233 235
234 //internal parse 236 //internal parse
235 bool _parse(_context &con) const { 237 bool _parse(_context &con) const {
236 if (!con.end()) { 238 if (!con.end()) {
237 int ch = con.symbol(); 239 input::value_type ch = con.symbol();
238 if (ch == m_char) { 240 if (ch == m_char) {
239 con.next_col(); 241 con.next_col();
240 return true; 242 return true;
@@ -251,13 +253,7 @@ class _string : public _expr {
251public: 253public:
252 //constructor from ansi string. 254 //constructor from ansi string.
253 _string(const char *s) : 255 _string(const char *s) :
254 m_string(s, s + strlen(s)) 256 m_string(Converter{}.from_bytes(s))
255 {
256 }
257
258 //constructor from wide string.
259 _string(const wchar_t *s) :
260 m_string(s, s + wcslen(s))
261 { 257 {
262 } 258 }
263 259
@@ -273,11 +269,11 @@ public:
273 269
274private: 270private:
275 //string 271 //string
276 std::vector<char32_t> m_string; 272 input m_string;
277 273
278 //parse the string 274 //parse the string
279 bool _parse(_context &con) const { 275 bool _parse(_context &con) const {
280 for(std::vector<char32_t>::const_iterator it = m_string.begin(), 276 for(input::const_iterator it = m_string.begin(),
281 end = m_string.end();;) 277 end = m_string.end();;)
282 { 278 {
283 if (it == end) return true; 279 if (it == end) return true;
@@ -297,25 +293,19 @@ class _set : public _expr {
297public: 293public:
298 //constructor from ansi string. 294 //constructor from ansi string.
299 _set(const char *s) { 295 _set(const char *s) {
300 for(; *s; ++s) { 296 auto str = Converter{}.from_bytes(s);
301 _add(*s); 297 for (auto ch : str) {
302 } 298 _add(ch);
303 } 299 }
304
305 //constructor from wide string.
306 _set(const wchar_t *s) {
307 for(; *s; ++s) {
308 _add(*s);
309 }
310 } 300 }
311 301
312 //constructor from range. 302 //constructor from range.
313 _set(int min, int max) { 303 _set(int min, int max) {
314 assert(min >= 0); 304 assert(min >= 0);
315 assert(min <= max); 305 assert(min <= max);
316 m_set.resize((size_t)max + 1U); 306 m_quick_set.resize((size_t)max + 1U);
317 for(; min <= max; ++min) { 307 for(; min <= max; ++min) {
318 m_set[(size_t)min] = true; 308 m_quick_set[(size_t)min] = true;
319 } 309 }
320 } 310 }
321 311
@@ -331,25 +321,35 @@ public:
331 321
332private: 322private:
333 //set is kept as an array of flags, for quick access 323 //set is kept as an array of flags, for quick access
334 std::vector<bool> m_set; 324 std::vector<bool> m_quick_set;
325 std::unordered_set<size_t> m_large_set;
335 326
336 //add character 327 //add character
337 void _add(size_t i) { 328 void _add(size_t i) {
338 if (i >= m_set.size()) { 329 if (i <= m_quick_set.size() || i <= 255) {
339 m_set.resize(i + 1); 330 if (i >= m_quick_set.size()) {
340 } 331 m_quick_set.resize(i + 1);
341 m_set[i] = true; 332 }
333 m_quick_set[i] = true;
334 } else {
335 m_large_set.insert(i);
336 }
342 } 337 }
343 338
344 //internal parse 339 //internal parse
345 bool _parse(_context &con) const { 340 bool _parse(_context &con) const {
346 if (!con.end()) { 341 if (!con.end()) {
347 size_t ch = con.symbol(); 342 size_t ch = con.symbol();
348 if (ch < m_set.size() && m_set[ch]) { 343 if (ch < m_quick_set.size()) {
344 if (m_quick_set[ch]) {
345 con.next_col();
346 return true;
347 }
348 } else if (m_large_set.find(ch) != m_large_set.end()) {
349 con.next_col(); 349 con.next_col();
350 return true; 350 return true;
351 } 351 }
352 } 352 }
353 con.set_error_pos(); 353 con.set_error_pos();
354 return false; 354 return false;
355 } 355 }
@@ -1074,7 +1074,7 @@ pos::pos(input &i) :
1074/** character terminal constructor. 1074/** character terminal constructor.
1075 @param c character. 1075 @param c character.
1076 */ 1076 */
1077expr::expr(int c) : 1077expr::expr(char c) :
1078 m_expr(new _char(c)) 1078 m_expr(new _char(c))
1079{ 1079{
1080} 1080}
@@ -1089,15 +1089,6 @@ expr::expr(const char *s) :
1089} 1089}
1090 1090
1091 1091
1092/** null-terminated wide string terminal constructor.
1093 @param s null-terminated string.
1094 */
1095expr::expr(const wchar_t *s) :
1096 m_expr(new _string(s))
1097{
1098}
1099
1100
1101/** rule reference constructor. 1092/** rule reference constructor.
1102 @param r rule. 1093 @param r rule.
1103 */ 1094 */
@@ -1182,7 +1173,7 @@ bool error::operator < (const error &e) const {
1182/** character terminal constructor. 1173/** character terminal constructor.
1183 @param c character. 1174 @param c character.
1184 */ 1175 */
1185rule::rule(int c) : 1176rule::rule(char c) :
1186 m_expr(new _char(c)) 1177 m_expr(new _char(c))
1187{ 1178{
1188 m_parse_proc = _get_parse_proc(this); 1179 m_parse_proc = _get_parse_proc(this);
@@ -1199,16 +1190,6 @@ rule::rule(const char *s) :
1199} 1190}
1200 1191
1201 1192
1202/** null-terminated wide string terminal constructor.
1203 @param s null-terminated string.
1204 */
1205rule::rule(const wchar_t *s) :
1206 m_expr(new _string(s))
1207{
1208 m_parse_proc = _get_parse_proc(this);
1209}
1210
1211
1212/** constructor from expression. 1193/** constructor from expression.
1213 @param e expression. 1194 @param e expression.
1214 */ 1195 */
@@ -1338,15 +1319,6 @@ expr set(const char *s) {
1338} 1319}
1339 1320
1340 1321
1341/** creates a set expression from a null-terminated wide string.
1342 @param s null-terminated string with characters of the set.
1343 @return an expression which parses a single character out of a set.
1344 */
1345expr set(const wchar_t *s) {
1346 return _private::construct_expr(new _set(s));
1347}
1348
1349
1350/** creates a range expression. 1322/** creates a range expression.
1351 @param min min character. 1323 @param min min character.
1352 @param max max character. 1324 @param max max character.
diff --git a/MoonParser/parser.hpp b/MoonParser/parser.hpp
index b280155..66d6067 100644
--- a/MoonParser/parser.hpp
+++ b/MoonParser/parser.hpp
@@ -13,8 +13,23 @@
13#include <string> 13#include <string>
14#include <list> 14#include <list>
15#include <functional> 15#include <functional>
16#include <codecvt>
17#include <locale>
16 18
17 19
20///type of the parser's input.
21typedef std::basic_string<char32_t> input;
22typedef input::iterator input_it;
23
24template<class Facet>
25struct deletable_facet : Facet
26{
27 template<class ...Args>
28 deletable_facet(Args&& ...args): Facet(std::forward<Args>(args)...) {}
29 ~deletable_facet() {}
30};
31typedef std::wstring_convert<deletable_facet<std::codecvt<input::value_type, char, std::mbstate_t>>, input::value_type> Converter;
32
18namespace parserlib { 33namespace parserlib {
19 34
20 35
@@ -24,9 +39,6 @@ class _context;
24class rule; 39class rule;
25 40
26 41
27///type of the parser's input.
28typedef std::u32string input;
29typedef input::iterator input_it;
30struct item_t 42struct item_t
31{ 43{
32 input_it begin; 44 input_it begin;
@@ -65,18 +77,13 @@ public:
65 /** character terminal constructor. 77 /** character terminal constructor.
66 @param c character. 78 @param c character.
67 */ 79 */
68 expr(int c); 80 expr(char c);
69 81
70 /** null-terminated string terminal constructor. 82 /** null-terminated string terminal constructor.
71 @param s null-terminated string. 83 @param s null-terminated string.
72 */ 84 */
73 expr(const char *s); 85 expr(const char *s);
74 86
75 /** null-terminated wide string terminal constructor.
76 @param s null-terminated string.
77 */
78 expr(const wchar_t *s);
79
80 /** rule reference constructor. 87 /** rule reference constructor.
81 @param r rule. 88 @param r rule.
82 */ 89 */
@@ -195,18 +202,13 @@ public:
195 /** character terminal constructor. 202 /** character terminal constructor.
196 @param c character. 203 @param c character.
197 */ 204 */
198 rule(int c); 205 rule(char c);
199 206
200 /** null-terminated string terminal constructor. 207 /** null-terminated string terminal constructor.
201 @param s null-terminated string. 208 @param s null-terminated string.
202 */ 209 */
203 rule(const char *s); 210 rule(const char *s);
204 211
205 /** null-terminated wide string terminal constructor.
206 @param s null-terminated string.
207 */
208 rule(const wchar_t *s);
209
210 /** constructor from expression. 212 /** constructor from expression.
211 @param e expression. 213 @param e expression.
212 */ 214 */
@@ -330,13 +332,6 @@ expr term(const expr &e);
330expr set(const char *s); 332expr set(const char *s);
331 333
332 334
333/** creates a set expression from a null-terminated wide string.
334 @param s null-terminated string with characters of the set.
335 @return an expression which parses a single character out of a set.
336 */
337expr set(const wchar_t *s);
338
339
340/** creates a range expression. 335/** creates a range expression.
341 @param min min character. 336 @param min min character.
342 @param max max character. 337 @param max max character.