aboutsummaryrefslogtreecommitdiff
path: root/MoonParser/parser.cpp
diff options
context:
space:
mode:
authorLi Jin <dragon-fly@qq.com>2018-03-05 14:18:35 +0800
committerLi Jin <dragon-fly@qq.com>2018-03-05 14:18:35 +0800
commit525591758ce178e44da6aa3a11d557fd75b232e7 (patch)
tree27a2eaef7a47697888ad16ec5b27b9aee999ab71 /MoonParser/parser.cpp
parent3b270690501cfcc9220c8d5b63ab6f13fc2bd6b0 (diff)
downloadyuescript-525591758ce178e44da6aa3a11d557fd75b232e7.tar.gz
yuescript-525591758ce178e44da6aa3a11d557fd75b232e7.tar.bz2
yuescript-525591758ce178e44da6aa3a11d557fd75b232e7.zip
refactoring some codes.
Diffstat (limited to 'MoonParser/parser.cpp')
-rw-r--r--MoonParser/parser.cpp96
1 files changed, 34 insertions, 62 deletions
diff --git a/MoonParser/parser.cpp b/MoonParser/parser.cpp
index 6778368..2378abe 100644
--- a/MoonParser/parser.cpp
+++ b/MoonParser/parser.cpp
@@ -3,6 +3,8 @@
3#include <cassert> 3#include <cassert>
4#include <stdexcept> 4#include <stdexcept>
5#include <unordered_map> 5#include <unordered_map>
6#include <unordered_set>
7
6#include "parser.hpp" 8#include "parser.hpp"
7 9
8 10
@@ -136,7 +138,7 @@ public:
136 } 138 }
137 139
138 //get the current symbol 140 //get the current symbol
139 int symbol() const { 141 input::value_type symbol() const {
140 assert(!end()); 142 assert(!end());
141 return *m_pos.m_it; 143 return *m_pos.m_it;
142 } 144 }
@@ -212,7 +214,7 @@ public:
212class _char : public _expr { 214class _char : public _expr {
213public: 215public:
214 //constructor. 216 //constructor.
215 _char(int c) : 217 _char(char c) :
216 m_char(c) 218 m_char(c)
217 { 219 {
218 } 220 }
@@ -229,12 +231,12 @@ public:
229 231
230private: 232private:
231 //character 233 //character
232 int m_char; 234 input::value_type m_char;
233 235
234 //internal parse 236 //internal parse
235 bool _parse(_context &con) const { 237 bool _parse(_context &con) const {
236 if (!con.end()) { 238 if (!con.end()) {
237 int ch = con.symbol(); 239 input::value_type ch = con.symbol();
238 if (ch == m_char) { 240 if (ch == m_char) {
239 con.next_col(); 241 con.next_col();
240 return true; 242 return true;
@@ -251,13 +253,7 @@ class _string : public _expr {
251public: 253public:
252 //constructor from ansi string. 254 //constructor from ansi string.
253 _string(const char *s) : 255 _string(const char *s) :
254 m_string(s, s + strlen(s)) 256 m_string(Converter{}.from_bytes(s))
255 {
256 }
257
258 //constructor from wide string.
259 _string(const wchar_t *s) :
260 m_string(s, s + wcslen(s))
261 { 257 {
262 } 258 }
263 259
@@ -273,11 +269,11 @@ public:
273 269
274private: 270private:
275 //string 271 //string
276 std::vector<char32_t> m_string; 272 input m_string;
277 273
278 //parse the string 274 //parse the string
279 bool _parse(_context &con) const { 275 bool _parse(_context &con) const {
280 for(std::vector<char32_t>::const_iterator it = m_string.begin(), 276 for(input::const_iterator it = m_string.begin(),
281 end = m_string.end();;) 277 end = m_string.end();;)
282 { 278 {
283 if (it == end) return true; 279 if (it == end) return true;
@@ -297,25 +293,19 @@ class _set : public _expr {
297public: 293public:
298 //constructor from ansi string. 294 //constructor from ansi string.
299 _set(const char *s) { 295 _set(const char *s) {
300 for(; *s; ++s) { 296 auto str = Converter{}.from_bytes(s);
301 _add(*s); 297 for (auto ch : str) {
302 } 298 _add(ch);
303 } 299 }
304
305 //constructor from wide string.
306 _set(const wchar_t *s) {
307 for(; *s; ++s) {
308 _add(*s);
309 }
310 } 300 }
311 301
312 //constructor from range. 302 //constructor from range.
313 _set(int min, int max) { 303 _set(int min, int max) {
314 assert(min >= 0); 304 assert(min >= 0);
315 assert(min <= max); 305 assert(min <= max);
316 m_set.resize((size_t)max + 1U); 306 m_quick_set.resize((size_t)max + 1U);
317 for(; min <= max; ++min) { 307 for(; min <= max; ++min) {
318 m_set[(size_t)min] = true; 308 m_quick_set[(size_t)min] = true;
319 } 309 }
320 } 310 }
321 311
@@ -331,25 +321,35 @@ public:
331 321
332private: 322private:
333 //set is kept as an array of flags, for quick access 323 //set is kept as an array of flags, for quick access
334 std::vector<bool> m_set; 324 std::vector<bool> m_quick_set;
325 std::unordered_set<size_t> m_large_set;
335 326
336 //add character 327 //add character
337 void _add(size_t i) { 328 void _add(size_t i) {
338 if (i >= m_set.size()) { 329 if (i <= m_quick_set.size() || i <= 255) {
339 m_set.resize(i + 1); 330 if (i >= m_quick_set.size()) {
340 } 331 m_quick_set.resize(i + 1);
341 m_set[i] = true; 332 }
333 m_quick_set[i] = true;
334 } else {
335 m_large_set.insert(i);
336 }
342 } 337 }
343 338
344 //internal parse 339 //internal parse
345 bool _parse(_context &con) const { 340 bool _parse(_context &con) const {
346 if (!con.end()) { 341 if (!con.end()) {
347 size_t ch = con.symbol(); 342 size_t ch = con.symbol();
348 if (ch < m_set.size() && m_set[ch]) { 343 if (ch < m_quick_set.size()) {
344 if (m_quick_set[ch]) {
345 con.next_col();
346 return true;
347 }
348 } else if (m_large_set.find(ch) != m_large_set.end()) {
349 con.next_col(); 349 con.next_col();
350 return true; 350 return true;
351 } 351 }
352 } 352 }
353 con.set_error_pos(); 353 con.set_error_pos();
354 return false; 354 return false;
355 } 355 }
@@ -1074,7 +1074,7 @@ pos::pos(input &i) :
1074/** character terminal constructor. 1074/** character terminal constructor.
1075 @param c character. 1075 @param c character.
1076 */ 1076 */
1077expr::expr(int c) : 1077expr::expr(char c) :
1078 m_expr(new _char(c)) 1078 m_expr(new _char(c))
1079{ 1079{
1080} 1080}
@@ -1089,15 +1089,6 @@ expr::expr(const char *s) :
1089} 1089}
1090 1090
1091 1091
1092/** null-terminated wide string terminal constructor.
1093 @param s null-terminated string.
1094 */
1095expr::expr(const wchar_t *s) :
1096 m_expr(new _string(s))
1097{
1098}
1099
1100
1101/** rule reference constructor. 1092/** rule reference constructor.
1102 @param r rule. 1093 @param r rule.
1103 */ 1094 */
@@ -1182,7 +1173,7 @@ bool error::operator < (const error &e) const {
1182/** character terminal constructor. 1173/** character terminal constructor.
1183 @param c character. 1174 @param c character.
1184 */ 1175 */
1185rule::rule(int c) : 1176rule::rule(char c) :
1186 m_expr(new _char(c)) 1177 m_expr(new _char(c))
1187{ 1178{
1188 m_parse_proc = _get_parse_proc(this); 1179 m_parse_proc = _get_parse_proc(this);
@@ -1199,16 +1190,6 @@ rule::rule(const char *s) :
1199} 1190}
1200 1191
1201 1192
1202/** null-terminated wide string terminal constructor.
1203 @param s null-terminated string.
1204 */
1205rule::rule(const wchar_t *s) :
1206 m_expr(new _string(s))
1207{
1208 m_parse_proc = _get_parse_proc(this);
1209}
1210
1211
1212/** constructor from expression. 1193/** constructor from expression.
1213 @param e expression. 1194 @param e expression.
1214 */ 1195 */
@@ -1338,15 +1319,6 @@ expr set(const char *s) {
1338} 1319}
1339 1320
1340 1321
1341/** creates a set expression from a null-terminated wide string.
1342 @param s null-terminated string with characters of the set.
1343 @return an expression which parses a single character out of a set.
1344 */
1345expr set(const wchar_t *s) {
1346 return _private::construct_expr(new _set(s));
1347}
1348
1349
1350/** creates a range expression. 1322/** creates a range expression.
1351 @param min min character. 1323 @param min min character.
1352 @param max max character. 1324 @param max max character.