diff options
| author | Li Jin <dragon-fly@qq.com> | 2018-03-05 14:18:35 +0800 |
|---|---|---|
| committer | Li Jin <dragon-fly@qq.com> | 2018-03-05 14:18:35 +0800 |
| commit | 525591758ce178e44da6aa3a11d557fd75b232e7 (patch) | |
| tree | 27a2eaef7a47697888ad16ec5b27b9aee999ab71 /MoonParser/parser.cpp | |
| parent | 3b270690501cfcc9220c8d5b63ab6f13fc2bd6b0 (diff) | |
| download | yuescript-525591758ce178e44da6aa3a11d557fd75b232e7.tar.gz yuescript-525591758ce178e44da6aa3a11d557fd75b232e7.tar.bz2 yuescript-525591758ce178e44da6aa3a11d557fd75b232e7.zip | |
refactoring some codes.
Diffstat (limited to 'MoonParser/parser.cpp')
| -rw-r--r-- | MoonParser/parser.cpp | 96 |
1 files changed, 34 insertions, 62 deletions
diff --git a/MoonParser/parser.cpp b/MoonParser/parser.cpp index 6778368..2378abe 100644 --- a/MoonParser/parser.cpp +++ b/MoonParser/parser.cpp | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | #include <cassert> | 3 | #include <cassert> |
| 4 | #include <stdexcept> | 4 | #include <stdexcept> |
| 5 | #include <unordered_map> | 5 | #include <unordered_map> |
| 6 | #include <unordered_set> | ||
| 7 | |||
| 6 | #include "parser.hpp" | 8 | #include "parser.hpp" |
| 7 | 9 | ||
| 8 | 10 | ||
| @@ -136,7 +138,7 @@ public: | |||
| 136 | } | 138 | } |
| 137 | 139 | ||
| 138 | //get the current symbol | 140 | //get the current symbol |
| 139 | int symbol() const { | 141 | input::value_type symbol() const { |
| 140 | assert(!end()); | 142 | assert(!end()); |
| 141 | return *m_pos.m_it; | 143 | return *m_pos.m_it; |
| 142 | } | 144 | } |
| @@ -212,7 +214,7 @@ public: | |||
| 212 | class _char : public _expr { | 214 | class _char : public _expr { |
| 213 | public: | 215 | public: |
| 214 | //constructor. | 216 | //constructor. |
| 215 | _char(int c) : | 217 | _char(char c) : |
| 216 | m_char(c) | 218 | m_char(c) |
| 217 | { | 219 | { |
| 218 | } | 220 | } |
| @@ -229,12 +231,12 @@ public: | |||
| 229 | 231 | ||
| 230 | private: | 232 | private: |
| 231 | //character | 233 | //character |
| 232 | int m_char; | 234 | input::value_type m_char; |
| 233 | 235 | ||
| 234 | //internal parse | 236 | //internal parse |
| 235 | bool _parse(_context &con) const { | 237 | bool _parse(_context &con) const { |
| 236 | if (!con.end()) { | 238 | if (!con.end()) { |
| 237 | int ch = con.symbol(); | 239 | input::value_type ch = con.symbol(); |
| 238 | if (ch == m_char) { | 240 | if (ch == m_char) { |
| 239 | con.next_col(); | 241 | con.next_col(); |
| 240 | return true; | 242 | return true; |
| @@ -251,13 +253,7 @@ class _string : public _expr { | |||
| 251 | public: | 253 | public: |
| 252 | //constructor from ansi string. | 254 | //constructor from ansi string. |
| 253 | _string(const char *s) : | 255 | _string(const char *s) : |
| 254 | m_string(s, s + strlen(s)) | 256 | m_string(Converter{}.from_bytes(s)) |
| 255 | { | ||
| 256 | } | ||
| 257 | |||
| 258 | //constructor from wide string. | ||
| 259 | _string(const wchar_t *s) : | ||
| 260 | m_string(s, s + wcslen(s)) | ||
| 261 | { | 257 | { |
| 262 | } | 258 | } |
| 263 | 259 | ||
| @@ -273,11 +269,11 @@ public: | |||
| 273 | 269 | ||
| 274 | private: | 270 | private: |
| 275 | //string | 271 | //string |
| 276 | std::vector<char32_t> m_string; | 272 | input m_string; |
| 277 | 273 | ||
| 278 | //parse the string | 274 | //parse the string |
| 279 | bool _parse(_context &con) const { | 275 | bool _parse(_context &con) const { |
| 280 | for(std::vector<char32_t>::const_iterator it = m_string.begin(), | 276 | for(input::const_iterator it = m_string.begin(), |
| 281 | end = m_string.end();;) | 277 | end = m_string.end();;) |
| 282 | { | 278 | { |
| 283 | if (it == end) return true; | 279 | if (it == end) return true; |
| @@ -297,25 +293,19 @@ class _set : public _expr { | |||
| 297 | public: | 293 | public: |
| 298 | //constructor from ansi string. | 294 | //constructor from ansi string. |
| 299 | _set(const char *s) { | 295 | _set(const char *s) { |
| 300 | for(; *s; ++s) { | 296 | auto str = Converter{}.from_bytes(s); |
| 301 | _add(*s); | 297 | for (auto ch : str) { |
| 302 | } | 298 | _add(ch); |
| 303 | } | 299 | } |
| 304 | |||
| 305 | //constructor from wide string. | ||
| 306 | _set(const wchar_t *s) { | ||
| 307 | for(; *s; ++s) { | ||
| 308 | _add(*s); | ||
| 309 | } | ||
| 310 | } | 300 | } |
| 311 | 301 | ||
| 312 | //constructor from range. | 302 | //constructor from range. |
| 313 | _set(int min, int max) { | 303 | _set(int min, int max) { |
| 314 | assert(min >= 0); | 304 | assert(min >= 0); |
| 315 | assert(min <= max); | 305 | assert(min <= max); |
| 316 | m_set.resize((size_t)max + 1U); | 306 | m_quick_set.resize((size_t)max + 1U); |
| 317 | for(; min <= max; ++min) { | 307 | for(; min <= max; ++min) { |
| 318 | m_set[(size_t)min] = true; | 308 | m_quick_set[(size_t)min] = true; |
| 319 | } | 309 | } |
| 320 | } | 310 | } |
| 321 | 311 | ||
| @@ -331,25 +321,35 @@ public: | |||
| 331 | 321 | ||
| 332 | private: | 322 | private: |
| 333 | //set is kept as an array of flags, for quick access | 323 | //set is kept as an array of flags, for quick access |
| 334 | std::vector<bool> m_set; | 324 | std::vector<bool> m_quick_set; |
| 325 | std::unordered_set<size_t> m_large_set; | ||
| 335 | 326 | ||
| 336 | //add character | 327 | //add character |
| 337 | void _add(size_t i) { | 328 | void _add(size_t i) { |
| 338 | if (i >= m_set.size()) { | 329 | if (i <= m_quick_set.size() || i <= 255) { |
| 339 | m_set.resize(i + 1); | 330 | if (i >= m_quick_set.size()) { |
| 340 | } | 331 | m_quick_set.resize(i + 1); |
| 341 | m_set[i] = true; | 332 | } |
| 333 | m_quick_set[i] = true; | ||
| 334 | } else { | ||
| 335 | m_large_set.insert(i); | ||
| 336 | } | ||
| 342 | } | 337 | } |
| 343 | 338 | ||
| 344 | //internal parse | 339 | //internal parse |
| 345 | bool _parse(_context &con) const { | 340 | bool _parse(_context &con) const { |
| 346 | if (!con.end()) { | 341 | if (!con.end()) { |
| 347 | size_t ch = con.symbol(); | 342 | size_t ch = con.symbol(); |
| 348 | if (ch < m_set.size() && m_set[ch]) { | 343 | if (ch < m_quick_set.size()) { |
| 344 | if (m_quick_set[ch]) { | ||
| 345 | con.next_col(); | ||
| 346 | return true; | ||
| 347 | } | ||
| 348 | } else if (m_large_set.find(ch) != m_large_set.end()) { | ||
| 349 | con.next_col(); | 349 | con.next_col(); |
| 350 | return true; | 350 | return true; |
| 351 | } | 351 | } |
| 352 | } | 352 | } |
| 353 | con.set_error_pos(); | 353 | con.set_error_pos(); |
| 354 | return false; | 354 | return false; |
| 355 | } | 355 | } |
| @@ -1074,7 +1074,7 @@ pos::pos(input &i) : | |||
| 1074 | /** character terminal constructor. | 1074 | /** character terminal constructor. |
| 1075 | @param c character. | 1075 | @param c character. |
| 1076 | */ | 1076 | */ |
| 1077 | expr::expr(int c) : | 1077 | expr::expr(char c) : |
| 1078 | m_expr(new _char(c)) | 1078 | m_expr(new _char(c)) |
| 1079 | { | 1079 | { |
| 1080 | } | 1080 | } |
| @@ -1089,15 +1089,6 @@ expr::expr(const char *s) : | |||
| 1089 | } | 1089 | } |
| 1090 | 1090 | ||
| 1091 | 1091 | ||
| 1092 | /** null-terminated wide string terminal constructor. | ||
| 1093 | @param s null-terminated string. | ||
| 1094 | */ | ||
| 1095 | expr::expr(const wchar_t *s) : | ||
| 1096 | m_expr(new _string(s)) | ||
| 1097 | { | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | |||
| 1101 | /** rule reference constructor. | 1092 | /** rule reference constructor. |
| 1102 | @param r rule. | 1093 | @param r rule. |
| 1103 | */ | 1094 | */ |
| @@ -1182,7 +1173,7 @@ bool error::operator < (const error &e) const { | |||
| 1182 | /** character terminal constructor. | 1173 | /** character terminal constructor. |
| 1183 | @param c character. | 1174 | @param c character. |
| 1184 | */ | 1175 | */ |
| 1185 | rule::rule(int c) : | 1176 | rule::rule(char c) : |
| 1186 | m_expr(new _char(c)) | 1177 | m_expr(new _char(c)) |
| 1187 | { | 1178 | { |
| 1188 | m_parse_proc = _get_parse_proc(this); | 1179 | m_parse_proc = _get_parse_proc(this); |
| @@ -1199,16 +1190,6 @@ rule::rule(const char *s) : | |||
| 1199 | } | 1190 | } |
| 1200 | 1191 | ||
| 1201 | 1192 | ||
| 1202 | /** null-terminated wide string terminal constructor. | ||
| 1203 | @param s null-terminated string. | ||
| 1204 | */ | ||
| 1205 | rule::rule(const wchar_t *s) : | ||
| 1206 | m_expr(new _string(s)) | ||
| 1207 | { | ||
| 1208 | m_parse_proc = _get_parse_proc(this); | ||
| 1209 | } | ||
| 1210 | |||
| 1211 | |||
| 1212 | /** constructor from expression. | 1193 | /** constructor from expression. |
| 1213 | @param e expression. | 1194 | @param e expression. |
| 1214 | */ | 1195 | */ |
| @@ -1338,15 +1319,6 @@ expr set(const char *s) { | |||
| 1338 | } | 1319 | } |
| 1339 | 1320 | ||
| 1340 | 1321 | ||
| 1341 | /** creates a set expression from a null-terminated wide string. | ||
| 1342 | @param s null-terminated string with characters of the set. | ||
| 1343 | @return an expression which parses a single character out of a set. | ||
| 1344 | */ | ||
| 1345 | expr set(const wchar_t *s) { | ||
| 1346 | return _private::construct_expr(new _set(s)); | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | |||
| 1350 | /** creates a range expression. | 1322 | /** creates a range expression. |
| 1351 | @param min min character. | 1323 | @param min min character. |
| 1352 | @param max max character. | 1324 | @param max max character. |
