diff options
Diffstat (limited to 'MoonParser/parser.cpp')
-rw-r--r-- | MoonParser/parser.cpp | 96 |
1 files changed, 34 insertions, 62 deletions
diff --git a/MoonParser/parser.cpp b/MoonParser/parser.cpp index 6778368..2378abe 100644 --- a/MoonParser/parser.cpp +++ b/MoonParser/parser.cpp | |||
@@ -3,6 +3,8 @@ | |||
3 | #include <cassert> | 3 | #include <cassert> |
4 | #include <stdexcept> | 4 | #include <stdexcept> |
5 | #include <unordered_map> | 5 | #include <unordered_map> |
6 | #include <unordered_set> | ||
7 | |||
6 | #include "parser.hpp" | 8 | #include "parser.hpp" |
7 | 9 | ||
8 | 10 | ||
@@ -136,7 +138,7 @@ public: | |||
136 | } | 138 | } |
137 | 139 | ||
138 | //get the current symbol | 140 | //get the current symbol |
139 | int symbol() const { | 141 | input::value_type symbol() const { |
140 | assert(!end()); | 142 | assert(!end()); |
141 | return *m_pos.m_it; | 143 | return *m_pos.m_it; |
142 | } | 144 | } |
@@ -212,7 +214,7 @@ public: | |||
212 | class _char : public _expr { | 214 | class _char : public _expr { |
213 | public: | 215 | public: |
214 | //constructor. | 216 | //constructor. |
215 | _char(int c) : | 217 | _char(char c) : |
216 | m_char(c) | 218 | m_char(c) |
217 | { | 219 | { |
218 | } | 220 | } |
@@ -229,12 +231,12 @@ public: | |||
229 | 231 | ||
230 | private: | 232 | private: |
231 | //character | 233 | //character |
232 | int m_char; | 234 | input::value_type m_char; |
233 | 235 | ||
234 | //internal parse | 236 | //internal parse |
235 | bool _parse(_context &con) const { | 237 | bool _parse(_context &con) const { |
236 | if (!con.end()) { | 238 | if (!con.end()) { |
237 | int ch = con.symbol(); | 239 | input::value_type ch = con.symbol(); |
238 | if (ch == m_char) { | 240 | if (ch == m_char) { |
239 | con.next_col(); | 241 | con.next_col(); |
240 | return true; | 242 | return true; |
@@ -251,13 +253,7 @@ class _string : public _expr { | |||
251 | public: | 253 | public: |
252 | //constructor from ansi string. | 254 | //constructor from ansi string. |
253 | _string(const char *s) : | 255 | _string(const char *s) : |
254 | m_string(s, s + strlen(s)) | 256 | m_string(Converter{}.from_bytes(s)) |
255 | { | ||
256 | } | ||
257 | |||
258 | //constructor from wide string. | ||
259 | _string(const wchar_t *s) : | ||
260 | m_string(s, s + wcslen(s)) | ||
261 | { | 257 | { |
262 | } | 258 | } |
263 | 259 | ||
@@ -273,11 +269,11 @@ public: | |||
273 | 269 | ||
274 | private: | 270 | private: |
275 | //string | 271 | //string |
276 | std::vector<char32_t> m_string; | 272 | input m_string; |
277 | 273 | ||
278 | //parse the string | 274 | //parse the string |
279 | bool _parse(_context &con) const { | 275 | bool _parse(_context &con) const { |
280 | for(std::vector<char32_t>::const_iterator it = m_string.begin(), | 276 | for(input::const_iterator it = m_string.begin(), |
281 | end = m_string.end();;) | 277 | end = m_string.end();;) |
282 | { | 278 | { |
283 | if (it == end) return true; | 279 | if (it == end) return true; |
@@ -297,25 +293,19 @@ class _set : public _expr { | |||
297 | public: | 293 | public: |
298 | //constructor from ansi string. | 294 | //constructor from ansi string. |
299 | _set(const char *s) { | 295 | _set(const char *s) { |
300 | for(; *s; ++s) { | 296 | auto str = Converter{}.from_bytes(s); |
301 | _add(*s); | 297 | for (auto ch : str) { |
302 | } | 298 | _add(ch); |
303 | } | 299 | } |
304 | |||
305 | //constructor from wide string. | ||
306 | _set(const wchar_t *s) { | ||
307 | for(; *s; ++s) { | ||
308 | _add(*s); | ||
309 | } | ||
310 | } | 300 | } |
311 | 301 | ||
312 | //constructor from range. | 302 | //constructor from range. |
313 | _set(int min, int max) { | 303 | _set(int min, int max) { |
314 | assert(min >= 0); | 304 | assert(min >= 0); |
315 | assert(min <= max); | 305 | assert(min <= max); |
316 | m_set.resize((size_t)max + 1U); | 306 | m_quick_set.resize((size_t)max + 1U); |
317 | for(; min <= max; ++min) { | 307 | for(; min <= max; ++min) { |
318 | m_set[(size_t)min] = true; | 308 | m_quick_set[(size_t)min] = true; |
319 | } | 309 | } |
320 | } | 310 | } |
321 | 311 | ||
@@ -331,25 +321,35 @@ public: | |||
331 | 321 | ||
332 | private: | 322 | private: |
333 | //set is kept as an array of flags, for quick access | 323 | //set is kept as an array of flags, for quick access |
334 | std::vector<bool> m_set; | 324 | std::vector<bool> m_quick_set; |
325 | std::unordered_set<size_t> m_large_set; | ||
335 | 326 | ||
336 | //add character | 327 | //add character |
337 | void _add(size_t i) { | 328 | void _add(size_t i) { |
338 | if (i >= m_set.size()) { | 329 | if (i <= m_quick_set.size() || i <= 255) { |
339 | m_set.resize(i + 1); | 330 | if (i >= m_quick_set.size()) { |
340 | } | 331 | m_quick_set.resize(i + 1); |
341 | m_set[i] = true; | 332 | } |
333 | m_quick_set[i] = true; | ||
334 | } else { | ||
335 | m_large_set.insert(i); | ||
336 | } | ||
342 | } | 337 | } |
343 | 338 | ||
344 | //internal parse | 339 | //internal parse |
345 | bool _parse(_context &con) const { | 340 | bool _parse(_context &con) const { |
346 | if (!con.end()) { | 341 | if (!con.end()) { |
347 | size_t ch = con.symbol(); | 342 | size_t ch = con.symbol(); |
348 | if (ch < m_set.size() && m_set[ch]) { | 343 | if (ch < m_quick_set.size()) { |
344 | if (m_quick_set[ch]) { | ||
345 | con.next_col(); | ||
346 | return true; | ||
347 | } | ||
348 | } else if (m_large_set.find(ch) != m_large_set.end()) { | ||
349 | con.next_col(); | 349 | con.next_col(); |
350 | return true; | 350 | return true; |
351 | } | 351 | } |
352 | } | 352 | } |
353 | con.set_error_pos(); | 353 | con.set_error_pos(); |
354 | return false; | 354 | return false; |
355 | } | 355 | } |
@@ -1074,7 +1074,7 @@ pos::pos(input &i) : | |||
1074 | /** character terminal constructor. | 1074 | /** character terminal constructor. |
1075 | @param c character. | 1075 | @param c character. |
1076 | */ | 1076 | */ |
1077 | expr::expr(int c) : | 1077 | expr::expr(char c) : |
1078 | m_expr(new _char(c)) | 1078 | m_expr(new _char(c)) |
1079 | { | 1079 | { |
1080 | } | 1080 | } |
@@ -1089,15 +1089,6 @@ expr::expr(const char *s) : | |||
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | 1091 | ||
1092 | /** null-terminated wide string terminal constructor. | ||
1093 | @param s null-terminated string. | ||
1094 | */ | ||
1095 | expr::expr(const wchar_t *s) : | ||
1096 | m_expr(new _string(s)) | ||
1097 | { | ||
1098 | } | ||
1099 | |||
1100 | |||
1101 | /** rule reference constructor. | 1092 | /** rule reference constructor. |
1102 | @param r rule. | 1093 | @param r rule. |
1103 | */ | 1094 | */ |
@@ -1182,7 +1173,7 @@ bool error::operator < (const error &e) const { | |||
1182 | /** character terminal constructor. | 1173 | /** character terminal constructor. |
1183 | @param c character. | 1174 | @param c character. |
1184 | */ | 1175 | */ |
1185 | rule::rule(int c) : | 1176 | rule::rule(char c) : |
1186 | m_expr(new _char(c)) | 1177 | m_expr(new _char(c)) |
1187 | { | 1178 | { |
1188 | m_parse_proc = _get_parse_proc(this); | 1179 | m_parse_proc = _get_parse_proc(this); |
@@ -1199,16 +1190,6 @@ rule::rule(const char *s) : | |||
1199 | } | 1190 | } |
1200 | 1191 | ||
1201 | 1192 | ||
1202 | /** null-terminated wide string terminal constructor. | ||
1203 | @param s null-terminated string. | ||
1204 | */ | ||
1205 | rule::rule(const wchar_t *s) : | ||
1206 | m_expr(new _string(s)) | ||
1207 | { | ||
1208 | m_parse_proc = _get_parse_proc(this); | ||
1209 | } | ||
1210 | |||
1211 | |||
1212 | /** constructor from expression. | 1193 | /** constructor from expression. |
1213 | @param e expression. | 1194 | @param e expression. |
1214 | */ | 1195 | */ |
@@ -1338,15 +1319,6 @@ expr set(const char *s) { | |||
1338 | } | 1319 | } |
1339 | 1320 | ||
1340 | 1321 | ||
1341 | /** creates a set expression from a null-terminated wide string. | ||
1342 | @param s null-terminated string with characters of the set. | ||
1343 | @return an expression which parses a single character out of a set. | ||
1344 | */ | ||
1345 | expr set(const wchar_t *s) { | ||
1346 | return _private::construct_expr(new _set(s)); | ||
1347 | } | ||
1348 | |||
1349 | |||
1350 | /** creates a range expression. | 1322 | /** creates a range expression. |
1351 | @param min min character. | 1323 | @param min min character. |
1352 | @param max max character. | 1324 | @param max max character. |