diff options
| author | Li Jin <dragon-fly@qq.com> | 2017-07-13 16:03:11 +0800 |
|---|---|---|
| committer | Li Jin <dragon-fly@qq.com> | 2017-07-13 16:03:11 +0800 |
| commit | cb906e739f27931e9798510cd83725131ed55209 (patch) | |
| tree | 52b465c5eb2250dec3ed3d5f02b86db79653b838 /MoonParser/parser.hpp | |
| parent | 975c3c7dfa032229272c3b225de1127f1605e2d2 (diff) | |
| download | yuescript-cb906e739f27931e9798510cd83725131ed55209.tar.gz yuescript-cb906e739f27931e9798510cd83725131ed55209.tar.bz2 yuescript-cb906e739f27931e9798510cd83725131ed55209.zip | |
rewrite parsing codes with parserlib.
Diffstat (limited to 'MoonParser/parser.hpp')
| -rw-r--r-- | MoonParser/parser.hpp | 428 |
1 files changed, 428 insertions, 0 deletions
diff --git a/MoonParser/parser.hpp b/MoonParser/parser.hpp new file mode 100644 index 0000000..540a51c --- /dev/null +++ b/MoonParser/parser.hpp | |||
| @@ -0,0 +1,428 @@ | |||
| 1 | #ifndef PARSER_HPP | ||
| 2 | #define PARSER_HPP | ||
| 3 | |||
| 4 | |||
| 5 | //gcc chokes without rule::rule(const rule &), | ||
| 6 | //msvc complains when rule::rule(const rule &) is defined. | ||
| 7 | #ifdef _MSC_VER | ||
| 8 | #pragma warning (disable: 4521) | ||
| 9 | #endif | ||
| 10 | |||
| 11 | |||
| 12 | #include <vector> | ||
| 13 | #include <string> | ||
| 14 | #include <list> | ||
| 15 | #include <functional> | ||
| 16 | |||
| 17 | |||
| 18 | namespace parserlib { | ||
| 19 | |||
| 20 | |||
// Forward declarations: the library's internal implementation types, plus
// `rule`, which `expr` refers to before `rule` is defined.
class _private;
class _expr;
class _context;
class rule;


///type of the parser's input.
typedef std::vector<int> input;

///iterator into the parser's input.
typedef input::iterator input_it;

///item handed to a user handler (see user_handler).
struct item_t
{
    ///begin iterator of the item.
    input_it begin;

    ///end iterator of the item.
    input_it end;

    ///opaque user pointer.
    ///NOTE(review): presumably the `ud` argument given to parse() - confirm.
    void* user_data;
};

///callback type accepted by user(); receives the item and returns a bool.
///NOTE(review): the meaning of the returned bool is defined by the
///implementation, which is not visible in this header.
typedef std::function<bool(const item_t&)> user_handler;


///position into the input.
class pos {
public:
    ///iterator into the input.
    input::iterator m_it;

    ///line.
    int m_line;

    ///column.
    int m_col;

    /** null constructor.
        Value-initializes the iterator and zeroes line and column, so that a
        default-constructed position never carries indeterminate values.
     */
    pos() : m_it(), m_line(0), m_col(0) {}

    /** constructor from input.
        @param i input.
     */
    pos(input &i);
};
| 59 | |||
| 60 | |||
| 61 | /** a grammar expression. | ||
| 62 | */ | ||
| 63 | class expr { | ||
| 64 | public: | ||
| 65 | /** character terminal constructor. | ||
| 66 | @param c character. | ||
| 67 | */ | ||
| 68 | expr(int c); | ||
| 69 | |||
| 70 | /** null-terminated string terminal constructor. | ||
| 71 | @param s null-terminated string. | ||
| 72 | */ | ||
| 73 | expr(const char *s); | ||
| 74 | |||
| 75 | /** null-terminated wide string terminal constructor. | ||
| 76 | @param s null-terminated string. | ||
| 77 | */ | ||
| 78 | expr(const wchar_t *s); | ||
| 79 | |||
| 80 | /** rule reference constructor. | ||
| 81 | @param r rule. | ||
| 82 | */ | ||
| 83 | expr(rule &r); | ||
| 84 | |||
| 85 | /** creates a zero-or-more loop out of this expression. | ||
| 86 | @return a zero-or-more loop expression. | ||
| 87 | */ | ||
| 88 | expr operator *() const; | ||
| 89 | |||
| 90 | /** creates a one-or-more loop out of this expression. | ||
| 91 | @return a one-or-more loop expression. | ||
| 92 | */ | ||
| 93 | expr operator +() const; | ||
| 94 | |||
| 95 | /** creates an optional out of this expression. | ||
| 96 | @return an optional expression. | ||
| 97 | */ | ||
| 98 | expr operator -() const; | ||
| 99 | |||
| 100 | /** creates an AND-expression. | ||
| 101 | @return an AND-expression. | ||
| 102 | */ | ||
| 103 | expr operator &() const; | ||
| 104 | |||
| 105 | /** creates a NOT-expression. | ||
| 106 | @return a NOT-expression. | ||
| 107 | */ | ||
| 108 | expr operator !() const; | ||
| 109 | |||
| 110 | private: | ||
| 111 | //internal expression | ||
| 112 | _expr *m_expr; | ||
| 113 | |||
| 114 | //internal constructor from internal expression | ||
| 115 | expr(_expr *e) : m_expr(e) {} | ||
| 116 | |||
| 117 | //assignment not allowed | ||
| 118 | expr &operator = (expr &); | ||
| 119 | |||
| 120 | friend class _private; | ||
| 121 | }; | ||
| 122 | |||
| 123 | |||
| 124 | /** type of procedure to invoke when a rule is successfully parsed. | ||
| 125 | @param b begin position of input. | ||
| 126 | @param e end position of input. | ||
| 127 | @param d pointer to user data. | ||
| 128 | */ | ||
| 129 | typedef void (*parse_proc)(const pos &b, const pos &e, void *d); | ||
| 130 | |||
| 131 | |||
| 132 | ///input range. | ||
| 133 | class input_range { | ||
| 134 | public: | ||
| 135 | ///begin position. | ||
| 136 | pos m_begin; | ||
| 137 | |||
| 138 | ///end position. | ||
| 139 | pos m_end; | ||
| 140 | |||
| 141 | ///empty constructor. | ||
| 142 | input_range() {} | ||
| 143 | |||
| 144 | /** constructor. | ||
| 145 | @param b begin position. | ||
| 146 | @param e end position. | ||
| 147 | */ | ||
| 148 | input_range(const pos &b, const pos &e); | ||
| 149 | }; | ||
| 150 | |||
| 151 | |||
/** error types reported by the parser.
    Values from ERROR_USER upwards are available for user-defined errors.
 */
enum ERROR_TYPE {
    /** syntax error. */
    ERROR_SYNTAX_ERROR = 1,

    /** invalid end of file. */
    ERROR_INVALID_EOF = 2,

    /** first user error. */
    ERROR_USER = 100
};
| 163 | |||
| 164 | |||
| 165 | ///error. | ||
| 166 | class error : public input_range { | ||
| 167 | public: | ||
| 168 | ///type | ||
| 169 | int m_type; | ||
| 170 | |||
| 171 | /** constructor. | ||
| 172 | @param b begin position. | ||
| 173 | @param e end position. | ||
| 174 | @param t type. | ||
| 175 | */ | ||
| 176 | error(const pos &b, const pos &e, int t); | ||
| 177 | |||
| 178 | /** compare on begin position. | ||
| 179 | @param e the other error to compare this with. | ||
| 180 | @return true if this comes before the previous error, false otherwise. | ||
| 181 | */ | ||
| 182 | bool operator < (const error &e) const; | ||
| 183 | }; | ||
| 184 | |||
| 185 | |||
| 186 | ///type of error list. | ||
| 187 | typedef std::list<error> error_list; | ||
| 188 | |||
| 189 | |||
| 190 | /** represents a rule. | ||
| 191 | */ | ||
| 192 | class rule { | ||
| 193 | public: | ||
| 194 | /** character terminal constructor. | ||
| 195 | @param c character. | ||
| 196 | */ | ||
| 197 | rule(int c); | ||
| 198 | |||
| 199 | /** null-terminated string terminal constructor. | ||
| 200 | @param s null-terminated string. | ||
| 201 | */ | ||
| 202 | rule(const char *s); | ||
| 203 | |||
| 204 | /** null-terminated wide string terminal constructor. | ||
| 205 | @param s null-terminated string. | ||
| 206 | */ | ||
| 207 | rule(const wchar_t *s); | ||
| 208 | |||
| 209 | /** constructor from expression. | ||
| 210 | @param e expression. | ||
| 211 | */ | ||
| 212 | rule(const expr &e); | ||
| 213 | |||
| 214 | /** constructor from rule. | ||
| 215 | @param r rule. | ||
| 216 | */ | ||
| 217 | rule(rule &r); | ||
| 218 | |||
| 219 | /** invalid constructor from rule (required by gcc). | ||
| 220 | @param r rule. | ||
| 221 | @exception std::logic_error always thrown. | ||
| 222 | */ | ||
| 223 | rule(const rule &r); | ||
| 224 | |||
| 225 | /** deletes the internal object that represents the expression. | ||
| 226 | */ | ||
| 227 | ~rule(); | ||
| 228 | |||
| 229 | /** creates a zero-or-more loop out of this rule. | ||
| 230 | @return a zero-or-more loop rule. | ||
| 231 | */ | ||
| 232 | expr operator *(); | ||
| 233 | |||
| 234 | /** creates a one-or-more loop out of this rule. | ||
| 235 | @return a one-or-more loop rule. | ||
| 236 | */ | ||
| 237 | expr operator +(); | ||
| 238 | |||
| 239 | /** creates an optional out of this rule. | ||
| 240 | @return an optional rule. | ||
| 241 | */ | ||
| 242 | expr operator -(); | ||
| 243 | |||
| 244 | /** creates an AND-expression out of this rule. | ||
| 245 | @return an AND-expression out of this rule. | ||
| 246 | */ | ||
| 247 | expr operator &(); | ||
| 248 | |||
| 249 | /** creates a NOT-expression out of this rule. | ||
| 250 | @return a NOT-expression out of this rule. | ||
| 251 | */ | ||
| 252 | expr operator !(); | ||
| 253 | |||
| 254 | /** sets the parse procedure. | ||
| 255 | @param p procedure. | ||
| 256 | */ | ||
| 257 | void set_parse_proc(parse_proc p); | ||
| 258 | |||
| 259 | /** get the this ptr (since operator & is overloaded). | ||
| 260 | @return pointer to this. | ||
| 261 | */ | ||
| 262 | rule *this_ptr() { return this; } | ||
| 263 | |||
| 264 | private: | ||
| 265 | //mode | ||
| 266 | enum _MODE { | ||
| 267 | _PARSE, | ||
| 268 | _REJECT, | ||
| 269 | _ACCEPT | ||
| 270 | }; | ||
| 271 | |||
| 272 | //state | ||
| 273 | struct _state { | ||
| 274 | //position in source code, relative to start | ||
| 275 | size_t m_pos; | ||
| 276 | |||
| 277 | //mode | ||
| 278 | _MODE m_mode; | ||
| 279 | |||
| 280 | //constructor | ||
| 281 | _state(size_t pos = -1, _MODE mode = _PARSE) : | ||
| 282 | m_pos(pos), m_mode(mode) {} | ||
| 283 | }; | ||
| 284 | |||
| 285 | //internal expression | ||
| 286 | _expr *m_expr; | ||
| 287 | |||
| 288 | //associated parse procedure. | ||
| 289 | parse_proc m_parse_proc; | ||
| 290 | |||
| 291 | //state | ||
| 292 | _state m_state; | ||
| 293 | |||
| 294 | //assignment not allowed | ||
| 295 | rule &operator = (rule &); | ||
| 296 | |||
| 297 | friend class _private; | ||
| 298 | friend class _context; | ||
| 299 | }; | ||
| 300 | |||
| 301 | |||
| 302 | /** creates a sequence of expressions. | ||
| 303 | @param left left operand. | ||
| 304 | @param right right operand. | ||
| 305 | @return an expression which parses a sequence. | ||
| 306 | */ | ||
| 307 | expr operator >> (const expr &left, const expr &right); | ||
| 308 | |||
| 309 | |||
| 310 | /** creates a choice of expressions. | ||
| 311 | @param left left operand. | ||
| 312 | @param right right operand. | ||
| 313 | @return an expression which parses a choice. | ||
| 314 | */ | ||
| 315 | expr operator | (const expr &left, const expr &right); | ||
| 316 | |||
| 317 | |||
| 318 | /** converts a parser expression into a terminal. | ||
| 319 | @param e expression. | ||
| 320 | @return an expression which parses a terminal. | ||
| 321 | */ | ||
| 322 | expr term(const expr &e); | ||
| 323 | |||
| 324 | |||
| 325 | /** creates a set expression from a null-terminated string. | ||
| 326 | @param s null-terminated string with characters of the set. | ||
| 327 | @return an expression which parses a single character out of a set. | ||
| 328 | */ | ||
| 329 | expr set(const char *s); | ||
| 330 | |||
| 331 | |||
| 332 | /** creates a set expression from a null-terminated wide string. | ||
| 333 | @param s null-terminated string with characters of the set. | ||
| 334 | @return an expression which parses a single character out of a set. | ||
| 335 | */ | ||
| 336 | expr set(const wchar_t *s); | ||
| 337 | |||
| 338 | |||
| 339 | /** creates a range expression. | ||
| 340 | @param min min character. | ||
| 341 | @param max max character. | ||
| 342 | @return an expression which parses a single character out of range. | ||
| 343 | */ | ||
| 344 | expr range(int min, int max); | ||
| 345 | |||
| 346 | |||
| 347 | /** creates an expression which increments the line counter | ||
| 348 | and resets the column counter when the given expression | ||
| 349 | is parsed successfully; used for newline characters. | ||
| 350 | @param e expression to wrap into a newline parser. | ||
| 351 | @return an expression that handles newlines. | ||
| 352 | */ | ||
| 353 | expr nl(const expr &e); | ||
| 354 | |||
| 355 | |||
| 356 | /** creates an expression which tests for the end of input. | ||
| 357 | @return an expression that handles the end of input. | ||
| 358 | */ | ||
| 359 | expr eof(); | ||
| 360 | |||
| 361 | |||
| 362 | /** creates a not expression. | ||
| 363 | @param e expression. | ||
| 364 | @return the appropriate expression. | ||
| 365 | */ | ||
| 366 | expr not_(const expr &e); | ||
| 367 | |||
| 368 | |||
| 369 | /** creates an and expression. | ||
| 370 | @param e expression. | ||
| 371 | @return the appropriate expression. | ||
| 372 | */ | ||
| 373 | expr and_(const expr &e); | ||
| 374 | |||
| 375 | |||
| 376 | /** creates an expression that parses any character. | ||
| 377 | @return the appropriate expression. | ||
| 378 | */ | ||
| 379 | expr any(); | ||
| 380 | |||
| 381 | |||
| 382 | /** parsing succeeds without consuming any input. | ||
| 383 | */ | ||
| 384 | expr true_(); | ||
| 385 | |||
| 386 | |||
| 387 | /** parsing fails without consuming any input. | ||
| 388 | */ | ||
| 389 | expr false_(); | ||
| 390 | |||
| 391 | |||
| 392 | /** parse with target expression and let user handle result. | ||
| 393 | */ | ||
| 394 | expr user(const expr &e, const user_handler& handler); | ||
| 395 | |||
| 396 | |||
| 397 | /** parses the given input. | ||
| 398 | The parse procedures of each rule parsed are executed | ||
| 399 | before this function returns, if parsing succeeds. | ||
| 400 | @param i input. | ||
| 401 | @param g root rule of grammar. | ||
| 402 | @param el list of errors. | ||
| 403 | @param d user data, passed to the parse procedures. | ||
| 404 | @return true on parsing success, false on failure. | ||
| 405 | */ | ||
| 406 | bool parse(input &i, rule &g, error_list &el, void *d, void* ud); | ||
| 407 | |||
| 408 | |||
| 409 | /** output the specific input range to the specific stream. | ||
| 410 | @param stream stream. | ||
| 411 | @param ir input range. | ||
| 412 | @return the stream. | ||
| 413 | */ | ||
| 414 | template <class T> T &operator << (T &stream, const input_range &ir) { | ||
| 415 | for(input::const_iterator it = ir.m_begin.m_it; | ||
| 416 | it != ir.m_end.m_it; | ||
| 417 | ++it) | ||
| 418 | { | ||
| 419 | stream << (typename T::char_type)*it; | ||
| 420 | } | ||
| 421 | return stream; | ||
| 422 | } | ||
| 423 | |||
| 424 | |||
| 425 | } //namespace parserlib | ||
| 426 | |||
| 427 | |||
| 428 | #endif //PARSER_HPP | ||
