diff options
Diffstat (limited to 'MoonParser/parser.hpp')
| -rw-r--r-- | MoonParser/parser.hpp | 425 |
1 files changed, 0 insertions, 425 deletions
diff --git a/MoonParser/parser.hpp b/MoonParser/parser.hpp deleted file mode 100644 index cbf0168..0000000 --- a/MoonParser/parser.hpp +++ /dev/null | |||
| @@ -1,425 +0,0 @@ | |||
| 1 | #pragma once | ||
| 2 | |||
| 3 | |||
| 4 | //gcc chokes without rule::rule(const rule &), | ||
| 5 | //msvc complains when rule::rule(const rule &) is defined. | ||
| 6 | #ifdef _MSC_VER | ||
| 7 | #pragma warning (disable: 4521) | ||
| 8 | #endif | ||
| 9 | |||
| 10 | |||
| 11 | #include <vector> | ||
| 12 | #include <string> | ||
| 13 | #include <list> | ||
| 14 | #include <functional> | ||
| 15 | #include <codecvt> | ||
| 16 | #include <locale> | ||
| 17 | |||
| 18 | namespace parserlib { | ||
| 19 | |||
// const str hash helper functions

/** hashes a null-terminated string; usable in constant expressions.
	The value is built from the END of the string towards its start:
	hash("") == 5381 and hash(s) == s[0] + 33 * hash(s + 1).
	@param input null-terminated string.
	@return hash value of the string.
*/
inline constexpr size_t hash(char const* input)
{
	return *input ? *input + 33ull * hash(input + 1) : 5381;
}

/** run-time hash of the characters input[index] .. input[size - 1].
	Produces the same value as hash(char const*) does for the same
	characters, so run-time strings can be compared against "..."_id constants.
	Implemented as a loop (walking backwards from the end) instead of
	recursion, so the stack depth no longer grows with the input length.
	@param input character buffer.
	@param size number of characters in the buffer.
	@param index first character to include; 5381 is returned when index >= size.
	@return hash value of the selected characters.
*/
inline size_t hash(const char* input, int size, int index)
{
	size_t result = 5381;
	for (int i = size; i > index; --i)
	{
		result = static_cast<size_t>(input[i - 1] + 33ull * result);
	}
	return result;
}

/** user-defined literal producing the hash of a string literal,
	e.g. "name"_id. The length argument is ignored: hashing stops at
	the first null character.
	@param s string literal.
	@return hash(s).
*/
inline constexpr size_t operator""_id(const char* s, size_t)
{
	return hash(s);
}
| 33 | |||
///type of the parser's input: a wide-character string.
using input = std::basic_string<wchar_t>;

///mutable iterator into the parser's input.
using input_it = input::iterator;

///converter between UTF-8 encoded byte strings and the parser's wide input.
///NOTE(review): std::wstring_convert and std::codecvt_utf8 are deprecated since C++17.
using Converter = std::wstring_convert<std::codecvt_utf8<input::value_type>>;

//forward declarations; _private, _expr and _context are only named in this
//part of the header, rule is defined further below.
class _private;
class _expr;
class _context;
class rule;
| 43 | |||
| 44 | |||
| 45 | struct item_t | ||
| 46 | { | ||
| 47 | input_it begin; | ||
| 48 | input_it end; | ||
| 49 | void* user_data; | ||
| 50 | }; | ||
| 51 | typedef std::function<bool(const item_t&)> user_handler; | ||
| 52 | |||
| 53 | |||
| 54 | ///position into the input. | ||
| 55 | class pos { | ||
| 56 | public: | ||
| 57 | ///interator into the input. | ||
| 58 | input::iterator m_it; | ||
| 59 | |||
| 60 | ///line. | ||
| 61 | int m_line; | ||
| 62 | |||
| 63 | ///column. | ||
| 64 | int m_col; | ||
| 65 | |||
| 66 | ///null constructor. | ||
| 67 | pos():m_line(-1),m_col(0) {} | ||
| 68 | |||
| 69 | /** constructor from input. | ||
| 70 | @param i input. | ||
| 71 | */ | ||
| 72 | pos(input &i); | ||
| 73 | }; | ||
| 74 | |||
| 75 | |||
| 76 | /** a grammar expression. | ||
| 77 | */ | ||
| 78 | class expr { | ||
| 79 | public: | ||
| 80 | /** character terminal constructor. | ||
| 81 | @param c character. | ||
| 82 | */ | ||
| 83 | expr(char c); | ||
| 84 | |||
| 85 | /** null-terminated string terminal constructor. | ||
| 86 | @param s null-terminated string. | ||
| 87 | */ | ||
| 88 | expr(const char *s); | ||
| 89 | |||
| 90 | /** rule reference constructor. | ||
| 91 | @param r rule. | ||
| 92 | */ | ||
| 93 | expr(rule &r); | ||
| 94 | |||
| 95 | /** creates a zero-or-more loop out of this expression. | ||
| 96 | @return a zero-or-more loop expression. | ||
| 97 | */ | ||
| 98 | expr operator *() const; | ||
| 99 | |||
| 100 | /** creates a one-or-more loop out of this expression. | ||
| 101 | @return a one-or-more loop expression. | ||
| 102 | */ | ||
| 103 | expr operator +() const; | ||
| 104 | |||
| 105 | /** creates an optional out of this expression. | ||
| 106 | @return an optional expression. | ||
| 107 | */ | ||
| 108 | expr operator -() const; | ||
| 109 | |||
| 110 | /** creates an AND-expression. | ||
| 111 | @return an AND-expression. | ||
| 112 | */ | ||
| 113 | expr operator &() const; | ||
| 114 | |||
| 115 | /** creates a NOT-expression. | ||
| 116 | @return a NOT-expression. | ||
| 117 | */ | ||
| 118 | expr operator !() const; | ||
| 119 | |||
| 120 | private: | ||
| 121 | //internal expression | ||
| 122 | _expr *m_expr; | ||
| 123 | |||
| 124 | //internal constructor from internal expression | ||
| 125 | expr(_expr *e) : m_expr(e) {} | ||
| 126 | |||
| 127 | //assignment not allowed | ||
| 128 | expr &operator = (expr &); | ||
| 129 | |||
| 130 | friend class _private; | ||
| 131 | }; | ||
| 132 | |||
| 133 | |||
| 134 | /** type of procedure to invoke when a rule is successfully parsed. | ||
| 135 | @param b begin position of input. | ||
| 136 | @param e end position of input. | ||
| 137 | @param d pointer to user data. | ||
| 138 | */ | ||
| 139 | typedef void (*parse_proc)(const pos &b, const pos &e, void *d); | ||
| 140 | |||
| 141 | |||
| 142 | ///input range. | ||
| 143 | class input_range { | ||
| 144 | public: | ||
| 145 | virtual ~input_range() {} | ||
| 146 | |||
| 147 | ///begin position. | ||
| 148 | pos m_begin; | ||
| 149 | |||
| 150 | ///end position. | ||
| 151 | pos m_end; | ||
| 152 | |||
| 153 | ///empty constructor. | ||
| 154 | input_range() {} | ||
| 155 | |||
| 156 | /** constructor. | ||
| 157 | @param b begin position. | ||
| 158 | @param e end position. | ||
| 159 | */ | ||
| 160 | input_range(const pos &b, const pos &e); | ||
| 161 | }; | ||
| 162 | |||
| 163 | |||
///enum with error types.
enum ERROR_TYPE {
	ERROR_SYNTAX_ERROR = 1,  ///< syntax error
	ERROR_INVALID_EOF = 2,   ///< invalid end of file
	ERROR_USER = 100         ///< first user error
};
| 175 | |||
| 176 | |||
| 177 | ///error. | ||
| 178 | class error : public input_range { | ||
| 179 | public: | ||
| 180 | ///type | ||
| 181 | int m_type; | ||
| 182 | |||
| 183 | /** constructor. | ||
| 184 | @param b begin position. | ||
| 185 | @param e end position. | ||
| 186 | @param t type. | ||
| 187 | */ | ||
| 188 | error(const pos &b, const pos &e, int t); | ||
| 189 | |||
| 190 | /** compare on begin position. | ||
| 191 | @param e the other error to compare this with. | ||
| 192 | @return true if this comes before the previous error, false otherwise. | ||
| 193 | */ | ||
| 194 | bool operator < (const error &e) const; | ||
| 195 | }; | ||
| 196 | |||
| 197 | |||
| 198 | ///type of error list. | ||
| 199 | typedef std::list<error> error_list; | ||
| 200 | |||
| 201 | |||
| 202 | /** represents a rule. | ||
| 203 | */ | ||
| 204 | class rule { | ||
| 205 | public: | ||
| 206 | /** character terminal constructor. | ||
| 207 | @param c character. | ||
| 208 | */ | ||
| 209 | rule(char c); | ||
| 210 | |||
| 211 | /** null-terminated string terminal constructor. | ||
| 212 | @param s null-terminated string. | ||
| 213 | */ | ||
| 214 | rule(const char *s); | ||
| 215 | |||
| 216 | /** constructor from expression. | ||
| 217 | @param e expression. | ||
| 218 | */ | ||
| 219 | rule(const expr &e); | ||
| 220 | |||
| 221 | /** constructor from rule. | ||
| 222 | @param r rule. | ||
| 223 | */ | ||
| 224 | rule(rule &r); | ||
| 225 | |||
| 226 | /** invalid constructor from rule (required by gcc). | ||
| 227 | @param r rule. | ||
| 228 | @exception std::logic_error always thrown. | ||
| 229 | */ | ||
| 230 | rule(const rule &r); | ||
| 231 | |||
| 232 | /** deletes the internal object that represents the expression. | ||
| 233 | */ | ||
| 234 | ~rule(); | ||
| 235 | |||
| 236 | /** creates a zero-or-more loop out of this rule. | ||
| 237 | @return a zero-or-more loop rule. | ||
| 238 | */ | ||
| 239 | expr operator *(); | ||
| 240 | |||
| 241 | /** creates a one-or-more loop out of this rule. | ||
| 242 | @return a one-or-more loop rule. | ||
| 243 | */ | ||
| 244 | expr operator +(); | ||
| 245 | |||
| 246 | /** creates an optional out of this rule. | ||
| 247 | @return an optional rule. | ||
| 248 | */ | ||
| 249 | expr operator -(); | ||
| 250 | |||
| 251 | /** creates an AND-expression out of this rule. | ||
| 252 | @return an AND-expression out of this rule. | ||
| 253 | */ | ||
| 254 | expr operator &(); | ||
| 255 | |||
| 256 | /** creates a NOT-expression out of this rule. | ||
| 257 | @return a NOT-expression out of this rule. | ||
| 258 | */ | ||
| 259 | expr operator !(); | ||
| 260 | |||
| 261 | /** sets the parse procedure. | ||
| 262 | @param p procedure. | ||
| 263 | */ | ||
| 264 | void set_parse_proc(parse_proc p); | ||
| 265 | |||
| 266 | /** get the this ptr (since operator & is overloaded). | ||
| 267 | @return pointer to this. | ||
| 268 | */ | ||
| 269 | rule *this_ptr() { return this; } | ||
| 270 | |||
| 271 | private: | ||
| 272 | //mode | ||
| 273 | enum _MODE { | ||
| 274 | _PARSE, | ||
| 275 | _REJECT, | ||
| 276 | _ACCEPT | ||
| 277 | }; | ||
| 278 | |||
| 279 | //state | ||
| 280 | struct _state { | ||
| 281 | //position in source code, relative to start | ||
| 282 | size_t m_pos; | ||
| 283 | |||
| 284 | //mode | ||
| 285 | _MODE m_mode; | ||
| 286 | |||
| 287 | //constructor | ||
| 288 | _state(size_t pos = -1, _MODE mode = _PARSE) : | ||
| 289 | m_pos(pos), m_mode(mode) {} | ||
| 290 | }; | ||
| 291 | |||
| 292 | //internal expression | ||
| 293 | _expr *m_expr; | ||
| 294 | |||
| 295 | //associated parse procedure. | ||
| 296 | parse_proc m_parse_proc; | ||
| 297 | |||
| 298 | //state | ||
| 299 | _state m_state; | ||
| 300 | |||
| 301 | //assignment not allowed | ||
| 302 | rule &operator = (rule &); | ||
| 303 | |||
| 304 | friend class _private; | ||
| 305 | friend class _context; | ||
| 306 | }; | ||
| 307 | |||
| 308 | |||
| 309 | /** creates a sequence of expressions. | ||
| 310 | @param left left operand. | ||
| 311 | @param right right operand. | ||
| 312 | @return an expression which parses a sequence. | ||
| 313 | */ | ||
| 314 | expr operator >> (const expr &left, const expr &right); | ||
| 315 | |||
| 316 | |||
| 317 | /** creates a choice of expressions. | ||
| 318 | @param left left operand. | ||
| 319 | @param right right operand. | ||
| 320 | @return an expression which parses a choice. | ||
| 321 | */ | ||
| 322 | expr operator | (const expr &left, const expr &right); | ||
| 323 | |||
| 324 | |||
| 325 | /** converts a parser expression into a terminal. | ||
| 326 | @param e expression. | ||
| 327 | @return an expression which parses a terminal. | ||
| 328 | */ | ||
| 329 | expr term(const expr &e); | ||
| 330 | |||
| 331 | |||
| 332 | /** creates a set expression from a null-terminated string. | ||
| 333 | @param s null-terminated string with characters of the set. | ||
| 334 | @return an expression which parses a single character out of a set. | ||
| 335 | */ | ||
| 336 | expr set(const char *s); | ||
| 337 | |||
| 338 | |||
| 339 | /** creates a range expression. | ||
| 340 | @param min min character. | ||
| 341 | @param max max character. | ||
| 342 | @return an expression which parses a single character out of range. | ||
| 343 | */ | ||
| 344 | expr range(int min, int max); | ||
| 345 | |||
| 346 | |||
| 347 | /** creates an expression which increments the line counter | ||
| 348 | and resets the column counter when the given expression | ||
| 349 | is parsed successfully; used for newline characters. | ||
| 350 | @param e expression to wrap into a newline parser. | ||
| 351 | @return an expression that handles newlines. | ||
| 352 | */ | ||
| 353 | expr nl(const expr &e); | ||
| 354 | |||
| 355 | |||
| 356 | /** creates an expression which tests for the end of input. | ||
| 357 | @return an expression that handles the end of input. | ||
| 358 | */ | ||
| 359 | expr eof(); | ||
| 360 | |||
| 361 | |||
| 362 | /** creates a not expression. | ||
| 363 | @param e expression. | ||
| 364 | @return the appropriate expression. | ||
| 365 | */ | ||
| 366 | expr not_(const expr &e); | ||
| 367 | |||
| 368 | |||
| 369 | /** creates an and expression. | ||
| 370 | @param e expression. | ||
| 371 | @return the appropriate expression. | ||
| 372 | */ | ||
| 373 | expr and_(const expr &e); | ||
| 374 | |||
| 375 | |||
| 376 | /** creates an expression that parses any character. | ||
| 377 | @return the appropriate expression. | ||
| 378 | */ | ||
| 379 | expr any(); | ||
| 380 | |||
| 381 | |||
| 382 | /** parsing succeeds without consuming any input. | ||
| 383 | */ | ||
| 384 | expr true_(); | ||
| 385 | |||
| 386 | |||
| 387 | /** parsing fails without consuming any input. | ||
| 388 | */ | ||
| 389 | expr false_(); | ||
| 390 | |||
| 391 | |||
| 392 | /** parse with target expression and let user handle result. | ||
| 393 | */ | ||
| 394 | expr user(const expr &e, const user_handler& handler); | ||
| 395 | |||
| 396 | |||
| 397 | /** parses the given input. | ||
| 398 | The parse procedures of each rule parsed are executed | ||
| 399 | before this function returns, if parsing succeeds. | ||
| 400 | @param i input. | ||
| 401 | @param g root rule of grammar. | ||
| 402 | @param el list of errors. | ||
| 403 | @param d user data, passed to the parse procedures. | ||
| 404 | @return true on parsing success, false on failure. | ||
| 405 | */ | ||
| 406 | bool parse(input &i, rule &g, error_list &el, void *d, void* ud); | ||
| 407 | |||
| 408 | |||
| 409 | /** output the specific input range to the specific stream. | ||
| 410 | @param stream stream. | ||
| 411 | @param ir input range. | ||
| 412 | @return the stream. | ||
| 413 | */ | ||
| 414 | template <class T> T &operator << (T &stream, const input_range &ir) { | ||
| 415 | for(input::const_iterator it = ir.m_begin.m_it; | ||
| 416 | it != ir.m_end.m_it; | ||
| 417 | ++it) | ||
| 418 | { | ||
| 419 | stream << (typename T::char_type)*it; | ||
| 420 | } | ||
| 421 | return stream; | ||
| 422 | } | ||
| 423 | |||
| 424 | |||
| 425 | } //namespace parserlib | ||
