aboutsummaryrefslogtreecommitdiff
path: root/MoonParser/parser.hpp
blob: 5ec92af956f52ac25f804d22ee757d476c1ce77d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
#ifndef PARSER_HPP
#define PARSER_HPP


//gcc chokes without rule::rule(const rule &),
//msvc complains (warning C4521, multiple copy constructors specified)
//when rule::rule(const rule &) is defined alongside rule::rule(rule &).
#ifdef _MSC_VER
#pragma warning (disable: 4521)
#endif


#include <vector>
#include <string>
#include <list>
#include <functional>


namespace parserlib {


//helper class befriended by expr and rule; not defined in this header.
class _private;
//internal expression object held by pointer in expr and rule; not defined in this header.
class _expr;
//class befriended by rule; not defined in this header.
class _context;
//defined further down; declared early because expr has a constructor taking rule &.
class rule;


///the parser's input: a vector of int elements.
using input = std::vector<int>;

///mutable iterator over the parser's input.
using input_it = input::iterator;

/** argument handed to a user_handler: a pair of input iterators plus an
    opaque pointer.
    NOTE(review): begin/end presumably delimit the input matched by the
    expression wrapped with user() - confirm against the implementation.
 */
struct item_t
{
    input_it begin;
    input_it end;
    void* user_data;
};

/** callable taking an item_t and returning bool; accepted by user().
    NOTE(review): the meaning of the bool result is decided by the
    implementation of user() - confirm there.
 */
using user_handler = std::function<bool(const item_t&)>;


///position into the input.
class pos {
public:
    ///interator into the input.
    input::iterator m_it;

    ///line.
    int m_line;

    ///column.
    int m_col;

    ///null constructor.
    pos() {}

    /** constructor from input.
        @param i input.
     */
    pos(input &i);
};


/** a grammar expression.
    Holds a pointer to an internal _expr object. The terminal and rule
    constructors are not explicit, so an int, a string literal or a rule
    converts implicitly wherever a const expr & is expected (for example
    as an operand of operator >> or operator |).
 */
class expr {
public:
    /** character terminal constructor.
        @param c character.
     */
    expr(int c);

    /** null-terminated string terminal constructor.
        @param s null-terminated string.
     */
    expr(const char *s);

    /** null-terminated wide string terminal constructor.
        @param s null-terminated wide string.
     */
    expr(const wchar_t *s);

    /** rule reference constructor.
        @param r rule.
     */
    expr(rule &r);

    /** creates a zero-or-more loop out of this expression.
        @return a zero-or-more loop expression.
     */
    expr operator *() const;

    /** creates a one-or-more loop out of this expression.
        @return a one-or-more loop expression.
     */
    expr operator +() const;

    /** creates an optional out of this expression.
        @return an optional expression.
     */
    expr operator -() const;

    /** creates an AND-expression.
        Because unary operator & is overloaded here, applying & to an expr
        yields an expression, not the object's address.
        @return an AND-expression.
     */
    expr operator &() const;

    /** creates a NOT-expression.
        @return a NOT-expression.
     */
    expr operator !() const;

private:
    //internal expression
    _expr *m_expr;

    //internal constructor from internal expression
    expr(_expr *e) : m_expr(e) {}

    //assignment not allowed; deleted so that any attempt (including from
    //members and friends) is rejected at compile time instead of link time
    expr &operator = (expr &) = delete;

    friend class _private;
};


/** signature of the callback invoked when a rule is successfully parsed.
    @param b begin position of input.
    @param e end position of input.
    @param d pointer to user data.
 */
using parse_proc = void (*)(const pos& b, const pos& e, void* d);


///a span of the input, delimited by two positions.
class input_range {
public:
    ///position where the range begins.
    pos m_begin;

    ///position where the range ends.
    pos m_end;

    ///empty constructor; both positions are default-constructed.
    input_range() {}

    /** constructs a range from two positions.
        @param b begin position.
        @param e end position.
     */
    input_range(const pos& b, const pos& e);

    ///virtual destructor, so objects of derived classes can be destroyed through an input_range pointer.
    virtual ~input_range() {}
};


///error type codes.
enum ERROR_TYPE {
    ///a syntax error.
    ERROR_SYNTAX_ERROR = 1,

    ///the input ended where it was not valid to end.
    ERROR_INVALID_EOF = 2,

    ///first value available for user-defined errors.
    ERROR_USER = 100
};


///error.
class error : public input_range {
public:
    ///type
    int m_type;

    /** constructor.
        @param b begin position.
        @param e end position.
        @param t type.
     */
    error(const pos &b, const pos &e, int t);

    /** compare on begin position.
        @param e the other error to compare this with.
        @return true if this comes before the previous error, false otherwise.
     */
    bool operator < (const error &e) const;
};


///type of error list.
typedef std::list<error> error_list;


/** represents a rule.
    Holds a pointer to an internal _expr object, an optional parse procedure
    and a small internal state record. Like expr, the terminal and
    expression constructors are not explicit.
 */
class rule {
public:
    /** character terminal constructor.
        @param c character.
     */
    rule(int c);

    /** null-terminated string terminal constructor.
        @param s null-terminated string.
     */
    rule(const char *s);

    /** null-terminated wide string terminal constructor.
        @param s null-terminated wide string.
     */
    rule(const wchar_t *s);

    /** constructor from expression.
        @param e expression.
     */
    rule(const expr &e);

    /** constructor from rule.
        @param r rule.
     */
    rule(rule &r);

    /** invalid constructor from rule (required by gcc).
        @param r rule.
        @exception std::logic_error always thrown.
     */
    rule(const rule &r);

    /** deletes the internal object that represents the expression.
     */
    ~rule();

    /** creates a zero-or-more loop out of this rule.
        @return a zero-or-more loop rule.
     */
    expr operator *();

    /** creates a one-or-more loop out of this rule.
        @return a one-or-more loop rule.
     */
    expr operator +();

    /** creates an optional out of this rule.
        @return an optional rule.
     */
    expr operator -();

    /** creates an AND-expression out of this rule.
        @return an AND-expression out of this rule.
     */
    expr operator &();

    /** creates a NOT-expression out of this rule.
        @return a NOT-expression out of this rule.
     */
    expr operator !();

    /** sets the parse procedure.
        @param p procedure.
     */
    void set_parse_proc(parse_proc p);

    /** get the this ptr (since operator & is overloaded).
        @return pointer to this.
     */
    rule *this_ptr() { return this; }

private:
    //mode
    //NOTE(review): _MODE, _PARSE, _REJECT and _ACCEPT begin with an
    //underscore followed by an uppercase letter, a form the C++ standard
    //reserves for the implementation; kept unchanged because the friend
    //classes may refer to these names.
    enum _MODE {
        _PARSE,
        _REJECT,
        _ACCEPT
    };

    //state
    struct _state {
        //position in source code, relative to start
        size_t m_pos;

        //mode
        _MODE m_mode;

        //constructor; the default position is the largest size_t value
        //(-1 converted explicitly instead of relying on implicit conversion)
        _state(size_t pos = static_cast<size_t>(-1), _MODE mode = _PARSE) :
            m_pos(pos), m_mode(mode) {}
    };

    //internal expression
    _expr *m_expr;

    //associated parse procedure.
    parse_proc m_parse_proc;

    //state
    _state m_state;

    //assignment not allowed; deleted so that any attempt (including from
    //members and friends) is rejected at compile time instead of link time
    rule &operator = (rule &) = delete;

    friend class _private;
    friend class _context;
};


/** builds a sequence out of two expressions.
    @param left left operand.
    @param right right operand.
    @return an expression which parses a sequence.
 */
expr operator >> (const expr& left, const expr& right);


/** builds a choice between two expressions.
    @param left left operand.
    @param right right operand.
    @return an expression which parses a choice.
 */
expr operator | (const expr& left, const expr& right);


/** turns a parser expression into a terminal.
    @param e expression.
    @return an expression which parses a terminal.
 */
expr term(const expr& e);


/** builds a set expression from a null-terminated string.
    @param s null-terminated string holding the characters of the set.
    @return an expression which parses a single character out of a set.
 */
expr set(const char* s);


/** builds a set expression from a null-terminated wide string.
    @param s null-terminated wide string holding the characters of the set.
    @return an expression which parses a single character out of a set.
 */
expr set(const wchar_t* s);


/** builds a range expression.
    @param min min character.
    @param max max character.
    @return an expression which parses a single character out of range.
 */
expr range(int min, int max);


/** wraps an expression into a newline parser: when the given expression
    is parsed successfully, the line counter is incremented and the
    column counter is reset.
    @param e expression to wrap into a newline parser.
    @return an expression that handles newlines.
 */
expr nl(const expr& e);


/** builds an expression which tests for the end of input.
    @return an expression that handles the end of input.
 */
expr eof();


/** builds a not expression.
    @param e expression.
    @return the appropriate expression.
 */
expr not_(const expr& e);


/** builds an and expression.
    @param e expression.
    @return the appropriate expression.
 */
expr and_(const expr& e);


/** builds an expression that parses any character.
    @return the appropriate expression.
 */
expr any();


/** builds an expression whose parsing succeeds without consuming any input.
    @return the appropriate expression.
 */
expr true_();


/** builds an expression whose parsing fails without consuming any input.
    @return the appropriate expression.
 */
expr false_();


/** parses with the target expression and lets the user handle the result.
    @param e target expression.
    @param handler user callable taking an item_t and returning bool.
    @return the appropriate expression.
 */
expr user(const expr& e, const user_handler& handler);


/** parses the given input.
    If parsing succeeds, the parse procedures of each rule parsed are
    executed before this function returns.
    @param i input.
    @param g root rule of grammar.
    @param el list of errors.
    @param d user data, passed to the parse procedures.
    @param ud second opaque pointer. NOTE(review): presumably the value
              that reaches user() handlers as item_t::user_data - confirm
              against the implementation.
    @return true on parsing success, false on failure.
 */
bool parse(input& i, rule& g, error_list& el, void* d, void* ud);


/** output the specific input range to the specific stream.
    @param stream stream.
    @param ir input range.
    @return the stream.
 */
template <class T> T &operator << (T &stream, const input_range &ir) {
    for(input::const_iterator it = ir.m_begin.m_it;
        it != ir.m_end.m_it;
        ++it)
    {
        stream << (typename T::char_type)*it;
    }
    return stream;
}


} //namespace parserlib


#endif //PARSER_HPP