diff options
author | Sergio Medeiros <sqmedeiros@gmail.com> | 2015-03-23 14:13:25 -0300 |
---|---|---|
committer | Sergio Medeiros <sqmedeiros@gmail.com> | 2015-03-23 14:13:25 -0300 |
commit | 0e93d536ba2d312502737cce2ab0cc21393c4842 (patch) | |
tree | 7de1e3ae967c90a43e7086ccef61d1722881b20c | |
parent | a5a4b257e626847be3be4878c603adb51cbb420f (diff) | |
download | lpeglabel-0e93d536ba2d312502737cce2ab0cc21393c4842.tar.gz lpeglabel-0e93d536ba2d312502737cce2ab0cc21393c4842.tar.bz2 lpeglabel-0e93d536ba2d312502737cce2ab0cc21393c4842.zip |
Updating to lpeg 0.12.2
-rw-r--r-- | LICENSE | 2 | ||||
-rw-r--r-- | lpcap.c | 4 | ||||
-rw-r--r-- | lpcap.h | 4 | ||||
-rw-r--r-- | lpcode.c | 143 | ||||
-rw-r--r-- | lpcode.h | 4 | ||||
-rw-r--r-- | lpprint.c | 19 | ||||
-rw-r--r-- | lpprint.h | 2 | ||||
-rw-r--r-- | lptree.c | 413 | ||||
-rw-r--r-- | lptypes.h | 13 | ||||
-rw-r--r-- | lpvm.c | 55 | ||||
-rw-r--r-- | lpvm.h | 13 | ||||
-rw-r--r-- | makefile | 8 | ||||
-rwxr-xr-x | test.lua | 142 | ||||
-rw-r--r-- | testlabel.lua | 8 |
14 files changed, 472 insertions, 358 deletions
@@ -1,6 +1,6 @@ | |||
1 | The MIT License (MIT) | 1 | The MIT License (MIT) |
2 | 2 | ||
3 | Copyright (c) 2014 Sérgio Medeiros | 3 | Copyright (c) 2014-2015 Sérgio Medeiros |
4 | 4 | ||
5 | Permission is hereby granted, free of charge, to any person obtaining a copy | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy |
6 | of this software and associated documentation files (the "Software"), to deal | 6 | of this software and associated documentation files (the "Software"), to deal |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $ | 2 | ** $Id: lpcap.c,v 1.5 2014/12/12 16:58:47 roberto Exp $ |
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -462,7 +462,7 @@ static int pushcapture (CapState *cs) { | |||
462 | case Carg: { | 462 | case Carg: { |
463 | int arg = (cs->cap++)->idx; | 463 | int arg = (cs->cap++)->idx; |
464 | if (arg + FIXEDARGS > cs->ptop) | 464 | if (arg + FIXEDARGS > cs->ptop) |
465 | return luaL_error(L, "reference to absent argument #%d", arg); | 465 | return luaL_error(L, "reference to absent extra argument #%d", arg); |
466 | lua_pushvalue(L, arg + FIXEDARGS); | 466 | lua_pushvalue(L, arg + FIXEDARGS); |
467 | return 1; | 467 | return 1; |
468 | } | 468 | } |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $ | 2 | ** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #if !defined(lpcap_h) | 5 | #if !defined(lpcap_h) |
@@ -18,7 +18,7 @@ typedef enum CapKind { | |||
18 | 18 | ||
19 | typedef struct Capture { | 19 | typedef struct Capture { |
20 | const char *s; /* subject position */ | 20 | const char *s; /* subject position */ |
21 | short idx; /* extra info about capture (group name, arg index, etc.) */ | 21 | unsigned short idx; /* extra info (group name, arg index, etc.) */ |
22 | byte kind; /* kind of capture */ | 22 | byte kind; /* kind of capture */ |
23 | byte siz; /* size of full capture + 1 (0 = not a full capture) */ | 23 | byte siz; /* size of full capture + 1 (0 = not a full capture) */ |
24 | } Capture; | 24 | } Capture; |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $ | 2 | ** $Id: lpcode.c,v 1.21 2014/12/12 17:01:29 roberto Exp $ |
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -33,26 +33,30 @@ static const Charset *fullset = &fullset_; | |||
33 | */ | 33 | */ |
34 | 34 | ||
35 | /* | 35 | /* |
36 | ** Check whether a charset is empty (IFail), singleton (IChar), | 36 | ** Check whether a charset is empty (returns IFail), singleton (IChar), |
37 | ** full (IAny), or none of those (ISet). | 37 | ** full (IAny), or none of those (ISet). When singleton, '*c' returns |
38 | ** which character it is. (When generic set, the set was the input, | ||
39 | ** so there is no need to return it.) | ||
38 | */ | 40 | */ |
39 | static Opcode charsettype (const byte *cs, int *c) { | 41 | static Opcode charsettype (const byte *cs, int *c) { |
40 | int count = 0; | 42 | int count = 0; /* number of characters in the set */ |
41 | int i; | 43 | int i; |
42 | int candidate = -1; /* candidate position for a char */ | 44 | int candidate = -1; /* candidate position for the singleton char */ |
43 | for (i = 0; i < CHARSETSIZE; i++) { | 45 | for (i = 0; i < CHARSETSIZE; i++) { /* for each byte */ |
44 | int b = cs[i]; | 46 | int b = cs[i]; |
45 | if (b == 0) { | 47 | if (b == 0) { /* is byte empty? */ |
46 | if (count > 1) return ISet; /* else set is still empty */ | 48 | if (count > 1) /* was set neither empty nor singleton? */ |
49 | return ISet; /* neither full nor empty nor singleton */ | ||
50 | /* else set is still empty or singleton */ | ||
47 | } | 51 | } |
48 | else if (b == 0xFF) { | 52 | else if (b == 0xFF) { /* is byte full? */ |
49 | if (count < (i * BITSPERCHAR)) | 53 | if (count < (i * BITSPERCHAR)) /* was set not full? */ |
50 | return ISet; | 54 | return ISet; /* neither full nor empty nor singleton */ |
51 | else count += BITSPERCHAR; /* set is still full */ | 55 | else count += BITSPERCHAR; /* set is still full */ |
52 | } | 56 | } |
53 | else if ((b & (b - 1)) == 0) { /* byte has only one bit? */ | 57 | else if ((b & (b - 1)) == 0) { /* has byte only one bit? */ |
54 | if (count > 0) | 58 | if (count > 0) /* was set not empty? */ |
55 | return ISet; /* set is neither full nor empty */ | 59 | return ISet; /* neither full nor empty nor singleton */ |
56 | else { /* set has only one char till now; track it */ | 60 | else { /* set has only one char till now; track it */ |
57 | count++; | 61 | count++; |
58 | candidate = i; | 62 | candidate = i; |
@@ -77,6 +81,7 @@ static Opcode charsettype (const byte *cs, int *c) { | |||
77 | } | 81 | } |
78 | } | 82 | } |
79 | 83 | ||
84 | |||
80 | /* | 85 | /* |
81 | ** A few basic operations on Charsets | 86 | ** A few basic operations on Charsets |
82 | */ | 87 | */ |
@@ -84,16 +89,11 @@ static void cs_complement (Charset *cs) { | |||
84 | loopset(i, cs->cs[i] = ~cs->cs[i]); | 89 | loopset(i, cs->cs[i] = ~cs->cs[i]); |
85 | } | 90 | } |
86 | 91 | ||
87 | |||
88 | static int cs_equal (const byte *cs1, const byte *cs2) { | 92 | static int cs_equal (const byte *cs1, const byte *cs2) { |
89 | loopset(i, if (cs1[i] != cs2[i]) return 0); | 93 | loopset(i, if (cs1[i] != cs2[i]) return 0); |
90 | return 1; | 94 | return 1; |
91 | } | 95 | } |
92 | 96 | ||
93 | |||
94 | /* | ||
95 | ** computes whether sets cs1 and cs2 are disjoint | ||
96 | */ | ||
97 | static int cs_disjoint (const Charset *cs1, const Charset *cs2) { | 97 | static int cs_disjoint (const Charset *cs1, const Charset *cs2) { |
98 | loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) | 98 | loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) |
99 | return 1; | 99 | return 1; |
@@ -101,7 +101,8 @@ static int cs_disjoint (const Charset *cs1, const Charset *cs2) { | |||
101 | 101 | ||
102 | 102 | ||
103 | /* | 103 | /* |
104 | ** Convert a 'char' pattern (TSet, TChar, TAny) to a charset | 104 | ** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a |
105 | ** charset and return 1; else return 0. | ||
105 | */ | 106 | */ |
106 | int tocharset (TTree *tree, Charset *cs) { | 107 | int tocharset (TTree *tree, Charset *cs) { |
107 | switch (tree->tag) { | 108 | switch (tree->tag) { |
@@ -116,7 +117,7 @@ int tocharset (TTree *tree, Charset *cs) { | |||
116 | return 1; | 117 | return 1; |
117 | } | 118 | } |
118 | case TAny: { | 119 | case TAny: { |
119 | loopset(i, cs->cs[i] = 0xFF); /* add all to the set */ | 120 | loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */ |
120 | return 1; | 121 | return 1; |
121 | } | 122 | } |
122 | default: return 0; | 123 | default: return 0; |
@@ -125,13 +126,16 @@ int tocharset (TTree *tree, Charset *cs) { | |||
125 | 126 | ||
126 | 127 | ||
127 | /* | 128 | /* |
128 | ** Checks whether a pattern has captures | 129 | ** Check whether a pattern tree has captures |
129 | */ | 130 | */ |
130 | int hascaptures (TTree *tree) { | 131 | int hascaptures (TTree *tree) { |
131 | tailcall: | 132 | tailcall: |
132 | switch (tree->tag) { | 133 | switch (tree->tag) { |
133 | case TCapture: case TRunTime: | 134 | case TCapture: case TRunTime: |
134 | return 1; | 135 | return 1; |
136 | case TCall: | ||
137 | tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */ | ||
138 | case TOpenCall: assert(0); | ||
135 | default: { | 139 | default: { |
136 | switch (numsiblings[tree->tag]) { | 140 | switch (numsiblings[tree->tag]) { |
137 | case 1: /* return hascaptures(sib1(tree)); */ | 141 | case 1: /* return hascaptures(sib1(tree)); */ |
@@ -161,7 +165,7 @@ int hascaptures (TTree *tree) { | |||
161 | ** p is nullable => nullable(p) | 165 | ** p is nullable => nullable(p) |
162 | ** nofail(p) => p cannot fail | 166 | ** nofail(p) => p cannot fail |
163 | ** The function assumes that TOpenCall is not nullable; | 167 | ** The function assumes that TOpenCall is not nullable; |
164 | ** this will be checked again when the grammar is fixed.) | 168 | ** this will be checked again when the grammar is fixed. |
165 | ** Run-time captures can do whatever they want, so the result | 169 | ** Run-time captures can do whatever they want, so the result |
166 | ** is conservative. | 170 | ** is conservative. |
167 | */ | 171 | */ |
@@ -188,7 +192,7 @@ int checkaux (TTree *tree, int pred) { | |||
188 | if (!checkaux(sib1(tree), pred)) return 0; | 192 | if (!checkaux(sib1(tree), pred)) return 0; |
189 | /* else return checkaux(sib2(tree), pred); */ | 193 | /* else return checkaux(sib2(tree), pred); */ |
190 | tree = sib2(tree); goto tailcall; | 194 | tree = sib2(tree); goto tailcall; |
191 | case TChoice: case TLabChoice: /* labeled failure */ | 195 | case TChoice: case TLabChoice: /* labeled failure */ |
192 | if (checkaux(sib2(tree), pred)) return 1; | 196 | if (checkaux(sib2(tree), pred)) return 1; |
193 | /* else return checkaux(sib1(tree), pred); */ | 197 | /* else return checkaux(sib1(tree), pred); */ |
194 | tree = sib1(tree); goto tailcall; | 198 | tree = sib1(tree); goto tailcall; |
@@ -198,7 +202,7 @@ int checkaux (TTree *tree, int pred) { | |||
198 | case TCall: /* return checkaux(sib2(tree), pred); */ | 202 | case TCall: /* return checkaux(sib2(tree), pred); */ |
199 | tree = sib2(tree); goto tailcall; | 203 | tree = sib2(tree); goto tailcall; |
200 | default: assert(0); return 0; | 204 | default: assert(0); return 0; |
201 | }; | 205 | } |
202 | } | 206 | } |
203 | 207 | ||
204 | 208 | ||
@@ -246,16 +250,20 @@ int fixedlenx (TTree *tree, int count, int len) { | |||
246 | /* | 250 | /* |
247 | ** Computes the 'first set' of a pattern. | 251 | ** Computes the 'first set' of a pattern. |
248 | ** The result is a conservative aproximation: | 252 | ** The result is a conservative aproximation: |
249 | ** match p ax -> x' for some x ==> a in first(p). | 253 | ** match p ax -> x (for some x) ==> a belongs to first(p) |
254 | ** or | ||
255 | ** a not in first(p) ==> match p ax -> fail (for all x) | ||
256 | ** | ||
250 | ** The set 'follow' is the first set of what follows the | 257 | ** The set 'follow' is the first set of what follows the |
251 | ** pattern (full set if nothing follows it). | 258 | ** pattern (full set if nothing follows it). |
252 | ** The function returns 0 when this set can be used for | 259 | ** |
253 | ** tests that avoid the pattern altogether. | 260 | ** The function returns 0 when this resulting set can be used for |
261 | ** test instructions that avoid the pattern altogether. | ||
254 | ** A non-zero return can happen for two reasons: | 262 | ** A non-zero return can happen for two reasons: |
255 | ** 1) match p '' -> '' ==> returns 1. | 263 | ** 1) match p '' -> '' ==> return has bit 1 set |
256 | ** (tests cannot be used because they always fail for an empty input) | 264 | ** (tests cannot be used because they would always fail for an empty input); |
257 | ** 2) there is a match-time capture ==> returns 2. | 265 | ** 2) there is a match-time capture ==> return has bit 2 set |
258 | ** (match-time captures should not be avoided by optimizations) | 266 | ** (optimizations should not bypass match-time captures). |
259 | */ | 267 | */ |
260 | static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | 268 | static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { |
261 | tailcall: | 269 | tailcall: |
@@ -266,16 +274,16 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
266 | } | 274 | } |
267 | case TTrue: { | 275 | case TTrue: { |
268 | loopset(i, firstset->cs[i] = follow->cs[i]); | 276 | loopset(i, firstset->cs[i] = follow->cs[i]); |
269 | return 1; | 277 | return 1; /* accepts the empty string */ |
270 | } | 278 | } |
271 | case TFalse: { | 279 | case TFalse: { |
272 | loopset(i, firstset->cs[i] = 0); | 280 | loopset(i, firstset->cs[i] = 0); |
273 | return 0; | 281 | return 0; |
274 | } | 282 | } |
275 | case TThrow: { /* labeled failure: must always throw the label */ | 283 | case TThrow: { /* labeled failure: must always throw the label */ |
276 | loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */ | 284 | loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset(?) */ |
277 | return 1; | 285 | return 1; |
278 | } | 286 | } |
279 | case TChoice: case TLabChoice: { /*(?) labeled failure */ | 287 | case TChoice: case TLabChoice: { /*(?) labeled failure */ |
280 | Charset csaux; | 288 | Charset csaux; |
281 | int e1 = getfirst(sib1(tree), follow, firstset); | 289 | int e1 = getfirst(sib1(tree), follow, firstset); |
@@ -285,7 +293,8 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
285 | } | 293 | } |
286 | case TSeq: { | 294 | case TSeq: { |
287 | if (!nullable(sib1(tree))) { | 295 | if (!nullable(sib1(tree))) { |
288 | /* return getfirst(sib1(tree), fullset, firstset); */ | 296 | /* when p1 is not nullable, p2 has nothing to contribute; |
297 | return getfirst(sib1(tree), fullset, firstset); */ | ||
289 | tree = sib1(tree); follow = fullset; goto tailcall; | 298 | tree = sib1(tree); follow = fullset; goto tailcall; |
290 | } | 299 | } |
291 | else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */ | 300 | else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */ |
@@ -329,7 +338,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
329 | /* else go through */ | 338 | /* else go through */ |
330 | } | 339 | } |
331 | case TBehind: { /* instruction gives no new information */ | 340 | case TBehind: { /* instruction gives no new information */ |
332 | /* call 'getfirst' to check for math-time captures */ | 341 | /* call 'getfirst' only to check for math-time captures */ |
333 | int e = getfirst(sib1(tree), follow, firstset); | 342 | int e = getfirst(sib1(tree), follow, firstset); |
334 | loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */ | 343 | loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */ |
335 | return e | 1; /* always can accept the empty string */ | 344 | return e | 1; /* always can accept the empty string */ |
@@ -340,13 +349,13 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | |||
340 | 349 | ||
341 | 350 | ||
342 | /* | 351 | /* |
343 | ** If it returns true, then pattern can fail only depending on the next | 352 | ** If 'headfail(tree)' true, then 'tree' can fail only depending on the |
344 | ** character of the subject | 353 | ** next character of the subject. |
345 | */ | 354 | */ |
346 | static int headfail (TTree *tree) { | 355 | static int headfail (TTree *tree) { |
347 | tailcall: | 356 | tailcall: |
348 | switch (tree->tag) { | 357 | switch (tree->tag) { |
349 | case TChar: case TSet: case TAny: case TFalse: | 358 | case TChar: case TSet: case TAny: case TFalse: |
350 | return 1; | 359 | return 1; |
351 | case TTrue: case TRep: case TRunTime: case TNot: | 360 | case TTrue: case TRep: case TRunTime: case TNot: |
352 | case TBehind: | 361 | case TBehind: |
@@ -410,10 +419,12 @@ int sizei (const Instruction *i) { | |||
410 | switch((Opcode)i->i.code) { | 419 | switch((Opcode)i->i.code) { |
411 | case ISet: case ISpan: return CHARSETINSTSIZE; | 420 | case ISet: case ISpan: return CHARSETINSTSIZE; |
412 | case ITestSet: return CHARSETINSTSIZE + 1; | 421 | case ITestSet: return CHARSETINSTSIZE + 1; |
413 | case ITestChar: case ITestAny: case IChoice: case IJmp: | 422 | case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: |
414 | case ICall: case IOpenCall: case ICommit: case IPartialCommit: | 423 | case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: |
415 | case IBackCommit: case IThrow: return 2; /* labeled failure */ | 424 | case IThrow: /* labeled failure */ |
425 | return 2; | ||
416 | case ILabChoice: return 3; /* labeled failure */ | 426 | case ILabChoice: return 3; /* labeled failure */ |
427 | return 2; | ||
417 | default: return 1; | 428 | default: return 1; |
418 | } | 429 | } |
419 | } | 430 | } |
@@ -431,7 +442,8 @@ typedef struct CompileState { | |||
431 | 442 | ||
432 | /* | 443 | /* |
433 | ** code generation is recursive; 'opt' indicates that the code is | 444 | ** code generation is recursive; 'opt' indicates that the code is |
434 | ** being generated under a 'IChoice' operator jumping to its end. | 445 | ** being generated under a 'IChoice' operator jumping to its end |
446 | ** (that is, the match is "optional"). | ||
435 | ** 'tt' points to a previous test protecting this code. 'fl' is | 447 | ** 'tt' points to a previous test protecting this code. 'fl' is |
436 | ** the follow set of the pattern. | 448 | ** the follow set of the pattern. |
437 | */ | 449 | */ |
@@ -439,7 +451,7 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | |||
439 | const Charset *fl); | 451 | const Charset *fl); |
440 | 452 | ||
441 | 453 | ||
442 | void reallocprog (lua_State *L, Pattern *p, int nsize) { | 454 | void realloccode (lua_State *L, Pattern *p, int nsize) { |
443 | void *ud; | 455 | void *ud; |
444 | lua_Alloc f = lua_getallocf(L, &ud); | 456 | lua_Alloc f = lua_getallocf(L, &ud); |
445 | void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), | 457 | void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), |
@@ -454,7 +466,7 @@ void reallocprog (lua_State *L, Pattern *p, int nsize) { | |||
454 | static int nextinstruction (CompileState *compst) { | 466 | static int nextinstruction (CompileState *compst) { |
455 | int size = compst->p->codesize; | 467 | int size = compst->p->codesize; |
456 | if (compst->ncode >= size) | 468 | if (compst->ncode >= size) |
457 | reallocprog(compst->L, compst->p, size * 2); | 469 | realloccode(compst->L, compst->p, size * 2); |
458 | return compst->ncode++; | 470 | return compst->ncode++; |
459 | } | 471 | } |
460 | 472 | ||
@@ -470,6 +482,9 @@ static int addinstruction (CompileState *compst, Opcode op, int aux) { | |||
470 | } | 482 | } |
471 | 483 | ||
472 | 484 | ||
485 | /* | ||
486 | ** Add an instruction followed by space for an offset (to be set later) | ||
487 | */ | ||
473 | static int addoffsetinst (CompileState *compst, Opcode op) { | 488 | static int addoffsetinst (CompileState *compst, Opcode op) { |
474 | int i = addinstruction(compst, op, 0); /* instruction */ | 489 | int i = addinstruction(compst, op, 0); /* instruction */ |
475 | addinstruction(compst, (Opcode)0, 0); /* open space for offset */ | 490 | addinstruction(compst, (Opcode)0, 0); /* open space for offset */ |
@@ -496,7 +511,9 @@ static int addoffsetlabinst (CompileState *compst, Labelset ls) { | |||
496 | } | 511 | } |
497 | /* labeled failure end */ | 512 | /* labeled failure end */ |
498 | 513 | ||
499 | 514 | /* | |
515 | ** Set the offset of an instruction | ||
516 | */ | ||
500 | static void setoffset (CompileState *compst, int instruction, int offset) { | 517 | static void setoffset (CompileState *compst, int instruction, int offset) { |
501 | getinstr(compst, instruction + 1).offset = offset; | 518 | getinstr(compst, instruction + 1).offset = offset; |
502 | } | 519 | } |
@@ -505,7 +522,7 @@ static void setoffset (CompileState *compst, int instruction, int offset) { | |||
505 | /* | 522 | /* |
506 | ** Add a capture instruction: | 523 | ** Add a capture instruction: |
507 | ** 'op' is the capture instruction; 'cap' the capture kind; | 524 | ** 'op' is the capture instruction; 'cap' the capture kind; |
508 | ** 'key' the key into ktable; 'aux' is optional offset | 525 | ** 'key' the key into ktable; 'aux' is the optional capture offset |
509 | ** | 526 | ** |
510 | */ | 527 | */ |
511 | static int addinstcap (CompileState *compst, Opcode op, int cap, int key, | 528 | static int addinstcap (CompileState *compst, Opcode op, int cap, int key, |
@@ -521,12 +538,18 @@ static int addinstcap (CompileState *compst, Opcode op, int cap, int key, | |||
521 | #define target(code,i) ((i) + code[i + 1].offset) | 538 | #define target(code,i) ((i) + code[i + 1].offset) |
522 | 539 | ||
523 | 540 | ||
541 | /* | ||
542 | ** Patch 'instruction' to jump to 'target' | ||
543 | */ | ||
524 | static void jumptothere (CompileState *compst, int instruction, int target) { | 544 | static void jumptothere (CompileState *compst, int instruction, int target) { |
525 | if (instruction >= 0) | 545 | if (instruction >= 0) |
526 | setoffset(compst, instruction, target - instruction); | 546 | setoffset(compst, instruction, target - instruction); |
527 | } | 547 | } |
528 | 548 | ||
529 | 549 | ||
550 | /* | ||
551 | ** Patch 'instruction' to jump to current position | ||
552 | */ | ||
530 | static void jumptohere (CompileState *compst, int instruction) { | 553 | static void jumptohere (CompileState *compst, int instruction) { |
531 | jumptothere(compst, instruction, gethere(compst)); | 554 | jumptothere(compst, instruction, gethere(compst)); |
532 | } | 555 | } |
@@ -594,7 +617,7 @@ static int codetestset (CompileState *compst, Charset *cs, int e) { | |||
594 | else { | 617 | else { |
595 | int c = 0; | 618 | int c = 0; |
596 | Opcode op = charsettype(cs->cs, &c); | 619 | Opcode op = charsettype(cs->cs, &c); |
597 | switch (op) { | 620 | switch (op) { |
598 | case IFail: return addoffsetinst(compst, IJmp); /* always jump */ | 621 | case IFail: return addoffsetinst(compst, IJmp); /* always jump */ |
599 | case IAny: return addoffsetinst(compst, ITestAny); | 622 | case IAny: return addoffsetinst(compst, ITestAny); |
600 | case IChar: { | 623 | case IChar: { |
@@ -658,8 +681,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, | |||
658 | Charset cs1, cs2; | 681 | Charset cs1, cs2; |
659 | int e1 = getfirst(p1, fullset, &cs1); | 682 | int e1 = getfirst(p1, fullset, &cs1); |
660 | if (headfail(p1) || | 683 | if (headfail(p1) || |
661 | (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) { | 684 | (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) { |
662 | /*if (0) {*/ | ||
663 | /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */ | 685 | /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */ |
664 | int test = codetestset(compst, &cs1, 0); | 686 | int test = codetestset(compst, &cs1, 0); |
665 | int jmp = NOINST; | 687 | int jmp = NOINST; |
@@ -690,6 +712,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, | |||
690 | } | 712 | } |
691 | } | 713 | } |
692 | 714 | ||
715 | |||
693 | /* labeled failure begin */ | 716 | /* labeled failure begin */ |
694 | static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt, | 717 | static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt, |
695 | const Charset *fl, Labelset ls) { | 718 | const Charset *fl, Labelset ls) { |
@@ -707,6 +730,7 @@ static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt, | |||
707 | } | 730 | } |
708 | /* labeled failure end */ | 731 | /* labeled failure end */ |
709 | 732 | ||
733 | |||
710 | /* | 734 | /* |
711 | ** And predicate | 735 | ** And predicate |
712 | ** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n | 736 | ** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n |
@@ -907,7 +931,8 @@ static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, | |||
907 | 931 | ||
908 | /* | 932 | /* |
909 | ** Main code-generation function: dispatch to auxiliar functions | 933 | ** Main code-generation function: dispatch to auxiliar functions |
910 | ** according to kind of tree | 934 | ** according to kind of tree. ('needfollow' should return true |
935 | ** only for consructions that use 'fl'.) | ||
911 | */ | 936 | */ |
912 | static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | 937 | static void codegen (CompileState *compst, TTree *tree, int opt, int tt, |
913 | const Charset *fl) { | 938 | const Charset *fl) { |
@@ -932,7 +957,7 @@ static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | |||
932 | /* codegen(compst, p2, opt, tt, fl); */ | 957 | /* codegen(compst, p2, opt, tt, fl); */ |
933 | tree = sib2(tree); goto tailcall; | 958 | tree = sib2(tree); goto tailcall; |
934 | } | 959 | } |
935 | case TThrow: { /* labeled failure */ | 960 | case TThrow: { /* labeled failure */ |
936 | addthrowinstruction(compst, tree->labels); | 961 | addthrowinstruction(compst, tree->labels); |
937 | break; | 962 | break; |
938 | } | 963 | } |
@@ -958,6 +983,7 @@ static void peephole (CompileState *compst) { | |||
958 | Instruction *code = compst->p->code; | 983 | Instruction *code = compst->p->code; |
959 | int i; | 984 | int i; |
960 | for (i = 0; i < compst->ncode; i += sizei(&code[i])) { | 985 | for (i = 0; i < compst->ncode; i += sizei(&code[i])) { |
986 | redo: | ||
961 | switch (code[i].i.code) { | 987 | switch (code[i].i.code) { |
962 | case IChoice: case ICall: case ICommit: case IPartialCommit: | 988 | case IChoice: case ICall: case ICommit: case IPartialCommit: |
963 | case IBackCommit: case ITestChar: case ITestSet: case ILabChoice: /* labeled failure */ | 989 | case IBackCommit: case ITestChar: case ITestSet: case ILabChoice: /* labeled failure */ |
@@ -979,8 +1005,7 @@ static void peephole (CompileState *compst) { | |||
979 | int fft = finallabel(code, ft); | 1005 | int fft = finallabel(code, ft); |
980 | code[i] = code[ft]; /* jump becomes that instruction... */ | 1006 | code[i] = code[ft]; /* jump becomes that instruction... */ |
981 | jumptothere(compst, i, fft); /* but must correct its offset */ | 1007 | jumptothere(compst, i, fft); /* but must correct its offset */ |
982 | i--; /* reoptimize its label */ | 1008 | goto redo; /* reoptimize its label */ |
983 | break; | ||
984 | } | 1009 | } |
985 | default: { | 1010 | default: { |
986 | jumptothere(compst, i, ft); /* optimize label */ | 1011 | jumptothere(compst, i, ft); /* optimize label */ |
@@ -1002,11 +1027,11 @@ static void peephole (CompileState *compst) { | |||
1002 | Instruction *compile (lua_State *L, Pattern *p) { | 1027 | Instruction *compile (lua_State *L, Pattern *p) { |
1003 | CompileState compst; | 1028 | CompileState compst; |
1004 | compst.p = p; compst.ncode = 0; compst.L = L; | 1029 | compst.p = p; compst.ncode = 0; compst.L = L; |
1005 | reallocprog(L, p, 2); /* minimum initial size */ | 1030 | realloccode(L, p, 2); /* minimum initial size */ |
1006 | codegen(&compst, p->tree, 0, NOINST, fullset); | 1031 | codegen(&compst, p->tree, 0, NOINST, fullset); |
1007 | addinstruction(&compst, IEnd, 0); | 1032 | addinstruction(&compst, IEnd, 0); |
1008 | reallocprog(L, p, compst.ncode); /* set final size */ | 1033 | realloccode(L, p, compst.ncode); /* set final size */ |
1009 | peephole(&compst); /* labeled failure */ | 1034 | peephole(&compst); |
1010 | return p->code; | 1035 | return p->code; |
1011 | } | 1036 | } |
1012 | 1037 | ||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpcode.h,v 1.5 2013/04/04 21:24:45 roberto Exp $ | 2 | ** $Id: lpcode.h,v 1.6 2013/11/28 14:56:02 roberto Exp $ |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #if !defined(lpcode_h) | 5 | #if !defined(lpcode_h) |
@@ -17,7 +17,7 @@ int fixedlenx (TTree *tree, int count, int len); | |||
17 | int hascaptures (TTree *tree); | 17 | int hascaptures (TTree *tree); |
18 | int lp_gc (lua_State *L); | 18 | int lp_gc (lua_State *L); |
19 | Instruction *compile (lua_State *L, Pattern *p); | 19 | Instruction *compile (lua_State *L, Pattern *p); |
20 | void reallocprog (lua_State *L, Pattern *p, int nsize); | 20 | void realloccode (lua_State *L, Pattern *p, int nsize); |
21 | int sizei (const Instruction *i); | 21 | int sizei (const Instruction *i); |
22 | 22 | ||
23 | 23 | ||
@@ -52,7 +52,7 @@ static void printjmp (const Instruction *op, const Instruction *p) { | |||
52 | } | 52 | } |
53 | 53 | ||
54 | 54 | ||
55 | void printinst (const Instruction *op, const Instruction *p) { | 55 | static void printinst (const Instruction *op, const Instruction *p) { |
56 | const char *const names[] = { | 56 | const char *const names[] = { |
57 | "any", "char", "set", | 57 | "any", "char", "set", |
58 | "testany", "testchar", "testset", | 58 | "testany", "testchar", "testset", |
@@ -60,8 +60,8 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
60 | "ret", "end", | 60 | "ret", "end", |
61 | "choice", "jmp", "call", "open_call", | 61 | "choice", "jmp", "call", "open_call", |
62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", | 62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", |
63 | "fullcapture", "opencapture", "closecapture", "closeruntime", | 63 | "fullcapture", "opencapture", "closecapture", "closeruntime", |
64 | "throw", "labeled_choice" /* labeled failure */ | 64 | "throw", "labeled_choice" /* labeled failure */ |
65 | }; | 65 | }; |
66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); | 66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); |
67 | switch ((Opcode)p->i.code) { | 67 | switch ((Opcode)p->i.code) { |
@@ -103,12 +103,12 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
103 | printf("%d", p->i.aux); | 103 | printf("%d", p->i.aux); |
104 | break; | 104 | break; |
105 | } | 105 | } |
106 | case IJmp: case ICall: case ICommit: case IChoice: | 106 | case IJmp: case ICall: case ICommit: case IChoice: |
107 | case IPartialCommit: case IBackCommit: case ITestAny: { | 107 | case IPartialCommit: case IBackCommit: case ITestAny: { |
108 | printjmp(op, p); | 108 | printjmp(op, p); |
109 | break; | 109 | break; |
110 | } | 110 | } |
111 | case IThrow: { /* labeled failure */ | 111 | case IThrow: { /* labeled failure */ |
112 | printf("%d", (p + 1)->labels); | 112 | printf("%d", (p + 1)->labels); |
113 | break; | 113 | break; |
114 | } | 114 | } |
@@ -117,7 +117,6 @@ void printinst (const Instruction *op, const Instruction *p) { | |||
117 | printf(" %d", (p + 2)->labels); | 117 | printf(" %d", (p + 2)->labels); |
118 | break; | 118 | break; |
119 | } | 119 | } |
120 | |||
121 | default: break; | 120 | default: break; |
122 | } | 121 | } |
123 | printf("\n"); | 122 | printf("\n"); |
@@ -194,7 +193,7 @@ void printtree (TTree *tree, int ident) { | |||
194 | } | 193 | } |
195 | case TBehind: { | 194 | case TBehind: { |
196 | printf(" %d\n", tree->u.n); | 195 | printf(" %d\n", tree->u.n); |
197 | printtree(sib1(tree), ident + 2); | 196 | printtree(sib1(tree), ident + 2); |
198 | break; | 197 | break; |
199 | } | 198 | } |
200 | case TCapture: { | 199 | case TCapture: { |
@@ -217,16 +216,16 @@ void printtree (TTree *tree, int ident) { | |||
217 | assert(rule->tag == TTrue); /* sentinel */ | 216 | assert(rule->tag == TTrue); /* sentinel */ |
218 | break; | 217 | break; |
219 | } | 218 | } |
220 | case TThrow: { /* labeled failure */ | 219 | case TThrow: { /* labeled failure */ |
221 | printf(" labels: %d\n", tree->labels); | 220 | printf(" labels: %d\n", tree->labels); |
222 | break; | 221 | break; |
223 | } | 222 | } |
224 | default: { | 223 | default: { |
225 | int sibs = numsiblings[tree->tag]; | 224 | int sibs = numsiblings[tree->tag]; |
226 | if (tree->tag == TLabChoice) { /* labeled failure */ | 225 | printf("\n"); |
226 | if (tree->tag == TLabChoice) { /* labeled failure */ | ||
227 | printf(" labels: %d\n", tree->labels); | 227 | printf(" labels: %d\n", tree->labels); |
228 | } | 228 | } |
229 | printf("\n"); | ||
230 | if (sibs >= 1) { | 229 | if (sibs >= 1) { |
231 | printtree(sib1(tree), ident + 2); | 230 | printtree(sib1(tree), ident + 2); |
232 | if (sibs >= 2) | 231 | if (sibs >= 2) |
@@ -18,8 +18,6 @@ void printtree (TTree *tree, int ident); | |||
18 | void printktable (lua_State *L, int idx); | 18 | void printktable (lua_State *L, int idx); |
19 | void printcharset (const byte *st); | 19 | void printcharset (const byte *st); |
20 | void printcaplist (Capture *cap, Capture *limit); | 20 | void printcaplist (Capture *cap, Capture *limit); |
21 | void printinst (const Instruction *op, const Instruction *p); | ||
22 | |||
23 | 21 | ||
24 | #else | 22 | #else |
25 | 23 | ||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lptree.c,v 1.10 2013/04/12 16:30:33 roberto Exp $ | 2 | ** $Id: lptree.c,v 1.15 2015/03/04 17:23:00 roberto Exp $ |
3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ | 4 | */ |
5 | 5 | ||
@@ -127,6 +127,189 @@ static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { | |||
127 | } | 127 | } |
128 | 128 | ||
129 | 129 | ||
130 | |||
131 | /* | ||
132 | ** {=================================================================== | ||
133 | ** KTable manipulation | ||
134 | ** | ||
135 | ** - The ktable of a pattern 'p' can be shared by other patterns that | ||
136 | ** contain 'p' and no other constants. Because of this sharing, we | ||
137 | ** should not add elements to a 'ktable' unless it was freshly created | ||
138 | ** for the new pattern. | ||
139 | ** | ||
140 | ** - The maximum index in a ktable is USHRT_MAX, because trees and | ||
141 | ** patterns use unsigned shorts to store those indices. | ||
142 | ** ==================================================================== | ||
143 | */ | ||
144 | |||
145 | /* | ||
146 | ** Create a new 'ktable' to the pattern at the top of the stack. | ||
147 | */ | ||
148 | static void newktable (lua_State *L, int n) { | ||
149 | lua_createtable(L, n, 0); /* create a fresh table */ | ||
150 | lua_setfenv(L, -2); /* set it as 'ktable' for pattern */ | ||
151 | } | ||
152 | |||
153 | |||
154 | /* | ||
155 | ** Add element 'idx' to 'ktable' of pattern at the top of the stack; | ||
156 | ** Return index of new element. | ||
157 | ** If new element is nil, does not add it to table (as it would be | ||
158 | ** useless) and returns 0, as ktable[0] is always nil. | ||
159 | */ | ||
160 | static int addtoktable (lua_State *L, int idx) { | ||
161 | if (lua_isnil(L, idx)) /* nil value? */ | ||
162 | return 0; | ||
163 | else { | ||
164 | int n; | ||
165 | lua_getfenv(L, -1); /* get ktable from pattern */ | ||
166 | n = lua_objlen(L, -1); | ||
167 | if (n >= USHRT_MAX) | ||
168 | luaL_error(L, "too many Lua values in pattern"); | ||
169 | lua_pushvalue(L, idx); /* element to be added */ | ||
170 | lua_rawseti(L, -2, ++n); | ||
171 | lua_pop(L, 1); /* remove 'ktable' */ | ||
172 | return n; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | |||
177 | /* | ||
178 | ** Return the number of elements in the ktable at 'idx'. | ||
179 | ** In Lua 5.2/5.3, default "environment" for patterns is nil, not | ||
180 | ** a table. Treat it as an empty table. In Lua 5.1, assumes that | ||
181 | ** the environment has no numeric indices (len == 0) | ||
182 | */ | ||
183 | static int ktablelen (lua_State *L, int idx) { | ||
184 | if (!lua_istable(L, idx)) return 0; | ||
185 | else return lua_objlen(L, idx); | ||
186 | } | ||
187 | |||
188 | |||
189 | /* | ||
190 | ** Concatentate the contents of table 'idx1' into table 'idx2'. | ||
191 | ** (Assume that both indices are negative.) | ||
192 | ** Return the original length of table 'idx2' (or 0, if no | ||
193 | ** element was added, as there is no need to correct any index). | ||
194 | */ | ||
195 | static int concattable (lua_State *L, int idx1, int idx2) { | ||
196 | int i; | ||
197 | int n1 = ktablelen(L, idx1); | ||
198 | int n2 = ktablelen(L, idx2); | ||
199 | if (n1 + n2 > USHRT_MAX) | ||
200 | luaL_error(L, "too many Lua values in pattern"); | ||
201 | if (n1 == 0) return 0; /* nothing to correct */ | ||
202 | for (i = 1; i <= n1; i++) { | ||
203 | lua_rawgeti(L, idx1, i); | ||
204 | lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ | ||
205 | } | ||
206 | return n2; | ||
207 | } | ||
208 | |||
209 | |||
210 | /* | ||
211 | ** When joining 'ktables', constants from one of the subpatterns must | ||
212 | ** be renumbered; 'correctkeys' corrects their indices (adding 'n' | ||
213 | ** to each of them) | ||
214 | */ | ||
215 | static void correctkeys (TTree *tree, int n) { | ||
216 | if (n == 0) return; /* no correction? */ | ||
217 | tailcall: | ||
218 | switch (tree->tag) { | ||
219 | case TOpenCall: case TCall: case TRunTime: case TRule: { | ||
220 | if (tree->key > 0) | ||
221 | tree->key += n; | ||
222 | break; | ||
223 | } | ||
224 | case TCapture: { | ||
225 | if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) | ||
226 | tree->key += n; | ||
227 | break; | ||
228 | } | ||
229 | default: break; | ||
230 | } | ||
231 | switch (numsiblings[tree->tag]) { | ||
232 | case 1: /* correctkeys(sib1(tree), n); */ | ||
233 | tree = sib1(tree); goto tailcall; | ||
234 | case 2: | ||
235 | correctkeys(sib1(tree), n); | ||
236 | tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ | ||
237 | default: assert(numsiblings[tree->tag] == 0); break; | ||
238 | } | ||
239 | } | ||
240 | |||
241 | |||
242 | /* | ||
243 | ** Join the ktables from p1 and p2 the ktable for the new pattern at the | ||
244 | ** top of the stack, reusing them when possible. | ||
245 | */ | ||
246 | static void joinktables (lua_State *L, int p1, TTree *t2, int p2) { | ||
247 | int n1, n2; | ||
248 | lua_getfenv(L, p1); /* get ktables */ | ||
249 | lua_getfenv(L, p2); | ||
250 | n1 = ktablelen(L, -2); | ||
251 | n2 = ktablelen(L, -1); | ||
252 | if (n1 == 0 && n2 == 0) /* are both tables empty? */ | ||
253 | lua_pop(L, 2); /* nothing to be done; pop tables */ | ||
254 | else if (n2 == 0 || lua_equal(L, -2, -1)) { /* 2nd table empty or equal? */ | ||
255 | lua_pop(L, 1); /* pop 2nd table */ | ||
256 | lua_setfenv(L, -2); /* set 1st ktable into new pattern */ | ||
257 | } | ||
258 | else if (n1 == 0) { /* first table is empty? */ | ||
259 | lua_setfenv(L, -3); /* set 2nd table into new pattern */ | ||
260 | lua_pop(L, 1); /* pop 1st table */ | ||
261 | } | ||
262 | else { | ||
263 | lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ | ||
264 | /* stack: new p; ktable p1; ktable p2; new ktable */ | ||
265 | concattable(L, -3, -1); /* from p1 into new ktable */ | ||
266 | concattable(L, -2, -1); /* from p2 into new ktable */ | ||
267 | lua_setfenv(L, -4); /* new ktable becomes 'p' environment */ | ||
268 | lua_pop(L, 2); /* pop other ktables */ | ||
269 | correctkeys(t2, n1); /* correction for indices from p2 */ | ||
270 | } | ||
271 | } | ||
272 | |||
273 | |||
274 | /* | ||
275 | ** copy 'ktable' of element 'idx' to new tree (on top of stack) | ||
276 | */ | ||
277 | static void copyktable (lua_State *L, int idx) { | ||
278 | lua_getfenv(L, idx); | ||
279 | lua_setfenv(L, -2); | ||
280 | } | ||
281 | |||
282 | |||
283 | /* | ||
284 | ** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable' | ||
285 | ** from tree at the top of the stack, and correct corresponding | ||
286 | ** tree. | ||
287 | */ | ||
288 | static void mergektable (lua_State *L, int idx, TTree *stree) { | ||
289 | int n; | ||
290 | lua_getfenv(L, -1); /* get ktables */ | ||
291 | lua_getfenv(L, idx); | ||
292 | n = concattable(L, -1, -2); | ||
293 | lua_pop(L, 2); /* remove both ktables */ | ||
294 | correctkeys(stree, n); | ||
295 | } | ||
296 | |||
297 | |||
298 | /* | ||
299 | ** Create a new 'ktable' to the pattern at the top of the stack, adding | ||
300 | ** all elements from pattern 'p' (if not 0) plus element 'idx' to it. | ||
301 | ** Return index of new element. | ||
302 | */ | ||
303 | static int addtonewktable (lua_State *L, int p, int idx) { | ||
304 | newktable(L, 1); | ||
305 | if (p) | ||
306 | mergektable(L, p, NULL); | ||
307 | return addtoktable(L, idx); | ||
308 | } | ||
309 | |||
310 | /* }====================================================== */ | ||
311 | |||
312 | |||
130 | /* | 313 | /* |
131 | ** {====================================================== | 314 | ** {====================================================== |
132 | ** Tree generation | 315 | ** Tree generation |
@@ -218,29 +401,6 @@ static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { | |||
218 | 401 | ||
219 | 402 | ||
220 | /* | 403 | /* |
221 | ** Add element 'idx' to 'ktable' of pattern at the top of the stack; | ||
222 | ** create new 'ktable' if necessary. Return index of new element. | ||
223 | */ | ||
224 | static int addtoktable (lua_State *L, int idx) { | ||
225 | if (idx == 0 || lua_isnil(L, idx)) /* no actual value to insert? */ | ||
226 | return 0; | ||
227 | else { | ||
228 | int n; | ||
229 | lua_getfenv(L, -1); /* get ktable from pattern */ | ||
230 | n = lua_objlen(L, -1); | ||
231 | if (n == 0) { /* is it empty/non-existent? */ | ||
232 | lua_pop(L, 1); /* remove it */ | ||
233 | lua_createtable(L, 1, 0); /* create a fresh table */ | ||
234 | } | ||
235 | lua_pushvalue(L, idx); /* element to be added */ | ||
236 | lua_rawseti(L, -2, n + 1); | ||
237 | lua_setfenv(L, -2); /* set it as ktable for pattern */ | ||
238 | return n + 1; | ||
239 | } | ||
240 | } | ||
241 | |||
242 | |||
243 | /* | ||
244 | ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got | 404 | ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got |
245 | ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it | 405 | ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it |
246 | ** must build a sequence of sequence of sequence...) | 406 | ** must build a sequence of sequence of sequence...) |
@@ -315,7 +475,7 @@ static TTree *getpatt (lua_State *L, int idx, int *len) { | |||
315 | case LUA_TFUNCTION: { | 475 | case LUA_TFUNCTION: { |
316 | tree = newtree(L, 2); | 476 | tree = newtree(L, 2); |
317 | tree->tag = TRunTime; | 477 | tree->tag = TRunTime; |
318 | tree->key = addtoktable(L, idx); | 478 | tree->key = addtonewktable(L, 0, idx); |
319 | sib1(tree)->tag = TTrue; | 479 | sib1(tree)->tag = TTrue; |
320 | break; | 480 | break; |
321 | } | 481 | } |
@@ -331,123 +491,6 @@ static TTree *getpatt (lua_State *L, int idx, int *len) { | |||
331 | 491 | ||
332 | 492 | ||
333 | /* | 493 | /* |
334 | ** Return the number of elements in the ktable of pattern at 'idx'. | ||
335 | ** In Lua 5.2, default "environment" for patterns is nil, not | ||
336 | ** a table. Treat it as an empty table. In Lua 5.1, assumes that | ||
337 | ** the environment has no numeric indices (len == 0) | ||
338 | */ | ||
339 | static int ktablelen (lua_State *L, int idx) { | ||
340 | if (!lua_istable(L, idx)) return 0; | ||
341 | else return lua_objlen(L, idx); | ||
342 | } | ||
343 | |||
344 | |||
345 | /* | ||
346 | ** Concatentate the contents of table 'idx1' into table 'idx2'. | ||
347 | ** (Assume that both indices are negative.) | ||
348 | ** Return the original length of table 'idx2' | ||
349 | */ | ||
350 | static int concattable (lua_State *L, int idx1, int idx2) { | ||
351 | int i; | ||
352 | int n1 = ktablelen(L, idx1); | ||
353 | int n2 = ktablelen(L, idx2); | ||
354 | if (n1 == 0) return 0; /* nothing to correct */ | ||
355 | for (i = 1; i <= n1; i++) { | ||
356 | lua_rawgeti(L, idx1, i); | ||
357 | lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ | ||
358 | } | ||
359 | return n2; | ||
360 | } | ||
361 | |||
362 | |||
363 | /* | ||
364 | ** Make a merge of ktables from p1 and p2 the ktable for the new | ||
365 | ** pattern at the top of the stack. | ||
366 | */ | ||
367 | static int joinktables (lua_State *L, int p1, int p2) { | ||
368 | int n1, n2; | ||
369 | lua_getfenv(L, p1); /* get ktables */ | ||
370 | lua_getfenv(L, p2); | ||
371 | n1 = ktablelen(L, -2); | ||
372 | n2 = ktablelen(L, -1); | ||
373 | if (n1 == 0 && n2 == 0) { /* are both tables empty? */ | ||
374 | lua_pop(L, 2); /* nothing to be done; pop tables */ | ||
375 | return 0; /* nothing to correct */ | ||
376 | } | ||
377 | if (n2 == 0 || lua_equal(L, -2, -1)) { /* second table is empty or equal? */ | ||
378 | lua_pop(L, 1); /* pop 2nd table */ | ||
379 | lua_setfenv(L, -2); /* set 1st ktable into new pattern */ | ||
380 | return 0; /* nothing to correct */ | ||
381 | } | ||
382 | if (n1 == 0) { /* first table is empty? */ | ||
383 | lua_setfenv(L, -3); /* set 2nd table into new pattern */ | ||
384 | lua_pop(L, 1); /* pop 1st table */ | ||
385 | return 0; /* nothing to correct */ | ||
386 | } | ||
387 | else { | ||
388 | lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ | ||
389 | /* stack: new p; ktable p1; ktable p2; new ktable */ | ||
390 | concattable(L, -3, -1); /* from p1 into new ktable */ | ||
391 | concattable(L, -2, -1); /* from p2 into new ktable */ | ||
392 | lua_setfenv(L, -4); /* new ktable becomes p env */ | ||
393 | lua_pop(L, 2); /* pop other ktables */ | ||
394 | return n1; /* correction for indices from p2 */ | ||
395 | } | ||
396 | } | ||
397 | |||
398 | |||
399 | static void correctkeys (TTree *tree, int n) { | ||
400 | if (n == 0) return; /* no correction? */ | ||
401 | tailcall: | ||
402 | switch (tree->tag) { | ||
403 | case TOpenCall: case TCall: case TRunTime: case TRule: { | ||
404 | if (tree->key > 0) | ||
405 | tree->key += n; | ||
406 | break; | ||
407 | } | ||
408 | case TCapture: { | ||
409 | if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) | ||
410 | tree->key += n; | ||
411 | break; | ||
412 | } | ||
413 | default: break; | ||
414 | } | ||
415 | switch (numsiblings[tree->tag]) { | ||
416 | case 1: /* correctkeys(sib1(tree), n); */ | ||
417 | tree = sib1(tree); goto tailcall; | ||
418 | case 2: | ||
419 | correctkeys(sib1(tree), n); | ||
420 | tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ | ||
421 | default: assert(numsiblings[tree->tag] == 0); break; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | |||
426 | /* | ||
427 | ** copy 'ktable' of element 'idx' to new tree (on top of stack) | ||
428 | */ | ||
429 | static void copyktable (lua_State *L, int idx) { | ||
430 | lua_getfenv(L, idx); | ||
431 | lua_setfenv(L, -2); | ||
432 | } | ||
433 | |||
434 | |||
435 | /* | ||
436 | ** merge 'ktable' from rule at stack index 'idx' into 'ktable' | ||
437 | ** from tree at the top of the stack, and correct corresponding | ||
438 | ** tree. | ||
439 | */ | ||
440 | static void mergektable (lua_State *L, int idx, TTree *rule) { | ||
441 | int n; | ||
442 | lua_getfenv(L, -1); /* get ktables */ | ||
443 | lua_getfenv(L, idx); | ||
444 | n = concattable(L, -1, -2); | ||
445 | lua_pop(L, 2); /* remove both ktables */ | ||
446 | correctkeys(rule, n); | ||
447 | } | ||
448 | |||
449 | |||
450 | /* | ||
451 | ** create a new tree, whith a new root and one sibling. | 494 | ** create a new tree, whith a new root and one sibling. |
452 | ** Sibling must be on the Lua stack, at index 1. | 495 | ** Sibling must be on the Lua stack, at index 1. |
453 | */ | 496 | */ |
@@ -475,7 +518,7 @@ static TTree *newroot2sib (lua_State *L, int tag) { | |||
475 | tree->u.ps = 1 + s1; | 518 | tree->u.ps = 1 + s1; |
476 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); | 519 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); |
477 | memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); | 520 | memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); |
478 | correctkeys(sib2(tree), joinktables(L, 1, 2)); | 521 | joinktables(L, 1, sib2(tree), 2); |
479 | return tree; | 522 | return tree; |
480 | } | 523 | } |
481 | 524 | ||
@@ -535,8 +578,8 @@ static int lp_choice (lua_State *L) { | |||
535 | */ | 578 | */ |
536 | static int lp_star (lua_State *L) { | 579 | static int lp_star (lua_State *L) { |
537 | int size1; | 580 | int size1; |
538 | int n = luaL_checkint(L, 2); | 581 | int n = (int)luaL_checkinteger(L, 2); |
539 | TTree *tree1 = gettree(L, 1, &size1); | 582 | TTree *tree1 = getpatt(L, 1, &size1); |
540 | if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ | 583 | if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ |
541 | TTree *tree = newtree(L, (n + 1) * (size1 + 1)); | 584 | TTree *tree = newtree(L, (n + 1) * (size1 + 1)); |
542 | if (nullable(tree1)) | 585 | if (nullable(tree1)) |
@@ -604,7 +647,7 @@ static int lp_sub (lua_State *L) { | |||
604 | sib1(tree)->tag = TNot; /* ...not... */ | 647 | sib1(tree)->tag = TNot; /* ...not... */ |
605 | memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ | 648 | memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ |
606 | memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ | 649 | memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ |
607 | correctkeys(sib1(tree), joinktables(L, 1, 2)); | 650 | joinktables(L, 1, sib1(tree), 2); |
608 | } | 651 | } |
609 | return 1; | 652 | return 1; |
610 | } | 653 | } |
@@ -645,8 +688,8 @@ static int lp_behind (lua_State *L) { | |||
645 | TTree *tree; | 688 | TTree *tree; |
646 | TTree *tree1 = getpatt(L, 1, NULL); | 689 | TTree *tree1 = getpatt(L, 1, NULL); |
647 | int n = fixedlen(tree1); | 690 | int n = fixedlen(tree1); |
648 | luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); | ||
649 | luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length"); | 691 | luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length"); |
692 | luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); | ||
650 | luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); | 693 | luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); |
651 | tree = newroot1sib(L, TBehind); | 694 | tree = newroot1sib(L, TBehind); |
652 | tree->u.n = n; | 695 | tree->u.n = n; |
@@ -697,7 +740,7 @@ static int lp_labchoice (lua_State *L) { | |||
697 | static int lp_V (lua_State *L) { | 740 | static int lp_V (lua_State *L) { |
698 | TTree *tree = newleaf(L, TOpenCall); | 741 | TTree *tree = newleaf(L, TOpenCall); |
699 | luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); | 742 | luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); |
700 | tree->key = addtoktable(L, 1); | 743 | tree->key = addtonewktable(L, 0, 1); |
701 | return 1; | 744 | return 1; |
702 | } | 745 | } |
703 | 746 | ||
@@ -710,7 +753,7 @@ static int lp_V (lua_State *L) { | |||
710 | static int capture_aux (lua_State *L, int cap, int labelidx) { | 753 | static int capture_aux (lua_State *L, int cap, int labelidx) { |
711 | TTree *tree = newroot1sib(L, TCapture); | 754 | TTree *tree = newroot1sib(L, TCapture); |
712 | tree->cap = cap; | 755 | tree->cap = cap; |
713 | tree->key = addtoktable(L, labelidx); | 756 | tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx); |
714 | return 1; | 757 | return 1; |
715 | } | 758 | } |
716 | 759 | ||
@@ -718,10 +761,9 @@ static int capture_aux (lua_State *L, int cap, int labelidx) { | |||
718 | /* | 761 | /* |
719 | ** Fill a tree with an empty capture, using an empty (TTrue) sibling. | 762 | ** Fill a tree with an empty capture, using an empty (TTrue) sibling. |
720 | */ | 763 | */ |
721 | static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) { | 764 | static TTree *auxemptycap (TTree *tree, int cap) { |
722 | tree->tag = TCapture; | 765 | tree->tag = TCapture; |
723 | tree->cap = cap; | 766 | tree->cap = cap; |
724 | tree->key = addtoktable(L, idx); | ||
725 | sib1(tree)->tag = TTrue; | 767 | sib1(tree)->tag = TTrue; |
726 | return tree; | 768 | return tree; |
727 | } | 769 | } |
@@ -730,8 +772,18 @@ static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) { | |||
730 | /* | 772 | /* |
731 | ** Create a tree for an empty capture | 773 | ** Create a tree for an empty capture |
732 | */ | 774 | */ |
733 | static TTree *newemptycap (lua_State *L, int cap, int idx) { | 775 | static TTree *newemptycap (lua_State *L, int cap) { |
734 | return auxemptycap(L, newtree(L, 2), cap, idx); | 776 | return auxemptycap(newtree(L, 2), cap); |
777 | } | ||
778 | |||
779 | |||
780 | /* | ||
781 | ** Create a tree for an empty capture with an associated Lua value | ||
782 | */ | ||
783 | static TTree *newemptycapkey (lua_State *L, int cap, int idx) { | ||
784 | TTree *tree = auxemptycap(newtree(L, 2), cap); | ||
785 | tree->key = addtonewktable(L, 0, idx); | ||
786 | return tree; | ||
735 | } | 787 | } |
736 | 788 | ||
737 | 789 | ||
@@ -789,14 +841,14 @@ static int lp_simplecapture (lua_State *L) { | |||
789 | 841 | ||
790 | 842 | ||
791 | static int lp_poscapture (lua_State *L) { | 843 | static int lp_poscapture (lua_State *L) { |
792 | newemptycap(L, Cposition, 0); | 844 | newemptycap(L, Cposition); |
793 | return 1; | 845 | return 1; |
794 | } | 846 | } |
795 | 847 | ||
796 | 848 | ||
797 | static int lp_argcapture (lua_State *L) { | 849 | static int lp_argcapture (lua_State *L) { |
798 | int n = luaL_checkint(L, 1); | 850 | int n = (int)luaL_checkinteger(L, 1); |
799 | TTree *tree = newemptycap(L, Carg, 0); | 851 | TTree *tree = newemptycap(L, Carg); |
800 | tree->key = n; | 852 | tree->key = n; |
801 | luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); | 853 | luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); |
802 | return 1; | 854 | return 1; |
@@ -805,7 +857,7 @@ static int lp_argcapture (lua_State *L) { | |||
805 | 857 | ||
806 | static int lp_backref (lua_State *L) { | 858 | static int lp_backref (lua_State *L) { |
807 | luaL_checkstring(L, 1); | 859 | luaL_checkstring(L, 1); |
808 | newemptycap(L, Cbackref, 1); | 860 | newemptycapkey(L, Cbackref, 1); |
809 | return 1; | 861 | return 1; |
810 | } | 862 | } |
811 | 863 | ||
@@ -819,9 +871,10 @@ static int lp_constcapture (lua_State *L) { | |||
819 | if (n == 0) /* no values? */ | 871 | if (n == 0) /* no values? */ |
820 | newleaf(L, TTrue); /* no capture */ | 872 | newleaf(L, TTrue); /* no capture */ |
821 | else if (n == 1) | 873 | else if (n == 1) |
822 | newemptycap(L, Cconst, 1); /* single constant capture */ | 874 | newemptycapkey(L, Cconst, 1); /* single constant capture */ |
823 | else { /* create a group capture with all values */ | 875 | else { /* create a group capture with all values */ |
824 | TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); | 876 | TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); |
877 | newktable(L, n); /* create a 'ktable' for new tree */ | ||
825 | tree->tag = TCapture; | 878 | tree->tag = TCapture; |
826 | tree->cap = Cgroup; | 879 | tree->cap = Cgroup; |
827 | tree->key = 0; | 880 | tree->key = 0; |
@@ -829,10 +882,12 @@ static int lp_constcapture (lua_State *L) { | |||
829 | for (i = 1; i <= n - 1; i++) { | 882 | for (i = 1; i <= n - 1; i++) { |
830 | tree->tag = TSeq; | 883 | tree->tag = TSeq; |
831 | tree->u.ps = 3; /* skip TCapture and its sibling */ | 884 | tree->u.ps = 3; /* skip TCapture and its sibling */ |
832 | auxemptycap(L, sib1(tree), Cconst, i); | 885 | auxemptycap(sib1(tree), Cconst); |
886 | sib1(tree)->key = addtoktable(L, i); | ||
833 | tree = sib2(tree); | 887 | tree = sib2(tree); |
834 | } | 888 | } |
835 | auxemptycap(L, tree, Cconst, i); | 889 | auxemptycap(tree, Cconst); |
890 | tree->key = addtoktable(L, i); | ||
836 | } | 891 | } |
837 | return 1; | 892 | return 1; |
838 | } | 893 | } |
@@ -842,7 +897,7 @@ static int lp_matchtime (lua_State *L) { | |||
842 | TTree *tree; | 897 | TTree *tree; |
843 | luaL_checktype(L, 2, LUA_TFUNCTION); | 898 | luaL_checktype(L, 2, LUA_TFUNCTION); |
844 | tree = newroot1sib(L, TRunTime); | 899 | tree = newroot1sib(L, TRunTime); |
845 | tree->key = addtoktable(L, 2); | 900 | tree->key = addtonewktable(L, 1, 2); |
846 | return 1; | 901 | return 1; |
847 | } | 902 | } |
848 | 903 | ||
@@ -1146,20 +1201,20 @@ static int lp_match (lua_State *L) { | |||
1146 | lua_pushnil(L); /* initialize subscache */ | 1201 | lua_pushnil(L); /* initialize subscache */ |
1147 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ | 1202 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ |
1148 | lua_getfenv(L, 1); /* initialize penvidx */ | 1203 | lua_getfenv(L, 1); /* initialize penvidx */ |
1149 | r = match(L, s, s + i, s + l, code, capture, ptop, &labelf); | 1204 | r = match(L, s, s + i, s + l, code, capture, ptop, &labelf); /* labeled failure */ |
1150 | if (r == NULL) { | 1205 | if (r == NULL) { /* labeled failure begin */ |
1151 | int j = 0; | 1206 | int j = 0; |
1152 | int n = 1; | 1207 | int n = 1; |
1153 | lua_pushnil(L); | 1208 | lua_pushnil(L); |
1154 | while (j < (int) MAXLABELS) { | 1209 | while (j < (int) MAXLABELS) { |
1155 | if (labelf & (1 << j)) { | 1210 | if (labelf & (1 << j)) { |
1156 | lua_pushinteger(L, j); | 1211 | lua_pushinteger(L, j); |
1157 | n++; | 1212 | n++; |
1158 | } | 1213 | } |
1159 | j++; | 1214 | j++; |
1160 | } | 1215 | } |
1161 | return n; | 1216 | return n; |
1162 | } | 1217 | } /* labeled failure end */ |
1163 | return getcaptures(L, s, r, ptop); | 1218 | return getcaptures(L, s, r, ptop); |
1164 | } | 1219 | } |
1165 | 1220 | ||
@@ -1197,7 +1252,7 @@ static int lp_type (lua_State *L) { | |||
1197 | int lp_gc (lua_State *L) { | 1252 | int lp_gc (lua_State *L) { |
1198 | Pattern *p = getpattern(L, 1); | 1253 | Pattern *p = getpattern(L, 1); |
1199 | if (p->codesize > 0) | 1254 | if (p->codesize > 0) |
1200 | reallocprog(L, p, 0); | 1255 | realloccode(L, p, 0); |
1201 | return 0; | 1256 | return 0; |
1202 | } | 1257 | } |
1203 | 1258 | ||
@@ -1258,8 +1313,8 @@ static struct luaL_Reg pattreg[] = { | |||
1258 | {"version", lp_version}, | 1313 | {"version", lp_version}, |
1259 | {"setmaxstack", lp_setmax}, | 1314 | {"setmaxstack", lp_setmax}, |
1260 | {"type", lp_type}, | 1315 | {"type", lp_type}, |
1261 | {"T", lp_throw}, /* labeled failure throw */ | 1316 | {"T", lp_throw}, /* labeled failure throw */ |
1262 | {"Lc", lp_labchoice}, /* labeled failure choice */ | 1317 | {"Lc", lp_labchoice}, /* labeled failure choice */ |
1263 | {NULL, NULL} | 1318 | {NULL, NULL} |
1264 | }; | 1319 | }; |
1265 | 1320 | ||
@@ -1277,13 +1332,13 @@ static struct luaL_Reg metareg[] = { | |||
1277 | }; | 1332 | }; |
1278 | 1333 | ||
1279 | 1334 | ||
1280 | int luaopen_lpeglabel (lua_State *L); | 1335 | int luaopen_lpeglabel (lua_State *L); /* labeld failure */ |
1281 | int luaopen_lpeglabel (lua_State *L) { | 1336 | int luaopen_lpeglabel (lua_State *L) { /* labeled failure */ |
1282 | luaL_newmetatable(L, PATTERN_T); | 1337 | luaL_newmetatable(L, PATTERN_T); |
1283 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ | 1338 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ |
1284 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | 1339 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); |
1285 | luaL_register(L, NULL, metareg); | 1340 | luaL_register(L, NULL, metareg); |
1286 | luaL_register(L, "lpeglabel", pattreg); | 1341 | luaL_register(L, "lpeglabel", pattreg); /* labeled failure */ |
1287 | lua_pushvalue(L, -1); | 1342 | lua_pushvalue(L, -1); |
1288 | lua_setfield(L, -3, "__index"); | 1343 | lua_setfield(L, -3, "__index"); |
1289 | return 1; | 1344 | return 1; |
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lptypes.h,v 1.8 2013/04/12 16:26:38 roberto Exp $ | 2 | ** $Id: lptypes.h,v 1.11 2015/03/04 16:38:00 roberto Exp $ |
3 | ** LPeg - PEG pattern matching for Lua | 3 | ** LPeg - PEG pattern matching for Lua |
4 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 4 | ** Copyright 2007-2014, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
5 | ** written by Roberto Ierusalimschy | 5 | ** written by Roberto Ierusalimschy |
6 | */ | 6 | */ |
7 | 7 | ||
@@ -19,7 +19,7 @@ | |||
19 | #include "lua.h" | 19 | #include "lua.h" |
20 | 20 | ||
21 | 21 | ||
22 | #define VERSION "0.12" | 22 | #define VERSION "0.12.2" |
23 | 23 | ||
24 | 24 | ||
25 | #define PATTERN_T "lpeg-pattern" | 25 | #define PATTERN_T "lpeg-pattern" |
@@ -29,7 +29,7 @@ | |||
29 | /* | 29 | /* |
30 | ** compatibility with Lua 5.2 | 30 | ** compatibility with Lua 5.2 |
31 | */ | 31 | */ |
32 | #if (LUA_VERSION_NUM == 502) | 32 | #if (LUA_VERSION_NUM >= 502) |
33 | 33 | ||
34 | #undef lua_equal | 34 | #undef lua_equal |
35 | #define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) | 35 | #define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) |
@@ -56,7 +56,9 @@ | |||
56 | 56 | ||
57 | 57 | ||
58 | /* maximum number of rules in a grammar */ | 58 | /* maximum number of rules in a grammar */ |
59 | #define MAXRULES 200 | 59 | #if !defined(MAXRULES) |
60 | #define MAXRULES 1000 | ||
61 | #endif | ||
60 | 62 | ||
61 | 63 | ||
62 | 64 | ||
@@ -107,6 +109,7 @@ typedef struct Charset { | |||
107 | /* set 'b' bit in charset 'cs' */ | 109 | /* set 'b' bit in charset 'cs' */ |
108 | #define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) | 110 | #define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) |
109 | 111 | ||
112 | |||
110 | /* labeled failure begin */ | 113 | /* labeled failure begin */ |
111 | typedef int Labelset; | 114 | typedef int Labelset; |
112 | 115 | ||
@@ -38,7 +38,7 @@ typedef struct Stack { | |||
38 | const char *s; /* saved position (or NULL for calls) */ | 38 | const char *s; /* saved position (or NULL for calls) */ |
39 | const Instruction *p; /* next instruction */ | 39 | const Instruction *p; /* next instruction */ |
40 | int caplevel; | 40 | int caplevel; |
41 | Labelset ls; /* labeled failure */ | 41 | Labelset ls; /* labeled failure */ |
42 | } Stack; | 42 | } Stack; |
43 | 43 | ||
44 | 44 | ||
@@ -146,7 +146,7 @@ static int removedyncap (lua_State *L, Capture *capture, | |||
146 | ** Opcode interpreter | 146 | ** Opcode interpreter |
147 | */ | 147 | */ |
148 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | 148 | const char *match (lua_State *L, const char *o, const char *s, const char *e, |
149 | Instruction *op, Capture *capture, int ptop, Labelset *labelf) { | 149 | Instruction *op, Capture *capture, int ptop, Labelset *labelf) { /* labeled failure */ |
150 | Stack stackbase[INITBACK]; | 150 | Stack stackbase[INITBACK]; |
151 | Stack *stacklimit = stackbase + INITBACK; | 151 | Stack *stacklimit = stackbase + INITBACK; |
152 | Stack *stack = stackbase; /* point to first empty slot in stack */ | 152 | Stack *stack = stackbase; /* point to first empty slot in stack */ |
@@ -157,7 +157,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
157 | stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; | 157 | stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; |
158 | lua_pushlightuserdata(L, stackbase); | 158 | lua_pushlightuserdata(L, stackbase); |
159 | for (;;) { | 159 | for (;;) { |
160 | #if defined(LPEGDEBUG) | 160 | #if defined(DEBUG) |
161 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", | 161 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", |
162 | s, stack - getstackbase(L, ptop), ndyncap, captop); | 162 | s, stack - getstackbase(L, ptop), ndyncap, captop); |
163 | printinst(op, p); | 163 | printinst(op, p); |
@@ -183,9 +183,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
183 | case IAny: { | 183 | case IAny: { |
184 | if (s < e) { p++; s++; } | 184 | if (s < e) { p++; s++; } |
185 | else { | 185 | else { |
186 | *labelf = LFAIL; /* labeled failure */ | 186 | *labelf = LFAIL; /* labeled failure */ |
187 | goto fail; | 187 | goto fail; |
188 | } | 188 | } |
189 | continue; | 189 | continue; |
190 | } | 190 | } |
191 | case ITestAny: { | 191 | case ITestAny: { |
@@ -196,9 +196,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
196 | case IChar: { | 196 | case IChar: { |
197 | if ((byte)*s == p->i.aux && s < e) { p++; s++; } | 197 | if ((byte)*s == p->i.aux && s < e) { p++; s++; } |
198 | else { | 198 | else { |
199 | *labelf = LFAIL; /* labeled failure */ | 199 | *labelf = LFAIL; /* labeled failure */ |
200 | goto fail; | 200 | goto fail; |
201 | } | 201 | } |
202 | continue; | 202 | continue; |
203 | } | 203 | } |
204 | case ITestChar: { | 204 | case ITestChar: { |
@@ -211,9 +211,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
211 | if (testchar((p+1)->buff, c) && s < e) | 211 | if (testchar((p+1)->buff, c) && s < e) |
212 | { p += CHARSETINSTSIZE; s++; } | 212 | { p += CHARSETINSTSIZE; s++; } |
213 | else { | 213 | else { |
214 | *labelf = LFAIL; /* labeled failure */ | 214 | *labelf = LFAIL; /* labeled failure */ |
215 | goto fail; | 215 | goto fail; |
216 | } | 216 | } |
217 | continue; | 217 | continue; |
218 | } | 218 | } |
219 | case ITestSet: { | 219 | case ITestSet: { |
@@ -226,9 +226,9 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
226 | case IBehind: { | 226 | case IBehind: { |
227 | int n = p->i.aux; | 227 | int n = p->i.aux; |
228 | if (n > s - o) { | 228 | if (n > s - o) { |
229 | *labelf = LFAIL; /* labeled failure */ | 229 | *labelf = LFAIL; /* labeled failure */ |
230 | goto fail; | 230 | goto fail; |
231 | } | 231 | } |
232 | s -= n; p++; | 232 | s -= n; p++; |
233 | continue; | 233 | continue; |
234 | } | 234 | } |
@@ -249,24 +249,23 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
249 | stack = doublestack(L, &stacklimit, ptop); | 249 | stack = doublestack(L, &stacklimit, ptop); |
250 | stack->p = p + getoffset(p); | 250 | stack->p = p + getoffset(p); |
251 | stack->s = s; | 251 | stack->s = s; |
252 | stack->ls = LFAIL; /* labeled failure */ | 252 | stack->ls = LFAIL; /* labeled failure */ |
253 | stack->caplevel = captop; | 253 | stack->caplevel = captop; |
254 | stack++; | 254 | stack++; |
255 | p += 2; | 255 | p += 2; |
256 | continue; | 256 | continue; |
257 | } | 257 | } |
258 | case ILabChoice: { /* labeled failure */ | 258 | case ILabChoice: { /* labeled failure */ |
259 | if (stack == stacklimit) | 259 | if (stack == stacklimit) |
260 | stack = doublestack(L, &stacklimit, ptop); | 260 | stack = doublestack(L, &stacklimit, ptop); |
261 | stack->p = p + getoffset(p); | 261 | stack->p = p + getoffset(p); |
262 | stack->s = s; | 262 | stack->s = s; |
263 | stack->ls = (p + 2)->labels; | 263 | stack->ls = (p + 2)->labels; |
264 | stack->caplevel = captop; | 264 | stack->caplevel = captop; |
265 | stack++; | 265 | stack++; |
266 | p += 3; | 266 | p += 3; |
267 | continue; | 267 | continue; |
268 | } | 268 | } |
269 | |||
270 | case ICall: { | 269 | case ICall: { |
271 | if (stack == stacklimit) | 270 | if (stack == stacklimit) |
272 | stack = doublestack(L, &stacklimit, ptop); | 271 | stack = doublestack(L, &stacklimit, ptop); |
@@ -296,22 +295,20 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
296 | p += getoffset(p); | 295 | p += getoffset(p); |
297 | continue; | 296 | continue; |
298 | } | 297 | } |
299 | case IThrow: { /* labeled failure */ | 298 | case IThrow: { /* labeled failure */ |
300 | *labelf = (p+1)->labels; | 299 | *labelf = (p+1)->labels; |
301 | goto fail; | 300 | goto fail; |
302 | } | 301 | } |
303 | case IFailTwice: | 302 | case IFailTwice: |
304 | assert(stack > getstackbase(L, ptop)); | 303 | assert(stack > getstackbase(L, ptop)); |
305 | stack--; | 304 | stack--; |
306 | /* go through */ | 305 | /* go through */ |
307 | case IFail: | 306 | case IFail: |
308 | *labelf = LFAIL; /* labeled failure */ | 307 | *labelf = LFAIL; /* labeled failure */ |
309 | fail: { /* pattern failed: try to backtrack */ | 308 | fail: { /* pattern failed: try to backtrack */ |
310 | do { /* remove pending calls */ | 309 | do { /* remove pending calls */ |
311 | assert(stack > getstackbase(L, ptop)); | 310 | assert(stack > getstackbase(L, ptop)); |
312 | s = (--stack)->s; | 311 | s = (--stack)->s; |
313 | /*printf("fail (s == NULL => %d), labelf=%d stack->ls=%d (stack-> == giveup %d)\n", | ||
314 | s == NULL, labelf, stack->ls, stack->p == &giveup);*/ | ||
315 | } while (s == NULL || (!(stack->ls & *labelf) && stack->p != &giveup)); | 312 | } while (s == NULL || (!(stack->ls & *labelf) && stack->p != &giveup)); |
316 | if (ndyncap > 0) /* is there matchtime captures? */ | 313 | if (ndyncap > 0) /* is there matchtime captures? */ |
317 | ndyncap -= removedyncap(L, capture, stack->caplevel, captop); | 314 | ndyncap -= removedyncap(L, capture, stack->caplevel, captop); |
@@ -328,10 +325,10 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
328 | captop -= n; /* remove nested captures */ | 325 | captop -= n; /* remove nested captures */ |
329 | fr -= rem; /* 'rem' items were popped from Lua stack */ | 326 | fr -= rem; /* 'rem' items were popped from Lua stack */ |
330 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ | 327 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ |
331 | if (res == -1) { /* fail? */ | 328 | if (res == -1) { /* fail? */ |
332 | *labelf = LFAIL; /* labeled failure */ | 329 | *labelf = LFAIL; /* labeled failure */ |
333 | goto fail; | 330 | goto fail; |
334 | } | 331 | } |
335 | s = o + res; /* else update current position */ | 332 | s = o + res; /* else update current position */ |
336 | n = lua_gettop(L) - fr + 1; /* number of new captures */ | 333 | n = lua_gettop(L) - fr + 1; /* number of new captures */ |
337 | ndyncap += n - rem; /* update number of dynamic captures */ | 334 | ndyncap += n - rem; /* update number of dynamic captures */ |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lpvm.h,v 1.2 2013/04/03 20:37:18 roberto Exp $ | 2 | ** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $ |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #if !defined(lpvm_h) | 5 | #if !defined(lpvm_h) |
@@ -34,8 +34,8 @@ typedef enum Opcode { | |||
34 | IOpenCapture, /* start a capture */ | 34 | IOpenCapture, /* start a capture */ |
35 | ICloseCapture, | 35 | ICloseCapture, |
36 | ICloseRunTime, | 36 | ICloseRunTime, |
37 | IThrow, /* "fails" with a specific label labeled failure */ | 37 | IThrow, /* "fails" with a specific label labeled failure */ |
38 | ILabChoice /* labeled choice */ | 38 | ILabChoice /* labeled choice */ |
39 | } Opcode; | 39 | } Opcode; |
40 | 40 | ||
41 | 41 | ||
@@ -52,14 +52,9 @@ typedef union Instruction { | |||
52 | } Instruction; | 52 | } Instruction; |
53 | 53 | ||
54 | 54 | ||
55 | int getposition (lua_State *L, int t, int i); | ||
56 | void printpatt (Instruction *p, int n); | 55 | void printpatt (Instruction *p, int n); |
57 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | 56 | const char *match (lua_State *L, const char *o, const char *s, const char *e, |
58 | Instruction *op, Capture *capture, int ptop, Labelset *labelf); | 57 | Instruction *op, Capture *capture, int ptop, Labelset *labelf); /* labeled failure */ |
59 | int verify (lua_State *L, Instruction *op, const Instruction *p, | ||
60 | Instruction *e, int postable, int rule); | ||
61 | void checkrule (lua_State *L, Instruction *op, int from, int to, | ||
62 | int postable, int rule); | ||
63 | 58 | ||
64 | 59 | ||
65 | #endif | 60 | #endif |
@@ -1,8 +1,8 @@ | |||
1 | LIBNAME = lpeglabel | 1 | LIBNAME = lpeglabel |
2 | LUADIR = /usr/include/lua5.1/ | 2 | LUADIR = ../lua/ |
3 | 3 | ||
4 | #COPT = -O2 | 4 | COPT = -O2 |
5 | COPT = -DLPEG_DEBUG -g | 5 | # COPT = -DLPEG_DEBUG -g |
6 | 6 | ||
7 | CWARNS = -Wall -Wextra -pedantic \ | 7 | CWARNS = -Wall -Wextra -pedantic \ |
8 | -Waggregate-return \ | 8 | -Waggregate-return \ |
@@ -22,7 +22,7 @@ CWARNS = -Wall -Wextra -pedantic \ | |||
22 | # -Wunreachable-code \ | 22 | # -Wunreachable-code \ |
23 | 23 | ||
24 | 24 | ||
25 | CFLAGS = $(CWARNS) $(COPT) -ansi -I$(LUADIR) -fPIC | 25 | CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC |
26 | CC = gcc | 26 | CC = gcc |
27 | 27 | ||
28 | FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o | 28 | FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o |
@@ -1,6 +1,6 @@ | |||
1 | #!/usr/bin/env lua5.1 | 1 | #!/usr/bin/env lua5.1 |
2 | 2 | ||
3 | -- $Id: test.lua,v 1.101 2013/04/12 16:30:33 roberto Exp $ | 3 | -- $Id: test.lua,v 1.106 2015/03/04 17:31:33 roberto Exp $ |
4 | 4 | ||
5 | -- require"strict" -- just to be pedantic | 5 | -- require"strict" -- just to be pedantic |
6 | 6 | ||
@@ -170,8 +170,8 @@ assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7) | |||
170 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")} | 170 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")} |
171 | checkeq(a, {"123", "d"}) | 171 | checkeq(a, {"123", "d"}) |
172 | 172 | ||
173 | a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")} | 173 | -- bug in LPeg 0.12 (nil value does not create a 'ktable') |
174 | checkeq(a, {"123"}) | 174 | assert(m.match(m.Cc(nil), "") == nil) |
175 | 175 | ||
176 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")} | 176 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")} |
177 | checkeq(a, {"abcd", "l"}) | 177 | checkeq(a, {"abcd", "l"}) |
@@ -194,6 +194,16 @@ checkeq(a, {1, 5}) | |||
194 | t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")} | 194 | t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")} |
195 | checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""}) | 195 | checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""}) |
196 | 196 | ||
197 | -- bug in 0.12 ('hascapture' did not check for captures inside a rule) | ||
198 | do | ||
199 | local pat = m.P{ | ||
200 | 'S'; | ||
201 | S1 = m.C('abc') + 3, | ||
202 | S = #m.V('S1') -- rule has capture, but '#' must ignore it | ||
203 | } | ||
204 | assert(pat:match'abc' == 1) | ||
205 | end | ||
206 | |||
197 | 207 | ||
198 | -- test for small capture boundary | 208 | -- test for small capture boundary |
199 | for i = 250,260 do | 209 | for i = 250,260 do |
@@ -201,9 +211,8 @@ for i = 250,260 do | |||
201 | assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i) | 211 | assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i) |
202 | end | 212 | end |
203 | 213 | ||
204 | |||
205 | -- tests for any*n and any*-n | 214 | -- tests for any*n and any*-n |
206 | for n = 1, 550 do | 215 | for n = 1, 550, 13 do |
207 | local x_1 = string.rep('x', n - 1) | 216 | local x_1 = string.rep('x', n - 1) |
208 | local x = x_1 .. 'a' | 217 | local x = x_1 .. 'a' |
209 | assert(not m.P(n):match(x_1)) | 218 | assert(not m.P(n):match(x_1)) |
@@ -345,8 +354,9 @@ checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'}) | |||
345 | 354 | ||
346 | 355 | ||
347 | -- test for error messages | 356 | -- test for error messages |
348 | local function checkerr (msg, ...) | 357 | local function checkerr (msg, f, ...) |
349 | assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...)))) | 358 | local st, err = pcall(f, ...) |
359 | assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err)) | ||
350 | end | 360 | end |
351 | 361 | ||
352 | checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") | 362 | checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") |
@@ -370,6 +380,32 @@ p = {'a', | |||
370 | } | 380 | } |
371 | checkerr("rule 'a' may be left recursive", m.match, p, "a") | 381 | checkerr("rule 'a' may be left recursive", m.match, p, "a") |
372 | 382 | ||
383 | -- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit) | ||
384 | -- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1 | ||
385 | -- that is optimized to ICommit L1 | ||
386 | |||
387 | p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' } | ||
388 | assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc') | ||
389 | |||
390 | |||
391 | do | ||
392 | -- large dynamic Cc | ||
393 | local lim = 2^16 - 1 | ||
394 | local c = 0 | ||
395 | local function seq (n) | ||
396 | if n == 1 then c = c + 1; return m.Cc(c) | ||
397 | else | ||
398 | local m = math.floor(n / 2) | ||
399 | return seq(m) * seq(n - m) | ||
400 | end | ||
401 | end | ||
402 | p = m.Ct(seq(lim)) | ||
403 | t = p:match('') | ||
404 | assert(t[lim] == lim) | ||
405 | checkerr("too many", function () p = p / print end) | ||
406 | checkerr("too many", seq, lim + 1) | ||
407 | end | ||
408 | |||
373 | 409 | ||
374 | -- tests for non-pattern as arguments to pattern functions | 410 | -- tests for non-pattern as arguments to pattern functions |
375 | 411 | ||
@@ -488,7 +524,10 @@ assert(m.match(1 * m.B(1), 'a') == 2) | |||
488 | assert(m.match(-m.B(1), 'a') == 1) | 524 | assert(m.match(-m.B(1), 'a') == 1) |
489 | assert(m.match(m.B(250), string.rep('a', 250)) == nil) | 525 | assert(m.match(m.B(250), string.rep('a', 250)) == nil) |
490 | assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251) | 526 | assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251) |
491 | assert(not pcall(m.B, 260)) | 527 | |
528 | -- look-behind with an open call | ||
529 | checkerr("pattern may not have fixed length", m.B, m.V'S1') | ||
530 | checkerr("too long to look behind", m.B, 260) | ||
492 | 531 | ||
493 | B = #letter * -m.B(letter) + -letter * m.B(letter) | 532 | B = #letter * -m.B(letter) + -letter * m.B(letter) |
494 | x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) }) | 533 | x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) }) |
@@ -523,7 +562,6 @@ assert(m.match(#m.S'567' * 1, "6") == 2) | |||
523 | 562 | ||
524 | -- tests for Tail Calls | 563 | -- tests for Tail Calls |
525 | 564 | ||
526 | --labeled failure | ||
527 | p = m.P{ 'a' * m.V(1) + '' } | 565 | p = m.P{ 'a' * m.V(1) + '' } |
528 | assert(p:match(string.rep('a', 1000)) == 1001) | 566 | assert(p:match(string.rep('a', 1000)) == 1001) |
529 | 567 | ||
@@ -546,7 +584,6 @@ p = m.P{ | |||
546 | [4] = '0' * m.V(3) + '1' * m.V(2), | 584 | [4] = '0' * m.V(3) + '1' * m.V(2), |
547 | } | 585 | } |
548 | 586 | ||
549 | -- labeled failure | ||
550 | assert(p:match(string.rep("00", 10000))) | 587 | assert(p:match(string.rep("00", 10000))) |
551 | assert(p:match(string.rep("01", 10000))) | 588 | assert(p:match(string.rep("01", 10000))) |
552 | assert(p:match(string.rep("011", 10000))) | 589 | assert(p:match(string.rep("011", 10000))) |
@@ -557,16 +594,15 @@ assert(not p:match(string.rep("011", 10001))) | |||
557 | -- this grammar does need backtracking info. | 594 | -- this grammar does need backtracking info. |
558 | local lim = 10000 | 595 | local lim = 10000 |
559 | p = m.P{ '0' * m.V(1) + '0' } | 596 | p = m.P{ '0' * m.V(1) + '0' } |
560 | assert(not pcall(m.match, p, string.rep("0", lim))) | 597 | checkerr("too many pending", m.match, p, string.rep("0", lim)) |
561 | m.setmaxstack(2*lim) | 598 | m.setmaxstack(2*lim) |
562 | assert(not pcall(m.match, p, string.rep("0", lim))) | 599 | checkerr("too many pending", m.match, p, string.rep("0", lim)) |
563 | m.setmaxstack(2*lim + 4) | 600 | m.setmaxstack(2*lim + 4) |
564 | assert(pcall(m.match, p, string.rep("0", lim))) | 601 | assert(m.match(p, string.rep("0", lim)) == lim + 1) |
565 | 602 | ||
566 | -- this repetition should not need stack space (only the call does) | 603 | -- this repetition should not need stack space (only the call does) |
567 | p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' } | 604 | p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' } |
568 | m.setmaxstack(200) | 605 | m.setmaxstack(200) |
569 | -- labeled failure | ||
570 | assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362) | 606 | assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362) |
571 | 607 | ||
572 | m.setmaxstack(5) -- restore original limit | 608 | m.setmaxstack(5) -- restore original limit |
@@ -591,10 +627,10 @@ print("+") | |||
591 | 627 | ||
592 | 628 | ||
593 | -- tests for argument captures | 629 | -- tests for argument captures |
594 | assert(not pcall(m.Carg, 0)) | 630 | checkerr("invalid argument", m.Carg, 0) |
595 | assert(not pcall(m.Carg, -1)) | 631 | checkerr("invalid argument", m.Carg, -1) |
596 | assert(not pcall(m.Carg, 2^18)) | 632 | checkerr("invalid argument", m.Carg, 2^18) |
597 | assert(not pcall(m.match, m.Carg(1), 'a', 1)) | 633 | checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1) |
598 | assert(m.match(m.Carg(1), 'a', 1, print) == print) | 634 | assert(m.match(m.Carg(1), 'a', 1, print) == print) |
599 | x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)} | 635 | x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)} |
600 | checkeq(x, {10, 20}) | 636 | checkeq(x, {10, 20}) |
@@ -647,14 +683,16 @@ assert(m.match(p, "aaaa") == 5) | |||
647 | assert(m.match(p, "abaa") == 2) | 683 | assert(m.match(p, "abaa") == 2) |
648 | assert(not m.match(p, "baaa")) | 684 | assert(not m.match(p, "baaa")) |
649 | 685 | ||
650 | assert(not pcall(m.match, function () return 2^20 end, s)) | 686 | checkerr("invalid position", m.match, function () return 2^20 end, s) |
651 | assert(not pcall(m.match, function () return 0 end, s)) | 687 | checkerr("invalid position", m.match, function () return 0 end, s) |
652 | assert(not pcall(m.match, function (s, i) return i - 1 end, s)) | 688 | checkerr("invalid position", m.match, function (s, i) return i - 1 end, s) |
653 | assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s)) | 689 | checkerr("invalid position", m.match, |
690 | m.P(1)^0 * function (_, i) return i - 1 end, s) | ||
654 | assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s)) | 691 | assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s)) |
655 | assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s)) | 692 | checkerr("invalid position", m.match, |
693 | m.P(1)^0 * function (_, i) return i + 1 end, s) | ||
656 | assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s)) | 694 | assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s)) |
657 | assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)) | 695 | checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s) |
658 | assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s)) | 696 | assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s)) |
659 | assert(m.match(m.P(1)^0 * function (_, i) return true end, s) == | 697 | assert(m.match(m.P(1)^0 * function (_, i) return true end, s) == |
660 | string.len(s) + 1) | 698 | string.len(s) + 1) |
@@ -737,9 +775,9 @@ assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx") | |||
737 | assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") == | 775 | assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") == |
738 | "411 - abc ") | 776 | "411 - abc ") |
739 | 777 | ||
740 | assert(pcall(m.match, m.P(1)/"%0", "abc")) | 778 | assert(m.match(m.P(1)/"%0", "abc") == "a") |
741 | assert(not pcall(m.match, m.P(1)/"%1", "abc")) -- out of range | 779 | checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc") |
742 | assert(not pcall(m.match, m.P(1)/"%9", "abc")) -- out of range | 780 | checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc") |
743 | 781 | ||
744 | p = m.C(1) | 782 | p = m.C(1) |
745 | p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1" | 783 | p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1" |
@@ -757,7 +795,7 @@ assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3") | |||
757 | p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1" | 795 | p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1" |
758 | assert(p:match'x' == 'alo - x - alo') | 796 | assert(p:match'x' == 'alo - x - alo') |
759 | 797 | ||
760 | assert(not pcall(m.match, m.Cc(true) / "%1", "a")) | 798 | checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a") |
761 | 799 | ||
762 | -- long strings for string capture | 800 | -- long strings for string capture |
763 | l = 10000 | 801 | l = 10000 |
@@ -785,35 +823,37 @@ checkeq(t, {a="b", c="du", xux="yuy"}) | |||
785 | 823 | ||
786 | -- errors in accumulator capture | 824 | -- errors in accumulator capture |
787 | 825 | ||
788 | -- very long match (forces fold to be a pair open-close) producing with | ||
789 | -- no initial capture | 826 | -- no initial capture |
790 | assert(not pcall(m.match, m.Cf(m.P(500), print), string.rep('a', 600))) | 827 | checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa') |
828 | -- no initial capture (very long match forces fold to be a pair open-close) | ||
829 | checkerr("no initial value", m.match, m.Cf(m.P(500), print), | ||
830 | string.rep('a', 600)) | ||
791 | 831 | ||
792 | -- nested capture produces no initial value | 832 | -- nested capture produces no initial value |
793 | assert(not pcall(m.match, m.Cf(m.P(1) / {}, print), "alo")) | 833 | checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo") |
794 | 834 | ||
795 | 835 | ||
796 | -- tests for loop checker | 836 | -- tests for loop checker |
797 | 837 | ||
798 | local function haveloop (p) | 838 | local function isnullable (p) |
799 | assert(not pcall(function (p) return p^0 end, m.P(p))) | 839 | checkerr("may accept empty string", function (p) return p^0 end, m.P(p)) |
800 | end | 840 | end |
801 | 841 | ||
802 | haveloop(m.P("x")^-4) | 842 | isnullable(m.P("x")^-4) |
803 | assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3) | 843 | assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3) |
804 | assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3) | 844 | assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3) |
805 | haveloop("") | 845 | isnullable("") |
806 | haveloop(m.P("x")^0) | 846 | isnullable(m.P("x")^0) |
807 | haveloop(m.P("x")^-1) | 847 | isnullable(m.P("x")^-1) |
808 | haveloop(m.P("x") + 1 + 2 + m.P("a")^-1) | 848 | isnullable(m.P("x") + 1 + 2 + m.P("a")^-1) |
809 | haveloop(-m.P("ab")) | 849 | isnullable(-m.P("ab")) |
810 | haveloop(- -m.P("ab")) | 850 | isnullable(- -m.P("ab")) |
811 | haveloop(# #(m.P("ab") + "xy")) | 851 | isnullable(# #(m.P("ab") + "xy")) |
812 | haveloop(- #m.P("ab")^0) | 852 | isnullable(- #m.P("ab")^0) |
813 | haveloop(# -m.P("ab")^1) | 853 | isnullable(# -m.P("ab")^1) |
814 | haveloop(#m.V(3)) | 854 | isnullable(#m.V(3)) |
815 | haveloop(m.V(3) + m.V(1) + m.P('a')^-1) | 855 | isnullable(m.V(3) + m.V(1) + m.P('a')^-1) |
816 | haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)}) | 856 | isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)}) |
817 | assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc") | 857 | assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc") |
818 | == 3) | 858 | == 3) |
819 | assert(m.match(m.P""^-3, "a") == 1) | 859 | assert(m.match(m.P""^-3, "a") == 1) |
@@ -897,8 +937,8 @@ print"+" | |||
897 | 937 | ||
898 | 938 | ||
899 | -- tests for back references | 939 | -- tests for back references |
900 | assert(not pcall(m.match, m.Cb('x'), '')) | 940 | checkerr("back reference 'x' not found", m.match, m.Cb('x'), '') |
901 | assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')) | 941 | checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a') |
902 | 942 | ||
903 | p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k")) | 943 | p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k")) |
904 | t = p:match("ab") | 944 | t = p:match("ab") |
@@ -1054,6 +1094,8 @@ local re = require "re" | |||
1054 | 1094 | ||
1055 | local match, compile = re.match, re.compile | 1095 | local match, compile = re.match, re.compile |
1056 | 1096 | ||
1097 | |||
1098 | |||
1057 | assert(match("a", ".") == 2) | 1099 | assert(match("a", ".") == 2) |
1058 | assert(match("a", "''") == 1) | 1100 | assert(match("a", "''") == 1) |
1059 | assert(match("", " ! . ") == 1) | 1101 | assert(match("", " ! . ") == 1) |
@@ -1348,6 +1390,7 @@ eqlpeggsub("[%W%S]", "%W%S") | |||
1348 | 1390 | ||
1349 | re.updatelocale() | 1391 | re.updatelocale() |
1350 | 1392 | ||
1393 | |||
1351 | -- testing nested substitutions x string captures | 1394 | -- testing nested substitutions x string captures |
1352 | 1395 | ||
1353 | p = re.compile[[ | 1396 | p = re.compile[[ |
@@ -1370,8 +1413,7 @@ assert(rev:match"0123456789" == "9876543210") | |||
1370 | -- testing error messages in re | 1413 | -- testing error messages in re |
1371 | 1414 | ||
1372 | local function errmsg (p, err) | 1415 | local function errmsg (p, err) |
1373 | local s, msg = pcall(re.compile, p) | 1416 | checkerr(err, re.compile, p) |
1374 | assert(not s and string.find(msg, err)) | ||
1375 | end | 1417 | end |
1376 | 1418 | ||
1377 | errmsg('aaaa', "rule 'aaaa'") | 1419 | errmsg('aaaa', "rule 'aaaa'") |
diff --git a/testlabel.lua b/testlabel.lua index ea18bf5..98d8f6c 100644 --- a/testlabel.lua +++ b/testlabel.lua | |||
@@ -279,7 +279,7 @@ local terror = { ['cmdSeq'] = "Missing ';' in CmdSeq", | |||
279 | ['factor'] = "Error matching 'Factor'", | 279 | ['factor'] = "Error matching 'Factor'", |
280 | ['openParExp'] = "Error matching expression after '('", | 280 | ['openParExp'] = "Error matching expression after '('", |
281 | ['closePar'] = "Error matching ')'", | 281 | ['closePar'] = "Error matching ')'", |
282 | ['undefined'] = "Error undefined'"} | 282 | ['undefined'] = "Undefined Error"} |
283 | 283 | ||
284 | g = re.compile([[ | 284 | g = re.compile([[ |
285 | Tiny <- CmdSeq /{1} '' -> cmdSeq /{2} '' -> ifExp /{3} '' -> ifThen /{4} '' -> ifThenCmdSeq | 285 | Tiny <- CmdSeq /{1} '' -> cmdSeq /{2} '' -> ifExp /{3} '' -> ifThen /{4} '' -> ifThenCmdSeq |
@@ -292,7 +292,7 @@ g = re.compile([[ | |||
292 | Cmd <- IfCmd / RepeatCmd / ReadCmd / WriteCmd / AssignCmd | 292 | Cmd <- IfCmd / RepeatCmd / ReadCmd / WriteCmd / AssignCmd |
293 | IfCmd <- IF (Exp / %{2}) (THEN / %{3}) (CmdSeq / %{4}) (ELSE (CmdSeq / %{5}) / '') (END / %{6}) | 293 | IfCmd <- IF (Exp / %{2}) (THEN / %{3}) (CmdSeq / %{4}) (ELSE (CmdSeq / %{5}) / '') (END / %{6}) |
294 | RepeatCmd <- REPEAT (CmdSeq / %{7}) (UNTIL / %{8}) (Exp / %{9}) | 294 | RepeatCmd <- REPEAT (CmdSeq / %{7}) (UNTIL / %{8}) (Exp / %{9}) |
295 | AssignCmd <- !RESERVED NAME (ASSIGNMENT / %{10}) (Exp / %{11}) | 295 | AssignCmd <- NAME (ASSIGNMENT / %{10}) (Exp / %{11}) |
296 | ReadCmd <- READ (NAME / %{12}) | 296 | ReadCmd <- READ (NAME / %{12}) |
297 | WriteCmd <- WRITE (Exp / %{13}) | 297 | WriteCmd <- WRITE (Exp / %{13}) |
298 | Exp <- SimpleExp ((LESS / EQUAL) (SimpleExp / %{14}) / '') | 298 | Exp <- SimpleExp ((LESS / EQUAL) (SimpleExp / %{14}) / '') |
@@ -309,7 +309,7 @@ g = re.compile([[ | |||
309 | EQUAL <- Sp '=' | 309 | EQUAL <- Sp '=' |
310 | LESS <- Sp '<' | 310 | LESS <- Sp '<' |
311 | MUL <- Sp '*' | 311 | MUL <- Sp '*' |
312 | NAME <- Sp [a-z]+ | 312 | NAME <- !RESERVED Sp [a-z]+ |
313 | NUMBER <- Sp [0-9]+ | 313 | NUMBER <- Sp [0-9]+ |
314 | OPENPAR <- Sp '(' | 314 | OPENPAR <- Sp '(' |
315 | READ <- Sp 'read' | 315 | READ <- Sp 'read' |
@@ -319,7 +319,7 @@ g = re.compile([[ | |||
319 | THEN <- Sp 'then' | 319 | THEN <- Sp 'then' |
320 | UNTIL <- Sp 'until' | 320 | UNTIL <- Sp 'until' |
321 | WRITE <- Sp 'write' | 321 | WRITE <- Sp 'write' |
322 | RESERVED <- IF / ELSE / END / READ / REPEAT / THEN / UNTIL / WRITE | 322 | RESERVED <- (IF / ELSE / END / READ / REPEAT / THEN / UNTIL / WRITE) ![a-z]+ |
323 | Sp <- (%s / %nl)* | 323 | Sp <- (%s / %nl)* |
324 | ]], terror) | 324 | ]], terror) |
325 | 325 | ||