diff options
| author | Sergio Medeiros <sqmedeiros@gmail.com> | 2014-10-29 18:13:38 -0300 |
|---|---|---|
| committer | Sergio Medeiros <sqmedeiros@gmail.com> | 2014-10-29 18:13:38 -0300 |
| commit | 8d30a0ff8a8584e225c03d878a9add439ea193a3 (patch) | |
| tree | cdc03907837ebec1aed26be7290a5a40d00f3e2c | |
| parent | 3af55803f0a261dd9b2ccf85a7532288c4a270ae (diff) | |
| download | lpeglabel-8d30a0ff8a8584e225c03d878a9add439ea193a3.tar.gz lpeglabel-8d30a0ff8a8584e225c03d878a9add439ea193a3.tar.bz2 lpeglabel-8d30a0ff8a8584e225c03d878a9add439ea193a3.zip | |
Creating the git repository with the current implementation.
| -rw-r--r-- | lpcap.c | 537 | ||||
| -rw-r--r-- | lpcap.h | 43 | ||||
| -rw-r--r-- | lpcode.c | 1016 | ||||
| -rw-r--r-- | lpcode.h | 34 | ||||
| -rw-r--r-- | lpprint.c | 255 | ||||
| -rw-r--r-- | lpprint.h | 37 | ||||
| -rw-r--r-- | lptree.c | 1282 | ||||
| -rw-r--r-- | lptree.h | 79 | ||||
| -rw-r--r-- | lptypes.h | 158 | ||||
| -rw-r--r-- | lpvm.c | 391 | ||||
| -rw-r--r-- | lpvm.h | 66 | ||||
| -rw-r--r-- | makefile | 55 | ||||
| -rw-r--r-- | re.lua | 276 | ||||
| -rwxr-xr-x | test.lua | 1386 | ||||
| -rw-r--r-- | testlabel.lua | 422 |
15 files changed, 6037 insertions, 0 deletions
| @@ -0,0 +1,537 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $ | ||
| 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include "lua.h" | ||
| 7 | #include "lauxlib.h" | ||
| 8 | |||
| 9 | #include "lpcap.h" | ||
| 10 | #include "lptypes.h" | ||
| 11 | |||
| 12 | |||
| 13 | #define captype(cap) ((cap)->kind) | ||
| 14 | |||
| 15 | #define isclosecap(cap) (captype(cap) == Cclose) | ||
| 16 | |||
| 17 | #define closeaddr(c) ((c)->s + (c)->siz - 1) | ||
| 18 | |||
| 19 | #define isfullcap(cap) ((cap)->siz != 0) | ||
| 20 | |||
| 21 | #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) | ||
| 22 | |||
| 23 | #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) | ||
| 24 | |||
| 25 | |||
| 26 | |||
| 27 | /* | ||
| 28 | ** Put at the cache for Lua values the value indexed by 'v' in ktable | ||
| 29 | ** of the running pattern (if it is not there yet); returns its index. | ||
| 30 | */ | ||
| 31 | static int updatecache (CapState *cs, int v) { | ||
| 32 | int idx = cs->ptop + 1; /* stack index of cache for Lua values */ | ||
| 33 | if (v != cs->valuecached) { /* not there? */ | ||
| 34 | getfromktable(cs, v); /* get value from 'ktable' */ | ||
| 35 | lua_replace(cs->L, idx); /* put it at reserved stack position */ | ||
| 36 | cs->valuecached = v; /* keep track of what is there */ | ||
| 37 | } | ||
| 38 | return idx; | ||
| 39 | } | ||
| 40 | |||
| 41 | |||
| 42 | static int pushcapture (CapState *cs); | ||
| 43 | |||
| 44 | |||
| 45 | /* | ||
| 46 | ** Goes back in a list of captures looking for an open capture | ||
| 47 | ** corresponding to a close | ||
| 48 | */ | ||
| 49 | static Capture *findopen (Capture *cap) { | ||
| 50 | int n = 0; /* number of closes waiting an open */ | ||
| 51 | for (;;) { | ||
| 52 | cap--; | ||
| 53 | if (isclosecap(cap)) n++; /* one more open to skip */ | ||
| 54 | else if (!isfullcap(cap)) | ||
| 55 | if (n-- == 0) return cap; | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | |||
| 60 | /* | ||
| 61 | ** Go to the next capture | ||
| 62 | */ | ||
| 63 | static void nextcap (CapState *cs) { | ||
| 64 | Capture *cap = cs->cap; | ||
| 65 | if (!isfullcap(cap)) { /* not a single capture? */ | ||
| 66 | int n = 0; /* number of opens waiting a close */ | ||
| 67 | for (;;) { /* look for corresponding close */ | ||
| 68 | cap++; | ||
| 69 | if (isclosecap(cap)) { | ||
| 70 | if (n-- == 0) break; | ||
| 71 | } | ||
| 72 | else if (!isfullcap(cap)) n++; | ||
| 73 | } | ||
| 74 | } | ||
| 75 | cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */ | ||
| 76 | } | ||
| 77 | |||
| 78 | |||
| 79 | /* | ||
| 80 | ** Push on the Lua stack all values generated by nested captures inside | ||
| 81 | ** the current capture. Returns number of values pushed. 'addextra' | ||
| 82 | ** makes it push the entire match after all captured values. The | ||
| 83 | ** entire match is pushed also if there are no other nested values, | ||
| 84 | ** so the function never returns zero. | ||
| 85 | */ | ||
| 86 | static int pushnestedvalues (CapState *cs, int addextra) { | ||
| 87 | Capture *co = cs->cap; | ||
| 88 | if (isfullcap(cs->cap++)) { /* no nested captures? */ | ||
| 89 | lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ | ||
| 90 | return 1; /* that is it */ | ||
| 91 | } | ||
| 92 | else { | ||
| 93 | int n = 0; | ||
| 94 | while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ | ||
| 95 | n += pushcapture(cs); | ||
| 96 | if (addextra || n == 0) { /* need extra? */ | ||
| 97 | lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ | ||
| 98 | n++; | ||
| 99 | } | ||
| 100 | cs->cap++; /* skip close entry */ | ||
| 101 | return n; | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | |||
| 106 | /* | ||
| 107 | ** Push only the first value generated by nested captures | ||
| 108 | */ | ||
| 109 | static void pushonenestedvalue (CapState *cs) { | ||
| 110 | int n = pushnestedvalues(cs, 0); | ||
| 111 | if (n > 1) | ||
| 112 | lua_pop(cs->L, n - 1); /* pop extra values */ | ||
| 113 | } | ||
| 114 | |||
| 115 | |||
| 116 | /* | ||
| 117 | ** Try to find a named group capture with the name given at the top of | ||
| 118 | ** the stack; goes backward from 'cap'. | ||
| 119 | */ | ||
| 120 | static Capture *findback (CapState *cs, Capture *cap) { | ||
| 121 | lua_State *L = cs->L; | ||
| 122 | while (cap-- > cs->ocap) { /* repeat until end of list */ | ||
| 123 | if (isclosecap(cap)) | ||
| 124 | cap = findopen(cap); /* skip nested captures */ | ||
| 125 | else if (!isfullcap(cap)) | ||
| 126 | continue; /* opening an enclosing capture: skip and get previous */ | ||
| 127 | if (captype(cap) == Cgroup) { | ||
| 128 | getfromktable(cs, cap->idx); /* get group name */ | ||
| 129 | if (lua_equal(L, -2, -1)) { /* right group? */ | ||
| 130 | lua_pop(L, 2); /* remove reference name and group name */ | ||
| 131 | return cap; | ||
| 132 | } | ||
| 133 | else lua_pop(L, 1); /* remove group name */ | ||
| 134 | } | ||
| 135 | } | ||
| 136 | luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1)); | ||
| 137 | return NULL; /* to avoid warnings */ | ||
| 138 | } | ||
| 139 | |||
| 140 | |||
| 141 | /* | ||
| 142 | ** Back-reference capture. Return number of values pushed. | ||
| 143 | */ | ||
| 144 | static int backrefcap (CapState *cs) { | ||
| 145 | int n; | ||
| 146 | Capture *curr = cs->cap; | ||
| 147 | pushluaval(cs); /* reference name */ | ||
| 148 | cs->cap = findback(cs, curr); /* find corresponding group */ | ||
| 149 | n = pushnestedvalues(cs, 0); /* push group's values */ | ||
| 150 | cs->cap = curr + 1; | ||
| 151 | return n; | ||
| 152 | } | ||
| 153 | |||
| 154 | |||
| 155 | /* | ||
| 156 | ** Table capture: creates a new table and populates it with nested | ||
| 157 | ** captures. | ||
| 158 | */ | ||
| 159 | static int tablecap (CapState *cs) { | ||
| 160 | lua_State *L = cs->L; | ||
| 161 | int n = 0; | ||
| 162 | lua_newtable(L); | ||
| 163 | if (isfullcap(cs->cap++)) | ||
| 164 | return 1; /* table is empty */ | ||
| 165 | while (!isclosecap(cs->cap)) { | ||
| 166 | if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ | ||
| 167 | pushluaval(cs); /* push group name */ | ||
| 168 | pushonenestedvalue(cs); | ||
| 169 | lua_settable(L, -3); | ||
| 170 | } | ||
| 171 | else { /* not a named group */ | ||
| 172 | int i; | ||
| 173 | int k = pushcapture(cs); | ||
| 174 | for (i = k; i > 0; i--) /* store all values into table */ | ||
| 175 | lua_rawseti(L, -(i + 1), n + i); | ||
| 176 | n += k; | ||
| 177 | } | ||
| 178 | } | ||
| 179 | cs->cap++; /* skip close entry */ | ||
| 180 | return 1; /* number of values pushed (only the table) */ | ||
| 181 | } | ||
| 182 | |||
| 183 | |||
| 184 | /* | ||
| 185 | ** Table-query capture | ||
| 186 | */ | ||
| 187 | static int querycap (CapState *cs) { | ||
| 188 | int idx = cs->cap->idx; | ||
| 189 | pushonenestedvalue(cs); /* get nested capture */ | ||
| 190 | lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */ | ||
| 191 | if (!lua_isnil(cs->L, -1)) | ||
| 192 | return 1; | ||
| 193 | else { /* no value */ | ||
| 194 | lua_pop(cs->L, 1); /* remove nil */ | ||
| 195 | return 0; | ||
| 196 | } | ||
| 197 | } | ||
| 198 | |||
| 199 | |||
| 200 | /* | ||
| 201 | ** Fold capture | ||
| 202 | */ | ||
| 203 | static int foldcap (CapState *cs) { | ||
| 204 | int n; | ||
| 205 | lua_State *L = cs->L; | ||
| 206 | int idx = cs->cap->idx; | ||
| 207 | if (isfullcap(cs->cap++) || /* no nested captures? */ | ||
| 208 | isclosecap(cs->cap) || /* no nested captures (large subject)? */ | ||
| 209 | (n = pushcapture(cs)) == 0) /* nested captures with no values? */ | ||
| 210 | return luaL_error(L, "no initial value for fold capture"); | ||
| 211 | if (n > 1) | ||
| 212 | lua_pop(L, n - 1); /* leave only one result for accumulator */ | ||
| 213 | while (!isclosecap(cs->cap)) { | ||
| 214 | lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ | ||
| 215 | lua_insert(L, -2); /* put it before accumulator */ | ||
| 216 | n = pushcapture(cs); /* get next capture's values */ | ||
| 217 | lua_call(L, n + 1, 1); /* call folding function */ | ||
| 218 | } | ||
| 219 | cs->cap++; /* skip close entry */ | ||
| 220 | return 1; /* only accumulator left on the stack */ | ||
| 221 | } | ||
| 222 | |||
| 223 | |||
| 224 | /* | ||
| 225 | ** Function capture | ||
| 226 | */ | ||
| 227 | static int functioncap (CapState *cs) { | ||
| 228 | int n; | ||
| 229 | int top = lua_gettop(cs->L); | ||
| 230 | pushluaval(cs); /* push function */ | ||
| 231 | n = pushnestedvalues(cs, 0); /* push nested captures */ | ||
| 232 | lua_call(cs->L, n, LUA_MULTRET); /* call function */ | ||
| 233 | return lua_gettop(cs->L) - top; /* return function's results */ | ||
| 234 | } | ||
| 235 | |||
| 236 | |||
| 237 | /* | ||
| 238 | ** Select capture | ||
| 239 | */ | ||
| 240 | static int numcap (CapState *cs) { | ||
| 241 | int idx = cs->cap->idx; /* value to select */ | ||
| 242 | if (idx == 0) { /* no values? */ | ||
| 243 | nextcap(cs); /* skip entire capture */ | ||
| 244 | return 0; /* no value produced */ | ||
| 245 | } | ||
| 246 | else { | ||
| 247 | int n = pushnestedvalues(cs, 0); | ||
| 248 | if (n < idx) /* invalid index? */ | ||
| 249 | return luaL_error(cs->L, "no capture '%d'", idx); | ||
| 250 | else { | ||
| 251 | lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */ | ||
| 252 | lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */ | ||
| 253 | lua_pop(cs->L, n - 1); /* remove other captures */ | ||
| 254 | return 1; | ||
| 255 | } | ||
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 259 | |||
| 260 | /* | ||
| 261 | ** Return the stack index of the first runtime capture in the given | ||
| 262 | ** list of captures (or zero if no runtime captures) | ||
| 263 | */ | ||
| 264 | int finddyncap (Capture *cap, Capture *last) { | ||
| 265 | for (; cap < last; cap++) { | ||
| 266 | if (cap->kind == Cruntime) | ||
| 267 | return cap->idx; /* stack position of first capture */ | ||
| 268 | } | ||
| 269 | return 0; /* no dynamic captures in this segment */ | ||
| 270 | } | ||
| 271 | |||
| 272 | |||
| 273 | /* | ||
| 274 | ** Calls a runtime capture. Returns number of captures removed by | ||
| 275 | ** the call, including the initial Cgroup. (Captures to be added are | ||
| 276 | ** on the Lua stack.) | ||
| 277 | */ | ||
| 278 | int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { | ||
| 279 | int n, id; | ||
| 280 | lua_State *L = cs->L; | ||
| 281 | int otop = lua_gettop(L); | ||
| 282 | Capture *open = findopen(close); | ||
| 283 | assert(captype(open) == Cgroup); | ||
| 284 | id = finddyncap(open, close); /* get first dynamic capture argument */ | ||
| 285 | close->kind = Cclose; /* closes the group */ | ||
| 286 | close->s = s; | ||
| 287 | cs->cap = open; cs->valuecached = 0; /* prepare capture state */ | ||
| 288 | luaL_checkstack(L, 4, "too many runtime captures"); | ||
| 289 | pushluaval(cs); /* push function to be called */ | ||
| 290 | lua_pushvalue(L, SUBJIDX); /* push original subject */ | ||
| 291 | lua_pushinteger(L, s - cs->s + 1); /* push current position */ | ||
| 292 | n = pushnestedvalues(cs, 0); /* push nested captures */ | ||
| 293 | lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */ | ||
| 294 | if (id > 0) { /* are there old dynamic captures to be removed? */ | ||
| 295 | int i; | ||
| 296 | for (i = id; i <= otop; i++) | ||
| 297 | lua_remove(L, id); /* remove old dynamic captures */ | ||
| 298 | *rem = otop - id + 1; /* total number of dynamic captures removed */ | ||
| 299 | } | ||
| 300 | else | ||
| 301 | *rem = 0; /* no dynamic captures removed */ | ||
| 302 | return close - open; /* number of captures of all kinds removed */ | ||
| 303 | } | ||
| 304 | |||
| 305 | |||
| 306 | /* | ||
| 307 | ** Auxiliary structure for substitution and string captures: keep | ||
| 308 | ** information about nested captures for future use, avoiding to push | ||
| 309 | ** string results into Lua | ||
| 310 | */ | ||
| 311 | typedef struct StrAux { | ||
| 312 | int isstring; /* whether capture is a string */ | ||
| 313 | union { | ||
| 314 | Capture *cp; /* if not a string, respective capture */ | ||
| 315 | struct { /* if it is a string... */ | ||
| 316 | const char *s; /* ... starts here */ | ||
| 317 | const char *e; /* ... ends here */ | ||
| 318 | } s; | ||
| 319 | } u; | ||
| 320 | } StrAux; | ||
| 321 | |||
| 322 | #define MAXSTRCAPS 10 | ||
| 323 | |||
| 324 | /* | ||
| 325 | ** Collect values from current capture into array 'cps'. Current | ||
| 326 | ** capture must be Cstring (first call) or Csimple (recursive calls). | ||
| 327 | ** (In first call, fills %0 with whole match for Cstring.) | ||
| 328 | ** Returns number of elements in the array that were filled. | ||
| 329 | */ | ||
| 330 | static int getstrcaps (CapState *cs, StrAux *cps, int n) { | ||
| 331 | int k = n++; | ||
| 332 | cps[k].isstring = 1; /* get string value */ | ||
| 333 | cps[k].u.s.s = cs->cap->s; /* starts here */ | ||
| 334 | if (!isfullcap(cs->cap++)) { /* nested captures? */ | ||
| 335 | while (!isclosecap(cs->cap)) { /* traverse them */ | ||
| 336 | if (n >= MAXSTRCAPS) /* too many captures? */ | ||
| 337 | nextcap(cs); /* skip extra captures (will not need them) */ | ||
| 338 | else if (captype(cs->cap) == Csimple) /* string? */ | ||
| 339 | n = getstrcaps(cs, cps, n); /* put info. into array */ | ||
| 340 | else { | ||
| 341 | cps[n].isstring = 0; /* not a string */ | ||
| 342 | cps[n].u.cp = cs->cap; /* keep original capture */ | ||
| 343 | nextcap(cs); | ||
| 344 | n++; | ||
| 345 | } | ||
| 346 | } | ||
| 347 | cs->cap++; /* skip close */ | ||
| 348 | } | ||
| 349 | cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ | ||
| 350 | return n; | ||
| 351 | } | ||
| 352 | |||
| 353 | |||
| 354 | /* | ||
| 355 | ** add next capture value (which should be a string) to buffer 'b' | ||
| 356 | */ | ||
| 357 | static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); | ||
| 358 | |||
| 359 | |||
| 360 | /* | ||
| 361 | ** String capture: add result to buffer 'b' (instead of pushing | ||
| 362 | ** it into the stack) | ||
| 363 | */ | ||
| 364 | static void stringcap (luaL_Buffer *b, CapState *cs) { | ||
| 365 | StrAux cps[MAXSTRCAPS]; | ||
| 366 | int n; | ||
| 367 | size_t len, i; | ||
| 368 | const char *fmt; /* format string */ | ||
| 369 | fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); | ||
| 370 | n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */ | ||
| 371 | for (i = 0; i < len; i++) { /* traverse them */ | ||
| 372 | if (fmt[i] != '%') /* not an escape? */ | ||
| 373 | luaL_addchar(b, fmt[i]); /* add it to buffer */ | ||
| 374 | else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */ | ||
| 375 | luaL_addchar(b, fmt[i]); /* add to buffer */ | ||
| 376 | else { | ||
| 377 | int l = fmt[i] - '0'; /* capture index */ | ||
| 378 | if (l > n) | ||
| 379 | luaL_error(cs->L, "invalid capture index (%d)", l); | ||
| 380 | else if (cps[l].isstring) | ||
| 381 | luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); | ||
| 382 | else { | ||
| 383 | Capture *curr = cs->cap; | ||
| 384 | cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ | ||
| 385 | if (!addonestring(b, cs, "capture")) | ||
| 386 | luaL_error(cs->L, "no values in capture index %d", l); | ||
| 387 | cs->cap = curr; /* continue from where it stopped */ | ||
| 388 | } | ||
| 389 | } | ||
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 393 | |||
| 394 | /* | ||
| 395 | ** Substitution capture: add result to buffer 'b' | ||
| 396 | */ | ||
| 397 | static void substcap (luaL_Buffer *b, CapState *cs) { | ||
| 398 | const char *curr = cs->cap->s; | ||
| 399 | if (isfullcap(cs->cap)) /* no nested captures? */ | ||
| 400 | luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ | ||
| 401 | else { | ||
| 402 | cs->cap++; /* skip open entry */ | ||
| 403 | while (!isclosecap(cs->cap)) { /* traverse nested captures */ | ||
| 404 | const char *next = cs->cap->s; | ||
| 405 | luaL_addlstring(b, curr, next - curr); /* add text up to capture */ | ||
| 406 | if (addonestring(b, cs, "replacement")) | ||
| 407 | curr = closeaddr(cs->cap - 1); /* continue after match */ | ||
| 408 | else /* no capture value */ | ||
| 409 | curr = next; /* keep original text in final result */ | ||
| 410 | } | ||
| 411 | luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ | ||
| 412 | } | ||
| 413 | cs->cap++; /* go to next capture */ | ||
| 414 | } | ||
| 415 | |||
| 416 | |||
| 417 | /* | ||
| 418 | ** Evaluates a capture and adds its first value to buffer 'b'; returns | ||
| 419 | ** whether there was a value | ||
| 420 | */ | ||
| 421 | static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { | ||
| 422 | switch (captype(cs->cap)) { | ||
| 423 | case Cstring: | ||
| 424 | stringcap(b, cs); /* add capture directly to buffer */ | ||
| 425 | return 1; | ||
| 426 | case Csubst: | ||
| 427 | substcap(b, cs); /* add capture directly to buffer */ | ||
| 428 | return 1; | ||
| 429 | default: { | ||
| 430 | lua_State *L = cs->L; | ||
| 431 | int n = pushcapture(cs); | ||
| 432 | if (n > 0) { | ||
| 433 | if (n > 1) lua_pop(L, n - 1); /* only one result */ | ||
| 434 | if (!lua_isstring(L, -1)) | ||
| 435 | luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1)); | ||
| 436 | luaL_addvalue(b); | ||
| 437 | } | ||
| 438 | return n; | ||
| 439 | } | ||
| 440 | } | ||
| 441 | } | ||
| 442 | |||
| 443 | |||
| 444 | /* | ||
| 445 | ** Push all values of the current capture into the stack; returns | ||
| 446 | ** number of values pushed | ||
| 447 | */ | ||
| 448 | static int pushcapture (CapState *cs) { | ||
| 449 | lua_State *L = cs->L; | ||
| 450 | luaL_checkstack(L, 4, "too many captures"); | ||
| 451 | switch (captype(cs->cap)) { | ||
| 452 | case Cposition: { | ||
| 453 | lua_pushinteger(L, cs->cap->s - cs->s + 1); | ||
| 454 | cs->cap++; | ||
| 455 | return 1; | ||
| 456 | } | ||
| 457 | case Cconst: { | ||
| 458 | pushluaval(cs); | ||
| 459 | cs->cap++; | ||
| 460 | return 1; | ||
| 461 | } | ||
| 462 | case Carg: { | ||
| 463 | int arg = (cs->cap++)->idx; | ||
| 464 | if (arg + FIXEDARGS > cs->ptop) | ||
| 465 | return luaL_error(L, "reference to absent argument #%d", arg); | ||
| 466 | lua_pushvalue(L, arg + FIXEDARGS); | ||
| 467 | return 1; | ||
| 468 | } | ||
| 469 | case Csimple: { | ||
| 470 | int k = pushnestedvalues(cs, 1); | ||
| 471 | lua_insert(L, -k); /* make whole match be first result */ | ||
| 472 | return k; | ||
| 473 | } | ||
| 474 | case Cruntime: { | ||
| 475 | lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ | ||
| 476 | return 1; | ||
| 477 | } | ||
| 478 | case Cstring: { | ||
| 479 | luaL_Buffer b; | ||
| 480 | luaL_buffinit(L, &b); | ||
| 481 | stringcap(&b, cs); | ||
| 482 | luaL_pushresult(&b); | ||
| 483 | return 1; | ||
| 484 | } | ||
| 485 | case Csubst: { | ||
| 486 | luaL_Buffer b; | ||
| 487 | luaL_buffinit(L, &b); | ||
| 488 | substcap(&b, cs); | ||
| 489 | luaL_pushresult(&b); | ||
| 490 | return 1; | ||
| 491 | } | ||
| 492 | case Cgroup: { | ||
| 493 | if (cs->cap->idx == 0) /* anonymous group? */ | ||
| 494 | return pushnestedvalues(cs, 0); /* add all nested values */ | ||
| 495 | else { /* named group: add no values */ | ||
| 496 | nextcap(cs); /* skip capture */ | ||
| 497 | return 0; | ||
| 498 | } | ||
| 499 | } | ||
| 500 | case Cbackref: return backrefcap(cs); | ||
| 501 | case Ctable: return tablecap(cs); | ||
| 502 | case Cfunction: return functioncap(cs); | ||
| 503 | case Cnum: return numcap(cs); | ||
| 504 | case Cquery: return querycap(cs); | ||
| 505 | case Cfold: return foldcap(cs); | ||
| 506 | default: assert(0); return 0; | ||
| 507 | } | ||
| 508 | } | ||
| 509 | |||
| 510 | |||
| 511 | /* | ||
| 512 | ** Prepare a CapState structure and traverse the entire list of | ||
| 513 | ** captures in the stack pushing its results. 's' is the subject | ||
| 514 | ** string, 'r' is the final position of the match, and 'ptop' | ||
| 515 | ** the index in the stack where some useful values were pushed. | ||
| 516 | ** Returns the number of results pushed. (If the list produces no | ||
| 517 | ** results, push the final position of the match.) | ||
| 518 | */ | ||
| 519 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { | ||
| 520 | Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); | ||
| 521 | int n = 0; | ||
| 522 | if (!isclosecap(capture)) { /* is there any capture? */ | ||
| 523 | CapState cs; | ||
| 524 | cs.ocap = cs.cap = capture; cs.L = L; | ||
| 525 | cs.s = s; cs.valuecached = 0; cs.ptop = ptop; | ||
| 526 | do { /* collect their values */ | ||
| 527 | n += pushcapture(&cs); | ||
| 528 | } while (!isclosecap(cs.cap)); | ||
| 529 | } | ||
| 530 | if (n == 0) { /* no capture values? */ | ||
| 531 | lua_pushinteger(L, r - s + 1); /* return only end position */ | ||
| 532 | n = 1; | ||
| 533 | } | ||
| 534 | return n; | ||
| 535 | } | ||
| 536 | |||
| 537 | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $ | ||
| 3 | */ | ||
| 4 | |||
| 5 | #if !defined(lpcap_h) | ||
| 6 | #define lpcap_h | ||
| 7 | |||
| 8 | |||
| 9 | #include "lptypes.h" | ||
| 10 | |||
| 11 | |||
| 12 | /* kinds of captures */ | ||
| 13 | typedef enum CapKind { | ||
| 14 | Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, | ||
| 15 | Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup | ||
| 16 | } CapKind; | ||
| 17 | |||
| 18 | |||
| 19 | typedef struct Capture { | ||
| 20 | const char *s; /* subject position */ | ||
| 21 | short idx; /* extra info about capture (group name, arg index, etc.) */ | ||
| 22 | byte kind; /* kind of capture */ | ||
| 23 | byte siz; /* size of full capture + 1 (0 = not a full capture) */ | ||
| 24 | } Capture; | ||
| 25 | |||
| 26 | |||
| 27 | typedef struct CapState { | ||
| 28 | Capture *cap; /* current capture */ | ||
| 29 | Capture *ocap; /* (original) capture list */ | ||
| 30 | lua_State *L; | ||
| 31 | int ptop; /* index of last argument to 'match' */ | ||
| 32 | const char *s; /* original string */ | ||
| 33 | int valuecached; /* value stored in cache slot */ | ||
| 34 | } CapState; | ||
| 35 | |||
| 36 | |||
| 37 | int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); | ||
| 38 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop); | ||
| 39 | int finddyncap (Capture *cap, Capture *last); | ||
| 40 | |||
| 41 | #endif | ||
| 42 | |||
| 43 | |||
diff --git a/lpcode.c b/lpcode.c new file mode 100644 index 0000000..4a49e7d --- /dev/null +++ b/lpcode.c | |||
| @@ -0,0 +1,1016 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $ | ||
| 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include <limits.h> | ||
| 7 | |||
| 8 | |||
| 9 | #include "lua.h" | ||
| 10 | #include "lauxlib.h" | ||
| 11 | |||
| 12 | #include "lptypes.h" | ||
| 13 | #include "lpcode.h" | ||
| 14 | |||
| 15 | |||
| 16 | /* signals a "no-instruction */ | ||
| 17 | #define NOINST -1 | ||
| 18 | |||
| 19 | |||
| 20 | |||
| 21 | static const Charset fullset_ = | ||
| 22 | {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 23 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 24 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
| 25 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}; | ||
| 26 | |||
| 27 | static const Charset *fullset = &fullset_; | ||
| 28 | |||
| 29 | /* | ||
| 30 | ** {====================================================== | ||
| 31 | ** Analysis and some optimizations | ||
| 32 | ** ======================================================= | ||
| 33 | */ | ||
| 34 | |||
| 35 | /* | ||
| 36 | ** Check whether a charset is empty (IFail), singleton (IChar), | ||
| 37 | ** full (IAny), or none of those (ISet). | ||
| 38 | */ | ||
| 39 | static Opcode charsettype (const byte *cs, int *c) { | ||
| 40 | int count = 0; | ||
| 41 | int i; | ||
| 42 | int candidate = -1; /* candidate position for a char */ | ||
| 43 | for (i = 0; i < CHARSETSIZE; i++) { | ||
| 44 | int b = cs[i]; | ||
| 45 | if (b == 0) { | ||
| 46 | if (count > 1) return ISet; /* else set is still empty */ | ||
| 47 | } | ||
| 48 | else if (b == 0xFF) { | ||
| 49 | if (count < (i * BITSPERCHAR)) | ||
| 50 | return ISet; | ||
| 51 | else count += BITSPERCHAR; /* set is still full */ | ||
| 52 | } | ||
| 53 | else if ((b & (b - 1)) == 0) { /* byte has only one bit? */ | ||
| 54 | if (count > 0) | ||
| 55 | return ISet; /* set is neither full nor empty */ | ||
| 56 | else { /* set has only one char till now; track it */ | ||
| 57 | count++; | ||
| 58 | candidate = i; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | else return ISet; /* byte is neither empty, full, nor singleton */ | ||
| 62 | } | ||
| 63 | switch (count) { | ||
| 64 | case 0: return IFail; /* empty set */ | ||
| 65 | case 1: { /* singleton; find character bit inside byte */ | ||
| 66 | int b = cs[candidate]; | ||
| 67 | *c = candidate * BITSPERCHAR; | ||
| 68 | if ((b & 0xF0) != 0) { *c += 4; b >>= 4; } | ||
| 69 | if ((b & 0x0C) != 0) { *c += 2; b >>= 2; } | ||
| 70 | if ((b & 0x02) != 0) { *c += 1; } | ||
| 71 | return IChar; | ||
| 72 | } | ||
| 73 | default: { | ||
| 74 | assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */ | ||
| 75 | return IAny; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | ||
| 81 | ** A few basic operations on Charsets | ||
| 82 | */ | ||
| 83 | static void cs_complement (Charset *cs) { | ||
| 84 | loopset(i, cs->cs[i] = ~cs->cs[i]); | ||
| 85 | } | ||
| 86 | |||
| 87 | |||
| 88 | static int cs_equal (const byte *cs1, const byte *cs2) { | ||
| 89 | loopset(i, if (cs1[i] != cs2[i]) return 0); | ||
| 90 | return 1; | ||
| 91 | } | ||
| 92 | |||
| 93 | |||
| 94 | /* | ||
| 95 | ** computes whether sets cs1 and cs2 are disjoint | ||
| 96 | */ | ||
| 97 | static int cs_disjoint (const Charset *cs1, const Charset *cs2) { | ||
| 98 | loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) | ||
| 99 | return 1; | ||
| 100 | } | ||
| 101 | |||
| 102 | |||
| 103 | /* | ||
| 104 | ** Convert a 'char' pattern (TSet, TChar, TAny) to a charset | ||
| 105 | */ | ||
| 106 | int tocharset (TTree *tree, Charset *cs) { | ||
| 107 | switch (tree->tag) { | ||
| 108 | case TSet: { /* copy set */ | ||
| 109 | loopset(i, cs->cs[i] = treebuffer(tree)[i]); | ||
| 110 | return 1; | ||
| 111 | } | ||
| 112 | case TChar: { /* only one char */ | ||
| 113 | assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); | ||
| 114 | loopset(i, cs->cs[i] = 0); /* erase all chars */ | ||
| 115 | setchar(cs->cs, tree->u.n); /* add that one */ | ||
| 116 | return 1; | ||
| 117 | } | ||
| 118 | case TAny: { | ||
| 119 | loopset(i, cs->cs[i] = 0xFF); /* add all to the set */ | ||
| 120 | return 1; | ||
| 121 | } | ||
| 122 | default: return 0; | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | |||
| 127 | /* | ||
| 128 | ** Checks whether a pattern has captures | ||
| 129 | */ | ||
| 130 | int hascaptures (TTree *tree) { | ||
| 131 | tailcall: | ||
| 132 | switch (tree->tag) { | ||
| 133 | case TCapture: case TRunTime: | ||
| 134 | return 1; | ||
| 135 | default: { | ||
| 136 | switch (numsiblings[tree->tag]) { | ||
| 137 | case 1: /* return hascaptures(sib1(tree)); */ | ||
| 138 | tree = sib1(tree); goto tailcall; | ||
| 139 | case 2: | ||
| 140 | if (hascaptures(sib1(tree))) return 1; | ||
| 141 | /* else return hascaptures(sib2(tree)); */ | ||
| 142 | tree = sib2(tree); goto tailcall; | ||
| 143 | default: assert(numsiblings[tree->tag] == 0); return 0; | ||
| 144 | } | ||
| 145 | } | ||
| 146 | } | ||
| 147 | } | ||
| 148 | |||
| 149 | |||
| 150 | /* | ||
| 151 | ** Checks how a pattern behaves regarding the empty string, | ||
| 152 | ** in one of two different ways: | ||
| 153 | ** A pattern is *nullable* if it can match without consuming any character; | ||
| 154 | ** A pattern is *nofail* if it never fails for any string | ||
| 155 | ** (including the empty string). | ||
| 156 | ** The difference is only for predicates and run-time captures; | ||
| 157 | ** for other patterns, the two properties are equivalent. | ||
| 158 | ** (With predicates, &'a' is nullable but not nofail. Of course, | ||
| 159 | ** nofail => nullable.) | ||
| 160 | ** These functions are all convervative in the following way: | ||
| 161 | ** p is nullable => nullable(p) | ||
| 162 | ** nofail(p) => p cannot fail | ||
| 163 | ** The function assumes that TOpenCall is not nullable; | ||
| 164 | ** this will be checked again when the grammar is fixed.) | ||
| 165 | ** Run-time captures can do whatever they want, so the result | ||
| 166 | ** is conservative. | ||
| 167 | */ | ||
| 168 | int checkaux (TTree *tree, int pred) { | ||
| 169 | tailcall: | ||
| 170 | switch (tree->tag) { | ||
| 171 | case TChar: case TSet: case TAny: | ||
| 172 | case TFalse: case TOpenCall: case TThrow: /* labeled failure */ | ||
| 173 | return 0; /* not nullable */ | ||
| 174 | case TRep: case TTrue: | ||
| 175 | return 1; /* no fail */ | ||
| 176 | case TNot: case TBehind: /* can match empty, but can fail */ | ||
| 177 | if (pred == PEnofail) return 0; | ||
| 178 | else return 1; /* PEnullable */ | ||
| 179 | case TAnd: /* can match empty; fail iff body does */ | ||
| 180 | if (pred == PEnullable) return 1; | ||
| 181 | /* else return checkaux(sib1(tree), pred); */ | ||
| 182 | tree = sib1(tree); goto tailcall; | ||
| 183 | case TRunTime: /* can fail; match empty iff body does */ | ||
| 184 | if (pred == PEnofail) return 0; | ||
| 185 | /* else return checkaux(sib1(tree), pred); */ | ||
| 186 | tree = sib1(tree); goto tailcall; | ||
| 187 | case TSeq: | ||
| 188 | if (!checkaux(sib1(tree), pred)) return 0; | ||
| 189 | /* else return checkaux(sib2(tree), pred); */ | ||
| 190 | tree = sib2(tree); goto tailcall; | ||
| 191 | case TChoice: case TLabChoice: /* labeled failure */ | ||
| 192 | if (checkaux(sib2(tree), pred)) return 1; | ||
| 193 | /* else return checkaux(sib1(tree), pred); */ | ||
| 194 | tree = sib1(tree); goto tailcall; | ||
| 195 | case TCapture: case TGrammar: case TRule: | ||
| 196 | /* return checkaux(sib1(tree), pred); */ | ||
| 197 | tree = sib1(tree); goto tailcall; | ||
| 198 | case TCall: /* return checkaux(sib2(tree), pred); */ | ||
| 199 | tree = sib2(tree); goto tailcall; | ||
| 200 | default: assert(0); return 0; | ||
| 201 | }; | ||
| 202 | } | ||
| 203 | |||
| 204 | |||
| 205 | /* | ||
| 206 | ** number of characters to match a pattern (or -1 if variable) | ||
| 207 | ** ('count' avoids infinite loops for grammars) | ||
| 208 | */ | ||
| 209 | int fixedlenx (TTree *tree, int count, int len) { | ||
| 210 | tailcall: | ||
| 211 | switch (tree->tag) { | ||
| 212 | case TChar: case TSet: case TAny: | ||
| 213 | return len + 1; | ||
| 214 | case TFalse: case TTrue: case TNot: case TAnd: case TBehind: | ||
| 215 | case TThrow: /* labeled failure */ | ||
| 216 | return len; | ||
| 217 | case TRep: case TRunTime: case TOpenCall: | ||
| 218 | return -1; | ||
| 219 | case TCapture: case TRule: case TGrammar: | ||
| 220 | /* return fixedlenx(sib1(tree), count); */ | ||
| 221 | tree = sib1(tree); goto tailcall; | ||
| 222 | case TCall: | ||
| 223 | if (count++ >= MAXRULES) | ||
| 224 | return -1; /* may be a loop */ | ||
| 225 | /* else return fixedlenx(sib2(tree), count); */ | ||
| 226 | tree = sib2(tree); goto tailcall; | ||
| 227 | case TSeq: { | ||
| 228 | len = fixedlenx(sib1(tree), count, len); | ||
| 229 | if (len < 0) return -1; | ||
| 230 | /* else return fixedlenx(sib2(tree), count, len); */ | ||
| 231 | tree = sib2(tree); goto tailcall; | ||
| 232 | } | ||
| 233 | case TChoice: case TLabChoice: { /* labeled failure */ | ||
| 234 | int n1, n2; | ||
| 235 | n1 = fixedlenx(sib1(tree), count, len); | ||
| 236 | if (n1 < 0) return -1; | ||
| 237 | n2 = fixedlenx(sib2(tree), count, len); | ||
| 238 | if (n1 == n2) return n1; | ||
| 239 | else return -1; | ||
| 240 | } | ||
| 241 | default: assert(0); return 0; | ||
| 242 | }; | ||
| 243 | } | ||
| 244 | |||
| 245 | |||
| 246 | /* | ||
| 247 | ** Computes the 'first set' of a pattern. | ||
| 248 | ** The result is a conservative aproximation: | ||
| 249 | ** match p ax -> x' for some x ==> a in first(p). | ||
| 250 | ** The set 'follow' is the first set of what follows the | ||
| 251 | ** pattern (full set if nothing follows it). | ||
| 252 | ** The function returns 0 when this set can be used for | ||
| 253 | ** tests that avoid the pattern altogether. | ||
| 254 | ** A non-zero return can happen for two reasons: | ||
| 255 | ** 1) match p '' -> '' ==> returns 1. | ||
| 256 | ** (tests cannot be used because they always fail for an empty input) | ||
| 257 | ** 2) there is a match-time capture ==> returns 2. | ||
| 258 | ** (match-time captures should not be avoided by optimizations) | ||
| 259 | */ | ||
| 260 | static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | ||
| 261 | tailcall: | ||
| 262 | switch (tree->tag) { | ||
| 263 | case TChar: case TSet: case TAny: { | ||
| 264 | tocharset(tree, firstset); | ||
| 265 | return 0; | ||
| 266 | } | ||
| 267 | case TTrue: { | ||
| 268 | loopset(i, firstset->cs[i] = follow->cs[i]); | ||
| 269 | return 1; | ||
| 270 | } | ||
| 271 | case TFalse: { | ||
| 272 | loopset(i, firstset->cs[i] = 0); | ||
| 273 | return 0; | ||
| 274 | } | ||
| 275 | case TThrow: { /* (?)labeled failure: should always throw the label */ | ||
| 276 | loopset(i, firstset->cs[i] = follow->cs[i]); /* follow = fullset? */ | ||
| 277 | return 1; | ||
| 278 | } | ||
| 279 | case TChoice: case TLabChoice: { /*(?) labeled failure */ | ||
| 280 | Charset csaux; | ||
| 281 | int e1 = getfirst(sib1(tree), follow, firstset); | ||
| 282 | int e2 = getfirst(sib2(tree), follow, &csaux); | ||
| 283 | loopset(i, firstset->cs[i] |= csaux.cs[i]); | ||
| 284 | return e1 | e2; | ||
| 285 | } | ||
| 286 | case TSeq: { | ||
| 287 | if (!nullable(sib1(tree))) { | ||
| 288 | /* return getfirst(sib1(tree), fullset, firstset); */ | ||
| 289 | tree = sib1(tree); follow = fullset; goto tailcall; | ||
| 290 | } | ||
| 291 | else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */ | ||
| 292 | Charset csaux; | ||
| 293 | int e2 = getfirst(sib2(tree), follow, &csaux); | ||
| 294 | int e1 = getfirst(sib1(tree), &csaux, firstset); | ||
| 295 | if (e1 == 0) return 0; /* 'e1' ensures that first can be used */ | ||
| 296 | else if ((e1 | e2) & 2) /* one of the children has a matchtime? */ | ||
| 297 | return 2; /* pattern has a matchtime capture */ | ||
| 298 | else return e2; /* else depends on 'e2' */ | ||
| 299 | } | ||
| 300 | } | ||
| 301 | case TRep: { | ||
| 302 | getfirst(sib1(tree), follow, firstset); | ||
| 303 | loopset(i, firstset->cs[i] |= follow->cs[i]); | ||
| 304 | return 1; /* accept the empty string */ | ||
| 305 | } | ||
| 306 | case TCapture: case TGrammar: case TRule: { | ||
| 307 | /* return getfirst(sib1(tree), follow, firstset); */ | ||
| 308 | tree = sib1(tree); goto tailcall; | ||
| 309 | } | ||
| 310 | case TRunTime: { /* function invalidates any follow info. */ | ||
| 311 | int e = getfirst(sib1(tree), fullset, firstset); | ||
| 312 | if (e) return 2; /* function is not "protected"? */ | ||
| 313 | else return 0; /* pattern inside capture ensures first can be used */ | ||
| 314 | } | ||
| 315 | case TCall: { | ||
| 316 | /* return getfirst(sib2(tree), follow, firstset); */ | ||
| 317 | tree = sib2(tree); goto tailcall; | ||
| 318 | } | ||
| 319 | case TAnd: { | ||
| 320 | int e = getfirst(sib1(tree), follow, firstset); | ||
| 321 | loopset(i, firstset->cs[i] &= follow->cs[i]); | ||
| 322 | return e; | ||
| 323 | } | ||
| 324 | case TNot: { | ||
| 325 | if (tocharset(sib1(tree), firstset)) { | ||
| 326 | cs_complement(firstset); | ||
| 327 | return 1; | ||
| 328 | } | ||
| 329 | /* else go through */ | ||
| 330 | } | ||
| 331 | case TBehind: { /* instruction gives no new information */ | ||
| 332 | /* call 'getfirst' to check for math-time captures */ | ||
| 333 | int e = getfirst(sib1(tree), follow, firstset); | ||
| 334 | loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */ | ||
| 335 | return e | 1; /* always can accept the empty string */ | ||
| 336 | } | ||
| 337 | default: assert(0); return 0; | ||
| 338 | } | ||
| 339 | } | ||
| 340 | |||
| 341 | |||
| 342 | /* | ||
| 343 | ** If it returns true, then pattern can fail only depending on the next | ||
| 344 | ** character of the subject | ||
| 345 | */ | ||
| 346 | static int headfail (TTree *tree) { | ||
| 347 | tailcall: | ||
| 348 | switch (tree->tag) { | ||
| 349 | case TChar: case TSet: case TAny: case TFalse: | ||
| 350 | return 1; | ||
| 351 | case TTrue: case TRep: case TRunTime: case TNot: | ||
| 352 | case TBehind: | ||
| 353 | case TThrow: /* labeled failure: should always throw the label */ | ||
| 354 | return 0; | ||
| 355 | case TCapture: case TGrammar: case TRule: case TAnd: | ||
| 356 | tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */ | ||
| 357 | case TCall: | ||
| 358 | tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */ | ||
| 359 | case TSeq: | ||
| 360 | if (!nofail(sib2(tree))) return 0; | ||
| 361 | /* else return headfail(sib1(tree)); */ | ||
| 362 | tree = sib1(tree); goto tailcall; | ||
| 363 | case TChoice: case TLabChoice: /* labeled failure */ | ||
| 364 | if (!headfail(sib1(tree))) return 0; | ||
| 365 | /* else return headfail(sib2(tree)); */ | ||
| 366 | tree = sib2(tree); goto tailcall; | ||
| 367 | default: assert(0); return 0; | ||
| 368 | } | ||
| 369 | } | ||
| 370 | |||
| 371 | |||
| 372 | /* | ||
| 373 | ** Check whether the code generation for the given tree can benefit | ||
| 374 | ** from a follow set (to avoid computing the follow set when it is | ||
| 375 | ** not needed) | ||
| 376 | */ | ||
| 377 | static int needfollow (TTree *tree) { | ||
| 378 | tailcall: | ||
| 379 | switch (tree->tag) { | ||
| 380 | case TChar: case TSet: case TAny: | ||
| 381 | case TFalse: case TTrue: case TAnd: case TNot: | ||
| 382 | case TRunTime: case TGrammar: case TCall: case TBehind: | ||
| 383 | case TThrow: case TLabChoice: /* (?)labeled failure */ | ||
| 384 | return 0; | ||
| 385 | case TChoice: case TRep: | ||
| 386 | return 1; | ||
| 387 | case TCapture: | ||
| 388 | tree = sib1(tree); goto tailcall; | ||
| 389 | case TSeq: | ||
| 390 | tree = sib2(tree); goto tailcall; | ||
| 391 | default: assert(0); return 0; | ||
| 392 | } | ||
| 393 | } | ||
| 394 | |||
| 395 | /* }====================================================== */ | ||
| 396 | |||
| 397 | |||
| 398 | |||
| 399 | /* | ||
| 400 | ** {====================================================== | ||
| 401 | ** Code generation | ||
| 402 | ** ======================================================= | ||
| 403 | */ | ||
| 404 | |||
| 405 | |||
| 406 | /* | ||
| 407 | ** size of an instruction | ||
| 408 | */ | ||
| 409 | int sizei (const Instruction *i) { | ||
| 410 | switch((Opcode)i->i.code) { | ||
| 411 | case ISet: case ISpan: return CHARSETINSTSIZE; | ||
| 412 | case ITestSet: return CHARSETINSTSIZE + 1; | ||
| 413 | case ITestChar: case ITestAny: case IChoice: case IJmp: | ||
| 414 | case ICall: case IOpenCall: case ICommit: case IPartialCommit: | ||
| 415 | case IBackCommit: case IThrow: return 2; /* labeled failure */ | ||
| 416 | case ILabChoice: return 3; /* labeled failure */ | ||
| 417 | default: return 1; | ||
| 418 | } | ||
| 419 | } | ||
| 420 | |||
| 421 | |||
| 422 | /* | ||
| 423 | ** state for the compiler | ||
| 424 | */ | ||
| 425 | typedef struct CompileState { | ||
| 426 | Pattern *p; /* pattern being compiled */ | ||
| 427 | int ncode; /* next position in p->code to be filled */ | ||
| 428 | lua_State *L; | ||
| 429 | } CompileState; | ||
| 430 | |||
| 431 | |||
| 432 | /* | ||
| 433 | ** code generation is recursive; 'opt' indicates that the code is | ||
| 434 | ** being generated under a 'IChoice' operator jumping to its end. | ||
| 435 | ** 'tt' points to a previous test protecting this code. 'fl' is | ||
| 436 | ** the follow set of the pattern. | ||
| 437 | */ | ||
| 438 | static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | ||
| 439 | const Charset *fl); | ||
| 440 | |||
| 441 | |||
| 442 | void reallocprog (lua_State *L, Pattern *p, int nsize) { | ||
| 443 | void *ud; | ||
| 444 | lua_Alloc f = lua_getallocf(L, &ud); | ||
| 445 | void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), | ||
| 446 | nsize * sizeof(Instruction)); | ||
| 447 | if (newblock == NULL && nsize > 0) | ||
| 448 | luaL_error(L, "not enough memory"); | ||
| 449 | p->code = (Instruction *)newblock; | ||
| 450 | p->codesize = nsize; | ||
| 451 | } | ||
| 452 | |||
| 453 | |||
| 454 | static int nextinstruction (CompileState *compst) { | ||
| 455 | int size = compst->p->codesize; | ||
| 456 | if (compst->ncode >= size) | ||
| 457 | reallocprog(compst->L, compst->p, size * 2); | ||
| 458 | return compst->ncode++; | ||
| 459 | } | ||
| 460 | |||
| 461 | |||
| 462 | #define getinstr(cs,i) ((cs)->p->code[i]) | ||
| 463 | |||
| 464 | |||
| 465 | static int addinstruction (CompileState *compst, Opcode op, int aux) { | ||
| 466 | int i = nextinstruction(compst); | ||
| 467 | getinstr(compst, i).i.code = op; | ||
| 468 | getinstr(compst, i).i.aux = aux; | ||
| 469 | return i; | ||
| 470 | } | ||
| 471 | |||
| 472 | |||
| 473 | static int addoffsetinst (CompileState *compst, Opcode op) { | ||
| 474 | int i = addinstruction(compst, op, 0); /* instruction */ | ||
| 475 | addinstruction(compst, (Opcode)0, 0); /* open space for offset */ | ||
| 476 | assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2); | ||
| 477 | return i; | ||
| 478 | } | ||
| 479 | |||
| 480 | /* labeled failure begin */ | ||
| 481 | static int addthrowinstruction (CompileState *compst, Labelset ls) { | ||
| 482 | int i = nextinstruction(compst); | ||
| 483 | getinstr(compst, i).i.code = IThrow; | ||
| 484 | i = nextinstruction(compst); | ||
| 485 | getinstr(compst, i).labels = ls; | ||
| 486 | return i; | ||
| 487 | } | ||
| 488 | |||
| 489 | static int addoffsetlabinst (CompileState *compst, Opcode op, Labelset ls) { | ||
| 490 | int j; | ||
| 491 | int i = addinstruction(compst, op, 0); /* instruction */ | ||
| 492 | addinstruction(compst, (Opcode)0, 0); /* open space for offset */ | ||
| 493 | j = nextinstruction(compst); /* open space for labels */ | ||
| 494 | getinstr(compst, j).labels = ls; | ||
| 495 | assert(op == ILabChoice); | ||
| 496 | return i; | ||
| 497 | } | ||
| 498 | /* labeled failure end */ | ||
| 499 | |||
| 500 | |||
| 501 | static void setoffset (CompileState *compst, int instruction, int offset) { | ||
| 502 | getinstr(compst, instruction + 1).offset = offset; | ||
| 503 | } | ||
| 504 | |||
| 505 | |||
| 506 | /* | ||
| 507 | ** Add a capture instruction: | ||
| 508 | ** 'op' is the capture instruction; 'cap' the capture kind; | ||
| 509 | ** 'key' the key into ktable; 'aux' is optional offset | ||
| 510 | ** | ||
| 511 | */ | ||
| 512 | static int addinstcap (CompileState *compst, Opcode op, int cap, int key, | ||
| 513 | int aux) { | ||
| 514 | int i = addinstruction(compst, op, joinkindoff(cap, aux)); | ||
| 515 | getinstr(compst, i).i.key = key; | ||
| 516 | return i; | ||
| 517 | } | ||
| 518 | |||
| 519 | |||
| 520 | #define gethere(compst) ((compst)->ncode) | ||
| 521 | |||
| 522 | #define target(code,i) ((i) + code[i + 1].offset) | ||
| 523 | |||
| 524 | |||
| 525 | static void jumptothere (CompileState *compst, int instruction, int target) { | ||
| 526 | if (instruction >= 0) | ||
| 527 | setoffset(compst, instruction, target - instruction); | ||
| 528 | } | ||
| 529 | |||
| 530 | |||
| 531 | static void jumptohere (CompileState *compst, int instruction) { | ||
| 532 | jumptothere(compst, instruction, gethere(compst)); | ||
| 533 | } | ||
| 534 | |||
| 535 | |||
| 536 | /* | ||
| 537 | ** Code an IChar instruction, or IAny if there is an equivalent | ||
| 538 | ** test dominating it | ||
| 539 | */ | ||
| 540 | static void codechar (CompileState *compst, int c, int tt) { | ||
| 541 | if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar && | ||
| 542 | getinstr(compst, tt).i.aux == c) | ||
| 543 | addinstruction(compst, IAny, 0); | ||
| 544 | else | ||
| 545 | addinstruction(compst, IChar, c); | ||
| 546 | } | ||
| 547 | |||
| 548 | |||
| 549 | /* | ||
| 550 | ** Add a charset posfix to an instruction | ||
| 551 | */ | ||
| 552 | static void addcharset (CompileState *compst, const byte *cs) { | ||
| 553 | int p = gethere(compst); | ||
| 554 | int i; | ||
| 555 | for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++) | ||
| 556 | nextinstruction(compst); /* space for buffer */ | ||
| 557 | /* fill buffer with charset */ | ||
| 558 | loopset(j, getinstr(compst, p).buff[j] = cs[j]); | ||
| 559 | } | ||
| 560 | |||
| 561 | |||
| 562 | /* | ||
| 563 | ** code a char set, optimizing unit sets for IChar, "complete" | ||
| 564 | ** sets for IAny, and empty sets for IFail; also use an IAny | ||
| 565 | ** when instruction is dominated by an equivalent test. | ||
| 566 | */ | ||
| 567 | static void codecharset (CompileState *compst, const byte *cs, int tt) { | ||
| 568 | int c = 0; /* (=) to avoid warnings */ | ||
| 569 | Opcode op = charsettype(cs, &c); | ||
| 570 | switch (op) { | ||
| 571 | case IChar: codechar(compst, c, tt); break; | ||
| 572 | case ISet: { /* non-trivial set? */ | ||
| 573 | if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet && | ||
| 574 | cs_equal(cs, getinstr(compst, tt + 2).buff)) | ||
| 575 | addinstruction(compst, IAny, 0); | ||
| 576 | else { | ||
| 577 | addinstruction(compst, ISet, 0); | ||
| 578 | addcharset(compst, cs); | ||
| 579 | } | ||
| 580 | break; | ||
| 581 | } | ||
| 582 | default: addinstruction(compst, op, c); break; | ||
| 583 | } | ||
| 584 | } | ||
| 585 | |||
| 586 | |||
| 587 | /* | ||
| 588 | ** code a test set, optimizing unit sets for ITestChar, "complete" | ||
| 589 | ** sets for ITestAny, and empty sets for IJmp (always fails). | ||
| 590 | ** 'e' is true iff test should accept the empty string. (Test | ||
| 591 | ** instructions in the current VM never accept the empty string.) | ||
| 592 | */ | ||
| 593 | static int codetestset (CompileState *compst, Charset *cs, int e) { | ||
| 594 | if (e) return NOINST; /* no test */ | ||
| 595 | else { | ||
| 596 | int c = 0; | ||
| 597 | Opcode op = charsettype(cs->cs, &c); | ||
| 598 | switch (op) { | ||
| 599 | case IFail: return addoffsetinst(compst, IJmp); /* always jump */ | ||
| 600 | case IAny: return addoffsetinst(compst, ITestAny); | ||
| 601 | case IChar: { | ||
| 602 | int i = addoffsetinst(compst, ITestChar); | ||
| 603 | getinstr(compst, i).i.aux = c; | ||
| 604 | return i; | ||
| 605 | } | ||
| 606 | case ISet: { | ||
| 607 | int i = addoffsetinst(compst, ITestSet); | ||
| 608 | addcharset(compst, cs->cs); | ||
| 609 | return i; | ||
| 610 | } | ||
| 611 | default: assert(0); return 0; | ||
| 612 | } | ||
| 613 | } | ||
| 614 | } | ||
| 615 | |||
| 616 | |||
| 617 | /* | ||
| 618 | ** Find the final destination of a sequence of jumps | ||
| 619 | */ | ||
| 620 | static int finaltarget (Instruction *code, int i) { | ||
| 621 | while (code[i].i.code == IJmp) | ||
| 622 | i = target(code, i); | ||
| 623 | return i; | ||
| 624 | } | ||
| 625 | |||
| 626 | |||
| 627 | /* | ||
| 628 | ** final label (after traversing any jumps) | ||
| 629 | */ | ||
| 630 | static int finallabel (Instruction *code, int i) { | ||
| 631 | return finaltarget(code, target(code, i)); | ||
| 632 | } | ||
| 633 | |||
| 634 | |||
| 635 | /* | ||
| 636 | ** <behind(p)> == behind n; <p> (where n = fixedlen(p)) | ||
| 637 | */ | ||
| 638 | static void codebehind (CompileState *compst, TTree *tree) { | ||
| 639 | if (tree->u.n > 0) | ||
| 640 | addinstruction(compst, IBehind, tree->u.n); | ||
| 641 | codegen(compst, sib1(tree), 0, NOINST, fullset); | ||
| 642 | } | ||
| 643 | |||
| 644 | |||
| 645 | /* | ||
| 646 | ** Choice; optimizations: | ||
| 647 | ** - when p1 is headfail | ||
| 648 | ** - when first(p1) and first(p2) are disjoint; than | ||
| 649 | ** a character not in first(p1) cannot go to p1, and a character | ||
| 650 | ** in first(p1) cannot go to p2 (at it is not in first(p2)). | ||
| 651 | ** (The optimization is not valid if p1 accepts the empty string, | ||
| 652 | ** as then there is no character at all...) | ||
| 653 | ** - when p2 is empty and opt is true; a IPartialCommit can resuse | ||
| 654 | ** the Choice already active in the stack. | ||
| 655 | */ | ||
| 656 | static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, | ||
| 657 | const Charset *fl) { | ||
| 658 | int emptyp2 = (p2->tag == TTrue); | ||
| 659 | Charset cs1, cs2; | ||
| 660 | int e1 = getfirst(p1, fullset, &cs1); | ||
| 661 | if (headfail(p1) || | ||
| 662 | (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) { | ||
| 663 | /*if (0) {*/ | ||
| 664 | /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */ | ||
| 665 | int test = codetestset(compst, &cs1, 0); | ||
| 666 | int jmp = NOINST; | ||
| 667 | codegen(compst, p1, 0, test, fl); | ||
| 668 | if (!emptyp2) | ||
| 669 | jmp = addoffsetinst(compst, IJmp); | ||
| 670 | jumptohere(compst, test); | ||
| 671 | codegen(compst, p2, opt, NOINST, fl); | ||
| 672 | jumptohere(compst, jmp); | ||
| 673 | } | ||
| 674 | else if (opt && emptyp2) { | ||
| 675 | /* p1? == IPartialCommit; p1 */ | ||
| 676 | jumptohere(compst, addoffsetinst(compst, IPartialCommit)); | ||
| 677 | codegen(compst, p1, 1, NOINST, fullset); | ||
| 678 | } | ||
| 679 | else { | ||
| 680 | /* <p1 / p2> == | ||
| 681 | test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */ | ||
| 682 | int pcommit; | ||
| 683 | int test = codetestset(compst, &cs1, e1); | ||
| 684 | int pchoice = addoffsetinst(compst, IChoice); | ||
| 685 | codegen(compst, p1, emptyp2, test, fullset); | ||
| 686 | pcommit = addoffsetinst(compst, ICommit); | ||
| 687 | jumptohere(compst, pchoice); | ||
| 688 | jumptohere(compst, test); | ||
| 689 | codegen(compst, p2, opt, NOINST, fl); | ||
| 690 | jumptohere(compst, pcommit); | ||
| 691 | } | ||
| 692 | } | ||
| 693 | |||
| 694 | /* labeled failure begin */ | ||
| 695 | static void codelabchoice (CompileState *compst, TTree *p1, TTree *p2, int opt, | ||
| 696 | const Charset *fl, Labelset ls) { | ||
| 697 | int emptyp2 = (p2->tag == TTrue); | ||
| 698 | int pcommit; | ||
| 699 | int test = NOINST; | ||
| 700 | int pchoice = addoffsetlabinst(compst, ILabChoice, ls); | ||
| 701 | codegen(compst, p1, emptyp2, test, fullset); | ||
| 702 | pcommit = addoffsetinst(compst, ICommit); | ||
| 703 | jumptohere(compst, pchoice); | ||
| 704 | jumptohere(compst, test); | ||
| 705 | codegen(compst, p2, opt, NOINST, fl); | ||
| 706 | jumptohere(compst, pcommit); | ||
| 707 | |||
| 708 | } | ||
| 709 | /* labeled failure end */ | ||
| 710 | |||
| 711 | /* | ||
| 712 | ** And predicate | ||
| 713 | ** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n | ||
| 714 | ** (valid only when 'p' has no captures) | ||
| 715 | */ | ||
| 716 | static void codeand (CompileState *compst, TTree *tree, int tt) { | ||
| 717 | int n = fixedlen(tree); | ||
| 718 | if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) { | ||
| 719 | codegen(compst, tree, 0, tt, fullset); | ||
| 720 | if (n > 0) | ||
| 721 | addinstruction(compst, IBehind, n); | ||
| 722 | } | ||
| 723 | else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */ | ||
| 724 | int pcommit; | ||
| 725 | int pchoice = addoffsetinst(compst, IChoice); | ||
| 726 | codegen(compst, tree, 0, tt, fullset); | ||
| 727 | pcommit = addoffsetinst(compst, IBackCommit); | ||
| 728 | jumptohere(compst, pchoice); | ||
| 729 | addinstruction(compst, IFail, 0); | ||
| 730 | jumptohere(compst, pcommit); | ||
| 731 | } | ||
| 732 | } | ||
| 733 | |||
| 734 | |||
| 735 | /* | ||
| 736 | ** Captures: if pattern has fixed (and not too big) length, use | ||
| 737 | ** a single IFullCapture instruction after the match; otherwise, | ||
| 738 | ** enclose the pattern with OpenCapture - CloseCapture. | ||
| 739 | */ | ||
| 740 | static void codecapture (CompileState *compst, TTree *tree, int tt, | ||
| 741 | const Charset *fl) { | ||
| 742 | int len = fixedlen(sib1(tree)); | ||
| 743 | if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) { | ||
| 744 | codegen(compst, sib1(tree), 0, tt, fl); | ||
| 745 | addinstcap(compst, IFullCapture, tree->cap, tree->key, len); | ||
| 746 | } | ||
| 747 | else { | ||
| 748 | addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0); | ||
| 749 | codegen(compst, sib1(tree), 0, tt, fl); | ||
| 750 | addinstcap(compst, ICloseCapture, Cclose, 0, 0); | ||
| 751 | } | ||
| 752 | } | ||
| 753 | |||
| 754 | |||
| 755 | static void coderuntime (CompileState *compst, TTree *tree, int tt) { | ||
| 756 | addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0); | ||
| 757 | codegen(compst, sib1(tree), 0, tt, fullset); | ||
| 758 | addinstcap(compst, ICloseRunTime, Cclose, 0, 0); | ||
| 759 | } | ||
| 760 | |||
| 761 | |||
| 762 | /* | ||
| 763 | ** Repetion; optimizations: | ||
| 764 | ** When pattern is a charset, can use special instruction ISpan. | ||
| 765 | ** When pattern is head fail, or if it starts with characters that | ||
| 766 | ** are disjoint from what follows the repetions, a simple test | ||
| 767 | ** is enough (a fail inside the repetition would backtrack to fail | ||
| 768 | ** again in the following pattern, so there is no need for a choice). | ||
| 769 | ** When 'opt' is true, the repetion can reuse the Choice already | ||
| 770 | ** active in the stack. | ||
| 771 | */ | ||
| 772 | static void coderep (CompileState *compst, TTree *tree, int opt, | ||
| 773 | const Charset *fl) { | ||
| 774 | Charset st; | ||
| 775 | if (tocharset(tree, &st)) { | ||
| 776 | addinstruction(compst, ISpan, 0); | ||
| 777 | addcharset(compst, st.cs); | ||
| 778 | } | ||
| 779 | else { | ||
| 780 | int e1 = getfirst(tree, fullset, &st); | ||
| 781 | if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) { | ||
| 782 | /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */ | ||
| 783 | int jmp; | ||
| 784 | int test = codetestset(compst, &st, 0); | ||
| 785 | codegen(compst, tree, opt, test, fullset); | ||
| 786 | jmp = addoffsetinst(compst, IJmp); | ||
| 787 | jumptohere(compst, test); | ||
| 788 | jumptothere(compst, jmp, test); | ||
| 789 | } | ||
| 790 | else { | ||
| 791 | /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */ | ||
| 792 | /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */ | ||
| 793 | int commit, l2; | ||
| 794 | int test = codetestset(compst, &st, e1); | ||
| 795 | int pchoice = NOINST; | ||
| 796 | if (opt) | ||
| 797 | jumptohere(compst, addoffsetinst(compst, IPartialCommit)); | ||
| 798 | else | ||
| 799 | pchoice = addoffsetinst(compst, IChoice); | ||
| 800 | l2 = gethere(compst); | ||
| 801 | codegen(compst, tree, 0, NOINST, fullset); | ||
| 802 | commit = addoffsetinst(compst, IPartialCommit); | ||
| 803 | jumptothere(compst, commit, l2); | ||
| 804 | jumptohere(compst, pchoice); | ||
| 805 | jumptohere(compst, test); | ||
| 806 | } | ||
| 807 | } | ||
| 808 | } | ||
| 809 | |||
| 810 | |||
| 811 | /* | ||
| 812 | ** Not predicate; optimizations: | ||
| 813 | ** In any case, if first test fails, 'not' succeeds, so it can jump to | ||
| 814 | ** the end. If pattern is headfail, that is all (it cannot fail | ||
| 815 | ** in other parts); this case includes 'not' of simple sets. Otherwise, | ||
| 816 | ** use the default code (a choice plus a failtwice). | ||
| 817 | */ | ||
| 818 | static void codenot (CompileState *compst, TTree *tree) { | ||
| 819 | Charset st; | ||
| 820 | int e = getfirst(tree, fullset, &st); | ||
| 821 | int test = codetestset(compst, &st, e); | ||
| 822 | if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */ | ||
| 823 | addinstruction(compst, IFail, 0); | ||
| 824 | else { | ||
| 825 | /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */ | ||
| 826 | int pchoice = addoffsetinst(compst, IChoice); | ||
| 827 | codegen(compst, tree, 0, NOINST, fullset); | ||
| 828 | addinstruction(compst, IFailTwice, 0); | ||
| 829 | jumptohere(compst, pchoice); | ||
| 830 | } | ||
| 831 | jumptohere(compst, test); | ||
| 832 | } | ||
| 833 | |||
| 834 | |||
| 835 | /* | ||
| 836 | ** change open calls to calls, using list 'positions' to find | ||
| 837 | ** correct offsets; also optimize tail calls | ||
| 838 | */ | ||
| 839 | static void correctcalls (CompileState *compst, int *positions, | ||
| 840 | int from, int to) { | ||
| 841 | int i; | ||
| 842 | Instruction *code = compst->p->code; | ||
| 843 | for (i = from; i < to; i += sizei(&code[i])) { | ||
| 844 | if (code[i].i.code == IOpenCall) { | ||
| 845 | int n = code[i].i.key; /* rule number */ | ||
| 846 | int rule = positions[n]; /* rule position */ | ||
| 847 | assert(rule == from || code[rule - 1].i.code == IRet); | ||
| 848 | if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */ | ||
| 849 | code[i].i.code = IJmp; /* tail call */ | ||
| 850 | else | ||
| 851 | code[i].i.code = ICall; | ||
| 852 | jumptothere(compst, i, rule); /* call jumps to respective rule */ | ||
| 853 | } | ||
| 854 | } | ||
| 855 | assert(i == to); | ||
| 856 | } | ||
| 857 | |||
| 858 | |||
| 859 | /* | ||
| 860 | ** Code for a grammar: | ||
| 861 | ** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2: | ||
| 862 | */ | ||
| 863 | static void codegrammar (CompileState *compst, TTree *grammar) { | ||
| 864 | int positions[MAXRULES]; | ||
| 865 | int rulenumber = 0; | ||
| 866 | TTree *rule; | ||
| 867 | int firstcall = addoffsetinst(compst, ICall); /* call initial rule */ | ||
| 868 | int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */ | ||
| 869 | int start = gethere(compst); /* here starts the initial rule */ | ||
| 870 | jumptohere(compst, firstcall); | ||
| 871 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | ||
| 872 | positions[rulenumber++] = gethere(compst); /* save rule position */ | ||
| 873 | codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */ | ||
| 874 | addinstruction(compst, IRet, 0); | ||
| 875 | } | ||
| 876 | assert(rule->tag == TTrue); | ||
| 877 | jumptohere(compst, jumptoend); | ||
| 878 | correctcalls(compst, positions, start, gethere(compst)); | ||
| 879 | } | ||
| 880 | |||
| 881 | |||
| 882 | static void codecall (CompileState *compst, TTree *call) { | ||
| 883 | int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ | ||
| 884 | getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */ | ||
| 885 | assert(sib2(call)->tag == TRule); | ||
| 886 | } | ||
| 887 | |||
| 888 | |||
| 889 | /* | ||
| 890 | ** Code first child of a sequence | ||
| 891 | ** (second child is called in-place to allow tail call) | ||
| 892 | ** Return 'tt' for second child | ||
| 893 | */ | ||
| 894 | static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, | ||
| 895 | int tt, const Charset *fl) { | ||
| 896 | if (needfollow(p1)) { | ||
| 897 | Charset fl1; | ||
| 898 | getfirst(p2, fl, &fl1); /* p1 follow is p2 first */ | ||
| 899 | codegen(compst, p1, 0, tt, &fl1); | ||
| 900 | } | ||
| 901 | else /* use 'fullset' as follow */ | ||
| 902 | codegen(compst, p1, 0, tt, fullset); | ||
| 903 | if (fixedlen(p1) != 0) /* can 'p1' consume anything? */ | ||
| 904 | return NOINST; /* invalidate test */ | ||
| 905 | else return tt; /* else 'tt' still protects sib2 */ | ||
| 906 | } | ||
| 907 | |||
| 908 | |||
| 909 | /* | ||
| 910 | ** Main code-generation function: dispatch to auxiliar functions | ||
| 911 | ** according to kind of tree | ||
| 912 | */ | ||
| 913 | static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | ||
| 914 | const Charset *fl) { | ||
| 915 | tailcall: | ||
| 916 | switch (tree->tag) { | ||
| 917 | case TChar: codechar(compst, tree->u.n, tt); break; | ||
| 918 | case TAny: addinstruction(compst, IAny, 0); break; | ||
| 919 | case TSet: codecharset(compst, treebuffer(tree), tt); break; | ||
| 920 | case TTrue: break; | ||
| 921 | case TFalse: addinstruction(compst, IFail, 0); break; | ||
| 922 | case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break; | ||
| 923 | case TRep: coderep(compst, sib1(tree), opt, fl); break; | ||
| 924 | case TBehind: codebehind(compst, tree); break; | ||
| 925 | case TNot: codenot(compst, sib1(tree)); break; | ||
| 926 | case TAnd: codeand(compst, sib1(tree), tt); break; | ||
| 927 | case TCapture: codecapture(compst, tree, tt, fl); break; | ||
| 928 | case TRunTime: coderuntime(compst, tree, tt); break; | ||
| 929 | case TGrammar: codegrammar(compst, tree); break; | ||
| 930 | case TCall: codecall(compst, tree); break; | ||
| 931 | case TSeq: { | ||
| 932 | tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */ | ||
| 933 | /* codegen(compst, p2, opt, tt, fl); */ | ||
| 934 | tree = sib2(tree); goto tailcall; | ||
| 935 | } | ||
| 936 | case TThrow: { /* labeled failure */ | ||
| 937 | addthrowinstruction(compst, tree->labels); | ||
| 938 | break; | ||
| 939 | } | ||
| 940 | case TLabChoice: { /* labeled failure */ | ||
| 941 | codelabchoice(compst, sib1(tree), sib2(tree), opt, fl, tree->labels); | ||
| 942 | break; | ||
| 943 | } | ||
| 944 | default: assert(0); | ||
| 945 | } | ||
| 946 | } | ||
| 947 | |||
| 948 | |||
| 949 | /* | ||
| 950 | ** Optimize jumps and other jump-like instructions. | ||
| 951 | ** * Update labels of instructions with labels to their final | ||
| 952 | ** destinations (e.g., choice L1; ... L1: jmp L2: becomes | ||
| 953 | ** choice L2) | ||
| 954 | ** * Jumps to other instructions that do jumps become those | ||
| 955 | ** instructions (e.g., jump to return becomes a return; jump | ||
| 956 | ** to commit becomes a commit) | ||
| 957 | */ | ||
| 958 | static void peephole (CompileState *compst) { | ||
| 959 | Instruction *code = compst->p->code; | ||
| 960 | int i; | ||
| 961 | for (i = 0; i < compst->ncode; i += sizei(&code[i])) { | ||
| 962 | switch (code[i].i.code) { | ||
| 963 | case IChoice: case ICall: case ICommit: case IPartialCommit: | ||
| 964 | case IBackCommit: case ITestChar: case ITestSet: case ILabChoice: /* labeled failure */ | ||
| 965 | case ITestAny: { /* instructions with labels */ | ||
| 966 | jumptothere(compst, i, finallabel(code, i)); /* optimize label */ | ||
| 967 | break; | ||
| 968 | } | ||
| 969 | case IJmp: { | ||
| 970 | int ft = finaltarget(code, i); | ||
| 971 | switch (code[ft].i.code) { /* jumping to what? */ | ||
| 972 | case IRet: case IFail: case IFailTwice: | ||
| 973 | case IEnd: { /* instructions with unconditional implicit jumps */ | ||
| 974 | code[i] = code[ft]; /* jump becomes that instruction */ | ||
| 975 | code[i + 1].i.code = IAny; /* 'no-op' for target position */ | ||
| 976 | break; | ||
| 977 | } | ||
| 978 | case ICommit: case IPartialCommit: | ||
| 979 | case IBackCommit: { /* inst. with unconditional explicit jumps */ | ||
| 980 | int fft = finallabel(code, ft); | ||
| 981 | code[i] = code[ft]; /* jump becomes that instruction... */ | ||
| 982 | jumptothere(compst, i, fft); /* but must correct its offset */ | ||
| 983 | i--; /* reoptimize its label */ | ||
| 984 | break; | ||
| 985 | } | ||
| 986 | default: { | ||
| 987 | jumptothere(compst, i, ft); /* optimize label */ | ||
| 988 | break; | ||
| 989 | } | ||
| 990 | } | ||
| 991 | break; | ||
| 992 | } | ||
| 993 | default: break; | ||
| 994 | } | ||
| 995 | } | ||
| 996 | assert(code[i - 1].i.code == IEnd); | ||
| 997 | } | ||
| 998 | |||
| 999 | |||
| 1000 | /* | ||
| 1001 | ** Compile a pattern | ||
| 1002 | */ | ||
| 1003 | Instruction *compile (lua_State *L, Pattern *p) { | ||
| 1004 | CompileState compst; | ||
| 1005 | compst.p = p; compst.ncode = 0; compst.L = L; | ||
| 1006 | reallocprog(L, p, 2); /* minimum initial size */ | ||
| 1007 | codegen(&compst, p->tree, 0, NOINST, fullset); | ||
| 1008 | addinstruction(&compst, IEnd, 0); | ||
| 1009 | reallocprog(L, p, compst.ncode); /* set final size */ | ||
| 1010 | peephole(&compst); /* labeled failure */ | ||
| 1011 | return p->code; | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | |||
| 1015 | /* }====================================================== */ | ||
| 1016 | |||
diff --git a/lpcode.h b/lpcode.h new file mode 100644 index 0000000..5c9d54f --- /dev/null +++ b/lpcode.h | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lpcode.h,v 1.5 2013/04/04 21:24:45 roberto Exp $ | ||
| 3 | */ | ||
| 4 | |||
| 5 | #if !defined(lpcode_h) | ||
| 6 | #define lpcode_h | ||
| 7 | |||
| 8 | #include "lua.h" | ||
| 9 | |||
| 10 | #include "lptypes.h" | ||
| 11 | #include "lptree.h" | ||
| 12 | #include "lpvm.h" | ||
| 13 | |||
| 14 | int tocharset (TTree *tree, Charset *cs); | ||
| 15 | int checkaux (TTree *tree, int pred); | ||
| 16 | int fixedlenx (TTree *tree, int count, int len); | ||
| 17 | int hascaptures (TTree *tree); | ||
| 18 | int lp_gc (lua_State *L); | ||
| 19 | Instruction *compile (lua_State *L, Pattern *p); | ||
| 20 | void reallocprog (lua_State *L, Pattern *p, int nsize); | ||
| 21 | int sizei (const Instruction *i); | ||
| 22 | |||
| 23 | |||
| 24 | #define PEnullable 0 | ||
| 25 | #define PEnofail 1 | ||
| 26 | |||
| 27 | #define nofail(t) checkaux(t, PEnofail) | ||
| 28 | #define nullable(t) checkaux(t, PEnullable) | ||
| 29 | |||
| 30 | #define fixedlen(t) fixedlenx(t, 0, 0) | ||
| 31 | |||
| 32 | |||
| 33 | |||
| 34 | #endif | ||
diff --git a/lpprint.c b/lpprint.c new file mode 100644 index 0000000..03239fd --- /dev/null +++ b/lpprint.c | |||
| @@ -0,0 +1,255 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lpprint.c,v 1.7 2013/04/12 16:29:49 roberto Exp $ | ||
| 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include <ctype.h> | ||
| 7 | #include <limits.h> | ||
| 8 | #include <stdio.h> | ||
| 9 | |||
| 10 | |||
| 11 | #include "lptypes.h" | ||
| 12 | #include "lpprint.h" | ||
| 13 | #include "lpcode.h" | ||
| 14 | |||
| 15 | |||
| 16 | #if defined(LPEG_DEBUG) | ||
| 17 | |||
| 18 | /* | ||
| 19 | ** {====================================================== | ||
| 20 | ** Printing patterns (for debugging) | ||
| 21 | ** ======================================================= | ||
| 22 | */ | ||
| 23 | |||
| 24 | |||
| 25 | void printcharset (const byte *st) { | ||
| 26 | int i; | ||
| 27 | printf("["); | ||
| 28 | for (i = 0; i <= UCHAR_MAX; i++) { | ||
| 29 | int first = i; | ||
| 30 | while (testchar(st, i) && i <= UCHAR_MAX) i++; | ||
| 31 | if (i - 1 == first) /* unary range? */ | ||
| 32 | printf("(%02x)", first); | ||
| 33 | else if (i - 1 > first) /* non-empty range? */ | ||
| 34 | printf("(%02x-%02x)", first, i - 1); | ||
| 35 | } | ||
| 36 | printf("]"); | ||
| 37 | } | ||
| 38 | |||
| 39 | |||
| 40 | static void printcapkind (int kind) { | ||
| 41 | const char *const modes[] = { | ||
| 42 | "close", "position", "constant", "backref", | ||
| 43 | "argument", "simple", "table", "function", | ||
| 44 | "query", "string", "num", "substitution", "fold", | ||
| 45 | "runtime", "group"}; | ||
| 46 | printf("%s", modes[kind]); | ||
| 47 | } | ||
| 48 | |||
| 49 | |||
| 50 | static void printjmp (const Instruction *op, const Instruction *p) { | ||
| 51 | printf("-> %d", (int)(p + (p + 1)->offset - op)); | ||
| 52 | } | ||
| 53 | |||
| 54 | |||
| 55 | void printinst (const Instruction *op, const Instruction *p) { | ||
| 56 | const char *const names[] = { | ||
| 57 | "any", "char", "set", | ||
| 58 | "testany", "testchar", "testset", | ||
| 59 | "span", "behind", | ||
| 60 | "ret", "end", | ||
| 61 | "choice", "jmp", "call", "open_call", | ||
| 62 | "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", | ||
| 63 | "fullcapture", "opencapture", "closecapture", "closeruntime", | ||
| 64 | "throw", "labeled_choice" /* labeled failure */ | ||
| 65 | }; | ||
| 66 | printf("%02ld: %s ", (long)(p - op), names[p->i.code]); | ||
| 67 | switch ((Opcode)p->i.code) { | ||
| 68 | case IChar: { | ||
| 69 | printf("'%c'", p->i.aux); | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | case ITestChar: { | ||
| 73 | printf("'%c'", p->i.aux); printjmp(op, p); | ||
| 74 | break; | ||
| 75 | } | ||
| 76 | case IFullCapture: { | ||
| 77 | printcapkind(getkind(p)); | ||
| 78 | printf(" (size = %d) (idx = %d)", getoff(p), p->i.key); | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | case IOpenCapture: { | ||
| 82 | printcapkind(getkind(p)); | ||
| 83 | printf(" (idx = %d)", p->i.key); | ||
| 84 | break; | ||
| 85 | } | ||
| 86 | case ISet: { | ||
| 87 | printcharset((p+1)->buff); | ||
| 88 | break; | ||
| 89 | } | ||
| 90 | case ITestSet: { | ||
| 91 | printcharset((p+2)->buff); printjmp(op, p); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | case ISpan: { | ||
| 95 | printcharset((p+1)->buff); | ||
| 96 | break; | ||
| 97 | } | ||
| 98 | case IOpenCall: { | ||
| 99 | printf("-> %d", (p + 1)->offset); | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | case IBehind: { | ||
| 103 | printf("%d", p->i.aux); | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | case IJmp: case ICall: case ICommit: case IChoice: | ||
| 107 | case IPartialCommit: case IBackCommit: case ITestAny: { | ||
| 108 | printjmp(op, p); | ||
| 109 | break; | ||
| 110 | } | ||
| 111 | case IThrow: { /* labeled failure */ | ||
| 112 | printf("%d", (p + 1)->labels); | ||
| 113 | break; | ||
| 114 | } | ||
| 115 | case ILabChoice: { /* labeled failure */ | ||
| 116 | printjmp(op, p); | ||
| 117 | printf(" %d", (p + 2)->labels); | ||
| 118 | break; | ||
| 119 | } | ||
| 120 | |||
| 121 | default: break; | ||
| 122 | } | ||
| 123 | printf("\n"); | ||
| 124 | } | ||
| 125 | |||
| 126 | |||
| 127 | void printpatt (Instruction *p, int n) { | ||
| 128 | Instruction *op = p; | ||
| 129 | while (p < op + n) { | ||
| 130 | printinst(op, p); | ||
| 131 | p += sizei(p); | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | |||
| 136 | #if defined(LPEG_DEBUG) | ||
| 137 | static void printcap (Capture *cap) { | ||
| 138 | printcapkind(cap->kind); | ||
| 139 | printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s); | ||
| 140 | } | ||
| 141 | |||
| 142 | |||
| 143 | void printcaplist (Capture *cap, Capture *limit) { | ||
| 144 | printf(">======\n"); | ||
| 145 | for (; cap->s && (limit == NULL || cap < limit); cap++) | ||
| 146 | printcap(cap); | ||
| 147 | printf("=======\n"); | ||
| 148 | } | ||
| 149 | #endif | ||
| 150 | |||
| 151 | /* }====================================================== */ | ||
| 152 | |||
| 153 | |||
| 154 | /* | ||
| 155 | ** {====================================================== | ||
| 156 | ** Printing trees (for debugging) | ||
| 157 | ** ======================================================= | ||
| 158 | */ | ||
| 159 | |||
| 160 | static const char *tagnames[] = { | ||
| 161 | "char", "set", "any", | ||
| 162 | "true", "false", | ||
| 163 | "rep", | ||
| 164 | "seq", "choice", | ||
| 165 | "not", "and", | ||
| 166 | "call", "opencall", "rule", "grammar", | ||
| 167 | "behind", | ||
| 168 | "capture", "run-time" | ||
| 169 | }; | ||
| 170 | |||
| 171 | |||
| 172 | void printtree (TTree *tree, int ident) { | ||
| 173 | int i; | ||
| 174 | for (i = 0; i < ident; i++) printf(" "); | ||
| 175 | printf("%s", tagnames[tree->tag]); | ||
| 176 | switch (tree->tag) { | ||
| 177 | case TChar: { | ||
| 178 | int c = tree->u.n; | ||
| 179 | if (isprint(c)) | ||
| 180 | printf(" '%c'\n", c); | ||
| 181 | else | ||
| 182 | printf(" (%02X)\n", c); | ||
| 183 | break; | ||
| 184 | } | ||
| 185 | case TSet: { | ||
| 186 | printcharset(treebuffer(tree)); | ||
| 187 | printf("\n"); | ||
| 188 | break; | ||
| 189 | } | ||
| 190 | case TOpenCall: case TCall: { | ||
| 191 | printf(" key: %d\n", tree->key); | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | case TBehind: { | ||
| 195 | printf(" %d\n", tree->u.n); | ||
| 196 | printtree(sib1(tree), ident + 2); | ||
| 197 | break; | ||
| 198 | } | ||
| 199 | case TCapture: { | ||
| 200 | printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n); | ||
| 201 | printtree(sib1(tree), ident + 2); | ||
| 202 | break; | ||
| 203 | } | ||
| 204 | case TRule: { | ||
| 205 | printf(" n: %d key: %d\n", tree->cap, tree->key); | ||
| 206 | printtree(sib1(tree), ident + 2); | ||
| 207 | break; /* do not print next rule as a sibling */ | ||
| 208 | } | ||
| 209 | case TGrammar: { | ||
| 210 | TTree *rule = sib1(tree); | ||
| 211 | printf(" %d\n", tree->u.n); /* number of rules */ | ||
| 212 | for (i = 0; i < tree->u.n; i++) { | ||
| 213 | printtree(rule, ident + 2); | ||
| 214 | rule = sib2(rule); | ||
| 215 | } | ||
| 216 | assert(rule->tag == TTrue); /* sentinel */ | ||
| 217 | break; | ||
| 218 | } | ||
| 219 | default: { | ||
| 220 | int sibs = numsiblings[tree->tag]; | ||
| 221 | printf("\n"); | ||
| 222 | if (sibs >= 1) { | ||
| 223 | printtree(sib1(tree), ident + 2); | ||
| 224 | if (sibs >= 2) | ||
| 225 | printtree(sib2(tree), ident + 2); | ||
| 226 | } | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | |||
| 233 | void printktable (lua_State *L, int idx) { | ||
| 234 | int n, i; | ||
| 235 | lua_getfenv(L, idx); | ||
| 236 | if (lua_isnil(L, -1)) /* no ktable? */ | ||
| 237 | return; | ||
| 238 | n = lua_objlen(L, -1); | ||
| 239 | printf("["); | ||
| 240 | for (i = 1; i <= n; i++) { | ||
| 241 | printf("%d = ", i); | ||
| 242 | lua_rawgeti(L, -1, i); | ||
| 243 | if (lua_isstring(L, -1)) | ||
| 244 | printf("%s ", lua_tostring(L, -1)); | ||
| 245 | else | ||
| 246 | printf("%s ", lua_typename(L, lua_type(L, -1))); | ||
| 247 | lua_pop(L, 1); | ||
| 248 | } | ||
| 249 | printf("]\n"); | ||
| 250 | /* leave ktable at the stack */ | ||
| 251 | } | ||
| 252 | |||
| 253 | /* }====================================================== */ | ||
| 254 | |||
| 255 | #endif | ||
diff --git a/lpprint.h b/lpprint.h new file mode 100644 index 0000000..6cbe47d --- /dev/null +++ b/lpprint.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lpprint.h,v 1.1 2013/03/21 20:25:12 roberto Exp $ | ||
| 3 | */ | ||
| 4 | |||
| 5 | |||
| 6 | #if !defined(lpprint_h) | ||
| 7 | #define lpprint_h | ||
| 8 | |||
| 9 | |||
| 10 | #include "lptree.h" | ||
| 11 | #include "lpvm.h" | ||
| 12 | |||
| 13 | |||
| 14 | #if defined(LPEG_DEBUG) | ||
| 15 | |||
| 16 | void printpatt (Instruction *p, int n); | ||
| 17 | void printtree (TTree *tree, int ident); | ||
| 18 | void printktable (lua_State *L, int idx); | ||
| 19 | void printcharset (const byte *st); | ||
| 20 | void printcaplist (Capture *cap, Capture *limit); | ||
| 21 | void printinst (const Instruction *op, const Instruction *p); | ||
| 22 | |||
| 23 | |||
| 24 | #else | ||
| 25 | |||
| 26 | #define printktable(L,idx) \ | ||
| 27 | luaL_error(L, "function only implemented in debug mode") | ||
| 28 | #define printtree(tree,i) \ | ||
| 29 | luaL_error(L, "function only implemented in debug mode") | ||
| 30 | #define printpatt(p,n) \ | ||
| 31 | luaL_error(L, "function only implemented in debug mode") | ||
| 32 | |||
| 33 | #endif | ||
| 34 | |||
| 35 | |||
| 36 | #endif | ||
| 37 | |||
diff --git a/lptree.c b/lptree.c new file mode 100644 index 0000000..5d2933d --- /dev/null +++ b/lptree.c | |||
| @@ -0,0 +1,1282 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lptree.c,v 1.10 2013/04/12 16:30:33 roberto Exp $ | ||
| 3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include <ctype.h> | ||
| 7 | #include <limits.h> | ||
| 8 | #include <string.h> | ||
| 9 | |||
| 10 | |||
| 11 | #include "lua.h" | ||
| 12 | #include "lauxlib.h" | ||
| 13 | |||
| 14 | #include "lptypes.h" | ||
| 15 | #include "lpcap.h" | ||
| 16 | #include "lpcode.h" | ||
| 17 | #include "lpprint.h" | ||
| 18 | #include "lptree.h" | ||
| 19 | |||
| 20 | |||
| 21 | /* number of siblings for each tree */ | ||
| 22 | const byte numsiblings[] = { | ||
| 23 | 0, 0, 0, /* char, set, any */ | ||
| 24 | 0, 0, /* true, false */ | ||
| 25 | 1, /* rep */ | ||
| 26 | 2, 2, /* seq, choice */ | ||
| 27 | 1, 1, /* not, and */ | ||
| 28 | 0, 0, 2, 1, /* call, opencall, rule, grammar */ | ||
| 29 | 1, /* behind */ | ||
| 30 | 1, 1, /* capture, runtime capture */ | ||
| 31 | 0, 2 /* labeled failure throw, labeled choice */ | ||
| 32 | }; | ||
| 33 | |||
| 34 | |||
| 35 | static TTree *newgrammar (lua_State *L, int arg); | ||
| 36 | |||
| 37 | |||
| 38 | /* | ||
| 39 | ** returns a reasonable name for value at index 'idx' on the stack | ||
| 40 | */ | ||
| 41 | static const char *val2str (lua_State *L, int idx) { | ||
| 42 | const char *k = lua_tostring(L, idx); | ||
| 43 | if (k != NULL) | ||
| 44 | return lua_pushfstring(L, "%s", k); | ||
| 45 | else | ||
| 46 | return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx)); | ||
| 47 | } | ||
| 48 | |||
| 49 | |||
| 50 | /* | ||
| 51 | ** Fix a TOpenCall into a TCall node, using table 'postable' to | ||
| 52 | ** translate a key to its rule address in the tree. Raises an | ||
| 53 | ** error if key does not exist. | ||
| 54 | */ | ||
| 55 | static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) { | ||
| 56 | int n; | ||
| 57 | lua_rawgeti(L, -1, t->key); /* get rule's name */ | ||
| 58 | lua_gettable(L, postable); /* query name in position table */ | ||
| 59 | n = lua_tonumber(L, -1); /* get (absolute) position */ | ||
| 60 | lua_pop(L, 1); /* remove position */ | ||
| 61 | if (n == 0) { /* no position? */ | ||
| 62 | lua_rawgeti(L, -1, t->key); /* get rule's name again */ | ||
| 63 | luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); | ||
| 64 | } | ||
| 65 | t->tag = TCall; | ||
| 66 | t->u.ps = n - (t - g); /* position relative to node */ | ||
| 67 | assert(sib2(t)->tag == TRule); | ||
| 68 | sib2(t)->key = t->key; | ||
| 69 | } | ||
| 70 | |||
| 71 | |||
| 72 | /* | ||
| 73 | ** Transform left associative constructions into right | ||
| 74 | ** associative ones, for sequence and choice; that is: | ||
| 75 | ** (t11 + t12) + t2 => t11 + (t12 + t2) | ||
| 76 | ** (t11 * t12) * t2 => t11 * (t12 * t2) | ||
| 77 | ** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) | ||
| 78 | */ | ||
| 79 | static void correctassociativity (TTree *tree) { | ||
| 80 | TTree *t1 = sib1(tree); | ||
| 81 | assert(tree->tag == TChoice || tree->tag == TSeq); | ||
| 82 | while (t1->tag == tree->tag) { | ||
| 83 | int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */ | ||
| 84 | int n11size = t1->u.ps - 1; | ||
| 85 | int n12size = n1size - n11size - 1; | ||
| 86 | memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */ | ||
| 87 | tree->u.ps = n11size + 1; | ||
| 88 | sib2(tree)->tag = tree->tag; | ||
| 89 | sib2(tree)->u.ps = n12size + 1; | ||
| 90 | } | ||
| 91 | } | ||
| 92 | |||
| 93 | |||
| 94 | /* | ||
| 95 | ** Make final adjustments in a tree. Fix open calls in tree 't', | ||
| 96 | ** making them refer to their respective rules or raising appropriate | ||
| 97 | ** errors (if not inside a grammar). Correct associativity of associative | ||
| 98 | ** constructions (making them right associative). Assume that tree's | ||
| 99 | ** ktable is at the top of the stack (for error messages). | ||
| 100 | */ | ||
| 101 | static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { | ||
| 102 | tailcall: | ||
| 103 | switch (t->tag) { | ||
| 104 | case TGrammar: /* subgrammars were already fixed */ | ||
| 105 | return; | ||
| 106 | case TOpenCall: { | ||
| 107 | if (g != NULL) /* inside a grammar? */ | ||
| 108 | fixonecall(L, postable, g, t); | ||
| 109 | else { /* open call outside grammar */ | ||
| 110 | lua_rawgeti(L, -1, t->key); | ||
| 111 | luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); | ||
| 112 | } | ||
| 113 | break; | ||
| 114 | } | ||
| 115 | case TSeq: case TChoice: | ||
| 116 | correctassociativity(t); | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | switch (numsiblings[t->tag]) { | ||
| 120 | case 1: /* finalfix(L, postable, g, sib1(t)); */ | ||
| 121 | t = sib1(t); goto tailcall; | ||
| 122 | case 2: | ||
| 123 | finalfix(L, postable, g, sib1(t)); | ||
| 124 | t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */ | ||
| 125 | default: assert(numsiblings[t->tag] == 0); break; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | |||
| 130 | /* | ||
| 131 | ** {====================================================== | ||
| 132 | ** Tree generation | ||
| 133 | ** ======================================================= | ||
| 134 | */ | ||
| 135 | |||
| 136 | /* | ||
| 137 | ** In 5.2, could use 'luaL_testudata'... | ||
| 138 | */ | ||
| 139 | static int testpattern (lua_State *L, int idx) { | ||
| 140 | if (lua_touserdata(L, idx)) { /* value is a userdata? */ | ||
| 141 | if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ | ||
| 142 | luaL_getmetatable(L, PATTERN_T); | ||
| 143 | if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */ | ||
| 144 | lua_pop(L, 2); /* remove both metatables */ | ||
| 145 | return 1; | ||
| 146 | } | ||
| 147 | } | ||
| 148 | } | ||
| 149 | return 0; | ||
| 150 | } | ||
| 151 | |||
| 152 | |||
| 153 | static Pattern *getpattern (lua_State *L, int idx) { | ||
| 154 | return (Pattern *)luaL_checkudata(L, idx, PATTERN_T); | ||
| 155 | } | ||
| 156 | |||
| 157 | |||
| 158 | static int getsize (lua_State *L, int idx) { | ||
| 159 | return (lua_objlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1; | ||
| 160 | } | ||
| 161 | |||
| 162 | |||
| 163 | static TTree *gettree (lua_State *L, int idx, int *len) { | ||
| 164 | Pattern *p = getpattern(L, idx); | ||
| 165 | if (len) | ||
| 166 | *len = getsize(L, idx); | ||
| 167 | return p->tree; | ||
| 168 | } | ||
| 169 | |||
| 170 | |||
| 171 | /* | ||
| 172 | ** create a pattern | ||
| 173 | */ | ||
| 174 | static TTree *newtree (lua_State *L, int len) { | ||
| 175 | size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern); | ||
| 176 | Pattern *p = (Pattern *)lua_newuserdata(L, size); | ||
| 177 | luaL_getmetatable(L, PATTERN_T); | ||
| 178 | lua_setmetatable(L, -2); | ||
| 179 | p->code = NULL; p->codesize = 0; | ||
| 180 | return p->tree; | ||
| 181 | } | ||
| 182 | |||
| 183 | |||
| 184 | static TTree *newleaf (lua_State *L, int tag) { | ||
| 185 | TTree *tree = newtree(L, 1); | ||
| 186 | tree->tag = tag; | ||
| 187 | return tree; | ||
| 188 | } | ||
| 189 | |||
| 190 | |||
| 191 | /* labeled failure begin */ | ||
| 192 | static TTree *newlabelleaf (lua_State *L, Labelset ls) { | ||
| 193 | TTree *tree = newtree(L, 1); | ||
| 194 | tree->tag = TThrow; | ||
| 195 | tree->labels = ls; | ||
| 196 | return tree; | ||
| 197 | } | ||
| 198 | /* labeled failure end */ | ||
| 199 | |||
| 200 | |||
| 201 | static TTree *newcharset (lua_State *L) { | ||
| 202 | TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); | ||
| 203 | tree->tag = TSet; | ||
| 204 | loopset(i, treebuffer(tree)[i] = 0); | ||
| 205 | return tree; | ||
| 206 | } | ||
| 207 | |||
| 208 | |||
| 209 | /* | ||
| 210 | ** add to tree a sequence where first sibling is 'sib' (with size | ||
| 211 | ** 'sibsize'); returns position for second sibling | ||
| 212 | */ | ||
| 213 | static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) { | ||
| 214 | tree->tag = TSeq; tree->u.ps = sibsize + 1; | ||
| 215 | memcpy(sib1(tree), sib, sibsize * sizeof(TTree)); | ||
| 216 | return sib2(tree); | ||
| 217 | } | ||
| 218 | |||
| 219 | |||
| 220 | /* | ||
| 221 | ** Add element 'idx' to 'ktable' of pattern at the top of the stack; | ||
| 222 | ** create new 'ktable' if necessary. Return index of new element. | ||
| 223 | */ | ||
| 224 | static int addtoktable (lua_State *L, int idx) { | ||
| 225 | if (idx == 0 || lua_isnil(L, idx)) /* no actual value to insert? */ | ||
| 226 | return 0; | ||
| 227 | else { | ||
| 228 | int n; | ||
| 229 | lua_getfenv(L, -1); /* get ktable from pattern */ | ||
| 230 | n = lua_objlen(L, -1); | ||
| 231 | if (n == 0) { /* is it empty/non-existent? */ | ||
| 232 | lua_pop(L, 1); /* remove it */ | ||
| 233 | lua_createtable(L, 1, 0); /* create a fresh table */ | ||
| 234 | } | ||
| 235 | lua_pushvalue(L, idx); /* element to be added */ | ||
| 236 | lua_rawseti(L, -2, n + 1); | ||
| 237 | lua_setfenv(L, -2); /* set it as ktable for pattern */ | ||
| 238 | return n + 1; | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | |||
| 243 | /* | ||
| 244 | ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got | ||
| 245 | ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it | ||
| 246 | ** must build a sequence of sequence of sequence...) | ||
| 247 | */ | ||
| 248 | static void fillseq (TTree *tree, int tag, int n, const char *s) { | ||
| 249 | int i; | ||
| 250 | for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */ | ||
| 251 | tree->tag = TSeq; tree->u.ps = 2; | ||
| 252 | sib1(tree)->tag = tag; | ||
| 253 | sib1(tree)->u.n = s ? (byte)s[i] : 0; | ||
| 254 | tree = sib2(tree); | ||
| 255 | } | ||
| 256 | tree->tag = tag; /* last one does not need TSeq */ | ||
| 257 | tree->u.n = s ? (byte)s[i] : 0; | ||
| 258 | } | ||
| 259 | |||
| 260 | |||
| 261 | /* | ||
| 262 | ** Numbers as patterns: | ||
| 263 | ** 0 == true (always match); n == TAny repeated 'n' times; | ||
| 264 | ** -n == not (TAny repeated 'n' times) | ||
| 265 | */ | ||
| 266 | static TTree *numtree (lua_State *L, int n) { | ||
| 267 | if (n == 0) | ||
| 268 | return newleaf(L, TTrue); | ||
| 269 | else { | ||
| 270 | TTree *tree, *nd; | ||
| 271 | if (n > 0) | ||
| 272 | tree = nd = newtree(L, 2 * n - 1); | ||
| 273 | else { /* negative: code it as !(-n) */ | ||
| 274 | n = -n; | ||
| 275 | tree = newtree(L, 2 * n); | ||
| 276 | tree->tag = TNot; | ||
| 277 | nd = sib1(tree); | ||
| 278 | } | ||
| 279 | fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */ | ||
| 280 | return tree; | ||
| 281 | } | ||
| 282 | } | ||
| 283 | |||
| 284 | |||
| 285 | /* | ||
| 286 | ** Convert value at index 'idx' to a pattern | ||
| 287 | */ | ||
| 288 | static TTree *getpatt (lua_State *L, int idx, int *len) { | ||
| 289 | TTree *tree; | ||
| 290 | switch (lua_type(L, idx)) { | ||
| 291 | case LUA_TSTRING: { | ||
| 292 | size_t slen; | ||
| 293 | const char *s = lua_tolstring(L, idx, &slen); /* get string */ | ||
| 294 | if (slen == 0) /* empty? */ | ||
| 295 | tree = newleaf(L, TTrue); /* always match */ | ||
| 296 | else { | ||
| 297 | tree = newtree(L, 2 * (slen - 1) + 1); | ||
| 298 | fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */ | ||
| 299 | } | ||
| 300 | break; | ||
| 301 | } | ||
| 302 | case LUA_TNUMBER: { | ||
| 303 | int n = lua_tointeger(L, idx); | ||
| 304 | tree = numtree(L, n); | ||
| 305 | break; | ||
| 306 | } | ||
| 307 | case LUA_TBOOLEAN: { | ||
| 308 | tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse)); | ||
| 309 | break; | ||
| 310 | } | ||
| 311 | case LUA_TTABLE: { | ||
| 312 | tree = newgrammar(L, idx); | ||
| 313 | break; | ||
| 314 | } | ||
| 315 | case LUA_TFUNCTION: { | ||
| 316 | tree = newtree(L, 2); | ||
| 317 | tree->tag = TRunTime; | ||
| 318 | tree->key = addtoktable(L, idx); | ||
| 319 | sib1(tree)->tag = TTrue; | ||
| 320 | break; | ||
| 321 | } | ||
| 322 | default: { | ||
| 323 | return gettree(L, idx, len); | ||
| 324 | } | ||
| 325 | } | ||
| 326 | lua_replace(L, idx); /* put new tree into 'idx' slot */ | ||
| 327 | if (len) | ||
| 328 | *len = getsize(L, idx); | ||
| 329 | return tree; | ||
| 330 | } | ||
| 331 | |||
| 332 | |||
| 333 | /* | ||
| 334 | ** Return the number of elements in the ktable of pattern at 'idx'. | ||
| 335 | ** In Lua 5.2, default "environment" for patterns is nil, not | ||
| 336 | ** a table. Treat it as an empty table. In Lua 5.1, assumes that | ||
| 337 | ** the environment has no numeric indices (len == 0) | ||
| 338 | */ | ||
| 339 | static int ktablelen (lua_State *L, int idx) { | ||
| 340 | if (!lua_istable(L, idx)) return 0; | ||
| 341 | else return lua_objlen(L, idx); | ||
| 342 | } | ||
| 343 | |||
| 344 | |||
| 345 | /* | ||
| 346 | ** Concatentate the contents of table 'idx1' into table 'idx2'. | ||
| 347 | ** (Assume that both indices are negative.) | ||
| 348 | ** Return the original length of table 'idx2' | ||
| 349 | */ | ||
| 350 | static int concattable (lua_State *L, int idx1, int idx2) { | ||
| 351 | int i; | ||
| 352 | int n1 = ktablelen(L, idx1); | ||
| 353 | int n2 = ktablelen(L, idx2); | ||
| 354 | if (n1 == 0) return 0; /* nothing to correct */ | ||
| 355 | for (i = 1; i <= n1; i++) { | ||
| 356 | lua_rawgeti(L, idx1, i); | ||
| 357 | lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */ | ||
| 358 | } | ||
| 359 | return n2; | ||
| 360 | } | ||
| 361 | |||
| 362 | |||
| 363 | /* | ||
| 364 | ** Make a merge of ktables from p1 and p2 the ktable for the new | ||
| 365 | ** pattern at the top of the stack. | ||
| 366 | */ | ||
| 367 | static int joinktables (lua_State *L, int p1, int p2) { | ||
| 368 | int n1, n2; | ||
| 369 | lua_getfenv(L, p1); /* get ktables */ | ||
| 370 | lua_getfenv(L, p2); | ||
| 371 | n1 = ktablelen(L, -2); | ||
| 372 | n2 = ktablelen(L, -1); | ||
| 373 | if (n1 == 0 && n2 == 0) { /* are both tables empty? */ | ||
| 374 | lua_pop(L, 2); /* nothing to be done; pop tables */ | ||
| 375 | return 0; /* nothing to correct */ | ||
| 376 | } | ||
| 377 | if (n2 == 0 || lua_equal(L, -2, -1)) { /* second table is empty or equal? */ | ||
| 378 | lua_pop(L, 1); /* pop 2nd table */ | ||
| 379 | lua_setfenv(L, -2); /* set 1st ktable into new pattern */ | ||
| 380 | return 0; /* nothing to correct */ | ||
| 381 | } | ||
| 382 | if (n1 == 0) { /* first table is empty? */ | ||
| 383 | lua_setfenv(L, -3); /* set 2nd table into new pattern */ | ||
| 384 | lua_pop(L, 1); /* pop 1st table */ | ||
| 385 | return 0; /* nothing to correct */ | ||
| 386 | } | ||
| 387 | else { | ||
| 388 | lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */ | ||
| 389 | /* stack: new p; ktable p1; ktable p2; new ktable */ | ||
| 390 | concattable(L, -3, -1); /* from p1 into new ktable */ | ||
| 391 | concattable(L, -2, -1); /* from p2 into new ktable */ | ||
| 392 | lua_setfenv(L, -4); /* new ktable becomes p env */ | ||
| 393 | lua_pop(L, 2); /* pop other ktables */ | ||
| 394 | return n1; /* correction for indices from p2 */ | ||
| 395 | } | ||
| 396 | } | ||
| 397 | |||
| 398 | |||
| 399 | static void correctkeys (TTree *tree, int n) { | ||
| 400 | if (n == 0) return; /* no correction? */ | ||
| 401 | tailcall: | ||
| 402 | switch (tree->tag) { | ||
| 403 | case TOpenCall: case TCall: case TRunTime: case TRule: { | ||
| 404 | if (tree->key > 0) | ||
| 405 | tree->key += n; | ||
| 406 | break; | ||
| 407 | } | ||
| 408 | case TCapture: { | ||
| 409 | if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum) | ||
| 410 | tree->key += n; | ||
| 411 | break; | ||
| 412 | } | ||
| 413 | default: break; | ||
| 414 | } | ||
| 415 | switch (numsiblings[tree->tag]) { | ||
| 416 | case 1: /* correctkeys(sib1(tree), n); */ | ||
| 417 | tree = sib1(tree); goto tailcall; | ||
| 418 | case 2: | ||
| 419 | correctkeys(sib1(tree), n); | ||
| 420 | tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */ | ||
| 421 | default: assert(numsiblings[tree->tag] == 0); break; | ||
| 422 | } | ||
| 423 | } | ||
| 424 | |||
| 425 | |||
| 426 | /* | ||
| 427 | ** copy 'ktable' of element 'idx' to new tree (on top of stack) | ||
| 428 | */ | ||
| 429 | static void copyktable (lua_State *L, int idx) { | ||
| 430 | lua_getfenv(L, idx); | ||
| 431 | lua_setfenv(L, -2); | ||
| 432 | } | ||
| 433 | |||
| 434 | |||
| 435 | /* | ||
| 436 | ** merge 'ktable' from rule at stack index 'idx' into 'ktable' | ||
| 437 | ** from tree at the top of the stack, and correct corresponding | ||
| 438 | ** tree. | ||
| 439 | */ | ||
| 440 | static void mergektable (lua_State *L, int idx, TTree *rule) { | ||
| 441 | int n; | ||
| 442 | lua_getfenv(L, -1); /* get ktables */ | ||
| 443 | lua_getfenv(L, idx); | ||
| 444 | n = concattable(L, -1, -2); | ||
| 445 | lua_pop(L, 2); /* remove both ktables */ | ||
| 446 | correctkeys(rule, n); | ||
| 447 | } | ||
| 448 | |||
| 449 | |||
| 450 | /* | ||
| 451 | ** create a new tree, whith a new root and one sibling. | ||
| 452 | ** Sibling must be on the Lua stack, at index 1. | ||
| 453 | */ | ||
| 454 | static TTree *newroot1sib (lua_State *L, int tag) { | ||
| 455 | int s1; | ||
| 456 | TTree *tree1 = getpatt(L, 1, &s1); | ||
| 457 | TTree *tree = newtree(L, 1 + s1); /* create new tree */ | ||
| 458 | tree->tag = tag; | ||
| 459 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); | ||
| 460 | copyktable(L, 1); | ||
| 461 | return tree; | ||
| 462 | } | ||
| 463 | |||
| 464 | |||
| 465 | /* | ||
| 466 | ** create a new tree, whith a new root and 2 siblings. | ||
| 467 | ** Siblings must be on the Lua stack, first one at index 1. | ||
| 468 | */ | ||
| 469 | static TTree *newroot2sib (lua_State *L, int tag) { | ||
| 470 | int s1, s2; | ||
| 471 | TTree *tree1 = getpatt(L, 1, &s1); | ||
| 472 | TTree *tree2 = getpatt(L, 2, &s2); | ||
| 473 | TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */ | ||
| 474 | tree->tag = tag; | ||
| 475 | tree->u.ps = 1 + s1; | ||
| 476 | memcpy(sib1(tree), tree1, s1 * sizeof(TTree)); | ||
| 477 | memcpy(sib2(tree), tree2, s2 * sizeof(TTree)); | ||
| 478 | correctkeys(sib2(tree), joinktables(L, 1, 2)); | ||
| 479 | return tree; | ||
| 480 | } | ||
| 481 | |||
| 482 | |||
| 483 | static int lp_P (lua_State *L) { | ||
| 484 | luaL_checkany(L, 1); | ||
| 485 | getpatt(L, 1, NULL); | ||
| 486 | lua_settop(L, 1); | ||
| 487 | return 1; | ||
| 488 | } | ||
| 489 | |||
| 490 | |||
| 491 | /* | ||
| 492 | ** sequence operator; optimizations: | ||
| 493 | ** false x => false, x true => x, true x => x | ||
| 494 | ** (cannot do x . false => false because x may have runtime captures) | ||
| 495 | */ | ||
| 496 | static int lp_seq (lua_State *L) { | ||
| 497 | TTree *tree1 = getpatt(L, 1, NULL); | ||
| 498 | TTree *tree2 = getpatt(L, 2, NULL); | ||
| 499 | if (tree1->tag == TFalse || tree2->tag == TTrue) | ||
| 500 | lua_pushvalue(L, 1); /* false . x == false, x . true = x */ | ||
| 501 | else if (tree1->tag == TTrue) | ||
| 502 | lua_pushvalue(L, 2); /* true . x = x */ | ||
| 503 | else | ||
| 504 | newroot2sib(L, TSeq); | ||
| 505 | return 1; | ||
| 506 | } | ||
| 507 | |||
| 508 | |||
| 509 | /* | ||
| 510 | ** choice operator; optimizations: | ||
| 511 | ** charset / charset => charset | ||
| 512 | ** true / x => true, x / false => x, false / x => x | ||
| 513 | ** (x / true is not equivalent to true) | ||
| 514 | */ | ||
| 515 | static int lp_choice (lua_State *L) { | ||
| 516 | Charset st1, st2; | ||
| 517 | TTree *t1 = getpatt(L, 1, NULL); | ||
| 518 | TTree *t2 = getpatt(L, 2, NULL); | ||
| 519 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | ||
| 520 | TTree *t = newcharset(L); | ||
| 521 | loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); | ||
| 522 | } | ||
| 523 | else if (nofail(t1) || t2->tag == TFalse) | ||
| 524 | lua_pushvalue(L, 1); /* true / x => true, x / false => x */ | ||
| 525 | else if (t1->tag == TFalse) | ||
| 526 | lua_pushvalue(L, 2); /* false / x => x */ | ||
| 527 | else | ||
| 528 | newroot2sib(L, TChoice); | ||
| 529 | return 1; | ||
| 530 | } | ||
| 531 | |||
| 532 | |||
| 533 | /* | ||
| 534 | ** p^n | ||
| 535 | */ | ||
| 536 | static int lp_star (lua_State *L) { | ||
| 537 | int size1; | ||
| 538 | int n = luaL_checkint(L, 2); | ||
| 539 | TTree *tree1 = gettree(L, 1, &size1); | ||
| 540 | if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */ | ||
| 541 | TTree *tree = newtree(L, (n + 1) * (size1 + 1)); | ||
| 542 | if (nullable(tree1)) | ||
| 543 | luaL_error(L, "loop body may accept empty string"); | ||
| 544 | while (n--) /* repeat 'n' times */ | ||
| 545 | tree = seqaux(tree, tree1, size1); | ||
| 546 | tree->tag = TRep; | ||
| 547 | memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); | ||
| 548 | } | ||
| 549 | else { /* choice (seq tree1 ... choice tree1 true ...) true */ | ||
| 550 | TTree *tree; | ||
| 551 | n = -n; | ||
| 552 | /* size = (choice + seq + tree1 + true) * n, but the last has no seq */ | ||
| 553 | tree = newtree(L, n * (size1 + 3) - 1); | ||
| 554 | for (; n > 1; n--) { /* repeat (n - 1) times */ | ||
| 555 | tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2; | ||
| 556 | sib2(tree)->tag = TTrue; | ||
| 557 | tree = sib1(tree); | ||
| 558 | tree = seqaux(tree, tree1, size1); | ||
| 559 | } | ||
| 560 | tree->tag = TChoice; tree->u.ps = size1 + 1; | ||
| 561 | sib2(tree)->tag = TTrue; | ||
| 562 | memcpy(sib1(tree), tree1, size1 * sizeof(TTree)); | ||
| 563 | } | ||
| 564 | copyktable(L, 1); | ||
| 565 | return 1; | ||
| 566 | } | ||
| 567 | |||
| 568 | |||
| 569 | /* | ||
| 570 | ** #p == &p | ||
| 571 | */ | ||
| 572 | static int lp_and (lua_State *L) { | ||
| 573 | newroot1sib(L, TAnd); | ||
| 574 | return 1; | ||
| 575 | } | ||
| 576 | |||
| 577 | |||
| 578 | /* | ||
| 579 | ** -p == !p | ||
| 580 | */ | ||
| 581 | static int lp_not (lua_State *L) { | ||
| 582 | newroot1sib(L, TNot); | ||
| 583 | return 1; | ||
| 584 | } | ||
| 585 | |||
| 586 | |||
| 587 | /* | ||
| 588 | ** [t1 - t2] == Seq (Not t2) t1 | ||
| 589 | ** If t1 and t2 are charsets, make their difference. | ||
| 590 | */ | ||
| 591 | static int lp_sub (lua_State *L) { | ||
| 592 | Charset st1, st2; | ||
| 593 | int s1, s2; | ||
| 594 | TTree *t1 = getpatt(L, 1, &s1); | ||
| 595 | TTree *t2 = getpatt(L, 2, &s2); | ||
| 596 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | ||
| 597 | TTree *t = newcharset(L); | ||
| 598 | loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); | ||
| 599 | } | ||
| 600 | else { | ||
| 601 | TTree *tree = newtree(L, 2 + s1 + s2); | ||
| 602 | tree->tag = TSeq; /* sequence of... */ | ||
| 603 | tree->u.ps = 2 + s2; | ||
| 604 | sib1(tree)->tag = TNot; /* ...not... */ | ||
| 605 | memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */ | ||
| 606 | memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */ | ||
| 607 | correctkeys(sib1(tree), joinktables(L, 1, 2)); | ||
| 608 | } | ||
| 609 | return 1; | ||
| 610 | } | ||
| 611 | |||
| 612 | |||
| 613 | static int lp_set (lua_State *L) { | ||
| 614 | size_t l; | ||
| 615 | const char *s = luaL_checklstring(L, 1, &l); | ||
| 616 | TTree *tree = newcharset(L); | ||
| 617 | while (l--) { | ||
| 618 | setchar(treebuffer(tree), (byte)(*s)); | ||
| 619 | s++; | ||
| 620 | } | ||
| 621 | return 1; | ||
| 622 | } | ||
| 623 | |||
| 624 | |||
| 625 | static int lp_range (lua_State *L) { | ||
| 626 | int arg; | ||
| 627 | int top = lua_gettop(L); | ||
| 628 | TTree *tree = newcharset(L); | ||
| 629 | for (arg = 1; arg <= top; arg++) { | ||
| 630 | int c; | ||
| 631 | size_t l; | ||
| 632 | const char *r = luaL_checklstring(L, arg, &l); | ||
| 633 | luaL_argcheck(L, l == 2, arg, "range must have two characters"); | ||
| 634 | for (c = (byte)r[0]; c <= (byte)r[1]; c++) | ||
| 635 | setchar(treebuffer(tree), c); | ||
| 636 | } | ||
| 637 | return 1; | ||
| 638 | } | ||
| 639 | |||
| 640 | |||
| 641 | /* | ||
| 642 | ** Look-behind predicate | ||
| 643 | */ | ||
| 644 | static int lp_behind (lua_State *L) { | ||
| 645 | TTree *tree; | ||
| 646 | TTree *tree1 = getpatt(L, 1, NULL); | ||
| 647 | int n = fixedlen(tree1); | ||
| 648 | luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures"); | ||
| 649 | luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length"); | ||
| 650 | luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind"); | ||
| 651 | tree = newroot1sib(L, TBehind); | ||
| 652 | tree->u.n = n; | ||
| 653 | return 1; | ||
| 654 | } | ||
| 655 | |||
| 656 | |||
| 657 | /* labeled failure begin */ | ||
| 658 | /* | ||
| 659 | ** Throws a label or a set of labels | ||
| 660 | */ | ||
| 661 | static int lp_throw (lua_State *L) { | ||
| 662 | int n = lua_gettop(L); | ||
| 663 | Labelset ls = 0; | ||
| 664 | int i; | ||
| 665 | for (i = 1; i <= n; i++) { | ||
| 666 | int d = luaL_checkint(L, i); | ||
| 667 | luaL_argcheck(L, d >= 0 && d < (int)MAXLABELS, i, "invalid label index"); | ||
| 668 | setlabel(ls, d); | ||
| 669 | } | ||
| 670 | newlabelleaf(L, ls); | ||
| 671 | return 1; | ||
| 672 | } | ||
| 673 | |||
| 674 | /* | ||
| 675 | ** labeled choice function | ||
| 676 | */ | ||
| 677 | static int lp_labchoice (lua_State *L) { | ||
| 678 | TTree *tree; | ||
| 679 | int n = lua_gettop(L); | ||
| 680 | int i; | ||
| 681 | Labelset ls = 0; | ||
| 682 | for (i = 3; i <= n; i++) { | ||
| 683 | int d = luaL_checkint(L, i); | ||
| 684 | luaL_argcheck(L, d >= 0 && d < (int)MAXLABELS, i, "invalid label index"); | ||
| 685 | setlabel(ls, d); | ||
| 686 | } | ||
| 687 | tree = newroot2sib(L, TLabChoice); | ||
| 688 | tree->labels = ls; | ||
| 689 | return 1; | ||
| 690 | } | ||
| 691 | /* labeled failure end */ | ||
| 692 | |||
| 693 | |||
| 694 | /* | ||
| 695 | ** Create a non-terminal | ||
| 696 | */ | ||
| 697 | static int lp_V (lua_State *L) { | ||
| 698 | TTree *tree = newleaf(L, TOpenCall); | ||
| 699 | luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); | ||
| 700 | tree->key = addtoktable(L, 1); | ||
| 701 | return 1; | ||
| 702 | } | ||
| 703 | |||
| 704 | |||
| 705 | /* | ||
| 706 | ** Create a tree for a non-empty capture, with a body and | ||
| 707 | ** optionally with an associated Lua value (at index 'labelidx' in the | ||
| 708 | ** stack) | ||
| 709 | */ | ||
| 710 | static int capture_aux (lua_State *L, int cap, int labelidx) { | ||
| 711 | TTree *tree = newroot1sib(L, TCapture); | ||
| 712 | tree->cap = cap; | ||
| 713 | tree->key = addtoktable(L, labelidx); | ||
| 714 | return 1; | ||
| 715 | } | ||
| 716 | |||
| 717 | |||
| 718 | /* | ||
| 719 | ** Fill a tree with an empty capture, using an empty (TTrue) sibling. | ||
| 720 | */ | ||
| 721 | static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) { | ||
| 722 | tree->tag = TCapture; | ||
| 723 | tree->cap = cap; | ||
| 724 | tree->key = addtoktable(L, idx); | ||
| 725 | sib1(tree)->tag = TTrue; | ||
| 726 | return tree; | ||
| 727 | } | ||
| 728 | |||
| 729 | |||
| 730 | /* | ||
| 731 | ** Create a tree for an empty capture | ||
| 732 | */ | ||
| 733 | static TTree *newemptycap (lua_State *L, int cap, int idx) { | ||
| 734 | return auxemptycap(L, newtree(L, 2), cap, idx); | ||
| 735 | } | ||
| 736 | |||
| 737 | |||
| 738 | /* | ||
| 739 | ** Captures with syntax p / v | ||
| 740 | ** (function capture, query capture, string capture, or number capture) | ||
| 741 | */ | ||
| 742 | static int lp_divcapture (lua_State *L) { | ||
| 743 | switch (lua_type(L, 2)) { | ||
| 744 | case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2); | ||
| 745 | case LUA_TTABLE: return capture_aux(L, Cquery, 2); | ||
| 746 | case LUA_TSTRING: return capture_aux(L, Cstring, 2); | ||
| 747 | case LUA_TNUMBER: { | ||
| 748 | int n = lua_tointeger(L, 2); | ||
| 749 | TTree *tree = newroot1sib(L, TCapture); | ||
| 750 | luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number"); | ||
| 751 | tree->cap = Cnum; | ||
| 752 | tree->key = n; | ||
| 753 | return 1; | ||
| 754 | } | ||
| 755 | default: return luaL_argerror(L, 2, "invalid replacement value"); | ||
| 756 | } | ||
| 757 | } | ||
| 758 | |||
| 759 | |||
| 760 | static int lp_substcapture (lua_State *L) { | ||
| 761 | return capture_aux(L, Csubst, 0); | ||
| 762 | } | ||
| 763 | |||
| 764 | |||
| 765 | static int lp_tablecapture (lua_State *L) { | ||
| 766 | return capture_aux(L, Ctable, 0); | ||
| 767 | } | ||
| 768 | |||
| 769 | |||
| 770 | static int lp_groupcapture (lua_State *L) { | ||
| 771 | if (lua_isnoneornil(L, 2)) | ||
| 772 | return capture_aux(L, Cgroup, 0); | ||
| 773 | else { | ||
| 774 | luaL_checkstring(L, 2); | ||
| 775 | return capture_aux(L, Cgroup, 2); | ||
| 776 | } | ||
| 777 | } | ||
| 778 | |||
| 779 | |||
| 780 | static int lp_foldcapture (lua_State *L) { | ||
| 781 | luaL_checktype(L, 2, LUA_TFUNCTION); | ||
| 782 | return capture_aux(L, Cfold, 2); | ||
| 783 | } | ||
| 784 | |||
| 785 | |||
| 786 | static int lp_simplecapture (lua_State *L) { | ||
| 787 | return capture_aux(L, Csimple, 0); | ||
| 788 | } | ||
| 789 | |||
| 790 | |||
| 791 | static int lp_poscapture (lua_State *L) { | ||
| 792 | newemptycap(L, Cposition, 0); | ||
| 793 | return 1; | ||
| 794 | } | ||
| 795 | |||
| 796 | |||
| 797 | static int lp_argcapture (lua_State *L) { | ||
| 798 | int n = luaL_checkint(L, 1); | ||
| 799 | TTree *tree = newemptycap(L, Carg, 0); | ||
| 800 | tree->key = n; | ||
| 801 | luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index"); | ||
| 802 | return 1; | ||
| 803 | } | ||
| 804 | |||
| 805 | |||
| 806 | static int lp_backref (lua_State *L) { | ||
| 807 | luaL_checkstring(L, 1); | ||
| 808 | newemptycap(L, Cbackref, 1); | ||
| 809 | return 1; | ||
| 810 | } | ||
| 811 | |||
| 812 | |||
| 813 | /* | ||
| 814 | ** Constant capture | ||
| 815 | */ | ||
| 816 | static int lp_constcapture (lua_State *L) { | ||
| 817 | int i; | ||
| 818 | int n = lua_gettop(L); /* number of values */ | ||
| 819 | if (n == 0) /* no values? */ | ||
| 820 | newleaf(L, TTrue); /* no capture */ | ||
| 821 | else if (n == 1) | ||
| 822 | newemptycap(L, Cconst, 1); /* single constant capture */ | ||
| 823 | else { /* create a group capture with all values */ | ||
| 824 | TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2); | ||
| 825 | tree->tag = TCapture; | ||
| 826 | tree->cap = Cgroup; | ||
| 827 | tree->key = 0; | ||
| 828 | tree = sib1(tree); | ||
| 829 | for (i = 1; i <= n - 1; i++) { | ||
| 830 | tree->tag = TSeq; | ||
| 831 | tree->u.ps = 3; /* skip TCapture and its sibling */ | ||
| 832 | auxemptycap(L, sib1(tree), Cconst, i); | ||
| 833 | tree = sib2(tree); | ||
| 834 | } | ||
| 835 | auxemptycap(L, tree, Cconst, i); | ||
| 836 | } | ||
| 837 | return 1; | ||
| 838 | } | ||
| 839 | |||
| 840 | |||
| 841 | static int lp_matchtime (lua_State *L) { | ||
| 842 | TTree *tree; | ||
| 843 | luaL_checktype(L, 2, LUA_TFUNCTION); | ||
| 844 | tree = newroot1sib(L, TRunTime); | ||
| 845 | tree->key = addtoktable(L, 2); | ||
| 846 | return 1; | ||
| 847 | } | ||
| 848 | |||
| 849 | /* }====================================================== */ | ||
| 850 | |||
| 851 | |||
| 852 | /* | ||
| 853 | ** {====================================================== | ||
| 854 | ** Grammar - Tree generation | ||
| 855 | ** ======================================================= | ||
| 856 | */ | ||
| 857 | |||
| 858 | /* | ||
| 859 | ** push on the stack the index and the pattern for the | ||
| 860 | ** initial rule of grammar at index 'arg' in the stack; | ||
| 861 | ** also add that index into position table. | ||
| 862 | */ | ||
| 863 | static void getfirstrule (lua_State *L, int arg, int postab) { | ||
| 864 | lua_rawgeti(L, arg, 1); /* access first element */ | ||
| 865 | if (lua_isstring(L, -1)) { /* is it the name of initial rule? */ | ||
| 866 | lua_pushvalue(L, -1); /* duplicate it to use as key */ | ||
| 867 | lua_gettable(L, arg); /* get associated rule */ | ||
| 868 | } | ||
| 869 | else { | ||
| 870 | lua_pushinteger(L, 1); /* key for initial rule */ | ||
| 871 | lua_insert(L, -2); /* put it before rule */ | ||
| 872 | } | ||
| 873 | if (!testpattern(L, -1)) { /* initial rule not a pattern? */ | ||
| 874 | if (lua_isnil(L, -1)) | ||
| 875 | luaL_error(L, "grammar has no initial rule"); | ||
| 876 | else | ||
| 877 | luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2)); | ||
| 878 | } | ||
| 879 | lua_pushvalue(L, -2); /* push key */ | ||
| 880 | lua_pushinteger(L, 1); /* push rule position (after TGrammar) */ | ||
| 881 | lua_settable(L, postab); /* insert pair at position table */ | ||
| 882 | } | ||
| 883 | |||
| 884 | /* | ||
| 885 | ** traverse grammar at index 'arg', pushing all its keys and patterns | ||
| 886 | ** into the stack. Create a new table (before all pairs key-pattern) to | ||
| 887 | ** collect all keys and their associated positions in the final tree | ||
| 888 | ** (the "position table"). | ||
| 889 | ** Return the number of rules and (in 'totalsize') the total size | ||
| 890 | ** for the new tree. | ||
| 891 | */ | ||
| 892 | static int collectrules (lua_State *L, int arg, int *totalsize) { | ||
| 893 | int n = 1; /* to count number of rules */ | ||
| 894 | int postab = lua_gettop(L) + 1; /* index of position table */ | ||
| 895 | int size; /* accumulator for total size */ | ||
| 896 | lua_newtable(L); /* create position table */ | ||
| 897 | getfirstrule(L, arg, postab); | ||
| 898 | size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */ | ||
| 899 | lua_pushnil(L); /* prepare to traverse grammar table */ | ||
| 900 | while (lua_next(L, arg) != 0) { | ||
| 901 | if (lua_tonumber(L, -2) == 1 || | ||
| 902 | lua_equal(L, -2, postab + 1)) { /* initial rule? */ | ||
| 903 | lua_pop(L, 1); /* remove value (keep key for lua_next) */ | ||
| 904 | continue; | ||
| 905 | } | ||
| 906 | if (!testpattern(L, -1)) /* value is not a pattern? */ | ||
| 907 | luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2)); | ||
| 908 | luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); | ||
| 909 | lua_pushvalue(L, -2); /* push key (to insert into position table) */ | ||
| 910 | lua_pushinteger(L, size); | ||
| 911 | lua_settable(L, postab); | ||
| 912 | size += 1 + getsize(L, -1); /* update size */ | ||
| 913 | lua_pushvalue(L, -2); /* push key (for next lua_next) */ | ||
| 914 | n++; | ||
| 915 | } | ||
| 916 | *totalsize = size + 1; /* TTrue to finish list of rules */ | ||
| 917 | return n; | ||
| 918 | } | ||
| 919 | |||
| 920 | |||
| 921 | static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | ||
| 922 | int i; | ||
| 923 | TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ | ||
| 924 | for (i = 0; i < n; i++) { /* add each rule into new tree */ | ||
| 925 | int ridx = frule + 2*i + 1; /* index of i-th rule */ | ||
| 926 | int rulesize; | ||
| 927 | TTree *rn = gettree(L, ridx, &rulesize); | ||
| 928 | nd->tag = TRule; | ||
| 929 | nd->key = 0; | ||
| 930 | nd->cap = i; /* rule number */ | ||
| 931 | nd->u.ps = rulesize + 1; /* point to next rule */ | ||
| 932 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ | ||
| 933 | mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ | ||
| 934 | nd = sib2(nd); /* move to next rule */ | ||
| 935 | } | ||
| 936 | nd->tag = TTrue; /* finish list of rules */ | ||
| 937 | } | ||
| 938 | |||
| 939 | |||
| 940 | /* | ||
| 941 | ** Check whether a tree has potential infinite loops | ||
| 942 | */ | ||
| 943 | static int checkloops (TTree *tree) { | ||
| 944 | tailcall: | ||
| 945 | if (tree->tag == TRep && nullable(sib1(tree))) | ||
| 946 | return 1; | ||
| 947 | else if (tree->tag == TGrammar) | ||
| 948 | return 0; /* sub-grammars already checked */ | ||
| 949 | else { | ||
| 950 | switch (numsiblings[tree->tag]) { | ||
| 951 | case 1: /* return checkloops(sib1(tree)); */ | ||
| 952 | tree = sib1(tree); goto tailcall; | ||
| 953 | case 2: | ||
| 954 | if (checkloops(sib1(tree))) return 1; | ||
| 955 | /* else return checkloops(sib2(tree)); */ | ||
| 956 | tree = sib2(tree); goto tailcall; | ||
| 957 | default: assert(numsiblings[tree->tag] == 0); return 0; | ||
| 958 | } | ||
| 959 | } | ||
| 960 | } | ||
| 961 | |||
| 962 | |||
| 963 | static int verifyerror (lua_State *L, int *passed, int npassed) { | ||
| 964 | int i, j; | ||
| 965 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | ||
| 966 | for (j = i - 1; j >= 0; j--) { | ||
| 967 | if (passed[i] == passed[j]) { | ||
| 968 | lua_rawgeti(L, -1, passed[i]); /* get rule's key */ | ||
| 969 | return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); | ||
| 970 | } | ||
| 971 | } | ||
| 972 | } | ||
| 973 | return luaL_error(L, "too many left calls in grammar"); | ||
| 974 | } | ||
| 975 | |||
| 976 | |||
| 977 | /* | ||
| 978 | ** Check whether a rule can be left recursive; raise an error in that | ||
| 979 | ** case; otherwise return 1 iff pattern is nullable. Assume ktable at | ||
| 980 | ** the top of the stack. | ||
| 981 | */ | ||
| 982 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | ||
| 983 | int nullable) { | ||
| 984 | tailcall: | ||
| 985 | switch (tree->tag) { | ||
| 986 | case TChar: case TSet: case TAny: | ||
| 987 | case TFalse: case TThrow: /* labeled failure */ | ||
| 988 | return nullable; /* cannot pass from here */ | ||
| 989 | case TTrue: | ||
| 990 | case TBehind: /* look-behind cannot have calls */ | ||
| 991 | return 1; | ||
| 992 | case TNot: case TAnd: case TRep: | ||
| 993 | /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ | ||
| 994 | tree = sib1(tree); nullable = 1; goto tailcall; | ||
| 995 | case TCapture: case TRunTime: | ||
| 996 | /* return verifyrule(L, sib1(tree), passed, npassed); */ | ||
| 997 | tree = sib1(tree); goto tailcall; | ||
| 998 | case TCall: | ||
| 999 | /* return verifyrule(L, sib2(tree), passed, npassed); */ | ||
| 1000 | tree = sib2(tree); goto tailcall; | ||
| 1001 | case TSeq: /* only check 2nd child if first is nullable */ | ||
| 1002 | if (!verifyrule(L, sib1(tree), passed, npassed, 0)) | ||
| 1003 | return nullable; | ||
| 1004 | /* else return verifyrule(L, sib2(tree), passed, npassed); */ | ||
| 1005 | tree = sib2(tree); goto tailcall; | ||
| 1006 | case TChoice: case TLabChoice: /* must check both children */ /* labeled failure */ | ||
| 1007 | nullable = verifyrule(L, sib1(tree), passed, npassed, nullable); | ||
| 1008 | /* return verifyrule(L, sib2(tree), passed, npassed, nullable); */ | ||
| 1009 | tree = sib2(tree); goto tailcall; | ||
| 1010 | case TRule: | ||
| 1011 | if (npassed >= MAXRULES) | ||
| 1012 | return verifyerror(L, passed, npassed); | ||
| 1013 | else { | ||
| 1014 | passed[npassed++] = tree->key; | ||
| 1015 | /* return verifyrule(L, sib1(tree), passed, npassed); */ | ||
| 1016 | tree = sib1(tree); goto tailcall; | ||
| 1017 | } | ||
| 1018 | case TGrammar: | ||
| 1019 | return nullable(tree); /* sub-grammar cannot be left recursive */ | ||
| 1020 | default: assert(0); return 0; | ||
| 1021 | } | ||
| 1022 | } | ||
| 1023 | |||
| 1024 | |||
| 1025 | static void verifygrammar (lua_State *L, TTree *grammar) { | ||
| 1026 | int passed[MAXRULES]; | ||
| 1027 | TTree *rule; | ||
| 1028 | /* check left-recursive rules */ | ||
| 1029 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | ||
| 1030 | if (rule->key == 0) continue; /* unused rule */ | ||
| 1031 | verifyrule(L, sib1(rule), passed, 0, 0); | ||
| 1032 | } | ||
| 1033 | assert(rule->tag == TTrue); | ||
| 1034 | /* check infinite loops inside rules */ | ||
| 1035 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | ||
| 1036 | if (rule->key == 0) continue; /* unused rule */ | ||
| 1037 | if (checkloops(sib1(rule))) { | ||
| 1038 | lua_rawgeti(L, -1, rule->key); /* get rule's key */ | ||
| 1039 | luaL_error(L, "empty loop in rule '%s'", val2str(L, -1)); | ||
| 1040 | } | ||
| 1041 | } | ||
| 1042 | assert(rule->tag == TTrue); | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | |||
| 1046 | /* | ||
| 1047 | ** Give a name for the initial rule if it is not referenced | ||
| 1048 | */ | ||
| 1049 | static void initialrulename (lua_State *L, TTree *grammar, int frule) { | ||
| 1050 | if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */ | ||
| 1051 | int n = lua_objlen(L, -1) + 1; /* index for name */ | ||
| 1052 | lua_pushvalue(L, frule); /* rule's name */ | ||
| 1053 | lua_rawseti(L, -2, n); /* ktable was on the top of the stack */ | ||
| 1054 | sib1(grammar)->key = n; | ||
| 1055 | } | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | |||
| 1059 | static TTree *newgrammar (lua_State *L, int arg) { | ||
| 1060 | int treesize; | ||
| 1061 | int frule = lua_gettop(L) + 2; /* position of first rule's key */ | ||
| 1062 | int n = collectrules(L, arg, &treesize); | ||
| 1063 | TTree *g = newtree(L, treesize); | ||
| 1064 | luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules"); | ||
| 1065 | g->tag = TGrammar; g->u.n = n; | ||
| 1066 | lua_newtable(L); /* create 'ktable' */ | ||
| 1067 | lua_setfenv(L, -2); | ||
| 1068 | buildgrammar(L, g, frule, n); | ||
| 1069 | lua_getfenv(L, -1); /* get 'ktable' for new tree */ | ||
| 1070 | finalfix(L, frule - 1, g, sib1(g)); | ||
| 1071 | initialrulename(L, g, frule); | ||
| 1072 | verifygrammar(L, g); | ||
| 1073 | lua_pop(L, 1); /* remove 'ktable' */ | ||
| 1074 | lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */ | ||
| 1075 | lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */ | ||
| 1076 | return g; /* new table at the top of the stack */ | ||
| 1077 | } | ||
| 1078 | |||
| 1079 | /* }====================================================== */ | ||
| 1080 | |||
| 1081 | |||
| 1082 | static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) { | ||
| 1083 | lua_getfenv(L, idx); /* push 'ktable' (may be used by 'finalfix') */ | ||
| 1084 | finalfix(L, 0, NULL, p->tree); | ||
| 1085 | lua_pop(L, 1); /* remove 'ktable' */ | ||
| 1086 | return compile(L, p); | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | |||
| 1090 | static int lp_printtree (lua_State *L) { | ||
| 1091 | TTree *tree = getpatt(L, 1, NULL); | ||
| 1092 | int c = lua_toboolean(L, 2); | ||
| 1093 | if (c) { | ||
| 1094 | lua_getfenv(L, 1); /* push 'ktable' (may be used by 'finalfix') */ | ||
| 1095 | finalfix(L, 0, NULL, tree); | ||
| 1096 | lua_pop(L, 1); /* remove 'ktable' */ | ||
| 1097 | } | ||
| 1098 | printktable(L, 1); | ||
| 1099 | printtree(tree, 0); | ||
| 1100 | return 0; | ||
| 1101 | } | ||
| 1102 | |||
| 1103 | |||
| 1104 | static int lp_printcode (lua_State *L) { | ||
| 1105 | Pattern *p = getpattern(L, 1); | ||
| 1106 | printktable(L, 1); | ||
| 1107 | if (p->code == NULL) /* not compiled yet? */ | ||
| 1108 | prepcompile(L, p, 1); | ||
| 1109 | printpatt(p->code, p->codesize); | ||
| 1110 | return 0; | ||
| 1111 | } | ||
| 1112 | |||
| 1113 | |||
| 1114 | /* | ||
| 1115 | ** Get the initial position for the match, interpreting negative | ||
| 1116 | ** values from the end of the subject | ||
| 1117 | */ | ||
| 1118 | static size_t initposition (lua_State *L, size_t len) { | ||
| 1119 | lua_Integer ii = luaL_optinteger(L, 3, 1); | ||
| 1120 | if (ii > 0) { /* positive index? */ | ||
| 1121 | if ((size_t)ii <= len) /* inside the string? */ | ||
| 1122 | return (size_t)ii - 1; /* return it (corrected to 0-base) */ | ||
| 1123 | else return len; /* crop at the end */ | ||
| 1124 | } | ||
| 1125 | else { /* negative index */ | ||
| 1126 | if ((size_t)(-ii) <= len) /* inside the string? */ | ||
| 1127 | return len - ((size_t)(-ii)); /* return position from the end */ | ||
| 1128 | else return 0; /* crop at the beginning */ | ||
| 1129 | } | ||
| 1130 | } | ||
| 1131 | |||
| 1132 | |||
| 1133 | /* | ||
| 1134 | ** Main match function | ||
| 1135 | */ | ||
| 1136 | static int lp_match (lua_State *L) { | ||
| 1137 | Capture capture[INITCAPSIZE]; | ||
| 1138 | const char *r; | ||
| 1139 | size_t l; | ||
| 1140 | Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1)); | ||
| 1141 | Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1); | ||
| 1142 | const char *s = luaL_checklstring(L, SUBJIDX, &l); | ||
| 1143 | size_t i = initposition(L, l); | ||
| 1144 | int ptop = lua_gettop(L); | ||
| 1145 | lua_pushnil(L); /* initialize subscache */ | ||
| 1146 | lua_pushlightuserdata(L, capture); /* initialize caplistidx */ | ||
| 1147 | lua_getfenv(L, 1); /* initialize penvidx */ | ||
| 1148 | r = match(L, s, s + i, s + l, code, capture, ptop); | ||
| 1149 | if (r == NULL) { | ||
| 1150 | lua_pushnil(L); | ||
| 1151 | return 1; | ||
| 1152 | } | ||
| 1153 | return getcaptures(L, s, r, ptop); | ||
| 1154 | } | ||
| 1155 | |||
| 1156 | |||
| 1157 | |||
| 1158 | /* | ||
| 1159 | ** {====================================================== | ||
| 1160 | ** Library creation and functions not related to matching | ||
| 1161 | ** ======================================================= | ||
| 1162 | */ | ||
| 1163 | |||
| 1164 | static int lp_setmax (lua_State *L) { | ||
| 1165 | luaL_optinteger(L, 1, -1); | ||
| 1166 | lua_settop(L, 1); | ||
| 1167 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
| 1168 | return 0; | ||
| 1169 | } | ||
| 1170 | |||
| 1171 | |||
| 1172 | static int lp_version (lua_State *L) { | ||
| 1173 | lua_pushstring(L, VERSION); | ||
| 1174 | return 1; | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | |||
| 1178 | static int lp_type (lua_State *L) { | ||
| 1179 | if (testpattern(L, 1)) | ||
| 1180 | lua_pushliteral(L, "pattern"); | ||
| 1181 | else | ||
| 1182 | lua_pushnil(L); | ||
| 1183 | return 1; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | |||
| 1187 | int lp_gc (lua_State *L) { | ||
| 1188 | Pattern *p = getpattern(L, 1); | ||
| 1189 | if (p->codesize > 0) | ||
| 1190 | reallocprog(L, p, 0); | ||
| 1191 | return 0; | ||
| 1192 | } | ||
| 1193 | |||
| 1194 | |||
| 1195 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { | ||
| 1196 | TTree *t = newcharset(L); | ||
| 1197 | int i; | ||
| 1198 | for (i = 0; i <= UCHAR_MAX; i++) | ||
| 1199 | if (catf(i)) setchar(treebuffer(t), i); | ||
| 1200 | lua_setfield(L, -2, catname); | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | |||
| 1204 | static int lp_locale (lua_State *L) { | ||
| 1205 | if (lua_isnoneornil(L, 1)) { | ||
| 1206 | lua_settop(L, 0); | ||
| 1207 | lua_createtable(L, 0, 12); | ||
| 1208 | } | ||
| 1209 | else { | ||
| 1210 | luaL_checktype(L, 1, LUA_TTABLE); | ||
| 1211 | lua_settop(L, 1); | ||
| 1212 | } | ||
| 1213 | createcat(L, "alnum", isalnum); | ||
| 1214 | createcat(L, "alpha", isalpha); | ||
| 1215 | createcat(L, "cntrl", iscntrl); | ||
| 1216 | createcat(L, "digit", isdigit); | ||
| 1217 | createcat(L, "graph", isgraph); | ||
| 1218 | createcat(L, "lower", islower); | ||
| 1219 | createcat(L, "print", isprint); | ||
| 1220 | createcat(L, "punct", ispunct); | ||
| 1221 | createcat(L, "space", isspace); | ||
| 1222 | createcat(L, "upper", isupper); | ||
| 1223 | createcat(L, "xdigit", isxdigit); | ||
| 1224 | return 1; | ||
| 1225 | } | ||
| 1226 | |||
| 1227 | |||
| 1228 | static struct luaL_Reg pattreg[] = { | ||
| 1229 | {"ptree", lp_printtree}, | ||
| 1230 | {"pcode", lp_printcode}, | ||
| 1231 | {"match", lp_match}, | ||
| 1232 | {"B", lp_behind}, | ||
| 1233 | {"V", lp_V}, | ||
| 1234 | {"C", lp_simplecapture}, | ||
| 1235 | {"Cc", lp_constcapture}, | ||
| 1236 | {"Cmt", lp_matchtime}, | ||
| 1237 | {"Cb", lp_backref}, | ||
| 1238 | {"Carg", lp_argcapture}, | ||
| 1239 | {"Cp", lp_poscapture}, | ||
| 1240 | {"Cs", lp_substcapture}, | ||
| 1241 | {"Ct", lp_tablecapture}, | ||
| 1242 | {"Cf", lp_foldcapture}, | ||
| 1243 | {"Cg", lp_groupcapture}, | ||
| 1244 | {"P", lp_P}, | ||
| 1245 | {"S", lp_set}, | ||
| 1246 | {"R", lp_range}, | ||
| 1247 | {"locale", lp_locale}, | ||
| 1248 | {"version", lp_version}, | ||
| 1249 | {"setmaxstack", lp_setmax}, | ||
| 1250 | {"type", lp_type}, | ||
| 1251 | {"T", lp_throw}, /* labeled failure throw */ | ||
| 1252 | {"Lc", lp_labchoice}, /* labeled failure choice */ | ||
| 1253 | {NULL, NULL} | ||
| 1254 | }; | ||
| 1255 | |||
| 1256 | |||
| 1257 | static struct luaL_Reg metareg[] = { | ||
| 1258 | {"__mul", lp_seq}, | ||
| 1259 | {"__add", lp_choice}, | ||
| 1260 | {"__pow", lp_star}, | ||
| 1261 | {"__gc", lp_gc}, | ||
| 1262 | {"__len", lp_and}, | ||
| 1263 | {"__div", lp_divcapture}, | ||
| 1264 | {"__unm", lp_not}, | ||
| 1265 | {"__sub", lp_sub}, | ||
| 1266 | {NULL, NULL} | ||
| 1267 | }; | ||
| 1268 | |||
| 1269 | |||
| 1270 | int luaopen_lpeglabel (lua_State *L); | ||
| 1271 | int luaopen_lpeglabel (lua_State *L) { | ||
| 1272 | luaL_newmetatable(L, PATTERN_T); | ||
| 1273 | lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ | ||
| 1274 | lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
| 1275 | luaL_register(L, NULL, metareg); | ||
| 1276 | luaL_register(L, "lpeglabel", pattreg); | ||
| 1277 | lua_pushvalue(L, -1); | ||
| 1278 | lua_setfield(L, -3, "__index"); | ||
| 1279 | return 1; | ||
| 1280 | } | ||
| 1281 | |||
| 1282 | /* }====================================================== */ | ||
diff --git a/lptree.h b/lptree.h new file mode 100644 index 0000000..43299a4 --- /dev/null +++ b/lptree.h | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ | ||
| 3 | */ | ||
| 4 | |||
| 5 | #if !defined(lptree_h) | ||
| 6 | #define lptree_h | ||
| 7 | |||
| 8 | |||
| 9 | #include "lptypes.h" | ||
| 10 | |||
| 11 | |||
| 12 | /* | ||
| 13 | ** types of trees | ||
| 14 | */ | ||
| 15 | typedef enum TTag { | ||
| 16 | TChar = 0, TSet, TAny, /* standard PEG elements */ | ||
| 17 | TTrue, TFalse, | ||
| 18 | TRep, | ||
| 19 | TSeq, TChoice, | ||
| 20 | TNot, TAnd, | ||
| 21 | TCall, | ||
| 22 | TOpenCall, | ||
| 23 | TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */ | ||
| 24 | TGrammar, /* sib1 is initial (and first) rule */ | ||
| 25 | TBehind, /* match behind */ | ||
| 26 | TCapture, /* regular capture */ | ||
| 27 | TRunTime, /* run-time capture */ | ||
| 28 | TThrow, TLabChoice /* labeled failure */ | ||
| 29 | } TTag; | ||
| 30 | |||
| 31 | /* number of siblings for each tree */ | ||
| 32 | extern const byte numsiblings[]; | ||
| 33 | |||
| 34 | |||
| 35 | /* | ||
| 36 | ** Tree trees | ||
| 37 | ** The first sibling of a tree (if there is one) is immediately after | ||
| 38 | ** the tree. A reference to a second sibling (ps) is its position | ||
| 39 | ** relative to the position of the tree itself. A key in ktable | ||
| 40 | ** uses the (unique) address of the original tree that created that | ||
| 41 | ** entry. NULL means no data. | ||
| 42 | */ | ||
| 43 | typedef struct TTree { | ||
| 44 | byte tag; | ||
| 45 | byte cap; /* kind of capture (if it is a capture) */ | ||
| 46 | unsigned short key; /* key in ktable for Lua data (0 if no key) */ | ||
| 47 | Labelset labels; /* labeled failure */ | ||
| 48 | union { | ||
| 49 | int ps; /* occasional second sibling */ | ||
| 50 | int n; /* occasional counter */ | ||
| 51 | } u; | ||
| 52 | } TTree; | ||
| 53 | |||
| 54 | |||
| 55 | /* | ||
| 56 | ** A complete pattern has its tree plus, if already compiled, | ||
| 57 | ** its corresponding code | ||
| 58 | */ | ||
| 59 | typedef struct Pattern { | ||
| 60 | union Instruction *code; | ||
| 61 | int codesize; | ||
| 62 | TTree tree[1]; | ||
| 63 | } Pattern; | ||
| 64 | |||
| 65 | |||
| 66 | /* number of siblings for each tree */ | ||
| 67 | extern const byte numsiblings[]; | ||
| 68 | |||
| 69 | /* access to siblings */ | ||
| 70 | #define sib1(t) ((t) + 1) | ||
| 71 | #define sib2(t) ((t) + (t)->u.ps) | ||
| 72 | |||
| 73 | |||
| 74 | |||
| 75 | |||
| 76 | |||
| 77 | |||
| 78 | #endif | ||
| 79 | |||
diff --git a/lptypes.h b/lptypes.h new file mode 100644 index 0000000..503f1f0 --- /dev/null +++ b/lptypes.h | |||
| @@ -0,0 +1,158 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lptypes.h,v 1.8 2013/04/12 16:26:38 roberto Exp $ | ||
| 3 | ** LPeg - PEG pattern matching for Lua | ||
| 4 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
| 5 | ** written by Roberto Ierusalimschy | ||
| 6 | */ | ||
| 7 | |||
| 8 | #if !defined(lptypes_h) | ||
| 9 | #define lptypes_h | ||
| 10 | |||
| 11 | |||
| 12 | #if !defined(LPEG_DEBUG) | ||
| 13 | #define NDEBUG | ||
| 14 | #endif | ||
| 15 | |||
| 16 | #include <assert.h> | ||
| 17 | #include <limits.h> | ||
| 18 | |||
| 19 | #include "lua.h" | ||
| 20 | |||
| 21 | |||
| 22 | #define VERSION "0.12" | ||
| 23 | |||
| 24 | |||
| 25 | #define PATTERN_T "lpeg-pattern" | ||
| 26 | #define MAXSTACKIDX "lpeg-maxstack" | ||
| 27 | |||
| 28 | |||
| 29 | /* | ||
| 30 | ** compatibility with Lua 5.2 | ||
| 31 | */ | ||
| 32 | #if (LUA_VERSION_NUM == 502) | ||
| 33 | |||
| 34 | #undef lua_equal | ||
| 35 | #define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) | ||
| 36 | |||
| 37 | #undef lua_getfenv | ||
| 38 | #define lua_getfenv lua_getuservalue | ||
| 39 | #undef lua_setfenv | ||
| 40 | #define lua_setfenv lua_setuservalue | ||
| 41 | |||
| 42 | #undef lua_objlen | ||
| 43 | #define lua_objlen lua_rawlen | ||
| 44 | |||
| 45 | #undef luaL_register | ||
| 46 | #define luaL_register(L,n,f) \ | ||
| 47 | { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); } | ||
| 48 | |||
| 49 | #endif | ||
| 50 | |||
| 51 | |||
| 52 | /* default maximum size for call/backtrack stack */ | ||
| 53 | #if !defined(MAXBACK) | ||
| 54 | #define MAXBACK 100 | ||
| 55 | #endif | ||
| 56 | |||
| 57 | |||
| 58 | /* maximum number of rules in a grammar */ | ||
| 59 | #define MAXRULES 200 | ||
| 60 | |||
| 61 | |||
| 62 | |||
| 63 | /* initial size for capture's list */ | ||
| 64 | #define INITCAPSIZE 32 | ||
| 65 | |||
| 66 | |||
| 67 | /* index, on Lua stack, for subject */ | ||
| 68 | #define SUBJIDX 2 | ||
| 69 | |||
| 70 | /* number of fixed arguments to 'match' (before capture arguments) */ | ||
| 71 | #define FIXEDARGS 3 | ||
| 72 | |||
| 73 | /* index, on Lua stack, for capture list */ | ||
| 74 | #define caplistidx(ptop) ((ptop) + 2) | ||
| 75 | |||
| 76 | /* index, on Lua stack, for pattern's ktable */ | ||
| 77 | #define ktableidx(ptop) ((ptop) + 3) | ||
| 78 | |||
| 79 | /* index, on Lua stack, for backtracking stack */ | ||
| 80 | #define stackidx(ptop) ((ptop) + 4) | ||
| 81 | |||
| 82 | |||
| 83 | |||
| 84 | typedef unsigned char byte; | ||
| 85 | |||
| 86 | |||
| 87 | #define BITSPERCHAR 8 | ||
| 88 | |||
| 89 | #define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1) | ||
| 90 | |||
| 91 | |||
| 92 | |||
| 93 | typedef struct Charset { | ||
| 94 | byte cs[CHARSETSIZE]; | ||
| 95 | } Charset; | ||
| 96 | |||
| 97 | |||
| 98 | |||
| 99 | #define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } | ||
| 100 | |||
| 101 | /* access to charset */ | ||
| 102 | #define treebuffer(t) ((byte *)((t) + 1)) | ||
| 103 | |||
| 104 | /* number of slots needed for 'n' bytes */ | ||
| 105 | #define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1) | ||
| 106 | |||
| 107 | /* set 'b' bit in charset 'cs' */ | ||
| 108 | #define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) | ||
| 109 | |||
| 110 | /* labeled failure begin */ | ||
| 111 | typedef int Labelset; | ||
| 112 | |||
| 113 | #define MAXLABELS (sizeof(int) * 8) | ||
| 114 | |||
| 115 | #define LFAIL 1 | ||
| 116 | |||
| 117 | /* set bit 'b' in set 's' */ | ||
| 118 | #define setlabel(s, b) ((s) |= (1 << (b))) | ||
| 119 | /* labeled failure end */ | ||
| 120 | |||
| 121 | |||
| 122 | /* | ||
| 123 | ** in capture instructions, 'kind' of capture and its offset are | ||
| 124 | ** packed in field 'aux', 4 bits for each | ||
| 125 | */ | ||
| 126 | #define getkind(op) ((op)->i.aux & 0xF) | ||
| 127 | #define getoff(op) (((op)->i.aux >> 4) & 0xF) | ||
| 128 | #define joinkindoff(k,o) ((k) | ((o) << 4)) | ||
| 129 | |||
| 130 | #define MAXOFF 0xF | ||
| 131 | #define MAXAUX 0xFF | ||
| 132 | |||
| 133 | |||
| 134 | /* maximum number of bytes to look behind */ | ||
| 135 | #define MAXBEHIND MAXAUX | ||
| 136 | |||
| 137 | |||
| 138 | /* maximum size (in elements) for a pattern */ | ||
| 139 | #define MAXPATTSIZE (SHRT_MAX - 10) | ||
| 140 | |||
| 141 | |||
| 142 | /* size (in elements) for an instruction plus extra l bytes */ | ||
| 143 | #define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1) | ||
| 144 | |||
| 145 | |||
| 146 | /* size (in elements) for a ISet instruction */ | ||
| 147 | #define CHARSETINSTSIZE instsize(CHARSETSIZE) | ||
| 148 | |||
| 149 | /* size (in elements) for a IFunc instruction */ | ||
| 150 | #define funcinstsize(p) ((p)->i.aux + 2) | ||
| 151 | |||
| 152 | |||
| 153 | |||
| 154 | #define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7)))) | ||
| 155 | |||
| 156 | |||
| 157 | #endif | ||
| 158 | |||
| @@ -0,0 +1,391 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lpvm.c,v 1.5 2013/04/12 16:29:49 roberto Exp $ | ||
| 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include <limits.h> | ||
| 7 | #include <string.h> | ||
| 8 | |||
| 9 | |||
| 10 | #include "lua.h" | ||
| 11 | #include "lauxlib.h" | ||
| 12 | |||
| 13 | #include "lpcap.h" | ||
| 14 | #include "lptypes.h" | ||
| 15 | #include "lpvm.h" | ||
| 16 | #include "lpprint.h" | ||
| 17 | |||
| 18 | |||
| 19 | /* initial size for call/backtrack stack */ | ||
| 20 | #if !defined(INITBACK) | ||
| 21 | #define INITBACK 100 | ||
| 22 | #endif | ||
| 23 | |||
| 24 | |||
| 25 | #define getoffset(p) (((p) + 1)->offset) | ||
| 26 | |||
| 27 | static const Instruction giveup = {{IGiveup, 0, 0}}; | ||
| 28 | |||
| 29 | |||
| 30 | /* | ||
| 31 | ** {====================================================== | ||
| 32 | ** Virtual Machine | ||
| 33 | ** ======================================================= | ||
| 34 | */ | ||
| 35 | |||
| 36 | |||
| 37 | typedef struct Stack { | ||
| 38 | const char *s; /* saved position (or NULL for calls) */ | ||
| 39 | const Instruction *p; /* next instruction */ | ||
| 40 | int caplevel; | ||
| 41 | Labelset ls; /* labeled failure */ | ||
| 42 | } Stack; | ||
| 43 | |||
| 44 | |||
| 45 | #define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) | ||
| 46 | |||
| 47 | |||
| 48 | /* | ||
| 49 | ** Double the size of the array of captures | ||
| 50 | */ | ||
| 51 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { | ||
| 52 | Capture *newc; | ||
| 53 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) | ||
| 54 | luaL_error(L, "too many captures"); | ||
| 55 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); | ||
| 56 | memcpy(newc, cap, captop * sizeof(Capture)); | ||
| 57 | lua_replace(L, caplistidx(ptop)); | ||
| 58 | return newc; | ||
| 59 | } | ||
| 60 | |||
| 61 | |||
| 62 | /* | ||
| 63 | ** Double the size of the stack | ||
| 64 | */ | ||
| 65 | static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { | ||
| 66 | Stack *stack = getstackbase(L, ptop); | ||
| 67 | Stack *newstack; | ||
| 68 | int n = *stacklimit - stack; /* current stack size */ | ||
| 69 | int max, newn; | ||
| 70 | lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
| 71 | max = lua_tointeger(L, -1); /* maximum allowed size */ | ||
| 72 | lua_pop(L, 1); | ||
| 73 | if (n >= max) /* already at maximum size? */ | ||
| 74 | luaL_error(L, "too many pending calls/choices"); | ||
| 75 | newn = 2 * n; /* new size */ | ||
| 76 | if (newn > max) newn = max; | ||
| 77 | newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); | ||
| 78 | memcpy(newstack, stack, n * sizeof(Stack)); | ||
| 79 | lua_replace(L, stackidx(ptop)); | ||
| 80 | *stacklimit = newstack + newn; | ||
| 81 | return newstack + n; /* return next position */ | ||
| 82 | } | ||
| 83 | |||
| 84 | |||
| 85 | /* | ||
| 86 | ** Interpret the result of a dynamic capture: false -> fail; | ||
| 87 | ** true -> keep current position; number -> next position. | ||
| 88 | ** Return new subject position. 'fr' is stack index where | ||
| 89 | ** is the result; 'curr' is current subject position; 'limit' | ||
| 90 | ** is subject's size. | ||
| 91 | */ | ||
| 92 | static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | ||
| 93 | lua_Integer res; | ||
| 94 | if (!lua_toboolean(L, fr)) { /* false value? */ | ||
| 95 | lua_settop(L, fr - 1); /* remove results */ | ||
| 96 | return -1; /* and fail */ | ||
| 97 | } | ||
| 98 | else if (lua_isboolean(L, fr)) /* true? */ | ||
| 99 | res = curr; /* keep current position */ | ||
| 100 | else { | ||
| 101 | res = lua_tointeger(L, fr) - 1; /* new position */ | ||
| 102 | if (res < curr || res > limit) | ||
| 103 | luaL_error(L, "invalid position returned by match-time capture"); | ||
| 104 | } | ||
| 105 | lua_remove(L, fr); /* remove first result (offset) */ | ||
| 106 | return res; | ||
| 107 | } | ||
| 108 | |||
| 109 | |||
| 110 | /* | ||
| 111 | ** Add capture values returned by a dynamic capture to the capture list | ||
| 112 | ** 'base', nested inside a group capture. 'fd' indexes the first capture | ||
| 113 | ** value, 'n' is the number of values (at least 1). | ||
| 114 | */ | ||
| 115 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { | ||
| 116 | int i; | ||
| 117 | /* Cgroup capture is already there */ | ||
| 118 | assert(base[0].kind == Cgroup && base[0].siz == 0); | ||
| 119 | base[0].idx = 0; /* make it an anonymous group */ | ||
| 120 | for (i = 1; i <= n; i++) { /* add runtime captures */ | ||
| 121 | base[i].kind = Cruntime; | ||
| 122 | base[i].siz = 1; /* mark it as closed */ | ||
| 123 | base[i].idx = fd + i - 1; /* stack index of capture value */ | ||
| 124 | base[i].s = s; | ||
| 125 | } | ||
| 126 | base[i].kind = Cclose; /* close group */ | ||
| 127 | base[i].siz = 1; | ||
| 128 | base[i].s = s; | ||
| 129 | } | ||
| 130 | |||
| 131 | |||
| 132 | /* | ||
| 133 | ** Remove dynamic captures from the Lua stack (called in case of failure) | ||
| 134 | */ | ||
| 135 | static int removedyncap (lua_State *L, Capture *capture, | ||
| 136 | int level, int last) { | ||
| 137 | int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */ | ||
| 138 | int top = lua_gettop(L); | ||
| 139 | if (id == 0) return 0; /* no dynamic captures? */ | ||
| 140 | lua_settop(L, id - 1); /* remove captures */ | ||
| 141 | return top - id + 1; /* number of values removed */ | ||
| 142 | } | ||
| 143 | |||
| 144 | |||
| 145 | /* | ||
| 146 | ** Opcode interpreter | ||
| 147 | */ | ||
| 148 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | ||
| 149 | Instruction *op, Capture *capture, int ptop) { | ||
| 150 | Stack stackbase[INITBACK]; | ||
| 151 | Stack *stacklimit = stackbase + INITBACK; | ||
| 152 | Stack *stack = stackbase; /* point to first empty slot in stack */ | ||
| 153 | int capsize = INITCAPSIZE; | ||
| 154 | int captop = 0; /* point to first empty slot in captures */ | ||
| 155 | int ndyncap = 0; /* number of dynamic captures (in Lua stack) */ | ||
| 156 | const Instruction *p = op; /* current instruction */ | ||
| 157 | Labelset labelf; /* labeled failure */ | ||
| 158 | stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; | ||
| 159 | lua_pushlightuserdata(L, stackbase); | ||
| 160 | for (;;) { | ||
| 161 | #if defined(LPEGDEBUG) | ||
| 162 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", | ||
| 163 | s, stack - getstackbase(L, ptop), ndyncap, captop); | ||
| 164 | printinst(op, p); | ||
| 165 | printcaplist(capture, capture + captop); | ||
| 166 | #endif | ||
| 167 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); | ||
| 168 | switch ((Opcode)p->i.code) { | ||
| 169 | case IEnd: { | ||
| 170 | assert(stack == getstackbase(L, ptop) + 1); | ||
| 171 | capture[captop].kind = Cclose; | ||
| 172 | capture[captop].s = NULL; | ||
| 173 | return s; | ||
| 174 | } | ||
| 175 | case IGiveup: { | ||
| 176 | assert(stack == getstackbase(L, ptop)); | ||
| 177 | return NULL; | ||
| 178 | } | ||
| 179 | case IRet: { | ||
| 180 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); | ||
| 181 | p = (--stack)->p; | ||
| 182 | continue; | ||
| 183 | } | ||
| 184 | case IAny: { | ||
| 185 | if (s < e) { p++; s++; } | ||
| 186 | else { | ||
| 187 | labelf = LFAIL; /* labeled failure */ | ||
| 188 | goto fail; | ||
| 189 | } | ||
| 190 | continue; | ||
| 191 | } | ||
| 192 | case ITestAny: { | ||
| 193 | if (s < e) p += 2; | ||
| 194 | else p += getoffset(p); | ||
| 195 | continue; | ||
| 196 | } | ||
| 197 | case IChar: { | ||
| 198 | if ((byte)*s == p->i.aux && s < e) { p++; s++; } | ||
| 199 | else { | ||
| 200 | labelf = LFAIL; /* labeled failure */ | ||
| 201 | goto fail; | ||
| 202 | } | ||
| 203 | continue; | ||
| 204 | } | ||
| 205 | case ITestChar: { | ||
| 206 | if ((byte)*s == p->i.aux && s < e) p += 2; | ||
| 207 | else p += getoffset(p); | ||
| 208 | continue; | ||
| 209 | } | ||
| 210 | case ISet: { | ||
| 211 | int c = (byte)*s; | ||
| 212 | if (testchar((p+1)->buff, c) && s < e) | ||
| 213 | { p += CHARSETINSTSIZE; s++; } | ||
| 214 | else { | ||
| 215 | labelf = LFAIL; /* labeled failure */ | ||
| 216 | goto fail; | ||
| 217 | } | ||
| 218 | continue; | ||
| 219 | } | ||
| 220 | case ITestSet: { | ||
| 221 | int c = (byte)*s; | ||
| 222 | if (testchar((p + 2)->buff, c) && s < e) | ||
| 223 | p += 1 + CHARSETINSTSIZE; | ||
| 224 | else p += getoffset(p); | ||
| 225 | continue; | ||
| 226 | } | ||
| 227 | case IBehind: { | ||
| 228 | int n = p->i.aux; | ||
| 229 | if (n > s - o) { | ||
| 230 | labelf = LFAIL; /* labeled failure */ | ||
| 231 | goto fail; | ||
| 232 | } | ||
| 233 | s -= n; p++; | ||
| 234 | continue; | ||
| 235 | } | ||
| 236 | case ISpan: { | ||
| 237 | for (; s < e; s++) { | ||
| 238 | int c = (byte)*s; | ||
| 239 | if (!testchar((p+1)->buff, c)) break; | ||
| 240 | } | ||
| 241 | p += CHARSETINSTSIZE; | ||
| 242 | continue; | ||
| 243 | } | ||
| 244 | case IJmp: { | ||
| 245 | p += getoffset(p); | ||
| 246 | continue; | ||
| 247 | } | ||
| 248 | case IChoice: { | ||
| 249 | if (stack == stacklimit) | ||
| 250 | stack = doublestack(L, &stacklimit, ptop); | ||
| 251 | stack->p = p + getoffset(p); | ||
| 252 | stack->s = s; | ||
| 253 | stack->ls = LFAIL; /* labeled failure */ | ||
| 254 | stack->caplevel = captop; | ||
| 255 | stack++; | ||
| 256 | p += 2; | ||
| 257 | continue; | ||
| 258 | } | ||
| 259 | case ILabChoice: { /* labeled failure */ | ||
| 260 | if (stack == stacklimit) | ||
| 261 | stack = doublestack(L, &stacklimit, ptop); | ||
| 262 | stack->p = p + getoffset(p); | ||
| 263 | stack->s = s; | ||
| 264 | stack->ls = (p + 2)->labels; | ||
| 265 | stack->caplevel = captop; | ||
| 266 | stack++; | ||
| 267 | p += 3; | ||
| 268 | continue; | ||
| 269 | } | ||
| 270 | |||
| 271 | case ICall: { | ||
| 272 | if (stack == stacklimit) | ||
| 273 | stack = doublestack(L, &stacklimit, ptop); | ||
| 274 | stack->s = NULL; | ||
| 275 | stack->p = p + 2; /* save return address */ | ||
| 276 | stack++; | ||
| 277 | p += getoffset(p); | ||
| 278 | continue; | ||
| 279 | } | ||
| 280 | case ICommit: { | ||
| 281 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||
| 282 | stack--; | ||
| 283 | p += getoffset(p); | ||
| 284 | continue; | ||
| 285 | } | ||
| 286 | case IPartialCommit: { | ||
| 287 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||
| 288 | (stack - 1)->s = s; | ||
| 289 | (stack - 1)->caplevel = captop; | ||
| 290 | p += getoffset(p); | ||
| 291 | continue; | ||
| 292 | } | ||
| 293 | case IBackCommit: { | ||
| 294 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||
| 295 | s = (--stack)->s; | ||
| 296 | captop = stack->caplevel; | ||
| 297 | p += getoffset(p); | ||
| 298 | continue; | ||
| 299 | } | ||
| 300 | case IThrow: { /* labeled failure */ | ||
| 301 | labelf = (p+1)->labels; | ||
| 302 | goto fail; | ||
| 303 | } | ||
| 304 | case IFailTwice: | ||
| 305 | assert(stack > getstackbase(L, ptop)); | ||
| 306 | stack--; | ||
| 307 | /* go through */ | ||
| 308 | case IFail: | ||
| 309 | labelf = LFAIL; /* labeled failure */ | ||
| 310 | fail: { /* pattern failed: try to backtrack */ | ||
| 311 | do { /* remove pending calls */ | ||
| 312 | assert(stack > getstackbase(L, ptop)); | ||
| 313 | s = (--stack)->s; | ||
| 314 | /*printf("fail (s == NULL => %d), labelf=%d stack->ls=%d (stack-> == giveup %d)\n", | ||
| 315 | s == NULL, labelf, stack->ls, stack->p == &giveup);*/ | ||
| 316 | } while (s == NULL || (!(stack->ls & labelf) && stack->p != &giveup)); | ||
| 317 | if (ndyncap > 0) /* is there matchtime captures? */ | ||
| 318 | ndyncap -= removedyncap(L, capture, stack->caplevel, captop); | ||
| 319 | captop = stack->caplevel; | ||
| 320 | p = stack->p; | ||
| 321 | continue; | ||
| 322 | } | ||
| 323 | case ICloseRunTime: { | ||
| 324 | CapState cs; | ||
| 325 | int rem, res, n; | ||
| 326 | int fr = lua_gettop(L) + 1; /* stack index of first result */ | ||
| 327 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; | ||
| 328 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ | ||
| 329 | captop -= n; /* remove nested captures */ | ||
| 330 | fr -= rem; /* 'rem' items were popped from Lua stack */ | ||
| 331 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ | ||
| 332 | if (res == -1) { /* fail? */ | ||
| 333 | labelf = LFAIL; /* labeled failure */ | ||
| 334 | goto fail; | ||
| 335 | } | ||
| 336 | s = o + res; /* else update current position */ | ||
| 337 | n = lua_gettop(L) - fr + 1; /* number of new captures */ | ||
| 338 | ndyncap += n - rem; /* update number of dynamic captures */ | ||
| 339 | if (n > 0) { /* any new capture? */ | ||
| 340 | if ((captop += n + 2) >= capsize) { | ||
| 341 | capture = doublecap(L, capture, captop, ptop); | ||
| 342 | capsize = 2 * captop; | ||
| 343 | } | ||
| 344 | /* add new captures to 'capture' list */ | ||
| 345 | adddyncaptures(s, capture + captop - n - 2, n, fr); | ||
| 346 | } | ||
| 347 | p++; | ||
| 348 | continue; | ||
| 349 | } | ||
| 350 | case ICloseCapture: { | ||
| 351 | const char *s1 = s; | ||
| 352 | assert(captop > 0); | ||
| 353 | /* if possible, turn capture into a full capture */ | ||
| 354 | if (capture[captop - 1].siz == 0 && | ||
| 355 | s1 - capture[captop - 1].s < UCHAR_MAX) { | ||
| 356 | capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; | ||
| 357 | p++; | ||
| 358 | continue; | ||
| 359 | } | ||
| 360 | else { | ||
| 361 | capture[captop].siz = 1; /* mark entry as closed */ | ||
| 362 | capture[captop].s = s; | ||
| 363 | goto pushcapture; | ||
| 364 | } | ||
| 365 | } | ||
| 366 | case IOpenCapture: | ||
| 367 | capture[captop].siz = 0; /* mark entry as open */ | ||
| 368 | capture[captop].s = s; | ||
| 369 | goto pushcapture; | ||
| 370 | case IFullCapture: | ||
| 371 | capture[captop].siz = getoff(p) + 1; /* save capture size */ | ||
| 372 | capture[captop].s = s - getoff(p); | ||
| 373 | /* goto pushcapture; */ | ||
| 374 | pushcapture: { | ||
| 375 | capture[captop].idx = p->i.key; | ||
| 376 | capture[captop].kind = getkind(p); | ||
| 377 | if (++captop >= capsize) { | ||
| 378 | capture = doublecap(L, capture, captop, ptop); | ||
| 379 | capsize = 2 * captop; | ||
| 380 | } | ||
| 381 | p++; | ||
| 382 | continue; | ||
| 383 | } | ||
| 384 | default: assert(0); return NULL; | ||
| 385 | } | ||
| 386 | } | ||
| 387 | } | ||
| 388 | |||
| 389 | /* }====================================================== */ | ||
| 390 | |||
| 391 | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | /* | ||
| 2 | ** $Id: lpvm.h,v 1.2 2013/04/03 20:37:18 roberto Exp $ | ||
| 3 | */ | ||
| 4 | |||
| 5 | #if !defined(lpvm_h) | ||
| 6 | #define lpvm_h | ||
| 7 | |||
| 8 | #include "lpcap.h" | ||
| 9 | |||
| 10 | |||
| 11 | /* Virtual Machine's instructions */ | ||
| 12 | typedef enum Opcode { | ||
| 13 | IAny, /* if no char, fail */ | ||
| 14 | IChar, /* if char != aux, fail */ | ||
| 15 | ISet, /* if char not in buff, fail */ | ||
| 16 | ITestAny, /* in no char, jump to 'offset' */ | ||
| 17 | ITestChar, /* if char != aux, jump to 'offset' */ | ||
| 18 | ITestSet, /* if char not in buff, jump to 'offset' */ | ||
| 19 | ISpan, /* read a span of chars in buff */ | ||
| 20 | IBehind, /* walk back 'aux' characters (fail if not possible) */ | ||
| 21 | IRet, /* return from a rule */ | ||
| 22 | IEnd, /* end of pattern */ | ||
| 23 | IChoice, /* stack a choice; next fail will jump to 'offset' */ | ||
| 24 | IJmp, /* jump to 'offset' */ | ||
| 25 | ICall, /* call rule at 'offset' */ | ||
| 26 | IOpenCall, /* call rule number 'key' (must be closed to a ICall) */ | ||
| 27 | ICommit, /* pop choice and jump to 'offset' */ | ||
| 28 | IPartialCommit, /* update top choice to current position and jump */ | ||
| 29 | IBackCommit, /* "fails" but jump to its own 'offset' */ | ||
| 30 | IFailTwice, /* pop one choice and then fail */ | ||
| 31 | IFail, /* go back to saved state on choice and jump to saved offset */ | ||
| 32 | IGiveup, /* internal use */ | ||
| 33 | IFullCapture, /* complete capture of last 'off' chars */ | ||
| 34 | IOpenCapture, /* start a capture */ | ||
| 35 | ICloseCapture, | ||
| 36 | ICloseRunTime, | ||
| 37 | IThrow, /* "fails" with a specific label labeled failure */ | ||
| 38 | ILabChoice /* labeled choice */ | ||
| 39 | } Opcode; | ||
| 40 | |||
| 41 | |||
| 42 | |||
| 43 | typedef union Instruction { | ||
| 44 | struct Inst { | ||
| 45 | byte code; | ||
| 46 | byte aux; | ||
| 47 | short key; | ||
| 48 | } i; | ||
| 49 | int offset; | ||
| 50 | Labelset labels; /* labeled failure */ | ||
| 51 | byte buff[1]; | ||
| 52 | } Instruction; | ||
| 53 | |||
| 54 | |||
| 55 | int getposition (lua_State *L, int t, int i); | ||
| 56 | void printpatt (Instruction *p, int n); | ||
| 57 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | ||
| 58 | Instruction *op, Capture *capture, int ptop); | ||
| 59 | int verify (lua_State *L, Instruction *op, const Instruction *p, | ||
| 60 | Instruction *e, int postable, int rule); | ||
| 61 | void checkrule (lua_State *L, Instruction *op, int from, int to, | ||
| 62 | int postable, int rule); | ||
| 63 | |||
| 64 | |||
| 65 | #endif | ||
| 66 | |||
diff --git a/makefile b/makefile new file mode 100644 index 0000000..4f41062 --- /dev/null +++ b/makefile | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | LIBNAME = lpeglabel | ||
| 2 | LUADIR = /usr/include/lua5.1/ | ||
| 3 | |||
| 4 | #COPT = -O2 | ||
| 5 | COPT = -DLPEG_DEBUG -g | ||
| 6 | |||
| 7 | CWARNS = -Wall -Wextra -pedantic \ | ||
| 8 | -Waggregate-return \ | ||
| 9 | -Wcast-align \ | ||
| 10 | -Wcast-qual \ | ||
| 11 | -Wdisabled-optimization \ | ||
| 12 | -Wpointer-arith \ | ||
| 13 | -Wshadow \ | ||
| 14 | -Wsign-compare \ | ||
| 15 | -Wundef \ | ||
| 16 | -Wwrite-strings \ | ||
| 17 | -Wbad-function-cast \ | ||
| 18 | -Wdeclaration-after-statement \ | ||
| 19 | -Wmissing-prototypes \ | ||
| 20 | -Wnested-externs \ | ||
| 21 | -Wstrict-prototypes \ | ||
| 22 | # -Wunreachable-code \ | ||
| 23 | |||
| 24 | |||
| 25 | CFLAGS = $(CWARNS) $(COPT) -ansi -I$(LUADIR) -fPIC | ||
| 26 | CC = gcc | ||
| 27 | |||
| 28 | FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o | ||
| 29 | |||
| 30 | # For Linux | ||
| 31 | linux: | ||
| 32 | make lpeglabel.so "DLLFLAGS = -shared -fPIC" | ||
| 33 | |||
| 34 | # For Mac OS | ||
| 35 | macosx: | ||
| 36 | make lpeglabel.so "DLLFLAGS = -bundle -undefined dynamic_lookup" | ||
| 37 | |||
| 38 | lpeglabel.so: $(FILES) | ||
| 39 | env $(CC) $(DLLFLAGS) $(FILES) -o lpeglabel.so | ||
| 40 | |||
| 41 | $(FILES): makefile | ||
| 42 | |||
| 43 | test: test.lua re.lua lpeglabel.so | ||
| 44 | ./test.lua | ||
| 45 | |||
| 46 | clean: | ||
| 47 | rm -f $(FILES) lpeglabel.so | ||
| 48 | |||
| 49 | |||
| 50 | lpcap.o: lpcap.c lpcap.h lptypes.h | ||
| 51 | lpcode.o: lpcode.c lptypes.h lpcode.h lptree.h lpvm.h lpcap.h | ||
| 52 | lpprint.o: lpprint.c lptypes.h lpprint.h lptree.h lpvm.h lpcap.h | ||
| 53 | lptree.o: lptree.c lptypes.h lpcap.h lpcode.h lptree.h lpvm.h lpprint.h | ||
| 54 | lpvm.o: lpvm.c lpcap.h lptypes.h lpvm.h lpprint.h lptree.h | ||
| 55 | |||
| @@ -0,0 +1,276 @@ | |||
| 1 | -- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ | ||
| 2 | |||
| 3 | -- imported functions and modules | ||
| 4 | local tonumber, type, print, error = tonumber, type, print, error | ||
| 5 | local setmetatable = setmetatable | ||
| 6 | local unpack = table.unpack | ||
| 7 | local m = require"lpeglabel" | ||
| 8 | |||
| 9 | -- 'm' will be used to parse expressions, and 'mm' will be used to | ||
| 10 | -- create expressions; that is, 're' runs on 'm', creating patterns | ||
| 11 | -- on 'mm' | ||
| 12 | local mm = m | ||
| 13 | |||
| 14 | -- pattern's metatable | ||
| 15 | local mt = getmetatable(mm.P(0)) | ||
| 16 | |||
| 17 | |||
| 18 | |||
| 19 | -- No more global accesses after this point | ||
| 20 | local version = _VERSION | ||
| 21 | if version == "Lua 5.2" then _ENV = nil end | ||
| 22 | |||
| 23 | |||
| 24 | local any = m.P(1) | ||
| 25 | |||
| 26 | |||
| 27 | -- Pre-defined names | ||
| 28 | local Predef = { nl = m.P"\n" } | ||
| 29 | |||
| 30 | |||
| 31 | local mem | ||
| 32 | local fmem | ||
| 33 | local gmem | ||
| 34 | |||
| 35 | |||
| 36 | local function updatelocale () | ||
| 37 | mm.locale(Predef) | ||
| 38 | Predef.a = Predef.alpha | ||
| 39 | Predef.c = Predef.cntrl | ||
| 40 | Predef.d = Predef.digit | ||
| 41 | Predef.g = Predef.graph | ||
| 42 | Predef.l = Predef.lower | ||
| 43 | Predef.p = Predef.punct | ||
| 44 | Predef.s = Predef.space | ||
| 45 | Predef.u = Predef.upper | ||
| 46 | Predef.w = Predef.alnum | ||
| 47 | Predef.x = Predef.xdigit | ||
| 48 | Predef.A = any - Predef.a | ||
| 49 | Predef.C = any - Predef.c | ||
| 50 | Predef.D = any - Predef.d | ||
| 51 | Predef.G = any - Predef.g | ||
| 52 | Predef.L = any - Predef.l | ||
| 53 | Predef.P = any - Predef.p | ||
| 54 | Predef.S = any - Predef.s | ||
| 55 | Predef.U = any - Predef.u | ||
| 56 | Predef.W = any - Predef.w | ||
| 57 | Predef.X = any - Predef.x | ||
| 58 | mem = {} -- restart memoization | ||
| 59 | fmem = {} | ||
| 60 | gmem = {} | ||
| 61 | local mt = {__mode = "v"} | ||
| 62 | setmetatable(mem, mt) | ||
| 63 | setmetatable(fmem, mt) | ||
| 64 | setmetatable(gmem, mt) | ||
| 65 | end | ||
| 66 | |||
| 67 | |||
| 68 | updatelocale() | ||
| 69 | |||
| 70 | |||
| 71 | |||
| 72 | local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) | ||
| 73 | |||
| 74 | |||
| 75 | local function getdef (id, defs) | ||
| 76 | local c = defs and defs[id] | ||
| 77 | if not c then error("undefined name: " .. id) end | ||
| 78 | return c | ||
| 79 | end | ||
| 80 | |||
| 81 | |||
| 82 | local function patt_error (s, i) | ||
| 83 | local msg = (#s < i + 20) and s:sub(i) | ||
| 84 | or s:sub(i,i+20) .. "..." | ||
| 85 | msg = ("pattern error near '%s'"):format(msg) | ||
| 86 | error(msg, 2) | ||
| 87 | end | ||
| 88 | |||
| 89 | local function mult (p, n) | ||
| 90 | local np = mm.P(true) | ||
| 91 | while n >= 1 do | ||
| 92 | if n%2 >= 1 then np = np * p end | ||
| 93 | p = p * p | ||
| 94 | n = n/2 | ||
| 95 | end | ||
| 96 | return np | ||
| 97 | end | ||
| 98 | |||
| 99 | local function equalcap (s, i, c) | ||
| 100 | if type(c) ~= "string" then return nil end | ||
| 101 | local e = #c + i | ||
| 102 | if s:sub(i, e - 1) == c then return e else return nil end | ||
| 103 | end | ||
| 104 | |||
| 105 | |||
| 106 | local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 | ||
| 107 | |||
| 108 | local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0 | ||
| 109 | |||
| 110 | local arrow = S * "<-" | ||
| 111 | |||
| 112 | local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1 | ||
| 113 | |||
| 114 | name = m.C(name) | ||
| 115 | |||
| 116 | |||
| 117 | -- a defined name only have meaning in a given environment | ||
| 118 | local Def = name * m.Carg(1) | ||
| 119 | |||
| 120 | local num = m.C(m.R"09"^1) * S / tonumber | ||
| 121 | |||
| 122 | local String = "'" * m.C((any - "'")^0) * "'" + | ||
| 123 | '"' * m.C((any - '"')^0) * '"' | ||
| 124 | |||
| 125 | |||
| 126 | local defined = "%" * Def / function (c,Defs) | ||
| 127 | local cat = Defs and Defs[c] or Predef[c] | ||
| 128 | if not cat then error ("name '" .. c .. "' undefined") end | ||
| 129 | return cat | ||
| 130 | end | ||
| 131 | |||
| 132 | local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R | ||
| 133 | |||
| 134 | local item = defined + Range + m.C(any) | ||
| 135 | |||
| 136 | local Class = | ||
| 137 | "[" | ||
| 138 | * (m.C(m.P"^"^-1)) -- optional complement symbol | ||
| 139 | * m.Cf(item * (item - "]")^0, mt.__add) / | ||
| 140 | function (c, p) return c == "^" and any - p or p end | ||
| 141 | * "]" | ||
| 142 | |||
| 143 | local function adddef (t, k, exp) | ||
| 144 | if t[k] then | ||
| 145 | error("'"..k.."' already defined as a rule") | ||
| 146 | else | ||
| 147 | t[k] = exp | ||
| 148 | end | ||
| 149 | return t | ||
| 150 | end | ||
| 151 | |||
| 152 | local function firstdef (n, r) return adddef({n}, n, r) end | ||
| 153 | |||
| 154 | |||
| 155 | local function NT (n, b) | ||
| 156 | if not b then | ||
| 157 | error("rule '"..n.."' used outside a grammar") | ||
| 158 | else return mm.V(n) | ||
| 159 | end | ||
| 160 | end | ||
| 161 | |||
| 162 | local function labchoice (...) | ||
| 163 | local t = { ... } | ||
| 164 | local n = #t | ||
| 165 | local p = t[1] | ||
| 166 | local i = 2 | ||
| 167 | while i + 1 <= n do | ||
| 168 | p = mm.Lc(p, t[i+1], unpack(t[i])) | ||
| 169 | i = i + 2 | ||
| 170 | end | ||
| 171 | |||
| 172 | return p | ||
| 173 | end | ||
| 174 | |||
| 175 | |||
| 176 | local exp = m.P{ "Exp", | ||
| 177 | Exp = S * ( m.V"Grammar" | ||
| 178 | + (m.V"Seq") * ("/" * m.V"Labels" * S * m.V"Seq")^1 / labchoice | ||
| 179 | + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) ); | ||
| 180 | Labels = m.Ct(m.P"{" * S * num * (S * "," * S * num)^0 * S * "}"); | ||
| 181 | Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul) | ||
| 182 | * (#seq_follow + patt_error); | ||
| 183 | Prefix = "&" * S * m.V"Prefix" / mt.__len | ||
| 184 | + "!" * S * m.V"Prefix" / mt.__unm | ||
| 185 | + m.V"Suffix"; | ||
| 186 | Suffix = m.Cf(m.V"Primary" * S * | ||
| 187 | ( ( m.P"+" * m.Cc(1, mt.__pow) | ||
| 188 | + m.P"*" * m.Cc(0, mt.__pow) | ||
| 189 | + m.P"?" * m.Cc(-1, mt.__pow) | ||
| 190 | + "^" * ( m.Cg(num * m.Cc(mult)) | ||
| 191 | + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow)) | ||
| 192 | ) | ||
| 193 | + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div)) | ||
| 194 | + m.P"{}" * m.Cc(nil, m.Ct) | ||
| 195 | + m.Cg(Def / getdef * m.Cc(mt.__div)) | ||
| 196 | ) | ||
| 197 | + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt)) | ||
| 198 | ) * S | ||
| 199 | )^0, function (a,b,f) return f(a,b) end ); | ||
| 200 | Primary = "(" * m.V"Exp" * ")" | ||
| 201 | + String / mm.P | ||
| 202 | + Class | ||
| 203 | + defined | ||
| 204 | + "%{" * S * num * (S * "," * S * num)^0 * S * "}" / mm.T | ||
| 205 | + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" / | ||
| 206 | function (n, p) return mm.Cg(p, n) end | ||
| 207 | + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end | ||
| 208 | + m.P"{}" / mm.Cp | ||
| 209 | + "{~" * m.V"Exp" * "~}" / mm.Cs | ||
| 210 | + "{|" * m.V"Exp" * "|}" / mm.Ct | ||
| 211 | + "{" * m.V"Exp" * "}" / mm.C | ||
| 212 | + m.P"." * m.Cc(any) | ||
| 213 | + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT; | ||
| 214 | Definition = name * arrow * m.V"Exp"; | ||
| 215 | Grammar = m.Cg(m.Cc(true), "G") * | ||
| 216 | m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0, | ||
| 217 | adddef) / mm.P | ||
| 218 | } | ||
| 219 | |||
| 220 | local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error) | ||
| 221 | |||
| 222 | |||
| 223 | local function compile (p, defs) | ||
| 224 | if mm.type(p) == "pattern" then return p end -- already compiled | ||
| 225 | local cp = pattern:match(p, 1, defs) | ||
| 226 | if not cp then error("incorrect pattern", 3) end | ||
| 227 | return cp | ||
| 228 | end | ||
| 229 | |||
| 230 | local function match (s, p, i) | ||
| 231 | local cp = mem[p] | ||
| 232 | if not cp then | ||
| 233 | cp = compile(p) | ||
| 234 | mem[p] = cp | ||
| 235 | end | ||
| 236 | return cp:match(s, i or 1) | ||
| 237 | end | ||
| 238 | |||
| 239 | local function find (s, p, i) | ||
| 240 | local cp = fmem[p] | ||
| 241 | if not cp then | ||
| 242 | cp = compile(p) / 0 | ||
| 243 | cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) } | ||
| 244 | fmem[p] = cp | ||
| 245 | end | ||
| 246 | local i, e = cp:match(s, i or 1) | ||
| 247 | if i then return i, e - 1 | ||
| 248 | else return i | ||
| 249 | end | ||
| 250 | end | ||
| 251 | |||
| 252 | local function gsub (s, p, rep) | ||
| 253 | local g = gmem[p] or {} -- ensure gmem[p] is not collected while here | ||
| 254 | gmem[p] = g | ||
| 255 | local cp = g[rep] | ||
| 256 | if not cp then | ||
| 257 | cp = compile(p) | ||
| 258 | cp = mm.Cs((cp / rep + 1)^0) | ||
| 259 | g[rep] = cp | ||
| 260 | end | ||
| 261 | return cp:match(s) | ||
| 262 | end | ||
| 263 | |||
| 264 | |||
| 265 | -- exported names | ||
| 266 | local re = { | ||
| 267 | compile = compile, | ||
| 268 | match = match, | ||
| 269 | find = find, | ||
| 270 | gsub = gsub, | ||
| 271 | updatelocale = updatelocale, | ||
| 272 | } | ||
| 273 | |||
| 274 | if version == "Lua 5.1" then _G.re = re end | ||
| 275 | |||
| 276 | return re | ||
diff --git a/test.lua b/test.lua new file mode 100755 index 0000000..d486c03 --- /dev/null +++ b/test.lua | |||
| @@ -0,0 +1,1386 @@ | |||
| 1 | #!/usr/bin/env lua5.1 | ||
| 2 | |||
| 3 | -- $Id: test.lua,v 1.101 2013/04/12 16:30:33 roberto Exp $ | ||
| 4 | |||
| 5 | -- require"strict" -- just to be pedantic | ||
| 6 | |||
| 7 | local m = require"lpeglabel" | ||
| 8 | |||
| 9 | |||
| 10 | -- for general use | ||
| 11 | local a, b, c, d, e, f, g, p, t | ||
| 12 | |||
| 13 | |||
| 14 | -- compatibility with Lua 5.2 | ||
| 15 | local unpack = rawget(table, "unpack") or unpack | ||
| 16 | local loadstring = rawget(_G, "loadstring") or load | ||
| 17 | |||
| 18 | |||
| 19 | -- most tests here do not need much stack space | ||
| 20 | m.setmaxstack(5) | ||
| 21 | |||
| 22 | local any = m.P(1) | ||
| 23 | local space = m.S" \t\n"^0 | ||
| 24 | |||
| 25 | local function checkeq (x, y, p) | ||
| 26 | if p then print(x,y) end | ||
| 27 | if type(x) ~= "table" then assert(x == y) | ||
| 28 | else | ||
| 29 | for k,v in pairs(x) do checkeq(v, y[k], p) end | ||
| 30 | for k,v in pairs(y) do checkeq(v, x[k], p) end | ||
| 31 | end | ||
| 32 | end | ||
| 33 | |||
| 34 | |||
| 35 | local mt = getmetatable(m.P(1)) | ||
| 36 | |||
| 37 | |||
| 38 | local allchar = {} | ||
| 39 | for i=0,255 do allchar[i + 1] = i end | ||
| 40 | allchar = string.char(unpack(allchar)) | ||
| 41 | assert(#allchar == 256) | ||
| 42 | |||
| 43 | local function cs2str (c) | ||
| 44 | return m.match(m.Cs((c + m.P(1)/"")^0), allchar) | ||
| 45 | end | ||
| 46 | |||
| 47 | local function eqcharset (c1, c2) | ||
| 48 | assert(cs2str(c1) == cs2str(c2)) | ||
| 49 | end | ||
| 50 | |||
| 51 | |||
| 52 | print"General tests for LPeg library" | ||
| 53 | |||
| 54 | assert(type(m.version()) == "string") | ||
| 55 | print("version " .. m.version()) | ||
| 56 | assert(m.type("alo") ~= "pattern") | ||
| 57 | assert(m.type(io.input) ~= "pattern") | ||
| 58 | assert(m.type(m.P"alo") == "pattern") | ||
| 59 | |||
| 60 | -- tests for some basic optimizations | ||
| 61 | assert(m.match(m.P(false) + "a", "a") == 2) | ||
| 62 | assert(m.match(m.P(true) + "a", "a") == 1) | ||
| 63 | assert(m.match("a" + m.P(false), "b") == nil) | ||
| 64 | assert(m.match("a" + m.P(true), "b") == 1) | ||
| 65 | |||
| 66 | assert(m.match(m.P(false) * "a", "a") == nil) | ||
| 67 | assert(m.match(m.P(true) * "a", "a") == 2) | ||
| 68 | assert(m.match("a" * m.P(false), "a") == nil) | ||
| 69 | assert(m.match("a" * m.P(true), "a") == 2) | ||
| 70 | |||
| 71 | assert(m.match(#m.P(false) * "a", "a") == nil) | ||
| 72 | assert(m.match(#m.P(true) * "a", "a") == 2) | ||
| 73 | assert(m.match("a" * #m.P(false), "a") == nil) | ||
| 74 | assert(m.match("a" * #m.P(true), "a") == 2) | ||
| 75 | |||
| 76 | |||
| 77 | -- tests for locale | ||
| 78 | do | ||
| 79 | assert(m.locale(m) == m) | ||
| 80 | local t = {} | ||
| 81 | assert(m.locale(t, m) == t) | ||
| 82 | local x = m.locale() | ||
| 83 | for n,v in pairs(x) do | ||
| 84 | assert(type(n) == "string") | ||
| 85 | eqcharset(v, m[n]) | ||
| 86 | end | ||
| 87 | end | ||
| 88 | |||
| 89 | |||
| 90 | assert(m.match(3, "aaaa")) | ||
| 91 | assert(m.match(4, "aaaa")) | ||
| 92 | assert(not m.match(5, "aaaa")) | ||
| 93 | assert(m.match(-3, "aa")) | ||
| 94 | assert(not m.match(-3, "aaa")) | ||
| 95 | assert(not m.match(-3, "aaaa")) | ||
| 96 | assert(not m.match(-4, "aaaa")) | ||
| 97 | assert(m.P(-5):match"aaaa") | ||
| 98 | |||
| 99 | assert(m.match("a", "alo") == 2) | ||
| 100 | assert(m.match("al", "alo") == 3) | ||
| 101 | assert(not m.match("alu", "alo")) | ||
| 102 | assert(m.match(true, "") == 1) | ||
| 103 | |||
| 104 | local digit = m.S"0123456789" | ||
| 105 | local upper = m.S"ABCDEFGHIJKLMNOPQRSTUVWXYZ" | ||
| 106 | local lower = m.S"abcdefghijklmnopqrstuvwxyz" | ||
| 107 | local letter = m.S"" + upper + lower | ||
| 108 | local alpha = letter + digit + m.R() | ||
| 109 | |||
| 110 | eqcharset(m.S"", m.P(false)) | ||
| 111 | eqcharset(upper, m.R("AZ")) | ||
| 112 | eqcharset(lower, m.R("az")) | ||
| 113 | eqcharset(upper + lower, m.R("AZ", "az")) | ||
| 114 | eqcharset(upper + lower, m.R("AZ", "cz", "aa", "bb", "90")) | ||
| 115 | eqcharset(digit, m.S"01234567" + "8" + "9") | ||
| 116 | eqcharset(upper, letter - lower) | ||
| 117 | eqcharset(m.S(""), m.R()) | ||
| 118 | assert(cs2str(m.S("")) == "") | ||
| 119 | |||
| 120 | eqcharset(m.S"\0", "\0") | ||
| 121 | eqcharset(m.S"\1\0\2", m.R"\0\2") | ||
| 122 | eqcharset(m.S"\1\0\2", m.R"\1\2" + "\0") | ||
| 123 | eqcharset(m.S"\1\0\2" - "\0", m.R"\1\2") | ||
| 124 | |||
| 125 | local word = alpha^1 * (1 - alpha)^0 | ||
| 126 | |||
| 127 | assert((word^0 * -1):match"alo alo") | ||
| 128 | assert(m.match(word^1 * -1, "alo alo")) | ||
| 129 | assert(m.match(word^2 * -1, "alo alo")) | ||
| 130 | assert(not m.match(word^3 * -1, "alo alo")) | ||
| 131 | |||
| 132 | assert(not m.match(word^-1 * -1, "alo alo")) | ||
| 133 | assert(m.match(word^-2 * -1, "alo alo")) | ||
| 134 | assert(m.match(word^-3 * -1, "alo alo")) | ||
| 135 | |||
| 136 | local eos = m.P(-1) | ||
| 137 | |||
| 138 | assert(m.match(digit^0 * letter * digit * eos, "1298a1")) | ||
| 139 | assert(not m.match(digit^0 * letter * eos, "1257a1")) | ||
| 140 | |||
| 141 | b = { | ||
| 142 | [1] = "(" * (((1 - m.S"()") + #m.P"(" * m.V(1))^0) * ")" | ||
| 143 | } | ||
| 144 | |||
| 145 | assert(m.match(b, "(al())()")) | ||
| 146 | assert(not m.match(b * eos, "(al())()")) | ||
| 147 | assert(m.match(b * eos, "((al())()(é))")) | ||
| 148 | assert(not m.match(b, "(al()()")) | ||
| 149 | |||
| 150 | assert(not m.match(letter^1 - "for", "foreach")) | ||
| 151 | assert(m.match(letter^1 - ("for" * eos), "foreach")) | ||
| 152 | assert(not m.match(letter^1 - ("for" * eos), "for")) | ||
| 153 | |||
| 154 | function basiclookfor (p) | ||
| 155 | return m.P { | ||
| 156 | [1] = p + (1 * m.V(1)) | ||
| 157 | } | ||
| 158 | end | ||
| 159 | |||
| 160 | function caplookfor (p) | ||
| 161 | return basiclookfor(p:C()) | ||
| 162 | end | ||
| 163 | |||
| 164 | assert(m.match(caplookfor(letter^1), " 4achou123...") == "achou") | ||
| 165 | a = {m.match(caplookfor(letter^1)^0, " two words, one more ")} | ||
| 166 | checkeq(a, {"two", "words", "one", "more"}) | ||
| 167 | |||
| 168 | assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7) | ||
| 169 | |||
| 170 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")} | ||
| 171 | checkeq(a, {"123", "d"}) | ||
| 172 | |||
| 173 | a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")} | ||
| 174 | checkeq(a, {"123"}) | ||
| 175 | |||
| 176 | a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")} | ||
| 177 | checkeq(a, {"abcd", "l"}) | ||
| 178 | |||
| 179 | a = {m.match(m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')} | ||
| 180 | checkeq(a, {10,20,30,2}) | ||
| 181 | a = {m.match(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp(), 'aaa')} | ||
| 182 | checkeq(a, {1,10,20,30,2}) | ||
| 183 | a = m.match(m.Ct(m.Cp() * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa') | ||
| 184 | checkeq(a, {1,10,20,30,2}) | ||
| 185 | a = m.match(m.Ct(m.Cp() * m.Cc(7,8) * m.Cc(10,20,30) * 'a' * m.Cp()), 'aaa') | ||
| 186 | checkeq(a, {1,7,8,10,20,30,2}) | ||
| 187 | a = {m.match(m.Cc() * m.Cc() * m.Cc(1) * m.Cc(2,3,4) * m.Cc() * 'a', 'aaa')} | ||
| 188 | checkeq(a, {1,2,3,4}) | ||
| 189 | |||
| 190 | a = {m.match(m.Cp() * letter^1 * m.Cp(), "abcd")} | ||
| 191 | checkeq(a, {1, 5}) | ||
| 192 | |||
| 193 | |||
| 194 | t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")} | ||
| 195 | checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""}) | ||
| 196 | |||
| 197 | |||
| 198 | -- test for small capture boundary | ||
| 199 | for i = 250,260 do | ||
| 200 | assert(#m.match(m.C(i), string.rep('a', i)) == i) | ||
| 201 | assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i) | ||
| 202 | end | ||
| 203 | |||
| 204 | |||
| 205 | -- tests for any*n and any*-n | ||
| 206 | for n = 1, 550 do | ||
| 207 | local x_1 = string.rep('x', n - 1) | ||
| 208 | local x = x_1 .. 'a' | ||
| 209 | assert(not m.P(n):match(x_1)) | ||
| 210 | assert(m.P(n):match(x) == n + 1) | ||
| 211 | assert(n < 4 or m.match(m.P(n) + "xxx", x_1) == 4) | ||
| 212 | assert(m.C(n):match(x) == x) | ||
| 213 | assert(m.C(m.C(n)):match(x) == x) | ||
| 214 | assert(m.P(-n):match(x_1) == 1) | ||
| 215 | assert(not m.P(-n):match(x)) | ||
| 216 | assert(n < 13 or m.match(m.Cc(20) * ((n - 13) * m.P(10)) * 3, x) == 20) | ||
| 217 | local n3 = math.floor(n/3) | ||
| 218 | assert(m.match(n3 * m.Cp() * n3 * n3, x) == n3 + 1) | ||
| 219 | end | ||
| 220 | |||
| 221 | -- true values | ||
| 222 | assert(m.P(0):match("x") == 1) | ||
| 223 | assert(m.P(0):match("") == 1) | ||
| 224 | assert(m.C(0):match("x") == "") | ||
| 225 | |||
| 226 | assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxu") == 1) | ||
| 227 | assert(m.match(m.Cc(0) * m.P(10) + m.Cc(1) * "xuxu", "xuxuxuxuxu") == 0) | ||
| 228 | assert(m.match(m.C(m.P(2)^1), "abcde") == "abcd") | ||
| 229 | p = m.Cc(0) * 1 + m.Cc(1) * 2 + m.Cc(2) * 3 + m.Cc(3) * 4 | ||
| 230 | |||
| 231 | |||
| 232 | -- test for alternation optimization | ||
| 233 | assert(m.match(m.P"a"^1 + "ab" + m.P"x"^0, "ab") == 2) | ||
| 234 | assert(m.match((m.P"a"^1 + "ab" + m.P"x"^0 * 1)^0, "ab") == 3) | ||
| 235 | assert(m.match(m.P"ab" + "cd" + "" + "cy" + "ak", "98") == 1) | ||
| 236 | assert(m.match(m.P"ab" + "cd" + "ax" + "cy", "ax") == 3) | ||
| 237 | assert(m.match("a" * m.P"b"^0 * "c" + "cd" + "ax" + "cy", "ax") == 3) | ||
| 238 | assert(m.match((m.P"ab" + "cd" + "ax" + "cy")^0, "ax") == 3) | ||
| 239 | assert(m.match(m.P(1) * "x" + m.S"" * "xu" + "ay", "ay") == 3) | ||
| 240 | assert(m.match(m.P"abc" + "cde" + "aka", "aka") == 4) | ||
| 241 | assert(m.match(m.S"abc" * "x" + "cde" + "aka", "ax") == 3) | ||
| 242 | assert(m.match(m.S"abc" * "x" + "cde" + "aka", "aka") == 4) | ||
| 243 | assert(m.match(m.S"abc" * "x" + "cde" + "aka", "cde") == 4) | ||
| 244 | assert(m.match(m.S"abc" * "x" + "ide" + m.S"ab" * "ka", "aka") == 4) | ||
| 245 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "ax") == 3) | ||
| 246 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "aka") == 4) | ||
| 247 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "cde" + "aka", "cde") == 4) | ||
| 248 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "aka") == 4) | ||
| 249 | assert(m.match("ab" + m.S"abc" * m.P"y"^0 * "x" + "ide" + m.S"ab" * "ka", "ax") == 3) | ||
| 250 | assert(m.match(m.P(1) * "x" + "cde" + m.S"ab" * "ka", "aka") == 4) | ||
| 251 | assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "aka") == 4) | ||
| 252 | assert(m.match(m.P(1) * "x" + "cde" + m.P(1) * "ka", "cde") == 4) | ||
| 253 | assert(m.match(m.P"eb" + "cd" + m.P"e"^0 + "x", "ee") == 3) | ||
| 254 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "abcd") == 3) | ||
| 255 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "eeex") == 4) | ||
| 256 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "cd") == 3) | ||
| 257 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x", "x") == 1) | ||
| 258 | assert(m.match(m.P"ab" + "cd" + m.P"e"^0 + "x" + "", "zee") == 1) | ||
| 259 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "abcd") == 3) | ||
| 260 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "eeex") == 4) | ||
| 261 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "cd") == 3) | ||
| 262 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x", "x") == 2) | ||
| 263 | assert(m.match(m.P"ab" + "cd" + m.P"e"^1 + "x" + "", "zee") == 1) | ||
| 264 | assert(not m.match(("aa" * m.P"bc"^-1 + "aab") * "e", "aabe")) | ||
| 265 | |||
| 266 | assert(m.match("alo" * (m.P"\n" + -1), "alo") == 4) | ||
| 267 | |||
| 268 | |||
| 269 | -- bug in 0.12 (rc1) | ||
| 270 | assert(m.match((m.P"\128\187\191" + m.S"abc")^0, "\128\187\191") == 4) | ||
| 271 | |||
| 272 | assert(m.match(m.S"\0\128\255\127"^0, string.rep("\0\128\255\127", 10)) == | ||
| 273 | 4*10 + 1) | ||
| 274 | |||
| 275 | -- optimizations with optional parts | ||
| 276 | assert(m.match(("ab" * -m.P"c")^-1, "abc") == 1) | ||
| 277 | assert(m.match(("ab" * #m.P"c")^-1, "abd") == 1) | ||
| 278 | assert(m.match(("ab" * m.B"c")^-1, "ab") == 1) | ||
| 279 | assert(m.match(("ab" * m.P"cd"^0)^-1, "abcdcdc") == 7) | ||
| 280 | |||
| 281 | assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3) | ||
| 282 | |||
| 283 | p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1 | ||
| 284 | assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21) | ||
| 285 | |||
| 286 | |||
| 287 | pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510" | ||
| 288 | assert(m.match(m.Cs((m.P"1" / "a" + m.P"5" / "b" + m.P"9" / "c" + 1)^0), pi) == | ||
| 289 | m.match(m.Cs((m.P(1) / {["1"] = "a", ["5"] = "b", ["9"] = "c"})^0), pi)) | ||
| 290 | print"+" | ||
| 291 | |||
| 292 | |||
| 293 | -- tests for capture optimizations | ||
| 294 | assert(m.match((m.P(3) + 4 * m.Cp()) * "a", "abca") == 5) | ||
| 295 | t = {m.match(((m.P"a" + m.Cp()) * m.P"x")^0, "axxaxx")} | ||
| 296 | checkeq(t, {3, 6}) | ||
| 297 | |||
| 298 | |||
| 299 | -- tests for numbered captures | ||
| 300 | p = m.C(1) | ||
| 301 | assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 3, "abcdefgh") == "a") | ||
| 302 | assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 1, "abcdefgh") == "abcdef") | ||
| 303 | assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 4, "abcdefgh") == "bc") | ||
| 304 | assert(m.match(m.C(m.C(p * m.C(2)) * m.C(3)) / 0, "abcdefgh") == 7) | ||
| 305 | |||
| 306 | a, b, c = m.match(p * (m.C(p * m.C(2)) * m.C(3) / 4) * p, "abcdefgh") | ||
| 307 | assert(a == "a" and b == "efg" and c == "h") | ||
| 308 | |||
| 309 | -- test for table captures | ||
| 310 | t = m.match(m.Ct(letter^1), "alo") | ||
| 311 | checkeq(t, {}) | ||
| 312 | |||
| 313 | t, n = m.match(m.Ct(m.C(letter)^1) * m.Cc"t", "alo") | ||
| 314 | assert(n == "t" and table.concat(t) == "alo") | ||
| 315 | |||
| 316 | t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo") | ||
| 317 | assert(table.concat(t, ";") == "alo;a;l;o") | ||
| 318 | |||
| 319 | t = m.match(m.Ct(m.C(m.C(letter)^1)), "alo") | ||
| 320 | assert(table.concat(t, ";") == "alo;a;l;o") | ||
| 321 | |||
| 322 | t = m.match(m.Ct(m.Ct((m.Cp() * letter * m.Cp())^1)), "alo") | ||
| 323 | assert(table.concat(t[1], ";") == "1;2;2;3;3;4") | ||
| 324 | |||
| 325 | t = m.match(m.Ct(m.C(m.C(1) * 1 * m.C(1))), "alo") | ||
| 326 | checkeq(t, {"alo", "a", "o"}) | ||
| 327 | |||
| 328 | |||
| 329 | -- tests for groups | ||
| 330 | p = m.Cg(1) -- no capture | ||
| 331 | assert(p:match('x') == 'x') | ||
| 332 | p = m.Cg(m.P(true)/function () end * 1) -- no value | ||
| 333 | assert(p:match('x') == 'x') | ||
| 334 | p = m.Cg(m.Cg(m.Cg(m.C(1)))) | ||
| 335 | assert(p:match('x') == 'x') | ||
| 336 | p = m.Cg(m.Cg(m.Cg(m.C(1))^0) * m.Cg(m.Cc(1) * m.Cc(2))) | ||
| 337 | t = {p:match'abc'} | ||
| 338 | checkeq(t, {'a', 'b', 'c', 1, 2}) | ||
| 339 | |||
| 340 | p = m.Ct(m.Cg(m.Cc(10), "hi") * m.C(1)^0 * m.Cg(m.Cc(20), "ho")) | ||
| 341 | t = p:match'' | ||
| 342 | checkeq(t, {hi = 10, ho = 20}) | ||
| 343 | t = p:match'abc' | ||
| 344 | checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'}) | ||
| 345 | |||
| 346 | |||
| 347 | -- test for error messages | ||
| 348 | local function checkerr (msg, ...) | ||
| 349 | assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...)))) | ||
| 350 | end | ||
| 351 | |||
| 352 | checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") | ||
| 353 | checkerr("rule '1' used outside a grammar", m.match, m.V(1), "") | ||
| 354 | checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "") | ||
| 355 | checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "") | ||
| 356 | checkerr("undefined in given grammar", m.match, { m.V{} }, "") | ||
| 357 | |||
| 358 | checkerr("rule 'A' is not a pattern", m.P, { m.P(1), A = {} }) | ||
| 359 | checkerr("grammar has no initial rule", m.P, { [print] = {} }) | ||
| 360 | |||
| 361 | -- grammar with a long call chain before left recursion | ||
| 362 | p = {'a', | ||
| 363 | a = m.V'b' * m.V'c' * m.V'd' * m.V'a', | ||
| 364 | b = m.V'c', | ||
| 365 | c = m.V'd', | ||
| 366 | d = m.V'e', | ||
| 367 | e = m.V'f', | ||
| 368 | f = m.V'g', | ||
| 369 | g = m.P'' | ||
| 370 | } | ||
| 371 | checkerr("rule 'a' may be left recursive", m.match, p, "a") | ||
| 372 | |||
| 373 | |||
| 374 | -- tests for non-pattern as arguments to pattern functions | ||
| 375 | |||
| 376 | p = { ('a' * m.V(1))^-1 } * m.P'b' * { 'a' * m.V(2); m.V(1)^-1 } | ||
| 377 | assert(m.match(p, "aaabaac") == 7) | ||
| 378 | |||
| 379 | p = m.P'abc' * 2 * -5 * true * 'de' -- mix of numbers and strings and booleans | ||
| 380 | |||
| 381 | assert(p:match("abc01de") == 8) | ||
| 382 | assert(p:match("abc01de3456") == nil) | ||
| 383 | |||
| 384 | p = 'abc' * (2 * (-5 * (true * m.P'de'))) | ||
| 385 | |||
| 386 | assert(p:match("abc01de") == 8) | ||
| 387 | assert(p:match("abc01de3456") == nil) | ||
| 388 | |||
| 389 | p = { m.V(2), m.P"abc" } * | ||
| 390 | (m.P{ "xx", xx = m.P"xx" } + { "x", x = m.P"a" * m.V"x" + "" }) | ||
| 391 | assert(p:match("abcaaaxx") == 7) | ||
| 392 | assert(p:match("abcxx") == 6) | ||
| 393 | |||
| 394 | |||
| 395 | -- a large table capture | ||
| 396 | t = m.match(m.Ct(m.C('a')^0), string.rep("a", 10000)) | ||
| 397 | assert(#t == 10000 and t[1] == 'a' and t[#t] == 'a') | ||
| 398 | |||
| 399 | print('+') | ||
| 400 | |||
| 401 | |||
| 402 | -- bug in 0.10 (rechecking a grammar, after tail-call optimization) | ||
| 403 | m.P{ m.P { (m.P(3) + "xuxu")^0 * m.V"xuxu", xuxu = m.P(1) } } | ||
| 404 | |||
| 405 | local V = m.V | ||
| 406 | |||
| 407 | local Space = m.S(" \n\t")^0 | ||
| 408 | local Number = m.C(m.R("09")^1) * Space | ||
| 409 | local FactorOp = m.C(m.S("+-")) * Space | ||
| 410 | local TermOp = m.C(m.S("*/")) * Space | ||
| 411 | local Open = "(" * Space | ||
| 412 | local Close = ")" * Space | ||
| 413 | |||
| 414 | |||
| 415 | local function f_factor (v1, op, v2, d) | ||
| 416 | assert(d == nil) | ||
| 417 | if op == "+" then return v1 + v2 | ||
| 418 | else return v1 - v2 | ||
| 419 | end | ||
| 420 | end | ||
| 421 | |||
| 422 | |||
| 423 | local function f_term (v1, op, v2, d) | ||
| 424 | assert(d == nil) | ||
| 425 | if op == "*" then return v1 * v2 | ||
| 426 | else return v1 / v2 | ||
| 427 | end | ||
| 428 | end | ||
| 429 | |||
| 430 | G = m.P{ "Exp", | ||
| 431 | Exp = m.Cf(V"Factor" * m.Cg(FactorOp * V"Factor")^0, f_factor); | ||
| 432 | Factor = m.Cf(V"Term" * m.Cg(TermOp * V"Term")^0, f_term); | ||
| 433 | Term = Number / tonumber + Open * V"Exp" * Close; | ||
| 434 | } | ||
| 435 | |||
| 436 | G = Space * G * -1 | ||
| 437 | |||
| 438 | for _, s in ipairs{" 3 + 5*9 / (1+1) ", "3+4/2", "3+3-3- 9*2+3*9/1- 8"} do | ||
| 439 | assert(m.match(G, s) == loadstring("return "..s)()) | ||
| 440 | end | ||
| 441 | |||
| 442 | |||
| 443 | -- test for grammars (errors deep in calling non-terminals) | ||
| 444 | g = m.P{ | ||
| 445 | [1] = m.V(2) + "a", | ||
| 446 | [2] = "a" * m.V(3) * "x", | ||
| 447 | [3] = "b" * m.V(3) + "c" | ||
| 448 | } | ||
| 449 | |||
| 450 | assert(m.match(g, "abbbcx") == 7) | ||
| 451 | assert(m.match(g, "abbbbx") == 2) | ||
| 452 | |||
| 453 | |||
| 454 | -- tests for \0 | ||
| 455 | assert(m.match(m.R("\0\1")^1, "\0\1\0") == 4) | ||
| 456 | assert(m.match(m.S("\0\1ab")^1, "\0\1\0a") == 5) | ||
| 457 | assert(m.match(m.P(1)^3, "\0\1\0a") == 5) | ||
| 458 | assert(not m.match(-4, "\0\1\0a")) | ||
| 459 | assert(m.match("\0\1\0a", "\0\1\0a") == 5) | ||
| 460 | assert(m.match("\0\0\0", "\0\0\0") == 4) | ||
| 461 | assert(not m.match("\0\0\0", "\0\0")) | ||
| 462 | |||
| 463 | |||
| 464 | -- tests for predicates | ||
| 465 | assert(not m.match(-m.P("a") * 2, "alo")) | ||
| 466 | assert(m.match(- -m.P("a") * 2, "alo") == 3) | ||
| 467 | assert(m.match(#m.P("a") * 2, "alo") == 3) | ||
| 468 | assert(m.match(##m.P("a") * 2, "alo") == 3) | ||
| 469 | assert(not m.match(##m.P("c") * 2, "alo")) | ||
| 470 | assert(m.match(m.Cs((##m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | ||
| 471 | assert(m.match(m.Cs((#((#m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | ||
| 472 | assert(m.match(m.Cs((- -m.P("a") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | ||
| 473 | assert(m.match(m.Cs((-((-m.P"a")/"") * 1 + m.P(1)/".")^0), "aloal") == "a..a.") | ||
| 474 | |||
| 475 | p = -m.P'a' * m.Cc(1) + -m.P'b' * m.Cc(2) + -m.P'c' * m.Cc(3) | ||
| 476 | assert(p:match('a') == 2 and p:match('') == 1 and p:match('b') == 1) | ||
| 477 | |||
| 478 | p = -m.P'a' * m.Cc(10) + #m.P'a' * m.Cc(20) | ||
| 479 | assert(p:match('a') == 20 and p:match('') == 10 and p:match('b') == 10) | ||
| 480 | |||
| 481 | |||
| 482 | |||
| 483 | -- look-behind predicate | ||
| 484 | assert(not m.match(m.B'a', 'a')) | ||
| 485 | assert(m.match(1 * m.B'a', 'a') == 2) | ||
| 486 | assert(not m.match(m.B(1), 'a')) | ||
| 487 | assert(m.match(1 * m.B(1), 'a') == 2) | ||
| 488 | assert(m.match(-m.B(1), 'a') == 1) | ||
| 489 | assert(m.match(m.B(250), string.rep('a', 250)) == nil) | ||
| 490 | assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251) | ||
| 491 | assert(not pcall(m.B, 260)) | ||
| 492 | |||
| 493 | B = #letter * -m.B(letter) + -letter * m.B(letter) | ||
| 494 | x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) }) | ||
| 495 | checkeq(m.match(x, 'ar cal c'), {1,3,4,7,9,10}) | ||
| 496 | checkeq(m.match(x, ' ar cal '), {2,4,5,8}) | ||
| 497 | checkeq(m.match(x, ' '), {}) | ||
| 498 | checkeq(m.match(x, 'aloalo'), {1,7}) | ||
| 499 | |||
| 500 | assert(m.match(B, "a") == 1) | ||
| 501 | assert(m.match(1 * B, "a") == 2) | ||
| 502 | assert(not m.B(1 - letter):match("")) | ||
| 503 | assert((-m.B(letter)):match("") == 1) | ||
| 504 | |||
| 505 | assert((4 * m.B(letter, 4)):match("aaaaaaaa") == 5) | ||
| 506 | assert(not (4 * m.B(#letter * 5)):match("aaaaaaaa")) | ||
| 507 | assert((4 * -m.B(#letter * 5)):match("aaaaaaaa") == 5) | ||
| 508 | |||
| 509 | -- look-behind with grammars | ||
| 510 | assert(m.match('a' * m.B{'x', x = m.P(3)}, 'aaa') == nil) | ||
| 511 | assert(m.match('aa' * m.B{'x', x = m.P('aaa')}, 'aaaa') == nil) | ||
| 512 | assert(m.match('aaa' * m.B{'x', x = m.P('aaa')}, 'aaaaa') == 4) | ||
| 513 | |||
| 514 | |||
| 515 | |||
| 516 | -- bug in 0.9 | ||
| 517 | assert(m.match(('a' * #m.P'b'), "ab") == 2) | ||
| 518 | assert(not m.match(('a' * #m.P'b'), "a")) | ||
| 519 | |||
| 520 | assert(not m.match(#m.S'567', "")) | ||
| 521 | assert(m.match(#m.S'567' * 1, "6") == 2) | ||
| 522 | |||
| 523 | |||
| 524 | -- tests for Tail Calls | ||
| 525 | |||
| 526 | --labeled failure | ||
| 527 | p = m.P{ 'a' * m.V(1) + '' } | ||
| 528 | assert(p:match(string.rep('a', 1000)) == 1001) | ||
| 529 | |||
| 530 | -- create a grammar for a simple DFA for even number of 0s and 1s | ||
| 531 | -- | ||
| 532 | -- ->1 <---0---> 2 | ||
| 533 | -- ^ ^ | ||
| 534 | -- | | | ||
| 535 | -- 1 1 | ||
| 536 | -- | | | ||
| 537 | -- V V | ||
| 538 | -- 3 <---0---> 4 | ||
| 539 | -- | ||
| 540 | -- this grammar should keep no backtracking information | ||
| 541 | |||
| 542 | p = m.P{ | ||
| 543 | [1] = '0' * m.V(2) + '1' * m.V(3) + -1, | ||
| 544 | [2] = '0' * m.V(1) + '1' * m.V(4), | ||
| 545 | [3] = '0' * m.V(4) + '1' * m.V(1), | ||
| 546 | [4] = '0' * m.V(3) + '1' * m.V(2), | ||
| 547 | } | ||
| 548 | |||
| 549 | -- labeled failure | ||
| 550 | assert(p:match(string.rep("00", 10000))) | ||
| 551 | assert(p:match(string.rep("01", 10000))) | ||
| 552 | assert(p:match(string.rep("011", 10000))) | ||
| 553 | assert(not p:match(string.rep("011", 10000) .. "1")) | ||
| 554 | assert(not p:match(string.rep("011", 10001))) | ||
| 555 | |||
| 556 | |||
| 557 | -- this grammar does need backtracking info. | ||
| 558 | local lim = 10000 | ||
| 559 | p = m.P{ '0' * m.V(1) + '0' } | ||
| 560 | assert(not pcall(m.match, p, string.rep("0", lim))) | ||
| 561 | m.setmaxstack(2*lim) | ||
| 562 | assert(not pcall(m.match, p, string.rep("0", lim))) | ||
| 563 | m.setmaxstack(2*lim + 4) | ||
| 564 | assert(pcall(m.match, p, string.rep("0", lim))) | ||
| 565 | |||
| 566 | -- this repetition should not need stack space (only the call does) | ||
| 567 | p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' } | ||
| 568 | m.setmaxstack(200) | ||
| 569 | -- labeled failure | ||
| 570 | assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362) | ||
| 571 | |||
| 572 | m.setmaxstack(5) -- restore original limit | ||
| 573 | |||
| 574 | -- tests for optional start position | ||
| 575 | assert(m.match("a", "abc", 1)) | ||
| 576 | assert(m.match("b", "abc", 2)) | ||
| 577 | assert(m.match("c", "abc", 3)) | ||
| 578 | assert(not m.match(1, "abc", 4)) | ||
| 579 | assert(m.match("a", "abc", -3)) | ||
| 580 | assert(m.match("b", "abc", -2)) | ||
| 581 | assert(m.match("c", "abc", -1)) | ||
| 582 | assert(m.match("abc", "abc", -4)) -- truncate to position 1 | ||
| 583 | |||
| 584 | assert(m.match("", "abc", 10)) -- empty string is everywhere! | ||
| 585 | assert(m.match("", "", 10)) | ||
| 586 | assert(not m.match(1, "", 1)) | ||
| 587 | assert(not m.match(1, "", -1)) | ||
| 588 | assert(not m.match(1, "", 0)) | ||
| 589 | |||
| 590 | print("+") | ||
| 591 | |||
| 592 | |||
| 593 | -- tests for argument captures | ||
| 594 | assert(not pcall(m.Carg, 0)) | ||
| 595 | assert(not pcall(m.Carg, -1)) | ||
| 596 | assert(not pcall(m.Carg, 2^18)) | ||
| 597 | assert(not pcall(m.match, m.Carg(1), 'a', 1)) | ||
| 598 | assert(m.match(m.Carg(1), 'a', 1, print) == print) | ||
| 599 | x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)} | ||
| 600 | checkeq(x, {10, 20}) | ||
| 601 | |||
| 602 | assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") * | ||
| 603 | m.Cmt(m.Cb("a"), function (s,i,x) | ||
| 604 | assert(s == "a" and i == 1); | ||
| 605 | return i, x+1 | ||
| 606 | end) * | ||
| 607 | m.Carg(2), function (s,i,a,b,c) | ||
| 608 | assert(s == "a" and i == 1 and c == nil); | ||
| 609 | return i, 2*a + 3*b | ||
| 610 | end) * "a", | ||
| 611 | "a", 1, false, 100, 1000) == 2*1001 + 3*100) | ||
| 612 | |||
| 613 | |||
| 614 | -- tests for Lua functions | ||
| 615 | |||
| 616 | t = {} | ||
| 617 | s = "" | ||
| 618 | p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; return nil end) * false | ||
| 619 | s = "hi, this is a test" | ||
| 620 | assert(m.match(((p - m.P(-1)) + 2)^0, s) == string.len(s) + 1) | ||
| 621 | assert(#t == string.len(s)/2 and t[1] == 1 and t[2] == 3) | ||
| 622 | |||
| 623 | assert(not m.match(p, s)) | ||
| 624 | |||
| 625 | p = mt.__add(function (s, i) return i end, function (s, i) return nil end) | ||
| 626 | assert(m.match(p, "alo")) | ||
| 627 | |||
| 628 | p = mt.__mul(function (s, i) return i end, function (s, i) return nil end) | ||
| 629 | assert(not m.match(p, "alo")) | ||
| 630 | |||
| 631 | |||
| 632 | t = {} | ||
| 633 | p = function (s1, i) assert(s == s1); t[#t + 1] = i; return i end | ||
| 634 | s = "hi, this is a test" | ||
| 635 | assert(m.match((m.P(1) * p)^0, s) == string.len(s) + 1) | ||
| 636 | assert(#t == string.len(s) and t[1] == 2 and t[2] == 3) | ||
| 637 | |||
| 638 | t = {} | ||
| 639 | p = m.P(function (s1, i) assert(s == s1); t[#t + 1] = i; | ||
| 640 | return i <= s1:len() and i end) * 1 | ||
| 641 | s = "hi, this is a test" | ||
| 642 | assert(m.match(p^0, s) == string.len(s) + 1) | ||
| 643 | assert(#t == string.len(s) + 1 and t[1] == 1 and t[2] == 2) | ||
| 644 | |||
| 645 | p = function (s1, i) return m.match(m.P"a"^1, s1, i) end | ||
| 646 | assert(m.match(p, "aaaa") == 5) | ||
| 647 | assert(m.match(p, "abaa") == 2) | ||
| 648 | assert(not m.match(p, "baaa")) | ||
| 649 | |||
| 650 | assert(not pcall(m.match, function () return 2^20 end, s)) | ||
| 651 | assert(not pcall(m.match, function () return 0 end, s)) | ||
| 652 | assert(not pcall(m.match, function (s, i) return i - 1 end, s)) | ||
| 653 | assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s)) | ||
| 654 | assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s)) | ||
| 655 | assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s)) | ||
| 656 | assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s)) | ||
| 657 | assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)) | ||
| 658 | assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s)) | ||
| 659 | assert(m.match(m.P(1)^0 * function (_, i) return true end, s) == | ||
| 660 | string.len(s) + 1) | ||
| 661 | for i = 1, string.len(s) + 1 do | ||
| 662 | assert(m.match(function (_, _) return i end, s) == i) | ||
| 663 | end | ||
| 664 | |||
| 665 | p = (m.P(function (s, i) return i%2 == 0 and i end) * 1 | ||
| 666 | + m.P(function (s, i) return i%2 ~= 0 and i + 2 <= s:len() and i end) * 3)^0 | ||
| 667 | * -1 | ||
| 668 | assert(p:match(string.rep('a', 14000))) | ||
| 669 | |||
| 670 | -- tests for Function Replacements | ||
| 671 | f = function (a, ...) if a ~= "x" then return {a, ...} end end | ||
| 672 | |||
| 673 | t = m.match(m.C(1)^0/f, "abc") | ||
| 674 | checkeq(t, {"a", "b", "c"}) | ||
| 675 | |||
| 676 | t = m.match(m.C(1)^0/f/f, "abc") | ||
| 677 | checkeq(t, {{"a", "b", "c"}}) | ||
| 678 | |||
| 679 | t = m.match(m.P(1)^0/f/f, "abc") -- no capture | ||
| 680 | checkeq(t, {{"abc"}}) | ||
| 681 | |||
| 682 | t = m.match((m.P(1)^0/f * m.Cp())/f, "abc") | ||
| 683 | checkeq(t, {{"abc"}, 4}) | ||
| 684 | |||
| 685 | t = m.match((m.C(1)^0/f * m.Cp())/f, "abc") | ||
| 686 | checkeq(t, {{"a", "b", "c"}, 4}) | ||
| 687 | |||
| 688 | t = m.match((m.C(1)^0/f * m.Cp())/f, "xbc") | ||
| 689 | checkeq(t, {4}) | ||
| 690 | |||
| 691 | t = m.match(m.C(m.C(1)^0)/f, "abc") | ||
| 692 | checkeq(t, {"abc", "a", "b", "c"}) | ||
| 693 | |||
| 694 | g = function (...) return 1, ... end | ||
| 695 | t = {m.match(m.C(1)^0/g/g, "abc")} | ||
| 696 | checkeq(t, {1, 1, "a", "b", "c"}) | ||
| 697 | |||
| 698 | t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")} | ||
| 699 | t1 = {1,1,nil,nil,4,nil,3,nil,nil} | ||
| 700 | for i=1,10 do assert(t[i] == t1[i]) end | ||
| 701 | |||
| 702 | t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")} | ||
| 703 | checkeq(t, {"a", "ax", "b", "bx", "c", "cx"}) | ||
| 704 | |||
| 705 | t = m.match(m.Ct((m.C(1) / function (x,y) return y, x end * m.Cc(1))^0), "abc") | ||
| 706 | checkeq(t, {nil, "a", 1, nil, "b", 1, nil, "c", 1}) | ||
| 707 | |||
| 708 | -- tests for Query Replacements | ||
| 709 | |||
| 710 | assert(m.match(m.C(m.C(1)^0)/{abc = 10}, "abc") == 10) | ||
| 711 | assert(m.match(m.C(1)^0/{a = 10}, "abc") == 10) | ||
| 712 | assert(m.match(m.S("ba")^0/{ab = 40}, "abc") == 40) | ||
| 713 | t = m.match(m.Ct((m.S("ba")/{a = 40})^0), "abc") | ||
| 714 | checkeq(t, {40}) | ||
| 715 | |||
| 716 | assert(m.match(m.Cs((m.C(1)/{a=".", d=".."})^0), "abcdde") == ".bc....e") | ||
| 717 | assert(m.match(m.Cs((m.C(1)/{f="."})^0), "abcdde") == "abcdde") | ||
| 718 | assert(m.match(m.Cs((m.C(1)/{d="."})^0), "abcdde") == "abc..e") | ||
| 719 | assert(m.match(m.Cs((m.C(1)/{e="."})^0), "abcdde") == "abcdd.") | ||
| 720 | assert(m.match(m.Cs((m.C(1)/{e=".", f="+"})^0), "eefef") == "..+.+") | ||
| 721 | assert(m.match(m.Cs((m.C(1))^0), "abcdde") == "abcdde") | ||
| 722 | assert(m.match(m.Cs(m.C(m.C(1)^0)), "abcdde") == "abcdde") | ||
| 723 | assert(m.match(1 * m.Cs(m.P(1)^0), "abcdde") == "bcdde") | ||
| 724 | assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "abcdde") == "abcdde") | ||
| 725 | assert(m.match(m.Cs((m.C('0')/'x' + 1)^0), "0ab0b0") == "xabxbx") | ||
| 726 | assert(m.match(m.Cs((m.C('0')/'x' + m.P(1)/{b=3})^0), "b0a0b") == "3xax3") | ||
| 727 | assert(m.match(m.P(1)/'%0%0'/{aa = -3} * 'x', 'ax') == -3) | ||
| 728 | assert(m.match(m.C(1)/'%0%1'/{aa = 'z'}/{z = -3} * 'x', 'ax') == -3) | ||
| 729 | |||
| 730 | assert(m.match(m.Cs(m.Cc(0) * (m.P(1)/"")), "4321") == "0") | ||
| 731 | |||
| 732 | assert(m.match(m.Cs((m.P(1) / "%0")^0), "abcd") == "abcd") | ||
| 733 | assert(m.match(m.Cs((m.P(1) / "%0.%0")^0), "abcd") == "a.ab.bc.cd.d") | ||
| 734 | assert(m.match(m.Cs((m.P("a") / "%0.%0" + 1)^0), "abcad") == "a.abca.ad") | ||
| 735 | assert(m.match(m.C("a") / "%1%%%0", "a") == "a%a") | ||
| 736 | assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx") | ||
| 737 | assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") == | ||
| 738 | "411 - abc ") | ||
| 739 | |||
| 740 | assert(pcall(m.match, m.P(1)/"%0", "abc")) | ||
| 741 | assert(not pcall(m.match, m.P(1)/"%1", "abc")) -- out of range | ||
| 742 | assert(not pcall(m.match, m.P(1)/"%9", "abc")) -- out of range | ||
| 743 | |||
| 744 | p = m.C(1) | ||
| 745 | p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1" | ||
| 746 | assert(p:match("1234567890") == "9 - 1") | ||
| 747 | |||
| 748 | assert(m.match(m.Cc(print), "") == print) | ||
| 749 | |||
| 750 | -- too many captures (just ignore extra ones) | ||
| 751 | p = m.C(1)^0 / "%2-%9-%0-%9" | ||
| 752 | assert(p:match"01234567890123456789" == "1-8-01234567890123456789-8") | ||
| 753 | s = string.rep("12345678901234567890", 20) | ||
| 754 | assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3") | ||
| 755 | |||
| 756 | -- string captures with non-string subcaptures | ||
| 757 | p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1" | ||
| 758 | assert(p:match'x' == 'alo - x - alo') | ||
| 759 | |||
| 760 | assert(not pcall(m.match, m.Cc(true) / "%1", "a")) | ||
| 761 | |||
| 762 | -- long strings for string capture | ||
| 763 | l = 10000 | ||
| 764 | s = string.rep('a', l) .. string.rep('b', l) .. string.rep('c', l) | ||
| 765 | |||
| 766 | p = (m.C(m.P'a'^1) * m.C(m.P'b'^1) * m.C(m.P'c'^1)) / '%3%2%1' | ||
| 767 | |||
| 768 | assert(p:match(s) == string.rep('c', l) .. | ||
| 769 | string.rep('b', l) .. | ||
| 770 | string.rep('a', l)) | ||
| 771 | |||
| 772 | print"+" | ||
| 773 | |||
| 774 | -- accumulator capture | ||
| 775 | function f (x) return x + 1 end | ||
| 776 | assert(m.match(m.Cf(m.Cc(0) * m.C(1)^0, f), "alo alo") == 7) | ||
| 777 | |||
| 778 | t = {m.match(m.Cf(m.Cc(1,2,3), error), "")} | ||
| 779 | checkeq(t, {1}) | ||
| 780 | p = m.Cf(m.Ct(true) * m.Cg(m.C(m.R"az"^1) * "=" * m.C(m.R"az"^1) * ";")^0, | ||
| 781 | rawset) | ||
| 782 | t = p:match("a=b;c=du;xux=yuy;") | ||
| 783 | checkeq(t, {a="b", c="du", xux="yuy"}) | ||
| 784 | |||
| 785 | |||
| 786 | -- errors in accumulator capture | ||
| 787 | |||
| 788 | -- very long match (forces fold to be a pair open-close) producing with | ||
| 789 | -- no initial capture | ||
| 790 | assert(not pcall(m.match, m.Cf(m.P(500), print), string.rep('a', 600))) | ||
| 791 | |||
| 792 | -- nested capture produces no initial value | ||
| 793 | assert(not pcall(m.match, m.Cf(m.P(1) / {}, print), "alo")) | ||
| 794 | |||
| 795 | |||
| 796 | -- tests for loop checker | ||
| 797 | |||
| 798 | local function haveloop (p) | ||
| 799 | assert(not pcall(function (p) return p^0 end, m.P(p))) | ||
| 800 | end | ||
| 801 | |||
| 802 | haveloop(m.P("x")^-4) | ||
| 803 | assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3) | ||
| 804 | assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3) | ||
| 805 | haveloop("") | ||
| 806 | haveloop(m.P("x")^0) | ||
| 807 | haveloop(m.P("x")^-1) | ||
| 808 | haveloop(m.P("x") + 1 + 2 + m.P("a")^-1) | ||
| 809 | haveloop(-m.P("ab")) | ||
| 810 | haveloop(- -m.P("ab")) | ||
| 811 | haveloop(# #(m.P("ab") + "xy")) | ||
| 812 | haveloop(- #m.P("ab")^0) | ||
| 813 | haveloop(# -m.P("ab")^1) | ||
| 814 | haveloop(#m.V(3)) | ||
| 815 | haveloop(m.V(3) + m.V(1) + m.P('a')^-1) | ||
| 816 | haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)}) | ||
| 817 | assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc") | ||
| 818 | == 3) | ||
| 819 | assert(m.match(m.P""^-3, "a") == 1) | ||
| 820 | |||
| 821 | local function find (p, s) | ||
| 822 | return m.match(basiclookfor(p), s) | ||
| 823 | end | ||
| 824 | |||
| 825 | |||
| 826 | local function badgrammar (g, expected) | ||
| 827 | local stat, msg = pcall(m.P, g) | ||
| 828 | assert(not stat) | ||
| 829 | if expected then assert(find(expected, msg)) end | ||
| 830 | end | ||
| 831 | |||
| 832 | badgrammar({[1] = m.V(1)}, "rule '1'") | ||
| 833 | badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal | ||
| 834 | badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal | ||
| 835 | badgrammar({[1] = m.V{}}, "rule '(a table)'") -- invalid non-terminal | ||
| 836 | badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'") -- left-recursive | ||
| 837 | badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive | ||
| 838 | badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive | ||
| 839 | badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive | ||
| 840 | badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive | ||
| 841 | badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'") -- inf. loop | ||
| 842 | badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'") -- inf. loop | ||
| 843 | badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'") -- inf. loop | ||
| 844 | badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'") -- inf. loop | ||
| 845 | badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop | ||
| 846 | badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive | ||
| 847 | badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'") | ||
| 848 | |||
| 849 | assert(m.match({'a' * -m.V(1)}, "aaa") == 2) | ||
| 850 | assert(m.match({'a' * -m.V(1)}, "aaaa") == nil) | ||
| 851 | |||
| 852 | |||
| 853 | -- good x bad grammars | ||
| 854 | m.P{ ('a' * m.V(1))^-1 } | ||
| 855 | m.P{ -('a' * m.V(1)) } | ||
| 856 | m.P{ ('abc' * m.V(1))^-1 } | ||
| 857 | m.P{ -('abc' * m.V(1)) } | ||
| 858 | badgrammar{ #m.P('abc') * m.V(1) } | ||
| 859 | badgrammar{ -('a' + m.V(1)) } | ||
| 860 | m.P{ #('a' * m.V(1)) } | ||
| 861 | badgrammar{ #('a' + m.V(1)) } | ||
| 862 | m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) } | ||
| 863 | badgrammar{ m.B{ m.P'abc' } * m.V(1) } | ||
| 864 | badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) } | ||
| 865 | |||
| 866 | |||
| 867 | -- simple tests for maximum sizes: | ||
| 868 | local p = m.P"a" | ||
| 869 | for i=1,14 do p = p * p end | ||
| 870 | |||
| 871 | p = {} | ||
| 872 | for i=1,100 do p[i] = m.P"a" end | ||
| 873 | p = m.P(p) | ||
| 874 | |||
| 875 | |||
| 876 | -- strange values for rule labels | ||
| 877 | |||
| 878 | p = m.P{ "print", | ||
| 879 | print = m.V(print), | ||
| 880 | [print] = m.V(_G), | ||
| 881 | [_G] = m.P"a", | ||
| 882 | } | ||
| 883 | |||
| 884 | assert(p:match("a")) | ||
| 885 | |||
| 886 | -- initial rule | ||
| 887 | g = {} | ||
| 888 | for i = 1, 10 do g["i"..i] = "a" * m.V("i"..i+1) end | ||
| 889 | g.i11 = m.P"" | ||
| 890 | for i = 1, 10 do | ||
| 891 | g[1] = "i"..i | ||
| 892 | local p = m.P(g) | ||
| 893 | assert(p:match("aaaaaaaaaaa") == 11 - i + 1) | ||
| 894 | end | ||
| 895 | |||
| 896 | print"+" | ||
| 897 | |||
| 898 | |||
| 899 | -- tests for back references | ||
| 900 | assert(not pcall(m.match, m.Cb('x'), '')) | ||
| 901 | assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')) | ||
| 902 | |||
| 903 | p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k")) | ||
| 904 | t = p:match("ab") | ||
| 905 | checkeq(t, {"a", "b"}) | ||
| 906 | |||
| 907 | |||
| 908 | t = {} | ||
| 909 | function foo (p) t[#t + 1] = p; return p .. "x" end | ||
| 910 | |||
| 911 | p = m.Cg(m.C(2) / foo, "x") * m.Cb"x" * | ||
| 912 | m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" * | ||
| 913 | m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" * | ||
| 914 | m.Cg(m.Cb('x') / foo, "x") * m.Cb"x" | ||
| 915 | x = {p:match'ab'} | ||
| 916 | checkeq(x, {'abx', 'abxx', 'abxxx', 'abxxxx'}) | ||
| 917 | checkeq(t, {'ab', | ||
| 918 | 'ab', 'abx', | ||
| 919 | 'ab', 'abx', 'abxx', | ||
| 920 | 'ab', 'abx', 'abxx', 'abxxx'}) | ||
| 921 | |||
| 922 | |||
| 923 | |||
| 924 | -- tests for match-time captures | ||
| 925 | |||
| 926 | p = m.P'a' * (function (s, i) return (s:sub(i, i) == 'b') and i + 1 end) | ||
| 927 | + 'acd' | ||
| 928 | |||
| 929 | assert(p:match('abc') == 3) | ||
| 930 | assert(p:match('acd') == 4) | ||
| 931 | |||
| 932 | local function id (s, i, ...) | ||
| 933 | return true, ... | ||
| 934 | end | ||
| 935 | |||
| 936 | assert(m.Cmt(m.Cs((m.Cmt(m.S'abc' / { a = 'x', c = 'y' }, id) + | ||
| 937 | m.R'09'^1 / string.char + | ||
| 938 | m.P(1))^0), id):match"acb98+68c" == "xyb\98+\68y") | ||
| 939 | |||
| 940 | p = m.P{'S', | ||
| 941 | S = m.V'atom' * space | ||
| 942 | + m.Cmt(m.Ct("(" * space * (m.Cmt(m.V'S'^1, id) + m.P(true)) * ")" * space), id), | ||
| 943 | atom = m.Cmt(m.C(m.R("AZ", "az", "09")^1), id) | ||
| 944 | } | ||
| 945 | x = p:match"(a g () ((b) c) (d (e)))" | ||
| 946 | checkeq(x, {'a', 'g', {}, {{'b'}, 'c'}, {'d', {'e'}}}); | ||
| 947 | |||
| 948 | x = {(m.Cmt(1, id)^0):match(string.rep('a', 500))} | ||
| 949 | assert(#x == 500) | ||
| 950 | |||
| 951 | local function id(s, i, x) | ||
| 952 | if x == 'a' then return i, 1, 3, 7 | ||
| 953 | else return nil, 2, 4, 6, 8 | ||
| 954 | end | ||
| 955 | end | ||
| 956 | |||
| 957 | p = ((m.P(id) * 1 + m.Cmt(2, id) * 1 + m.Cmt(1, id) * 1))^0 | ||
| 958 | assert(table.concat{p:match('abababab')} == string.rep('137', 4)) | ||
| 959 | |||
| 960 | local function ref (s, i, x) | ||
| 961 | return m.match(x, s, i - x:len()) | ||
| 962 | end | ||
| 963 | |||
| 964 | assert(m.Cmt(m.P(1)^0, ref):match('alo') == 4) | ||
| 965 | assert((m.P(1) * m.Cmt(m.P(1)^0, ref)):match('alo') == 4) | ||
| 966 | assert(not (m.P(1) * m.Cmt(m.C(1)^0, ref)):match('alo')) | ||
| 967 | |||
| 968 | ref = function (s,i,x) return i == tonumber(x) and i, 'xuxu' end | ||
| 969 | |||
| 970 | assert(m.Cmt(1, ref):match'2') | ||
| 971 | assert(not m.Cmt(1, ref):match'1') | ||
| 972 | assert(m.Cmt(m.P(1)^0, ref):match'03') | ||
| 973 | |||
| 974 | function ref (s, i, a, b) | ||
| 975 | if a == b then return i, a:upper() end | ||
| 976 | end | ||
| 977 | |||
| 978 | p = m.Cmt(m.C(m.R"az"^1) * "-" * m.C(m.R"az"^1), ref) | ||
| 979 | p = (any - p)^0 * p * any^0 * -1 | ||
| 980 | |||
| 981 | assert(p:match'abbbc-bc ddaa' == 'BC') | ||
| 982 | |||
| 983 | do -- match-time captures cannot be optimized away | ||
| 984 | local touch = 0 | ||
| 985 | f = m.P(function () touch = touch + 1; return true end) | ||
| 986 | |||
| 987 | local function check(n) n = n or 1; assert(touch == n); touch = 0 end | ||
| 988 | |||
| 989 | assert(m.match(f * false + 'b', 'a') == nil); check() | ||
| 990 | assert(m.match(f * false + 'b', '') == nil); check() | ||
| 991 | assert(m.match( (f * 'a')^0 * 'b', 'b') == 2); check() | ||
| 992 | assert(m.match( (f * 'a')^0 * 'b', '') == nil); check() | ||
| 993 | assert(m.match( (f * 'a')^-1 * 'b', 'b') == 2); check() | ||
| 994 | assert(m.match( (f * 'a')^-1 * 'b', '') == nil); check() | ||
| 995 | assert(m.match( ('b' + f * 'a')^-1 * 'b', '') == nil); check() | ||
| 996 | assert(m.match( (m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); check() | ||
| 997 | assert(m.match( (-m.P(1) * m.P'b'^-1 * f * 'a')^-1 * 'b', '') == nil); | ||
| 998 | check() | ||
| 999 | assert(m.match( (f * 'a' + 'b')^-1 * 'b', '') == nil); check() | ||
| 1000 | assert(m.match(f * 'a' + f * 'b', 'b') == 2); check(2) | ||
| 1001 | assert(m.match(f * 'a' + f * 'b', 'a') == 2); check(1) | ||
| 1002 | assert(m.match(-f * 'a' + 'b', 'b') == 2); check(1) | ||
| 1003 | assert(m.match(-f * 'a' + 'b', '') == nil); check(1) | ||
| 1004 | end | ||
| 1005 | |||
| 1006 | c = '[' * m.Cg(m.P'='^0, "init") * '[' * | ||
| 1007 | { m.Cmt(']' * m.C(m.P'='^0) * ']' * m.Cb("init"), function (_, _, s1, s2) | ||
| 1008 | return s1 == s2 end) | ||
| 1009 | + 1 * m.V(1) } / 0 | ||
| 1010 | |||
| 1011 | assert(c:match'[==[]]====]]]]==]===[]' == 18) | ||
| 1012 | assert(c:match'[[]=]====]=]]]==]===[]' == 14) | ||
| 1013 | assert(not c:match'[[]=]====]=]=]==]===[]') | ||
| 1014 | |||
| 1015 | |||
| 1016 | -- old bug: optimization of concat with fail removed match-time capture | ||
| 1017 | p = m.Cmt(0, function (s) p = s end) * m.P(false) | ||
| 1018 | assert(not p:match('alo')) | ||
| 1019 | assert(p == 'alo') | ||
| 1020 | |||
| 1021 | |||
| 1022 | -- ensure that failed match-time captures are not kept on Lua stack | ||
| 1023 | do | ||
| 1024 | local t = {__mode = "kv"}; setmetatable(t,t) | ||
| 1025 | local c = 0 | ||
| 1026 | |||
| 1027 | local function foo (s,i) | ||
| 1028 | collectgarbage(); | ||
| 1029 | assert(next(t) == "__mode" and next(t, "__mode") == nil) | ||
| 1030 | local x = {} | ||
| 1031 | t[x] = true | ||
| 1032 | c = c + 1 | ||
| 1033 | return i, x | ||
| 1034 | end | ||
| 1035 | |||
| 1036 | local p = m.P{ m.Cmt(0, foo) * m.P(false) + m.P(1) * m.V(1) + m.P"" } | ||
| 1037 | p:match(string.rep('1', 10)) | ||
| 1038 | assert(c == 11) | ||
| 1039 | end | ||
| 1040 | |||
| 1041 | p = (m.P(function () return true, "a" end) * 'a' | ||
| 1042 | + m.P(function (s, i) return i, "aa", 20 end) * 'b' | ||
| 1043 | + m.P(function (s,i) if i <= #s then return i, "aaa" end end) * 1)^0 | ||
| 1044 | |||
| 1045 | t = {p:match('abacc')} | ||
| 1046 | checkeq(t, {'a', 'aa', 20, 'a', 'aaa', 'aaa'}) | ||
| 1047 | |||
| 1048 | |||
| 1049 | ------------------------------------------------------------------- | ||
| 1050 | -- Tests for 're' module | ||
| 1051 | ------------------------------------------------------------------- | ||
| 1052 | |||
| 1053 | local re = require "re" | ||
| 1054 | |||
| 1055 | local match, compile = re.match, re.compile | ||
| 1056 | |||
| 1057 | assert(match("a", ".") == 2) | ||
| 1058 | assert(match("a", "''") == 1) | ||
| 1059 | assert(match("", " ! . ") == 1) | ||
| 1060 | assert(not match("a", " ! . ")) | ||
| 1061 | assert(match("abcde", " ( . . ) * ") == 5) | ||
| 1062 | assert(match("abbcde", " [a-c] +") == 5) | ||
| 1063 | assert(match("0abbc1de", "'0' [a-c]+ '1'") == 7) | ||
| 1064 | assert(match("0zz1dda", "'0' [^a-c]+ 'a'") == 8) | ||
| 1065 | assert(match("abbc--", " [a-c] + +") == 5) | ||
| 1066 | assert(match("abbc--", " [ac-] +") == 2) | ||
| 1067 | assert(match("abbc--", " [-acb] + ") == 7) | ||
| 1068 | assert(not match("abbcde", " [b-z] + ")) | ||
| 1069 | assert(match("abb\"de", '"abb"["]"de"') == 7) | ||
| 1070 | assert(match("abceeef", "'ac' ? 'ab' * 'c' { 'e' * } / 'abceeef' ") == "eee") | ||
| 1071 | assert(match("abceeef", "'ac'? 'ab'* 'c' { 'f'+ } / 'abceeef' ") == 8) | ||
| 1072 | local t = {match("abceefe", "( ( & 'e' {} ) ? . ) * ")} | ||
| 1073 | checkeq(t, {4, 5, 7}) | ||
| 1074 | local t = {match("abceefe", "((&&'e' {})? .)*")} | ||
| 1075 | checkeq(t, {4, 5, 7}) | ||
| 1076 | local t = {match("abceefe", "( ( ! ! 'e' {} ) ? . ) *")} | ||
| 1077 | checkeq(t, {4, 5, 7}) | ||
| 1078 | local t = {match("abceefe", "(( & ! & ! 'e' {})? .)*")} | ||
| 1079 | checkeq(t, {4, 5, 7}) | ||
| 1080 | |||
| 1081 | assert(match("cccx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 5) | ||
| 1082 | assert(match("cdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 4) | ||
| 1083 | assert(match("abcdcdx" , "'ab'? ('ccc' / ('cde' / 'cd'*)? / 'ccc') 'x'+") == 8) | ||
| 1084 | |||
| 1085 | assert(match("abc", "a <- (. a)?") == 4) | ||
| 1086 | b = "balanced <- '(' ([^()] / balanced)* ')'" | ||
| 1087 | assert(match("(abc)", b)) | ||
| 1088 | assert(match("(a(b)((c) (d)))", b)) | ||
| 1089 | assert(not match("(a(b ((c) (d)))", b)) | ||
| 1090 | |||
| 1091 | b = compile[[ balanced <- "(" ([^()] / balanced)* ")" ]] | ||
| 1092 | assert(b == m.P(b)) | ||
| 1093 | assert(b:match"((((a))(b)))") | ||
| 1094 | |||
| 1095 | local g = [[ | ||
| 1096 | S <- "0" B / "1" A / "" -- balanced strings | ||
| 1097 | A <- "0" S / "1" A A -- one more 0 | ||
| 1098 | B <- "1" S / "0" B B -- one more 1 | ||
| 1099 | ]] | ||
| 1100 | assert(match("00011011", g) == 9) | ||
| 1101 | |||
| 1102 | local g = [[ | ||
| 1103 | S <- ("0" B / "1" A)* | ||
| 1104 | A <- "0" / "1" A A | ||
| 1105 | B <- "1" / "0" B B | ||
| 1106 | ]] | ||
| 1107 | assert(match("00011011", g) == 9) | ||
| 1108 | assert(match("000110110", g) == 9) | ||
| 1109 | assert(match("011110110", g) == 3) | ||
| 1110 | assert(match("000110010", g) == 1) | ||
| 1111 | |||
| 1112 | s = "aaaaaaaaaaaaaaaaaaaaaaaa" | ||
| 1113 | assert(match(s, "'a'^3") == 4) | ||
| 1114 | assert(match(s, "'a'^0") == 1) | ||
| 1115 | assert(match(s, "'a'^+3") == s:len() + 1) | ||
| 1116 | assert(not match(s, "'a'^+30")) | ||
| 1117 | assert(match(s, "'a'^-30") == s:len() + 1) | ||
| 1118 | assert(match(s, "'a'^-5") == 6) | ||
| 1119 | for i = 1, s:len() do | ||
| 1120 | assert(match(s, string.format("'a'^+%d", i)) >= i + 1) | ||
| 1121 | assert(match(s, string.format("'a'^-%d", i)) <= i + 1) | ||
| 1122 | assert(match(s, string.format("'a'^%d", i)) == i + 1) | ||
| 1123 | end | ||
| 1124 | assert(match("01234567890123456789", "[0-9]^3+") == 19) | ||
| 1125 | |||
| 1126 | |||
| 1127 | assert(match("01234567890123456789", "({....}{...}) -> '%2%1'") == "4560123") | ||
| 1128 | t = match("0123456789", "{| {.}* |}") | ||
| 1129 | checkeq(t, {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}) | ||
| 1130 | assert(match("012345", "{| (..) -> '%0%0' |}")[1] == "0101") | ||
| 1131 | |||
| 1132 | assert(match("abcdef", "( {.} {.} {.} {.} {.} ) -> 3") == "c") | ||
| 1133 | assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 3") == "d") | ||
| 1134 | assert(match("abcdef", "( {:x: . :} {.} {.} {.} {.} ) -> 0") == 6) | ||
| 1135 | |||
| 1136 | assert(not match("abcdef", "{:x: ({.} {.} {.}) -> 2 :} =x")) | ||
| 1137 | assert(match("abcbef", "{:x: ({.} {.} {.}) -> 2 :} =x")) | ||
| 1138 | |||
| 1139 | eqcharset(compile"[]]", "]") | ||
| 1140 | eqcharset(compile"[][]", m.S"[]") | ||
| 1141 | eqcharset(compile"[]-]", m.S"-]") | ||
| 1142 | eqcharset(compile"[-]", m.S"-") | ||
| 1143 | eqcharset(compile"[az-]", m.S"a-z") | ||
| 1144 | eqcharset(compile"[-az]", m.S"a-z") | ||
| 1145 | eqcharset(compile"[a-z]", m.R"az") | ||
| 1146 | eqcharset(compile"[]['\"]", m.S[[]['"]]) | ||
| 1147 | |||
| 1148 | eqcharset(compile"[^]]", any - "]") | ||
| 1149 | eqcharset(compile"[^][]", any - m.S"[]") | ||
| 1150 | eqcharset(compile"[^]-]", any - m.S"-]") | ||
| 1151 | eqcharset(compile"[^]-]", any - m.S"-]") | ||
| 1152 | eqcharset(compile"[^-]", any - m.S"-") | ||
| 1153 | eqcharset(compile"[^az-]", any - m.S"a-z") | ||
| 1154 | eqcharset(compile"[^-az]", any - m.S"a-z") | ||
| 1155 | eqcharset(compile"[^a-z]", any - m.R"az") | ||
| 1156 | eqcharset(compile"[^]['\"]", any - m.S[[]['"]]) | ||
| 1157 | |||
| 1158 | -- tests for comments in 're' | ||
| 1159 | e = compile[[ | ||
| 1160 | A <- _B -- \t \n %nl .<> <- -> -- | ||
| 1161 | _B <- 'x' --]] | ||
| 1162 | assert(e:match'xy' == 2) | ||
| 1163 | |||
| 1164 | -- tests for 're' with pre-definitions | ||
| 1165 | defs = {digits = m.R"09", letters = m.R"az", _=m.P"__"} | ||
| 1166 | e = compile("%letters (%letters / %digits)*", defs) | ||
| 1167 | assert(e:match"x123" == 5) | ||
| 1168 | e = compile("%_", defs) | ||
| 1169 | assert(e:match"__" == 3) | ||
| 1170 | |||
| 1171 | e = compile([[ | ||
| 1172 | S <- A+ | ||
| 1173 | A <- %letters+ B | ||
| 1174 | B <- %digits+ | ||
| 1175 | ]], defs) | ||
| 1176 | |||
| 1177 | e = compile("{[0-9]+'.'?[0-9]*} -> sin", math) | ||
| 1178 | assert(e:match("2.34") == math.sin(2.34)) | ||
| 1179 | |||
| 1180 | |||
| 1181 | function eq (_, _, a, b) return a == b end | ||
| 1182 | |||
| 1183 | c = re.compile([[ | ||
| 1184 | longstring <- '[' {:init: '='* :} '[' close | ||
| 1185 | close <- ']' =init ']' / . close | ||
| 1186 | ]]) | ||
| 1187 | |||
| 1188 | assert(c:match'[==[]]===]]]]==]===[]' == 17) | ||
| 1189 | assert(c:match'[[]=]====]=]]]==]===[]' == 14) | ||
| 1190 | assert(not c:match'[[]=]====]=]=]==]===[]') | ||
| 1191 | |||
| 1192 | c = re.compile" '[' {:init: '='* :} '[' (!(']' =init ']') .)* ']' =init ']' !. " | ||
| 1193 | |||
| 1194 | assert(c:match'[==[]]===]]]]==]') | ||
| 1195 | assert(c:match'[[]=]====]=][]==]===[]]') | ||
| 1196 | assert(not c:match'[[]=]====]=]=]==]===[]') | ||
| 1197 | |||
| 1198 | assert(re.find("hi alalo", "{:x:..:} =x") == 4) | ||
| 1199 | assert(re.find("hi alalo", "{:x:..:} =x", 4) == 4) | ||
| 1200 | assert(not re.find("hi alalo", "{:x:..:} =x", 5)) | ||
| 1201 | assert(re.find("hi alalo", "{'al'}", 5) == 6) | ||
| 1202 | assert(re.find("hi aloalolo", "{:x:..:} =x") == 8) | ||
| 1203 | assert(re.find("alo alohi x x", "{:word:%w+:}%W*(=word)!%w") == 11) | ||
| 1204 | |||
| 1205 | -- re.find discards any captures | ||
| 1206 | local a,b,c = re.find("alo", "{.}{'o'}") | ||
| 1207 | assert(a == 2 and b == 3 and c == nil) | ||
| 1208 | |||
| 1209 | local function match (s,p) | ||
| 1210 | local i,e = re.find(s,p) | ||
| 1211 | if i then return s:sub(i, e) end | ||
| 1212 | end | ||
| 1213 | assert(match("alo alo", '[a-z]+') == "alo") | ||
| 1214 | assert(match("alo alo", '{:x: [a-z]+ :} =x') == nil) | ||
| 1215 | assert(match("alo alo", "{:x: [a-z]+ :} ' ' =x") == "alo alo") | ||
| 1216 | |||
| 1217 | assert(re.gsub("alo alo", "[abc]", "x") == "xlo xlo") | ||
| 1218 | assert(re.gsub("alo alo", "%w+", ".") == ". .") | ||
| 1219 | assert(re.gsub("hi, how are you", "[aeiou]", string.upper) == | ||
| 1220 | "hI, hOw ArE yOU") | ||
| 1221 | |||
| 1222 | s = 'hi [[a comment[=]=] ending here]] and [=[another]]=]]' | ||
| 1223 | c = re.compile" '[' {:i: '='* :} '[' (!(']' =i ']') .)* ']' { =i } ']' " | ||
| 1224 | assert(re.gsub(s, c, "%2") == 'hi and =]') | ||
| 1225 | assert(re.gsub(s, c, "%0") == s) | ||
| 1226 | assert(re.gsub('[=[hi]=]', c, "%2") == '=') | ||
| 1227 | |||
| 1228 | assert(re.find("", "!.") == 1) | ||
| 1229 | assert(re.find("alo", "!.") == 4) | ||
| 1230 | |||
| 1231 | function addtag (s, i, t, tag) t.tag = tag; return i, t end | ||
| 1232 | |||
| 1233 | c = re.compile([[ | ||
| 1234 | doc <- block !. | ||
| 1235 | block <- (start {| (block / { [^<]+ })* |} end?) => addtag | ||
| 1236 | start <- '<' {:tag: [a-z]+ :} '>' | ||
| 1237 | end <- '</' { =tag } '>' | ||
| 1238 | ]], {addtag = addtag}) | ||
| 1239 | |||
| 1240 | x = c:match[[ | ||
| 1241 | <x>hi<b>hello</b>but<b>totheend</x>]] | ||
| 1242 | checkeq(x, {tag='x', 'hi', {tag = 'b', 'hello'}, 'but', | ||
| 1243 | {'totheend'}}) | ||
| 1244 | |||
| 1245 | |||
| 1246 | -- tests for look-ahead captures | ||
| 1247 | x = {re.match("alo", "&(&{.}) !{'b'} {&(...)} &{..} {...} {!.}")} | ||
| 1248 | checkeq(x, {"", "alo", ""}) | ||
| 1249 | |||
| 1250 | assert(re.match("aloalo", | ||
| 1251 | "{~ (((&'al' {.}) -> 'A%1' / (&%l {.}) -> '%1%1') / .)* ~}") | ||
| 1252 | == "AallooAalloo") | ||
| 1253 | |||
| 1254 | -- bug in 0.9 (and older versions), due to captures in look-aheads | ||
| 1255 | x = re.compile[[ {~ (&(. ([a-z]* -> '*')) ([a-z]+ -> '+') ' '*)* ~} ]] | ||
| 1256 | assert(x:match"alo alo" == "+ +") | ||
| 1257 | |||
| 1258 | -- valid capture in look-ahead (used inside the look-ahead itself) | ||
| 1259 | x = re.compile[[ | ||
| 1260 | S <- &({:two: .. :} . =two) {[a-z]+} / . S | ||
| 1261 | ]] | ||
| 1262 | assert(x:match("hello aloaLo aloalo xuxu") == "aloalo") | ||
| 1263 | |||
| 1264 | |||
| 1265 | p = re.compile[[ | ||
| 1266 | block <- {| {:ident:space*:} line | ||
| 1267 | ((=ident !space line) / &(=ident space) block)* |} | ||
| 1268 | line <- {[^%nl]*} %nl | ||
| 1269 | space <- '_' -- should be ' ', but '_' is simpler for editors | ||
| 1270 | ]] | ||
| 1271 | |||
| 1272 | t= p:match[[ | ||
| 1273 | 1 | ||
| 1274 | __1.1 | ||
| 1275 | __1.2 | ||
| 1276 | ____1.2.1 | ||
| 1277 | ____ | ||
| 1278 | 2 | ||
| 1279 | __2.1 | ||
| 1280 | ]] | ||
| 1281 | checkeq(t, {"1", {"1.1", "1.2", {"1.2.1", "", ident = "____"}, ident = "__"}, | ||
| 1282 | "2", {"2.1", ident = "__"}, ident = ""}) | ||
| 1283 | |||
| 1284 | |||
| 1285 | -- nested grammars | ||
| 1286 | p = re.compile[[ | ||
| 1287 | s <- a b !. | ||
| 1288 | b <- ( x <- ('b' x)? ) | ||
| 1289 | a <- ( x <- 'a' x? ) | ||
| 1290 | ]] | ||
| 1291 | |||
| 1292 | assert(p:match'aaabbb') | ||
| 1293 | assert(p:match'aaa') | ||
| 1294 | assert(not p:match'bbb') | ||
| 1295 | assert(not p:match'aaabbba') | ||
| 1296 | |||
| 1297 | -- testing groups | ||
| 1298 | t = {re.match("abc", "{:S <- {:.:} {S} / '':}")} | ||
| 1299 | checkeq(t, {"a", "bc", "b", "c", "c", ""}) | ||
| 1300 | |||
| 1301 | t = re.match("1234", "{| {:a:.:} {:b:.:} {:c:.{.}:} |}") | ||
| 1302 | checkeq(t, {a="1", b="2", c="4"}) | ||
| 1303 | t = re.match("1234", "{|{:a:.:} {:b:{.}{.}:} {:c:{.}:}|}") | ||
| 1304 | checkeq(t, {a="1", b="2", c="4"}) | ||
| 1305 | t = re.match("12345", "{| {:.:} {:b:{.}{.}:} {:{.}{.}:} |}") | ||
| 1306 | checkeq(t, {"1", b="2", "4", "5"}) | ||
| 1307 | t = re.match("12345", "{| {:.:} {:{:b:{.}{.}:}:} {:{.}{.}:} |}") | ||
| 1308 | checkeq(t, {"1", "23", "4", "5"}) | ||
| 1309 | t = re.match("12345", "{| {:.:} {{:b:{.}{.}:}} {:{.}{.}:} |}") | ||
| 1310 | checkeq(t, {"1", "23", "4", "5"}) | ||
| 1311 | |||
| 1312 | |||
| 1313 | -- testing pre-defined names | ||
| 1314 | assert(os.setlocale("C") == "C") | ||
| 1315 | |||
| 1316 | function eqlpeggsub (p1, p2) | ||
| 1317 | local s1 = cs2str(re.compile(p1)) | ||
| 1318 | local s2 = string.gsub(allchar, "[^" .. p2 .. "]", "") | ||
| 1319 | -- if s1 ~= s2 then print(#s1,#s2) end | ||
| 1320 | assert(s1 == s2) | ||
| 1321 | end | ||
| 1322 | |||
| 1323 | |||
| 1324 | eqlpeggsub("%w", "%w") | ||
| 1325 | eqlpeggsub("%a", "%a") | ||
| 1326 | eqlpeggsub("%l", "%l") | ||
| 1327 | eqlpeggsub("%u", "%u") | ||
| 1328 | eqlpeggsub("%p", "%p") | ||
| 1329 | eqlpeggsub("%d", "%d") | ||
| 1330 | eqlpeggsub("%x", "%x") | ||
| 1331 | eqlpeggsub("%s", "%s") | ||
| 1332 | eqlpeggsub("%c", "%c") | ||
| 1333 | |||
| 1334 | eqlpeggsub("%W", "%W") | ||
| 1335 | eqlpeggsub("%A", "%A") | ||
| 1336 | eqlpeggsub("%L", "%L") | ||
| 1337 | eqlpeggsub("%U", "%U") | ||
| 1338 | eqlpeggsub("%P", "%P") | ||
| 1339 | eqlpeggsub("%D", "%D") | ||
| 1340 | eqlpeggsub("%X", "%X") | ||
| 1341 | eqlpeggsub("%S", "%S") | ||
| 1342 | eqlpeggsub("%C", "%C") | ||
| 1343 | |||
| 1344 | eqlpeggsub("[%w]", "%w") | ||
| 1345 | eqlpeggsub("[_%w]", "_%w") | ||
| 1346 | eqlpeggsub("[^%w]", "%W") | ||
| 1347 | eqlpeggsub("[%W%S]", "%W%S") | ||
| 1348 | |||
| 1349 | re.updatelocale() | ||
| 1350 | |||
| 1351 | -- testing nested substitutions x string captures | ||
| 1352 | |||
| 1353 | p = re.compile[[ | ||
| 1354 | text <- {~ item* ~} | ||
| 1355 | item <- macro / [^()] / '(' item* ')' | ||
| 1356 | arg <- ' '* {~ (!',' item)* ~} | ||
| 1357 | args <- '(' arg (',' arg)* ')' | ||
| 1358 | macro <- ('apply' args) -> '%1(%2)' | ||
| 1359 | / ('add' args) -> '%1 + %2' | ||
| 1360 | / ('mul' args) -> '%1 * %2' | ||
| 1361 | ]] | ||
| 1362 | |||
| 1363 | assert(p:match"add(mul(a,b), apply(f,x))" == "a * b + f(x)") | ||
| 1364 | |||
| 1365 | rev = re.compile[[ R <- (!.) -> '' / ({.} R) -> '%2%1']] | ||
| 1366 | |||
| 1367 | assert(rev:match"0123456789" == "9876543210") | ||
| 1368 | |||
| 1369 | |||
| 1370 | -- testing error messages in re | ||
| 1371 | |||
| 1372 | local function errmsg (p, err) | ||
| 1373 | local s, msg = pcall(re.compile, p) | ||
| 1374 | assert(not s and string.find(msg, err)) | ||
| 1375 | end | ||
| 1376 | |||
| 1377 | errmsg('aaaa', "rule 'aaaa'") | ||
| 1378 | errmsg('a', 'outside') | ||
| 1379 | errmsg('b <- a', 'undefined') | ||
| 1380 | errmsg("x <- 'a' x <- 'b'", 'already defined') | ||
| 1381 | errmsg("'a' -", "near '-'") | ||
| 1382 | |||
| 1383 | |||
| 1384 | print"OK" | ||
| 1385 | |||
| 1386 | |||
diff --git a/testlabel.lua b/testlabel.lua new file mode 100644 index 0000000..100c0d0 --- /dev/null +++ b/testlabel.lua | |||
| @@ -0,0 +1,422 @@ | |||
| 1 | local m = require 'lpeglabel' | ||
| 2 | |||
| 3 | local p = m.T(1, 2, 5) | ||
| 4 | assert(p:match("abc") == nil) | ||
| 5 | |||
| 6 | -- throws a label that is not caught by ordinary choice | ||
| 7 | p = m.T(1) + m.P"a" | ||
| 8 | local r = p:match("abc") | ||
| 9 | assert(r == nil) | ||
| 10 | |||
| 11 | -- again throws a label that is not caught by ordinary choice | ||
| 12 | local g = m.P{ | ||
| 13 | "S", | ||
| 14 | S = m.V"A" + m.V"B", | ||
| 15 | A = m.T(1), | ||
| 16 | B = m.P"a" | ||
| 17 | } | ||
| 18 | r = g:match("abc") | ||
| 19 | assert(r == nil) | ||
| 20 | |||
| 21 | -- throws a label that is not caught by labeled choice | ||
| 22 | p = m.Lc(m.T(2), m.P"a", 1, 3) | ||
| 23 | r = p:match("abc") | ||
| 24 | assert(r == nil) | ||
| 25 | |||
| 26 | -- modifies previous pattern | ||
| 27 | -- adds another labeled choice to catch label "2" | ||
| 28 | p = m.Lc(p, m.P"a", 2) | ||
| 29 | r = p:match("abc") | ||
| 30 | assert(r == 2) | ||
| 31 | |||
| 32 | -- throws a label that is caught by labeled choice | ||
| 33 | p = m.Lc(m.T(25), m.P"a", 25) | ||
| 34 | r = p:match("abc") | ||
| 35 | assert(r == 2) | ||
| 36 | assert(p:match("bola") == nil) | ||
| 37 | |||
| 38 | -- labeled choice did not catch "fail" by default | ||
| 39 | p = m.Lc(m.P"b", m.P"a", 1) | ||
| 40 | r = p:match("abc") | ||
| 41 | assert(r == nil, r) | ||
| 42 | assert(p:match("bola") == 2) | ||
| 43 | |||
| 44 | -- "fail" is label "0" | ||
| 45 | -- labeled choice can catch "fail" | ||
| 46 | p = m.Lc(m.P"b", m.P"a", 0) | ||
| 47 | r = p:match("abc") | ||
| 48 | assert(r == 2, r) | ||
| 49 | assert(p:match("bola") == 2) | ||
| 50 | |||
| 51 | -- "fail" is label "0" | ||
| 52 | -- labeled choice can catch "fail" or "3" | ||
| 53 | p = m.Lc(m.P"a" * m.T(3), (m.P"a" + m.P"b"), 0, 3) | ||
| 54 | assert(p:match("abc") == 2) | ||
| 55 | assert(p:match("bac") == 2) | ||
| 56 | assert(p:match("cab") == nil) | ||
| 57 | |||
| 58 | --[[ | ||
| 59 | S -> A /{1} 'a' | ||
| 60 | A -> B | ||
| 61 | B -> %1 | ||
| 62 | ]] | ||
| 63 | g = m.P{ | ||
| 64 | "S", | ||
| 65 | S = m.Lc(m.V"A", m.P"a", 1), | ||
| 66 | A = m.V"B", | ||
| 67 | B = m.T(1), | ||
| 68 | } | ||
| 69 | assert(g:match("ab") == 2) | ||
| 70 | assert(g:match("bc") == nil) | ||
| 71 | |||
| 72 | |||
| 73 | --[[ | ||
| 74 | S -> A | ||
| 75 | A -> (B (';' / %{1}))* | ||
| 76 | B -> 'a' | ||
| 77 | ]] | ||
| 78 | g = m.P{ | ||
| 79 | "S", | ||
| 80 | S = m.V"A", | ||
| 81 | A = m.P(m.V"B" * (";" + m.T(1)))^0, | ||
| 82 | B = m.P'a', | ||
| 83 | } | ||
| 84 | assert(g:match("a;a;") == 5) | ||
| 85 | assert(g:match("a;a") == nil) | ||
| 86 | |||
| 87 | |||
| 88 | |||
| 89 | -- %1 /{1,3} %2 /{2} 'a' | ||
| 90 | p = m.Lc(m.Lc(m.T(1), m.T(2), 1, 3), m.P"a", 2) | ||
| 91 | r = p:match("abc") | ||
| 92 | assert(r == 2) | ||
| 93 | assert(p:match("") == nil) | ||
| 94 | |||
| 95 | p = m.Lc(m.T(1), m.Lc(m.T(2), m.P"a", 2), 1, 3) | ||
| 96 | r = p:match("abc") | ||
| 97 | assert(r == 2) | ||
| 98 | assert(p:match("") == nil) | ||
| 99 | |||
| 100 | print("+") | ||
| 101 | |||
| 102 | --[[ grammar based on Figure 8 of paper submitted to SCP | ||
| 103 | S -> S0 /{1} ID /{2} ID '=' Exp /{3} 'unsigned'* 'int' ID /{4} 'unsigned'* ID ID / %error | ||
| 104 | S0 -> ID S1 / 'unsigned' S2 / 'int' %3 | ||
| 105 | S1 -> '=' %2 / !. %1 / ID %4 | ||
| 106 | S2 -> 'unsigned' S2 / ID %4 / 'int' %3 | ||
| 107 | ]] | ||
| 108 | |||
| 109 | local sp = m.S" \t\n"^0 | ||
| 110 | local eq = sp * m.P"=" | ||
| 111 | |||
| 112 | g = m.P{ | ||
| 113 | "S", | ||
| 114 | S = m.Lc( | ||
| 115 | m.Lc( | ||
| 116 | m.Lc( | ||
| 117 | m.Lc(m.V"S0", m.V"ID" * (m.P(1) + ""), 1), | ||
| 118 | m.V"ID" * eq * m.V"Exp", 2 | ||
| 119 | ), | ||
| 120 | m.V"U"^0 * m.V"I" * m.V"ID", 3 | ||
| 121 | ), | ||
| 122 | m.V"U"^0 * m.V"ID" * m.V"ID", 4) | ||
| 123 | + m.T(5), | ||
| 124 | S0 = m.V"ID" * m.V"S1" + m.V"U" * m.V"S2" + m.V"I" * m.T(3), | ||
| 125 | S1 = eq * m.T(2) + sp * -m.P(1) * m.T(1) + m.V"ID" * m.T(4), | ||
| 126 | S2 = m.V"U" * m.V"S2" + m.V"ID" * m.T(4) + m.V"I" * m.T(3), | ||
| 127 | ID = sp * m.P"a", | ||
| 128 | U = sp * m.P"unsigned", | ||
| 129 | I = sp * m.P"int", | ||
| 130 | Exp = sp * m.P"E", | ||
| 131 | } | ||
| 132 | --g:pcode() | ||
| 133 | |||
| 134 | local s = "a" | ||
| 135 | assert(g:match(s) == #s + 1) --1 | ||
| 136 | s = "a = E" | ||
| 137 | assert(g:match(s) == #s + 1) --2 | ||
| 138 | s = "int a" | ||
| 139 | assert(g:match(s) == #s + 1) --3 | ||
| 140 | s = "unsigned int a" | ||
| 141 | assert(g:match(s) == #s + 1) --3 | ||
| 142 | s = "unsigned a a" | ||
| 143 | assert(g:match(s) == #s + 1) --4 | ||
| 144 | s = "b" | ||
| 145 | assert(g:match(s) == nil) | ||
| 146 | s = "unsigned" | ||
| 147 | assert(g:match(s) == nil) | ||
| 148 | s = "unsigned a" | ||
| 149 | assert(g:match(s) == nil) | ||
| 150 | s = "unsigned int" | ||
| 151 | assert(g:match(s) == nil) | ||
| 152 | |||
| 153 | |||
| 154 | print("+") | ||
| 155 | |||
| 156 | local re = require 're' | ||
| 157 | |||
| 158 | g = re.compile[['a' /{4,9} [a-z] | ||
| 159 | ]] | ||
| 160 | assert(g:match("a") == 2) | ||
| 161 | assert(g:match("b") == nil) | ||
| 162 | |||
| 163 | g = re.compile[['a' /{4,9} [a-f] /{5, 7} [a-z] | ||
| 164 | ]] | ||
| 165 | assert(g:match("a") == 2) | ||
| 166 | assert(g:match("b") == nil) | ||
| 167 | |||
| 168 | g = re.compile[[%{1} /{4,9} [a-z] | ||
| 169 | ]] | ||
| 170 | assert(g:match("a") == nil) | ||
| 171 | |||
| 172 | g = re.compile[[%{1} /{4,1} [a-f] | ||
| 173 | ]] | ||
| 174 | assert(g:match("a") == 2) | ||
| 175 | assert(g:match("h") == nil) | ||
| 176 | |||
| 177 | g = re.compile[[[a-f]%{15, 9} /{4,9} [a-c]%{7} /{5, 7} [a-z] ]] | ||
| 178 | assert(g:match("a") == 2) | ||
| 179 | assert(g:match("c") == 2) | ||
| 180 | assert(g:match("d") == nil) | ||
| 181 | assert(g:match("g") == nil) | ||
| 182 | |||
| 183 | --[[ grammar based on Figure 8 of paper submitted to SCP | ||
| 184 | S -> S0 /{1} ID /{2} ID '=' Exp /{3} 'unsigned'* 'int' ID /{4} 'unsigned'* ID ID / %error | ||
| 185 | S0 -> ID S1 / 'unsigned' S2 / 'int' %3 | ||
| 186 | S1 -> '=' %2 / !. %1 / ID %4 | ||
| 187 | S2 -> 'unsigned' S2 / ID %4 / 'int' %3 | ||
| 188 | ]] | ||
| 189 | |||
| 190 | |||
| 191 | g = re.compile([[ | ||
| 192 | S <- S0 /{1} ID /{2} ID %s* '=' Exp /{3} U* Int ID /{4} U ID ID /{0} %{5} | ||
| 193 | S0 <- ID S1 / U S2 / Int %{3} | ||
| 194 | S1 <- %s* '=' %{2} / !. %{1} / ID %{4} | ||
| 195 | S2 <- U S2 / ID %{4} / Int %{3} | ||
| 196 | ID <- %s* 'a' | ||
| 197 | U <- %s* 'unsigned' | ||
| 198 | Int <- %s* 'int' | ||
| 199 | Exp <- %s* 'E' | ||
| 200 | ]]) | ||
| 201 | |||
| 202 | local s = "a" | ||
| 203 | assert(g:match(s) == #s + 1) --1 | ||
| 204 | s = "a = E" | ||
| 205 | assert(g:match(s) == #s + 1) --2 | ||
| 206 | s = "int a" | ||
| 207 | assert(g:match(s) == #s + 1) --3 | ||
| 208 | s = "unsigned int a" | ||
| 209 | assert(g:match(s) == #s + 1) --3 | ||
| 210 | s = "unsigned a a" | ||
| 211 | assert(g:match(s) == #s + 1) --4 | ||
| 212 | s = "b" | ||
| 213 | assert(g:match(s) == nil) | ||
| 214 | s = "unsigned" | ||
| 215 | assert(g:match(s) == nil) | ||
| 216 | s = "unsigned a" | ||
| 217 | assert(g:match(s) == nil) | ||
| 218 | s = "unsigned int" | ||
| 219 | assert(g:match(s) == nil) | ||
| 220 | |||
| 221 | local terror = { ['cmdSeq'] = "Missing ';' in CmdSeq", | ||
| 222 | ['ifExp'] = "Error in expresion of 'if'", | ||
| 223 | ['ifThen'] = "Error matching 'then' keyword", | ||
| 224 | ['ifThenCmdSeq'] = "Error matching CmdSeq of 'then' branch", | ||
| 225 | ['ifElseCmdSeq'] = "Error matching CmdSeq of 'else' branch", | ||
| 226 | ['ifEnd'] = "Error matching 'end' keyword of 'if'", | ||
| 227 | ['repeatCmdSeq'] = "Error matching CmdSeq of 'repeat'", | ||
| 228 | ['repeatUntil'] = "Error matching 'until' keyword", | ||
| 229 | ['repeatExp'] = "Error matching expression of 'until'", | ||
| 230 | ['assignOp'] = "Error matching ':='", | ||
| 231 | ['assignExp'] = "Error matching expression of assignment", | ||
| 232 | ['readName'] = "Error matching 'NAME' after 'read'", | ||
| 233 | ['writeExp'] = "Error matching expression after 'write'", | ||
| 234 | ['simpleExp'] = "Error matching 'SimpleExp'", | ||
| 235 | ['term'] = "Error matching 'Term'", | ||
| 236 | ['factor'] = "Error matching 'Factor'", | ||
| 237 | ['openParExp'] = "Error matching expression after '('", | ||
| 238 | ['closePar'] = "Error matching ')'", | ||
| 239 | ['undefined'] = "Error undefined'"} | ||
| 240 | |||
| 241 | g = re.compile([[ | ||
| 242 | Tiny <- CmdSeq /{1} '' -> cmdSeq /{2} '' -> ifExp /{3} '' -> ifThen /{4} '' -> ifThenCmdSeq | ||
| 243 | /{5} '' -> ifElseCmdSeq /{6} '' -> ifEnd /{7} '' -> repeatCmdSeq | ||
| 244 | /{8} '' -> repeatUntil /{9} '' -> repeatExp /{10} '' -> assignOp | ||
| 245 | /{11} '' -> assignExp /{12} '' -> readName /{13} '' -> writeExp | ||
| 246 | /{14} '' -> simpleExp /{15} '' -> term /{16} '' -> factor | ||
| 247 | /{17} '' -> openParExp /{18} '' -> closePar /{0} '' -> undefined | ||
| 248 | CmdSeq <- (Cmd (SEMICOLON / %{1})) (Cmd (SEMICOLON / %{1}))* | ||
| 249 | Cmd <- IfCmd / RepeatCmd / ReadCmd / WriteCmd / AssignCmd | ||
| 250 | IfCmd <- IF (Exp / %{2}) (THEN / %{3}) (CmdSeq / %{4}) (ELSE (CmdSeq / %{5}) / '') (END / %{6}) | ||
| 251 | RepeatCmd <- REPEAT (CmdSeq / %{7}) (UNTIL / %{8}) (Exp / %{9}) | ||
| 252 | AssignCmd <- !RESERVED NAME (ASSIGNMENT / %{10}) (Exp / %{11}) | ||
| 253 | ReadCmd <- READ (NAME / %{12}) | ||
| 254 | WriteCmd <- WRITE (Exp / %{13}) | ||
| 255 | Exp <- SimpleExp ((LESS / EQUAL) (SimpleExp / %{14}) / '') | ||
| 256 | SimpleExp <- Term ((ADD / SUB) (Term / %{15}))* | ||
| 257 | Term <- Factor ((MUL / DIV) (Factor / %{16}))* | ||
| 258 | Factor <- OPENPAR (Exp / %{17}) (CLOSEPAR / %{18}) / NUMBER / NAME | ||
| 259 | ADD <- Sp '+' | ||
| 260 | ASSIGNMENT <- Sp ':=' | ||
| 261 | CLOSEPAR <- Sp ')' | ||
| 262 | DIV <- Sp '/' | ||
| 263 | IF <- Sp 'if' | ||
| 264 | ELSE <- Sp 'else' | ||
| 265 | END <- Sp 'end' | ||
| 266 | EQUAL <- Sp '=' | ||
| 267 | LESS <- Sp '<' | ||
| 268 | MUL <- Sp '*' | ||
| 269 | NAME <- Sp [a-z]+ | ||
| 270 | NUMBER <- Sp [0-9]+ | ||
| 271 | OPENPAR <- Sp '(' | ||
| 272 | READ <- Sp 'read' | ||
| 273 | REPEAT <- Sp 'repeat' | ||
| 274 | SEMICOLON <- Sp ';' | ||
| 275 | SUB <- Sp '-' | ||
| 276 | THEN <- Sp 'then' | ||
| 277 | UNTIL <- Sp 'until' | ||
| 278 | WRITE <- Sp 'write' | ||
| 279 | RESERVED <- IF / ELSE / END / READ / REPEAT / THEN / UNTIL / WRITE | ||
| 280 | Sp <- (%s / %nl)* | ||
| 281 | ]], terror) | ||
| 282 | |||
| 283 | s = [[ | ||
| 284 | n := 5;]] | ||
| 285 | assert(g:match(s) == #s + 1) | ||
| 286 | |||
| 287 | s = [[ | ||
| 288 | n := 5; | ||
| 289 | f := 1; | ||
| 290 | repeat | ||
| 291 | f := f * n; | ||
| 292 | n := n - 1; | ||
| 293 | until (n < 1); | ||
| 294 | write f;]] | ||
| 295 | assert(g:match(s) == #s + 1) | ||
| 296 | |||
| 297 | -- a ';' is missing in 'read a' | ||
| 298 | s = [[ | ||
| 299 | read a]] | ||
| 300 | assert(g:match(s) == terror['cmdSeq']) | ||
| 301 | |||
| 302 | |||
| 303 | -- a ';' is missing in 'n := n - 1' | ||
| 304 | s = [[ | ||
| 305 | n := 5; | ||
| 306 | f := 1; | ||
| 307 | repeat | ||
| 308 | f := f * n; | ||
| 309 | n := n - 1 | ||
| 310 | until (n < 1); | ||
| 311 | write f;]] | ||
| 312 | assert(g:match(s) == terror['cmdSeq']) | ||
| 313 | |||
| 314 | |||
| 315 | -- IF expression | ||
| 316 | s = [[ | ||
| 317 | if a then a := a + 1; end;]] | ||
| 318 | assert(g:match(s) == #s + 1) | ||
| 319 | |||
| 320 | -- IF expression | ||
| 321 | s = [[ | ||
| 322 | if a then a := a + 1; else write 2; end;]] | ||
| 323 | assert(g:match(s) == #s + 1) | ||
| 324 | |||
| 325 | -- Error in expression of 'if'. 'A' is not a valida name | ||
| 326 | s = [[ | ||
| 327 | if A then a := a + 1; else write 2; end;]] | ||
| 328 | assert(g:match(s) == terror['ifExp']) | ||
| 329 | |||
| 330 | -- Error matching the 'then' keyword | ||
| 331 | s = [[ | ||
| 332 | if a a := a + 1; else write 2; end;]] | ||
| 333 | assert(g:match(s) == terror['ifThen']) | ||
| 334 | |||
| 335 | -- Error matching the CmdSeq inside of 'then' branch | ||
| 336 | s = [[ | ||
| 337 | if a then 3 := 2; else write 2; end;]] | ||
| 338 | assert(g:match(s) == terror['ifThenCmdSeq']) | ||
| 339 | |||
| 340 | -- Error matching the CmdSeq inside of 'else' branch | ||
| 341 | s = [[ | ||
| 342 | if a then b := 2; else A := 2; end;]] | ||
| 343 | assert(g:match(s) == terror['ifElseCmdSeq']) | ||
| 344 | |||
| 345 | -- Error matching 'end' of 'if' | ||
| 346 | s = [[ | ||
| 347 | if a then b := 2; else a := 2; 77;]] | ||
| 348 | assert(g:match(s) == terror['ifEnd']) | ||
| 349 | |||
| 350 | -- Error matching the CmdSeq of 'repeat' | ||
| 351 | s = [[repeat | ||
| 352 | F := f * n; | ||
| 353 | n := n - 1; | ||
| 354 | until (n < 1);]] | ||
| 355 | assert(g:match(s) == terror['repeatCmdSeq']) | ||
| 356 | |||
| 357 | -- Error matching 'until' | ||
| 358 | s = [[repeat | ||
| 359 | f := f * n; | ||
| 360 | n := n - 1; | ||
| 361 | 88 (n < 1);]] | ||
| 362 | assert(g:match(s) == terror['repeatUntil']) | ||
| 363 | |||
| 364 | -- Error matching expression of 'until' | ||
| 365 | s = [[repeat | ||
| 366 | f := f * n; | ||
| 367 | n := n - 1; | ||
| 368 | until ; (n < 1);]] | ||
| 369 | assert(g:match(s) == terror['repeatExp']) | ||
| 370 | |||
| 371 | -- Error matching ':=' | ||
| 372 | s = [[ | ||
| 373 | f = f * n;]] | ||
| 374 | assert(g:match(s) == terror['assignOp']) | ||
| 375 | |||
| 376 | -- Error matching expression of assignment | ||
| 377 | s = [[ | ||
| 378 | f := A * n;]] | ||
| 379 | assert(g:match(s) == terror['assignExp']) | ||
| 380 | |||
| 381 | -- Error matching 'name' | ||
| 382 | s = [[ | ||
| 383 | read 2;]] | ||
| 384 | assert(g:match(s) == terror['readName']) | ||
| 385 | |||
| 386 | -- Error matching expression after 'write' | ||
| 387 | s = [[ | ||
| 388 | write [a] := 2;]] | ||
| 389 | assert(g:match(s) == terror['writeExp']) | ||
| 390 | |||
| 391 | -- Error matching 'SimpleExp' | ||
| 392 | s = [[ | ||
| 393 | a := a < A;]] | ||
| 394 | assert(g:match(s) == terror['simpleExp']) | ||
| 395 | |||
| 396 | -- Error matching 'Term' | ||
| 397 | s = [[ | ||
| 398 | a := a + A;]] | ||
| 399 | assert(g:match(s) == terror['term']) | ||
| 400 | |||
| 401 | -- Error matching 'Factor' | ||
| 402 | s = [[ | ||
| 403 | a := a * A;]] | ||
| 404 | assert(g:match(s) == terror['factor']) | ||
| 405 | |||
| 406 | -- Error matching expression after '(' | ||
| 407 | s = [[ | ||
| 408 | a := (A);]] | ||
| 409 | assert(g:match(s) == terror['openParExp']) | ||
| 410 | |||
| 411 | -- Error matching ')' | ||
| 412 | s = [[ | ||
| 413 | a := (a];]] | ||
| 414 | assert(g:match(s) == terror['closePar']) | ||
| 415 | |||
| 416 | -- Error undefined | ||
| 417 | s = [[ | ||
| 418 | A := a;]] | ||
| 419 | assert(g:match(s) == terror['undefined']) | ||
| 420 | |||
| 421 | |||
| 422 | print("OK") | ||
