diff options
Diffstat (limited to 'lplvm.c')
-rw-r--r-- | lplvm.c | 499 |
1 files changed, 499 insertions, 0 deletions
@@ -0,0 +1,499 @@ | |||
1 | /* | ||
2 | ** $Id: lplvm.c $ | ||
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | |||
6 | #include <limits.h> | ||
7 | #include <string.h> | ||
8 | |||
9 | |||
10 | #include "lua.h" | ||
11 | #include "lauxlib.h" | ||
12 | |||
13 | #include "lplcap.h" | ||
14 | #include "lpltypes.h" | ||
15 | #include "lplvm.h" | ||
16 | #include "lplprint.h" | ||
17 | |||
18 | |||
19 | /* initial size for call/backtrack stack */ | ||
20 | #if !defined(INITBACK) | ||
21 | #define INITBACK MAXBACK | ||
22 | #endif | ||
23 | |||
24 | |||
25 | #define getoffset(p) (((p) + 1)->offset) | ||
26 | |||
27 | static const Instruction giveup = {{IGiveup, 0, 0}}; | ||
28 | |||
29 | |||
30 | /* | ||
31 | ** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid. | ||
32 | */ | ||
33 | static const char *utf8_decode (const char *o, int *val) { | ||
34 | static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFFu}; | ||
35 | const unsigned char *s = (const unsigned char *)o; | ||
36 | unsigned int c = s[0]; /* first byte */ | ||
37 | unsigned int res = 0; /* final result */ | ||
38 | if (c < 0x80) /* ascii? */ | ||
39 | res = c; | ||
40 | else { | ||
41 | int count = 0; /* to count number of continuation bytes */ | ||
42 | while (c & 0x40) { /* still have continuation bytes? */ | ||
43 | int cc = s[++count]; /* read next byte */ | ||
44 | if ((cc & 0xC0) != 0x80) /* not a continuation byte? */ | ||
45 | return NULL; /* invalid byte sequence */ | ||
46 | res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */ | ||
47 | c <<= 1; /* to test next bit */ | ||
48 | } | ||
49 | res |= (c & 0x7F) << (count * 5); /* add first byte */ | ||
50 | if (count > 3 || res > 0x10FFFFu || res <= limits[count]) | ||
51 | return NULL; /* invalid byte sequence */ | ||
52 | s += count; /* skip continuation bytes read */ | ||
53 | } | ||
54 | *val = res; | ||
55 | return (const char *)s + 1; /* +1 to include first byte */ | ||
56 | } | ||
57 | |||
58 | |||
59 | /* | ||
60 | ** {====================================================== | ||
61 | ** Virtual Machine | ||
62 | ** ======================================================= | ||
63 | */ | ||
64 | |||
65 | |||
66 | typedef struct Stack { | ||
67 | const char *s; /* saved position (or NULL for calls) */ | ||
68 | const Instruction *p; /* next instruction */ | ||
69 | int caplevel; | ||
70 | byte labenv; /* labeled failure */ | ||
71 | byte predchoice; /* labeled failure */ | ||
72 | } Stack; | ||
73 | |||
74 | |||
75 | #define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) | ||
76 | |||
77 | |||
78 | /* | ||
79 | ** Ensures the size of array 'capture' (with size '*capsize' and | ||
80 | ** 'captop' elements being used) is enough to accomodate 'n' extra | ||
81 | ** elements plus one. (Because several opcodes add stuff to the capture | ||
82 | ** array, it is simpler to ensure the array always has at least one free | ||
83 | ** slot upfront and check its size later.) | ||
84 | */ | ||
85 | static Capture *growcap (lua_State *L, Capture *capture, int *capsize, | ||
86 | int captop, int n, int ptop) { | ||
87 | if (*capsize - captop > n) | ||
88 | return capture; /* no need to grow array */ | ||
89 | else { /* must grow */ | ||
90 | Capture *newc; | ||
91 | int newsize = captop + n + 1; /* minimum size needed */ | ||
92 | if (newsize < INT_MAX/((int)sizeof(Capture) * 2)) | ||
93 | newsize *= 2; /* twice that size, if not too big */ | ||
94 | else if (newsize >= INT_MAX/((int)sizeof(Capture))) | ||
95 | luaL_error(L, "too many captures"); | ||
96 | newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture)); | ||
97 | memcpy(newc, capture, captop * sizeof(Capture)); | ||
98 | *capsize = newsize; | ||
99 | lua_replace(L, caplistidx(ptop)); | ||
100 | return newc; | ||
101 | } | ||
102 | } | ||
103 | |||
104 | |||
105 | /* | ||
106 | ** Double the size of the stack | ||
107 | */ | ||
108 | static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { | ||
109 | Stack *stack = getstackbase(L, ptop); | ||
110 | Stack *newstack; | ||
111 | int n = *stacklimit - stack; /* current stack size */ | ||
112 | int max, newn; | ||
113 | lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
114 | max = lua_tointeger(L, -1); /* maximum allowed size */ | ||
115 | lua_pop(L, 1); | ||
116 | if (n >= max) /* already at maximum size? */ | ||
117 | luaL_error(L, "backtrack stack overflow (current limit is %d)", max); | ||
118 | newn = 2 * n; /* new size */ | ||
119 | if (newn > max) newn = max; | ||
120 | newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); | ||
121 | memcpy(newstack, stack, n * sizeof(Stack)); | ||
122 | lua_replace(L, stackidx(ptop)); | ||
123 | *stacklimit = newstack + newn; | ||
124 | return newstack + n; /* return next position */ | ||
125 | } | ||
126 | |||
127 | |||
128 | /* | ||
129 | ** Interpret the result of a dynamic capture: false -> fail; | ||
130 | ** true -> keep current position; number -> next position. | ||
131 | ** Return new subject position. 'fr' is stack index where | ||
132 | ** is the result; 'curr' is current subject position; 'limit' | ||
133 | ** is subject's size. | ||
134 | */ | ||
135 | static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | ||
136 | lua_Integer res; | ||
137 | if (!lua_toboolean(L, fr)) { /* false value? */ | ||
138 | lua_settop(L, fr - 1); /* remove results */ | ||
139 | return -1; /* and fail */ | ||
140 | } | ||
141 | else if (lua_isboolean(L, fr)) /* true? */ | ||
142 | res = curr; /* keep current position */ | ||
143 | else { | ||
144 | res = lua_tointeger(L, fr) - 1; /* new position */ | ||
145 | if (res < curr || res > limit) | ||
146 | luaL_error(L, "invalid position returned by match-time capture"); | ||
147 | } | ||
148 | lua_remove(L, fr); /* remove first result (offset) */ | ||
149 | return res; | ||
150 | } | ||
151 | |||
152 | |||
153 | /* | ||
154 | ** Add capture values returned by a dynamic capture to the list | ||
155 | ** 'capture', nested inside a group. 'fd' indexes the first capture | ||
156 | ** value, 'n' is the number of values (at least 1). The open group | ||
157 | ** capture is already in 'capture', before the place for the new entries. | ||
158 | */ | ||
159 | static void adddyncaptures (const char *s, Capture *capture, int n, int fd) { | ||
160 | int i; | ||
161 | assert(capture[-1].kind == Cgroup && capture[-1].siz == 0); | ||
162 | capture[-1].idx = 0; /* make group capture an anonymous group */ | ||
163 | for (i = 0; i < n; i++) { /* add runtime captures */ | ||
164 | capture[i].kind = Cruntime; | ||
165 | capture[i].siz = 1; /* mark it as closed */ | ||
166 | capture[i].idx = fd + i; /* stack index of capture value */ | ||
167 | capture[i].s = s; | ||
168 | } | ||
169 | capture[n].kind = Cclose; /* close group */ | ||
170 | capture[n].siz = 1; | ||
171 | capture[n].s = s; | ||
172 | } | ||
173 | |||
174 | |||
175 | /* | ||
176 | ** Remove dynamic captures from the Lua stack (called in case of failure) | ||
177 | */ | ||
178 | static int removedyncap (lua_State *L, Capture *capture, | ||
179 | int level, int last) { | ||
180 | int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */ | ||
181 | int top = lua_gettop(L); | ||
182 | if (id == 0) return 0; /* no dynamic captures? */ | ||
183 | lua_settop(L, id - 1); /* remove captures */ | ||
184 | return top - id + 1; /* number of values removed */ | ||
185 | } | ||
186 | |||
187 | |||
188 | /* | ||
189 | ** Opcode interpreter | ||
190 | */ | ||
191 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | ||
192 | Instruction *op, Capture *capture, int ptop, short *labelf, const char **sfail) { /* labeled failure */ | ||
193 | Stack stackbase[INITBACK]; | ||
194 | Stack *stacklimit = stackbase + INITBACK; | ||
195 | Stack *stack = stackbase; /* point to first empty slot in stack */ | ||
196 | int capsize = INITCAPSIZE; | ||
197 | int captop = 0; /* point to first empty slot in captures */ | ||
198 | int ndyncap = 0; /* number of dynamic captures (in Lua stack) */ | ||
199 | const Instruction *p = op; /* current instruction */ | ||
200 | byte insidepred = OUTPRED; /* labeled failure: label environment is off inside predicates */ | ||
201 | stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack->labenv = insidepred; stack++; /* labeled failure */ | ||
202 | *sfail = s; /* labeled failure */ | ||
203 | lua_pushlightuserdata(L, stackbase); | ||
204 | for (;;) { | ||
205 | #if defined(DEBUG) | ||
206 | printf("-------------------------------------\n"); | ||
207 | printcaplist(capture, capture + captop); | ||
208 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", | ||
209 | s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); | ||
210 | printinst(op, p); | ||
211 | #endif | ||
212 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); | ||
213 | assert(insidepred == INPRED || insidepred == OUTPRED); | ||
214 | switch ((Opcode)p->i.code) { | ||
215 | case IEnd: { | ||
216 | assert(stack == getstackbase(L, ptop) + 1); | ||
217 | capture[captop].kind = Cclose; | ||
218 | capture[captop].s = NULL; | ||
219 | return s; | ||
220 | } | ||
221 | case IGiveup: { | ||
222 | assert(stack == getstackbase(L, ptop)); | ||
223 | return NULL; | ||
224 | } | ||
225 | case IRet: { | ||
226 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); | ||
227 | p = (--stack)->p; | ||
228 | continue; | ||
229 | } | ||
230 | case IAny: { | ||
231 | if (s < e) { p++; s++; } | ||
232 | else { | ||
233 | *labelf = LFAIL; /* labeled failure */ | ||
234 | updatefarthest(*sfail, s); /*labeled failure */ | ||
235 | goto fail; | ||
236 | } | ||
237 | continue; | ||
238 | } | ||
239 | case IUTFR: { | ||
240 | int codepoint; | ||
241 | if (s >= e) | ||
242 | goto fail; | ||
243 | s = utf8_decode (s, &codepoint); | ||
244 | if (s && p[1].offset <= codepoint && codepoint <= utf_to(p)) | ||
245 | p += 2; | ||
246 | else { | ||
247 | *labelf = LFAIL; /* labeled failure */ | ||
248 | updatefarthest(*sfail, s); /*labeled failure */ | ||
249 | goto fail; | ||
250 | } | ||
251 | continue; | ||
252 | } | ||
253 | case ITestAny: { | ||
254 | if (s < e) p += 2; | ||
255 | else p += getoffset(p); | ||
256 | continue; | ||
257 | } | ||
258 | case IChar: { | ||
259 | if ((byte)*s == p->i.aux && s < e) { p++; s++; } | ||
260 | else { | ||
261 | *labelf = LFAIL; /* labeled failure */ | ||
262 | updatefarthest(*sfail, s); /*labeled failure */ | ||
263 | goto fail; | ||
264 | } | ||
265 | continue; | ||
266 | } | ||
267 | case ITestChar: { | ||
268 | if ((byte)*s == p->i.aux && s < e) p += 2; | ||
269 | else p += getoffset(p); | ||
270 | continue; | ||
271 | } | ||
272 | case ISet: { | ||
273 | int c = (byte)*s; | ||
274 | if (testchar((p+1)->buff, c) && s < e) | ||
275 | { p += CHARSETINSTSIZE; s++; } | ||
276 | else { | ||
277 | *labelf = LFAIL; /* labeled failure */ | ||
278 | updatefarthest(*sfail, s); /*labeled failure */ | ||
279 | goto fail; | ||
280 | } | ||
281 | continue; | ||
282 | } | ||
283 | case ITestSet: { | ||
284 | int c = (byte)*s; | ||
285 | if (testchar((p + 2)->buff, c) && s < e) | ||
286 | p += 1 + CHARSETINSTSIZE; | ||
287 | else p += getoffset(p); | ||
288 | continue; | ||
289 | } | ||
290 | case IBehind: { | ||
291 | int n = p->i.aux; | ||
292 | if (n > s - o) { | ||
293 | *labelf = LFAIL; /* labeled failure */ | ||
294 | updatefarthest(*sfail, s); /*labeled failure */ | ||
295 | goto fail; | ||
296 | } | ||
297 | s -= n; p++; | ||
298 | continue; | ||
299 | } | ||
300 | case ISpan: { | ||
301 | for (; s < e; s++) { | ||
302 | int c = (byte)*s; | ||
303 | if (!testchar((p+1)->buff, c)) break; | ||
304 | } | ||
305 | p += CHARSETINSTSIZE; | ||
306 | continue; | ||
307 | } | ||
308 | case IJmp: { | ||
309 | p += getoffset(p); | ||
310 | continue; | ||
311 | } | ||
312 | case IChoice: { | ||
313 | if (stack == stacklimit) | ||
314 | stack = doublestack(L, &stacklimit, ptop); | ||
315 | stack->p = p + getoffset(p); | ||
316 | stack->s = s; | ||
317 | stack->caplevel = captop; | ||
318 | stack->labenv = insidepred; /* labeled failure */ | ||
319 | stack->predchoice = 0; /* labeled failure */ | ||
320 | stack++; | ||
321 | p += 2; | ||
322 | continue; | ||
323 | } | ||
324 | case IPredChoice: { /* labeled failure: new instruction */ | ||
325 | if (stack == stacklimit) | ||
326 | stack = doublestack(L, &stacklimit, ptop); | ||
327 | stack->p = p + getoffset(p); | ||
328 | stack->s = s; | ||
329 | stack->caplevel = captop; | ||
330 | stack->labenv = insidepred; | ||
331 | stack->predchoice = 1; | ||
332 | stack++; | ||
333 | insidepred = INPRED; | ||
334 | p += 2; | ||
335 | continue; | ||
336 | } | ||
337 | case ICall: { | ||
338 | if (stack == stacklimit) | ||
339 | stack = doublestack(L, &stacklimit, ptop); | ||
340 | stack->s = NULL; | ||
341 | stack->p = p + 2; /* save return address */ | ||
342 | stack++; | ||
343 | p += getoffset(p); | ||
344 | continue; | ||
345 | } | ||
346 | case ICommit: { | ||
347 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||
348 | stack--; | ||
349 | p += getoffset(p); | ||
350 | continue; | ||
351 | } | ||
352 | case IPartialCommit: { | ||
353 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||
354 | (stack - 1)->s = s; | ||
355 | (stack - 1)->caplevel = captop; | ||
356 | p += getoffset(p); | ||
357 | continue; | ||
358 | } | ||
359 | case IBackCommit: { | ||
360 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||
361 | s = (--stack)->s; | ||
362 | insidepred = stack->labenv; /* labeled failure */ | ||
363 | if (ndyncap > 0) /* are there matchtime captures? */ | ||
364 | ndyncap -= removedyncap(L, capture, stack->caplevel, captop); | ||
365 | captop = stack->caplevel; | ||
366 | p += getoffset(p); | ||
367 | continue; | ||
368 | } | ||
369 | case IThrow: { /* labeled failure */ | ||
370 | if (insidepred == OUTPRED) { | ||
371 | *labelf = (p+1)->i.key; | ||
372 | stack = getstackbase(L, ptop); | ||
373 | stack++; | ||
374 | } | ||
375 | else { | ||
376 | while (!(stack-1)->predchoice) { | ||
377 | --stack; | ||
378 | } | ||
379 | *labelf = LFAIL; | ||
380 | } | ||
381 | *sfail = s; | ||
382 | goto fail; | ||
383 | } | ||
384 | case IThrowRec: { /* labeled failure */ | ||
385 | if (insidepred == OUTPRED) { | ||
386 | *labelf = (p+2)->i.key; | ||
387 | *sfail = s; | ||
388 | if (stack == stacklimit) | ||
389 | stack = doublestack(L, &stacklimit, ptop); | ||
390 | stack->s = NULL; | ||
391 | stack->p = p + 3; | ||
392 | stack->caplevel = captop; | ||
393 | stack++; | ||
394 | p += getoffset(p); | ||
395 | continue; | ||
396 | } else { | ||
397 | while (!(stack-1)->predchoice) { | ||
398 | --stack; | ||
399 | } | ||
400 | *labelf = LFAIL; | ||
401 | *sfail = s; | ||
402 | } | ||
403 | goto fail; | ||
404 | } | ||
405 | case IFailTwice: | ||
406 | assert(stack > getstackbase(L, ptop)); | ||
407 | stack--; | ||
408 | /* go through */ | ||
409 | case IFail: | ||
410 | *labelf = LFAIL; /* labeled failure */ | ||
411 | updatefarthest(*sfail, s); /*labeled failure */ | ||
412 | fail: { /* pattern failed: try to backtrack */ | ||
413 | do { /* remove pending calls */ | ||
414 | assert(stack > getstackbase(L, ptop)); | ||
415 | s = (--stack)->s; | ||
416 | } while (s == NULL); | ||
417 | if (ndyncap > 0) /* is there matchtime captures? */ | ||
418 | ndyncap -= removedyncap(L, capture, stack->caplevel, captop); | ||
419 | captop = stack->caplevel; | ||
420 | assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv); | ||
421 | insidepred = stack->labenv; /* labeled failure */ | ||
422 | p = stack->p; | ||
423 | #if defined(DEBUG) | ||
424 | printf("**FAIL**\n"); | ||
425 | #endif | ||
426 | continue; | ||
427 | } | ||
428 | case ICloseRunTime: { | ||
429 | CapState cs; | ||
430 | int rem, res, n; | ||
431 | int fr = lua_gettop(L) + 1; /* stack index of first result */ | ||
432 | cs.reclevel = 0; cs.L = L; | ||
433 | cs.s = o; cs.ocap = capture; cs.ptop = ptop; | ||
434 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ | ||
435 | captop -= n; /* remove nested captures */ | ||
436 | ndyncap -= rem; /* update number of dynamic captures */ | ||
437 | fr -= rem; /* 'rem' items were popped from Lua stack */ | ||
438 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ | ||
439 | if (res == -1) { /* fail? */ | ||
440 | *labelf = LFAIL; /* labeled failure */ | ||
441 | updatefarthest(*sfail, s); /*labeled failure */ | ||
442 | goto fail; | ||
443 | } | ||
444 | s = o + res; /* else update current position */ | ||
445 | n = lua_gettop(L) - fr + 1; /* number of new captures */ | ||
446 | ndyncap += n; /* update number of dynamic captures */ | ||
447 | if (n == 0) /* no new captures? */ | ||
448 | captop--; /* remove open group */ | ||
449 | else { /* new captures; keep original open group */ | ||
450 | if (fr + n >= SHRT_MAX) | ||
451 | luaL_error(L, "too many results in match-time capture"); | ||
452 | /* add new captures + close group to 'capture' list */ | ||
453 | capture = growcap(L, capture, &capsize, captop, n + 1, ptop); | ||
454 | adddyncaptures(s, capture + captop, n, fr); | ||
455 | captop += n + 1; /* new captures + close group */ | ||
456 | } | ||
457 | p++; | ||
458 | continue; | ||
459 | } | ||
460 | case ICloseCapture: { | ||
461 | const char *s1 = s; | ||
462 | assert(captop > 0); | ||
463 | /* if possible, turn capture into a full capture */ | ||
464 | if (capture[captop - 1].siz == 0 && | ||
465 | s1 - capture[captop - 1].s < UCHAR_MAX) { | ||
466 | capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; | ||
467 | p++; | ||
468 | continue; | ||
469 | } | ||
470 | else { | ||
471 | capture[captop].siz = 1; /* mark entry as closed */ | ||
472 | capture[captop].s = s; | ||
473 | goto pushcapture; | ||
474 | } | ||
475 | } | ||
476 | case IOpenCapture: | ||
477 | capture[captop].siz = 0; /* mark entry as open */ | ||
478 | capture[captop].s = s; | ||
479 | goto pushcapture; | ||
480 | case IFullCapture: | ||
481 | capture[captop].siz = getoff(p) + 1; /* save capture size */ | ||
482 | capture[captop].s = s - getoff(p); | ||
483 | /* goto pushcapture; */ | ||
484 | pushcapture: { | ||
485 | capture[captop].idx = p->i.key; | ||
486 | capture[captop].kind = getkind(p); | ||
487 | captop++; | ||
488 | capture = growcap(L, capture, &capsize, captop, 0, ptop); | ||
489 | p++; | ||
490 | continue; | ||
491 | } | ||
492 | default: assert(0); return NULL; | ||
493 | } | ||
494 | } | ||
495 | } | ||
496 | |||
497 | /* }====================================================== */ | ||
498 | |||
499 | |||