aboutsummaryrefslogtreecommitdiff
path: root/lplvm.c
diff options
context:
space:
mode:
Diffstat (limited to 'lplvm.c')
-rw-r--r--lplvm.c499
1 files changed, 499 insertions, 0 deletions
diff --git a/lplvm.c b/lplvm.c
new file mode 100644
index 0000000..1dddfe6
--- /dev/null
+++ b/lplvm.c
@@ -0,0 +1,499 @@
1/*
2** $Id: lplvm.c $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5
6#include <limits.h>
7#include <string.h>
8
9
10#include "lua.h"
11#include "lauxlib.h"
12
13#include "lplcap.h"
14#include "lpltypes.h"
15#include "lplvm.h"
16#include "lplprint.h"
17
18
19/* initial size for call/backtrack stack */
20#if !defined(INITBACK)
21#define INITBACK MAXBACK
22#endif
23
24
25#define getoffset(p) (((p) + 1)->offset)
26
27static const Instruction giveup = {{IGiveup, 0, 0}};
28
29
30/*
31** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid.
32*/
33static const char *utf8_decode (const char *o, int *val) {
34 static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFFu};
35 const unsigned char *s = (const unsigned char *)o;
36 unsigned int c = s[0]; /* first byte */
37 unsigned int res = 0; /* final result */
38 if (c < 0x80) /* ascii? */
39 res = c;
40 else {
41 int count = 0; /* to count number of continuation bytes */
42 while (c & 0x40) { /* still have continuation bytes? */
43 int cc = s[++count]; /* read next byte */
44 if ((cc & 0xC0) != 0x80) /* not a continuation byte? */
45 return NULL; /* invalid byte sequence */
46 res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
47 c <<= 1; /* to test next bit */
48 }
49 res |= (c & 0x7F) << (count * 5); /* add first byte */
50 if (count > 3 || res > 0x10FFFFu || res <= limits[count])
51 return NULL; /* invalid byte sequence */
52 s += count; /* skip continuation bytes read */
53 }
54 *val = res;
55 return (const char *)s + 1; /* +1 to include first byte */
56}
57
58
59/*
60** {======================================================
61** Virtual Machine
62** =======================================================
63*/
64
65
66typedef struct Stack {
67 const char *s; /* saved position (or NULL for calls) */
68 const Instruction *p; /* next instruction */
69 int caplevel;
70 byte labenv; /* labeled failure */
71 byte predchoice; /* labeled failure */
72} Stack;
73
74
75#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
76
77
78/*
79** Ensures the size of array 'capture' (with size '*capsize' and
80** 'captop' elements being used) is enough to accomodate 'n' extra
81** elements plus one. (Because several opcodes add stuff to the capture
82** array, it is simpler to ensure the array always has at least one free
83** slot upfront and check its size later.)
84*/
85static Capture *growcap (lua_State *L, Capture *capture, int *capsize,
86 int captop, int n, int ptop) {
87 if (*capsize - captop > n)
88 return capture; /* no need to grow array */
89 else { /* must grow */
90 Capture *newc;
91 int newsize = captop + n + 1; /* minimum size needed */
92 if (newsize < INT_MAX/((int)sizeof(Capture) * 2))
93 newsize *= 2; /* twice that size, if not too big */
94 else if (newsize >= INT_MAX/((int)sizeof(Capture)))
95 luaL_error(L, "too many captures");
96 newc = (Capture *)lua_newuserdata(L, newsize * sizeof(Capture));
97 memcpy(newc, capture, captop * sizeof(Capture));
98 *capsize = newsize;
99 lua_replace(L, caplistidx(ptop));
100 return newc;
101 }
102}
103
104
105/*
106** Double the size of the stack
107*/
108static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
109 Stack *stack = getstackbase(L, ptop);
110 Stack *newstack;
111 int n = *stacklimit - stack; /* current stack size */
112 int max, newn;
113 lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
114 max = lua_tointeger(L, -1); /* maximum allowed size */
115 lua_pop(L, 1);
116 if (n >= max) /* already at maximum size? */
117 luaL_error(L, "backtrack stack overflow (current limit is %d)", max);
118 newn = 2 * n; /* new size */
119 if (newn > max) newn = max;
120 newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
121 memcpy(newstack, stack, n * sizeof(Stack));
122 lua_replace(L, stackidx(ptop));
123 *stacklimit = newstack + newn;
124 return newstack + n; /* return next position */
125}
126
127
128/*
129** Interpret the result of a dynamic capture: false -> fail;
130** true -> keep current position; number -> next position.
131** Return new subject position. 'fr' is stack index where
132** is the result; 'curr' is current subject position; 'limit'
133** is subject's size.
134*/
135static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
136 lua_Integer res;
137 if (!lua_toboolean(L, fr)) { /* false value? */
138 lua_settop(L, fr - 1); /* remove results */
139 return -1; /* and fail */
140 }
141 else if (lua_isboolean(L, fr)) /* true? */
142 res = curr; /* keep current position */
143 else {
144 res = lua_tointeger(L, fr) - 1; /* new position */
145 if (res < curr || res > limit)
146 luaL_error(L, "invalid position returned by match-time capture");
147 }
148 lua_remove(L, fr); /* remove first result (offset) */
149 return res;
150}
151
152
153/*
154** Add capture values returned by a dynamic capture to the list
155** 'capture', nested inside a group. 'fd' indexes the first capture
156** value, 'n' is the number of values (at least 1). The open group
157** capture is already in 'capture', before the place for the new entries.
158*/
159static void adddyncaptures (const char *s, Capture *capture, int n, int fd) {
160 int i;
161 assert(capture[-1].kind == Cgroup && capture[-1].siz == 0);
162 capture[-1].idx = 0; /* make group capture an anonymous group */
163 for (i = 0; i < n; i++) { /* add runtime captures */
164 capture[i].kind = Cruntime;
165 capture[i].siz = 1; /* mark it as closed */
166 capture[i].idx = fd + i; /* stack index of capture value */
167 capture[i].s = s;
168 }
169 capture[n].kind = Cclose; /* close group */
170 capture[n].siz = 1;
171 capture[n].s = s;
172}
173
174
175/*
176** Remove dynamic captures from the Lua stack (called in case of failure)
177*/
178static int removedyncap (lua_State *L, Capture *capture,
179 int level, int last) {
180 int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */
181 int top = lua_gettop(L);
182 if (id == 0) return 0; /* no dynamic captures? */
183 lua_settop(L, id - 1); /* remove captures */
184 return top - id + 1; /* number of values removed */
185}
186
187
188/*
189** Opcode interpreter
190*/
191const char *match (lua_State *L, const char *o, const char *s, const char *e,
192 Instruction *op, Capture *capture, int ptop, short *labelf, const char **sfail) { /* labeled failure */
193 Stack stackbase[INITBACK];
194 Stack *stacklimit = stackbase + INITBACK;
195 Stack *stack = stackbase; /* point to first empty slot in stack */
196 int capsize = INITCAPSIZE;
197 int captop = 0; /* point to first empty slot in captures */
198 int ndyncap = 0; /* number of dynamic captures (in Lua stack) */
199 const Instruction *p = op; /* current instruction */
200 byte insidepred = OUTPRED; /* labeled failure: label environment is off inside predicates */
201 stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack->labenv = insidepred; stack++; /* labeled failure */
202 *sfail = s; /* labeled failure */
203 lua_pushlightuserdata(L, stackbase);
204 for (;;) {
205#if defined(DEBUG)
206 printf("-------------------------------------\n");
207 printcaplist(capture, capture + captop);
208 printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
209 s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop);
210 printinst(op, p);
211#endif
212 assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
213 assert(insidepred == INPRED || insidepred == OUTPRED);
214 switch ((Opcode)p->i.code) {
215 case IEnd: {
216 assert(stack == getstackbase(L, ptop) + 1);
217 capture[captop].kind = Cclose;
218 capture[captop].s = NULL;
219 return s;
220 }
221 case IGiveup: {
222 assert(stack == getstackbase(L, ptop));
223 return NULL;
224 }
225 case IRet: {
226 assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
227 p = (--stack)->p;
228 continue;
229 }
230 case IAny: {
231 if (s < e) { p++; s++; }
232 else {
233 *labelf = LFAIL; /* labeled failure */
234 updatefarthest(*sfail, s); /*labeled failure */
235 goto fail;
236 }
237 continue;
238 }
239 case IUTFR: {
240 int codepoint;
241 if (s >= e)
242 goto fail;
243 s = utf8_decode (s, &codepoint);
244 if (s && p[1].offset <= codepoint && codepoint <= utf_to(p))
245 p += 2;
246 else {
247 *labelf = LFAIL; /* labeled failure */
248 updatefarthest(*sfail, s); /*labeled failure */
249 goto fail;
250 }
251 continue;
252 }
253 case ITestAny: {
254 if (s < e) p += 2;
255 else p += getoffset(p);
256 continue;
257 }
258 case IChar: {
259 if ((byte)*s == p->i.aux && s < e) { p++; s++; }
260 else {
261 *labelf = LFAIL; /* labeled failure */
262 updatefarthest(*sfail, s); /*labeled failure */
263 goto fail;
264 }
265 continue;
266 }
267 case ITestChar: {
268 if ((byte)*s == p->i.aux && s < e) p += 2;
269 else p += getoffset(p);
270 continue;
271 }
272 case ISet: {
273 int c = (byte)*s;
274 if (testchar((p+1)->buff, c) && s < e)
275 { p += CHARSETINSTSIZE; s++; }
276 else {
277 *labelf = LFAIL; /* labeled failure */
278 updatefarthest(*sfail, s); /*labeled failure */
279 goto fail;
280 }
281 continue;
282 }
283 case ITestSet: {
284 int c = (byte)*s;
285 if (testchar((p + 2)->buff, c) && s < e)
286 p += 1 + CHARSETINSTSIZE;
287 else p += getoffset(p);
288 continue;
289 }
290 case IBehind: {
291 int n = p->i.aux;
292 if (n > s - o) {
293 *labelf = LFAIL; /* labeled failure */
294 updatefarthest(*sfail, s); /*labeled failure */
295 goto fail;
296 }
297 s -= n; p++;
298 continue;
299 }
300 case ISpan: {
301 for (; s < e; s++) {
302 int c = (byte)*s;
303 if (!testchar((p+1)->buff, c)) break;
304 }
305 p += CHARSETINSTSIZE;
306 continue;
307 }
308 case IJmp: {
309 p += getoffset(p);
310 continue;
311 }
312 case IChoice: {
313 if (stack == stacklimit)
314 stack = doublestack(L, &stacklimit, ptop);
315 stack->p = p + getoffset(p);
316 stack->s = s;
317 stack->caplevel = captop;
318 stack->labenv = insidepred; /* labeled failure */
319 stack->predchoice = 0; /* labeled failure */
320 stack++;
321 p += 2;
322 continue;
323 }
324 case IPredChoice: { /* labeled failure: new instruction */
325 if (stack == stacklimit)
326 stack = doublestack(L, &stacklimit, ptop);
327 stack->p = p + getoffset(p);
328 stack->s = s;
329 stack->caplevel = captop;
330 stack->labenv = insidepred;
331 stack->predchoice = 1;
332 stack++;
333 insidepred = INPRED;
334 p += 2;
335 continue;
336 }
337 case ICall: {
338 if (stack == stacklimit)
339 stack = doublestack(L, &stacklimit, ptop);
340 stack->s = NULL;
341 stack->p = p + 2; /* save return address */
342 stack++;
343 p += getoffset(p);
344 continue;
345 }
346 case ICommit: {
347 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
348 stack--;
349 p += getoffset(p);
350 continue;
351 }
352 case IPartialCommit: {
353 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
354 (stack - 1)->s = s;
355 (stack - 1)->caplevel = captop;
356 p += getoffset(p);
357 continue;
358 }
359 case IBackCommit: {
360 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
361 s = (--stack)->s;
362 insidepred = stack->labenv; /* labeled failure */
363 if (ndyncap > 0) /* are there matchtime captures? */
364 ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
365 captop = stack->caplevel;
366 p += getoffset(p);
367 continue;
368 }
369 case IThrow: { /* labeled failure */
370 if (insidepred == OUTPRED) {
371 *labelf = (p+1)->i.key;
372 stack = getstackbase(L, ptop);
373 stack++;
374 }
375 else {
376 while (!(stack-1)->predchoice) {
377 --stack;
378 }
379 *labelf = LFAIL;
380 }
381 *sfail = s;
382 goto fail;
383 }
384 case IThrowRec: { /* labeled failure */
385 if (insidepred == OUTPRED) {
386 *labelf = (p+2)->i.key;
387 *sfail = s;
388 if (stack == stacklimit)
389 stack = doublestack(L, &stacklimit, ptop);
390 stack->s = NULL;
391 stack->p = p + 3;
392 stack->caplevel = captop;
393 stack++;
394 p += getoffset(p);
395 continue;
396 } else {
397 while (!(stack-1)->predchoice) {
398 --stack;
399 }
400 *labelf = LFAIL;
401 *sfail = s;
402 }
403 goto fail;
404 }
405 case IFailTwice:
406 assert(stack > getstackbase(L, ptop));
407 stack--;
408 /* go through */
409 case IFail:
410 *labelf = LFAIL; /* labeled failure */
411 updatefarthest(*sfail, s); /*labeled failure */
412 fail: { /* pattern failed: try to backtrack */
413 do { /* remove pending calls */
414 assert(stack > getstackbase(L, ptop));
415 s = (--stack)->s;
416 } while (s == NULL);
417 if (ndyncap > 0) /* is there matchtime captures? */
418 ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
419 captop = stack->caplevel;
420 assert((insidepred == INPRED && stack->labenv == OUTPRED) || insidepred == stack->labenv);
421 insidepred = stack->labenv; /* labeled failure */
422 p = stack->p;
423#if defined(DEBUG)
424 printf("**FAIL**\n");
425#endif
426 continue;
427 }
428 case ICloseRunTime: {
429 CapState cs;
430 int rem, res, n;
431 int fr = lua_gettop(L) + 1; /* stack index of first result */
432 cs.reclevel = 0; cs.L = L;
433 cs.s = o; cs.ocap = capture; cs.ptop = ptop;
434 n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
435 captop -= n; /* remove nested captures */
436 ndyncap -= rem; /* update number of dynamic captures */
437 fr -= rem; /* 'rem' items were popped from Lua stack */
438 res = resdyncaptures(L, fr, s - o, e - o); /* get result */
439 if (res == -1) { /* fail? */
440 *labelf = LFAIL; /* labeled failure */
441 updatefarthest(*sfail, s); /*labeled failure */
442 goto fail;
443 }
444 s = o + res; /* else update current position */
445 n = lua_gettop(L) - fr + 1; /* number of new captures */
446 ndyncap += n; /* update number of dynamic captures */
447 if (n == 0) /* no new captures? */
448 captop--; /* remove open group */
449 else { /* new captures; keep original open group */
450 if (fr + n >= SHRT_MAX)
451 luaL_error(L, "too many results in match-time capture");
452 /* add new captures + close group to 'capture' list */
453 capture = growcap(L, capture, &capsize, captop, n + 1, ptop);
454 adddyncaptures(s, capture + captop, n, fr);
455 captop += n + 1; /* new captures + close group */
456 }
457 p++;
458 continue;
459 }
460 case ICloseCapture: {
461 const char *s1 = s;
462 assert(captop > 0);
463 /* if possible, turn capture into a full capture */
464 if (capture[captop - 1].siz == 0 &&
465 s1 - capture[captop - 1].s < UCHAR_MAX) {
466 capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
467 p++;
468 continue;
469 }
470 else {
471 capture[captop].siz = 1; /* mark entry as closed */
472 capture[captop].s = s;
473 goto pushcapture;
474 }
475 }
476 case IOpenCapture:
477 capture[captop].siz = 0; /* mark entry as open */
478 capture[captop].s = s;
479 goto pushcapture;
480 case IFullCapture:
481 capture[captop].siz = getoff(p) + 1; /* save capture size */
482 capture[captop].s = s - getoff(p);
483 /* goto pushcapture; */
484 pushcapture: {
485 capture[captop].idx = p->i.key;
486 capture[captop].kind = getkind(p);
487 captop++;
488 capture = growcap(L, capture, &capsize, captop, 0, ptop);
489 p++;
490 continue;
491 }
492 default: assert(0); return NULL;
493 }
494 }
495}
496
497/* }====================================================== */
498
499