diff options
Diffstat (limited to 'lpvm.c')
-rw-r--r-- | lpvm.c | 391 |
1 files changed, 391 insertions, 0 deletions
@@ -0,0 +1,391 @@ | |||
1 | /* | ||
2 | ** $Id: lpvm.c,v 1.5 2013/04/12 16:29:49 roberto Exp $ | ||
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | |||
6 | #include <limits.h> | ||
7 | #include <string.h> | ||
8 | |||
9 | |||
10 | #include "lua.h" | ||
11 | #include "lauxlib.h" | ||
12 | |||
13 | #include "lpcap.h" | ||
14 | #include "lptypes.h" | ||
15 | #include "lpvm.h" | ||
16 | #include "lpprint.h" | ||
17 | |||
18 | |||
19 | /* initial size for call/backtrack stack */ | ||
20 | #if !defined(INITBACK) | ||
21 | #define INITBACK 100 | ||
22 | #endif | ||
23 | |||
24 | |||
25 | #define getoffset(p) (((p) + 1)->offset) | ||
26 | |||
27 | static const Instruction giveup = {{IGiveup, 0, 0}}; | ||
28 | |||
29 | |||
30 | /* | ||
31 | ** {====================================================== | ||
32 | ** Virtual Machine | ||
33 | ** ======================================================= | ||
34 | */ | ||
35 | |||
36 | |||
37 | typedef struct Stack { | ||
38 | const char *s; /* saved position (or NULL for calls) */ | ||
39 | const Instruction *p; /* next instruction */ | ||
40 | int caplevel; | ||
41 | Labelset ls; /* labeled failure */ | ||
42 | } Stack; | ||
43 | |||
44 | |||
45 | #define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) | ||
46 | |||
47 | |||
48 | /* | ||
49 | ** Double the size of the array of captures | ||
50 | */ | ||
51 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { | ||
52 | Capture *newc; | ||
53 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) | ||
54 | luaL_error(L, "too many captures"); | ||
55 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); | ||
56 | memcpy(newc, cap, captop * sizeof(Capture)); | ||
57 | lua_replace(L, caplistidx(ptop)); | ||
58 | return newc; | ||
59 | } | ||
60 | |||
61 | |||
62 | /* | ||
63 | ** Double the size of the stack | ||
64 | */ | ||
65 | static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { | ||
66 | Stack *stack = getstackbase(L, ptop); | ||
67 | Stack *newstack; | ||
68 | int n = *stacklimit - stack; /* current stack size */ | ||
69 | int max, newn; | ||
70 | lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | ||
71 | max = lua_tointeger(L, -1); /* maximum allowed size */ | ||
72 | lua_pop(L, 1); | ||
73 | if (n >= max) /* already at maximum size? */ | ||
74 | luaL_error(L, "too many pending calls/choices"); | ||
75 | newn = 2 * n; /* new size */ | ||
76 | if (newn > max) newn = max; | ||
77 | newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); | ||
78 | memcpy(newstack, stack, n * sizeof(Stack)); | ||
79 | lua_replace(L, stackidx(ptop)); | ||
80 | *stacklimit = newstack + newn; | ||
81 | return newstack + n; /* return next position */ | ||
82 | } | ||
83 | |||
84 | |||
85 | /* | ||
86 | ** Interpret the result of a dynamic capture: false -> fail; | ||
87 | ** true -> keep current position; number -> next position. | ||
88 | ** Return new subject position. 'fr' is stack index where | ||
89 | ** is the result; 'curr' is current subject position; 'limit' | ||
90 | ** is subject's size. | ||
91 | */ | ||
92 | static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | ||
93 | lua_Integer res; | ||
94 | if (!lua_toboolean(L, fr)) { /* false value? */ | ||
95 | lua_settop(L, fr - 1); /* remove results */ | ||
96 | return -1; /* and fail */ | ||
97 | } | ||
98 | else if (lua_isboolean(L, fr)) /* true? */ | ||
99 | res = curr; /* keep current position */ | ||
100 | else { | ||
101 | res = lua_tointeger(L, fr) - 1; /* new position */ | ||
102 | if (res < curr || res > limit) | ||
103 | luaL_error(L, "invalid position returned by match-time capture"); | ||
104 | } | ||
105 | lua_remove(L, fr); /* remove first result (offset) */ | ||
106 | return res; | ||
107 | } | ||
108 | |||
109 | |||
110 | /* | ||
111 | ** Add capture values returned by a dynamic capture to the capture list | ||
112 | ** 'base', nested inside a group capture. 'fd' indexes the first capture | ||
113 | ** value, 'n' is the number of values (at least 1). | ||
114 | */ | ||
115 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { | ||
116 | int i; | ||
117 | /* Cgroup capture is already there */ | ||
118 | assert(base[0].kind == Cgroup && base[0].siz == 0); | ||
119 | base[0].idx = 0; /* make it an anonymous group */ | ||
120 | for (i = 1; i <= n; i++) { /* add runtime captures */ | ||
121 | base[i].kind = Cruntime; | ||
122 | base[i].siz = 1; /* mark it as closed */ | ||
123 | base[i].idx = fd + i - 1; /* stack index of capture value */ | ||
124 | base[i].s = s; | ||
125 | } | ||
126 | base[i].kind = Cclose; /* close group */ | ||
127 | base[i].siz = 1; | ||
128 | base[i].s = s; | ||
129 | } | ||
130 | |||
131 | |||
132 | /* | ||
133 | ** Remove dynamic captures from the Lua stack (called in case of failure) | ||
134 | */ | ||
135 | static int removedyncap (lua_State *L, Capture *capture, | ||
136 | int level, int last) { | ||
137 | int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */ | ||
138 | int top = lua_gettop(L); | ||
139 | if (id == 0) return 0; /* no dynamic captures? */ | ||
140 | lua_settop(L, id - 1); /* remove captures */ | ||
141 | return top - id + 1; /* number of values removed */ | ||
142 | } | ||
143 | |||
144 | |||
145 | /* | ||
146 | ** Opcode interpreter | ||
147 | */ | ||
148 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | ||
149 | Instruction *op, Capture *capture, int ptop) { | ||
150 | Stack stackbase[INITBACK]; | ||
151 | Stack *stacklimit = stackbase + INITBACK; | ||
152 | Stack *stack = stackbase; /* point to first empty slot in stack */ | ||
153 | int capsize = INITCAPSIZE; | ||
154 | int captop = 0; /* point to first empty slot in captures */ | ||
155 | int ndyncap = 0; /* number of dynamic captures (in Lua stack) */ | ||
156 | const Instruction *p = op; /* current instruction */ | ||
157 | Labelset labelf; /* labeled failure */ | ||
158 | stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; | ||
159 | lua_pushlightuserdata(L, stackbase); | ||
160 | for (;;) { | ||
161 | #if defined(LPEGDEBUG) | ||
162 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", | ||
163 | s, stack - getstackbase(L, ptop), ndyncap, captop); | ||
164 | printinst(op, p); | ||
165 | printcaplist(capture, capture + captop); | ||
166 | #endif | ||
167 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); | ||
168 | switch ((Opcode)p->i.code) { | ||
169 | case IEnd: { | ||
170 | assert(stack == getstackbase(L, ptop) + 1); | ||
171 | capture[captop].kind = Cclose; | ||
172 | capture[captop].s = NULL; | ||
173 | return s; | ||
174 | } | ||
175 | case IGiveup: { | ||
176 | assert(stack == getstackbase(L, ptop)); | ||
177 | return NULL; | ||
178 | } | ||
179 | case IRet: { | ||
180 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); | ||
181 | p = (--stack)->p; | ||
182 | continue; | ||
183 | } | ||
184 | case IAny: { | ||
185 | if (s < e) { p++; s++; } | ||
186 | else { | ||
187 | labelf = LFAIL; /* labeled failure */ | ||
188 | goto fail; | ||
189 | } | ||
190 | continue; | ||
191 | } | ||
192 | case ITestAny: { | ||
193 | if (s < e) p += 2; | ||
194 | else p += getoffset(p); | ||
195 | continue; | ||
196 | } | ||
197 | case IChar: { | ||
198 | if ((byte)*s == p->i.aux && s < e) { p++; s++; } | ||
199 | else { | ||
200 | labelf = LFAIL; /* labeled failure */ | ||
201 | goto fail; | ||
202 | } | ||
203 | continue; | ||
204 | } | ||
205 | case ITestChar: { | ||
206 | if ((byte)*s == p->i.aux && s < e) p += 2; | ||
207 | else p += getoffset(p); | ||
208 | continue; | ||
209 | } | ||
210 | case ISet: { | ||
211 | int c = (byte)*s; | ||
212 | if (testchar((p+1)->buff, c) && s < e) | ||
213 | { p += CHARSETINSTSIZE; s++; } | ||
214 | else { | ||
215 | labelf = LFAIL; /* labeled failure */ | ||
216 | goto fail; | ||
217 | } | ||
218 | continue; | ||
219 | } | ||
220 | case ITestSet: { | ||
221 | int c = (byte)*s; | ||
222 | if (testchar((p + 2)->buff, c) && s < e) | ||
223 | p += 1 + CHARSETINSTSIZE; | ||
224 | else p += getoffset(p); | ||
225 | continue; | ||
226 | } | ||
227 | case IBehind: { | ||
228 | int n = p->i.aux; | ||
229 | if (n > s - o) { | ||
230 | labelf = LFAIL; /* labeled failure */ | ||
231 | goto fail; | ||
232 | } | ||
233 | s -= n; p++; | ||
234 | continue; | ||
235 | } | ||
236 | case ISpan: { | ||
237 | for (; s < e; s++) { | ||
238 | int c = (byte)*s; | ||
239 | if (!testchar((p+1)->buff, c)) break; | ||
240 | } | ||
241 | p += CHARSETINSTSIZE; | ||
242 | continue; | ||
243 | } | ||
244 | case IJmp: { | ||
245 | p += getoffset(p); | ||
246 | continue; | ||
247 | } | ||
248 | case IChoice: { | ||
249 | if (stack == stacklimit) | ||
250 | stack = doublestack(L, &stacklimit, ptop); | ||
251 | stack->p = p + getoffset(p); | ||
252 | stack->s = s; | ||
253 | stack->ls = LFAIL; /* labeled failure */ | ||
254 | stack->caplevel = captop; | ||
255 | stack++; | ||
256 | p += 2; | ||
257 | continue; | ||
258 | } | ||
259 | case ILabChoice: { /* labeled failure */ | ||
260 | if (stack == stacklimit) | ||
261 | stack = doublestack(L, &stacklimit, ptop); | ||
262 | stack->p = p + getoffset(p); | ||
263 | stack->s = s; | ||
264 | stack->ls = (p + 2)->labels; | ||
265 | stack->caplevel = captop; | ||
266 | stack++; | ||
267 | p += 3; | ||
268 | continue; | ||
269 | } | ||
270 | |||
271 | case ICall: { | ||
272 | if (stack == stacklimit) | ||
273 | stack = doublestack(L, &stacklimit, ptop); | ||
274 | stack->s = NULL; | ||
275 | stack->p = p + 2; /* save return address */ | ||
276 | stack++; | ||
277 | p += getoffset(p); | ||
278 | continue; | ||
279 | } | ||
280 | case ICommit: { | ||
281 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||
282 | stack--; | ||
283 | p += getoffset(p); | ||
284 | continue; | ||
285 | } | ||
286 | case IPartialCommit: { | ||
287 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||
288 | (stack - 1)->s = s; | ||
289 | (stack - 1)->caplevel = captop; | ||
290 | p += getoffset(p); | ||
291 | continue; | ||
292 | } | ||
293 | case IBackCommit: { | ||
294 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | ||
295 | s = (--stack)->s; | ||
296 | captop = stack->caplevel; | ||
297 | p += getoffset(p); | ||
298 | continue; | ||
299 | } | ||
300 | case IThrow: { /* labeled failure */ | ||
301 | labelf = (p+1)->labels; | ||
302 | goto fail; | ||
303 | } | ||
304 | case IFailTwice: | ||
305 | assert(stack > getstackbase(L, ptop)); | ||
306 | stack--; | ||
307 | /* go through */ | ||
308 | case IFail: | ||
309 | labelf = LFAIL; /* labeled failure */ | ||
310 | fail: { /* pattern failed: try to backtrack */ | ||
311 | do { /* remove pending calls */ | ||
312 | assert(stack > getstackbase(L, ptop)); | ||
313 | s = (--stack)->s; | ||
314 | /*printf("fail (s == NULL => %d), labelf=%d stack->ls=%d (stack-> == giveup %d)\n", | ||
315 | s == NULL, labelf, stack->ls, stack->p == &giveup);*/ | ||
316 | } while (s == NULL || (!(stack->ls & labelf) && stack->p != &giveup)); | ||
317 | if (ndyncap > 0) /* is there matchtime captures? */ | ||
318 | ndyncap -= removedyncap(L, capture, stack->caplevel, captop); | ||
319 | captop = stack->caplevel; | ||
320 | p = stack->p; | ||
321 | continue; | ||
322 | } | ||
323 | case ICloseRunTime: { | ||
324 | CapState cs; | ||
325 | int rem, res, n; | ||
326 | int fr = lua_gettop(L) + 1; /* stack index of first result */ | ||
327 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; | ||
328 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ | ||
329 | captop -= n; /* remove nested captures */ | ||
330 | fr -= rem; /* 'rem' items were popped from Lua stack */ | ||
331 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ | ||
332 | if (res == -1) { /* fail? */ | ||
333 | labelf = LFAIL; /* labeled failure */ | ||
334 | goto fail; | ||
335 | } | ||
336 | s = o + res; /* else update current position */ | ||
337 | n = lua_gettop(L) - fr + 1; /* number of new captures */ | ||
338 | ndyncap += n - rem; /* update number of dynamic captures */ | ||
339 | if (n > 0) { /* any new capture? */ | ||
340 | if ((captop += n + 2) >= capsize) { | ||
341 | capture = doublecap(L, capture, captop, ptop); | ||
342 | capsize = 2 * captop; | ||
343 | } | ||
344 | /* add new captures to 'capture' list */ | ||
345 | adddyncaptures(s, capture + captop - n - 2, n, fr); | ||
346 | } | ||
347 | p++; | ||
348 | continue; | ||
349 | } | ||
350 | case ICloseCapture: { | ||
351 | const char *s1 = s; | ||
352 | assert(captop > 0); | ||
353 | /* if possible, turn capture into a full capture */ | ||
354 | if (capture[captop - 1].siz == 0 && | ||
355 | s1 - capture[captop - 1].s < UCHAR_MAX) { | ||
356 | capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; | ||
357 | p++; | ||
358 | continue; | ||
359 | } | ||
360 | else { | ||
361 | capture[captop].siz = 1; /* mark entry as closed */ | ||
362 | capture[captop].s = s; | ||
363 | goto pushcapture; | ||
364 | } | ||
365 | } | ||
366 | case IOpenCapture: | ||
367 | capture[captop].siz = 0; /* mark entry as open */ | ||
368 | capture[captop].s = s; | ||
369 | goto pushcapture; | ||
370 | case IFullCapture: | ||
371 | capture[captop].siz = getoff(p) + 1; /* save capture size */ | ||
372 | capture[captop].s = s - getoff(p); | ||
373 | /* goto pushcapture; */ | ||
374 | pushcapture: { | ||
375 | capture[captop].idx = p->i.key; | ||
376 | capture[captop].kind = getkind(p); | ||
377 | if (++captop >= capsize) { | ||
378 | capture = doublecap(L, capture, captop, ptop); | ||
379 | capsize = 2 * captop; | ||
380 | } | ||
381 | p++; | ||
382 | continue; | ||
383 | } | ||
384 | default: assert(0); return NULL; | ||
385 | } | ||
386 | } | ||
387 | } | ||
388 | |||
389 | /* }====================================================== */ | ||
390 | |||
391 | |||