aboutsummaryrefslogtreecommitdiff
path: root/lpvm.c
diff options
context:
space:
mode:
Diffstat (limited to 'lpvm.c')
-rw-r--r--lpvm.c54
1 files changed, 49 insertions, 5 deletions
diff --git a/lpvm.c b/lpvm.c
index a791c44..b7ae631 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -18,16 +18,45 @@
18 18
19/* initial size for call/backtrack stack */ 19/* initial size for call/backtrack stack */
20#if !defined(INITBACK) 20#if !defined(INITBACK)
21#define INITBACK MAXBACK 21#define INITBACK MAXBACK
22#endif 22#endif
23 23
24 24
25#define getoffset(p) (((p) + 1)->offset) 25#define getoffset(p) (((p) + 1)->offset)
26 26
27static const Instruction giveup = {{IGiveup, 0, 0}}; 27static const Instruction giveup = {{IGiveup, 0, 0}};
28 28
29 29
/*
** Decode one UTF-8 sequence starting at 'o'.
**
** On success, stores the decoded code point in '*val' and returns a
** pointer to the first byte after the sequence.  Returns NULL, leaving
** '*val' untouched, if the byte sequence is invalid: a stray
** continuation byte, a missing continuation byte, a sequence longer
** than four bytes, an overlong encoding, or a value above 0x10FFFF.
**
** The function takes no end pointer: it keeps reading while the lead
** byte announces more continuation bytes, and stops at the first byte
** that is not one.  The caller must therefore guarantee that the
** buffer ends with a non-continuation byte (such as '\0').
*/
static const char *utf8_decode (const char *o, int *val) {
  /* limits[n]: largest value that does NOT need n continuation bytes;
     anything <= limits[n] encoded with n of them is overlong.
     limits[0] rejects a continuation byte used as a lead byte. */
  static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFFu};
  const unsigned char *s = (const unsigned char *)o;
  unsigned int c = s[0];  /* first byte */
  unsigned int res = 0;  /* final result */
  if (c < 0x80)  /* ascii? */
    res = c;
  else {
    int count = 0;  /* to count number of continuation bytes */
    while (c & 0x40) {  /* still have continuation bytes? */
      unsigned int cc;
      /* A 4th continuation byte can never be valid.  Bail out now, so
         that the shift below stays small (a lead byte 0xFF could
         otherwise push 'count' to 7 and shift by 35 bits, which is
         undefined behavior for a 32-bit unsigned int). */
      if (count == 3)
        return NULL;  /* invalid byte sequence */
      cc = s[++count];  /* read next byte */
      if ((cc & 0xC0) != 0x80)  /* not a continuation byte? */
        return NULL;  /* invalid byte sequence */
      res = (res << 6) | (cc & 0x3F);  /* add lower 6 bits from cont. byte */
      c <<= 1;  /* to test next bit */
    }
    /* each pass shifted 'c' left by one, so its payload bits sit
       'count' positions too high; (count * 5) lands them right above
       the (count * 6) bits collected from the continuation bytes */
    res |= (c & 0x7F) << (count * 5);  /* add first byte */
    if (res > 0x10FFFFu || res <= limits[count])
      return NULL;  /* invalid byte sequence */
    s += count;  /* skip continuation bytes read */
  }
  *val = (int)res;
  return (const char *)s + 1;  /* +1 to include first byte */
}
57
58
59/*
31** {====================================================== 60** {======================================================
32** Virtual Machine 61** Virtual Machine
33** ======================================================= 62** =======================================================
@@ -43,7 +72,7 @@ typedef struct Stack {
43} Stack; 72} Stack;
44 73
45 74
46#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) 75#define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
47 76
48 77
49/* 78/*
@@ -207,6 +236,20 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
207 } 236 }
208 continue; 237 continue;
209 } 238 }
239 case IUTFR: {
240 int codepoint;
241 if (s >= e)
242 goto fail;
243 s = utf8_decode (s, &codepoint);
244 if (s && p[1].offset <= codepoint && codepoint <= utf_to(p))
245 p += 2;
246 else {
247 *labelf = LFAIL; /* labeled failure */
248 updatefarthest(*sfail, s); /*labeled failure */
249 goto fail;
250 }
251 continue;
252 }
210 case ITestAny: { 253 case ITestAny: {
211 if (s < e) p += 2; 254 if (s < e) p += 2;
212 else p += getoffset(p); 255 else p += getoffset(p);
@@ -301,8 +344,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
301 continue; 344 continue;
302 } 345 }
303 case ICommit: { 346 case ICommit: {
304 assert(stack > getstackbase(L, ptop)); 347 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
305 assert((stack - 1)->s != NULL);
306 stack--; 348 stack--;
307 p += getoffset(p); 349 p += getoffset(p);
308 continue; 350 continue;
@@ -318,6 +360,8 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
318 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); 360 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
319 s = (--stack)->s; 361 s = (--stack)->s;
320 insidepred = stack->labenv; /* labeled failure */ 362 insidepred = stack->labenv; /* labeled failure */
363 if (ndyncap > 0) /* are there matchtime captures? */
364 ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
321 captop = stack->caplevel; 365 captop = stack->caplevel;
322 p += getoffset(p); 366 p += getoffset(p);
323 continue; 367 continue;