aboutsummaryrefslogtreecommitdiff
path: root/lpvm.c
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-23 11:02:52 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-23 11:02:52 -0300
commit f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef (patch)
tree fcd765f59c5d74574bdb21cd7a11e1f723068d87 /lpvm.c
parent 9f7183c280f310c0d0b49b7b9c3b8eac297fafa7 (diff)
download lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.gz
lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.bz2
lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.zip
Towards a smaller encoding for charsets in code
Diffstat (limited to 'lpvm.c')
-rw-r--r-- lpvm.c 28
1 files changed, 19 insertions, 9 deletions
diff --git a/lpvm.c b/lpvm.c
index 9ee00a7..f0bb1e7 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -23,6 +23,16 @@
23static const Instruction giveup = {{IGiveup, 0, {0}}}; 23static const Instruction giveup = {{IGiveup, 0, {0}}};
24 24
25 25
26int charinset (const Instruction *i, const byte *buff, unsigned int c) {
27 c -= (unsigned int)i->i.aux2.set.offset;
28 if (c >= ((unsigned int)i->i.aux2.set.size /* size in instructions... */
29 * (unsigned int)sizeof(Instruction) /* in bytes... */
30 * 8u)) /* in bits */
31 return i->i.aux1; /* out of range */
32 return (testchar(buff, c) != i->i.aux1);
33}
34
35
26/* 36/*
27** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid. 37** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid.
28*/ 38*/
@@ -259,16 +269,16 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
259 continue; 269 continue;
260 } 270 }
261 case ISet: { 271 case ISet: {
262 int c = (byte)*s; 272 unsigned int c = (byte)*s;
263 if (testchar((p+1)->buff, c) && s < e) 273 if (charinset(p, (p+1)->buff, c) && s < e)
264 { p += CHARSETINSTSIZE; s++; } 274 { p += 1 + p->i.aux2.set.size; s++; }
265 else goto fail; 275 else goto fail;
266 continue; 276 continue;
267 } 277 }
268 case ITestSet: { 278 case ITestSet: {
269 int c = (byte)*s; 279 unsigned int c = (byte)*s;
270 if (testchar((p + 2)->buff, c) && s < e) 280 if (charinset(p, (p + 2)->buff, c) && s < e)
271 p += 1 + CHARSETINSTSIZE; 281 p += 2 + p->i.aux2.set.size;
272 else p += getoffset(p); 282 else p += getoffset(p);
273 continue; 283 continue;
274 } 284 }
@@ -280,10 +290,10 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
280 } 290 }
281 case ISpan: { 291 case ISpan: {
282 for (; s < e; s++) { 292 for (; s < e; s++) {
283 int c = (byte)*s; 293 unsigned int c = (byte)*s;
284 if (!testchar((p+1)->buff, c)) break; 294 if (!charinset(p, (p+1)->buff, c)) break;
285 } 295 }
286 p += CHARSETINSTSIZE; 296 p += 1 + p->i.aux2.set.size;
287 continue; 297 continue;
288 } 298 }
289 case IJmp: { 299 case IJmp: {