diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-23 11:02:52 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-23 11:02:52 -0300 |
commit | f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef (patch) | |
tree | fcd765f59c5d74574bdb21cd7a11e1f723068d87 /lpvm.c | |
parent | 9f7183c280f310c0d0b49b7b9c3b8eac297fafa7 (diff) | |
download | lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.gz lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.tar.bz2 lpeg-f8e9bc1c721a0802b2260f48ced72c7e04d7b1ef.zip |
Towards a smaller encoding for charsets in code
Diffstat (limited to 'lpvm.c')
-rw-r--r-- | lpvm.c | 28 |
1 files changed, 19 insertions, 9 deletions
@@ -23,6 +23,16 @@ | |||
23 | static const Instruction giveup = {{IGiveup, 0, {0}}}; | 23 | static const Instruction giveup = {{IGiveup, 0, {0}}}; |
24 | 24 | ||
25 | 25 | ||
26 | int charinset (const Instruction *i, const byte *buff, unsigned int c) { | ||
27 | c -= (unsigned int)i->i.aux2.set.offset; | ||
28 | if (c >= ((unsigned int)i->i.aux2.set.size /* size in instructions... */ | ||
29 | * (unsigned int)sizeof(Instruction) /* in bytes... */ | ||
30 | * 8u)) /* in bits */ | ||
31 | return i->i.aux1; /* out of range */ | ||
32 | return (testchar(buff, c) != i->i.aux1); | ||
33 | } | ||
34 | |||
35 | |||
26 | /* | 36 | /* |
27 | ** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid. | 37 | ** Decode one UTF-8 sequence, returning NULL if byte sequence is invalid. |
28 | */ | 38 | */ |
@@ -259,16 +269,16 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
259 | continue; | 269 | continue; |
260 | } | 270 | } |
261 | case ISet: { | 271 | case ISet: { |
262 | int c = (byte)*s; | 272 | unsigned int c = (byte)*s; |
263 | if (testchar((p+1)->buff, c) && s < e) | 273 | if (charinset(p, (p+1)->buff, c) && s < e) |
264 | { p += CHARSETINSTSIZE; s++; } | 274 | { p += 1 + p->i.aux2.set.size; s++; } |
265 | else goto fail; | 275 | else goto fail; |
266 | continue; | 276 | continue; |
267 | } | 277 | } |
268 | case ITestSet: { | 278 | case ITestSet: { |
269 | int c = (byte)*s; | 279 | unsigned int c = (byte)*s; |
270 | if (testchar((p + 2)->buff, c) && s < e) | 280 | if (charinset(p, (p + 2)->buff, c) && s < e) |
271 | p += 1 + CHARSETINSTSIZE; | 281 | p += 2 + p->i.aux2.set.size; |
272 | else p += getoffset(p); | 282 | else p += getoffset(p); |
273 | continue; | 283 | continue; |
274 | } | 284 | } |
@@ -280,10 +290,10 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, | |||
280 | } | 290 | } |
281 | case ISpan: { | 291 | case ISpan: { |
282 | for (; s < e; s++) { | 292 | for (; s < e; s++) { |
283 | int c = (byte)*s; | 293 | unsigned int c = (byte)*s; |
284 | if (!testchar((p+1)->buff, c)) break; | 294 | if (!charinset(p, (p+1)->buff, c)) break; |
285 | } | 295 | } |
286 | p += CHARSETINSTSIZE; | 296 | p += 1 + p->i.aux2.set.size; |
287 | continue; | 297 | continue; |
288 | } | 298 | } |
289 | case IJmp: { | 299 | case IJmp: { |