diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-27 10:32:39 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-27 10:32:39 -0300 |
commit | 012cf9c86cf91cb8354e229bde335592d41b84b2 (patch) | |
tree | 353f17797b1952eaec231c8e4fd5c21e02daf875 /lptree.c | |
parent | 3403b0c7256435560b63f828da92026c5d4c898b (diff) | |
download | lpeg-012cf9c86cf91cb8354e229bde335592d41b84b2.tar.gz lpeg-012cf9c86cf91cb8354e229bde335592d41b84b2.tar.bz2 lpeg-012cf9c86cf91cb8354e229bde335592d41b84b2.zip |
Compact charsets used in trees, too.
Diffstat (limited to 'lptree.c')
-rw-r--r-- | lptree.c | 70 |
1 files changed, 53 insertions, 17 deletions
@@ -12,6 +12,7 @@ | |||
12 | #include "lpcode.h" | 12 | #include "lpcode.h" |
13 | #include "lpprint.h" | 13 | #include "lpprint.h" |
14 | #include "lptree.h" | 14 | #include "lptree.h" |
15 | #include "lpcset.h" | ||
15 | 16 | ||
16 | 17 | ||
17 | /* number of siblings for each tree */ | 18 | /* number of siblings for each tree */ |
@@ -370,14 +371,37 @@ static TTree *newleaf (lua_State *L, int tag) { | |||
370 | } | 371 | } |
371 | 372 | ||
372 | 373 | ||
373 | static TTree *newcharset (lua_State *L) { | 374 | static TTree *newccharset (lua_State *L, byte *cs, charsetinfo *info) { |
374 | TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); | 375 | int i; |
376 | TTree *tree = newtree(L, bytes2slots(info->size) + 1); | ||
375 | tree->tag = TSet; | 377 | tree->tag = TSet; |
376 | loopset(i, treebuffer(tree)[i] = 0); | 378 | tree->u.set.offset = info->offset; |
379 | tree->u.set.size = info->size; | ||
380 | tree->u.set.deflt = info->deflt; | ||
381 | for (i = 0; i < info->size; i++) | ||
382 | treebuffer(tree)[i] = cs[info->offset + i]; | ||
377 | return tree; | 383 | return tree; |
378 | } | 384 | } |
379 | 385 | ||
380 | 386 | ||
387 | static TTree *newcharset (lua_State *L, byte *cs) { | ||
388 | charsetinfo info; | ||
389 | Opcode op = charsettype(cs, &info); | ||
390 | switch (op) { | ||
391 | case IFail: return newleaf(L, TFalse); | ||
392 | case IAny: return newleaf(L, TAny); | ||
393 | case IChar: { | ||
394 | TTree *tree =newleaf(L, TChar); | ||
395 | tree->u.n = info.offset; | ||
396 | return tree; | ||
397 | } | ||
398 | default: | ||
399 | assert(op == ISet); | ||
400 | return newccharset(L, cs, &info); | ||
401 | } | ||
402 | } | ||
403 | |||
404 | |||
381 | /* | 405 | /* |
382 | ** add to tree a sequence where first sibling is 'sib' (with size | 406 | ** add to tree a sequence where first sibling is 'sib' (with size |
383 | ** 'sibsize'); returns position for second sibling | 407 | ** 'sibsize'); returns position for second sibling |
@@ -549,8 +573,8 @@ static int lp_choice (lua_State *L) { | |||
549 | TTree *t1 = getpatt(L, 1, NULL); | 573 | TTree *t1 = getpatt(L, 1, NULL); |
550 | TTree *t2 = getpatt(L, 2, NULL); | 574 | TTree *t2 = getpatt(L, 2, NULL); |
551 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | 575 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { |
552 | TTree *t = newcharset(L); | 576 | loopset(i, st1.cs[i] |= st2.cs[i]); |
553 | loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); | 577 | newcharset(L, st1.cs); |
554 | } | 578 | } |
555 | else if (nofail(t1) || t2->tag == TFalse) | 579 | else if (nofail(t1) || t2->tag == TFalse) |
556 | lua_pushvalue(L, 1); /* true / x => true, x / false => x */ | 580 | lua_pushvalue(L, 1); /* true / x => true, x / false => x */ |
@@ -626,8 +650,8 @@ static int lp_sub (lua_State *L) { | |||
626 | TTree *t1 = getpatt(L, 1, &s1); | 650 | TTree *t1 = getpatt(L, 1, &s1); |
627 | TTree *t2 = getpatt(L, 2, &s2); | 651 | TTree *t2 = getpatt(L, 2, &s2); |
628 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { | 652 | if (tocharset(t1, &st1) && tocharset(t2, &st2)) { |
629 | TTree *t = newcharset(L); | 653 | loopset(i, st1.cs[i] &= ~st2.cs[i]); |
630 | loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); | 654 | newcharset(L, st1.cs); |
631 | } | 655 | } |
632 | else { | 656 | else { |
633 | TTree *tree = newtree(L, 2 + s1 + s2); | 657 | TTree *tree = newtree(L, 2 + s1 + s2); |
@@ -645,11 +669,13 @@ static int lp_sub (lua_State *L) { | |||
645 | static int lp_set (lua_State *L) { | 669 | static int lp_set (lua_State *L) { |
646 | size_t l; | 670 | size_t l; |
647 | const char *s = luaL_checklstring(L, 1, &l); | 671 | const char *s = luaL_checklstring(L, 1, &l); |
648 | TTree *tree = newcharset(L); | 672 | byte buff[CHARSETSIZE]; |
673 | loopset(i, buff[i] = 0); | ||
649 | while (l--) { | 674 | while (l--) { |
650 | setchar(treebuffer(tree), (byte)(*s)); | 675 | setchar(buff, (byte)(*s)); |
651 | s++; | 676 | s++; |
652 | } | 677 | } |
678 | newcharset(L, buff); | ||
653 | return 1; | 679 | return 1; |
654 | } | 680 | } |
655 | 681 | ||
@@ -657,15 +683,17 @@ static int lp_set (lua_State *L) { | |||
657 | static int lp_range (lua_State *L) { | 683 | static int lp_range (lua_State *L) { |
658 | int arg; | 684 | int arg; |
659 | int top = lua_gettop(L); | 685 | int top = lua_gettop(L); |
660 | TTree *tree = newcharset(L); | 686 | byte buff[CHARSETSIZE]; |
687 | loopset(i, buff[i] = 0); | ||
661 | for (arg = 1; arg <= top; arg++) { | 688 | for (arg = 1; arg <= top; arg++) { |
662 | int c; | 689 | int c; |
663 | size_t l; | 690 | size_t l; |
664 | const char *r = luaL_checklstring(L, arg, &l); | 691 | const char *r = luaL_checklstring(L, arg, &l); |
665 | luaL_argcheck(L, l == 2, arg, "range must have two characters"); | 692 | luaL_argcheck(L, l == 2, arg, "range must have two characters"); |
666 | for (c = (byte)r[0]; c <= (byte)r[1]; c++) | 693 | for (c = (byte)r[0]; c <= (byte)r[1]; c++) |
667 | setchar(treebuffer(tree), c); | 694 | setchar(buff, c); |
668 | } | 695 | } |
696 | newcharset(L, buff); | ||
669 | return 1; | 697 | return 1; |
670 | } | 698 | } |
671 | 699 | ||
@@ -704,10 +732,12 @@ static int lp_utfr (lua_State *L) { | |||
704 | lua_Unsigned to = (lua_Unsigned)luaL_checkinteger(L, 2); | 732 | lua_Unsigned to = (lua_Unsigned)luaL_checkinteger(L, 2); |
705 | luaL_argcheck(L, from <= to, 2, "empty range"); | 733 | luaL_argcheck(L, from <= to, 2, "empty range"); |
706 | if (to <= 0x7f) { /* ascii range? */ | 734 | if (to <= 0x7f) { /* ascii range? */ |
707 | TTree *tree = newcharset(L); /* code it as a regular charset */ | ||
708 | unsigned int f; | 735 | unsigned int f; |
736 | byte buff[CHARSETSIZE]; /* code it as a regular charset */ | ||
737 | loopset(i, buff[i] = 0); | ||
709 | for (f = (int)from; f <= to; f++) | 738 | for (f = (int)from; f <= to; f++) |
710 | setchar(treebuffer(tree), f); | 739 | setchar(buff, f); |
740 | newcharset(L, buff); | ||
711 | } | 741 | } |
712 | else { /* multi-byte utf-8 range */ | 742 | else { /* multi-byte utf-8 range */ |
713 | TTree *tree = newtree(L, 2); | 743 | TTree *tree = newtree(L, 2); |
@@ -1261,11 +1291,17 @@ int lp_gc (lua_State *L) { | |||
1261 | } | 1291 | } |
1262 | 1292 | ||
1263 | 1293 | ||
1294 | /* | ||
1295 | ** Create a charset representing a category of characters, given by | ||
1296 | ** the predicate 'catf'. | ||
1297 | */ | ||
1264 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { | 1298 | static void createcat (lua_State *L, const char *catname, int (catf) (int)) { |
1265 | TTree *t = newcharset(L); | 1299 | int c; |
1266 | int i; | 1300 | byte buff[CHARSETSIZE]; |
1267 | for (i = 0; i <= UCHAR_MAX; i++) | 1301 | loopset(i, buff[i] = 0); |
1268 | if (catf(i)) setchar(treebuffer(t), i); | 1302 | for (c = 0; c <= UCHAR_MAX; c++) |
1303 | if (catf(c)) setchar(buff, c); | ||
1304 | newcharset(L, buff); | ||
1269 | lua_setfield(L, -2, catname); | 1305 | lua_setfield(L, -2, catname); |
1270 | } | 1306 | } |
1271 | 1307 | ||