aboutsummaryrefslogtreecommitdiff
path: root/lptree.c
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-04-27 10:32:39 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2023-04-27 10:32:39 -0300
commit012cf9c86cf91cb8354e229bde335592d41b84b2 (patch)
tree353f17797b1952eaec231c8e4fd5c21e02daf875 /lptree.c
parent3403b0c7256435560b63f828da92026c5d4c898b (diff)
downloadlpeg-012cf9c86cf91cb8354e229bde335592d41b84b2.tar.gz
lpeg-012cf9c86cf91cb8354e229bde335592d41b84b2.tar.bz2
lpeg-012cf9c86cf91cb8354e229bde335592d41b84b2.zip
Compact charsets used in trees, too.
Diffstat (limited to 'lptree.c')
-rw-r--r--lptree.c70
1 files changed, 53 insertions, 17 deletions
diff --git a/lptree.c b/lptree.c
index 4affac9..f9e170b 100644
--- a/lptree.c
+++ b/lptree.c
@@ -12,6 +12,7 @@
12#include "lpcode.h" 12#include "lpcode.h"
13#include "lpprint.h" 13#include "lpprint.h"
14#include "lptree.h" 14#include "lptree.h"
15#include "lpcset.h"
15 16
16 17
17/* number of siblings for each tree */ 18/* number of siblings for each tree */
@@ -370,14 +371,37 @@ static TTree *newleaf (lua_State *L, int tag) {
370} 371}
371 372
372 373
373static TTree *newcharset (lua_State *L) { 374static TTree *newccharset (lua_State *L, byte *cs, charsetinfo *info) {
374 TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1); 375 int i;
376 TTree *tree = newtree(L, bytes2slots(info->size) + 1);
375 tree->tag = TSet; 377 tree->tag = TSet;
376 loopset(i, treebuffer(tree)[i] = 0); 378 tree->u.set.offset = info->offset;
379 tree->u.set.size = info->size;
380 tree->u.set.deflt = info->deflt;
381 for (i = 0; i < info->size; i++)
382 treebuffer(tree)[i] = cs[info->offset + i];
377 return tree; 383 return tree;
378} 384}
379 385
380 386
387static TTree *newcharset (lua_State *L, byte *cs) {
388 charsetinfo info;
389 Opcode op = charsettype(cs, &info);
390 switch (op) {
391 case IFail: return newleaf(L, TFalse);
392 case IAny: return newleaf(L, TAny);
393 case IChar: {
394 TTree *tree =newleaf(L, TChar);
395 tree->u.n = info.offset;
396 return tree;
397 }
398 default:
399 assert(op == ISet);
400 return newccharset(L, cs, &info);
401 }
402}
403
404
381/* 405/*
382** add to tree a sequence where first sibling is 'sib' (with size 406** add to tree a sequence where first sibling is 'sib' (with size
383** 'sibsize'); returns position for second sibling 407** 'sibsize'); returns position for second sibling
@@ -549,8 +573,8 @@ static int lp_choice (lua_State *L) {
549 TTree *t1 = getpatt(L, 1, NULL); 573 TTree *t1 = getpatt(L, 1, NULL);
550 TTree *t2 = getpatt(L, 2, NULL); 574 TTree *t2 = getpatt(L, 2, NULL);
551 if (tocharset(t1, &st1) && tocharset(t2, &st2)) { 575 if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
552 TTree *t = newcharset(L); 576 loopset(i, st1.cs[i] |= st2.cs[i]);
553 loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); 577 newcharset(L, st1.cs);
554 } 578 }
555 else if (nofail(t1) || t2->tag == TFalse) 579 else if (nofail(t1) || t2->tag == TFalse)
556 lua_pushvalue(L, 1); /* true / x => true, x / false => x */ 580 lua_pushvalue(L, 1); /* true / x => true, x / false => x */
@@ -626,8 +650,8 @@ static int lp_sub (lua_State *L) {
626 TTree *t1 = getpatt(L, 1, &s1); 650 TTree *t1 = getpatt(L, 1, &s1);
627 TTree *t2 = getpatt(L, 2, &s2); 651 TTree *t2 = getpatt(L, 2, &s2);
628 if (tocharset(t1, &st1) && tocharset(t2, &st2)) { 652 if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
629 TTree *t = newcharset(L); 653 loopset(i, st1.cs[i] &= ~st2.cs[i]);
630 loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]); 654 newcharset(L, st1.cs);
631 } 655 }
632 else { 656 else {
633 TTree *tree = newtree(L, 2 + s1 + s2); 657 TTree *tree = newtree(L, 2 + s1 + s2);
@@ -645,11 +669,13 @@ static int lp_sub (lua_State *L) {
645static int lp_set (lua_State *L) { 669static int lp_set (lua_State *L) {
646 size_t l; 670 size_t l;
647 const char *s = luaL_checklstring(L, 1, &l); 671 const char *s = luaL_checklstring(L, 1, &l);
648 TTree *tree = newcharset(L); 672 byte buff[CHARSETSIZE];
673 loopset(i, buff[i] = 0);
649 while (l--) { 674 while (l--) {
650 setchar(treebuffer(tree), (byte)(*s)); 675 setchar(buff, (byte)(*s));
651 s++; 676 s++;
652 } 677 }
678 newcharset(L, buff);
653 return 1; 679 return 1;
654} 680}
655 681
@@ -657,15 +683,17 @@ static int lp_set (lua_State *L) {
657static int lp_range (lua_State *L) { 683static int lp_range (lua_State *L) {
658 int arg; 684 int arg;
659 int top = lua_gettop(L); 685 int top = lua_gettop(L);
660 TTree *tree = newcharset(L); 686 byte buff[CHARSETSIZE];
687 loopset(i, buff[i] = 0);
661 for (arg = 1; arg <= top; arg++) { 688 for (arg = 1; arg <= top; arg++) {
662 int c; 689 int c;
663 size_t l; 690 size_t l;
664 const char *r = luaL_checklstring(L, arg, &l); 691 const char *r = luaL_checklstring(L, arg, &l);
665 luaL_argcheck(L, l == 2, arg, "range must have two characters"); 692 luaL_argcheck(L, l == 2, arg, "range must have two characters");
666 for (c = (byte)r[0]; c <= (byte)r[1]; c++) 693 for (c = (byte)r[0]; c <= (byte)r[1]; c++)
667 setchar(treebuffer(tree), c); 694 setchar(buff, c);
668 } 695 }
696 newcharset(L, buff);
669 return 1; 697 return 1;
670} 698}
671 699
@@ -704,10 +732,12 @@ static int lp_utfr (lua_State *L) {
704 lua_Unsigned to = (lua_Unsigned)luaL_checkinteger(L, 2); 732 lua_Unsigned to = (lua_Unsigned)luaL_checkinteger(L, 2);
705 luaL_argcheck(L, from <= to, 2, "empty range"); 733 luaL_argcheck(L, from <= to, 2, "empty range");
706 if (to <= 0x7f) { /* ascii range? */ 734 if (to <= 0x7f) { /* ascii range? */
707 TTree *tree = newcharset(L); /* code it as a regular charset */
708 unsigned int f; 735 unsigned int f;
736 byte buff[CHARSETSIZE]; /* code it as a regular charset */
737 loopset(i, buff[i] = 0);
709 for (f = (int)from; f <= to; f++) 738 for (f = (int)from; f <= to; f++)
710 setchar(treebuffer(tree), f); 739 setchar(buff, f);
740 newcharset(L, buff);
711 } 741 }
712 else { /* multi-byte utf-8 range */ 742 else { /* multi-byte utf-8 range */
713 TTree *tree = newtree(L, 2); 743 TTree *tree = newtree(L, 2);
@@ -1261,11 +1291,17 @@ int lp_gc (lua_State *L) {
1261} 1291}
1262 1292
1263 1293
1294/*
1295** Create a charset representing a category of characters, given by
1296** the predicate 'catf'.
1297*/
1264static void createcat (lua_State *L, const char *catname, int (catf) (int)) { 1298static void createcat (lua_State *L, const char *catname, int (catf) (int)) {
1265 TTree *t = newcharset(L); 1299 int c;
1266 int i; 1300 byte buff[CHARSETSIZE];
1267 for (i = 0; i <= UCHAR_MAX; i++) 1301 loopset(i, buff[i] = 0);
1268 if (catf(i)) setchar(treebuffer(t), i); 1302 for (c = 0; c <= UCHAR_MAX; c++)
1303 if (catf(c)) setchar(buff, c);
1304 newcharset(L, buff);
1269 lua_setfield(L, -2, catname); 1305 lua_setfield(L, -2, catname);
1270} 1306}
1271 1307