aboutsummaryrefslogtreecommitdiff
path: root/lptree.c
diff options
context:
space:
mode:
Diffstat (limited to 'lptree.c')
-rw-r--r--lptree.c104
1 files changed, 77 insertions, 27 deletions
diff --git a/lptree.c b/lptree.c
index b1a32c4..4afbae7 100644
--- a/lptree.c
+++ b/lptree.c
@@ -21,11 +21,11 @@
21/* number of siblings for each tree */ 21/* number of siblings for each tree */
22const byte numsiblings[] = { 22const byte numsiblings[] = {
23 0, 0, 0, /* char, set, any */ 23 0, 0, 0, /* char, set, any */
24 0, 0, /* true, false */ 24 0, 0, 0, /* true, false, utf-range */
25 1, /* rep */ 25 1, /* rep */
26 2, 2, /* seq, choice */ 26 2, 2, /* seq, choice */
27 1, 1, /* not, and */ 27 1, 1, /* not, and */
28 0, 0, 2, 1, /* call, opencall, rule, grammar */ 28 0, 0, 2, 1, 1, /* call, opencall, rule, prerule, grammar */
29 1, /* behind */ 29 1, /* behind */
30 1, 1, /* capture, runtime capture */ 30 1, 1, /* capture, runtime capture */
31 0 /* labeled failure throw */ 31 0 /* labeled failure throw */
@@ -58,7 +58,7 @@ static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t, byte tag
58 lua_gettable(L, postable); /* query name in position table */ 58 lua_gettable(L, postable); /* query name in position table */
59 n = lua_tonumber(L, -1); /* get (absolute) position */ 59 n = lua_tonumber(L, -1); /* get (absolute) position */
60 lua_pop(L, 1); /* remove position */ 60 lua_pop(L, 1); /* remove position */
61 if (tag == TOpenCall) { 61 if (tag == TOpenCall) { /* labeled failure */
62 if (n == 0) { /* no position? */ 62 if (n == 0) { /* no position? */
63 lua_rawgeti(L, -1, t->key); /* get rule's name again */ 63 lua_rawgeti(L, -1, t->key); /* get rule's name again */
64 luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); 64 luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1));
@@ -109,7 +109,7 @@ static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) {
109 return; 109 return;
110 case TOpenCall: { 110 case TOpenCall: {
111 if (g != NULL) /* inside a grammar? */ 111 if (g != NULL) /* inside a grammar? */
112 fixonecall(L, postable, g, t, TOpenCall); 112 fixonecall(L, postable, g, t, TOpenCall); /* labeled failure */
113 else { /* open call outside grammar */ 113 else { /* open call outside grammar */
114 lua_rawgeti(L, -1, t->key); 114 lua_rawgeti(L, -1, t->key);
115 luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); 115 luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1));
@@ -695,6 +695,56 @@ static int lp_range (lua_State *L) {
695 695
696 696
697/* 697/*
698** Fills a tree node with basic information about the UTF-8 code point
699** 'cpu': its value in 'n', its length in 'cap', and its first byte in
700** 'key'
701*/
702static void codeutftree (lua_State *L, TTree *t, lua_Unsigned cpu, int arg) {
703 int len, fb, cp;
704 cp = (int)cpu;
705 if (cp <= 0x7f) { /* one byte? */
706 len = 1;
707 fb = cp;
708 } else if (cp <= 0x7ff) {
709 len = 2;
710 fb = 0xC0 | (cp >> 6);
711 } else if (cp <= 0xffff) {
712 len = 3;
713 fb = 0xE0 | (cp >> 12);
714 }
715 else {
716 luaL_argcheck(L, cpu <= 0x10ffffu, arg, "invalid code point");
717 len = 4;
718 fb = 0xF0 | (cp >> 18);
719 }
720 t->u.n = cp;
721 t->cap = len;
722 t->key = fb;
723}
724
725
726static int lp_utfr (lua_State *L) {
727 lua_Unsigned from = (lua_Unsigned)luaL_checkinteger(L, 1);
728 lua_Unsigned to = (lua_Unsigned)luaL_checkinteger(L, 2);
729 luaL_argcheck(L, from <= to, 2, "empty range");
730 if (to <= 0x7f) { /* ascii range? */
731 TTree *tree = newcharset(L); /* code it as a regular charset */
732 unsigned int f;
733 for (f = (int)from; f <= to; f++)
734 setchar(treebuffer(tree), f);
735 }
736 else { /* multi-byte utf-8 range */
737 TTree *tree = newtree(L, 2);
738 tree->tag = TUTFR;
739 codeutftree(L, tree, from, 1);
740 sib1(tree)->tag = TXInfo;
741 codeutftree(L, sib1(tree), to, 2);
742 }
743 return 1;
744}
745
746
747/*
698** Look-behind predicate 748** Look-behind predicate
699*/ 749*/
700static int lp_behind (lua_State *L) { 750static int lp_behind (lua_State *L) {
@@ -940,7 +990,7 @@ static int collectrules (lua_State *L, int arg, int *totalsize) {
940 int size; /* accumulator for total size */ 990 int size; /* accumulator for total size */
941 lua_newtable(L); /* create position table */ 991 lua_newtable(L); /* create position table */
942 getfirstrule(L, arg, postab); 992 getfirstrule(L, arg, postab);
943 size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */ 993 size = 3 + getsize(L, postab + 2); /* TGrammar + TRule + TXInfo + rule */
944 lua_pushnil(L); /* prepare to traverse grammar table */ 994 lua_pushnil(L); /* prepare to traverse grammar table */
945 while (lua_next(L, arg) != 0) { 995 while (lua_next(L, arg) != 0) {
946 if (lua_tonumber(L, -2) == 1 || 996 if (lua_tonumber(L, -2) == 1 ||
@@ -954,11 +1004,11 @@ static int collectrules (lua_State *L, int arg, int *totalsize) {
954 lua_pushvalue(L, -2); /* push key (to insert into position table) */ 1004 lua_pushvalue(L, -2); /* push key (to insert into position table) */
955 lua_pushinteger(L, size); 1005 lua_pushinteger(L, size);
956 lua_settable(L, postab); 1006 lua_settable(L, postab);
957 size += 1 + getsize(L, -1); /* update size */ 1007 size += 2 + getsize(L, -1); /* add 'TRule + TXInfo + rule' to size */
958 lua_pushvalue(L, -2); /* push key (for next lua_next) */ 1008 lua_pushvalue(L, -2); /* push key (for next lua_next) */
959 n++; 1009 n++;
960 } 1010 }
961 *totalsize = size + 1; /* TTrue to finish list of rules */ 1011 *totalsize = size + 1; /* space for 'TTrue' finishing list of rules */
962 return n; 1012 return n;
963} 1013}
964 1014
@@ -970,11 +1020,13 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
970 int ridx = frule + 2*i + 1; /* index of i-th rule */ 1020 int ridx = frule + 2*i + 1; /* index of i-th rule */
971 int rulesize; 1021 int rulesize;
972 TTree *rn = gettree(L, ridx, &rulesize); 1022 TTree *rn = gettree(L, ridx, &rulesize);
1023 TTree *pr = sib1(nd); /* points to rule's prerule */
973 nd->tag = TRule; 1024 nd->tag = TRule;
974 nd->key = 0; /* will be fixed when rule is used */ 1025 nd->key = 0; /* will be fixed when rule is used */
975 nd->cap = i; /* rule number */ 1026 pr->tag = TXInfo;
976 nd->u.ps = rulesize + 1; /* point to next rule */ 1027 pr->u.n = i; /* rule number */
977 memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ 1028 nd->u.ps = rulesize + 2; /* point to next rule */
1029 memcpy(sib1(pr), rn, rulesize * sizeof(TTree)); /* copy rule */
978 mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ 1030 mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */
979 nd = sib2(nd); /* move to next rule */ 1031 nd = sib2(nd); /* move to next rule */
980 } 1032 }
@@ -1010,7 +1062,7 @@ static int checkloops (TTree *tree) {
1010** twice in 'passed', there is path from it back to itself without 1062** twice in 'passed', there is path from it back to itself without
1011** advancing the subject. 1063** advancing the subject.
1012*/ 1064*/
1013static int verifyerror (lua_State *L, int *passed, int npassed) { 1065static int verifyerror (lua_State *L, unsigned short *passed, int npassed) {
1014 int i, j; 1066 int i, j;
1015 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ 1067 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */
1016 for (j = i - 1; j >= 0; j--) { 1068 for (j = i - 1; j >= 0; j--) {
@@ -1035,12 +1087,13 @@ static int verifyerror (lua_State *L, int *passed, int npassed) {
1035** counts the elements in 'passed'. 1087** counts the elements in 'passed'.
1036** Assume ktable at the top of the stack. 1088** Assume ktable at the top of the stack.
1037*/ 1089*/
1038static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, 1090static int verifyrule (lua_State *L, TTree *tree, unsigned short *passed,
1039 int nb) { 1091 int npassed, int nb) {
1040 tailcall: 1092 tailcall:
1041 switch (tree->tag) { 1093 switch (tree->tag) {
1042 case TChar: case TSet: case TAny: 1094 case TChar: case TSet: case TAny:
1043 case TFalse: case TThrow: /* labeled failure */ 1095 case TFalse: case TUTFR:
1096 case TThrow: /* labeled failure */
1044 return nb; /* cannot pass from here */ 1097 return nb; /* cannot pass from here */
1045 case TTrue: 1098 case TTrue:
1046 case TBehind: /* look-behind cannot have calls */ 1099 case TBehind: /* look-behind cannot have calls */
@@ -1048,7 +1101,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
1048 case TNot: case TAnd: case TRep: 1101 case TNot: case TAnd: case TRep:
1049 /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ 1102 /* return verifyrule(L, sib1(tree), passed, npassed, 1); */
1050 tree = sib1(tree); nb = 1; goto tailcall; 1103 tree = sib1(tree); nb = 1; goto tailcall;
1051 case TCapture: case TRunTime: 1104 case TCapture: case TRunTime: case TXInfo:
1052 /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ 1105 /* return verifyrule(L, sib1(tree), passed, npassed, nb); */
1053 tree = sib1(tree); goto tailcall; 1106 tree = sib1(tree); goto tailcall;
1054 case TCall: 1107 case TCall:
@@ -1059,15 +1112,15 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
1059 return nb; 1112 return nb;
1060 /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ 1113 /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */
1061 tree = sib2(tree); goto tailcall; 1114 tree = sib2(tree); goto tailcall;
1062 case TChoice: /* must check both children */ 1115 case TChoice: /* must check both children */
1063 nb = verifyrule(L, sib1(tree), passed, npassed, nb); 1116 nb = verifyrule(L, sib1(tree), passed, npassed, nb);
1064 /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ 1117 /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
1065 tree = sib2(tree); goto tailcall; 1118 tree = sib2(tree); goto tailcall;
1066 case TRule: 1119 case TRule:
1067 if (npassed >= MAXRULES) 1120 if (npassed >= MAXRULES) /* too many steps? */
1068 return verifyerror(L, passed, npassed); 1121 return verifyerror(L, passed, npassed); /* error */
1069 else { 1122 else {
1070 passed[npassed++] = tree->key; 1123 passed[npassed++] = tree->key; /* add rule to path */
1071 /* return verifyrule(L, sib1(tree), passed, npassed); */ 1124 /* return verifyrule(L, sib1(tree), passed, npassed); */
1072 tree = sib1(tree); goto tailcall; 1125 tree = sib1(tree); goto tailcall;
1073 } 1126 }
@@ -1079,7 +1132,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
1079 1132
1080 1133
1081static void verifygrammar (lua_State *L, TTree *grammar) { 1134static void verifygrammar (lua_State *L, TTree *grammar) {
1082 int passed[MAXRULES]; 1135 unsigned short passed[MAXRULES];
1083 TTree *rule; 1136 TTree *rule;
1084 /* check left-recursive rules */ 1137 /* check left-recursive rules */
1085 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { 1138 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
@@ -1243,12 +1296,6 @@ static int lp_setmax (lua_State *L) {
1243} 1296}
1244 1297
1245 1298
1246static int lp_version (lua_State *L) {
1247 lua_pushstring(L, VERSION);
1248 return 1;
1249}
1250
1251
1252static int lp_type (lua_State *L) { 1299static int lp_type (lua_State *L) {
1253 if (testpattern(L, 1)) 1300 if (testpattern(L, 1))
1254 lua_pushliteral(L, "pattern"); 1301 lua_pushliteral(L, "pattern");
@@ -1317,8 +1364,9 @@ static struct luaL_Reg pattreg[] = {
1317 {"P", lp_P}, 1364 {"P", lp_P},
1318 {"S", lp_set}, 1365 {"S", lp_set},
1319 {"R", lp_range}, 1366 {"R", lp_range},
1367 {"utfR", lp_utfr},
1320 {"locale", lp_locale}, 1368 {"locale", lp_locale},
1321 {"version", lp_version}, 1369 {"version", NULL},
1322 {"setmaxstack", lp_setmax}, 1370 {"setmaxstack", lp_setmax},
1323 {"type", lp_type}, 1371 {"type", lp_type},
1324 {"T", lp_throw}, /* labeled failure throw */ 1372 {"T", lp_throw}, /* labeled failure throw */
@@ -1347,6 +1395,8 @@ int luaopen_lpeglabel (lua_State *L) { /* labeled failure */
1347 luaL_newlib(L, pattreg); 1395 luaL_newlib(L, pattreg);
1348 lua_pushvalue(L, -1); 1396 lua_pushvalue(L, -1);
1349 lua_setfield(L, -3, "__index"); 1397 lua_setfield(L, -3, "__index");
1398 lua_pushliteral(L, "LPegLabel " VERSION); /* labeled failure */
1399 lua_setfield(L, -2, "version");
1350 return 1; 1400 return 1;
1351} 1401}
1352 1402