diff options
Diffstat (limited to 'lptree.c')
-rw-r--r-- | lptree.c | 104 |
1 files changed, 77 insertions, 27 deletions
@@ -21,11 +21,11 @@ | |||
21 | /* number of siblings for each tree */ | 21 | /* number of siblings for each tree */ |
22 | const byte numsiblings[] = { | 22 | const byte numsiblings[] = { |
23 | 0, 0, 0, /* char, set, any */ | 23 | 0, 0, 0, /* char, set, any */ |
24 | 0, 0, /* true, false */ | 24 | 0, 0, 0, /* true, false, utf-range */ |
25 | 1, /* rep */ | 25 | 1, /* rep */ |
26 | 2, 2, /* seq, choice */ | 26 | 2, 2, /* seq, choice */ |
27 | 1, 1, /* not, and */ | 27 | 1, 1, /* not, and */ |
28 | 0, 0, 2, 1, /* call, opencall, rule, grammar */ | 28 | 0, 0, 2, 1, 1, /* call, opencall, rule, prerule, grammar */ |
29 | 1, /* behind */ | 29 | 1, /* behind */ |
30 | 1, 1, /* capture, runtime capture */ | 30 | 1, 1, /* capture, runtime capture */ |
31 | 0 /* labeled failure throw */ | 31 | 0 /* labeled failure throw */ |
@@ -58,7 +58,7 @@ static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t, byte tag | |||
58 | lua_gettable(L, postable); /* query name in position table */ | 58 | lua_gettable(L, postable); /* query name in position table */ |
59 | n = lua_tonumber(L, -1); /* get (absolute) position */ | 59 | n = lua_tonumber(L, -1); /* get (absolute) position */ |
60 | lua_pop(L, 1); /* remove position */ | 60 | lua_pop(L, 1); /* remove position */ |
61 | if (tag == TOpenCall) { | 61 | if (tag == TOpenCall) { /* labeled failure */ |
62 | if (n == 0) { /* no position? */ | 62 | if (n == 0) { /* no position? */ |
63 | lua_rawgeti(L, -1, t->key); /* get rule's name again */ | 63 | lua_rawgeti(L, -1, t->key); /* get rule's name again */ |
64 | luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); | 64 | luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1)); |
@@ -109,7 +109,7 @@ static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) { | |||
109 | return; | 109 | return; |
110 | case TOpenCall: { | 110 | case TOpenCall: { |
111 | if (g != NULL) /* inside a grammar? */ | 111 | if (g != NULL) /* inside a grammar? */ |
112 | fixonecall(L, postable, g, t, TOpenCall); | 112 | fixonecall(L, postable, g, t, TOpenCall); /* labeled failure */ |
113 | else { /* open call outside grammar */ | 113 | else { /* open call outside grammar */ |
114 | lua_rawgeti(L, -1, t->key); | 114 | lua_rawgeti(L, -1, t->key); |
115 | luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); | 115 | luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1)); |
@@ -695,6 +695,56 @@ static int lp_range (lua_State *L) { | |||
695 | 695 | ||
696 | 696 | ||
697 | /* | 697 | /* |
698 | ** Fills a tree node with basic information about the UTF-8 code point | ||
699 | ** 'cpu': its value in 'n', its length in 'cap', and its first byte in | ||
700 | ** 'key' | ||
701 | */ | ||
702 | static void codeutftree (lua_State *L, TTree *t, lua_Unsigned cpu, int arg) { | ||
703 | int len, fb, cp; | ||
704 | cp = (int)cpu; | ||
705 | if (cp <= 0x7f) { /* one byte? */ | ||
706 | len = 1; | ||
707 | fb = cp; | ||
708 | } else if (cp <= 0x7ff) { | ||
709 | len = 2; | ||
710 | fb = 0xC0 | (cp >> 6); | ||
711 | } else if (cp <= 0xffff) { | ||
712 | len = 3; | ||
713 | fb = 0xE0 | (cp >> 12); | ||
714 | } | ||
715 | else { | ||
716 | luaL_argcheck(L, cpu <= 0x10ffffu, arg, "invalid code point"); | ||
717 | len = 4; | ||
718 | fb = 0xF0 | (cp >> 18); | ||
719 | } | ||
720 | t->u.n = cp; | ||
721 | t->cap = len; | ||
722 | t->key = fb; | ||
723 | } | ||
724 | |||
725 | |||
726 | static int lp_utfr (lua_State *L) { | ||
727 | lua_Unsigned from = (lua_Unsigned)luaL_checkinteger(L, 1); | ||
728 | lua_Unsigned to = (lua_Unsigned)luaL_checkinteger(L, 2); | ||
729 | luaL_argcheck(L, from <= to, 2, "empty range"); | ||
730 | if (to <= 0x7f) { /* ascii range? */ | ||
731 | TTree *tree = newcharset(L); /* code it as a regular charset */ | ||
732 | unsigned int f; | ||
733 | for (f = (int)from; f <= to; f++) | ||
734 | setchar(treebuffer(tree), f); | ||
735 | } | ||
736 | else { /* multi-byte utf-8 range */ | ||
737 | TTree *tree = newtree(L, 2); | ||
738 | tree->tag = TUTFR; | ||
739 | codeutftree(L, tree, from, 1); | ||
740 | sib1(tree)->tag = TXInfo; | ||
741 | codeutftree(L, sib1(tree), to, 2); | ||
742 | } | ||
743 | return 1; | ||
744 | } | ||
745 | |||
746 | |||
747 | /* | ||
698 | ** Look-behind predicate | 748 | ** Look-behind predicate |
699 | */ | 749 | */ |
700 | static int lp_behind (lua_State *L) { | 750 | static int lp_behind (lua_State *L) { |
@@ -940,7 +990,7 @@ static int collectrules (lua_State *L, int arg, int *totalsize) { | |||
940 | int size; /* accumulator for total size */ | 990 | int size; /* accumulator for total size */ |
941 | lua_newtable(L); /* create position table */ | 991 | lua_newtable(L); /* create position table */ |
942 | getfirstrule(L, arg, postab); | 992 | getfirstrule(L, arg, postab); |
943 | size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */ | 993 | size = 3 + getsize(L, postab + 2); /* TGrammar + TRule + TXInfo + rule */ |
944 | lua_pushnil(L); /* prepare to traverse grammar table */ | 994 | lua_pushnil(L); /* prepare to traverse grammar table */ |
945 | while (lua_next(L, arg) != 0) { | 995 | while (lua_next(L, arg) != 0) { |
946 | if (lua_tonumber(L, -2) == 1 || | 996 | if (lua_tonumber(L, -2) == 1 || |
@@ -954,11 +1004,11 @@ static int collectrules (lua_State *L, int arg, int *totalsize) { | |||
954 | lua_pushvalue(L, -2); /* push key (to insert into position table) */ | 1004 | lua_pushvalue(L, -2); /* push key (to insert into position table) */ |
955 | lua_pushinteger(L, size); | 1005 | lua_pushinteger(L, size); |
956 | lua_settable(L, postab); | 1006 | lua_settable(L, postab); |
957 | size += 1 + getsize(L, -1); /* update size */ | 1007 | size += 2 + getsize(L, -1); /* add 'TRule + TXInfo + rule' to size */ |
958 | lua_pushvalue(L, -2); /* push key (for next lua_next) */ | 1008 | lua_pushvalue(L, -2); /* push key (for next lua_next) */ |
959 | n++; | 1009 | n++; |
960 | } | 1010 | } |
961 | *totalsize = size + 1; /* TTrue to finish list of rules */ | 1011 | *totalsize = size + 1; /* space for 'TTrue' finishing list of rules */ |
962 | return n; | 1012 | return n; |
963 | } | 1013 | } |
964 | 1014 | ||
@@ -970,11 +1020,13 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | |||
970 | int ridx = frule + 2*i + 1; /* index of i-th rule */ | 1020 | int ridx = frule + 2*i + 1; /* index of i-th rule */ |
971 | int rulesize; | 1021 | int rulesize; |
972 | TTree *rn = gettree(L, ridx, &rulesize); | 1022 | TTree *rn = gettree(L, ridx, &rulesize); |
1023 | TTree *pr = sib1(nd); /* points to rule's prerule */ | ||
973 | nd->tag = TRule; | 1024 | nd->tag = TRule; |
974 | nd->key = 0; /* will be fixed when rule is used */ | 1025 | nd->key = 0; /* will be fixed when rule is used */ |
975 | nd->cap = i; /* rule number */ | 1026 | pr->tag = TXInfo; |
976 | nd->u.ps = rulesize + 1; /* point to next rule */ | 1027 | pr->u.n = i; /* rule number */ |
977 | memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ | 1028 | nd->u.ps = rulesize + 2; /* point to next rule */ |
1029 | memcpy(sib1(pr), rn, rulesize * sizeof(TTree)); /* copy rule */ | ||
978 | mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ | 1030 | mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ |
979 | nd = sib2(nd); /* move to next rule */ | 1031 | nd = sib2(nd); /* move to next rule */ |
980 | } | 1032 | } |
@@ -1010,7 +1062,7 @@ static int checkloops (TTree *tree) { | |||
1010 | ** twice in 'passed', there is path from it back to itself without | 1062 | ** twice in 'passed', there is path from it back to itself without |
1011 | ** advancing the subject. | 1063 | ** advancing the subject. |
1012 | */ | 1064 | */ |
1013 | static int verifyerror (lua_State *L, int *passed, int npassed) { | 1065 | static int verifyerror (lua_State *L, unsigned short *passed, int npassed) { |
1014 | int i, j; | 1066 | int i, j; |
1015 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | 1067 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ |
1016 | for (j = i - 1; j >= 0; j--) { | 1068 | for (j = i - 1; j >= 0; j--) { |
@@ -1035,12 +1087,13 @@ static int verifyerror (lua_State *L, int *passed, int npassed) { | |||
1035 | ** counts the elements in 'passed'. | 1087 | ** counts the elements in 'passed'. |
1036 | ** Assume ktable at the top of the stack. | 1088 | ** Assume ktable at the top of the stack. |
1037 | */ | 1089 | */ |
1038 | static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | 1090 | static int verifyrule (lua_State *L, TTree *tree, unsigned short *passed, |
1039 | int nb) { | 1091 | int npassed, int nb) { |
1040 | tailcall: | 1092 | tailcall: |
1041 | switch (tree->tag) { | 1093 | switch (tree->tag) { |
1042 | case TChar: case TSet: case TAny: | 1094 | case TChar: case TSet: case TAny: |
1043 | case TFalse: case TThrow: /* labeled failure */ | 1095 | case TFalse: case TUTFR: |
1096 | case TThrow: /* labeled failure */ | ||
1044 | return nb; /* cannot pass from here */ | 1097 | return nb; /* cannot pass from here */ |
1045 | case TTrue: | 1098 | case TTrue: |
1046 | case TBehind: /* look-behind cannot have calls */ | 1099 | case TBehind: /* look-behind cannot have calls */ |
@@ -1048,7 +1101,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | |||
1048 | case TNot: case TAnd: case TRep: | 1101 | case TNot: case TAnd: case TRep: |
1049 | /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ | 1102 | /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ |
1050 | tree = sib1(tree); nb = 1; goto tailcall; | 1103 | tree = sib1(tree); nb = 1; goto tailcall; |
1051 | case TCapture: case TRunTime: | 1104 | case TCapture: case TRunTime: case TXInfo: |
1052 | /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ | 1105 | /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ |
1053 | tree = sib1(tree); goto tailcall; | 1106 | tree = sib1(tree); goto tailcall; |
1054 | case TCall: | 1107 | case TCall: |
@@ -1059,15 +1112,15 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | |||
1059 | return nb; | 1112 | return nb; |
1060 | /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ | 1113 | /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ |
1061 | tree = sib2(tree); goto tailcall; | 1114 | tree = sib2(tree); goto tailcall; |
1062 | case TChoice: /* must check both children */ | 1115 | case TChoice: /* must check both children */ |
1063 | nb = verifyrule(L, sib1(tree), passed, npassed, nb); | 1116 | nb = verifyrule(L, sib1(tree), passed, npassed, nb); |
1064 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ | 1117 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ |
1065 | tree = sib2(tree); goto tailcall; | 1118 | tree = sib2(tree); goto tailcall; |
1066 | case TRule: | 1119 | case TRule: |
1067 | if (npassed >= MAXRULES) | 1120 | if (npassed >= MAXRULES) /* too many steps? */ |
1068 | return verifyerror(L, passed, npassed); | 1121 | return verifyerror(L, passed, npassed); /* error */ |
1069 | else { | 1122 | else { |
1070 | passed[npassed++] = tree->key; | 1123 | passed[npassed++] = tree->key; /* add rule to path */ |
1071 | /* return verifyrule(L, sib1(tree), passed, npassed); */ | 1124 | /* return verifyrule(L, sib1(tree), passed, npassed); */ |
1072 | tree = sib1(tree); goto tailcall; | 1125 | tree = sib1(tree); goto tailcall; |
1073 | } | 1126 | } |
@@ -1079,7 +1132,7 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, | |||
1079 | 1132 | ||
1080 | 1133 | ||
1081 | static void verifygrammar (lua_State *L, TTree *grammar) { | 1134 | static void verifygrammar (lua_State *L, TTree *grammar) { |
1082 | int passed[MAXRULES]; | 1135 | unsigned short passed[MAXRULES]; |
1083 | TTree *rule; | 1136 | TTree *rule; |
1084 | /* check left-recursive rules */ | 1137 | /* check left-recursive rules */ |
1085 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | 1138 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { |
@@ -1243,12 +1296,6 @@ static int lp_setmax (lua_State *L) { | |||
1243 | } | 1296 | } |
1244 | 1297 | ||
1245 | 1298 | ||
1246 | static int lp_version (lua_State *L) { | ||
1247 | lua_pushstring(L, VERSION); | ||
1248 | return 1; | ||
1249 | } | ||
1250 | |||
1251 | |||
1252 | static int lp_type (lua_State *L) { | 1299 | static int lp_type (lua_State *L) { |
1253 | if (testpattern(L, 1)) | 1300 | if (testpattern(L, 1)) |
1254 | lua_pushliteral(L, "pattern"); | 1301 | lua_pushliteral(L, "pattern"); |
@@ -1317,8 +1364,9 @@ static struct luaL_Reg pattreg[] = { | |||
1317 | {"P", lp_P}, | 1364 | {"P", lp_P}, |
1318 | {"S", lp_set}, | 1365 | {"S", lp_set}, |
1319 | {"R", lp_range}, | 1366 | {"R", lp_range}, |
1367 | {"utfR", lp_utfr}, | ||
1320 | {"locale", lp_locale}, | 1368 | {"locale", lp_locale}, |
1321 | {"version", lp_version}, | 1369 | {"version", NULL}, |
1322 | {"setmaxstack", lp_setmax}, | 1370 | {"setmaxstack", lp_setmax}, |
1323 | {"type", lp_type}, | 1371 | {"type", lp_type}, |
1324 | {"T", lp_throw}, /* labeled failure throw */ | 1372 | {"T", lp_throw}, /* labeled failure throw */ |
@@ -1347,6 +1395,8 @@ int luaopen_lpeglabel (lua_State *L) { /* labeled failure */ | |||
1347 | luaL_newlib(L, pattreg); | 1395 | luaL_newlib(L, pattreg); |
1348 | lua_pushvalue(L, -1); | 1396 | lua_pushvalue(L, -1); |
1349 | lua_setfield(L, -3, "__index"); | 1397 | lua_setfield(L, -3, "__index"); |
1398 | lua_pushliteral(L, "LPegLabel " VERSION); /* labeled failure */ | ||
1399 | lua_setfield(L, -2, "version"); | ||
1350 | return 1; | 1400 | return 1; |
1351 | } | 1401 | } |
1352 | 1402 | ||