diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2025-04-10 15:54:21 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2025-04-10 15:54:21 -0300 |
| commit | 0ac14cf58c5b1c954e979dc35e50e610e8dd5115 (patch) | |
| tree | 236555a518740f27b0df6b4209cbb3af352ee3b9 | |
| parent | 80ec9f932aa01d445e86c699523265359055e1bd (diff) | |
| download | lpeg-0ac14cf58c5b1c954e979dc35e50e610e8dd5115.tar.gz lpeg-0ac14cf58c5b1c954e979dc35e50e610e8dd5115.tar.bz2 lpeg-0ac14cf58c5b1c954e979dc35e50e610e8dd5115.zip | |
New way to detect loops in 'verifyrule'
Instead of counting visits, mark each rule being visited, as is usual
for detecting loops when traversing a graph.
| -rw-r--r-- | lptree.c | 82 | ||||
| -rw-r--r-- | lptree.h | 3 | ||||
| -rw-r--r-- | makefile | 4 |
3 files changed, 49 insertions, 40 deletions
| @@ -1016,6 +1016,13 @@ static int collectrules (lua_State *L, int arg, int *totalsize) { | |||
| 1016 | } | 1016 | } |
| 1017 | 1017 | ||
| 1018 | 1018 | ||
| 1019 | /* Status of a Rule */ | ||
| 1020 | #define RLNOTVISITED 0 /* rule not visited yet */ | ||
| 1021 | #define RLBEINGVISITED 1 /* rule is being visited */ | ||
| 1022 | #define RLNULL 2 /* rule was visited and is nullable */ | ||
| 1023 | #define RLNONNULL 3 /* rule was visited and is not nullable */ | ||
| 1024 | |||
| 1025 | |||
| 1019 | static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | 1026 | static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { |
| 1020 | int i; | 1027 | int i; |
| 1021 | TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ | 1028 | TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */ |
| @@ -1025,6 +1032,7 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { | |||
| 1025 | TTree *rn = gettree(L, ridx, &rulesize); | 1032 | TTree *rn = gettree(L, ridx, &rulesize); |
| 1026 | TTree *pr = sib1(nd); /* points to rule's prerule */ | 1033 | TTree *pr = sib1(nd); /* points to rule's prerule */ |
| 1027 | nd->tag = TRule; | 1034 | nd->tag = TRule; |
| 1035 | nd->cap = RLNOTVISITED; | ||
| 1028 | nd->key = 0; /* will be fixed when rule is used */ | 1036 | nd->key = 0; /* will be fixed when rule is used */ |
| 1029 | pr->tag = TXInfo; | 1037 | pr->tag = TXInfo; |
| 1030 | pr->u.n = i; /* rule number */ | 1038 | pr->u.n = i; /* rule number */ |
| @@ -1060,22 +1068,29 @@ static int checkloops (TTree *tree) { | |||
| 1060 | } | 1068 | } |
| 1061 | 1069 | ||
| 1062 | 1070 | ||
| 1071 | static int verifyrule (lua_State *L, TTree *tree, int nb); | ||
| 1072 | |||
| 1063 | /* | 1073 | /* |
| 1064 | ** Give appropriate error message for 'verifyrule'. If a rule appears | 1074 | ** Visiting a rule while verifying another rule |
| 1065 | ** twice in 'passed', there is path from it back to itself without | ||
| 1066 | ** advancing the subject. | ||
| 1067 | */ | 1075 | */ |
| 1068 | static int verifyerror (lua_State *L, unsigned short *passed, int npassed) { | 1076 | static int ruleentry (lua_State *L, TTree *tree, int nb) { |
| 1069 | int i, j; | 1077 | switch (tree->cap) { /* check rule status */ |
| 1070 | for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ | 1078 | case RLNOTVISITED: { /* rule not visited yet */ |
| 1071 | for (j = i - 1; j >= 0; j--) { | 1079 | int nbr; |
| 1072 | if (passed[i] == passed[j]) { | 1080 | tree->cap = RLBEINGVISITED; /* now we will visit it */ |
| 1073 | lua_rawgeti(L, -1, passed[i]); /* get rule's key */ | 1081 | nbr = verifyrule(L, sib1(tree), 0); |
| 1074 | return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); | 1082 | tree->cap = nbr ? RLNULL : RLNONNULL; /* save result */ |
| 1075 | } | 1083 | return nbr || nb; |
| 1076 | } | 1084 | } |
| 1085 | case RLBEINGVISITED: /* visited again while being visited: loop! */ | ||
| 1086 | lua_rawgeti(L, -1, tree->key); /* get rule's key */ | ||
| 1087 | return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); | ||
| 1088 | case RLNULL: /* rule already visited; return saved result */ | ||
| 1089 | return 1; /* 1 || nb */ | ||
| 1090 | case RLNONNULL: /* rule already visited; return saved result */ | ||
| 1091 | return nb; /* 0 || nb */ | ||
| 1092 | default: assert(0); return 0; | ||
| 1077 | } | 1093 | } |
| 1078 | return luaL_error(L, "too many left calls in grammar"); | ||
| 1079 | } | 1094 | } |
| 1080 | 1095 | ||
| 1081 | 1096 | ||
| @@ -1085,13 +1100,10 @@ static int verifyerror (lua_State *L, unsigned short *passed, int npassed) { | |||
| 1085 | ** The return value is used to check sequences, where the second pattern | 1100 | ** The return value is used to check sequences, where the second pattern |
| 1086 | ** is only relevant if the first is nullable. | 1101 | ** is only relevant if the first is nullable. |
| 1087 | ** Parameter 'nb' works as an accumulator, to allow tail calls in | 1102 | ** Parameter 'nb' works as an accumulator, to allow tail calls in |
| 1088 | ** choices. ('nb' true makes function returns true.) | 1103 | ** choices. ('nb' true makes function return true.) |
| 1089 | ** Parameter 'passed' is a list of already visited rules, 'npassed' | ||
| 1090 | ** counts the elements in 'passed'. | ||
| 1091 | ** Assume ktable at the top of the stack. | 1104 | ** Assume ktable at the top of the stack. |
| 1092 | */ | 1105 | */ |
| 1093 | static int verifyrule (lua_State *L, TTree *tree, unsigned short *passed, | 1106 | static int verifyrule (lua_State *L, TTree *tree, int nb) { |
| 1094 | int npassed, int nb) { | ||
| 1095 | tailcall: | 1107 | tailcall: |
| 1096 | switch (tree->tag) { | 1108 | switch (tree->tag) { |
| 1097 | case TChar: case TSet: case TAny: | 1109 | case TChar: case TSet: case TAny: |
| @@ -1101,45 +1113,41 @@ static int verifyrule (lua_State *L, TTree *tree, unsigned short *passed, | |||
| 1101 | case TBehind: /* look-behind cannot have calls */ | 1113 | case TBehind: /* look-behind cannot have calls */ |
| 1102 | return 1; | 1114 | return 1; |
| 1103 | case TNot: case TAnd: case TRep: | 1115 | case TNot: case TAnd: case TRep: |
| 1104 | /* return verifyrule(L, sib1(tree), passed, npassed, 1); */ | 1116 | /* return verifyrule(L, sib1(tree), 1); */ |
| 1105 | tree = sib1(tree); nb = 1; goto tailcall; | 1117 | tree = sib1(tree); nb = 1; goto tailcall; |
| 1106 | case TCapture: case TRunTime: case TXInfo: | 1118 | case TCapture: case TRunTime: case TXInfo: |
| 1107 | /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ | 1119 | /* return verifyrule(L, sib1(tree), nb); */ |
| 1108 | tree = sib1(tree); goto tailcall; | 1120 | tree = sib1(tree); goto tailcall; |
| 1109 | case TCall: | 1121 | case TCall: |
| 1110 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ | 1122 | /* return verifyrule(L, sib2(tree), nb); */ |
| 1111 | tree = sib2(tree); goto tailcall; | 1123 | tree = sib2(tree); goto tailcall; |
| 1112 | case TSeq: /* only check 2nd child if first is nb */ | 1124 | case TSeq: /* do not check 2nd child if first is not nullable */ |
| 1113 | if (!verifyrule(L, sib1(tree), passed, npassed, 0)) | 1125 | if (verifyrule(L, sib1(tree), 0) == 0) |
| 1114 | return nb; | 1126 | return nb; |
| 1115 | /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */ | 1127 | /* else return verifyrule(L, sib2(tree), nb); */ |
| 1116 | tree = sib2(tree); goto tailcall; | 1128 | tree = sib2(tree); goto tailcall; |
| 1117 | case TChoice: /* must check both children */ | 1129 | case TChoice: /* must check both children */ |
| 1118 | nb = verifyrule(L, sib1(tree), passed, npassed, nb); | 1130 | nb = verifyrule(L, sib1(tree), nb); |
| 1119 | /* return verifyrule(L, sib2(tree), passed, npassed, nb); */ | 1131 | /* return verifyrule(L, sib2(tree), nb); */ |
| 1120 | tree = sib2(tree); goto tailcall; | 1132 | tree = sib2(tree); goto tailcall; |
| 1121 | case TRule: | 1133 | case TRule: |
| 1122 | if (npassed >= MAXRULES) /* too many steps? */ | 1134 | return ruleentry(L, tree, nb); |
| 1123 | return verifyerror(L, passed, npassed); /* error */ | 1135 | case TGrammar: /* nested grammar */ |
| 1124 | else { | 1136 | assert(sib1(tree)->tag == TRule); /* grammar's first rule */ |
| 1125 | passed[npassed++] = tree->key; /* add rule to path */ | 1137 | /* that rule must have been visited */ |
| 1126 | /* return verifyrule(L, sib1(tree), passed, npassed, nb); */ | 1138 | assert(sib1(tree)->cap == RLNULL || sib1(tree)->cap == RLNONNULL); |
| 1127 | tree = sib1(tree); goto tailcall; | 1139 | return (sib1(tree)->cap == RLNULL) ? 1 : nb; |
| 1128 | } | ||
| 1129 | case TGrammar: | ||
| 1130 | return nullable(tree); /* sub-grammar cannot be left recursive */ | ||
| 1131 | default: assert(0); return 0; | 1140 | default: assert(0); return 0; |
| 1132 | } | 1141 | } |
| 1133 | } | 1142 | } |
| 1134 | 1143 | ||
| 1135 | 1144 | ||
| 1136 | static void verifygrammar (lua_State *L, TTree *grammar) { | 1145 | static void verifygrammar (lua_State *L, TTree *grammar) { |
| 1137 | unsigned short passed[MAXRULES]; | ||
| 1138 | TTree *rule; | 1146 | TTree *rule; |
| 1139 | /* check left-recursive rules */ | 1147 | /* check left-recursive rules */ |
| 1140 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | 1148 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { |
| 1141 | if (rule->key == 0) continue; /* unused rule */ | 1149 | if (rule->key != 0) /* used rule? */ |
| 1142 | verifyrule(L, sib1(rule), passed, 0, 0); | 1150 | verifyrule(L, rule, 0); |
| 1143 | } | 1151 | } |
| 1144 | assert(rule->tag == TTrue); | 1152 | assert(rule->tag == TTrue); |
| 1145 | /* check infinite loops inside rules */ | 1153 | /* check infinite loops inside rules */ |
| @@ -27,7 +27,8 @@ typedef enum TTag { | |||
| 27 | TOpenCall, /* ktable[key] is rule's key */ | 27 | TOpenCall, /* ktable[key] is rule's key */ |
| 28 | TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); | 28 | TRule, /* ktable[key] is rule's key (but key == 0 for unused rules); |
| 29 | 'sib1' is rule's pattern pre-rule; 'sib2' is next rule; | 29 | 'sib1' is rule's pattern pre-rule; 'sib2' is next rule; |
| 30 | extra info 'n' is rule's sequential number */ | 30 | extra info 'n' is rule's sequential number; |
| 31 | 'cap' is rule status during 'verifyrule' */ | ||
| 31 | TXInfo, /* extra info */ | 32 | TXInfo, /* extra info */ |
| 32 | TGrammar, /* 'sib1' is initial (and first) rule */ | 33 | TGrammar, /* 'sib1' is initial (and first) rule */ |
| 33 | TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ | 34 | TBehind, /* 'sib1' is pattern, 'n' is how much to go back */ |
| @@ -1,11 +1,11 @@ | |||
| 1 | LIBNAME = lpeg | 1 | LIBNAME = lpeg |
| 2 | LUADIR = ./lua/ | 2 | LUADIR = /home/roberto/prj/lua/ |
| 3 | #LUADIR = /home/roberto/prj/lua/ | ||
| 4 | 3 | ||
| 5 | COPT = -O2 -DNDEBUG | 4 | COPT = -O2 -DNDEBUG |
| 6 | # COPT = -O0 -DLPEG_DEBUG -g | 5 | # COPT = -O0 -DLPEG_DEBUG -g |
| 7 | 6 | ||
| 8 | CWARNS = -Wall -Wextra -pedantic \ | 7 | CWARNS = -Wall -Wextra -pedantic \ |
| 8 | -Wfatal-errors \ | ||
| 9 | -Waggregate-return \ | 9 | -Waggregate-return \ |
| 10 | -Wcast-align \ | 10 | -Wcast-align \ |
| 11 | -Wcast-qual \ | 11 | -Wcast-qual \ |
