diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1999-05-05 16:23:11 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1999-05-05 16:23:11 -0300 |
commit | 7808ea3a5ffe8c2045dc76099f8968e3b8104360 (patch) | |
tree | fdda1783dcbd47a5d7de0ab3ddae11b7c5afc275 /lstrlib.c | |
parent | 732741b62fe1cb9cf19ecca8b210474d076ba8b3 (diff) | |
download | lua-7808ea3a5ffe8c2045dc76099f8968e3b8104360.tar.gz lua-7808ea3a5ffe8c2045dc76099f8968e3b8104360.tar.bz2 lua-7808ea3a5ffe8c2045dc76099f8968e3b8104360.zip |
new implementation for '*' in patterns + new option '+'
Diffstat (limited to 'lstrlib.c')
-rw-r--r-- | lstrlib.c | 238 |
1 files changed, 139 insertions, 99 deletions
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lstrlib.c,v 1.28 1999/02/26 15:49:53 roberto Exp roberto $ | 2 | ** $Id: lstrlib.c,v 1.29 1999/04/30 14:12:05 roberto Exp roberto $ |
3 | ** Standard library for strings and pattern-matching | 3 | ** Standard library for strings and pattern-matching |
4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
5 | */ | 5 | */ |
@@ -130,7 +130,7 @@ struct Capture { | |||
130 | 130 | ||
131 | 131 | ||
132 | #define ESC '%' | 132 | #define ESC '%' |
133 | #define SPECIALS "^$*?.([%-" | 133 | #define SPECIALS "^$*+?.([%-" |
134 | 134 | ||
135 | 135 | ||
136 | static void push_captures (struct Capture *cap) { | 136 | static void push_captures (struct Capture *cap) { |
@@ -160,8 +160,21 @@ static int capture_to_close (struct Capture *cap) { | |||
160 | } | 160 | } |
161 | 161 | ||
162 | 162 | ||
163 | static char *bracket_end (char *p) { | 163 | char *luaI_classend (char *p) { |
164 | return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']'); | 164 | switch (*p++) { |
165 | case ESC: | ||
166 | if (*p == '\0') | ||
167 | luaL_verror("incorrect pattern (ends with `%c')", ESC); | ||
168 | return p+1; | ||
169 | case '[': | ||
170 | if (*p == '^') p++; | ||
171 | if (*p == ']') p++; | ||
172 | p = strchr(p, ']'); | ||
173 | if (!p) lua_error("incorrect pattern (missing `]')"); | ||
174 | return p+1; | ||
175 | default: | ||
176 | return p; | ||
177 | } | ||
165 | } | 178 | } |
166 | 179 | ||
167 | 180 | ||
@@ -184,48 +197,55 @@ static int matchclass (int c, int cl) { | |||
184 | } | 197 | } |
185 | 198 | ||
186 | 199 | ||
187 | int luaI_singlematch (int c, char *p, char **ep) { | 200 | |
201 | static int matchbracketclass (int c, char *p, char *end) { | ||
202 | int sig = 1; | ||
203 | if (*(p+1) == '^') { | ||
204 | sig = 0; | ||
205 | p++; /* skip the '^' */ | ||
206 | } | ||
207 | while (++p < end) { | ||
208 | if (*p == ESC) { | ||
209 | p++; | ||
210 | if ((p < end) && matchclass(c, (unsigned char)*p)) | ||
211 | return sig; | ||
212 | } | ||
213 | else if ((*(p+1) == '-') && (p+2 < end)) { | ||
214 | p+=2; | ||
215 | if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p) | ||
216 | return sig; | ||
217 | } | ||
218 | else if ((unsigned char)*p == c) return sig; | ||
219 | } | ||
220 | return !sig; | ||
221 | } | ||
222 | |||
223 | |||
224 | |||
225 | int luaI_singlematch (int c, char *p, char *ep) { | ||
188 | switch (*p) { | 226 | switch (*p) { |
189 | case '.': /* matches any char */ | 227 | case '.': /* matches any char */ |
190 | *ep = p+1; | ||
191 | return 1; | 228 | return 1; |
192 | case '\0': /* end of pattern; matches nothing */ | ||
193 | *ep = p; | ||
194 | return 0; | ||
195 | case ESC: | 229 | case ESC: |
196 | if (*(++p) == '\0') | 230 | return matchclass(c, (unsigned char)*(p+1)); |
197 | luaL_verror("incorrect pattern (ends with `%c')", ESC); | 231 | case '[': |
198 | *ep = p+1; | 232 | return matchbracketclass(c, p, ep-1); |
199 | return matchclass(c, (unsigned char)*p); | ||
200 | case '[': { | ||
201 | char *end = bracket_end(p+1); | ||
202 | int sig = *(p+1) == '^' ? (p++, 0) : 1; | ||
203 | if (end == NULL) lua_error("incorrect pattern (missing `]')"); | ||
204 | *ep = end+1; | ||
205 | while (++p < end) { | ||
206 | if (*p == ESC) { | ||
207 | if (((p+1) < end) && matchclass(c, (unsigned char)*++p)) | ||
208 | return sig; | ||
209 | } | ||
210 | else if ((*(p+1) == '-') && (p+2 < end)) { | ||
211 | p+=2; | ||
212 | if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p) | ||
213 | return sig; | ||
214 | } | ||
215 | else if ((unsigned char)*p == c) return sig; | ||
216 | } | ||
217 | return !sig; | ||
218 | } | ||
219 | default: | 233 | default: |
220 | *ep = p+1; | ||
221 | return ((unsigned char)*p == c); | 234 | return ((unsigned char)*p == c); |
222 | } | 235 | } |
223 | } | 236 | } |
224 | 237 | ||
225 | 238 | ||
226 | static char *matchbalance (char *s, int b, int e, struct Capture *cap) { | 239 | static char *match (char *s, char *p, struct Capture *cap); |
227 | if (*s != b) return NULL; | 240 | |
241 | |||
242 | static char *matchbalance (char *s, char *p, struct Capture *cap) { | ||
243 | if (*p == 0 || *(p+1) == 0) | ||
244 | lua_error("unbalanced pattern"); | ||
245 | if (*s != *p) return NULL; | ||
228 | else { | 246 | else { |
247 | int b = *p; | ||
248 | int e = *(p+1); | ||
229 | int cont = 1; | 249 | int cont = 1; |
230 | while (++s < cap->src_end) { | 250 | while (++s < cap->src_end) { |
231 | if (*s == e) { | 251 | if (*s == e) { |
@@ -238,89 +258,109 @@ static char *matchbalance (char *s, int b, int e, struct Capture *cap) { | |||
238 | } | 258 | } |
239 | 259 | ||
240 | 260 | ||
241 | static char *matchitem (char *s, char *p, struct Capture *cap, char **ep) { | 261 | static char *max_expand (char *s, char *p, char *ep, struct Capture *cap) { |
242 | if (*p == ESC) { | 262 | int i = 0; /* counts maximum expand for item */ |
243 | p++; | 263 | while ((s+i)<cap->src_end && luaI_singlematch((unsigned char)*(s+i), p, ep)) |
244 | if (isdigit((unsigned char)*p)) { /* capture */ | 264 | i++; |
245 | int l = check_cap(*p, cap); | 265 | /* keeps trying to match mith the maximum repetitions */ |
246 | int len = cap->capture[l].len; | 266 | while (i>=0) { |
247 | *ep = p+1; | 267 | char *res = match((s+i), ep+1, cap); |
248 | if (cap->src_end-s >= len && memcmp(cap->capture[l].init, s, len) == 0) | 268 | if (res) return res; |
249 | return s+len; | 269 | i--; /* else didn't match; reduce 1 repetition to try again */ |
250 | else return NULL; | ||
251 | } | ||
252 | else if (*p == 'b') { /* balanced string */ | ||
253 | p++; | ||
254 | if (*p == 0 || *(p+1) == 0) | ||
255 | lua_error("unbalanced pattern"); | ||
256 | *ep = p+2; | ||
257 | return matchbalance(s, *p, *(p+1), cap); | ||
258 | } | ||
259 | else p--; /* and go through */ | ||
260 | } | 270 | } |
261 | /* "luaI_singlematch" sets "ep" (so must be called even at the end of "s" */ | 271 | return NULL; |
262 | return (luaI_singlematch((unsigned char)*s, p, ep) && s<cap->src_end) ? | 272 | } |
263 | s+1 : NULL; | 273 | |
274 | |||
275 | static char *min_expand (char *s, char *p, char *ep, struct Capture *cap) { | ||
276 | for (;;) { | ||
277 | char *res = match(s, ep+1, cap); | ||
278 | if (res != NULL) | ||
279 | return res; | ||
280 | else if (s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep)) | ||
281 | s++; /* try with one more repetition */ | ||
282 | else return NULL; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | |||
287 | static char *start_capt (char *s, char *p, struct Capture *cap) { | ||
288 | char *res; | ||
289 | int level = cap->level; | ||
290 | if (level >= MAX_CAPT) lua_error("too many captures"); | ||
291 | cap->capture[level].init = s; | ||
292 | cap->capture[level].len = -1; | ||
293 | cap->level = level+1; | ||
294 | if ((res=match(s, p+1, cap)) == NULL) /* match failed? */ | ||
295 | cap->level--; /* undo capture */ | ||
296 | return res; | ||
297 | } | ||
298 | |||
299 | |||
300 | static char *end_capt (char *s, char *p, struct Capture *cap) { | ||
301 | int l = capture_to_close(cap); | ||
302 | char *res; | ||
303 | cap->capture[l].len = s - cap->capture[l].init; /* close capture */ | ||
304 | if ((res = match(s, p+1, cap)) == NULL) /* match failed? */ | ||
305 | cap->capture[l].len = -1; /* undo capture */ | ||
306 | return res; | ||
307 | } | ||
308 | |||
309 | |||
310 | static char *match_capture (char *s, int level, struct Capture *cap) { | ||
311 | int l = check_cap(level, cap); | ||
312 | int len = cap->capture[l].len; | ||
313 | if (cap->src_end-s >= len && | ||
314 | memcmp(cap->capture[l].init, s, len) == 0) | ||
315 | return s+len; | ||
316 | else return NULL; | ||
264 | } | 317 | } |
265 | 318 | ||
266 | 319 | ||
267 | static char *match (char *s, char *p, struct Capture *cap) { | 320 | static char *match (char *s, char *p, struct Capture *cap) { |
268 | init: /* using goto's to optimize tail recursion */ | 321 | init: /* using goto's to optimize tail recursion */ |
269 | switch (*p) { | 322 | switch (*p) { |
270 | case '(': { /* start capture */ | 323 | case '(': /* start capture */ |
271 | char *res; | 324 | return start_capt(s, p, cap); |
272 | if (cap->level >= MAX_CAPT) lua_error("too many captures"); | 325 | case ')': /* end capture */ |
273 | cap->capture[cap->level].init = s; | 326 | return end_capt(s, p, cap); |
274 | cap->capture[cap->level].len = -1; | 327 | case ESC: /* may be %[0-9] or %b */ |
275 | cap->level++; | 328 | if (isdigit((unsigned char)(*(p+1)))) { /* capture? */ |
276 | if ((res=match(s, p+1, cap)) == NULL) /* match failed? */ | 329 | s = match_capture(s, *(p+1), cap); |
277 | cap->level--; /* undo capture */ | 330 | if (s == NULL) return NULL; |
278 | return res; | 331 | p+=2; goto init; /* else return match(p+2, s, cap) */ |
279 | } | 332 | } |
280 | case ')': { /* end capture */ | 333 | else if (*(p+1) == 'b') { /* balanced string? */ |
281 | int l = capture_to_close(cap); | 334 | s = matchbalance(s, p+2, cap); |
282 | char *res; | 335 | if (s == NULL) return NULL; |
283 | cap->capture[l].len = s - cap->capture[l].init; /* close capture */ | 336 | p+=4; goto init; /* else return match(p+4, s, cap); */ |
284 | if ((res = match(s, p+1, cap)) == NULL) /* match failed? */ | 337 | } |
285 | cap->capture[l].len = -1; /* undo capture */ | 338 | else goto dflt; /* case default */ |
286 | return res; | ||
287 | } | ||
288 | case '\0': /* end of pattern */ | 339 | case '\0': /* end of pattern */ |
289 | return s; /* match succeeded */ | 340 | return s; /* match succeeded */ |
290 | case '$': | 341 | case '$': |
291 | if (*(p+1) == '\0') /* is the '$' the last char in pattern? */ | 342 | if (*(p+1) == '\0') /* is the '$' the last char in pattern? */ |
292 | return (s == cap->src_end) ? s : NULL; /* check end of string */ | 343 | return (s == cap->src_end) ? s : NULL; /* check end of string */ |
293 | /* else is a regular '$'; go through */ | 344 | else goto dflt; |
294 | default: { /* it is a pattern item */ | 345 | default: dflt: { /* it is a pattern item */ |
295 | char *ep; /* will point to what is next */ | 346 | char *ep = luaI_classend(p); /* points to what is next */ |
296 | char *s1 = matchitem(s, p, cap, &ep); | 347 | int m = s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep); |
297 | switch (*ep) { | 348 | switch (*ep) { |
298 | case '*': { /* repetition */ | ||
299 | char *res; | ||
300 | if (s1 && s1>s && ((res=match(s1, p, cap)) != NULL)) | ||
301 | return res; | ||
302 | p=ep+1; goto init; /* else return match(s, ep+1, cap); */ | ||
303 | } | ||
304 | case '?': { /* optional */ | 349 | case '?': { /* optional */ |
305 | char *res; | 350 | char *res; |
306 | if (s1 && ((res=match(s1, ep+1, cap)) != NULL)) | 351 | if (m && ((res=match(s+1, ep+1, cap)) != NULL)) |
307 | return res; | 352 | return res; |
308 | p=ep+1; goto init; /* else return match(s, ep+1, cap); */ | 353 | p=ep+1; goto init; /* else return match(s, ep+1, cap); */ |
309 | } | 354 | } |
310 | case '-': { /* repetition */ | 355 | case '*': /* 0 or more repetitions */ |
311 | char *res; | 356 | return max_expand(s, p, ep, cap); |
312 | if ((res = match(s, ep+1, cap)) != NULL) | 357 | case '+': /* 1 or more repetitions */ |
313 | return res; | 358 | return (m ? max_expand(s+1, p, ep, cap) : NULL); |
314 | else if (s1 && s1>s) { | 359 | case '-': /* 0 or more repetitions (minimum) */ |
315 | s = s1; | 360 | return min_expand(s, p, ep, cap); |
316 | goto init; /* return match(s1, p, cap); */ | ||
317 | } | ||
318 | else | ||
319 | return NULL; | ||
320 | } | ||
321 | default: | 361 | default: |
322 | if (s1) { s=s1; p=ep; goto init; } /* return match(s1, ep, cap); */ | 362 | if (!m) return NULL; |
323 | else return NULL; | 363 | s++; p=ep; goto init; /* else return match(s+1, ep, cap); */ |
324 | } | 364 | } |
325 | } | 365 | } |
326 | } | 366 | } |