aboutsummaryrefslogtreecommitdiff
path: root/lstrlib.c
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>1999-05-05 16:23:11 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>1999-05-05 16:23:11 -0300
commit7808ea3a5ffe8c2045dc76099f8968e3b8104360 (patch)
treefdda1783dcbd47a5d7de0ab3ddae11b7c5afc275 /lstrlib.c
parent732741b62fe1cb9cf19ecca8b210474d076ba8b3 (diff)
downloadlua-7808ea3a5ffe8c2045dc76099f8968e3b8104360.tar.gz
lua-7808ea3a5ffe8c2045dc76099f8968e3b8104360.tar.bz2
lua-7808ea3a5ffe8c2045dc76099f8968e3b8104360.zip
new implementation for '*' in patterns + new option '+'
Diffstat (limited to 'lstrlib.c')
-rw-r--r--lstrlib.c238
1 files changed, 139 insertions, 99 deletions
diff --git a/lstrlib.c b/lstrlib.c
index 977861e4..bcfa457e 100644
--- a/lstrlib.c
+++ b/lstrlib.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lstrlib.c,v 1.28 1999/02/26 15:49:53 roberto Exp roberto $ 2** $Id: lstrlib.c,v 1.29 1999/04/30 14:12:05 roberto Exp roberto $
3** Standard library for strings and pattern-matching 3** Standard library for strings and pattern-matching
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -130,7 +130,7 @@ struct Capture {
130 130
131 131
132#define ESC '%' 132#define ESC '%'
133#define SPECIALS "^$*?.([%-" 133#define SPECIALS "^$*+?.([%-"
134 134
135 135
136static void push_captures (struct Capture *cap) { 136static void push_captures (struct Capture *cap) {
@@ -160,8 +160,21 @@ static int capture_to_close (struct Capture *cap) {
160} 160}
161 161
162 162
163static char *bracket_end (char *p) { 163char *luaI_classend (char *p) {
164 return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']'); 164 switch (*p++) {
165 case ESC:
166 if (*p == '\0')
167 luaL_verror("incorrect pattern (ends with `%c')", ESC);
168 return p+1;
169 case '[':
170 if (*p == '^') p++;
171 if (*p == ']') p++;
172 p = strchr(p, ']');
173 if (!p) lua_error("incorrect pattern (missing `]')");
174 return p+1;
175 default:
176 return p;
177 }
165} 178}
166 179
167 180
@@ -184,48 +197,55 @@ static int matchclass (int c, int cl) {
184} 197}
185 198
186 199
187int luaI_singlematch (int c, char *p, char **ep) { 200
201static int matchbracketclass (int c, char *p, char *end) {
202 int sig = 1;
203 if (*(p+1) == '^') {
204 sig = 0;
205 p++; /* skip the '^' */
206 }
207 while (++p < end) {
208 if (*p == ESC) {
209 p++;
210 if ((p < end) && matchclass(c, (unsigned char)*p))
211 return sig;
212 }
213 else if ((*(p+1) == '-') && (p+2 < end)) {
214 p+=2;
215 if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
216 return sig;
217 }
218 else if ((unsigned char)*p == c) return sig;
219 }
220 return !sig;
221}
222
223
224
225int luaI_singlematch (int c, char *p, char *ep) {
188 switch (*p) { 226 switch (*p) {
189 case '.': /* matches any char */ 227 case '.': /* matches any char */
190 *ep = p+1;
191 return 1; 228 return 1;
192 case '\0': /* end of pattern; matches nothing */
193 *ep = p;
194 return 0;
195 case ESC: 229 case ESC:
196 if (*(++p) == '\0') 230 return matchclass(c, (unsigned char)*(p+1));
197 luaL_verror("incorrect pattern (ends with `%c')", ESC); 231 case '[':
198 *ep = p+1; 232 return matchbracketclass(c, p, ep-1);
199 return matchclass(c, (unsigned char)*p);
200 case '[': {
201 char *end = bracket_end(p+1);
202 int sig = *(p+1) == '^' ? (p++, 0) : 1;
203 if (end == NULL) lua_error("incorrect pattern (missing `]')");
204 *ep = end+1;
205 while (++p < end) {
206 if (*p == ESC) {
207 if (((p+1) < end) && matchclass(c, (unsigned char)*++p))
208 return sig;
209 }
210 else if ((*(p+1) == '-') && (p+2 < end)) {
211 p+=2;
212 if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
213 return sig;
214 }
215 else if ((unsigned char)*p == c) return sig;
216 }
217 return !sig;
218 }
219 default: 233 default:
220 *ep = p+1;
221 return ((unsigned char)*p == c); 234 return ((unsigned char)*p == c);
222 } 235 }
223} 236}
224 237
225 238
226static char *matchbalance (char *s, int b, int e, struct Capture *cap) { 239static char *match (char *s, char *p, struct Capture *cap);
227 if (*s != b) return NULL; 240
241
242static char *matchbalance (char *s, char *p, struct Capture *cap) {
243 if (*p == 0 || *(p+1) == 0)
244 lua_error("unbalanced pattern");
245 if (*s != *p) return NULL;
228 else { 246 else {
247 int b = *p;
248 int e = *(p+1);
229 int cont = 1; 249 int cont = 1;
230 while (++s < cap->src_end) { 250 while (++s < cap->src_end) {
231 if (*s == e) { 251 if (*s == e) {
@@ -238,89 +258,109 @@ static char *matchbalance (char *s, int b, int e, struct Capture *cap) {
238} 258}
239 259
240 260
241static char *matchitem (char *s, char *p, struct Capture *cap, char **ep) { 261static char *max_expand (char *s, char *p, char *ep, struct Capture *cap) {
242 if (*p == ESC) { 262 int i = 0; /* counts maximum expand for item */
243 p++; 263 while ((s+i)<cap->src_end && luaI_singlematch((unsigned char)*(s+i), p, ep))
244 if (isdigit((unsigned char)*p)) { /* capture */ 264 i++;
245 int l = check_cap(*p, cap); 265 /* keeps trying to match mith the maximum repetitions */
246 int len = cap->capture[l].len; 266 while (i>=0) {
247 *ep = p+1; 267 char *res = match((s+i), ep+1, cap);
248 if (cap->src_end-s >= len && memcmp(cap->capture[l].init, s, len) == 0) 268 if (res) return res;
249 return s+len; 269 i--; /* else didn't match; reduce 1 repetition to try again */
250 else return NULL;
251 }
252 else if (*p == 'b') { /* balanced string */
253 p++;
254 if (*p == 0 || *(p+1) == 0)
255 lua_error("unbalanced pattern");
256 *ep = p+2;
257 return matchbalance(s, *p, *(p+1), cap);
258 }
259 else p--; /* and go through */
260 } 270 }
261 /* "luaI_singlematch" sets "ep" (so must be called even at the end of "s" */ 271 return NULL;
262 return (luaI_singlematch((unsigned char)*s, p, ep) && s<cap->src_end) ? 272}
263 s+1 : NULL; 273
274
275static char *min_expand (char *s, char *p, char *ep, struct Capture *cap) {
276 for (;;) {
277 char *res = match(s, ep+1, cap);
278 if (res != NULL)
279 return res;
280 else if (s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep))
281 s++; /* try with one more repetition */
282 else return NULL;
283 }
284}
285
286
287static char *start_capt (char *s, char *p, struct Capture *cap) {
288 char *res;
289 int level = cap->level;
290 if (level >= MAX_CAPT) lua_error("too many captures");
291 cap->capture[level].init = s;
292 cap->capture[level].len = -1;
293 cap->level = level+1;
294 if ((res=match(s, p+1, cap)) == NULL) /* match failed? */
295 cap->level--; /* undo capture */
296 return res;
297}
298
299
300static char *end_capt (char *s, char *p, struct Capture *cap) {
301 int l = capture_to_close(cap);
302 char *res;
303 cap->capture[l].len = s - cap->capture[l].init; /* close capture */
304 if ((res = match(s, p+1, cap)) == NULL) /* match failed? */
305 cap->capture[l].len = -1; /* undo capture */
306 return res;
307}
308
309
310static char *match_capture (char *s, int level, struct Capture *cap) {
311 int l = check_cap(level, cap);
312 int len = cap->capture[l].len;
313 if (cap->src_end-s >= len &&
314 memcmp(cap->capture[l].init, s, len) == 0)
315 return s+len;
316 else return NULL;
264} 317}
265 318
266 319
267static char *match (char *s, char *p, struct Capture *cap) { 320static char *match (char *s, char *p, struct Capture *cap) {
268 init: /* using goto's to optimize tail recursion */ 321 init: /* using goto's to optimize tail recursion */
269 switch (*p) { 322 switch (*p) {
270 case '(': { /* start capture */ 323 case '(': /* start capture */
271 char *res; 324 return start_capt(s, p, cap);
272 if (cap->level >= MAX_CAPT) lua_error("too many captures"); 325 case ')': /* end capture */
273 cap->capture[cap->level].init = s; 326 return end_capt(s, p, cap);
274 cap->capture[cap->level].len = -1; 327 case ESC: /* may be %[0-9] or %b */
275 cap->level++; 328 if (isdigit((unsigned char)(*(p+1)))) { /* capture? */
276 if ((res=match(s, p+1, cap)) == NULL) /* match failed? */ 329 s = match_capture(s, *(p+1), cap);
277 cap->level--; /* undo capture */ 330 if (s == NULL) return NULL;
278 return res; 331 p+=2; goto init; /* else return match(p+2, s, cap) */
279 } 332 }
280 case ')': { /* end capture */ 333 else if (*(p+1) == 'b') { /* balanced string? */
281 int l = capture_to_close(cap); 334 s = matchbalance(s, p+2, cap);
282 char *res; 335 if (s == NULL) return NULL;
283 cap->capture[l].len = s - cap->capture[l].init; /* close capture */ 336 p+=4; goto init; /* else return match(p+4, s, cap); */
284 if ((res = match(s, p+1, cap)) == NULL) /* match failed? */ 337 }
285 cap->capture[l].len = -1; /* undo capture */ 338 else goto dflt; /* case default */
286 return res;
287 }
288 case '\0': /* end of pattern */ 339 case '\0': /* end of pattern */
289 return s; /* match succeeded */ 340 return s; /* match succeeded */
290 case '$': 341 case '$':
291 if (*(p+1) == '\0') /* is the '$' the last char in pattern? */ 342 if (*(p+1) == '\0') /* is the '$' the last char in pattern? */
292 return (s == cap->src_end) ? s : NULL; /* check end of string */ 343 return (s == cap->src_end) ? s : NULL; /* check end of string */
293 /* else is a regular '$'; go through */ 344 else goto dflt;
294 default: { /* it is a pattern item */ 345 default: dflt: { /* it is a pattern item */
295 char *ep; /* will point to what is next */ 346 char *ep = luaI_classend(p); /* points to what is next */
296 char *s1 = matchitem(s, p, cap, &ep); 347 int m = s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep);
297 switch (*ep) { 348 switch (*ep) {
298 case '*': { /* repetition */
299 char *res;
300 if (s1 && s1>s && ((res=match(s1, p, cap)) != NULL))
301 return res;
302 p=ep+1; goto init; /* else return match(s, ep+1, cap); */
303 }
304 case '?': { /* optional */ 349 case '?': { /* optional */
305 char *res; 350 char *res;
306 if (s1 && ((res=match(s1, ep+1, cap)) != NULL)) 351 if (m && ((res=match(s+1, ep+1, cap)) != NULL))
307 return res; 352 return res;
308 p=ep+1; goto init; /* else return match(s, ep+1, cap); */ 353 p=ep+1; goto init; /* else return match(s, ep+1, cap); */
309 } 354 }
310 case '-': { /* repetition */ 355 case '*': /* 0 or more repetitions */
311 char *res; 356 return max_expand(s, p, ep, cap);
312 if ((res = match(s, ep+1, cap)) != NULL) 357 case '+': /* 1 or more repetitions */
313 return res; 358 return (m ? max_expand(s+1, p, ep, cap) : NULL);
314 else if (s1 && s1>s) { 359 case '-': /* 0 or more repetitions (minimum) */
315 s = s1; 360 return min_expand(s, p, ep, cap);
316 goto init; /* return match(s1, p, cap); */
317 }
318 else
319 return NULL;
320 }
321 default: 361 default:
322 if (s1) { s=s1; p=ep; goto init; } /* return match(s1, ep, cap); */ 362 if (!m) return NULL;
323 else return NULL; 363 s++; p=ep; goto init; /* else return match(s+1, ep, cap); */
324 } 364 }
325 } 365 }
326 } 366 }