aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--liolib.c26
-rw-r--r--lstrlib.c238
-rw-r--r--lualib.h5
3 files changed, 156 insertions, 113 deletions
diff --git a/liolib.c b/liolib.c
index 13719590..f43c3fd0 100644
--- a/liolib.c
+++ b/liolib.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: liolib.c,v 1.37 1999/04/05 19:47:05 roberto Exp roberto $ 2** $Id: liolib.c,v 1.38 1999/04/14 20:40:32 roberto Exp $
3** Standard I/O (and system) library 3** Standard I/O (and system) library
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -244,23 +244,25 @@ static int read_pattern (FILE *f, char *p) {
244 p++; 244 p++;
245 continue; 245 continue;
246 default: { 246 default: {
247 char *ep; /* get what is next */ 247 char *ep = luaI_classend(p); /* get what is next */
248 int m; /* match result */ 248 int m; /* match result */
249 if (c == NEED_OTHER) c = getc(f); 249 if (c == NEED_OTHER) c = getc(f);
250 if (c != EOF) 250 m = (c==EOF) ? 0 : luaI_singlematch(c, p, ep);
251 m = luaI_singlematch(c, p, &ep);
252 else {
253 luaI_singlematch(0, p, &ep); /* to set "ep" */
254 m = 0; /* EOF matches no pattern */
255 }
256 if (m) { 251 if (m) {
257 if (!inskip) luaL_addchar(c); 252 if (!inskip) luaL_addchar(c);
258 c = NEED_OTHER; 253 c = NEED_OTHER;
259 } 254 }
260 switch (*ep) { 255 switch (*ep) {
261 case '*': /* repetition */ 256 case '+': /* repetition (1 or more) */
262 if (!m) p = ep+1; /* else stay in (repeat) the same item */ 257 if (!m) goto break_while; /* pattern fails? */
263 continue; 258 /* else go through */
259 case '*': /* repetition (0 or more) */
260 while (m) { /* reads the same item until it fails */
261 c = getc(f);
262 m = (c==EOF) ? 0 : luaI_singlematch(c, p, ep);
263 if (m && !inskip) luaL_addchar(c);
264 }
265 /* go through to continue reading the pattern */
264 case '?': /* optional */ 266 case '?': /* optional */
265 p = ep+1; /* continues reading the pattern */ 267 p = ep+1; /* continues reading the pattern */
266 continue; 268 continue;
@@ -336,7 +338,7 @@ static void io_read (void) {
336 success = 1; /* always success */ 338 success = 1; /* always success */
337 break; 339 break;
338 case 4: /* word */ 340 case 4: /* word */
339 success = read_pattern(f, "{%s*}%S%S*"); 341 success = read_pattern(f, "{%s*}%S+");
340 break; 342 break;
341 default: 343 default:
342 success = read_pattern(f, p); 344 success = read_pattern(f, p);
diff --git a/lstrlib.c b/lstrlib.c
index 977861e4..bcfa457e 100644
--- a/lstrlib.c
+++ b/lstrlib.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lstrlib.c,v 1.28 1999/02/26 15:49:53 roberto Exp roberto $ 2** $Id: lstrlib.c,v 1.29 1999/04/30 14:12:05 roberto Exp roberto $
3** Standard library for strings and pattern-matching 3** Standard library for strings and pattern-matching
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -130,7 +130,7 @@ struct Capture {
130 130
131 131
132#define ESC '%' 132#define ESC '%'
133#define SPECIALS "^$*?.([%-" 133#define SPECIALS "^$*+?.([%-"
134 134
135 135
136static void push_captures (struct Capture *cap) { 136static void push_captures (struct Capture *cap) {
@@ -160,8 +160,21 @@ static int capture_to_close (struct Capture *cap) {
160} 160}
161 161
162 162
163static char *bracket_end (char *p) { 163char *luaI_classend (char *p) {
164 return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']'); 164 switch (*p++) {
165 case ESC:
166 if (*p == '\0')
167 luaL_verror("incorrect pattern (ends with `%c')", ESC);
168 return p+1;
169 case '[':
170 if (*p == '^') p++;
171 if (*p == ']') p++;
172 p = strchr(p, ']');
173 if (!p) lua_error("incorrect pattern (missing `]')");
174 return p+1;
175 default:
176 return p;
177 }
165} 178}
166 179
167 180
@@ -184,48 +197,55 @@ static int matchclass (int c, int cl) {
184} 197}
185 198
186 199
187int luaI_singlematch (int c, char *p, char **ep) { 200
201static int matchbracketclass (int c, char *p, char *end) {
202 int sig = 1;
203 if (*(p+1) == '^') {
204 sig = 0;
205 p++; /* skip the '^' */
206 }
207 while (++p < end) {
208 if (*p == ESC) {
209 p++;
210 if ((p < end) && matchclass(c, (unsigned char)*p))
211 return sig;
212 }
213 else if ((*(p+1) == '-') && (p+2 < end)) {
214 p+=2;
215 if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
216 return sig;
217 }
218 else if ((unsigned char)*p == c) return sig;
219 }
220 return !sig;
221}
222
223
224
225int luaI_singlematch (int c, char *p, char *ep) {
188 switch (*p) { 226 switch (*p) {
189 case '.': /* matches any char */ 227 case '.': /* matches any char */
190 *ep = p+1;
191 return 1; 228 return 1;
192 case '\0': /* end of pattern; matches nothing */
193 *ep = p;
194 return 0;
195 case ESC: 229 case ESC:
196 if (*(++p) == '\0') 230 return matchclass(c, (unsigned char)*(p+1));
197 luaL_verror("incorrect pattern (ends with `%c')", ESC); 231 case '[':
198 *ep = p+1; 232 return matchbracketclass(c, p, ep-1);
199 return matchclass(c, (unsigned char)*p);
200 case '[': {
201 char *end = bracket_end(p+1);
202 int sig = *(p+1) == '^' ? (p++, 0) : 1;
203 if (end == NULL) lua_error("incorrect pattern (missing `]')");
204 *ep = end+1;
205 while (++p < end) {
206 if (*p == ESC) {
207 if (((p+1) < end) && matchclass(c, (unsigned char)*++p))
208 return sig;
209 }
210 else if ((*(p+1) == '-') && (p+2 < end)) {
211 p+=2;
212 if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
213 return sig;
214 }
215 else if ((unsigned char)*p == c) return sig;
216 }
217 return !sig;
218 }
219 default: 233 default:
220 *ep = p+1;
221 return ((unsigned char)*p == c); 234 return ((unsigned char)*p == c);
222 } 235 }
223} 236}
224 237
225 238
226static char *matchbalance (char *s, int b, int e, struct Capture *cap) { 239static char *match (char *s, char *p, struct Capture *cap);
227 if (*s != b) return NULL; 240
241
242static char *matchbalance (char *s, char *p, struct Capture *cap) {
243 if (*p == 0 || *(p+1) == 0)
244 lua_error("unbalanced pattern");
245 if (*s != *p) return NULL;
228 else { 246 else {
247 int b = *p;
248 int e = *(p+1);
229 int cont = 1; 249 int cont = 1;
230 while (++s < cap->src_end) { 250 while (++s < cap->src_end) {
231 if (*s == e) { 251 if (*s == e) {
@@ -238,89 +258,109 @@ static char *matchbalance (char *s, int b, int e, struct Capture *cap) {
238} 258}
239 259
240 260
241static char *matchitem (char *s, char *p, struct Capture *cap, char **ep) { 261static char *max_expand (char *s, char *p, char *ep, struct Capture *cap) {
242 if (*p == ESC) { 262 int i = 0; /* counts maximum expand for item */
243 p++; 263 while ((s+i)<cap->src_end && luaI_singlematch((unsigned char)*(s+i), p, ep))
244 if (isdigit((unsigned char)*p)) { /* capture */ 264 i++;
245 int l = check_cap(*p, cap); 265 /* keeps trying to match mith the maximum repetitions */
246 int len = cap->capture[l].len; 266 while (i>=0) {
247 *ep = p+1; 267 char *res = match((s+i), ep+1, cap);
248 if (cap->src_end-s >= len && memcmp(cap->capture[l].init, s, len) == 0) 268 if (res) return res;
249 return s+len; 269 i--; /* else didn't match; reduce 1 repetition to try again */
250 else return NULL;
251 }
252 else if (*p == 'b') { /* balanced string */
253 p++;
254 if (*p == 0 || *(p+1) == 0)
255 lua_error("unbalanced pattern");
256 *ep = p+2;
257 return matchbalance(s, *p, *(p+1), cap);
258 }
259 else p--; /* and go through */
260 } 270 }
261 /* "luaI_singlematch" sets "ep" (so must be called even at the end of "s" */ 271 return NULL;
262 return (luaI_singlematch((unsigned char)*s, p, ep) && s<cap->src_end) ? 272}
263 s+1 : NULL; 273
274
275static char *min_expand (char *s, char *p, char *ep, struct Capture *cap) {
276 for (;;) {
277 char *res = match(s, ep+1, cap);
278 if (res != NULL)
279 return res;
280 else if (s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep))
281 s++; /* try with one more repetition */
282 else return NULL;
283 }
284}
285
286
287static char *start_capt (char *s, char *p, struct Capture *cap) {
288 char *res;
289 int level = cap->level;
290 if (level >= MAX_CAPT) lua_error("too many captures");
291 cap->capture[level].init = s;
292 cap->capture[level].len = -1;
293 cap->level = level+1;
294 if ((res=match(s, p+1, cap)) == NULL) /* match failed? */
295 cap->level--; /* undo capture */
296 return res;
297}
298
299
300static char *end_capt (char *s, char *p, struct Capture *cap) {
301 int l = capture_to_close(cap);
302 char *res;
303 cap->capture[l].len = s - cap->capture[l].init; /* close capture */
304 if ((res = match(s, p+1, cap)) == NULL) /* match failed? */
305 cap->capture[l].len = -1; /* undo capture */
306 return res;
307}
308
309
310static char *match_capture (char *s, int level, struct Capture *cap) {
311 int l = check_cap(level, cap);
312 int len = cap->capture[l].len;
313 if (cap->src_end-s >= len &&
314 memcmp(cap->capture[l].init, s, len) == 0)
315 return s+len;
316 else return NULL;
264} 317}
265 318
266 319
267static char *match (char *s, char *p, struct Capture *cap) { 320static char *match (char *s, char *p, struct Capture *cap) {
268 init: /* using goto's to optimize tail recursion */ 321 init: /* using goto's to optimize tail recursion */
269 switch (*p) { 322 switch (*p) {
270 case '(': { /* start capture */ 323 case '(': /* start capture */
271 char *res; 324 return start_capt(s, p, cap);
272 if (cap->level >= MAX_CAPT) lua_error("too many captures"); 325 case ')': /* end capture */
273 cap->capture[cap->level].init = s; 326 return end_capt(s, p, cap);
274 cap->capture[cap->level].len = -1; 327 case ESC: /* may be %[0-9] or %b */
275 cap->level++; 328 if (isdigit((unsigned char)(*(p+1)))) { /* capture? */
276 if ((res=match(s, p+1, cap)) == NULL) /* match failed? */ 329 s = match_capture(s, *(p+1), cap);
277 cap->level--; /* undo capture */ 330 if (s == NULL) return NULL;
278 return res; 331 p+=2; goto init; /* else return match(p+2, s, cap) */
279 } 332 }
280 case ')': { /* end capture */ 333 else if (*(p+1) == 'b') { /* balanced string? */
281 int l = capture_to_close(cap); 334 s = matchbalance(s, p+2, cap);
282 char *res; 335 if (s == NULL) return NULL;
283 cap->capture[l].len = s - cap->capture[l].init; /* close capture */ 336 p+=4; goto init; /* else return match(p+4, s, cap); */
284 if ((res = match(s, p+1, cap)) == NULL) /* match failed? */ 337 }
285 cap->capture[l].len = -1; /* undo capture */ 338 else goto dflt; /* case default */
286 return res;
287 }
288 case '\0': /* end of pattern */ 339 case '\0': /* end of pattern */
289 return s; /* match succeeded */ 340 return s; /* match succeeded */
290 case '$': 341 case '$':
291 if (*(p+1) == '\0') /* is the '$' the last char in pattern? */ 342 if (*(p+1) == '\0') /* is the '$' the last char in pattern? */
292 return (s == cap->src_end) ? s : NULL; /* check end of string */ 343 return (s == cap->src_end) ? s : NULL; /* check end of string */
293 /* else is a regular '$'; go through */ 344 else goto dflt;
294 default: { /* it is a pattern item */ 345 default: dflt: { /* it is a pattern item */
295 char *ep; /* will point to what is next */ 346 char *ep = luaI_classend(p); /* points to what is next */
296 char *s1 = matchitem(s, p, cap, &ep); 347 int m = s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep);
297 switch (*ep) { 348 switch (*ep) {
298 case '*': { /* repetition */
299 char *res;
300 if (s1 && s1>s && ((res=match(s1, p, cap)) != NULL))
301 return res;
302 p=ep+1; goto init; /* else return match(s, ep+1, cap); */
303 }
304 case '?': { /* optional */ 349 case '?': { /* optional */
305 char *res; 350 char *res;
306 if (s1 && ((res=match(s1, ep+1, cap)) != NULL)) 351 if (m && ((res=match(s+1, ep+1, cap)) != NULL))
307 return res; 352 return res;
308 p=ep+1; goto init; /* else return match(s, ep+1, cap); */ 353 p=ep+1; goto init; /* else return match(s, ep+1, cap); */
309 } 354 }
310 case '-': { /* repetition */ 355 case '*': /* 0 or more repetitions */
311 char *res; 356 return max_expand(s, p, ep, cap);
312 if ((res = match(s, ep+1, cap)) != NULL) 357 case '+': /* 1 or more repetitions */
313 return res; 358 return (m ? max_expand(s+1, p, ep, cap) : NULL);
314 else if (s1 && s1>s) { 359 case '-': /* 0 or more repetitions (minimum) */
315 s = s1; 360 return min_expand(s, p, ep, cap);
316 goto init; /* return match(s1, p, cap); */
317 }
318 else
319 return NULL;
320 }
321 default: 361 default:
322 if (s1) { s=s1; p=ep; goto init; } /* return match(s1, ep, cap); */ 362 if (!m) return NULL;
323 else return NULL; 363 s++; p=ep; goto init; /* else return match(s+1, ep, cap); */
324 } 364 }
325 } 365 }
326 } 366 }
diff --git a/lualib.h b/lualib.h
index c6e34b52..805165f0 100644
--- a/lualib.h
+++ b/lualib.h
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lualib.h,v 1.4 1998/06/19 16:14:09 roberto Exp roberto $ 2** $Id: lualib.h,v 1.5 1999/01/08 16:47:44 roberto Exp roberto $
3** Lua standard libraries 3** Lua standard libraries
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -29,7 +29,8 @@ void lua_userinit (void);
29 29
30/* Auxiliary functions (private) */ 30/* Auxiliary functions (private) */
31 31
32int luaI_singlematch (int c, char *p, char **ep); 32char *luaI_classend (char *p);
33int luaI_singlematch (int c, char *p, char *ep);
33 34
34#endif 35#endif
35 36