diff options
Diffstat (limited to 'lstrlib.c')
| -rw-r--r-- | lstrlib.c | 238 |
1 files changed, 139 insertions, 99 deletions
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | ** $Id: lstrlib.c,v 1.28 1999/02/26 15:49:53 roberto Exp roberto $ | 2 | ** $Id: lstrlib.c,v 1.29 1999/04/30 14:12:05 roberto Exp roberto $ |
| 3 | ** Standard library for strings and pattern-matching | 3 | ** Standard library for strings and pattern-matching |
| 4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
| 5 | */ | 5 | */ |
| @@ -130,7 +130,7 @@ struct Capture { | |||
| 130 | 130 | ||
| 131 | 131 | ||
| 132 | #define ESC '%' | 132 | #define ESC '%' |
| 133 | #define SPECIALS "^$*?.([%-" | 133 | #define SPECIALS "^$*+?.([%-" |
| 134 | 134 | ||
| 135 | 135 | ||
| 136 | static void push_captures (struct Capture *cap) { | 136 | static void push_captures (struct Capture *cap) { |
| @@ -160,8 +160,21 @@ static int capture_to_close (struct Capture *cap) { | |||
| 160 | } | 160 | } |
| 161 | 161 | ||
| 162 | 162 | ||
| 163 | static char *bracket_end (char *p) { | 163 | char *luaI_classend (char *p) { |
| 164 | return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']'); | 164 | switch (*p++) { |
| 165 | case ESC: | ||
| 166 | if (*p == '\0') | ||
| 167 | luaL_verror("incorrect pattern (ends with `%c')", ESC); | ||
| 168 | return p+1; | ||
| 169 | case '[': | ||
| 170 | if (*p == '^') p++; | ||
| 171 | if (*p == ']') p++; | ||
| 172 | p = strchr(p, ']'); | ||
| 173 | if (!p) lua_error("incorrect pattern (missing `]')"); | ||
| 174 | return p+1; | ||
| 175 | default: | ||
| 176 | return p; | ||
| 177 | } | ||
| 165 | } | 178 | } |
| 166 | 179 | ||
| 167 | 180 | ||
| @@ -184,48 +197,55 @@ static int matchclass (int c, int cl) { | |||
| 184 | } | 197 | } |
| 185 | 198 | ||
| 186 | 199 | ||
| 187 | int luaI_singlematch (int c, char *p, char **ep) { | 200 | |
| 201 | static int matchbracketclass (int c, char *p, char *end) { | ||
| 202 | int sig = 1; | ||
| 203 | if (*(p+1) == '^') { | ||
| 204 | sig = 0; | ||
| 205 | p++; /* skip the '^' */ | ||
| 206 | } | ||
| 207 | while (++p < end) { | ||
| 208 | if (*p == ESC) { | ||
| 209 | p++; | ||
| 210 | if ((p < end) && matchclass(c, (unsigned char)*p)) | ||
| 211 | return sig; | ||
| 212 | } | ||
| 213 | else if ((*(p+1) == '-') && (p+2 < end)) { | ||
| 214 | p+=2; | ||
| 215 | if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p) | ||
| 216 | return sig; | ||
| 217 | } | ||
| 218 | else if ((unsigned char)*p == c) return sig; | ||
| 219 | } | ||
| 220 | return !sig; | ||
| 221 | } | ||
| 222 | |||
| 223 | |||
| 224 | |||
| 225 | int luaI_singlematch (int c, char *p, char *ep) { | ||
| 188 | switch (*p) { | 226 | switch (*p) { |
| 189 | case '.': /* matches any char */ | 227 | case '.': /* matches any char */ |
| 190 | *ep = p+1; | ||
| 191 | return 1; | 228 | return 1; |
| 192 | case '\0': /* end of pattern; matches nothing */ | ||
| 193 | *ep = p; | ||
| 194 | return 0; | ||
| 195 | case ESC: | 229 | case ESC: |
| 196 | if (*(++p) == '\0') | 230 | return matchclass(c, (unsigned char)*(p+1)); |
| 197 | luaL_verror("incorrect pattern (ends with `%c')", ESC); | 231 | case '[': |
| 198 | *ep = p+1; | 232 | return matchbracketclass(c, p, ep-1); |
| 199 | return matchclass(c, (unsigned char)*p); | ||
| 200 | case '[': { | ||
| 201 | char *end = bracket_end(p+1); | ||
| 202 | int sig = *(p+1) == '^' ? (p++, 0) : 1; | ||
| 203 | if (end == NULL) lua_error("incorrect pattern (missing `]')"); | ||
| 204 | *ep = end+1; | ||
| 205 | while (++p < end) { | ||
| 206 | if (*p == ESC) { | ||
| 207 | if (((p+1) < end) && matchclass(c, (unsigned char)*++p)) | ||
| 208 | return sig; | ||
| 209 | } | ||
| 210 | else if ((*(p+1) == '-') && (p+2 < end)) { | ||
| 211 | p+=2; | ||
| 212 | if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p) | ||
| 213 | return sig; | ||
| 214 | } | ||
| 215 | else if ((unsigned char)*p == c) return sig; | ||
| 216 | } | ||
| 217 | return !sig; | ||
| 218 | } | ||
| 219 | default: | 233 | default: |
| 220 | *ep = p+1; | ||
| 221 | return ((unsigned char)*p == c); | 234 | return ((unsigned char)*p == c); |
| 222 | } | 235 | } |
| 223 | } | 236 | } |
| 224 | 237 | ||
| 225 | 238 | ||
| 226 | static char *matchbalance (char *s, int b, int e, struct Capture *cap) { | 239 | static char *match (char *s, char *p, struct Capture *cap); |
| 227 | if (*s != b) return NULL; | 240 | |
| 241 | |||
| 242 | static char *matchbalance (char *s, char *p, struct Capture *cap) { | ||
| 243 | if (*p == 0 || *(p+1) == 0) | ||
| 244 | lua_error("unbalanced pattern"); | ||
| 245 | if (*s != *p) return NULL; | ||
| 228 | else { | 246 | else { |
| 247 | int b = *p; | ||
| 248 | int e = *(p+1); | ||
| 229 | int cont = 1; | 249 | int cont = 1; |
| 230 | while (++s < cap->src_end) { | 250 | while (++s < cap->src_end) { |
| 231 | if (*s == e) { | 251 | if (*s == e) { |
| @@ -238,89 +258,109 @@ static char *matchbalance (char *s, int b, int e, struct Capture *cap) { | |||
| 238 | } | 258 | } |
| 239 | 259 | ||
| 240 | 260 | ||
| 241 | static char *matchitem (char *s, char *p, struct Capture *cap, char **ep) { | 261 | static char *max_expand (char *s, char *p, char *ep, struct Capture *cap) { |
| 242 | if (*p == ESC) { | 262 | int i = 0; /* counts maximum expand for item */ |
| 243 | p++; | 263 | while ((s+i)<cap->src_end && luaI_singlematch((unsigned char)*(s+i), p, ep)) |
| 244 | if (isdigit((unsigned char)*p)) { /* capture */ | 264 | i++; |
| 245 | int l = check_cap(*p, cap); | 265 | /* keeps trying to match mith the maximum repetitions */ |
| 246 | int len = cap->capture[l].len; | 266 | while (i>=0) { |
| 247 | *ep = p+1; | 267 | char *res = match((s+i), ep+1, cap); |
| 248 | if (cap->src_end-s >= len && memcmp(cap->capture[l].init, s, len) == 0) | 268 | if (res) return res; |
| 249 | return s+len; | 269 | i--; /* else didn't match; reduce 1 repetition to try again */ |
| 250 | else return NULL; | ||
| 251 | } | ||
| 252 | else if (*p == 'b') { /* balanced string */ | ||
| 253 | p++; | ||
| 254 | if (*p == 0 || *(p+1) == 0) | ||
| 255 | lua_error("unbalanced pattern"); | ||
| 256 | *ep = p+2; | ||
| 257 | return matchbalance(s, *p, *(p+1), cap); | ||
| 258 | } | ||
| 259 | else p--; /* and go through */ | ||
| 260 | } | 270 | } |
| 261 | /* "luaI_singlematch" sets "ep" (so must be called even at the end of "s" */ | 271 | return NULL; |
| 262 | return (luaI_singlematch((unsigned char)*s, p, ep) && s<cap->src_end) ? | 272 | } |
| 263 | s+1 : NULL; | 273 | |
| 274 | |||
| 275 | static char *min_expand (char *s, char *p, char *ep, struct Capture *cap) { | ||
| 276 | for (;;) { | ||
| 277 | char *res = match(s, ep+1, cap); | ||
| 278 | if (res != NULL) | ||
| 279 | return res; | ||
| 280 | else if (s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep)) | ||
| 281 | s++; /* try with one more repetition */ | ||
| 282 | else return NULL; | ||
| 283 | } | ||
| 284 | } | ||
| 285 | |||
| 286 | |||
| 287 | static char *start_capt (char *s, char *p, struct Capture *cap) { | ||
| 288 | char *res; | ||
| 289 | int level = cap->level; | ||
| 290 | if (level >= MAX_CAPT) lua_error("too many captures"); | ||
| 291 | cap->capture[level].init = s; | ||
| 292 | cap->capture[level].len = -1; | ||
| 293 | cap->level = level+1; | ||
| 294 | if ((res=match(s, p+1, cap)) == NULL) /* match failed? */ | ||
| 295 | cap->level--; /* undo capture */ | ||
| 296 | return res; | ||
| 297 | } | ||
| 298 | |||
| 299 | |||
| 300 | static char *end_capt (char *s, char *p, struct Capture *cap) { | ||
| 301 | int l = capture_to_close(cap); | ||
| 302 | char *res; | ||
| 303 | cap->capture[l].len = s - cap->capture[l].init; /* close capture */ | ||
| 304 | if ((res = match(s, p+1, cap)) == NULL) /* match failed? */ | ||
| 305 | cap->capture[l].len = -1; /* undo capture */ | ||
| 306 | return res; | ||
| 307 | } | ||
| 308 | |||
| 309 | |||
| 310 | static char *match_capture (char *s, int level, struct Capture *cap) { | ||
| 311 | int l = check_cap(level, cap); | ||
| 312 | int len = cap->capture[l].len; | ||
| 313 | if (cap->src_end-s >= len && | ||
| 314 | memcmp(cap->capture[l].init, s, len) == 0) | ||
| 315 | return s+len; | ||
| 316 | else return NULL; | ||
| 264 | } | 317 | } |
| 265 | 318 | ||
| 266 | 319 | ||
| 267 | static char *match (char *s, char *p, struct Capture *cap) { | 320 | static char *match (char *s, char *p, struct Capture *cap) { |
| 268 | init: /* using goto's to optimize tail recursion */ | 321 | init: /* using goto's to optimize tail recursion */ |
| 269 | switch (*p) { | 322 | switch (*p) { |
| 270 | case '(': { /* start capture */ | 323 | case '(': /* start capture */ |
| 271 | char *res; | 324 | return start_capt(s, p, cap); |
| 272 | if (cap->level >= MAX_CAPT) lua_error("too many captures"); | 325 | case ')': /* end capture */ |
| 273 | cap->capture[cap->level].init = s; | 326 | return end_capt(s, p, cap); |
| 274 | cap->capture[cap->level].len = -1; | 327 | case ESC: /* may be %[0-9] or %b */ |
| 275 | cap->level++; | 328 | if (isdigit((unsigned char)(*(p+1)))) { /* capture? */ |
| 276 | if ((res=match(s, p+1, cap)) == NULL) /* match failed? */ | 329 | s = match_capture(s, *(p+1), cap); |
| 277 | cap->level--; /* undo capture */ | 330 | if (s == NULL) return NULL; |
| 278 | return res; | 331 | p+=2; goto init; /* else return match(p+2, s, cap) */ |
| 279 | } | 332 | } |
| 280 | case ')': { /* end capture */ | 333 | else if (*(p+1) == 'b') { /* balanced string? */ |
| 281 | int l = capture_to_close(cap); | 334 | s = matchbalance(s, p+2, cap); |
| 282 | char *res; | 335 | if (s == NULL) return NULL; |
| 283 | cap->capture[l].len = s - cap->capture[l].init; /* close capture */ | 336 | p+=4; goto init; /* else return match(p+4, s, cap); */ |
| 284 | if ((res = match(s, p+1, cap)) == NULL) /* match failed? */ | 337 | } |
| 285 | cap->capture[l].len = -1; /* undo capture */ | 338 | else goto dflt; /* case default */ |
| 286 | return res; | ||
| 287 | } | ||
| 288 | case '\0': /* end of pattern */ | 339 | case '\0': /* end of pattern */ |
| 289 | return s; /* match succeeded */ | 340 | return s; /* match succeeded */ |
| 290 | case '$': | 341 | case '$': |
| 291 | if (*(p+1) == '\0') /* is the '$' the last char in pattern? */ | 342 | if (*(p+1) == '\0') /* is the '$' the last char in pattern? */ |
| 292 | return (s == cap->src_end) ? s : NULL; /* check end of string */ | 343 | return (s == cap->src_end) ? s : NULL; /* check end of string */ |
| 293 | /* else is a regular '$'; go through */ | 344 | else goto dflt; |
| 294 | default: { /* it is a pattern item */ | 345 | default: dflt: { /* it is a pattern item */ |
| 295 | char *ep; /* will point to what is next */ | 346 | char *ep = luaI_classend(p); /* points to what is next */ |
| 296 | char *s1 = matchitem(s, p, cap, &ep); | 347 | int m = s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep); |
| 297 | switch (*ep) { | 348 | switch (*ep) { |
| 298 | case '*': { /* repetition */ | ||
| 299 | char *res; | ||
| 300 | if (s1 && s1>s && ((res=match(s1, p, cap)) != NULL)) | ||
| 301 | return res; | ||
| 302 | p=ep+1; goto init; /* else return match(s, ep+1, cap); */ | ||
| 303 | } | ||
| 304 | case '?': { /* optional */ | 349 | case '?': { /* optional */ |
| 305 | char *res; | 350 | char *res; |
| 306 | if (s1 && ((res=match(s1, ep+1, cap)) != NULL)) | 351 | if (m && ((res=match(s+1, ep+1, cap)) != NULL)) |
| 307 | return res; | 352 | return res; |
| 308 | p=ep+1; goto init; /* else return match(s, ep+1, cap); */ | 353 | p=ep+1; goto init; /* else return match(s, ep+1, cap); */ |
| 309 | } | 354 | } |
| 310 | case '-': { /* repetition */ | 355 | case '*': /* 0 or more repetitions */ |
| 311 | char *res; | 356 | return max_expand(s, p, ep, cap); |
| 312 | if ((res = match(s, ep+1, cap)) != NULL) | 357 | case '+': /* 1 or more repetitions */ |
| 313 | return res; | 358 | return (m ? max_expand(s+1, p, ep, cap) : NULL); |
| 314 | else if (s1 && s1>s) { | 359 | case '-': /* 0 or more repetitions (minimum) */ |
| 315 | s = s1; | 360 | return min_expand(s, p, ep, cap); |
| 316 | goto init; /* return match(s1, p, cap); */ | ||
| 317 | } | ||
| 318 | else | ||
| 319 | return NULL; | ||
| 320 | } | ||
| 321 | default: | 361 | default: |
| 322 | if (s1) { s=s1; p=ep; goto init; } /* return match(s1, ep, cap); */ | 362 | if (!m) return NULL; |
| 323 | else return NULL; | 363 | s++; p=ep; goto init; /* else return match(s+1, ep, cap); */ |
| 324 | } | 364 | } |
| 325 | } | 365 | } |
| 326 | } | 366 | } |
