diff options
| author | Denys Vlasenko <vda.linux@googlemail.com> | 2010-03-13 16:19:04 +0100 |
|---|---|---|
| committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-03-13 16:19:04 +0100 |
| commit | b76356b28e9058fac1c9a50b274db8ce24273ca9 (patch) | |
| tree | ca594b49433d813415cc5edff30b001ef83ae9e4 /shell | |
| parent | 6eaeb7737d95661ca31b162977ac443ffeb7b0b3 (diff) | |
| download | busybox-w32-b76356b28e9058fac1c9a50b274db8ce24273ca9.tar.gz busybox-w32-b76356b28e9058fac1c9a50b274db8ce24273ca9.tar.bz2 busybox-w32-b76356b28e9058fac1c9a50b274db8ce24273ca9.zip | |
ash: fix quadratic matching slowdown is ${v/*foo*/repl} (really bad one)
It is especially bad with patterns starting with "*".
With ASH_OPTIMIZE_FOR_SIZE=y, only those are optimized, +few bytes:
text data bss dec hex filename
836337 441 7564 844342 ce236 busybox_old
836341 441 7564 844346 ce23a busybox_unstripped
With ASH_OPTIMIZE_FOR_SIZE off, we also optimize patterns _ending_ with "*",
which costs about 80 bytes:
text data bss dec hex filename
836656 441 7564 844661 ce375 busybox_old
836732 441 7564 844737 ce3c1 busybox_unstripped
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'shell')
| -rw-r--r-- | shell/ash.c | 71 |
1 files changed, 59 insertions, 12 deletions
diff --git a/shell/ash.c b/shell/ash.c index 0a8b6c0f2..ce82a965c 100644 --- a/shell/ash.c +++ b/shell/ash.c | |||
| @@ -6040,25 +6040,61 @@ scanleft(char *startp, char *rmesc, char *rmescend UNUSED_PARAM, char *str, int | |||
| 6040 | } | 6040 | } |
| 6041 | 6041 | ||
| 6042 | static char * | 6042 | static char * |
| 6043 | scanright(char *startp, char *rmesc, char *rmescend, char *str, int quotes, | 6043 | scanright(char *startp, char *rmesc, char *rmescend, char *pattern, int quotes, int match_at_start) |
| 6044 | int zero) | ||
| 6045 | { | 6044 | { |
| 6045 | #if !ENABLE_ASH_OPTIMIZE_FOR_SIZE | ||
| 6046 | int try2optimize = match_at_start; | ||
| 6047 | #endif | ||
| 6046 | int esc = 0; | 6048 | int esc = 0; |
| 6047 | char *loc; | 6049 | char *loc; |
| 6048 | char *loc2; | 6050 | char *loc2; |
| 6049 | 6051 | ||
| 6050 | for (loc = str - 1, loc2 = rmescend; loc >= startp; loc2--) { | 6052 | /* If we called by "${v/pattern/repl}" or "${v//pattern/repl}": |
| 6053 | * startp="escaped_value_of_v" rmesc="raw_value_of_v" | ||
| 6054 | * rmescend=""(ptr to NUL in rmesc) pattern="pattern" quotes=match_at_start=1 | ||
| 6055 | * Logic: | ||
| 6056 | * loc starts at NUL at the end of startp, loc2 starts at the end of rmesc, | ||
| 6057 | * and on each iteration they go back two/one char until they reach the beginning. | ||
| 6058 | * We try to find a match in "raw_value_of_v", "raw_value_of_", "raw_value_of" etc. | ||
| 6059 | */ | ||
| 6060 | /* TODO: document in what other circumstances we are called. */ | ||
| 6061 | |||
| 6062 | for (loc = pattern - 1, loc2 = rmescend; loc >= startp; loc2--) { | ||
| 6051 | int match; | 6063 | int match; |
| 6052 | char c = *loc2; | 6064 | char c = *loc2; |
| 6053 | const char *s = loc2; | 6065 | const char *s = loc2; |
| 6054 | if (zero) { | 6066 | if (match_at_start) { |
| 6055 | *loc2 = '\0'; | 6067 | *loc2 = '\0'; |
| 6056 | s = rmesc; | 6068 | s = rmesc; |
| 6057 | } | 6069 | } |
| 6058 | match = pmatch(str, s); | 6070 | match = pmatch(pattern, s); |
| 6071 | //bb_error_msg("pmatch(pattern:'%s',s:'%s'):%d", pattern, s, match); | ||
| 6059 | *loc2 = c; | 6072 | *loc2 = c; |
| 6060 | if (match) | 6073 | if (match) |
| 6061 | return loc; | 6074 | return loc; |
| 6075 | #if !ENABLE_ASH_OPTIMIZE_FOR_SIZE | ||
| 6076 | if (try2optimize) { | ||
| 6077 | /* Maybe we can optimize this: | ||
| 6078 | * if pattern ends with unescaped *, we can avoid checking | ||
| 6079 | * shorter strings: if "foo*" doesnt match "raw_value_of_v", | ||
| 6080 | * it wont match truncated "raw_value_of_" strings too. | ||
| 6081 | */ | ||
| 6082 | unsigned plen = strlen(pattern); | ||
| 6083 | /* Does it end with "*"? */ | ||
| 6084 | if (plen != 0 && pattern[--plen] == '*') { | ||
| 6085 | /* "xxxx*" is not escaped */ | ||
| 6086 | /* "xxx\*" is escaped */ | ||
| 6087 | /* "xx\\*" is not escaped */ | ||
| 6088 | /* "x\\\*" is escaped */ | ||
| 6089 | int slashes = 0; | ||
| 6090 | while (plen != 0 && pattern[--plen] == '\\') | ||
| 6091 | slashes++; | ||
| 6092 | if (!(slashes & 1)) | ||
| 6093 | break; /* ends with unescaped "*" */ | ||
| 6094 | } | ||
| 6095 | try2optimize = 0; | ||
| 6096 | } | ||
| 6097 | #endif | ||
| 6062 | loc--; | 6098 | loc--; |
| 6063 | if (quotes) { | 6099 | if (quotes) { |
| 6064 | if (--esc < 0) { | 6100 | if (--esc < 0) { |
| @@ -6248,7 +6284,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
| 6248 | 6284 | ||
| 6249 | #if ENABLE_ASH_BASH_COMPAT | 6285 | #if ENABLE_ASH_BASH_COMPAT |
| 6250 | if (subtype == VSREPLACE || subtype == VSREPLACEALL) { | 6286 | if (subtype == VSREPLACE || subtype == VSREPLACEALL) { |
| 6251 | char *idx, *end, *restart_detect; | 6287 | char *idx, *end; |
| 6252 | 6288 | ||
| 6253 | if (!repl) { | 6289 | if (!repl) { |
| 6254 | repl = parse_sub_pattern(str, varflags & VSQUOTE); | 6290 | repl = parse_sub_pattern(str, varflags & VSQUOTE); |
| @@ -6257,17 +6293,19 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
| 6257 | } | 6293 | } |
| 6258 | 6294 | ||
| 6259 | /* If there's no pattern to match, return the expansion unmolested */ | 6295 | /* If there's no pattern to match, return the expansion unmolested */ |
| 6260 | if (*str == '\0') | 6296 | if (str[0] == '\0') |
| 6261 | return 0; | 6297 | return 0; |
| 6262 | 6298 | ||
| 6263 | len = 0; | 6299 | len = 0; |
| 6264 | idx = startp; | 6300 | idx = startp; |
| 6265 | end = str - 1; | 6301 | end = str - 1; |
| 6266 | while (idx < end) { | 6302 | while (idx < end) { |
| 6303 | try_to_match: | ||
| 6267 | loc = scanright(idx, rmesc, rmescend, str, quotes, 1); | 6304 | loc = scanright(idx, rmesc, rmescend, str, quotes, 1); |
| 6268 | if (!loc) { | 6305 | if (!loc) { |
| 6269 | /* No match, advance */ | 6306 | /* No match, advance */ |
| 6270 | restart_detect = stackblock(); | 6307 | char *restart_detect = stackblock(); |
| 6308 | skip_matching: | ||
| 6271 | STPUTC(*idx, expdest); | 6309 | STPUTC(*idx, expdest); |
| 6272 | if (quotes && (unsigned char)*idx == CTLESC) { | 6310 | if (quotes && (unsigned char)*idx == CTLESC) { |
| 6273 | idx++; | 6311 | idx++; |
| @@ -6279,7 +6317,16 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
| 6279 | idx++; | 6317 | idx++; |
| 6280 | len++; | 6318 | len++; |
| 6281 | rmesc++; | 6319 | rmesc++; |
| 6282 | continue; | 6320 | /* continue; - prone to quadratic behavior, smarter code: */ |
| 6321 | if (idx >= end) | ||
| 6322 | break; | ||
| 6323 | if (str[0] == '*') { | ||
| 6324 | /* Pattern is "*foo". If "*foo" does not match "long_string", | ||
| 6325 | * it would never match "ong_string" etc, no point in trying. | ||
| 6326 | */ | ||
| 6327 | goto skip_matching; | ||
| 6328 | } | ||
| 6329 | goto try_to_match; | ||
| 6283 | } | 6330 | } |
| 6284 | 6331 | ||
| 6285 | if (subtype == VSREPLACEALL) { | 6332 | if (subtype == VSREPLACEALL) { |
| @@ -6294,7 +6341,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
| 6294 | } | 6341 | } |
| 6295 | 6342 | ||
| 6296 | for (loc = repl; *loc; loc++) { | 6343 | for (loc = repl; *loc; loc++) { |
| 6297 | restart_detect = stackblock(); | 6344 | char *restart_detect = stackblock(); |
| 6298 | STPUTC(*loc, expdest); | 6345 | STPUTC(*loc, expdest); |
| 6299 | if (stackblock() != restart_detect) | 6346 | if (stackblock() != restart_detect) |
| 6300 | goto restart; | 6347 | goto restart; |
| @@ -6303,7 +6350,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
| 6303 | 6350 | ||
| 6304 | if (subtype == VSREPLACE) { | 6351 | if (subtype == VSREPLACE) { |
| 6305 | while (*idx) { | 6352 | while (*idx) { |
| 6306 | restart_detect = stackblock(); | 6353 | char *restart_detect = stackblock(); |
| 6307 | STPUTC(*idx, expdest); | 6354 | STPUTC(*idx, expdest); |
| 6308 | if (stackblock() != restart_detect) | 6355 | if (stackblock() != restart_detect) |
| 6309 | goto restart; | 6356 | goto restart; |
| @@ -6332,7 +6379,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
| 6332 | if (subtype < 0 || subtype > 7) | 6379 | if (subtype < 0 || subtype > 7) |
| 6333 | abort(); | 6380 | abort(); |
| 6334 | #endif | 6381 | #endif |
| 6335 | /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ | 6382 | /* zero = (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX) */ |
| 6336 | zero = subtype >> 1; | 6383 | zero = subtype >> 1; |
| 6337 | /* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */ | 6384 | /* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */ |
| 6338 | scan = (subtype & 1) ^ zero ? scanleft : scanright; | 6385 | scan = (subtype & 1) ^ zero ? scanleft : scanright; |
