diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2010-03-13 16:19:04 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-03-13 16:19:04 +0100 |
commit | b76356b28e9058fac1c9a50b274db8ce24273ca9 (patch) | |
tree | ca594b49433d813415cc5edff30b001ef83ae9e4 | |
parent | 6eaeb7737d95661ca31b162977ac443ffeb7b0b3 (diff) | |
download | busybox-w32-b76356b28e9058fac1c9a50b274db8ce24273ca9.tar.gz busybox-w32-b76356b28e9058fac1c9a50b274db8ce24273ca9.tar.bz2 busybox-w32-b76356b28e9058fac1c9a50b274db8ce24273ca9.zip |
ash: fix quadratic matching slowdown is ${v/*foo*/repl} (really bad one)
It is especially bad with patterns starting with "*".
With ASH_OPTIMIZE_FOR_SIZE=y, only those are optimized, +few bytes:
text data bss dec hex filename
836337 441 7564 844342 ce236 busybox_old
836341 441 7564 844346 ce23a busybox_unstripped
With ASH_OPTIMIZE_FOR_SIZE off, we also optimize patterns _ending_ with "*",
which costs about 80 bytes:
text data bss dec hex filename
836656 441 7564 844661 ce375 busybox_old
836732 441 7564 844737 ce3c1 busybox_unstripped
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | shell/ash.c | 71 |
1 files changed, 59 insertions, 12 deletions
diff --git a/shell/ash.c b/shell/ash.c index 0a8b6c0f2..ce82a965c 100644 --- a/shell/ash.c +++ b/shell/ash.c | |||
@@ -6040,25 +6040,61 @@ scanleft(char *startp, char *rmesc, char *rmescend UNUSED_PARAM, char *str, int | |||
6040 | } | 6040 | } |
6041 | 6041 | ||
6042 | static char * | 6042 | static char * |
6043 | scanright(char *startp, char *rmesc, char *rmescend, char *str, int quotes, | 6043 | scanright(char *startp, char *rmesc, char *rmescend, char *pattern, int quotes, int match_at_start) |
6044 | int zero) | ||
6045 | { | 6044 | { |
6045 | #if !ENABLE_ASH_OPTIMIZE_FOR_SIZE | ||
6046 | int try2optimize = match_at_start; | ||
6047 | #endif | ||
6046 | int esc = 0; | 6048 | int esc = 0; |
6047 | char *loc; | 6049 | char *loc; |
6048 | char *loc2; | 6050 | char *loc2; |
6049 | 6051 | ||
6050 | for (loc = str - 1, loc2 = rmescend; loc >= startp; loc2--) { | 6052 | /* If we called by "${v/pattern/repl}" or "${v//pattern/repl}": |
6053 | * startp="escaped_value_of_v" rmesc="raw_value_of_v" | ||
6054 | * rmescend=""(ptr to NUL in rmesc) pattern="pattern" quotes=match_at_start=1 | ||
6055 | * Logic: | ||
6056 | * loc starts at NUL at the end of startp, loc2 starts at the end of rmesc, | ||
6057 | * and on each iteration they go back two/one char until they reach the beginning. | ||
6058 | * We try to find a match in "raw_value_of_v", "raw_value_of_", "raw_value_of" etc. | ||
6059 | */ | ||
6060 | /* TODO: document in what other circumstances we are called. */ | ||
6061 | |||
6062 | for (loc = pattern - 1, loc2 = rmescend; loc >= startp; loc2--) { | ||
6051 | int match; | 6063 | int match; |
6052 | char c = *loc2; | 6064 | char c = *loc2; |
6053 | const char *s = loc2; | 6065 | const char *s = loc2; |
6054 | if (zero) { | 6066 | if (match_at_start) { |
6055 | *loc2 = '\0'; | 6067 | *loc2 = '\0'; |
6056 | s = rmesc; | 6068 | s = rmesc; |
6057 | } | 6069 | } |
6058 | match = pmatch(str, s); | 6070 | match = pmatch(pattern, s); |
6071 | //bb_error_msg("pmatch(pattern:'%s',s:'%s'):%d", pattern, s, match); | ||
6059 | *loc2 = c; | 6072 | *loc2 = c; |
6060 | if (match) | 6073 | if (match) |
6061 | return loc; | 6074 | return loc; |
6075 | #if !ENABLE_ASH_OPTIMIZE_FOR_SIZE | ||
6076 | if (try2optimize) { | ||
6077 | /* Maybe we can optimize this: | ||
6078 | * if pattern ends with unescaped *, we can avoid checking | ||
6079 | * shorter strings: if "foo*" doesnt match "raw_value_of_v", | ||
6080 | * it wont match truncated "raw_value_of_" strings too. | ||
6081 | */ | ||
6082 | unsigned plen = strlen(pattern); | ||
6083 | /* Does it end with "*"? */ | ||
6084 | if (plen != 0 && pattern[--plen] == '*') { | ||
6085 | /* "xxxx*" is not escaped */ | ||
6086 | /* "xxx\*" is escaped */ | ||
6087 | /* "xx\\*" is not escaped */ | ||
6088 | /* "x\\\*" is escaped */ | ||
6089 | int slashes = 0; | ||
6090 | while (plen != 0 && pattern[--plen] == '\\') | ||
6091 | slashes++; | ||
6092 | if (!(slashes & 1)) | ||
6093 | break; /* ends with unescaped "*" */ | ||
6094 | } | ||
6095 | try2optimize = 0; | ||
6096 | } | ||
6097 | #endif | ||
6062 | loc--; | 6098 | loc--; |
6063 | if (quotes) { | 6099 | if (quotes) { |
6064 | if (--esc < 0) { | 6100 | if (--esc < 0) { |
@@ -6248,7 +6284,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
6248 | 6284 | ||
6249 | #if ENABLE_ASH_BASH_COMPAT | 6285 | #if ENABLE_ASH_BASH_COMPAT |
6250 | if (subtype == VSREPLACE || subtype == VSREPLACEALL) { | 6286 | if (subtype == VSREPLACE || subtype == VSREPLACEALL) { |
6251 | char *idx, *end, *restart_detect; | 6287 | char *idx, *end; |
6252 | 6288 | ||
6253 | if (!repl) { | 6289 | if (!repl) { |
6254 | repl = parse_sub_pattern(str, varflags & VSQUOTE); | 6290 | repl = parse_sub_pattern(str, varflags & VSQUOTE); |
@@ -6257,17 +6293,19 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
6257 | } | 6293 | } |
6258 | 6294 | ||
6259 | /* If there's no pattern to match, return the expansion unmolested */ | 6295 | /* If there's no pattern to match, return the expansion unmolested */ |
6260 | if (*str == '\0') | 6296 | if (str[0] == '\0') |
6261 | return 0; | 6297 | return 0; |
6262 | 6298 | ||
6263 | len = 0; | 6299 | len = 0; |
6264 | idx = startp; | 6300 | idx = startp; |
6265 | end = str - 1; | 6301 | end = str - 1; |
6266 | while (idx < end) { | 6302 | while (idx < end) { |
6303 | try_to_match: | ||
6267 | loc = scanright(idx, rmesc, rmescend, str, quotes, 1); | 6304 | loc = scanright(idx, rmesc, rmescend, str, quotes, 1); |
6268 | if (!loc) { | 6305 | if (!loc) { |
6269 | /* No match, advance */ | 6306 | /* No match, advance */ |
6270 | restart_detect = stackblock(); | 6307 | char *restart_detect = stackblock(); |
6308 | skip_matching: | ||
6271 | STPUTC(*idx, expdest); | 6309 | STPUTC(*idx, expdest); |
6272 | if (quotes && (unsigned char)*idx == CTLESC) { | 6310 | if (quotes && (unsigned char)*idx == CTLESC) { |
6273 | idx++; | 6311 | idx++; |
@@ -6279,7 +6317,16 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
6279 | idx++; | 6317 | idx++; |
6280 | len++; | 6318 | len++; |
6281 | rmesc++; | 6319 | rmesc++; |
6282 | continue; | 6320 | /* continue; - prone to quadratic behavior, smarter code: */ |
6321 | if (idx >= end) | ||
6322 | break; | ||
6323 | if (str[0] == '*') { | ||
6324 | /* Pattern is "*foo". If "*foo" does not match "long_string", | ||
6325 | * it would never match "ong_string" etc, no point in trying. | ||
6326 | */ | ||
6327 | goto skip_matching; | ||
6328 | } | ||
6329 | goto try_to_match; | ||
6283 | } | 6330 | } |
6284 | 6331 | ||
6285 | if (subtype == VSREPLACEALL) { | 6332 | if (subtype == VSREPLACEALL) { |
@@ -6294,7 +6341,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
6294 | } | 6341 | } |
6295 | 6342 | ||
6296 | for (loc = repl; *loc; loc++) { | 6343 | for (loc = repl; *loc; loc++) { |
6297 | restart_detect = stackblock(); | 6344 | char *restart_detect = stackblock(); |
6298 | STPUTC(*loc, expdest); | 6345 | STPUTC(*loc, expdest); |
6299 | if (stackblock() != restart_detect) | 6346 | if (stackblock() != restart_detect) |
6300 | goto restart; | 6347 | goto restart; |
@@ -6303,7 +6350,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
6303 | 6350 | ||
6304 | if (subtype == VSREPLACE) { | 6351 | if (subtype == VSREPLACE) { |
6305 | while (*idx) { | 6352 | while (*idx) { |
6306 | restart_detect = stackblock(); | 6353 | char *restart_detect = stackblock(); |
6307 | STPUTC(*idx, expdest); | 6354 | STPUTC(*idx, expdest); |
6308 | if (stackblock() != restart_detect) | 6355 | if (stackblock() != restart_detect) |
6309 | goto restart; | 6356 | goto restart; |
@@ -6332,7 +6379,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, | |||
6332 | if (subtype < 0 || subtype > 7) | 6379 | if (subtype < 0 || subtype > 7) |
6333 | abort(); | 6380 | abort(); |
6334 | #endif | 6381 | #endif |
6335 | /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ | 6382 | /* zero = (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX) */ |
6336 | zero = subtype >> 1; | 6383 | zero = subtype >> 1; |
6337 | /* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */ | 6384 | /* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */ |
6338 | scan = (subtype & 1) ^ zero ? scanleft : scanright; | 6385 | scan = (subtype & 1) ^ zero ? scanleft : scanright; |