aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2010-03-13 16:19:04 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2010-03-13 16:19:04 +0100
commitb76356b28e9058fac1c9a50b274db8ce24273ca9 (patch)
treeca594b49433d813415cc5edff30b001ef83ae9e4
parent6eaeb7737d95661ca31b162977ac443ffeb7b0b3 (diff)
downloadbusybox-w32-b76356b28e9058fac1c9a50b274db8ce24273ca9.tar.gz
busybox-w32-b76356b28e9058fac1c9a50b274db8ce24273ca9.tar.bz2
busybox-w32-b76356b28e9058fac1c9a50b274db8ce24273ca9.zip
ash: fix quadratic matching slowdown is ${v/*foo*/repl} (really bad one)
It is especially bad with patterns starting with "*". With ASH_OPTIMIZE_FOR_SIZE=y, only those are optimized, +few bytes: text data bss dec hex filename 836337 441 7564 844342 ce236 busybox_old 836341 441 7564 844346 ce23a busybox_unstripped With ASH_OPTIMIZE_FOR_SIZE off, we also optimize patterns _ending_ with "*", which costs about 80 bytes: text data bss dec hex filename 836656 441 7564 844661 ce375 busybox_old 836732 441 7564 844737 ce3c1 busybox_unstripped Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--shell/ash.c71
1 files changed, 59 insertions, 12 deletions
diff --git a/shell/ash.c b/shell/ash.c
index 0a8b6c0f2..ce82a965c 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -6040,25 +6040,61 @@ scanleft(char *startp, char *rmesc, char *rmescend UNUSED_PARAM, char *str, int
6040} 6040}
6041 6041
6042static char * 6042static char *
6043scanright(char *startp, char *rmesc, char *rmescend, char *str, int quotes, 6043scanright(char *startp, char *rmesc, char *rmescend, char *pattern, int quotes, int match_at_start)
6044 int zero)
6045{ 6044{
6045#if !ENABLE_ASH_OPTIMIZE_FOR_SIZE
6046 int try2optimize = match_at_start;
6047#endif
6046 int esc = 0; 6048 int esc = 0;
6047 char *loc; 6049 char *loc;
6048 char *loc2; 6050 char *loc2;
6049 6051
6050 for (loc = str - 1, loc2 = rmescend; loc >= startp; loc2--) { 6052 /* If we called by "${v/pattern/repl}" or "${v//pattern/repl}":
6053 * startp="escaped_value_of_v" rmesc="raw_value_of_v"
6054 * rmescend=""(ptr to NUL in rmesc) pattern="pattern" quotes=match_at_start=1
6055 * Logic:
6056 * loc starts at NUL at the end of startp, loc2 starts at the end of rmesc,
6057 * and on each iteration they go back two/one char until they reach the beginning.
6058 * We try to find a match in "raw_value_of_v", "raw_value_of_", "raw_value_of" etc.
6059 */
6060 /* TODO: document in what other circumstances we are called. */
6061
6062 for (loc = pattern - 1, loc2 = rmescend; loc >= startp; loc2--) {
6051 int match; 6063 int match;
6052 char c = *loc2; 6064 char c = *loc2;
6053 const char *s = loc2; 6065 const char *s = loc2;
6054 if (zero) { 6066 if (match_at_start) {
6055 *loc2 = '\0'; 6067 *loc2 = '\0';
6056 s = rmesc; 6068 s = rmesc;
6057 } 6069 }
6058 match = pmatch(str, s); 6070 match = pmatch(pattern, s);
6071 //bb_error_msg("pmatch(pattern:'%s',s:'%s'):%d", pattern, s, match);
6059 *loc2 = c; 6072 *loc2 = c;
6060 if (match) 6073 if (match)
6061 return loc; 6074 return loc;
6075#if !ENABLE_ASH_OPTIMIZE_FOR_SIZE
6076 if (try2optimize) {
6077 /* Maybe we can optimize this:
6078 * if pattern ends with unescaped *, we can avoid checking
6079 * shorter strings: if "foo*" doesnt match "raw_value_of_v",
6080 * it wont match truncated "raw_value_of_" strings too.
6081 */
6082 unsigned plen = strlen(pattern);
6083 /* Does it end with "*"? */
6084 if (plen != 0 && pattern[--plen] == '*') {
6085 /* "xxxx*" is not escaped */
6086 /* "xxx\*" is escaped */
6087 /* "xx\\*" is not escaped */
6088 /* "x\\\*" is escaped */
6089 int slashes = 0;
6090 while (plen != 0 && pattern[--plen] == '\\')
6091 slashes++;
6092 if (!(slashes & 1))
6093 break; /* ends with unescaped "*" */
6094 }
6095 try2optimize = 0;
6096 }
6097#endif
6062 loc--; 6098 loc--;
6063 if (quotes) { 6099 if (quotes) {
6064 if (--esc < 0) { 6100 if (--esc < 0) {
@@ -6248,7 +6284,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
6248 6284
6249#if ENABLE_ASH_BASH_COMPAT 6285#if ENABLE_ASH_BASH_COMPAT
6250 if (subtype == VSREPLACE || subtype == VSREPLACEALL) { 6286 if (subtype == VSREPLACE || subtype == VSREPLACEALL) {
6251 char *idx, *end, *restart_detect; 6287 char *idx, *end;
6252 6288
6253 if (!repl) { 6289 if (!repl) {
6254 repl = parse_sub_pattern(str, varflags & VSQUOTE); 6290 repl = parse_sub_pattern(str, varflags & VSQUOTE);
@@ -6257,17 +6293,19 @@ subevalvar(char *p, char *str, int strloc, int subtype,
6257 } 6293 }
6258 6294
6259 /* If there's no pattern to match, return the expansion unmolested */ 6295 /* If there's no pattern to match, return the expansion unmolested */
6260 if (*str == '\0') 6296 if (str[0] == '\0')
6261 return 0; 6297 return 0;
6262 6298
6263 len = 0; 6299 len = 0;
6264 idx = startp; 6300 idx = startp;
6265 end = str - 1; 6301 end = str - 1;
6266 while (idx < end) { 6302 while (idx < end) {
6303 try_to_match:
6267 loc = scanright(idx, rmesc, rmescend, str, quotes, 1); 6304 loc = scanright(idx, rmesc, rmescend, str, quotes, 1);
6268 if (!loc) { 6305 if (!loc) {
6269 /* No match, advance */ 6306 /* No match, advance */
6270 restart_detect = stackblock(); 6307 char *restart_detect = stackblock();
6308 skip_matching:
6271 STPUTC(*idx, expdest); 6309 STPUTC(*idx, expdest);
6272 if (quotes && (unsigned char)*idx == CTLESC) { 6310 if (quotes && (unsigned char)*idx == CTLESC) {
6273 idx++; 6311 idx++;
@@ -6279,7 +6317,16 @@ subevalvar(char *p, char *str, int strloc, int subtype,
6279 idx++; 6317 idx++;
6280 len++; 6318 len++;
6281 rmesc++; 6319 rmesc++;
6282 continue; 6320 /* continue; - prone to quadratic behavior, smarter code: */
6321 if (idx >= end)
6322 break;
6323 if (str[0] == '*') {
6324 /* Pattern is "*foo". If "*foo" does not match "long_string",
6325 * it would never match "ong_string" etc, no point in trying.
6326 */
6327 goto skip_matching;
6328 }
6329 goto try_to_match;
6283 } 6330 }
6284 6331
6285 if (subtype == VSREPLACEALL) { 6332 if (subtype == VSREPLACEALL) {
@@ -6294,7 +6341,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
6294 } 6341 }
6295 6342
6296 for (loc = repl; *loc; loc++) { 6343 for (loc = repl; *loc; loc++) {
6297 restart_detect = stackblock(); 6344 char *restart_detect = stackblock();
6298 STPUTC(*loc, expdest); 6345 STPUTC(*loc, expdest);
6299 if (stackblock() != restart_detect) 6346 if (stackblock() != restart_detect)
6300 goto restart; 6347 goto restart;
@@ -6303,7 +6350,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
6303 6350
6304 if (subtype == VSREPLACE) { 6351 if (subtype == VSREPLACE) {
6305 while (*idx) { 6352 while (*idx) {
6306 restart_detect = stackblock(); 6353 char *restart_detect = stackblock();
6307 STPUTC(*idx, expdest); 6354 STPUTC(*idx, expdest);
6308 if (stackblock() != restart_detect) 6355 if (stackblock() != restart_detect)
6309 goto restart; 6356 goto restart;
@@ -6332,7 +6379,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
6332 if (subtype < 0 || subtype > 7) 6379 if (subtype < 0 || subtype > 7)
6333 abort(); 6380 abort();
6334#endif 6381#endif
6335 /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ 6382 /* zero = (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX) */
6336 zero = subtype >> 1; 6383 zero = subtype >> 1;
6337 /* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */ 6384 /* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */
6338 scan = (subtype & 1) ^ zero ? scanleft : scanright; 6385 scan = (subtype & 1) ^ zero ? scanleft : scanright;