diff options
| author | Denys Vlasenko <vda.linux@googlemail.com> | 2010-04-04 01:17:30 +0200 |
|---|---|---|
| committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-04-04 01:17:30 +0200 |
| commit | fab288cf0b31ff64a562cc496b20add822a6abbd (patch) | |
| tree | 5feeeba796baf897cd5aceb90f59fc48b09c840b | |
| parent | 243ddcbc76d19847d9e8022dc2f6659078f5cc20 (diff) | |
| download | busybox-w32-fab288cf0b31ff64a562cc496b20add822a6abbd.tar.gz busybox-w32-fab288cf0b31ff64a562cc496b20add822a6abbd.tar.bz2 busybox-w32-fab288cf0b31ff64a562cc496b20add822a6abbd.zip | |
awk: don't append bogus data after NUL in sub(); shrink
Also renamed variables to more sensible names.
function old new delta
mk_re_node 56 49 -7
awk_sub 601 591 -10
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
| -rw-r--r-- | editors/awk.c | 118 |
1 file changed, 68 insertions, 50 deletions
diff --git a/editors/awk.c b/editors/awk.c index 30c6b88ef..3ba1a422d 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
| @@ -1134,15 +1134,13 @@ static node *new_node(uint32_t info) | |||
| 1134 | return n; | 1134 | return n; |
| 1135 | } | 1135 | } |
| 1136 | 1136 | ||
| 1137 | static node *mk_re_node(const char *s, node *n, regex_t *re) | 1137 | static void mk_re_node(const char *s, node *n, regex_t *re) |
| 1138 | { | 1138 | { |
| 1139 | n->info = OC_REGEXP; | 1139 | n->info = OC_REGEXP; |
| 1140 | n->l.re = re; | 1140 | n->l.re = re; |
| 1141 | n->r.ire = re + 1; | 1141 | n->r.ire = re + 1; |
| 1142 | xregcomp(re, s, REG_EXTENDED); | 1142 | xregcomp(re, s, REG_EXTENDED); |
| 1143 | xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); | 1143 | xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); |
| 1144 | |||
| 1145 | return n; | ||
| 1146 | } | 1144 | } |
| 1147 | 1145 | ||
| 1148 | static node *condition(void) | 1146 | static node *condition(void) |
| @@ -1541,7 +1539,10 @@ static regex_t *as_regex(node *op, regex_t *preg) | |||
| 1541 | return preg; | 1539 | return preg; |
| 1542 | } | 1540 | } |
| 1543 | 1541 | ||
| 1544 | /* gradually increasing buffer */ | 1542 | /* gradually increasing buffer. |
| 1543 | * note that we reallocate even if n == old_size, | ||
| 1544 | * and thus there is at least one extra allocated byte. | ||
| 1545 | */ | ||
| 1545 | static char* qrealloc(char *b, int n, int *size) | 1546 | static char* qrealloc(char *b, int n, int *size) |
| 1546 | { | 1547 | { |
| 1547 | if (!b || n >= *size) { | 1548 | if (!b || n >= *size) { |
| @@ -1983,83 +1984,100 @@ static char *awk_printf(node *n) | |||
| 1983 | return b; | 1984 | return b; |
| 1984 | } | 1985 | } |
| 1985 | 1986 | ||
| 1986 | /* common substitution routine | 1987 | /* Common substitution routine. |
| 1987 | * replace (nm) substring of (src) that match (n) with (repl), store | 1988 | * Replace (nm)'th substring of (src) that matches (rn) with (repl), |
| 1988 | * result into (dest), return number of substitutions. If nm=0, replace | 1989 | * store result into (dest), return number of substitutions. |
| 1989 | * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable | 1990 | * If nm = 0, replace all matches. |
| 1990 | * subexpression matching (\1-\9) | 1991 | * If src or dst is NULL, use $0. |
| 1992 | * If subexp != 0, enable subexpression matching (\1-\9). | ||
| 1991 | */ | 1993 | */ |
| 1992 | static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex) | 1994 | static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) |
| 1993 | { | 1995 | { |
| 1994 | char *ds = NULL; | 1996 | char *resbuf; |
| 1995 | const char *s; | ||
| 1996 | const char *sp; | 1997 | const char *sp; |
| 1997 | int c, i, j, di, rl, so, eo, nbs, n, dssize; | 1998 | int match_no, residx, replen, resbufsize; |
| 1999 | int regexec_flags; | ||
| 1998 | regmatch_t pmatch[10]; | 2000 | regmatch_t pmatch[10]; |
| 1999 | regex_t sreg, *re; | 2001 | regex_t sreg, *regex; |
| 2002 | |||
| 2003 | resbuf = NULL; | ||
| 2004 | residx = 0; | ||
| 2005 | match_no = 0; | ||
| 2006 | regexec_flags = 0; | ||
| 2007 | regex = as_regex(rn, &sreg); | ||
| 2008 | sp = getvar_s(src ? src : intvar[F0]); | ||
| 2009 | replen = strlen(repl); | ||
| 2010 | while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) { | ||
| 2011 | int so = pmatch[0].rm_so; | ||
| 2012 | int eo = pmatch[0].rm_eo; | ||
| 2013 | |||
| 2014 | //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp); | ||
| 2015 | resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize); | ||
| 2016 | memcpy(resbuf + residx, sp, eo); | ||
| 2017 | residx += eo; | ||
| 2018 | if (++match_no >= nm) { | ||
| 2019 | const char *s; | ||
| 2020 | int nbs; | ||
| 2000 | 2021 | ||
| 2001 | re = as_regex(rn, &sreg); | ||
| 2002 | if (!src) | ||
| 2003 | src = intvar[F0]; | ||
| 2004 | if (!dest) | ||
| 2005 | dest = intvar[F0]; | ||
| 2006 | |||
| 2007 | i = di = 0; | ||
| 2008 | sp = getvar_s(src); | ||
| 2009 | rl = strlen(repl); | ||
| 2010 | while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) { | ||
| 2011 | so = pmatch[0].rm_so; | ||
| 2012 | eo = pmatch[0].rm_eo; | ||
| 2013 | |||
| 2014 | ds = qrealloc(ds, di + eo + rl, &dssize); | ||
| 2015 | memcpy(ds + di, sp, eo); | ||
| 2016 | di += eo; | ||
| 2017 | if (++i >= nm) { | ||
| 2018 | /* replace */ | 2022 | /* replace */ |
| 2019 | di -= (eo - so); | 2023 | residx -= (eo - so); |
| 2020 | nbs = 0; | 2024 | nbs = 0; |
| 2021 | for (s = repl; *s; s++) { | 2025 | for (s = repl; *s; s++) { |
| 2022 | ds[di++] = c = *s; | 2026 | char c = resbuf[residx++] = *s; |
| 2023 | if (c == '\\') { | 2027 | if (c == '\\') { |
| 2024 | nbs++; | 2028 | nbs++; |
| 2025 | continue; | 2029 | continue; |
| 2026 | } | 2030 | } |
| 2027 | if (c == '&' || (ex && c >= '0' && c <= '9')) { | 2031 | if (c == '&' || (subexp && c >= '0' && c <= '9')) { |
| 2028 | di -= ((nbs + 3) >> 1); | 2032 | int j; |
| 2033 | residx -= ((nbs + 3) >> 1); | ||
| 2029 | j = 0; | 2034 | j = 0; |
| 2030 | if (c != '&') { | 2035 | if (c != '&') { |
| 2031 | j = c - '0'; | 2036 | j = c - '0'; |
| 2032 | nbs++; | 2037 | nbs++; |
| 2033 | } | 2038 | } |
| 2034 | if (nbs % 2) { | 2039 | if (nbs % 2) { |
| 2035 | ds[di++] = c; | 2040 | resbuf[residx++] = c; |
| 2036 | } else { | 2041 | } else { |
| 2037 | n = pmatch[j].rm_eo - pmatch[j].rm_so; | 2042 | int n = pmatch[j].rm_eo - pmatch[j].rm_so; |
| 2038 | ds = qrealloc(ds, di + rl + n, &dssize); | 2043 | resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); |
| 2039 | memcpy(ds + di, sp + pmatch[j].rm_so, n); | 2044 | memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); |
| 2040 | di += n; | 2045 | residx += n; |
| 2041 | } | 2046 | } |
| 2042 | } | 2047 | } |
| 2043 | nbs = 0; | 2048 | nbs = 0; |
| 2044 | } | 2049 | } |
| 2045 | } | 2050 | } |
| 2046 | 2051 | ||
| 2052 | regexec_flags = REG_NOTBOL; | ||
| 2047 | sp += eo; | 2053 | sp += eo; |
| 2048 | if (i == nm) | 2054 | if (match_no == nm) |
| 2049 | break; | 2055 | break; |
| 2050 | if (eo == so) { | 2056 | if (eo == so) { |
| 2051 | ds[di] = *sp++; | 2057 | /* Empty match (e.g. "b*" will match anywhere). |
| 2052 | if (!ds[di++]) | 2058 | * Advance by one char. */ |
| 2053 | break; | 2059 | //BUG (bug 1333): |
| 2060 | //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc" | ||
| 2061 | //... and will erroneously match "b" even though it is NOT at the word start. | ||
| 2062 | //we need REG_NOTBOW but it does not exist... | ||
| 2063 | /* Subtle: this is safe only because | ||
| 2064 | * qrealloc allocated at least one extra byte */ | ||
| 2065 | resbuf[residx] = *sp; | ||
| 2066 | if (*sp == '\0') | ||
| 2067 | goto ret; | ||
| 2068 | sp++; | ||
| 2069 | residx++; | ||
| 2054 | } | 2070 | } |
| 2055 | } | 2071 | } |
| 2056 | 2072 | ||
| 2057 | ds = qrealloc(ds, di + strlen(sp), &dssize); | 2073 | resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize); |
| 2058 | strcpy(ds + di, sp); | 2074 | strcpy(resbuf + residx, sp); |
| 2059 | setvar_p(dest, ds); | 2075 | ret: |
| 2060 | if (re == &sreg) | 2076 | //bb_error_msg("end sp:'%s'%p", sp,sp); |
| 2061 | regfree(re); | 2077 | setvar_p(dest ? dest : intvar[F0], resbuf); |
| 2062 | return i; | 2078 | if (regex == &sreg) |
| 2079 | regfree(regex); | ||
| 2080 | return match_no; | ||
| 2063 | } | 2081 | } |
| 2064 | 2082 | ||
| 2065 | static NOINLINE int do_mktime(const char *ds) | 2083 | static NOINLINE int do_mktime(const char *ds) |
