diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2010-04-04 01:17:30 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2010-04-04 01:17:30 +0200 |
commit | fab288cf0b31ff64a562cc496b20add822a6abbd (patch) | |
tree | 5feeeba796baf897cd5aceb90f59fc48b09c840b | |
parent | 243ddcbc76d19847d9e8022dc2f6659078f5cc20 (diff) | |
download | busybox-w32-fab288cf0b31ff64a562cc496b20add822a6abbd.tar.gz busybox-w32-fab288cf0b31ff64a562cc496b20add822a6abbd.tar.bz2 busybox-w32-fab288cf0b31ff64a562cc496b20add822a6abbd.zip |
awk: don't append bogus data after NUL in sub(); shrink
Also renamed variables to more sensible names.
function                 old     new   delta
mk_re_node                56      49      -7
awk_sub                  601     591     -10
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 118 |
1 files changed, 68 insertions, 50 deletions
diff --git a/editors/awk.c b/editors/awk.c index 30c6b88ef..3ba1a422d 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -1134,15 +1134,13 @@ static node *new_node(uint32_t info) | |||
1134 | return n; | 1134 | return n; |
1135 | } | 1135 | } |
1136 | 1136 | ||
1137 | static node *mk_re_node(const char *s, node *n, regex_t *re) | 1137 | static void mk_re_node(const char *s, node *n, regex_t *re) |
1138 | { | 1138 | { |
1139 | n->info = OC_REGEXP; | 1139 | n->info = OC_REGEXP; |
1140 | n->l.re = re; | 1140 | n->l.re = re; |
1141 | n->r.ire = re + 1; | 1141 | n->r.ire = re + 1; |
1142 | xregcomp(re, s, REG_EXTENDED); | 1142 | xregcomp(re, s, REG_EXTENDED); |
1143 | xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); | 1143 | xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); |
1144 | |||
1145 | return n; | ||
1146 | } | 1144 | } |
1147 | 1145 | ||
1148 | static node *condition(void) | 1146 | static node *condition(void) |
@@ -1541,7 +1539,10 @@ static regex_t *as_regex(node *op, regex_t *preg) | |||
1541 | return preg; | 1539 | return preg; |
1542 | } | 1540 | } |
1543 | 1541 | ||
1544 | /* gradually increasing buffer */ | 1542 | /* gradually increasing buffer. |
1543 | * note that we reallocate even if n == old_size, | ||
1544 | * and thus there is at least one extra allocated byte. | ||
1545 | */ | ||
1545 | static char* qrealloc(char *b, int n, int *size) | 1546 | static char* qrealloc(char *b, int n, int *size) |
1546 | { | 1547 | { |
1547 | if (!b || n >= *size) { | 1548 | if (!b || n >= *size) { |
@@ -1983,83 +1984,100 @@ static char *awk_printf(node *n) | |||
1983 | return b; | 1984 | return b; |
1984 | } | 1985 | } |
1985 | 1986 | ||
1986 | /* common substitution routine | 1987 | /* Common substitution routine. |
1987 | * replace (nm) substring of (src) that match (n) with (repl), store | 1988 | * Replace (nm)'th substring of (src) that matches (rn) with (repl), |
1988 | * result into (dest), return number of substitutions. If nm=0, replace | 1989 | * store result into (dest), return number of substitutions. |
1989 | * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable | 1990 | * If nm = 0, replace all matches. |
1990 | * subexpression matching (\1-\9) | 1991 | * If src or dst is NULL, use $0. |
1992 | * If subexp != 0, enable subexpression matching (\1-\9). | ||
1991 | */ | 1993 | */ |
1992 | static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex) | 1994 | static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) |
1993 | { | 1995 | { |
1994 | char *ds = NULL; | 1996 | char *resbuf; |
1995 | const char *s; | ||
1996 | const char *sp; | 1997 | const char *sp; |
1997 | int c, i, j, di, rl, so, eo, nbs, n, dssize; | 1998 | int match_no, residx, replen, resbufsize; |
1999 | int regexec_flags; | ||
1998 | regmatch_t pmatch[10]; | 2000 | regmatch_t pmatch[10]; |
1999 | regex_t sreg, *re; | 2001 | regex_t sreg, *regex; |
2002 | |||
2003 | resbuf = NULL; | ||
2004 | residx = 0; | ||
2005 | match_no = 0; | ||
2006 | regexec_flags = 0; | ||
2007 | regex = as_regex(rn, &sreg); | ||
2008 | sp = getvar_s(src ? src : intvar[F0]); | ||
2009 | replen = strlen(repl); | ||
2010 | while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) { | ||
2011 | int so = pmatch[0].rm_so; | ||
2012 | int eo = pmatch[0].rm_eo; | ||
2013 | |||
2014 | //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp); | ||
2015 | resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize); | ||
2016 | memcpy(resbuf + residx, sp, eo); | ||
2017 | residx += eo; | ||
2018 | if (++match_no >= nm) { | ||
2019 | const char *s; | ||
2020 | int nbs; | ||
2000 | 2021 | ||
2001 | re = as_regex(rn, &sreg); | ||
2002 | if (!src) | ||
2003 | src = intvar[F0]; | ||
2004 | if (!dest) | ||
2005 | dest = intvar[F0]; | ||
2006 | |||
2007 | i = di = 0; | ||
2008 | sp = getvar_s(src); | ||
2009 | rl = strlen(repl); | ||
2010 | while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) { | ||
2011 | so = pmatch[0].rm_so; | ||
2012 | eo = pmatch[0].rm_eo; | ||
2013 | |||
2014 | ds = qrealloc(ds, di + eo + rl, &dssize); | ||
2015 | memcpy(ds + di, sp, eo); | ||
2016 | di += eo; | ||
2017 | if (++i >= nm) { | ||
2018 | /* replace */ | 2022 | /* replace */ |
2019 | di -= (eo - so); | 2023 | residx -= (eo - so); |
2020 | nbs = 0; | 2024 | nbs = 0; |
2021 | for (s = repl; *s; s++) { | 2025 | for (s = repl; *s; s++) { |
2022 | ds[di++] = c = *s; | 2026 | char c = resbuf[residx++] = *s; |
2023 | if (c == '\\') { | 2027 | if (c == '\\') { |
2024 | nbs++; | 2028 | nbs++; |
2025 | continue; | 2029 | continue; |
2026 | } | 2030 | } |
2027 | if (c == '&' || (ex && c >= '0' && c <= '9')) { | 2031 | if (c == '&' || (subexp && c >= '0' && c <= '9')) { |
2028 | di -= ((nbs + 3) >> 1); | 2032 | int j; |
2033 | residx -= ((nbs + 3) >> 1); | ||
2029 | j = 0; | 2034 | j = 0; |
2030 | if (c != '&') { | 2035 | if (c != '&') { |
2031 | j = c - '0'; | 2036 | j = c - '0'; |
2032 | nbs++; | 2037 | nbs++; |
2033 | } | 2038 | } |
2034 | if (nbs % 2) { | 2039 | if (nbs % 2) { |
2035 | ds[di++] = c; | 2040 | resbuf[residx++] = c; |
2036 | } else { | 2041 | } else { |
2037 | n = pmatch[j].rm_eo - pmatch[j].rm_so; | 2042 | int n = pmatch[j].rm_eo - pmatch[j].rm_so; |
2038 | ds = qrealloc(ds, di + rl + n, &dssize); | 2043 | resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); |
2039 | memcpy(ds + di, sp + pmatch[j].rm_so, n); | 2044 | memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); |
2040 | di += n; | 2045 | residx += n; |
2041 | } | 2046 | } |
2042 | } | 2047 | } |
2043 | nbs = 0; | 2048 | nbs = 0; |
2044 | } | 2049 | } |
2045 | } | 2050 | } |
2046 | 2051 | ||
2052 | regexec_flags = REG_NOTBOL; | ||
2047 | sp += eo; | 2053 | sp += eo; |
2048 | if (i == nm) | 2054 | if (match_no == nm) |
2049 | break; | 2055 | break; |
2050 | if (eo == so) { | 2056 | if (eo == so) { |
2051 | ds[di] = *sp++; | 2057 | /* Empty match (e.g. "b*" will match anywhere). |
2052 | if (!ds[di++]) | 2058 | * Advance by one char. */ |
2053 | break; | 2059 | //BUG (bug 1333): |
2060 | //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc" | ||
2061 | //... and will erroneously match "b" even though it is NOT at the word start. | ||
2062 | //we need REG_NOTBOW but it does not exist... | ||
2063 | /* Subtle: this is safe only because | ||
2064 | * qrealloc allocated at least one extra byte */ | ||
2065 | resbuf[residx] = *sp; | ||
2066 | if (*sp == '\0') | ||
2067 | goto ret; | ||
2068 | sp++; | ||
2069 | residx++; | ||
2054 | } | 2070 | } |
2055 | } | 2071 | } |
2056 | 2072 | ||
2057 | ds = qrealloc(ds, di + strlen(sp), &dssize); | 2073 | resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize); |
2058 | strcpy(ds + di, sp); | 2074 | strcpy(resbuf + residx, sp); |
2059 | setvar_p(dest, ds); | 2075 | ret: |
2060 | if (re == &sreg) | 2076 | //bb_error_msg("end sp:'%s'%p", sp,sp); |
2061 | regfree(re); | 2077 | setvar_p(dest ? dest : intvar[F0], resbuf); |
2062 | return i; | 2078 | if (regex == &sreg) |
2079 | regfree(regex); | ||
2080 | return match_no; | ||
2063 | } | 2081 | } |
2064 | 2082 | ||
2065 | static NOINLINE int do_mktime(const char *ds) | 2083 | static NOINLINE int do_mktime(const char *ds) |