about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2010-04-04 01:17:30 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2010-04-04 01:17:30 +0200
commit fab288cf0b31ff64a562cc496b20add822a6abbd (patch)
tree 5feeeba796baf897cd5aceb90f59fc48b09c840b
parent 243ddcbc76d19847d9e8022dc2f6659078f5cc20 (diff)
download busybox-w32-fab288cf0b31ff64a562cc496b20add822a6abbd.tar.gz
busybox-w32-fab288cf0b31ff64a562cc496b20add822a6abbd.tar.bz2
busybox-w32-fab288cf0b31ff64a562cc496b20add822a6abbd.zip
awk: don't append bogus data after NUL in sub(); shrink
also renamed variables to more sensible names

function                                             old     new   delta
mk_re_node                                            56      49      -7
awk_sub                                              601     591     -10

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/awk.c 118
1 file changed, 68 insertions, 50 deletions
diff --git a/editors/awk.c b/editors/awk.c
index 30c6b88ef..3ba1a422d 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1134,15 +1134,13 @@ static node *new_node(uint32_t info)
1134 return n; 1134 return n;
1135} 1135}
1136 1136
1137static node *mk_re_node(const char *s, node *n, regex_t *re) 1137static void mk_re_node(const char *s, node *n, regex_t *re)
1138{ 1138{
1139 n->info = OC_REGEXP; 1139 n->info = OC_REGEXP;
1140 n->l.re = re; 1140 n->l.re = re;
1141 n->r.ire = re + 1; 1141 n->r.ire = re + 1;
1142 xregcomp(re, s, REG_EXTENDED); 1142 xregcomp(re, s, REG_EXTENDED);
1143 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); 1143 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1144
1145 return n;
1146} 1144}
1147 1145
1148static node *condition(void) 1146static node *condition(void)
@@ -1541,7 +1539,10 @@ static regex_t *as_regex(node *op, regex_t *preg)
1541 return preg; 1539 return preg;
1542} 1540}
1543 1541
1544/* gradually increasing buffer */ 1542/* gradually increasing buffer.
1543 * note that we reallocate even if n == old_size,
1544 * and thus there is at least one extra allocated byte.
1545 */
1545static char* qrealloc(char *b, int n, int *size) 1546static char* qrealloc(char *b, int n, int *size)
1546{ 1547{
1547 if (!b || n >= *size) { 1548 if (!b || n >= *size) {
@@ -1983,83 +1984,100 @@ static char *awk_printf(node *n)
1983 return b; 1984 return b;
1984} 1985}
1985 1986
1986/* common substitution routine 1987/* Common substitution routine.
1987 * replace (nm) substring of (src) that match (n) with (repl), store 1988 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
1988 * result into (dest), return number of substitutions. If nm=0, replace 1989 * store result into (dest), return number of substitutions.
1989 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable 1990 * If nm = 0, replace all matches.
1990 * subexpression matching (\1-\9) 1991 * If src or dst is NULL, use $0.
1992 * If subexp != 0, enable subexpression matching (\1-\9).
1991 */ 1993 */
1992static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex) 1994static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
1993{ 1995{
1994 char *ds = NULL; 1996 char *resbuf;
1995 const char *s;
1996 const char *sp; 1997 const char *sp;
1997 int c, i, j, di, rl, so, eo, nbs, n, dssize; 1998 int match_no, residx, replen, resbufsize;
1999 int regexec_flags;
1998 regmatch_t pmatch[10]; 2000 regmatch_t pmatch[10];
1999 regex_t sreg, *re; 2001 regex_t sreg, *regex;
2002
2003 resbuf = NULL;
2004 residx = 0;
2005 match_no = 0;
2006 regexec_flags = 0;
2007 regex = as_regex(rn, &sreg);
2008 sp = getvar_s(src ? src : intvar[F0]);
2009 replen = strlen(repl);
2010 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2011 int so = pmatch[0].rm_so;
2012 int eo = pmatch[0].rm_eo;
2013
2014 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2015 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2016 memcpy(resbuf + residx, sp, eo);
2017 residx += eo;
2018 if (++match_no >= nm) {
2019 const char *s;
2020 int nbs;
2000 2021
2001 re = as_regex(rn, &sreg);
2002 if (!src)
2003 src = intvar[F0];
2004 if (!dest)
2005 dest = intvar[F0];
2006
2007 i = di = 0;
2008 sp = getvar_s(src);
2009 rl = strlen(repl);
2010 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
2011 so = pmatch[0].rm_so;
2012 eo = pmatch[0].rm_eo;
2013
2014 ds = qrealloc(ds, di + eo + rl, &dssize);
2015 memcpy(ds + di, sp, eo);
2016 di += eo;
2017 if (++i >= nm) {
2018 /* replace */ 2022 /* replace */
2019 di -= (eo - so); 2023 residx -= (eo - so);
2020 nbs = 0; 2024 nbs = 0;
2021 for (s = repl; *s; s++) { 2025 for (s = repl; *s; s++) {
2022 ds[di++] = c = *s; 2026 char c = resbuf[residx++] = *s;
2023 if (c == '\\') { 2027 if (c == '\\') {
2024 nbs++; 2028 nbs++;
2025 continue; 2029 continue;
2026 } 2030 }
2027 if (c == '&' || (ex && c >= '0' && c <= '9')) { 2031 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2028 di -= ((nbs + 3) >> 1); 2032 int j;
2033 residx -= ((nbs + 3) >> 1);
2029 j = 0; 2034 j = 0;
2030 if (c != '&') { 2035 if (c != '&') {
2031 j = c - '0'; 2036 j = c - '0';
2032 nbs++; 2037 nbs++;
2033 } 2038 }
2034 if (nbs % 2) { 2039 if (nbs % 2) {
2035 ds[di++] = c; 2040 resbuf[residx++] = c;
2036 } else { 2041 } else {
2037 n = pmatch[j].rm_eo - pmatch[j].rm_so; 2042 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2038 ds = qrealloc(ds, di + rl + n, &dssize); 2043 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2039 memcpy(ds + di, sp + pmatch[j].rm_so, n); 2044 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2040 di += n; 2045 residx += n;
2041 } 2046 }
2042 } 2047 }
2043 nbs = 0; 2048 nbs = 0;
2044 } 2049 }
2045 } 2050 }
2046 2051
2052 regexec_flags = REG_NOTBOL;
2047 sp += eo; 2053 sp += eo;
2048 if (i == nm) 2054 if (match_no == nm)
2049 break; 2055 break;
2050 if (eo == so) { 2056 if (eo == so) {
2051 ds[di] = *sp++; 2057 /* Empty match (e.g. "b*" will match anywhere).
2052 if (!ds[di++]) 2058 * Advance by one char. */
2053 break; 2059//BUG (bug 1333):
2060//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2061//... and will erroneously match "b" even though it is NOT at the word start.
2062//we need REG_NOTBOW but it does not exist...
2063 /* Subtle: this is safe only because
2064 * qrealloc allocated at least one extra byte */
2065 resbuf[residx] = *sp;
2066 if (*sp == '\0')
2067 goto ret;
2068 sp++;
2069 residx++;
2054 } 2070 }
2055 } 2071 }
2056 2072
2057 ds = qrealloc(ds, di + strlen(sp), &dssize); 2073 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2058 strcpy(ds + di, sp); 2074 strcpy(resbuf + residx, sp);
2059 setvar_p(dest, ds); 2075 ret:
2060 if (re == &sreg) 2076 //bb_error_msg("end sp:'%s'%p", sp,sp);
2061 regfree(re); 2077 setvar_p(dest ? dest : intvar[F0], resbuf);
2062 return i; 2078 if (regex == &sreg)
2079 regfree(regex);
2080 return match_no;
2063} 2081}
2064 2082
2065static NOINLINE int do_mktime(const char *ds) 2083static NOINLINE int do_mktime(const char *ds)