aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2020-11-16 10:40:32 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2020-11-16 10:40:32 +0100
commit: 5323af7f51808d5ff35c624ba70bdae4807f3717 (patch)
tree: 80b507f54575b091a7545e0a45eed1f97b9f373b
parent: 89f063b900edf8b38c9dc05953887cf09b121378 (diff)
download: busybox-w32-5323af7f51808d5ff35c624ba70bdae4807f3717.tar.gz
download: busybox-w32-5323af7f51808d5ff35c624ba70bdae4807f3717.tar.bz2
download: busybox-w32-5323af7f51808d5ff35c624ba70bdae4807f3717.zip
awk: fix dodgy multi-char separators splitting logic
function                                             old     new   delta
awk_split                                            521     484     -37

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/awk.c 25
1 files changed, 16 insertions, 9 deletions
diff --git a/editors/awk.c b/editors/awk.c
index f7451ae32..59dae4770 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1765,10 +1765,9 @@ static void fsrealloc(int size)
1765 1765
1766static int awk_split(const char *s, node *spl, char **slist) 1766static int awk_split(const char *s, node *spl, char **slist)
1767{ 1767{
1768 int l, n; 1768 int n;
1769 char c[4]; 1769 char c[4];
1770 char *s1; 1770 char *s1;
1771 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1772 1771
1773 /* in worst case, each char would be a separate field */ 1772 /* in worst case, each char would be a separate field */
1774 *slist = s1 = xzalloc(strlen(s) * 2 + 3); 1773 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
@@ -1785,12 +1784,18 @@ static int awk_split(const char *s, node *spl, char **slist)
1785 return n; /* "": zero fields */ 1784 return n; /* "": zero fields */
1786 n++; /* at least one field will be there */ 1785 n++; /* at least one field will be there */
1787 do { 1786 do {
1787 int l;
1788 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1789
1788 l = strcspn(s, c+2); /* len till next NUL or \n */ 1790 l = strcspn(s, c+2); /* len till next NUL or \n */
1789 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 1791 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1790 && pmatch[0].rm_so <= l 1792 && pmatch[0].rm_so <= l
1791 ) { 1793 ) {
1792 l = pmatch[0].rm_so; 1794 l = pmatch[0].rm_so;
1793 if (pmatch[0].rm_eo == 0) { 1795 if (pmatch[0].rm_eo == 0) {
1796 /* For example, happens when FS can match
1797 * an empty string (awk -F ' *')
1798 */
1794 l++; 1799 l++;
1795 pmatch[0].rm_eo++; 1800 pmatch[0].rm_eo++;
1796 } 1801 }
@@ -1800,14 +1805,16 @@ static int awk_split(const char *s, node *spl, char **slist)
1800 if (s[l]) 1805 if (s[l])
1801 pmatch[0].rm_eo++; 1806 pmatch[0].rm_eo++;
1802 } 1807 }
1803 memcpy(s1, s, l); 1808 s1 = mempcpy(s1, s, l);
1804 /* make sure we remove *all* of the separator chars */ 1809 *s1++ = '\0';
1805 do {
1806 s1[l] = '\0';
1807 } while (++l < pmatch[0].rm_eo);
1808 nextword(&s1);
1809 s += pmatch[0].rm_eo; 1810 s += pmatch[0].rm_eo;
1810 } while (*s); 1811 } while (*s);
1812
1813 /* echo a-- | awk -F-- '{ print NF, length($NF), $NF }'
1814 * should print "2 0 ":
1815 */
1816 *s1 = '\0';
1817
1811 return n; 1818 return n;
1812 } 1819 }
1813 if (c[0] == '\0') { /* null split */ 1820 if (c[0] == '\0') { /* null split */
@@ -2011,7 +2018,7 @@ static int ptest(node *pattern)
2011static int awk_getline(rstream *rsm, var *v) 2018static int awk_getline(rstream *rsm, var *v)
2012{ 2019{
2013 char *b; 2020 char *b;
2014 regmatch_t pmatch[2]; 2021 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
2015 int size, a, p, pp = 0; 2022 int size, a, p, pp = 0;
2016 int fd, so, eo, r, rp; 2023 int fd, so, eo, r, rp;
2017 char c, *m, *s; 2024 char c, *m, *s;