diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2020-11-16 10:40:32 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2020-11-16 10:40:32 +0100 |
commit | 5323af7f51808d5ff35c624ba70bdae4807f3717 (patch) | |
tree | 80b507f54575b091a7545e0a45eed1f97b9f373b | |
parent | 89f063b900edf8b38c9dc05953887cf09b121378 (diff) | |
download | busybox-w32-5323af7f51808d5ff35c624ba70bdae4807f3717.tar.gz busybox-w32-5323af7f51808d5ff35c624ba70bdae4807f3717.tar.bz2 busybox-w32-5323af7f51808d5ff35c624ba70bdae4807f3717.zip |
awk: fix dodgy multi-char separators splitting logic
function old new delta
awk_split 521 484 -37
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 25 |
1 files changed, 16 insertions, 9 deletions
diff --git a/editors/awk.c b/editors/awk.c index f7451ae32..59dae4770 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -1765,10 +1765,9 @@ static void fsrealloc(int size) | |||
1765 | 1765 | ||
1766 | static int awk_split(const char *s, node *spl, char **slist) | 1766 | static int awk_split(const char *s, node *spl, char **slist) |
1767 | { | 1767 | { |
1768 | int l, n; | 1768 | int n; |
1769 | char c[4]; | 1769 | char c[4]; |
1770 | char *s1; | 1770 | char *s1; |
1771 | regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... | ||
1772 | 1771 | ||
1773 | /* in worst case, each char would be a separate field */ | 1772 | /* in worst case, each char would be a separate field */ |
1774 | *slist = s1 = xzalloc(strlen(s) * 2 + 3); | 1773 | *slist = s1 = xzalloc(strlen(s) * 2 + 3); |
@@ -1785,12 +1784,18 @@ static int awk_split(const char *s, node *spl, char **slist) | |||
1785 | return n; /* "": zero fields */ | 1784 | return n; /* "": zero fields */ |
1786 | n++; /* at least one field will be there */ | 1785 | n++; /* at least one field will be there */ |
1787 | do { | 1786 | do { |
1787 | int l; | ||
1788 | regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... | ||
1789 | |||
1788 | l = strcspn(s, c+2); /* len till next NUL or \n */ | 1790 | l = strcspn(s, c+2); /* len till next NUL or \n */ |
1789 | if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 | 1791 | if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 |
1790 | && pmatch[0].rm_so <= l | 1792 | && pmatch[0].rm_so <= l |
1791 | ) { | 1793 | ) { |
1792 | l = pmatch[0].rm_so; | 1794 | l = pmatch[0].rm_so; |
1793 | if (pmatch[0].rm_eo == 0) { | 1795 | if (pmatch[0].rm_eo == 0) { |
1796 | /* For example, happens when FS can match | ||
1797 | * an empty string (awk -F ' *') | ||
1798 | */ | ||
1794 | l++; | 1799 | l++; |
1795 | pmatch[0].rm_eo++; | 1800 | pmatch[0].rm_eo++; |
1796 | } | 1801 | } |
@@ -1800,14 +1805,16 @@ static int awk_split(const char *s, node *spl, char **slist) | |||
1800 | if (s[l]) | 1805 | if (s[l]) |
1801 | pmatch[0].rm_eo++; | 1806 | pmatch[0].rm_eo++; |
1802 | } | 1807 | } |
1803 | memcpy(s1, s, l); | 1808 | s1 = mempcpy(s1, s, l); |
1804 | /* make sure we remove *all* of the separator chars */ | 1809 | *s1++ = '\0'; |
1805 | do { | ||
1806 | s1[l] = '\0'; | ||
1807 | } while (++l < pmatch[0].rm_eo); | ||
1808 | nextword(&s1); | ||
1809 | s += pmatch[0].rm_eo; | 1810 | s += pmatch[0].rm_eo; |
1810 | } while (*s); | 1811 | } while (*s); |
1812 | |||
1813 | /* echo a-- | awk -F-- '{ print NF, length($NF), $NF }' | ||
1814 | * should print "2 0 ": | ||
1815 | */ | ||
1816 | *s1 = '\0'; | ||
1817 | |||
1811 | return n; | 1818 | return n; |
1812 | } | 1819 | } |
1813 | if (c[0] == '\0') { /* null split */ | 1820 | if (c[0] == '\0') { /* null split */ |
@@ -2011,7 +2018,7 @@ static int ptest(node *pattern) | |||
2011 | static int awk_getline(rstream *rsm, var *v) | 2018 | static int awk_getline(rstream *rsm, var *v) |
2012 | { | 2019 | { |
2013 | char *b; | 2020 | char *b; |
2014 | regmatch_t pmatch[2]; | 2021 | regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... |
2015 | int size, a, p, pp = 0; | 2022 | int size, a, p, pp = 0; |
2016 | int fd, so, eo, r, rp; | 2023 | int fd, so, eo, r, rp; |
2017 | char c, *m, *s; | 2024 | char c, *m, *s; |