diff options
Diffstat (limited to 'editors/awk.c')
-rw-r--r-- | editors/awk.c | 54 |
1 files changed, 39 insertions, 15 deletions
diff --git a/editors/awk.c b/editors/awk.c index 509f4ddd8..4799091ec 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -405,7 +405,7 @@ static const char tokenlist[] ALIGN1 = | |||
405 | 405 | ||
406 | #define OC_B OC_BUILTIN | 406 | #define OC_B OC_BUILTIN |
407 | 407 | ||
408 | static const uint32_t tokeninfo[] = { | 408 | static const uint32_t tokeninfo[] ALIGN4 = { |
409 | 0, | 409 | 0, |
410 | 0, | 410 | 0, |
411 | OC_REGEXP, | 411 | OC_REGEXP, |
@@ -1767,12 +1767,34 @@ static void fsrealloc(int size) | |||
1767 | nfields = size; | 1767 | nfields = size; |
1768 | } | 1768 | } |
1769 | 1769 | ||
1770 | static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[]) | ||
1771 | { | ||
1772 | int r = regexec(preg, s, 1, pmatch, 0); | ||
1773 | if (r == 0 && pmatch[0].rm_eo == 0) { | ||
1774 | /* For example, happens when FS can match | ||
1775 | * an empty string (awk -F ' *'). Logically, | ||
1776 | * this should split into one-char fields. | ||
1777 | * However, gawk 5.0.1 searches for first | ||
1778 | * _non-empty_ separator string match: | ||
1779 | */ | ||
1780 | size_t ofs = 0; | ||
1781 | do { | ||
1782 | ofs++; | ||
1783 | if (!s[ofs]) | ||
1784 | return REG_NOMATCH; | ||
1785 | regexec(preg, s + ofs, 1, pmatch, 0); | ||
1786 | } while (pmatch[0].rm_eo == 0); | ||
1787 | pmatch[0].rm_so += ofs; | ||
1788 | pmatch[0].rm_eo += ofs; | ||
1789 | } | ||
1790 | return r; | ||
1791 | } | ||
1792 | |||
1770 | static int awk_split(const char *s, node *spl, char **slist) | 1793 | static int awk_split(const char *s, node *spl, char **slist) |
1771 | { | 1794 | { |
1772 | int l, n; | 1795 | int n; |
1773 | char c[4]; | 1796 | char c[4]; |
1774 | char *s1; | 1797 | char *s1; |
1775 | regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... | ||
1776 | 1798 | ||
1777 | /* in worst case, each char would be a separate field */ | 1799 | /* in worst case, each char would be a separate field */ |
1778 | *slist = s1 = xzalloc(strlen(s) * 2 + 3); | 1800 | *slist = s1 = xzalloc(strlen(s) * 2 + 3); |
@@ -1789,29 +1811,31 @@ static int awk_split(const char *s, node *spl, char **slist) | |||
1789 | return n; /* "": zero fields */ | 1811 | return n; /* "": zero fields */ |
1790 | n++; /* at least one field will be there */ | 1812 | n++; /* at least one field will be there */ |
1791 | do { | 1813 | do { |
1814 | int l; | ||
1815 | regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... | ||
1816 | |||
1792 | l = strcspn(s, c+2); /* len till next NUL or \n */ | 1817 | l = strcspn(s, c+2); /* len till next NUL or \n */ |
1793 | if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 | 1818 | if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0 |
1794 | && pmatch[0].rm_so <= l | 1819 | && pmatch[0].rm_so <= l |
1795 | ) { | 1820 | ) { |
1821 | /* if (pmatch[0].rm_eo == 0) ... - impossible */ | ||
1796 | l = pmatch[0].rm_so; | 1822 | l = pmatch[0].rm_so; |
1797 | if (pmatch[0].rm_eo == 0) { | ||
1798 | l++; | ||
1799 | pmatch[0].rm_eo++; | ||
1800 | } | ||
1801 | n++; /* we saw yet another delimiter */ | 1823 | n++; /* we saw yet another delimiter */ |
1802 | } else { | 1824 | } else { |
1803 | pmatch[0].rm_eo = l; | 1825 | pmatch[0].rm_eo = l; |
1804 | if (s[l]) | 1826 | if (s[l]) |
1805 | pmatch[0].rm_eo++; | 1827 | pmatch[0].rm_eo++; |
1806 | } | 1828 | } |
1807 | memcpy(s1, s, l); | 1829 | s1 = mempcpy(s1, s, l); |
1808 | /* make sure we remove *all* of the separator chars */ | 1830 | *s1++ = '\0'; |
1809 | do { | ||
1810 | s1[l] = '\0'; | ||
1811 | } while (++l < pmatch[0].rm_eo); | ||
1812 | nextword(&s1); | ||
1813 | s += pmatch[0].rm_eo; | 1831 | s += pmatch[0].rm_eo; |
1814 | } while (*s); | 1832 | } while (*s); |
1833 | |||
1834 | /* echo a-- | awk -F-- '{ print NF, length($NF), $NF }' | ||
1835 | * should print "2 0 ": | ||
1836 | */ | ||
1837 | *s1 = '\0'; | ||
1838 | |||
1815 | return n; | 1839 | return n; |
1816 | } | 1840 | } |
1817 | if (c[0] == '\0') { /* null split */ | 1841 | if (c[0] == '\0') { /* null split */ |
@@ -2030,7 +2054,7 @@ static ssize_t FAST_FUNC safe_read_strip_cr(int fd, void *buf, size_t count) | |||
2030 | static int awk_getline(rstream *rsm, var *v) | 2054 | static int awk_getline(rstream *rsm, var *v) |
2031 | { | 2055 | { |
2032 | char *b; | 2056 | char *b; |
2033 | regmatch_t pmatch[2]; | 2057 | regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... |
2034 | int size, a, p, pp = 0; | 2058 | int size, a, p, pp = 0; |
2035 | int fd, so, eo, r, rp; | 2059 | int fd, so, eo, r, rp; |
2036 | char c, *m, *s; | 2060 | char c, *m, *s; |