diff options
author | Ron Yorston <rmy@pobox.com> | 2023-01-30 10:33:59 +0000 |
---|---|---|
committer | Ron Yorston <rmy@pobox.com> | 2023-01-30 10:33:59 +0000 |
commit | 7a537eb71874cd6c8d0e9ab1f906e6466e7f793c (patch) | |
tree | af7b8c0218c5bc8b3e7c7aa5d86beca68350446f | |
parent | 33ae88c40cf56b53c06e627e0535c9c740767aa4 (diff) | |
download | busybox-w32-7a537eb71874cd6c8d0e9ab1f906e6466e7f793c.tar.gz busybox-w32-7a537eb71874cd6c8d0e9ab1f906e6466e7f793c.tar.bz2 busybox-w32-7a537eb71874cd6c8d0e9ab1f906e6466e7f793c.zip |
awk: CRLF handling
Previous efforts at handling DOS-style line endings in awk have
included commits ee7e00dc5 and 1a3717342.
The use of remove_cr() is unwise:
- It's overzealous, removing all CRs, not just those in CRLF pairs.
- Even if that were fixed, awk reads input in chunks. There's a
remote chance a CRLF pair might be split across a chunk boundary
and be missed.
remove_cr() will be fixed separately. In awk, treat all data input
as being in text mode.
Skipping CRs in skip_spaces() is also flawed. Instead, read scripts
in text mode.
Add a couple of test cases. One of these (awk backslash+CRLF eaten
with no trace) fails without this patch.
-rw-r--r-- | editors/awk.c | 37 | ||||
-rwxr-xr-x | testsuite/awk.tests | 14 |
2 files changed, 32 insertions, 19 deletions
diff --git a/editors/awk.c b/editors/awk.c index cf9269c6f..2c1272554 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -826,11 +826,7 @@ static char *skip_spaces(char *p) | |||
826 | if (*p == '\\' && p[1] == '\n') { | 826 | if (*p == '\\' && p[1] == '\n') { |
827 | p++; | 827 | p++; |
828 | t_lineno++; | 828 | t_lineno++; |
829 | #if !ENABLE_PLATFORM_MINGW32 | ||
830 | } else if (*p != ' ' && *p != '\t') { | 829 | } else if (*p != ' ' && *p != '\t') { |
831 | #else | ||
832 | } else if (*p != ' ' && *p != '\t' && *p != '\r') { | ||
833 | #endif | ||
834 | break; | 830 | break; |
835 | } | 831 | } |
836 | p++; | 832 | p++; |
@@ -2231,21 +2227,6 @@ static int ptest(node *pattern) | |||
2231 | return istrue(evaluate(pattern, &G.ptest__tmpvar)); | 2227 | return istrue(evaluate(pattern, &G.ptest__tmpvar)); |
2232 | } | 2228 | } |
2233 | 2229 | ||
2234 | #if ENABLE_PLATFORM_MINGW32 | ||
2235 | static ssize_t FAST_FUNC safe_read_strip_cr(int fd, void *buf, size_t count) | ||
2236 | { | ||
2237 | ssize_t n; | ||
2238 | |||
2239 | do { | ||
2240 | n = safe_read(fd, buf, count); | ||
2241 | } while (n > 0 && (n=remove_cr((char *)buf, n)) == 0); | ||
2242 | |||
2243 | return n; | ||
2244 | } | ||
2245 | |||
2246 | #define safe_read safe_read_strip_cr | ||
2247 | #endif | ||
2248 | |||
2249 | /* read next record from stream rsm into a variable v */ | 2230 | /* read next record from stream rsm into a variable v */ |
2250 | static int awk_getline(rstream *rsm, var *v) | 2231 | static int awk_getline(rstream *rsm, var *v) |
2251 | { | 2232 | { |
@@ -2834,6 +2815,15 @@ static int is_assignment(const char *expr) | |||
2834 | return TRUE; | 2815 | return TRUE; |
2835 | } | 2816 | } |
2836 | 2817 | ||
2818 | |||
2819 | #if ENABLE_PLATFORM_MINGW32 | ||
2820 | static void set_text_mode(FILE *f) | ||
2821 | { | ||
2822 | if (f) | ||
2823 | _setmode(fileno(f), _O_TEXT); | ||
2824 | } | ||
2825 | #endif | ||
2826 | |||
2837 | /* switch to next input file */ | 2827 | /* switch to next input file */ |
2838 | static rstream *next_input_file(void) | 2828 | static rstream *next_input_file(void) |
2839 | { | 2829 | { |
@@ -2862,6 +2852,9 @@ static rstream *next_input_file(void) | |||
2862 | break; | 2852 | break; |
2863 | } | 2853 | } |
2864 | } | 2854 | } |
2855 | #if ENABLE_PLATFORM_MINGW32 | ||
2856 | set_text_mode(rsm.F); | ||
2857 | #endif | ||
2865 | 2858 | ||
2866 | files_happen = TRUE; | 2859 | files_happen = TRUE; |
2867 | setvar_s(intvar[FILENAME], fname); | 2860 | setvar_s(intvar[FILENAME], fname); |
@@ -3242,6 +3235,9 @@ static var *evaluate(node *op, var *res) | |||
3242 | } else { | 3235 | } else { |
3243 | rsm->F = fopen_for_read(L.s); /* not xfopen! */ | 3236 | rsm->F = fopen_for_read(L.s); /* not xfopen! */ |
3244 | } | 3237 | } |
3238 | #if ENABLE_PLATFORM_MINGW32 | ||
3239 | set_text_mode(rsm->F); | ||
3240 | #endif | ||
3245 | } | 3241 | } |
3246 | } else { | 3242 | } else { |
3247 | if (!iF) | 3243 | if (!iF) |
@@ -3695,6 +3691,9 @@ int awk_main(int argc UNUSED_PARAM, char **argv) | |||
3695 | 3691 | ||
3696 | g_progname = llist_pop(&list_f); | 3692 | g_progname = llist_pop(&list_f); |
3697 | fd = xopen_stdin(g_progname); | 3693 | fd = xopen_stdin(g_progname); |
3694 | #if ENABLE_PLATFORM_MINGW32 | ||
3695 | _setmode(fd, _O_TEXT); | ||
3696 | #endif | ||
3698 | s = xmalloc_read(fd, NULL); /* it's NUL-terminated */ | 3697 | s = xmalloc_read(fd, NULL); /* it's NUL-terminated */ |
3699 | close(fd); | 3698 | close(fd); |
3700 | parse_program(s); | 3699 | parse_program(s); |
diff --git a/testsuite/awk.tests b/testsuite/awk.tests index bbf0fbff1..11beb1b10 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests | |||
@@ -485,4 +485,18 @@ testing 'awk assign while test' \ | |||
485 | "" \ | 485 | "" \ |
486 | "foo" | 486 | "foo" |
487 | 487 | ||
488 | optional PLATFORM_MINGW32 | ||
489 | testing 'awk match line ending' \ | ||
490 | "awk '/world$/'" \ | ||
491 | "world\n" \ | ||
492 | "" \ | ||
493 | "hello\r\nworld\r\n" | ||
494 | |||
495 | testing 'awk backslash+CRLF eaten with no trace' \ | ||
496 | "awk -f -" \ | ||
497 | "Hello world\n" \ | ||
498 | '' \ | ||
499 | 'BEGIN { printf "Hello\\\r\n world\\n" }\n' | ||
500 | SKIP= | ||
501 | |||
488 | exit $FAILCOUNT | 502 | exit $FAILCOUNT |