aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRon Yorston <rmy@pobox.com>2023-01-30 10:33:59 +0000
committerRon Yorston <rmy@pobox.com>2023-01-30 10:33:59 +0000
commit7a537eb71874cd6c8d0e9ab1f906e6466e7f793c (patch)
treeaf7b8c0218c5bc8b3e7c7aa5d86beca68350446f
parent33ae88c40cf56b53c06e627e0535c9c740767aa4 (diff)
downloadbusybox-w32-7a537eb71874cd6c8d0e9ab1f906e6466e7f793c.tar.gz
busybox-w32-7a537eb71874cd6c8d0e9ab1f906e6466e7f793c.tar.bz2
busybox-w32-7a537eb71874cd6c8d0e9ab1f906e6466e7f793c.zip
awk: CRLF handling
Previous efforts at handling DOS-style line endings in awk have included commits ee7e00dc5 and 1a3717342.

The use of remove_cr() is unwise:

- It's overzealous, removing all CRs, not just those in CRLF pairs.
- Even if that were fixed, awk reads input in chunks. There's a remote chance a CRLF might appear at a chunk boundary and be missed.

remove_cr() will be fixed separately. In awk, treat all data input as being in text mode.

Skipping CRs in skip_spaces() is also flawed. Instead, read scripts in text mode.

Add a couple of test cases. One of these (awk backslash+CRLF eaten with no trace) fails without this patch.
-rw-r--r--editors/awk.c37
-rwxr-xr-xtestsuite/awk.tests14
2 files changed, 32 insertions, 19 deletions
diff --git a/editors/awk.c b/editors/awk.c
index cf9269c6f..2c1272554 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -826,11 +826,7 @@ static char *skip_spaces(char *p)
826 if (*p == '\\' && p[1] == '\n') { 826 if (*p == '\\' && p[1] == '\n') {
827 p++; 827 p++;
828 t_lineno++; 828 t_lineno++;
829#if !ENABLE_PLATFORM_MINGW32
830 } else if (*p != ' ' && *p != '\t') { 829 } else if (*p != ' ' && *p != '\t') {
831#else
832 } else if (*p != ' ' && *p != '\t' && *p != '\r') {
833#endif
834 break; 830 break;
835 } 831 }
836 p++; 832 p++;
@@ -2231,21 +2227,6 @@ static int ptest(node *pattern)
2231 return istrue(evaluate(pattern, &G.ptest__tmpvar)); 2227 return istrue(evaluate(pattern, &G.ptest__tmpvar));
2232} 2228}
2233 2229
2234#if ENABLE_PLATFORM_MINGW32
2235static ssize_t FAST_FUNC safe_read_strip_cr(int fd, void *buf, size_t count)
2236{
2237 ssize_t n;
2238
2239 do {
2240 n = safe_read(fd, buf, count);
2241 } while (n > 0 && (n=remove_cr((char *)buf, n)) == 0);
2242
2243 return n;
2244}
2245
2246#define safe_read safe_read_strip_cr
2247#endif
2248
2249/* read next record from stream rsm into a variable v */ 2230/* read next record from stream rsm into a variable v */
2250static int awk_getline(rstream *rsm, var *v) 2231static int awk_getline(rstream *rsm, var *v)
2251{ 2232{
@@ -2834,6 +2815,15 @@ static int is_assignment(const char *expr)
2834 return TRUE; 2815 return TRUE;
2835} 2816}
2836 2817
2818
2819#if ENABLE_PLATFORM_MINGW32
2820static void set_text_mode(FILE *f)
2821{
2822 if (f)
2823 _setmode(fileno(f), _O_TEXT);
2824}
2825#endif
2826
2837/* switch to next input file */ 2827/* switch to next input file */
2838static rstream *next_input_file(void) 2828static rstream *next_input_file(void)
2839{ 2829{
@@ -2862,6 +2852,9 @@ static rstream *next_input_file(void)
2862 break; 2852 break;
2863 } 2853 }
2864 } 2854 }
2855#if ENABLE_PLATFORM_MINGW32
2856 set_text_mode(rsm.F);
2857#endif
2865 2858
2866 files_happen = TRUE; 2859 files_happen = TRUE;
2867 setvar_s(intvar[FILENAME], fname); 2860 setvar_s(intvar[FILENAME], fname);
@@ -3242,6 +3235,9 @@ static var *evaluate(node *op, var *res)
3242 } else { 3235 } else {
3243 rsm->F = fopen_for_read(L.s); /* not xfopen! */ 3236 rsm->F = fopen_for_read(L.s); /* not xfopen! */
3244 } 3237 }
3238#if ENABLE_PLATFORM_MINGW32
3239 set_text_mode(rsm->F);
3240#endif
3245 } 3241 }
3246 } else { 3242 } else {
3247 if (!iF) 3243 if (!iF)
@@ -3695,6 +3691,9 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3695 3691
3696 g_progname = llist_pop(&list_f); 3692 g_progname = llist_pop(&list_f);
3697 fd = xopen_stdin(g_progname); 3693 fd = xopen_stdin(g_progname);
3694#if ENABLE_PLATFORM_MINGW32
3695 _setmode(fd, _O_TEXT);
3696#endif
3698 s = xmalloc_read(fd, NULL); /* it's NUL-terminated */ 3697 s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
3699 close(fd); 3698 close(fd);
3700 parse_program(s); 3699 parse_program(s);
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index bbf0fbff1..11beb1b10 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -485,4 +485,18 @@ testing 'awk assign while test' \
485 "" \ 485 "" \
486 "foo" 486 "foo"
487 487
488optional PLATFORM_MINGW32
489testing 'awk match line ending' \
490 "awk '/world$/'" \
491 "world\n" \
492 "" \
493 "hello\r\nworld\r\n"
494
495testing 'awk backslash+CRLF eaten with no trace' \
496 "awk -f -" \
497 "Hello world\n" \
498 '' \
499 'BEGIN { printf "Hello\\\r\n world\\n" }\n'
500SKIP=
501
488exit $FAILCOUNT 502exit $FAILCOUNT