From 50e2b59370542097eb0efd79cc8d3d39ee52bf82 Mon Sep 17 00:00:00 2001 From: Colin McAllister Date: Wed, 17 Jul 2024 16:33:35 -0500 Subject: cut: fix "-s" flag to omit blank lines Using cut with the delimiter flag ("-d") with the "-s" flag to only output lines containing the delimiter will print blank lines. This deviates from the behavior of cut provided by GNU Coreutils. Blank lines should be omitted if "-s" is used with "-d". This change introduces a somewhat naive, yet efficient solution, where line length is checked before looping through bytes. If line length is zero and the "-s" flag is used, the code will jump to parsing the next line to avoid printing a newline character. function old new delta cut_main 1196 1185 -11 Signed-off-by: Colin McAllister Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 20 +++++++++++++++----- testsuite/cut.tests | 9 +++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/coreutils/cut.c b/coreutils/cut.c index d129f9b9d..b7fe11126 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -152,11 +152,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, unsigned uu = 0, start = 0, end = 0, out = 0; int dcount = 0; + /* Blank line? Check -s (later check for -s does not catch empty lines) */ + if (linelen == 0) { + if (option_mask32 & CUT_OPT_SUPPRESS_FLGS) + goto next_line; + } + /* Loop through bytes, finding next delimiter */ for (;;) { /* End of current range? 
*/ if (end == linelen || dcount > cut_lists[cl_pos].endpos) { - if (++cl_pos >= nlists) break; + if (++cl_pos >= nlists) + break; if (option_mask32 & CUT_OPT_NOSORT_FLGS) start = dcount = uu = 0; end = 0; @@ -175,15 +182,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (shoe) { regmatch_t rr = {-1, -1}; - if (!regexec(&reg, line+uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) { + if (!regexec(&reg, line + uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) { end = uu + rr.rm_so; uu += rr.rm_eo; } else { uu = linelen; continue; } - } else if (line[end = uu++] != *delim) - continue; + } else { + end = uu++; + if (line[end] != *delim) + continue; + } /* Got delimiter. Loop if not yet within range. */ if (dcount++ < cut_lists[cl_pos].startpos) { @@ -192,7 +202,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } } if (end != start || !shoe) - printf("%s%.*s", out++ ? odelim : "", end-start, line + start); + printf("%s%.*s", out++ ? odelim : "", end - start, line + start); start = uu; if (!dcount) break; diff --git a/testsuite/cut.tests b/testsuite/cut.tests index 2458c019c..0b401bc00 100755 --- a/testsuite/cut.tests +++ b/testsuite/cut.tests @@ -65,6 +65,15 @@ testing "cut with -d -f( ) -s" "cut -d' ' -f3 -s input && echo yes" "yes\n" "$in testing "cut with -d -f(a) -s" "cut -da -f3 -s input" "n\nsium:Jim\n\ncion:Ed\n" "$input" "" testing "cut with -d -f(a) -s -n" "cut -da -f3 -s -n input" "n\nsium:Jim\n\ncion:Ed\n" "$input" "" +input="\ + +foo bar baz + +bing bong boop + +" +testing "cut with -d -s omits blank lines" "cut -d' ' -f2 -s input" "bar\nbong\n" "$input" "" + # substitute for awk optional FEATURE_CUT_REGEX testing "cut -DF" "cut -DF 2,7,5" \ -- cgit v1.2.3-55-g6feb