From 50e2b59370542097eb0efd79cc8d3d39ee52bf82 Mon Sep 17 00:00:00 2001 From: Colin McAllister Date: Wed, 17 Jul 2024 16:33:35 -0500 Subject: cut: fix "-s" flag to omit blank lines Using cut with the delimiter flag ("-d") with the "-s" flag to only output lines containing the delimiter will print blank lines. This is deviant behavior from cut provided by GNU Coreutils. Blank lines should be omitted if "-s" is used with "-d". This change introduces a somewhat naive, yet efficient solution, where line length is checked before looping through bytes. If line length is zero and the "-s" flag is used, the code will jump to parsing the next line to avoid printing a newline character. function old new delta cut_main 1196 1185 -11 Signed-off-by: Colin McAllister Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index d129f9b9d..b7fe11126 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -152,11 +152,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, unsigned uu = 0, start = 0, end = 0, out = 0; int dcount = 0; + /* Blank line? Check -s (later check for -s does not catch empty lines) */ + if (linelen == 0) { + if (option_mask32 & CUT_OPT_SUPPRESS_FLGS) + goto next_line; + } + /* Loop through bytes, finding next delimiter */ for (;;) { /* End of current range?
*/ if (end == linelen || dcount > cut_lists[cl_pos].endpos) { - if (++cl_pos >= nlists) break; + if (++cl_pos >= nlists) + break; if (option_mask32 & CUT_OPT_NOSORT_FLGS) start = dcount = uu = 0; end = 0; @@ -175,15 +182,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (shoe) { regmatch_t rr = {-1, -1}; - if (!regexec(®, line+uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) { + if (!regexec(®, line + uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) { end = uu + rr.rm_so; uu += rr.rm_eo; } else { uu = linelen; continue; } - } else if (line[end = uu++] != *delim) - continue; + } else { + end = uu++; + if (line[end] != *delim) + continue; + } /* Got delimiter. Loop if not yet within range. */ if (dcount++ < cut_lists[cl_pos].startpos) { @@ -192,7 +202,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } } if (end != start || !shoe) - printf("%s%.*s", out++ ? odelim : "", end-start, line + start); + printf("%s%.*s", out++ ? odelim : "", end - start, line + start); start = uu; if (!dcount) break; -- cgit v1.2.3-55-g6feb From f02041441344389b05d10fe6ba8759b6670b8e10 Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Sun, 3 Nov 2024 12:47:27 +0000 Subject: cut: improve detection of invalid ranges Commit 0068ce2fa (cut: add toybox-compatible options -O OUTSEP, -D, -F LIST) added detection of reversed ranges. Further improvements are possible. - The test for reversed ranges compared the start after it had been decremented with the end before decrement. It thus missed ranges of the form 2-1. - Zero isn't a valid start value for a range. (Nor is it a valid end value, but that's caught by the test for a reversed range.) - The code if (!*ltok) e = INT_MAX; duplicates a check that's already been made. - Display the actual range in the error message to make it easier to find which range was at fault. 
function old new delta .rodata 100273 100287 +14 cut_main 1239 1237 -2 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 14/-2) Total: 12 bytes Signed-off-by: Ron Yorston Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 19 ++++++++----------- testsuite/cut.tests | 5 ++++- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index b7fe11126..f68bbbad5 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -278,29 +278,26 @@ int cut_main(int argc UNUSED_PARAM, char **argv) if (!ntok[0]) { s = 0; } else { - s = xatoi_positive(ntok); /* account for the fact that arrays are zero based, while * the user expects the first char on the line to be char #1 */ - if (s != 0) - s--; + s = xatoi_positive(ntok) - 1; } /* get the end pos */ if (ltok == NULL) { e = s; } else if (!ltok[0]) { + /* if the user specified no end position, + * that means "til the end of the line" */ e = INT_MAX; } else { - e = xatoi_positive(ltok); - /* if the user specified and end position of 0, - * that means "til the end of the line" */ - if (!*ltok) - e = INT_MAX; - else if (e < s) - bb_error_msg_and_die("%d<%d", e, s); - e--; /* again, arrays are zero based, lines are 1 based */ + /* again, arrays are zero based, lines are 1 based */ + e = xatoi_positive(ltok) - 1; } + if (s < 0 || e < s) + bb_error_msg_and_die("invalid range %s-%s", ntok, ltok ?: ntok); + /* add the new list */ cut_lists = xrealloc_vector(cut_lists, 4, nlists); /* NB: startpos is always >= 0 */ diff --git a/testsuite/cut.tests b/testsuite/cut.tests index 0b401bc00..c335f824b 100755 --- a/testsuite/cut.tests +++ b/testsuite/cut.tests @@ -31,7 +31,10 @@ testing "-b encapsulated" "cut -b 3-8,4-6 input" "e:two:\npha:be\ne quic\n" \ #testing "cut -bO overlaps" \ # "cut --output-delimiter ' ' -b 1-3,2-5,7-9,9-10 input" \ # "one:t o:th\nalpha beta\nthe q ick \n" "$abc" "" -testing "cut high-low 
error" "cut -b 8-3 abc.txt 2>/dev/null || echo err" "err\n" \ +testing "cut high-low error" "cut -b 8-3 input 2>/dev/null || echo err" "err\n" \ + "$abc" "" + +testing "cut -b 2-1 error" "cut -b 2-1 input 2>/dev/null || echo err" "err\n" \ "$abc" "" testing "cut -c a-b" "cut -c 4-10 input" ":two:th\nha:beta\n quick \n" "$abc" "" -- cgit v1.2.3-55-g6feb From 73e9d25d7503896e94b5c00093a77b33d1a17a0d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 01:38:31 +0100 Subject: cut: simplify OPT_ names, eliminate one variable Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 51 +++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index f68bbbad5..48f3656b4 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -65,14 +65,14 @@ typedef struct { int rm_eo, rm_so; } regmatch_t; /* option vars */ #define OPT_STR "b:c:f:d:O:sD"IF_FEATURE_CUT_REGEX("F:")"n" -#define CUT_OPT_BYTE_FLGS (1 << 0) -#define CUT_OPT_CHAR_FLGS (1 << 1) -#define CUT_OPT_FIELDS_FLGS (1 << 2) -#define CUT_OPT_DELIM_FLGS (1 << 3) -#define CUT_OPT_ODELIM_FLGS (1 << 4) -#define CUT_OPT_SUPPRESS_FLGS (1 << 5) -#define CUT_OPT_NOSORT_FLGS (1 << 6) -#define CUT_OPT_REGEX_FLGS ((1 << 7) * ENABLE_FEATURE_CUT_REGEX) +#define OPT_BYTE (1 << 0) +#define OPT_CHAR (1 << 1) +#define OPT_FIELDS (1 << 2) +#define OPT_DELIM (1 << 3) +#define OPT_ODELIM (1 << 4) +#define OPT_SUPPRESS (1 << 5) +#define OPT_NOSORT (1 << 6) +#define OPT_REGEX ((1 << 7) * ENABLE_FEATURE_CUT_REGEX) struct cut_list { int startpos; @@ -88,12 +88,14 @@ static int cmpfunc(const void *a, const void *b) static void cut_file(FILE *file, const char *delim, const char *odelim, const struct cut_list *cut_lists, unsigned nlists) { +#define opt_REGEX (option_mask32 & OPT_REGEX) char *line; unsigned linenum = 0; /* keep these zero-based to be consistent */ regex_t reg; - int spos, shoe = option_mask32 & 
CUT_OPT_REGEX_FLGS; + int spos; - if (shoe) xregcomp(&reg, delim, REG_EXTENDED); + if (opt_REGEX) + xregcomp(&reg, delim, REG_EXTENDED); /* go through every line in the file */ while ((line = xmalloc_fgetline(file)) != NULL) { @@ -105,7 +107,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, unsigned cl_pos = 0; /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */ - if (option_mask32 & (CUT_OPT_CHAR_FLGS | CUT_OPT_BYTE_FLGS)) { + if (option_mask32 & (OPT_CHAR | OPT_BYTE)) { /* print the chars specified in each cut list */ for (; cl_pos < nlists; cl_pos++) { for (spos = cut_lists[cl_pos].startpos; spos < linelen;) { @@ -154,7 +156,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* Blank line? Check -s (later check for -s does not catch empty lines) */ if (linelen == 0) { - if (option_mask32 & CUT_OPT_SUPPRESS_FLGS) + if (option_mask32 & OPT_SUPPRESS) goto next_line; } @@ -164,22 +166,22 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (end == linelen || dcount > cut_lists[cl_pos].endpos) { if (++cl_pos >= nlists) break; - if (option_mask32 & CUT_OPT_NOSORT_FLGS) + if (option_mask32 & OPT_NOSORT) start = dcount = uu = 0; end = 0; } /* End of current line? */ if (uu == linelen) { /* If we've seen no delimiters, check -s */ - if (!cl_pos && !dcount && !shoe) { - if (option_mask32 & CUT_OPT_SUPPRESS_FLGS) + if (!cl_pos && !dcount && !opt_REGEX) { + if (option_mask32 & OPT_SUPPRESS) goto next_line; } else if (dcount < cut_lists[cl_pos].startpos) start = linelen; end = linelen; } else { /* Find next delimiter */ - if (shoe) { + if (opt_REGEX) { regmatch_t rr = {-1, -1}; if (!regexec(&reg, line + uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) { @@ -201,7 +203,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, continue; } } - if (end != start || !shoe) + if (end != start || !opt_REGEX) printf("%s%.*s", out++ ?
odelim : "", end - start, line + start); start = uu; if (!dcount) @@ -215,6 +217,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, free(printed); free(orig_line); } +#undef opt_REGEX } int cut_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; @@ -235,23 +238,23 @@ int cut_main(int argc UNUSED_PARAM, char **argv) &sopt, &sopt, &sopt, &delim, &odelim IF_FEATURE_CUT_REGEX(, &sopt) ); if (!delim || !*delim) - delim = (opt & CUT_OPT_REGEX_FLGS) ? "[[:space:]]+" : "\t"; - if (!odelim) odelim = (opt & CUT_OPT_REGEX_FLGS) ? " " : delim; + delim = (opt & OPT_REGEX) ? "[[:space:]]+" : "\t"; + if (!odelim) odelim = (opt & OPT_REGEX) ? " " : delim; // argc -= optind; argv += optind; - if (!(opt & (CUT_OPT_BYTE_FLGS | CUT_OPT_CHAR_FLGS | CUT_OPT_FIELDS_FLGS | CUT_OPT_REGEX_FLGS))) + if (!(opt & (OPT_BYTE | OPT_CHAR | OPT_FIELDS | OPT_REGEX))) bb_simple_error_msg_and_die("expected a list of bytes, characters, or fields"); /* non-field (char or byte) cutting has some special handling */ - if (!(opt & (CUT_OPT_FIELDS_FLGS|CUT_OPT_REGEX_FLGS))) { + if (!(opt & (OPT_FIELDS|OPT_REGEX))) { static const char _op_on_field[] ALIGN1 = " only when operating on fields"; - if (opt & CUT_OPT_SUPPRESS_FLGS) { + if (opt & OPT_SUPPRESS) { bb_error_msg_and_die ("suppressing non-delimited lines makes sense%s", _op_on_field); } - if (opt & CUT_OPT_DELIM_FLGS) { + if (opt & OPT_DELIM) { bb_error_msg_and_die ("a delimiter may be specified%s", _op_on_field); } @@ -313,7 +316,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) /* now that the lists are parsed, we need to sort them to make life * easier on us when it comes time to print the chars / fields / lines */ - if (!(opt & CUT_OPT_NOSORT_FLGS)) + if (!(opt & OPT_NOSORT)) qsort(cut_lists, nlists, sizeof(cut_lists[0]), cmpfunc); } -- cgit v1.2.3-55-g6feb From ad12ab439b5d0383ac4ebe41479b694df0b2e70d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 01:44:13 +0100 Subject: cut: localize 'spos' 
variable, convert !NUMVAR to NUMVAR == 0 This improves readability Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 48f3656b4..e81c6fecb 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -92,7 +92,6 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, char *line; unsigned linenum = 0; /* keep these zero-based to be consistent */ regex_t reg; - int spos; if (opt_REGEX) xregcomp(&reg, delim, REG_EXTENDED); @@ -110,6 +109,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (option_mask32 & (OPT_CHAR | OPT_BYTE)) { /* print the chars specified in each cut list */ for (; cl_pos < nlists; cl_pos++) { + int spos; for (spos = cut_lists[cl_pos].startpos; spos < linelen;) { if (!printed[spos]) { printed[spos] = 'X'; @@ -121,7 +121,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } } } else if (*delim == '\n') { /* cut by lines */ - spos = cut_lists[cl_pos].startpos; + int spos = cut_lists[cl_pos].startpos; /* get out if we have no more lists to process or if the lines * are lower than what we're interested in */ @@ -173,7 +173,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* End of current line? */ if (uu == linelen) { /* If we've seen no delimiters, check -s */ - if (!cl_pos && !dcount && !opt_REGEX) { + if (cl_pos == 0 && dcount == 0 && !opt_REGEX) { if (option_mask32 & OPT_SUPPRESS) goto next_line; } else if (dcount < cut_lists[cl_pos].startpos) @@ -206,7 +206,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (end != start || !opt_REGEX) printf("%s%.*s", out++ ? odelim : "", end - start, line + start); start = uu; - if (!dcount) + if (dcount == 0) break; } } @@ -239,7 +239,8 @@ int cut_main(int argc UNUSED_PARAM, char **argv) ); if (!delim || !*delim) delim = (opt & OPT_REGEX) ?
"[[:space:]]+" : "\t"; - if (!odelim) odelim = (opt & OPT_REGEX) ? " " : delim; + if (!odelim) + odelim = (opt & OPT_REGEX) ? " " : delim; // argc -= optind; argv += optind; -- cgit v1.2.3-55-g6feb From 478ac90f2c55cf66c13aa9805bde69bc705647c3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 01:50:58 +0100 Subject: cut: allocate "printed" only if OPT_CHAR or OPT_BYTE Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index e81c6fecb..ca2408f97 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -101,12 +101,13 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* set up a list so we can keep track of what's been printed */ int linelen = strlen(line); - char *printed = xzalloc(linelen + 1); char *orig_line = line; unsigned cl_pos = 0; /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */ if (option_mask32 & (OPT_CHAR | OPT_BYTE)) { + char *printed = xzalloc(linelen + 1); + /* print the chars specified in each cut list */ for (; cl_pos < nlists; cl_pos++) { int spos; @@ -120,6 +121,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } } } + free(printed); } else if (*delim == '\n') { /* cut by lines */ int spos = cut_lists[cl_pos].startpos; @@ -214,9 +216,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, putchar('\n'); next_line: linenum++; - free(printed); free(orig_line); - } + } /* while (got line) */ #undef opt_REGEX } -- cgit v1.2.3-55-g6feb From 808155ebf3e731538643e2fa840daeb76aac5435 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 01:55:25 +0100 Subject: cut: "orig_line" is redundant, remove it Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index ca2408f97..72a6f2b80 
100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -101,7 +101,6 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* set up a list so we can keep track of what's been printed */ int linelen = strlen(line); - char *orig_line = line; unsigned cl_pos = 0; /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */ @@ -216,7 +215,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, putchar('\n'); next_line: linenum++; - free(orig_line); + free(line); } /* while (got line) */ #undef opt_REGEX } -- cgit v1.2.3-55-g6feb From 1220b1519d6bb46b2aba0559969bac7377f15794 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 02:15:25 +0100 Subject: cut: rename some variables to hopefully better names Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 72a6f2b80..0fbeff8ea 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -152,7 +152,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, puts(line); goto next_line; } else { /* cut by fields */ - unsigned uu = 0, start = 0, end = 0, out = 0; + unsigned next = 0, start = 0, end = 0; + int first_print = 1; int dcount = 0; /* Blank line? Check -s (later check for -s does not catch empty lines) */ @@ -168,11 +169,11 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (++cl_pos >= nlists) break; if (option_mask32 & OPT_NOSORT) - start = dcount = uu = 0; + start = dcount = next = 0; end = 0; } /* End of current line? 
*/ - if (uu == linelen) { + if (next == linelen) { /* If we've seen no delimiters, check -s */ if (cl_pos == 0 && dcount == 0 && !opt_REGEX) { if (option_mask32 & OPT_SUPPRESS) @@ -185,31 +186,36 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (opt_REGEX) { regmatch_t rr = {-1, -1}; - if (!regexec(&reg, line + uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) { - end = uu + rr.rm_so; - uu += rr.rm_eo; + if (!regexec(&reg, line + next, 1, &rr, REG_NOTBOL|REG_NOTEOL)) { + end = next + rr.rm_so; + next += rr.rm_eo; } else { - uu = linelen; + next = linelen; continue; } } else { - end = uu++; + end = next++; if (line[end] != *delim) continue; } /* Got delimiter. Loop if not yet within range. */ if (dcount++ < cut_lists[cl_pos].startpos) { - start = uu; + start = next; continue; } } - if (end != start || !opt_REGEX) - printf("%s%.*s", out++ ? odelim : "", end - start, line + start); - start = uu; + if (end != start || !opt_REGEX) { + if (first_print) { + first_print = 0; + printf("%.*s", end - start, line + start); + } else + printf("%s%.*s", odelim, end - start, line + start); + } + start = next; if (dcount == 0) break; - } + } /* byte loop */ } /* if we printed anything, finish with newline */ putchar('\n'); -- cgit v1.2.3-55-g6feb From 55fc6a18da068a67b1854e4ca6fbb8d92e3af745 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 02:29:48 +0100 Subject: cut: rename "cut_lists" to "cut_list" Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 0fbeff8ea..33aeff6ea 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -86,7 +86,7 @@ static int cmpfunc(const void *a, const void *b) } static void cut_file(FILE *file, const char *delim, const char *odelim, - const struct cut_list *cut_lists, unsigned nlists) + const struct cut_list *cut_list, unsigned nlists) { #define opt_REGEX
(option_mask32 & OPT_REGEX) char *line; @@ -110,19 +110,19 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* print the chars specified in each cut list */ for (; cl_pos < nlists; cl_pos++) { int spos; - for (spos = cut_lists[cl_pos].startpos; spos < linelen;) { + for (spos = cut_list[cl_pos].startpos; spos < linelen;) { if (!printed[spos]) { printed[spos] = 'X'; putchar(line[spos]); } - if (++spos > cut_lists[cl_pos].endpos) { + if (++spos > cut_list[cl_pos].endpos) { break; } } } free(printed); } else if (*delim == '\n') { /* cut by lines */ - int spos = cut_lists[cl_pos].startpos; + int spos = cut_list[cl_pos].startpos; /* get out if we have no more lists to process or if the lines * are lower than what we're interested in */ @@ -134,12 +134,12 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, while (spos < (int)linenum) { spos++; /* go to the next list if we're at the end of this one */ - if (spos > cut_lists[cl_pos].endpos) { + if (spos > cut_list[cl_pos].endpos) { cl_pos++; /* get out if there's no more lists to process */ if (cl_pos >= nlists) goto next_line; - spos = cut_lists[cl_pos].startpos; + spos = cut_list[cl_pos].startpos; /* get out if the current line is lower than the one * we just became interested in */ if ((int)linenum < spos) @@ -153,8 +153,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, goto next_line; } else { /* cut by fields */ unsigned next = 0, start = 0, end = 0; + int dcount = 0; /* Nth delimiter we saw (0 - didn't see any yet) */ int first_print = 1; - int dcount = 0; /* Blank line? Check -s (later check for -s does not catch empty lines) */ if (linelen == 0) { @@ -165,12 +165,12 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* Loop through bytes, finding next delimiter */ for (;;) { /* End of current range? 
*/ - if (end == linelen || dcount > cut_lists[cl_pos].endpos) { + if (end == linelen || dcount > cut_list[cl_pos].endpos) { if (++cl_pos >= nlists) break; if (option_mask32 & OPT_NOSORT) start = dcount = next = 0; - end = 0; + end = 0; /* (why?) */ } /* End of current line? */ if (next == linelen) { @@ -178,8 +178,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (cl_pos == 0 && dcount == 0 && !opt_REGEX) { if (option_mask32 & OPT_SUPPRESS) goto next_line; - } else if (dcount < cut_lists[cl_pos].startpos) - start = linelen; + /* else: will print entire line */ + } else if (dcount < cut_list[cl_pos].startpos) + start = linelen; /* do not print */ end = linelen; } else { /* Find next delimiter */ @@ -200,7 +201,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } /* Got delimiter. Loop if not yet within range. */ - if (dcount++ < cut_lists[cl_pos].startpos) { + if (dcount++ < cut_list[cl_pos].startpos) { start = next; continue; } @@ -230,7 +231,7 @@ int cut_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int cut_main(int argc UNUSED_PARAM, char **argv) { /* growable array holding a series of lists */ - struct cut_list *cut_lists = NULL; + struct cut_list *cut_list = NULL; unsigned nlists = 0; /* number of elements in above list */ char *sopt, *ltok; const char *delim = NULL; @@ -309,10 +310,10 @@ int cut_main(int argc UNUSED_PARAM, char **argv) bb_error_msg_and_die("invalid range %s-%s", ntok, ltok ?: ntok); /* add the new list */ - cut_lists = xrealloc_vector(cut_lists, 4, nlists); + cut_list = xrealloc_vector(cut_list, 4, nlists); /* NB: startpos is always >= 0 */ - cut_lists[nlists].startpos = s; - cut_lists[nlists].endpos = e; + cut_list[nlists].startpos = s; + cut_list[nlists].endpos = e; nlists++; } @@ -324,7 +325,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) * easier on us when it comes time to print the chars / fields / lines */ if (!(opt & OPT_NOSORT)) - qsort(cut_lists, nlists, 
sizeof(cut_lists[0]), cmpfunc); + qsort(cut_list, nlists, sizeof(cut_list[0]), cmpfunc); } { @@ -339,12 +340,12 @@ int cut_main(int argc UNUSED_PARAM, char **argv) retval = EXIT_FAILURE; continue; } - cut_file(file, delim, odelim, cut_lists, nlists); + cut_file(file, delim, odelim, cut_list, nlists); fclose_if_not_stdin(file); } while (*++argv); if (ENABLE_FEATURE_CLEAN_UP) - free(cut_lists); + free(cut_list); fflush_stdout_and_exit(retval); } } -- cgit v1.2.3-55-g6feb From 470f00955212368cb688832e2e4b1fdd165e9ec6 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 02:36:59 +0100 Subject: cut: with -F, do not regcomp() pattern for every file function old new delta cut_main 1218 1228 +10 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 33aeff6ea..1eb4968d9 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -34,6 +34,7 @@ //usage: "\n -c LIST Output only characters from LIST" //usage: "\n -d SEP Field delimiter for input (default -f TAB, -F run of whitespace)" //usage: "\n -O SEP Field delimeter for output (default = -d for -f, one space for -F)" +//TODO: --output-delimiter=SEP //usage: "\n -D Don't sort/collate sections or match -fF lines without delimeter" //usage: "\n -f LIST Print only these fields (-d is single char)" //usage: IF_FEATURE_CUT_REGEX( @@ -53,11 +54,6 @@ #if ENABLE_FEATURE_CUT_REGEX #include "xregex.h" -#else -#define regex_t int -typedef struct { int rm_eo, rm_so; } regmatch_t; -#define xregcomp(x, ...) *(x) = 0 -#define regexec(...) 0 #endif /* This is a NOEXEC applet. Be very careful! 
*/ @@ -74,6 +70,8 @@ typedef struct { int rm_eo, rm_so; } regmatch_t; #define OPT_NOSORT (1 << 6) #define OPT_REGEX ((1 << 7) * ENABLE_FEATURE_CUT_REGEX) +#define opt_REGEX (option_mask32 & OPT_REGEX) + struct cut_list { int startpos; int endpos; @@ -88,13 +86,8 @@ static int cmpfunc(const void *a, const void *b) static void cut_file(FILE *file, const char *delim, const char *odelim, const struct cut_list *cut_list, unsigned nlists) { -#define opt_REGEX (option_mask32 & OPT_REGEX) char *line; unsigned linenum = 0; /* keep these zero-based to be consistent */ - regex_t reg; - - if (opt_REGEX) - xregcomp(&reg, delim, REG_EXTENDED); /* go through every line in the file */ while ((line = xmalloc_fgetline(file)) != NULL) { @@ -121,7 +114,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } } free(printed); - } else if (*delim == '\n') { /* cut by lines */ + } else if (!opt_REGEX && *delim == '\n') { /* cut by lines */ int spos = cut_list[cl_pos].startpos; /* get out if we have no more lists to process or if the lines @@ -181,20 +174,24 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* else: will print entire line */ } else if (dcount < cut_list[cl_pos].startpos) start = linelen; /* do not print */ - end = linelen; + end = linelen; /* print up to end */ } else { /* Find next delimiter */ +#if ENABLE_FEATURE_CUT_REGEX if (opt_REGEX) { regmatch_t rr = {-1, -1}; + regex_t *reg = (void*) delim; - if (!regexec(&reg, line + next, 1, &rr, REG_NOTBOL|REG_NOTEOL)) { - end = next + rr.rm_so; - next += rr.rm_eo; - } else { + if (regexec(reg, line + next, 1, &rr, REG_NOTBOL|REG_NOTEOL) != 0) { + /* not found, go to "end of line" logic */ next = linelen; continue; } - } else { + end = next + rr.rm_so; + next += rr.rm_eo; + } else +#endif + { end = next++; if (line[end] != *delim) continue; @@ -224,7 +221,6 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, linenum++; free(line); } /* while (got line) */ -#undef
opt_REGEX } int cut_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; @@ -237,6 +233,9 @@ int cut_main(int argc UNUSED_PARAM, char **argv) const char *delim = NULL; const char *odelim = NULL; unsigned opt; +#if ENABLE_FEATURE_CUT_REGEX + regex_t reg; +#endif #define ARG "bcf"IF_FEATURE_CUT_REGEX("F") opt = getopt32(argv, "^" @@ -328,6 +327,13 @@ int cut_main(int argc UNUSED_PARAM, char **argv) qsort(cut_list, nlists, sizeof(cut_list[0]), cmpfunc); } +#if ENABLE_FEATURE_CUT_REGEX + if (opt & OPT_REGEX) { + xregcomp(&reg, delim, REG_EXTENDED); + delim = (void*) &reg; + } +#endif + { exitcode_t retval = EXIT_SUCCESS; -- cgit v1.2.3-55-g6feb From 103139d0e6e97c188a647adeb5c71eb39c308c26 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 17:21:30 +0100 Subject: cut: fix "echo 1.2 | cut -d. -f1,3" (print "1", not "1.") function old new delta cut_main 1228 1201 -27 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 23 ++++++++++++++++++----- testsuite/cut.tests | 11 +++++++++-- 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 1eb4968d9..2511befc8 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -114,7 +114,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } } free(printed); - } else if (!opt_REGEX && *delim == '\n') { /* cut by lines */ + /* Cut by lines */ + } else if (!opt_REGEX && *delim == '\n') { int spos = cut_list[cl_pos].startpos; /* get out if we have no more lists to process or if the lines @@ -144,7 +145,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, * looking for, so print it */ puts(line); goto next_line; - } else { /* cut by fields */ + /* Cut by fields */ + } else { unsigned next = 0, start = 0, end = 0; int dcount = 0; /* Nth delimiter we saw (0 - didn't see any yet) */ int first_print = 1; @@ -159,22 +161,33 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, for (;;) { /* End of
current range? */ if (end == linelen || dcount > cut_list[cl_pos].endpos) { + end_of_range: if (++cl_pos >= nlists) break; if (option_mask32 & OPT_NOSORT) start = dcount = next = 0; end = 0; /* (why?) */ + //bb_error_msg("End of current range"); } /* End of current line? */ if (next == linelen) { + end = linelen; /* print up to end */ /* If we've seen no delimiters, check -s */ if (cl_pos == 0 && dcount == 0 && !opt_REGEX) { if (option_mask32 & OPT_SUPPRESS) goto next_line; /* else: will print entire line */ - } else if (dcount < cut_list[cl_pos].startpos) - start = linelen; /* do not print */ - end = linelen; /* print up to end */ + } else if (dcount < cut_list[cl_pos].startpos) { + /* echo 1.2 | cut -d. -f1,3: prints "1", not "1." */ + //break; + /* ^^^ this fails a case with -D: + * echo 1 2 | cut -DF 1,3,2: + * do not end line processing when didn't find field#3 + */ + //if (option_mask32 & OPT_NOSORT) - no, just do it always + goto end_of_range; + } + //bb_error_msg("End of current line: s:%d e:%d", start, end); } else { /* Find next delimiter */ #if ENABLE_FEATURE_CUT_REGEX diff --git a/testsuite/cut.tests b/testsuite/cut.tests index c335f824b..46ef545d7 100755 --- a/testsuite/cut.tests +++ b/testsuite/cut.tests @@ -90,7 +90,14 @@ Subcalifragilisticexpialidocious. Auntie Em: Hate you, hate Kansas. Took the dog. Dorothy." 
SKIP= -testing "cut empty field" "cut -d ':' -f 1-3" "a::b\n" "" "a::b\n" -testing "cut empty field 2" "cut -d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n" +testing "cut empty field" "cut -d ':' -f 1-3" \ + "a::b\n" \ + "" "a::b\n" +testing "cut empty field 2" "cut -d ':' -f 3-5" \ + "b::c\n" \ + "" "a::b::c:d\n" +testing "cut non-existing field" "cut -d ':' -f1,3" \ + "1\n" \ + "" "1:\n" exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From 38a3c0ad324359904ba3a97b097d8f94ef0cd863 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 17:50:29 +0100 Subject: cut: comment out code which seems to be not needed Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 2511befc8..54f74fc7e 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -33,14 +33,14 @@ //usage: "\n -b LIST Output only bytes from LIST" //usage: "\n -c LIST Output only characters from LIST" //usage: "\n -d SEP Field delimiter for input (default -f TAB, -F run of whitespace)" +//usage: "\n -s Drop lines with no delimiter" //usage: "\n -O SEP Field delimeter for output (default = -d for -f, one space for -F)" //TODO: --output-delimiter=SEP -//usage: "\n -D Don't sort/collate sections or match -fF lines without delimeter" //usage: "\n -f LIST Print only these fields (-d is single char)" //usage: IF_FEATURE_CUT_REGEX( //usage: "\n -F LIST Print only these fields (-d is regex)" //usage: ) -//usage: "\n -s Output only lines containing delimiter" +//usage: "\n -D Don't sort/collate sections or match -fF lines without delimeter" //usage: "\n -n Ignored" //(manpage:-n with -b: don't split multibyte characters) //usage: @@ -148,7 +148,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* Cut by fields */ } else { unsigned next = 0, start = 0, end = 0; - int dcount = 0; /* Nth delimiter we saw (0 - didn't see any yet) */ + int dcount = 0; /* we saw 
Nth delimiter (0 - didn't see any yet) */ int first_print = 1; /* Blank line? Check -s (later check for -s does not catch empty lines) */ @@ -209,9 +209,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (line[end] != *delim) continue; } - - /* Got delimiter. Loop if not yet within range. */ + /* Got delimiter */ if (dcount++ < cut_list[cl_pos].startpos) { + /* Not yet within range - loop */ start = next; continue; } @@ -224,8 +224,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, printf("%s%.*s", odelim, end - start, line + start); } start = next; - if (dcount == 0) - break; + //if (dcount == 0) + // break; - why? } /* byte loop */ } /* if we printed anything, finish with newline */ -- cgit v1.2.3-55-g6feb From b402b13fab027185e460a4020ee0a0b897bba441 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 18:04:13 +0100 Subject: cut: with -F disabled in .config, don't show it in --help function old new delta packed_usage 34897 34849 -48 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 54f74fc7e..f4d930db0 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -32,15 +32,23 @@ //usage: "Print selected fields from FILEs to stdout\n" //usage: "\n -b LIST Output only bytes from LIST" //usage: "\n -c LIST Output only characters from LIST" -//usage: "\n -d SEP Field delimiter for input (default -f TAB, -F run of whitespace)" -//usage: "\n -s Drop lines with no delimiter" -//usage: "\n -O SEP Field delimeter for output (default = -d for -f, one space for -F)" -//TODO: --output-delimiter=SEP +//usage: IF_FEATURE_CUT_REGEX( +//usage: "\n -d SEP Input field delimiter (default -f TAB, -F run of whitespace)" +//usage: ) IF_NOT_FEATURE_CUT_REGEX( +//usage: "\n -d SEP Input field delimiter (default TAB)" +//usage: ) //usage: "\n -f LIST Print only these fields 
(-d is single char)" //usage: IF_FEATURE_CUT_REGEX( //usage: "\n -F LIST Print only these fields (-d is regex)" //usage: ) -//usage: "\n -D Don't sort/collate sections or match -fF lines without delimeter" +//usage: "\n -s Drop lines with no delimiter (else print them in full)" +//usage: "\n -D Don't sort/collate sections or match -f"IF_FEATURE_CUT_REGEX("F")" lines without delimeter" +//usage: IF_FEATURE_CUT_REGEX( +//usage: "\n -O SEP Output field delimeter (default = -d for -f, one space for -F)" +//usage: ) IF_NOT_FEATURE_CUT_REGEX( +//usage: "\n -O SEP Output field delimeter (default = -d)" +//usage: ) +//TODO: --output-delimiter=SEP //usage: "\n -n Ignored" //(manpage:-n with -b: don't split multibyte characters) //usage: -- cgit v1.2.3-55-g6feb From a208fa03de8467d1f51821f874cbf142aaad74fa Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 18:49:10 +0100 Subject: cut: implement --output-delimiter function old new delta cut_main 1204 1261 +57 static.cut_longopts - 20 +20 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 1/0 up/down: 77/0) Total: 77 bytes Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 27 +++++++++++++++++++++++++-- testsuite/cut.tests | 32 ++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 10 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index f4d930db0..9f5b649d8 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -107,6 +107,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */ if (option_mask32 & (OPT_CHAR | OPT_BYTE)) { char *printed = xzalloc(linelen + 1); + int need_odelim = 0; /* print the chars specified in each cut list */ for (; cl_pos < nlists; cl_pos++) { @@ -114,9 +115,14 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, for (spos = cut_list[cl_pos].startpos; spos < 
linelen;) { if (!printed[spos]) { printed[spos] = 'X'; + if (need_odelim && spos != 0 && !printed[spos-1]) { + need_odelim = 0; + fputs_stdout(odelim); + } putchar(line[spos]); } if (++spos > cut_list[cl_pos].endpos) { + need_odelim = (odelim && odelim[0]); /* will print OSEP (if not empty) */ break; } } @@ -165,6 +171,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, goto next_line; } + if (!odelim) + odelim = "\t"; + /* Loop through bytes, finding next delimiter */ for (;;) { /* End of current range? */ @@ -257,17 +266,31 @@ int cut_main(int argc UNUSED_PARAM, char **argv) #if ENABLE_FEATURE_CUT_REGEX regex_t reg; #endif +#if ENABLE_LONG_OPTS + static const char cut_longopts[] ALIGN1 = + "output-delimiter\0" Required_argument "O" + ; +#endif #define ARG "bcf"IF_FEATURE_CUT_REGEX("F") +#if !ENABLE_LONG_OPTS opt = getopt32(argv, "^" OPT_STR // = "b:c:f:d:O:sD"IF_FEATURE_CUT_REGEX("F:")"n" "\0" "b--"ARG":c--"ARG":f--"ARG IF_FEATURE_CUT_REGEX("F--"ARG), &sopt, &sopt, &sopt, &delim, &odelim IF_FEATURE_CUT_REGEX(, &sopt) ); - if (!delim || !*delim) - delim = (opt & OPT_REGEX) ? "[[:space:]]+" : "\t"; +#else + opt = getopt32long(argv, "^" + OPT_STR // = "b:c:f:d:O:sD"IF_FEATURE_CUT_REGEX("F:")"n" + "\0" "b--"ARG":c--"ARG":f--"ARG IF_FEATURE_CUT_REGEX("F--"ARG), + cut_longopts, + &sopt, &sopt, &sopt, &delim, &odelim IF_FEATURE_CUT_REGEX(, &sopt) + ); +#endif if (!odelim) odelim = (opt & OPT_REGEX) ? " " : delim; + if (!delim || !*delim) + delim = (opt & OPT_REGEX) ? 
"[[:space:]]+" : "\t"; // argc -= optind; argv += optind; diff --git a/testsuite/cut.tests b/testsuite/cut.tests index 46ef545d7..ba5f88d60 100755 --- a/testsuite/cut.tests +++ b/testsuite/cut.tests @@ -23,14 +23,30 @@ the quick brown fox jumps over the lazy dog testing "cut -b a,a,a" "cut -b 3,3,3 input" "e\np\ne\n" "$abc" "" -testing "cut -b overlaps" "cut -b 1-3,2-5,7-9,9-10 input" \ - "one:to:th\nalphabeta\nthe qick \n" "$abc" "" -testing "-b encapsulated" "cut -b 3-8,4-6 input" "e:two:\npha:be\ne quic\n" \ - "$abc" "" -# --output-delimiter not implemnted (yet?) -#testing "cut -bO overlaps" \ -# "cut --output-delimiter ' ' -b 1-3,2-5,7-9,9-10 input" \ -# "one:t o:th\nalpha beta\nthe q ick \n" "$abc" "" +testing "cut -b overlaps" \ + "cut -b 1-3,2-5,7-9,9-10 input" \ + "\ +one:to:th +alphabeta +the qick \n" \ + "$abc" "" +testing "-b encapsulated" \ + "cut -b 3-8,4-6 input" \ + "\ +e:two: +pha:be +e quic\n" \ + "$abc" "" +optional LONG_OPTS +testing "cut -b --output-delimiter overlaps" \ + "cut --output-delimiter='^' -b 1-3,2-5,7-9,9-10 input" \ + "\ +one:t^o:th +alpha^beta +the q^ick \n" \ + "$abc" "" +SKIP= + testing "cut high-low error" "cut -b 8-3 input 2>/dev/null || echo err" "err\n" \ "$abc" "" -- cgit v1.2.3-55-g6feb From 9e364b16d1020cb7b8f8f4d4f3fe1833496b7a12 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 10 Dec 2024 21:01:52 +0100 Subject: cut: fix -d$'\n' --output-delimiter=@@ behavior function old new delta cut_main 1261 1353 +92 packed_usage 34925 34901 -24 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 92/-24) Total: 68 bytes Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 34 +++++++++++++++++++++++++++------- testsuite/cut.tests | 26 ++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 9f5b649d8..2d0a6237c 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c 
@@ -43,11 +43,19 @@ //usage: ) //usage: "\n -s Drop lines with no delimiter (else print them in full)" //usage: "\n -D Don't sort/collate sections or match -f"IF_FEATURE_CUT_REGEX("F")" lines without delimeter" +//usage: IF_LONG_OPTS( +//usage: IF_FEATURE_CUT_REGEX( +//usage: "\n --output-delimiter SEP Output field delimeter (default = -d for -f, one space for -F)" +//usage: ) IF_NOT_FEATURE_CUT_REGEX( +//usage: "\n --output-delimiter SEP Output field delimeter (default = -d)" +//usage: ) +//usage: ) IF_NOT_LONG_OPTS( //usage: IF_FEATURE_CUT_REGEX( //usage: "\n -O SEP Output field delimeter (default = -d for -f, one space for -F)" //usage: ) IF_NOT_FEATURE_CUT_REGEX( //usage: "\n -O SEP Output field delimeter (default = -d)" //usage: ) +//usage: ) //TODO: --output-delimiter=SEP //usage: "\n -n Ignored" //(manpage:-n with -b: don't split multibyte characters) @@ -96,6 +104,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, { char *line; unsigned linenum = 0; /* keep these zero-based to be consistent */ + int first_print = 1; /* go through every line in the file */ while ((line = xmalloc_fgetline(file)) != NULL) { @@ -130,16 +139,16 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, free(printed); /* Cut by lines */ } else if (!opt_REGEX && *delim == '\n') { - int spos = cut_list[cl_pos].startpos; + unsigned spos = cut_list[cl_pos].startpos; /* get out if we have no more lists to process or if the lines * are lower than what we're interested in */ - if (((int)linenum < spos) || (cl_pos >= nlists)) + if ((linenum < spos) || (cl_pos >= nlists)) goto next_line; /* if the line we're looking for is lower than the one we were * passed, it means we displayed it already, so move on */ - while (spos < (int)linenum) { + while (spos < linenum) { spos++; /* go to the next list if we're at the end of this one */ if (spos > cut_list[cl_pos].endpos) { @@ -150,20 +159,23 @@ static void cut_file(FILE *file, const char *delim, const 
char *odelim, spos = cut_list[cl_pos].startpos; /* get out if the current line is lower than the one * we just became interested in */ - if ((int)linenum < spos) + if (linenum < spos) goto next_line; } } /* If we made it here, it means we've found the line we're * looking for, so print it */ - puts(line); + if (first_print) { + first_print = 0; + fputs_stdout(line); + } else + printf("%s%s", odelim, line); goto next_line; /* Cut by fields */ } else { unsigned next = 0, start = 0, end = 0; int dcount = 0; /* we saw Nth delimiter (0 - didn't see any yet) */ - int first_print = 1; /* Blank line? Check -s (later check for -s does not catch empty lines) */ if (linelen == 0) { @@ -173,6 +185,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (!odelim) odelim = "\t"; + first_print = 1; /* Loop through bytes, finding next delimiter */ for (;;) { @@ -233,7 +246,10 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, continue; } } - if (end != start || !opt_REGEX) { +#if ENABLE_FEATURE_CUT_REGEX + if (end != start || !opt_REGEX) +#endif + { if (first_print) { first_print = 0; printf("%.*s", end - start, line + start); @@ -251,6 +267,10 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, linenum++; free(line); } /* while (got line) */ + + /* For -d$'\n' --output-delimiter=^, the overall output is still terminated with \n, not ^ */ + if (!opt_REGEX && *delim == '\n' && !first_print) + putchar('\n'); } int cut_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; diff --git a/testsuite/cut.tests b/testsuite/cut.tests index ba5f88d60..8da390cd7 100755 --- a/testsuite/cut.tests +++ b/testsuite/cut.tests @@ -116,4 +116,30 @@ testing "cut non-existing field" "cut -d ':' -f1,3" \ "1\n" \ "" "1:\n" +# cut -d$'\n' has a special meaning: "select input lines". +# I didn't find any documentation for this feature. 
+testing "cut -dNEWLINE" \ + "cut -d' +' -f4,2,6-8" \ + "2\n4\n6\n7\n" \ + "" "1\n2\n3\n4\n5\n6\n7" + +testing "cut -dNEWLINE --output-delimiter" \ + "cut -d' +' -O@@ -f4,2,6-8" \ + "2@@4@@6@@7\n" \ + "" "1\n2\n3\n4\n5\n6\n7" + +testing "cut -dNEWLINE --output-delimiter 2" \ + "cut -d' +' -O@@ -f4,2,6-8" \ + "2@@4@@6@@7\n" \ + "" "1\n2\n3\n4\n5\n6\n7\n" + +testing "cut -dNEWLINE --output-delimiter EMPTY_INPUT" \ + "cut -d' +' -O@@ -f4,2,6-8" \ + "" \ + "" "" + exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From a4894eaf713f0e452c272db1c5dc2a459e05808f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 11 Dec 2024 01:10:05 +0100 Subject: cut: tweak --help function old new delta packed_usage 34901 34934 +33 cut_main 1353 1339 -14 .rodata 105724 105685 -39 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/2 up/down: 33/-53) Total: -20 bytes Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 17 ++++++++--------- libbb/getopt32.c | 4 ++-- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 2d0a6237c..1e9867858 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -27,7 +27,8 @@ //kbuild:lib-$(CONFIG_CUT) += cut.o //usage:#define cut_trivial_usage -//usage: "[OPTIONS] [FILE]..." +//usage: "{-b|c LIST | -f"IF_FEATURE_CUT_REGEX("|F")" LIST [-d SEP] [-s]} [-D] [-O SEP] [FILE]..." 
+// --output-delimiter SEP is too long to fit into 80 char-wide help ----------------^^^^^^^^ //usage:#define cut_full_usage "\n\n" //usage: "Print selected fields from FILEs to stdout\n" //usage: "\n -b LIST Output only bytes from LIST" @@ -44,11 +45,7 @@ //usage: "\n -s Drop lines with no delimiter (else print them in full)" //usage: "\n -D Don't sort/collate sections or match -f"IF_FEATURE_CUT_REGEX("F")" lines without delimeter" //usage: IF_LONG_OPTS( -//usage: IF_FEATURE_CUT_REGEX( -//usage: "\n --output-delimiter SEP Output field delimeter (default = -d for -f, one space for -F)" -//usage: ) IF_NOT_FEATURE_CUT_REGEX( -//usage: "\n --output-delimiter SEP Output field delimeter (default = -d)" -//usage: ) +//usage: "\n --output-delimiter SEP Output field delimeter" //usage: ) IF_NOT_LONG_OPTS( //usage: IF_FEATURE_CUT_REGEX( //usage: "\n -O SEP Output field delimeter (default = -d for -f, one space for -F)" @@ -302,7 +299,8 @@ int cut_main(int argc UNUSED_PARAM, char **argv) #else opt = getopt32long(argv, "^" OPT_STR // = "b:c:f:d:O:sD"IF_FEATURE_CUT_REGEX("F:")"n" - "\0" "b--"ARG":c--"ARG":f--"ARG IF_FEATURE_CUT_REGEX("F--"ARG), + "\0" "b:c:f:"IF_FEATURE_CUT_REGEX("F:") /* one of -bcfF is required */ + "b--"ARG":c--"ARG":f--"ARG IF_FEATURE_CUT_REGEX(":F--"ARG), /* they are mutually exclusive */ cut_longopts, &sopt, &sopt, &sopt, &delim, &odelim IF_FEATURE_CUT_REGEX(, &sopt) ); @@ -314,8 +312,9 @@ int cut_main(int argc UNUSED_PARAM, char **argv) // argc -= optind; argv += optind; - if (!(opt & (OPT_BYTE | OPT_CHAR | OPT_FIELDS | OPT_REGEX))) - bb_simple_error_msg_and_die("expected a list of bytes, characters, or fields"); + //if (!(opt & (OPT_BYTE | OPT_CHAR | OPT_FIELDS | OPT_REGEX))) + // bb_simple_error_msg_and_die("expected a list of bytes, characters, or fields"); + // ^^^ handled by getopt32 /* non-field (char or byte) cutting has some special handling */ if (!(opt & (OPT_FIELDS|OPT_REGEX))) { diff --git a/libbb/getopt32.c b/libbb/getopt32.c index 
a8dd85159..b5efa19ac 100644 --- a/libbb/getopt32.c +++ b/libbb/getopt32.c @@ -93,7 +93,7 @@ getopt32(char **argv, const char *applet_opts, ...) "!" If the first character in the applet_opts string is a '!', report bad options, missing required options, - inconsistent options with all-ones return value (instead of abort. + inconsistent options with all-ones return value instead of aborting. "+" If the first character in the applet_opts string is a plus, then option processing will stop as soon as a non-option is @@ -265,7 +265,7 @@ Special characters: for "long options only" cases, such as tar --exclude=PATTERN, wget --header=HDR cases. - "a?b" A "?" between an option and a group of options means that + "a?bc" A "?" between an option and a group of options means that at least one of them is required to occur if the first option occurs in preceding command line arguments. -- cgit v1.2.3-55-g6feb From 9adafbc1184a413999e7c8bbfc2de85bda3e0b97 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 13 Dec 2024 19:06:58 +0100 Subject: cut: prevent infinite loop if -F REGEX matches empty delimiter function old new delta cut_main 1339 1348 +9 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 1e9867858..e12c56732 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -228,7 +228,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, continue; } end = next + rr.rm_so; - next += rr.rm_eo; + next += (rr.rm_eo ? 
rr.rm_eo : 1); + /* ^^^ advancing by at least 1 prevents infinite loops */ + /* testcase: echo "no at sign" | cut -d'@*' -F 1- */ } else #endif { -- cgit v1.2.3-55-g6feb From 96a230f17ba6cb1a7ad1e383d595424da02e87fc Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 13 Dec 2024 19:31:41 +0100 Subject: cut: fix handling of -d '' Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 2 +- testsuite/cut.tests | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index e12c56732..e33626d7f 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -309,7 +309,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) #endif if (!odelim) odelim = (opt & OPT_REGEX) ? " " : delim; - if (!delim || !*delim) + if (!delim) delim = (opt & OPT_REGEX) ? "[[:space:]]+" : "\t"; // argc -= optind; diff --git a/testsuite/cut.tests b/testsuite/cut.tests index 8da390cd7..52d05fde3 100755 --- a/testsuite/cut.tests +++ b/testsuite/cut.tests @@ -142,4 +142,16 @@ testing "cut -dNEWLINE --output-delimiter EMPTY_INPUT" \ "" \ "" "" +# This seems to work as if delimiter is never found. 
+# We test here that -d '' does *not* operate as if there was no -d +# and delimiter has defaulted to TAB: +testing "cut -d EMPTY" \ + "cut -d '' -f2-" \ + "1 2\t3 4 5\n" \ + "" "1 2\t3 4 5\n" +testing "cut -d EMPTY -s" \ + "cut -d '' -f2- -s" \ + "" \ + "" "1 2\t3 4 5\n" + exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From ed6561685c8b37141a17459655774ec3386dd021 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 13 Dec 2024 19:39:34 +0100 Subject: cut: tweak comments Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index e33626d7f..e624ab4a5 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -53,9 +53,8 @@ //usage: "\n -O SEP Output field delimeter (default = -d)" //usage: ) //usage: ) -//TODO: --output-delimiter=SEP //usage: "\n -n Ignored" -//(manpage:-n with -b: don't split multibyte characters) +//(manpage:-n with -b: don't split multibyte characters) //usage: //usage:#define cut_example_usage //usage: "$ echo \"Hello world\" | cut -f 1 -d ' '\n" @@ -318,7 +317,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) // bb_simple_error_msg_and_die("expected a list of bytes, characters, or fields"); // ^^^ handled by getopt32 - /* non-field (char or byte) cutting has some special handling */ + /* non-field (char or byte) cutting has some special handling */ if (!(opt & (OPT_FIELDS|OPT_REGEX))) { static const char _op_on_field[] ALIGN1 = " only when operating on fields"; -- cgit v1.2.3-55-g6feb From f4f8dc68645c061d4ccec5481bc3a659b02bc84f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 13 Dec 2024 19:43:53 +0100 Subject: cut: whitespace fixes Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index e624ab4a5..6eac7793f 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -285,9 +285,9 @@ int 
cut_main(int argc UNUSED_PARAM, char **argv) regex_t reg; #endif #if ENABLE_LONG_OPTS - static const char cut_longopts[] ALIGN1 = - "output-delimiter\0" Required_argument "O" - ; + static const char cut_longopts[] ALIGN1 = + "output-delimiter\0" Required_argument "O" + ; #endif #define ARG "bcf"IF_FEATURE_CUT_REGEX("F") -- cgit v1.2.3-55-g6feb From 7624077772878db25d8221fc4d6f731e29ebcdba Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 13 Dec 2024 20:22:05 +0100 Subject: cut: "it's legal to pass an empty list" seems to be untrue function old new delta cut_main 1344 1339 -5 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 90 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 46 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 6eac7793f..9a99ad05c 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -336,61 +336,59 @@ int cut_main(int argc UNUSED_PARAM, char **argv) * valid list formats: N, N-, N-M, -M * more than one list can be separated by commas */ - { + /* take apart the lists, one by one (they are separated with commas) */ + while ((ltok = strsep(&sopt, ",")) != NULL) { char *ntok; - int s = 0, e = 0; - - /* take apart the lists, one by one (they are separated with commas) */ - while ((ltok = strsep(&sopt, ",")) != NULL) { - - /* it's actually legal to pass an empty list */ - if (!ltok[0]) - continue; + int s, e; - /* get the start pos */ - ntok = strsep(&ltok, "-"); - if (!ntok[0]) { - s = 0; - } else { - /* account for the fact that arrays are zero based, while - * the user expects the first char on the line to be char #1 */ - s = xatoi_positive(ntok) - 1; - } - - /* get the end pos */ - if (ltok == NULL) { - e = s; - } else if (!ltok[0]) { - /* if the user specified no end position, - * that means "til the end of the line" */ - e = INT_MAX; - } else { - /* again, arrays are zero based, lines are 1 based */ - e = xatoi_positive(ltok) - 1; - } + /* it's actually legal
to pass an empty list */ + //if (!ltok[0]) + // continue; + //^^^ testcase? - if (s < 0 || e < s) - bb_error_msg_and_die("invalid range %s-%s", ntok, ltok ?: ntok); + /* get the start pos */ + ntok = strsep(&ltok, "-"); + if (!ntok[0]) { + s = 0; + } else { + /* account for the fact that arrays are zero based, while + * the user expects the first char on the line to be char #1 */ + s = xatoi_positive(ntok) - 1; + } - /* add the new list */ - cut_list = xrealloc_vector(cut_list, 4, nlists); - /* NB: startpos is always >= 0 */ - cut_list[nlists].startpos = s; - cut_list[nlists].endpos = e; - nlists++; + /* get the end pos */ + if (ltok == NULL) { + e = s; + } else if (!ltok[0]) { + /* if the user specified no end position, + * that means "til the end of the line" */ + e = INT_MAX; + } else { + /* again, arrays are zero based, lines are 1 based */ + e = xatoi_positive(ltok) - 1; } - /* make sure we got some cut positions out of all that */ - if (nlists == 0) - bb_simple_error_msg_and_die("missing list of positions"); + if (s < 0 || e < s) + bb_error_msg_and_die("invalid range %s-%s", ntok, ltok ?: ntok); - /* now that the lists are parsed, we need to sort them to make life - * easier on us when it comes time to print the chars / fields / lines - */ - if (!(opt & OPT_NOSORT)) - qsort(cut_list, nlists, sizeof(cut_list[0]), cmpfunc); + /* add the new list */ + cut_list = xrealloc_vector(cut_list, 4, nlists); + /* NB: startpos is always >= 0 */ + cut_list[nlists].startpos = s; + cut_list[nlists].endpos = e; + nlists++; } + /* make sure we got some cut positions out of all that */ + if (nlists == 0) + bb_simple_error_msg_and_die("missing list of positions"); + + /* now that the lists are parsed, we need to sort them to make life + * easier on us when it comes time to print the chars / fields / lines + */ + if (!(opt & OPT_NOSORT)) + qsort(cut_list, nlists, sizeof(cut_list[0]), cmpfunc); + #if ENABLE_FEATURE_CUT_REGEX if (opt & OPT_REGEX) { xregcomp(&reg, delim, REG_EXTENDED);
-- cgit v1.2.3-55-g6feb From d48400d8fb266526059eb43b318cb95132f61fb3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 13 Dec 2024 20:35:28 +0100 Subject: cut: simplify getopt32 code Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 9a99ad05c..f4cf5401b 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -291,21 +291,18 @@ int cut_main(int argc UNUSED_PARAM, char **argv) #endif #define ARG "bcf"IF_FEATURE_CUT_REGEX("F") -#if !ENABLE_LONG_OPTS - opt = getopt32(argv, "^" - OPT_STR // = "b:c:f:d:O:sD"IF_FEATURE_CUT_REGEX("F:")"n" - "\0" "b--"ARG":c--"ARG":f--"ARG IF_FEATURE_CUT_REGEX("F--"ARG), - &sopt, &sopt, &sopt, &delim, &odelim IF_FEATURE_CUT_REGEX(, &sopt) - ); +#if ENABLE_LONG_OPTS + opt = getopt32long #else - opt = getopt32long(argv, "^" + opt = getopt32 +#endif + (argv, "^" OPT_STR // = "b:c:f:d:O:sD"IF_FEATURE_CUT_REGEX("F:")"n" - "\0" "b:c:f:"IF_FEATURE_CUT_REGEX("F:") /* one of -bcfF is required */ + "\0" "b:c:f:" IF_FEATURE_CUT_REGEX("F:") /* one of -bcfF is required */ "b--"ARG":c--"ARG":f--"ARG IF_FEATURE_CUT_REGEX(":F--"ARG), /* they are mutually exclusive */ - cut_longopts, + IF_LONG_OPTS(cut_longopts,) &sopt, &sopt, &sopt, &delim, &odelim IF_FEATURE_CUT_REGEX(, &sopt) - ); -#endif + ); if (!odelim) odelim = (opt & OPT_REGEX) ? 
" " : delim; if (!delim) -- cgit v1.2.3-55-g6feb From b25ea3f156c6b5927b358f9b12b90f1852f30311 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 14 Dec 2024 13:58:49 +0100 Subject: cut: fix -F n-m to match toybox function old new delta cut_main 1339 1391 +52 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 10 ++++++++++ testsuite/cut.tests | 21 ++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index f4cf5401b..3abebe7ad 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -243,6 +243,16 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, start = next; continue; } + /* -F N-M preserves intermediate delimiters: */ + //printf "1 2 3 4 5 6 7\n" | toybox cut -O: -F2,4-6,7 + //2:4 5 6:7 + if (opt_REGEX && dcount <= cut_list[cl_pos].endpos) + continue; +// NB: toybox does the above for -f too, but it's a compatibility bug: +//printf "1 2 3 4 5 6 7 8\n" | toybox cut -d' ' -O: -f2,4-6,7 +//2:4 5 6:7 // WRONG! +//printf "1 2 3 4 5 6 7 8\n" | cut -d' ' --output-delimiter=: -f2,4-6,7 +//2:4:5:6:7 // GNU coreutils 9.1 } #if ENABLE_FEATURE_CUT_REGEX if (end != start || !opt_REGEX) diff --git a/testsuite/cut.tests b/testsuite/cut.tests index 52d05fde3..e57b028ac 100755 --- a/testsuite/cut.tests +++ b/testsuite/cut.tests @@ -104,6 +104,19 @@ Weather forecast for tonight : dark. Apple: you can buy better, but you can't pay more. Subcalifragilisticexpialidocious. Auntie Em: Hate you, hate Kansas. Took the dog. Dorothy." 
+ +optional FEATURE_CUT_REGEX LONG_OPTS +testing "cut -F preserves intermediate delimiters" \ + "cut --output-delimiter=: -F2,4-6,7" \ + "2:4 5 6:7\n" \ + "" "1 2 3 4\t\t5 6 7 8\n" +SKIP= + +optional LONG_OPTS +testing "cut -f does not preserve intermediate delimiters" \ + "cut --output-delimiter=: -d' ' -f2,4-6,7" \ + "2:4:5:6:7\n" \ + "" "1 2 3 4 5 6 7 8\n" SKIP= testing "cut empty field" "cut -d ':' -f 1-3" \ @@ -124,23 +137,25 @@ testing "cut -dNEWLINE" \ "2\n4\n6\n7\n" \ "" "1\n2\n3\n4\n5\n6\n7" +optional LONG_OPTS testing "cut -dNEWLINE --output-delimiter" \ "cut -d' -' -O@@ -f4,2,6-8" \ +' --output-delimiter=@@ -f4,2,6-8" \ "2@@4@@6@@7\n" \ "" "1\n2\n3\n4\n5\n6\n7" testing "cut -dNEWLINE --output-delimiter 2" \ "cut -d' -' -O@@ -f4,2,6-8" \ +' --output-delimiter=@@ -f4,2,6-8" \ "2@@4@@6@@7\n" \ "" "1\n2\n3\n4\n5\n6\n7\n" testing "cut -dNEWLINE --output-delimiter EMPTY_INPUT" \ "cut -d' -' -O@@ -f4,2,6-8" \ +' --output-delimiter=@@ -f4,2,6-8" \ "" \ "" "" +SKIP= # This seems to work as if delimiter is never found. 
# We test here that -d '' does *not* operate as if there was no -d -- cgit v1.2.3-55-g6feb From e8622f0d8524baf8a08a734b7d0fb6aac31b5ee2 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 15 Dec 2024 12:26:07 +0100 Subject: cut: disallow -f '' and -f '-' function old new delta cut_main 1391 1410 +19 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 3abebe7ad..20138075c 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -85,8 +85,8 @@ #define opt_REGEX (option_mask32 & OPT_REGEX) struct cut_list { - int startpos; - int endpos; + unsigned startpos; + unsigned endpos; }; static int cmpfunc(const void *a, const void *b) @@ -116,7 +116,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* print the chars specified in each cut list */ for (; cl_pos < nlists; cl_pos++) { - int spos; + unsigned spos; for (spos = cut_list[cl_pos].startpos; spos < linelen;) { if (!printed[spos]) { printed[spos] = 'X'; @@ -171,7 +171,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* Cut by fields */ } else { unsigned next = 0, start = 0, end = 0; - int dcount = 0; /* we saw Nth delimiter (0 - didn't see any yet) */ + unsigned dcount = 0; /* we saw Nth delimiter (0 - didn't see any yet) */ /* Blank line? Check -s (later check for -s does not catch empty lines) */ if (linelen == 0) { @@ -340,10 +340,10 @@ int cut_main(int argc UNUSED_PARAM, char **argv) /* * parse list and put values into startpos and endpos. 
- * valid list formats: N, N-, N-M, -M - * more than one list can be separated by commas + * valid range formats: N, N-, N-M, -M + * more than one range can be separated by commas */ - /* take apart the lists, one by one (they are separated with commas) */ + /* take apart the ranges, one by one (separated with commas) */ while ((ltok = strsep(&sopt, ",")) != NULL) { char *ntok; int s, e; @@ -356,22 +356,26 @@ int cut_main(int argc UNUSED_PARAM, char **argv) /* get the start pos */ ntok = strsep(&ltok, "-"); if (!ntok[0]) { - s = 0; + if (!ltok) /* testcase: -f '' */ + bb_show_usage(); + if (!ltok[0]) /* testcase: -f - */ + bb_show_usage(); + s = 0; /* "-M" means "1-M" */ } else { - /* account for the fact that arrays are zero based, while - * the user expects the first char on the line to be char #1 */ + /* "N" or "N-[M]" */ + /* arrays are zero based, while the user expects + * the first field/char on the line to be char #1 */ s = xatoi_positive(ntok) - 1; } /* get the end pos */ - if (ltok == NULL) { - e = s; + if (!ltok) { + e = s; /* "N" means "N-N" */ } else if (!ltok[0]) { - /* if the user specified no end position, - * that means "til the end of the line" */ + /* "N-" means "until the end of the line" */ e = INT_MAX; } else { - /* again, arrays are zero based, lines are 1 based */ + /* again, arrays are zero based, fields are 1 based */ e = xatoi_positive(ltok) - 1; } -- cgit v1.2.3-55-g6feb From ba9651b803f0eb2a8cba0205ae72c75ab773adaf Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 16 Dec 2024 00:19:43 +0100 Subject: cut: we can't get empty cut_list[], remove the check for that function old new delta .rodata 105685 105659 -26 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 20138075c..3d9f2b373 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -287,7 +287,7 @@ int cut_main(int argc UNUSED_PARAM,
char **argv) /* growable array holding a series of lists */ struct cut_list *cut_list = NULL; unsigned nlists = 0; /* number of elements in above list */ - char *sopt, *ltok; + char *LIST, *ltok; const char *delim = NULL; const char *odelim = NULL; unsigned opt; @@ -311,7 +311,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) "\0" "b:c:f:" IF_FEATURE_CUT_REGEX("F:") /* one of -bcfF is required */ "b--"ARG":c--"ARG":f--"ARG IF_FEATURE_CUT_REGEX(":F--"ARG), /* they are mutually exclusive */ IF_LONG_OPTS(cut_longopts,) - &sopt, &sopt, &sopt, &delim, &odelim IF_FEATURE_CUT_REGEX(, &sopt) + &LIST, &LIST, &LIST, &delim, &odelim IF_FEATURE_CUT_REGEX(, &LIST) ); if (!odelim) odelim = (opt & OPT_REGEX) ? " " : delim; @@ -322,7 +322,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) argv += optind; //if (!(opt & (OPT_BYTE | OPT_CHAR | OPT_FIELDS | OPT_REGEX))) // bb_simple_error_msg_and_die("expected a list of bytes, characters, or fields"); - // ^^^ handled by getopt32 + //^^^ handled by getopt32 /* non-field (char or byte) cutting has some special handling */ if (!(opt & (OPT_FIELDS|OPT_REGEX))) { @@ -344,7 +344,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) * more than one range can be separated by commas */ /* take apart the ranges, one by one (separated with commas) */ - while ((ltok = strsep(&sopt, ",")) != NULL) { + while ((ltok = strsep(&LIST, ",")) != NULL) { char *ntok; int s, e; @@ -382,17 +382,20 @@ int cut_main(int argc UNUSED_PARAM, char **argv) if (s < 0 || e < s) bb_error_msg_and_die("invalid range %s-%s", ntok, ltok ?: ntok); - /* add the new list */ + /* add the new range */ cut_list = xrealloc_vector(cut_list, 4, nlists); - /* NB: startpos is always >= 0 */ + /* NB: s is always >= 0 */ cut_list[nlists].startpos = s; cut_list[nlists].endpos = e; nlists++; } /* make sure we got some cut positions out of all that */ - if (nlists == 0) - bb_simple_error_msg_and_die("missing list of positions"); + //if (nlists == 0) + // 
bb_simple_error_msg_and_die("missing list of positions"); + //^^^ this is impossible since one of -bcfF is required, + // they populate LIST with non-empty string and when it is parsed, + // cut_list[] gets at least one element. /* now that the lists are parsed, we need to sort them to make life * easier on us when it comes time to print the chars / fields / lines -- cgit v1.2.3-55-g6feb From 0bd84c94720b5a68d97c05975220e6482c455623 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 16 Dec 2024 00:32:57 +0100 Subject: cut: terminate cut_list[] so that we don't need "size of the array" variable function old new delta cut_main 1410 1404 -6 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 3d9f2b373..0913c4b68 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -84,19 +84,22 @@ #define opt_REGEX (option_mask32 & OPT_REGEX) -struct cut_list { +struct cut_range { unsigned startpos; unsigned endpos; }; static int cmpfunc(const void *a, const void *b) { - return (((struct cut_list *) a)->startpos - - ((struct cut_list *) b)->startpos); + return (((struct cut_range *) a)->startpos - + ((struct cut_range *) b)->startpos); } +#define END_OF_LIST(list_elem) ((list_elem).startpos == UINT_MAX) +#define NOT_END_OF_LIST(list_elem) ((list_elem).startpos != UINT_MAX) + static void cut_file(FILE *file, const char *delim, const char *odelim, - const struct cut_list *cut_list, unsigned nlists) + const struct cut_range *cut_list) { char *line; unsigned linenum = 0; /* keep these zero-based to be consistent */ @@ -115,7 +118,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, int need_odelim = 0; /* print the chars specified in each cut list */ - for (; cl_pos < nlists; cl_pos++) { + for (; NOT_END_OF_LIST(cut_list[cl_pos]); cl_pos++) { unsigned spos; for (spos = 
cut_list[cl_pos].startpos; spos < linelen;) { if (!printed[spos]) { @@ -137,9 +140,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } else if (!opt_REGEX && *delim == '\n') { unsigned spos = cut_list[cl_pos].startpos; - /* get out if we have no more lists to process or if the lines + /* get out if we have no more ranges to process or if the lines * are lower than what we're interested in */ - if ((linenum < spos) || (cl_pos >= nlists)) + if ((linenum < spos) || END_OF_LIST(cut_list[cl_pos])) goto next_line; /* if the line we're looking for is lower than the one we were @@ -149,8 +152,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* go to the next list if we're at the end of this one */ if (spos > cut_list[cl_pos].endpos) { cl_pos++; - /* get out if there's no more lists to process */ - if (cl_pos >= nlists) + /* get out if there's no more ranges to process */ + if (END_OF_LIST(cut_list[cl_pos])) goto next_line; spos = cut_list[cl_pos].startpos; /* get out if the current line is lower than the one @@ -188,7 +191,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* End of current range? 
*/ if (end == linelen || dcount > cut_list[cl_pos].endpos) { end_of_range: - if (++cl_pos >= nlists) + cl_pos++; + if (END_OF_LIST(cut_list[cl_pos])) break; if (option_mask32 & OPT_NOSORT) start = dcount = next = 0; @@ -284,9 +288,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, int cut_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int cut_main(int argc UNUSED_PARAM, char **argv) { - /* growable array holding a series of lists */ - struct cut_list *cut_list = NULL; - unsigned nlists = 0; /* number of elements in above list */ + /* growable array holding a series of ranges */ + struct cut_range *cut_list = NULL; + unsigned nranges = 0; /* number of elements in above list */ char *LIST, *ltok; const char *delim = NULL; const char *odelim = NULL; @@ -383,15 +387,16 @@ int cut_main(int argc UNUSED_PARAM, char **argv) bb_error_msg_and_die("invalid range %s-%s", ntok, ltok ?: ntok); /* add the new range */ - cut_list = xrealloc_vector(cut_list, 4, nlists); + cut_list = xrealloc_vector(cut_list, 4, nranges); /* NB: s is always >= 0 */ - cut_list[nlists].startpos = s; - cut_list[nlists].endpos = e; - nlists++; + cut_list[nranges].startpos = s; + cut_list[nranges].endpos = e; + nranges++; } + cut_list[nranges].startpos = UINT_MAX; /* end indicator */ /* make sure we got some cut positions out of all that */ - //if (nlists == 0) + //if (nranges == 0) // bb_simple_error_msg_and_die("missing list of positions"); //^^^ this is impossible since one of -bcfF is required, // they populate LIST with non-empty string and when it is parsed, @@ -401,7 +406,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) * easier on us when it comes time to print the chars / fields / lines */ if (!(opt & OPT_NOSORT)) - qsort(cut_list, nlists, sizeof(cut_list[0]), cmpfunc); + qsort(cut_list, nranges, sizeof(cut_list[0]), cmpfunc); #if ENABLE_FEATURE_CUT_REGEX if (opt & OPT_REGEX) { @@ -422,7 +427,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) retval = 
EXIT_FAILURE; continue; } - cut_file(file, delim, odelim, cut_list, nlists); + cut_file(file, delim, odelim, cut_list); fclose_if_not_stdin(file); } while (*++argv); -- cgit v1.2.3-55-g6feb From ee8b94acbf9ac9a6a48831a244cfef897affe82e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 16 Dec 2024 00:51:31 +0100 Subject: cut: shorten error messages on bad syntax We don't need to mimic GNU cut error messages. $ cut -d@ -b3 cut: -d DELIM makes sense only with -f or -F $ cut -s -b3 cut: -s makes sense only with -f or -F function old new delta static._op_on_field 31 32 +1 .rodata 105659 105598 -61 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 1/-61) Total: -60 bytes Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 0913c4b68..af2fd9fd4 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -330,15 +330,15 @@ int cut_main(int argc UNUSED_PARAM, char **argv) /* non-field (char or byte) cutting has some special handling */ if (!(opt & (OPT_FIELDS|OPT_REGEX))) { - static const char _op_on_field[] ALIGN1 = " only when operating on fields"; + static const char _op_on_field[] ALIGN1 = " makes sense only with -f"IF_FEATURE_CUT_REGEX(" or -F"); if (opt & OPT_SUPPRESS) { bb_error_msg_and_die - ("suppressing non-delimited lines makes sense%s", _op_on_field); + ("-s%s", _op_on_field); } if (opt & OPT_DELIM) { bb_error_msg_and_die - ("a delimiter may be specified%s", _op_on_field); + ("-d DELIM%s", _op_on_field); } } -- cgit v1.2.3-55-g6feb From e2304d47a98a2eff65a3a0a13c53df2cc69529b1 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 16 Dec 2024 01:10:55 +0100 Subject: cut: shorten error messages on bad syntax even more $ cut -s -b3 cut: -s requires -f or -F $ cut -d@ -b3 cut: -d DELIM requires -f or -F function old new delta static.requires_f - 19 +19 
static._op_on_field 32 - -32 ------------------------------------------------------------------------------ (add/remove: 1/1 grow/shrink: 0/0 up/down: 19/-32) Total: -13 bytes Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index af2fd9fd4..74a704c8f 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -330,16 +330,12 @@ int cut_main(int argc UNUSED_PARAM, char **argv) /* non-field (char or byte) cutting has some special handling */ if (!(opt & (OPT_FIELDS|OPT_REGEX))) { - static const char _op_on_field[] ALIGN1 = " makes sense only with -f"IF_FEATURE_CUT_REGEX(" or -F"); - - if (opt & OPT_SUPPRESS) { - bb_error_msg_and_die - ("-s%s", _op_on_field); - } - if (opt & OPT_DELIM) { - bb_error_msg_and_die - ("-d DELIM%s", _op_on_field); - } + static const char requires_f[] ALIGN1 = " requires -f" + IF_FEATURE_CUT_REGEX(" or -F"); + if (opt & OPT_SUPPRESS) + bb_error_msg_and_die("-s%s", requires_f); + if (opt & OPT_DELIM) + bb_error_msg_and_die("-d DELIM%s", requires_f); } /* -- cgit v1.2.3-55-g6feb From dd40b40ee59e1eae6a70265741657bc55d960cce Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 20 Dec 2024 21:46:32 +0100 Subject: cut: remove unnecessary initialization of regmatch_t function old new delta cut_main 1404 1388 -16 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 74a704c8f..a766db40f 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -222,7 +222,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* Find next delimiter */ #if ENABLE_FEATURE_CUT_REGEX if (opt_REGEX) { - regmatch_t rr = {-1, -1}; + regmatch_t rr; regex_t *reg = (void*) delim; if (regexec(reg, line + next, 1, &rr, REG_NOTBOL|REG_NOTEOL) != 0) { -- cgit v1.2.3-55-g6feb From 
b03f5162ac239c3743cfac246b3760b0020f4d23 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 20 Dec 2024 22:12:33 +0100 Subject: cut: fix up -D/-s behavior with -F function old new delta cut_main 1388 1402 +14 packed_usage 34934 34933 -1 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 14/-1) Total: 13 bytes Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 6 +++--- testsuite/cut.tests | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 5 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index a766db40f..65e0e5c30 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -43,7 +43,7 @@ //usage: "\n -F LIST Print only these fields (-d is regex)" //usage: ) //usage: "\n -s Drop lines with no delimiter (else print them in full)" -//usage: "\n -D Don't sort/collate sections or match -f"IF_FEATURE_CUT_REGEX("F")" lines without delimeter" +//usage: "\n -D Don't sort ranges; line without delimiters has one field" //usage: IF_LONG_OPTS( //usage: "\n --output-delimiter SEP Output field delimeter" //usage: ) IF_NOT_LONG_OPTS( @@ -202,8 +202,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* End of current line? 
*/ if (next == linelen) { end = linelen; /* print up to end */ - /* If we've seen no delimiters, check -s */ - if (cl_pos == 0 && dcount == 0 && !opt_REGEX) { + /* If we've seen no delimiters, and no -D, check -s */ + if (!(option_mask32 & OPT_NOSORT) && cl_pos == 0 && dcount == 0) { if (option_mask32 & OPT_SUPPRESS) goto next_line; /* else: will print entire line */ diff --git a/testsuite/cut.tests b/testsuite/cut.tests index e57b028ac..21cfea809 100755 --- a/testsuite/cut.tests +++ b/testsuite/cut.tests @@ -95,8 +95,16 @@ testing "cut with -d -s omits blank lines" "cut -d' ' -f2 -s input" "bar\nbong\n # substitute for awk optional FEATURE_CUT_REGEX -testing "cut -DF" "cut -DF 2,7,5" \ - "said and your\nare\nis demand. supply\nforecast :\nyou you better,\n\nEm: Took hate\n" "" \ +testing "cut -DF unordered" "cut -DF 2,7,5" \ + "\ +said and your +are +is demand. supply +forecast : +you you better, + +Em: Took hate +" "" \ "Bother, said Pooh. It's your husband, and he has a gun. Cheerios are donut seeds. Talk is cheap because supply exceeds demand. @@ -105,6 +113,29 @@ Apple: you can buy better, but you can't pay more. Subcalifragilisticexpialidocious. Auntie Em: Hate you, hate Kansas. Took the dog. Dorothy." 
+# No delimiter found: print entire line regardless of -F RANGES +testing "cut -F1" "cut -d: -F1" \ + "the_only_field\n" "" \ + "the_only_field\n" +testing "cut -F2" "cut -d: -F2" \ + "the_only_field\n" "" \ + "the_only_field\n" +# No delimiter found and -s: skip entire line +testing "cut -sF1" "cut -d: -sF1" \ + "" "" \ + "the_only_field\n" +#^^^ the above is probably mishandled by toybox, it prints the line +testing "cut -sF2" "cut -d: -sF2" \ + "" "" \ + "the_only_field\n" +# -D disables special handling of lines with no delimiters, the line is treated as the 1st field +testing "cut -DF1" "cut -d: -DF1" \ + "the_only_field\n" "" \ + "the_only_field\n" +testing "cut -DF2" "cut -d: -DF2" \ + "\n" "" \ + "the_only_field\n" + optional FEATURE_CUT_REGEX LONG_OPTS testing "cut -F preserves intermediate delimiters" \ "cut --output-delimiter=: -F2,4-6,7" \ -- cgit v1.2.3-55-g6feb From 1ea89fa98a7c4b1b6924f136963c91caf5a5dccb Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 21 Dec 2024 00:24:30 +0100 Subject: cut: code shrink This change eliminates one temporary: - if (dcount++ < cut_list[cl_pos].startpos) + dcount++; + if (dcount <= cut_list[cl_pos].startpos) function old new delta cut_main 1402 1373 -29 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 65e0e5c30..93b58b493 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -91,8 +91,9 @@ struct cut_range { static int cmpfunc(const void *a, const void *b) { - return (((struct cut_range *) a)->startpos - - ((struct cut_range *) b)->startpos); + const struct cut_range *aa = a; + const struct cut_range *bb = b; + return aa->startpos - bb->startpos; } #define END_OF_LIST(list_elem) ((list_elem).startpos == UINT_MAX) @@ -109,18 +110,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, while ((line = xmalloc_fgetline(file)) != NULL) { /* 
set up a list so we can keep track of what's been printed */ - int linelen = strlen(line); + unsigned linelen = strlen(line); unsigned cl_pos = 0; - /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */ + /* Cut based on chars/bytes XXX: only works when sizeof(char) == byte */ if (option_mask32 & (OPT_CHAR | OPT_BYTE)) { char *printed = xzalloc(linelen + 1); int need_odelim = 0; /* print the chars specified in each cut list */ for (; NOT_END_OF_LIST(cut_list[cl_pos]); cl_pos++) { - unsigned spos; - for (spos = cut_list[cl_pos].startpos; spos < linelen;) { + unsigned spos = cut_list[cl_pos].startpos; + while (spos < linelen) { if (!printed[spos]) { printed[spos] = 'X'; if (need_odelim && spos != 0 && !printed[spos-1]) { @@ -129,8 +130,10 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } putchar(line[spos]); } - if (++spos > cut_list[cl_pos].endpos) { - need_odelim = (odelim && odelim[0]); /* will print OSEP (if not empty) */ + spos++; + if (spos > cut_list[cl_pos].endpos) { + /* will print OSEP (if not empty) */ + need_odelim = (odelim && odelim[0]); break; } } @@ -242,7 +245,8 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, continue; } /* Got delimiter */ - if (dcount++ < cut_list[cl_pos].startpos) { + dcount++; + if (dcount <= cut_list[cl_pos].startpos) { /* Not yet within range - loop */ start = next; continue; -- cgit v1.2.3-55-g6feb From 14f57f5357cb674b88e7cdaff6267bf9d84c6b80 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 21 Dec 2024 00:43:45 +0100 Subject: cut: code shrink move "linenum" manipulations to the one place where it is used. 
function old new delta cut_main 1373 1360 -13 Signed-off-by: Denys Vlasenko --- coreutils/cut.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cut.c b/coreutils/cut.c index 93b58b493..d81f36bcd 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -143,15 +143,15 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, } else if (!opt_REGEX && *delim == '\n') { unsigned spos = cut_list[cl_pos].startpos; + linenum++; /* get out if we have no more ranges to process or if the lines * are lower than what we're interested in */ - if ((linenum < spos) || END_OF_LIST(cut_list[cl_pos])) + if (linenum <= spos || END_OF_LIST(cut_list[cl_pos])) goto next_line; /* if the line we're looking for is lower than the one we were * passed, it means we displayed it already, so move on */ - while (spos < linenum) { - spos++; + while (++spos < linenum) { /* go to the next list if we're at the end of this one */ if (spos > cut_list[cl_pos].endpos) { cl_pos++; @@ -161,7 +161,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, spos = cut_list[cl_pos].startpos; /* get out if the current line is lower than the one * we just became interested in */ - if (linenum < spos) + if (linenum <= spos) goto next_line; } } @@ -280,7 +280,6 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, /* if we printed anything, finish with newline */ putchar('\n'); next_line: - linenum++; free(line); } /* while (got line) */ @@ -399,7 +398,7 @@ int cut_main(int argc UNUSED_PARAM, char **argv) //if (nranges == 0) // bb_simple_error_msg_and_die("missing list of positions"); //^^^ this is impossible since one of -bcfF is required, - // they populate LIST with non-empty string and when it is parsed, + // they populate LIST with non-NULL string and when it is parsed, // cut_list[] gets at least one element. 
/* now that the lists are parsed, we need to sort them to make life -- cgit v1.2.3-55-g6feb