tsort: new applet

function old new delta tsort_main - 578 +578 .rodata 104884 104906 +22 applet_names 2759 2765 +6 applet_main 1596 1600 +4 packed_usage 34290 34288 -2 ------------------------------------------------------------------------------ (add/remove: 2/0 grow/shrink: 3/1 up/down: 610/-2) Total: 608 bytes Signed-off-by: David Leonard <d+busybox@adaptive-enterprises.com> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
author: David Leonard <d+busybox@adaptive-enterprises.com> 2022-02-20 14:29:45 +1000
committer: Denys Vlasenko <vda.linux@googlemail.com> 2022-05-02 14:25:36 +0200
commit: 4642cf5b388bf60f6bea67ce3a5031d24bccd48a (patch)
tree: 8f6cec270d99a47627b16402531db303076a34ef
parent: 52a7bf6fa677abdb80f8e484f6ba77ed3d34e444 (diff)
download: busybox-w32-4642cf5b388bf60f6bea67ce3a5031d24bccd48a.tar.gz
busybox-w32-4642cf5b388bf60f6bea67ce3a5031d24bccd48a.tar.bz2
busybox-w32-4642cf5b388bf60f6bea67ce3a5031d24bccd48a.zip
3 files changed, 299 insertions, 1 deletions
diff --git a/coreutils/tsort.c b/coreutils/tsort.c
new file mode 100644
index 000000000..dedb65b15
--- /dev/null
+++ b/coreutils/tsort.c
@@ -0,0 +1,188 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * tsort implementation for busybox
+ *
+ * public domain -- David Leonard, 2022
+ */
+//config:config TSORT
+//config:       bool "tsort (0.7 kb)"
+//config:       default y
+//config:       help
+//config:       tsort performs a topological sort.
+//applet:IF_TSORT(APPLET(tsort, BB_DIR_USR_BIN, BB_SUID_DROP))
+//kbuild:lib-$(CONFIG_TSORT) += tsort.o
+/* BB_AUDIT SUSv3 compliant */
+/* http://www.opengroup.org/onlinepubs/007904975/utilities/tsort.html */
+//usage:#define tsort_trivial_usage
+//usage:       "[FILE]"
+//usage:#define tsort_full_usage "\n\n"
+//usage:       "Topological sort"
+//usage:#define tsort_example_usage
+//usage:       "$ echo -e \"a b\\nb c\" | tsort\n"
+//usage:       "a\n"
+//usage:       "b\n"
+//usage:       "c\n"
+#include "libbb.h"
+#include "common_bufsiz.h"
+struct node {
+        unsigned in_count;
+        unsigned out_count;
+        struct node **out;
+        char name[1];
+};
+struct globals {
+        struct node **nodes;
+        unsigned nodes_len;
+};
+#define G (*(struct globals*)bb_common_bufsiz1)
+#define INIT_G() do { \
+        setup_common_bufsiz(); \
+        BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
+        G.nodes = NULL; \
+        G.nodes_len = 0; \
+} while (0)
+static struct node *
+get_node(const char *name)
+{
+        struct node *n;
+        unsigned a = 0;
+        unsigned b = G.nodes_len;
+        /* Binary search for name */
+        while (a != b) {
+                unsigned m = (a + b) / 2;
+                int cmp = strcmp(name, G.nodes[m]->name);
+                if (cmp == 0)
+                        return G.nodes[m]; /* found */
+                if (cmp < 0) {
+                        b = m;
+                } else {
+                        a = m + 1;
+                }
+        }
+        /* Allocate new node */
+        n = xzalloc(sizeof(*n) + strlen(name));
+        //n->in_count = 0;
+        //n->out_count = 0;
+        //n->out = NULL;
+        strcpy(n->name, name);
+        /* Insert to maintain sort */
+        G.nodes = xrealloc(G.nodes, (G.nodes_len + 1) * sizeof(*G.nodes));
+        memmove(&G.nodes[a + 1], &G.nodes[a],
+                (G.nodes_len - a) * sizeof(*G.nodes));
+        G.nodes[a] = n;
+        G.nodes_len++;
+        return n;
+}
+static void
+add_edge(struct node *a, struct node *b)
+{
+        a->out = xrealloc_vector(a->out, 6, a->out_count);
+        a->out[a->out_count++] = b;
+        b->in_count++;
+}
+int tsort_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int tsort_main(int argc UNUSED_PARAM, char **argv)
+{
+        char *line;
+        size_t linesz;
+        ssize_t len;
+        struct node *a;
+        int cycles;
+        INIT_G();
+        if (argv[1]) {
+                if (argv[2])
+                        bb_show_usage();
+                if (NOT_LONE_DASH(argv[1])) {
+                        close(STDIN_FILENO); /* == 0 */
+                        xopen(argv[1], O_RDONLY); /* fd will be 0 */
+                }
+        }
+        /* Read in words separated by <blank>s */
+        a = NULL;
+        line = NULL;
+        linesz = 0;
+        while ((len = getline(&line, &linesz, stdin)) != -1) {
+                char *s = line;
+                while (*(s = skip_whitespace(s)) != '\0') {
+                        struct node *b;
+                        char *word;
+                        word = s;
+                        s = skip_non_whitespace(s);
+                        if (*s)
+                                *s++ = '\0';
+                        /* Create nodes and edges for each word pair */
+                        b = get_node(word);
+                        if (!a) {
+                                a = b;
+                        } else {
+                                if (a != b)
+                                        add_edge(a, b);
+                                a = NULL;
+                        }
+                }
+        }
+// Most other tools do not check for input read error (treat them as EOF)
+//      die_if_ferror(in, input_filename);
+        if (a)
+                bb_simple_error_msg_and_die("odd input");
+        free(line);
+        /*
+         * Kahn's algorithm:
+         *   - find a node that has no incoming edges, print and remove it
+         *   - repeat until the graph is empty
+         *   - if any nodes are left, they form cycles.
+         */
+        cycles = 0;
+        while (G.nodes_len) {
+                struct node *n;
+                unsigned i;
+                /* Search for first node with no incoming edges */
+                for (i = 0; i < G.nodes_len; i++) {
+                        if (!G.nodes[i]->in_count)
+                                break;
+                }
+                if (i == G.nodes_len) {
+                        /* Must be a cycle; arbitraily break it at node 0 */
+                        cycles++;
+                        i = 0;
+#ifndef TINY
+                        bb_error_msg("cycle at %s", G.nodes[i]->name);
+#endif
+                }
+                /* Remove the node (need no longer maintain sort) */
+                n = G.nodes[i];
+                G.nodes[i] = G.nodes[--G.nodes_len];
+                /* And remove its outgoing edges */
+                for (i = 0; i < n->out_count; i++)
+                        n->out[i]->in_count--;
+                free(n->out);
+                puts(n->name);
+                free(n);
+        }
+        free(G.nodes);
+        fflush_stdout_and_exit(cycles ? 1 : 0);
+}
diff --git a/docs/posix_conformance.txt b/docs/posix_conformance.txt
index 5e107d74d..8edbe3e15 100644
--- a/docs/posix_conformance.txt
+++ b/docs/posix_conformance.txt
@@ -24,7 +24,7 @@ POSIX Tools not supported:
  gencat, getconf, iconv, join, link, locale, localedef, lp, m4,
  mailx, newgrp, nl, pathchk, pax, pr, qalter, qdel, qhold, qmove,
  qmsg, qrerun, qrls, qselect, qsig, qstat, qsub, tabs, talk, tput,
-  tsort, unlink, uucp, uustat, uux
+  unlink, uucp, uustat, uux
 POSIX Tools not supported (DEVELOPMENT):
  admin, cflow, ctags, cxref, delta, fort77, get, lex, make, nm, prs, rmdel,
diff --git a/testsuite/tsort.tests b/testsuite/tsort.tests
new file mode 100755
index 000000000..c6fe78272
--- /dev/null
+++ b/testsuite/tsort.tests
@@ -0,0 +1,110 @@
+#!/bin/sh
+# SUSv3 compliant sort tests.
+# Public Domain, David Leonard 2022
+. ./testing.sh
+#       name cmd expected ./input stdin
+testing "" "tsort"       "a\n" "" "a a\n"
+testing "" "tsort -"     "a\n" "" "a a\n"
+testing "" "tsort input" "a\n" "a a\n" ""
+testing "tsort input (w/o eol)" "tsort input" "a\n" "a a" ""
+testing "" "tsort /dev/null" "" "" ""
+testing "tsort empty" tsort "" "" ""
+testing "tsort blank" tsort "" "" "\n"
+testing "tsort blanks" tsort "" "" "\n\n \t\n "
+# simple inputs having exactly one solution
+testing "tsort 1-edge" tsort "a\nb\n" "" "a b\n"
+testing "tsort 2-edge" tsort "a\nb\nc\n" "" "a b b c\n"
+# The following test helper accommodates future variable output because, as
+# tsort is allowed to emit any total ordering that satisfies its input,
+# should the implementation changes, these tests will remain valid.
+#
+# The idea is to verify that:
+# - each input word is present EXACTLY ONCE in tsort's output
+# - for each input pair 'a b', the occurrence of 'a' APPEARS BEFORE 'b'
+# - the exit code is 0
+tsort_test () {
+        fail=
+        name="$1"; shift
+        args="$*"
+        if [ $VERBOSE ]; then
+                echo "============"
+                echo "echo \"$args\" | tsort >actual"
+        fi
+        echo "$args" | tsort >actual
+        ec=$?
+        if [ $ec -ne 0 ]; then
+                fail "tsort exit $ec, expected 0"
+        fi
+        while [ $# -ne 0 ]; do
+                a=$1; shift
+                b=$1; shift
+                aline=$(grep -nxF "$a" <actual | cut -d: -f1)
+                bline=$(grep -nxF "$b" <actual | cut -d: -f1)
+                case $aline in
+                        "") fail "word $a missing from output ($args)";;
+                        *" "*) fail "word $a duplicated ($args)";;
+                esac
+                case $bline in
+                        "") fail "word $b missing from output ($args)";;
+                        *" "*) fail "word $b duplicated ($args)";;
+                esac
+                if [ $aline -gt $bline ]; then
+                        fail "$a appears after $b ($args)"
+                fi
+        done
+        if [ $fail ] && [ $VERBOSE ]; then
+                echo "exit $ec, actual:"
+                cat actual
+        fi
+        rm actual
+        report "$name"
+}
+# Test that erroneous input causes an unsuccessful exit code
+# we don't test the output error message
+tsort_test_err () {
+        fail=
+        name="$1"; shift
+        echo "$*" | tsort >/dev/null 2>/dev/null
+        ec=$?
+        if [ $ec -eq 0 ]; then
+                fail "$name: unexpected exit 0 ($*)"
+        fi
+        report "$name"
+}
+fail () {
+        [ $VERBOSE ] && echo "ERROR: $*"
+        fail=1
+}
+report () {
+        if [ $fail ]; then
+                FAILCOUNT=$(($FAILCOUNT + 1))
+                echo "FAIL: $*"
+        else
+                echo "PASS: $*"
+        fi
+}
+tsort_test "tsort empty2"
+tsort_test "tsort singleton"    a a
+tsort_test "tsort simple"       a b b c
+tsort_test "tsort 2singleton"   a a b b
+tsort_test "tsort medium"       a b a b b c
+tsort_test "tsort std.example"  a b c c d e g g f g e f h h
+tsort_test "tsort prefixes"     a aa aa aaa aaaa aaaaa a aaaaa
+tsort_test_err "tsort odd"      a
+tsort_test_err "tsort odd2"     a b c
+tsort_test_err "tsort cycle"    a b b a
+exit $FAILCOUNT
author	David Leonard <d+busybox@adaptive-enterprises.com>	2022-02-20 14:29:45 +1000
committer	Denys Vlasenko <vda.linux@googlemail.com>	2022-05-02 14:25:36 +0200
commit	4642cf5b388bf60f6bea67ce3a5031d24bccd48a (patch)
tree	8f6cec270d99a47627b16402531db303076a34ef
parent	52a7bf6fa677abdb80f8e484f6ba77ed3d34e444 (diff)
download	busybox-w32-4642cf5b388bf60f6bea67ce3a5031d24bccd48a.tar.gz busybox-w32-4642cf5b388bf60f6bea67ce3a5031d24bccd48a.tar.bz2 busybox-w32-4642cf5b388bf60f6bea67ce3a5031d24bccd48a.zip

diff --git a/coreutils/tsort.c b/coreutils/tsort.c new file mode 100644 index 000000000..dedb65b15 --- /dev/null +++ b/coreutils/tsort.c
@@ -0,0 +1,188 @@
		1	/* vi: set sw=4 ts=4: */
		2	/*
		3	* tsort implementation for busybox
		4	*
		5	* public domain -- David Leonard, 2022
		6	*/
		7	//config:config TSORT
		8	//config: bool "tsort (0.7 kb)"
		9	//config: default y
		10	//config: help
		11	//config: tsort performs a topological sort.
		12
		13	//applet:IF_TSORT(APPLET(tsort, BB_DIR_USR_BIN, BB_SUID_DROP))
		14
		15	//kbuild:lib-$(CONFIG_TSORT) += tsort.o
		16
		17	/* BB_AUDIT SUSv3 compliant */
		18	/* http://www.opengroup.org/onlinepubs/007904975/utilities/tsort.html */
		19
		20	//usage:#define tsort_trivial_usage
		21	//usage: "[FILE]"
		22	//usage:#define tsort_full_usage "\n\n"
		23	//usage: "Topological sort"
		24	//usage:#define tsort_example_usage
		25	//usage: "$ echo -e \"a b\\nb c\" \| tsort\n"
		26	//usage: "a\n"
		27	//usage: "b\n"
		28	//usage: "c\n"
		29
		30	#include "libbb.h"
		31	#include "common_bufsiz.h"
		32
		33	struct node {
		34	unsigned in_count;
		35	unsigned out_count;
		36	struct node **out;
		37	char name[1];
		38	};
		39
		40	struct globals {
		41	struct node **nodes;
		42	unsigned nodes_len;
		43	};
		44	#define G ((struct globals)bb_common_bufsiz1)
		45	#define INIT_G() do { \
		46	setup_common_bufsiz(); \
		47	BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
		48	G.nodes = NULL; \
		49	G.nodes_len = 0; \
		50	} while (0)
		51
		52	static struct node *
		53	get_node(const char *name)
		54	{
		55	struct node *n;
		56	unsigned a = 0;
		57	unsigned b = G.nodes_len;
		58
		59	/* Binary search for name */
		60	while (a != b) {
		61	unsigned m = (a + b) / 2;
		62	int cmp = strcmp(name, G.nodes[m]->name);
		63	if (cmp == 0)
		64	return G.nodes[m]; /* found */
		65	if (cmp < 0) {
		66	b = m;
		67	} else {
		68	a = m + 1;
		69	}
		70	}
		71
		72	/* Allocate new node */
		73	n = xzalloc(sizeof(*n) + strlen(name));
		74	//n->in_count = 0;
		75	//n->out_count = 0;
		76	//n->out = NULL;
		77	strcpy(n->name, name);
		78
		79	/* Insert to maintain sort */
		80	G.nodes = xrealloc(G.nodes, (G.nodes_len + 1) * sizeof(*G.nodes));
		81	memmove(&G.nodes[a + 1], &G.nodes[a],
		82	(G.nodes_len - a) * sizeof(*G.nodes));
		83	G.nodes[a] = n;
		84	G.nodes_len++;
		85	return n;
		86	}
		87
		88	static void
		89	add_edge(struct node a, struct node b)
		90	{
		91	a->out = xrealloc_vector(a->out, 6, a->out_count);
		92	a->out[a->out_count++] = b;
		93	b->in_count++;
		94	}
		95
		96	int tsort_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
		97	int tsort_main(int argc UNUSED_PARAM, char **argv)
		98	{
		99	char *line;
		100	size_t linesz;
		101	ssize_t len;
		102	struct node *a;
		103	int cycles;
		104
		105	INIT_G();
		106
		107	if (argv[1]) {
		108	if (argv[2])
		109	bb_show_usage();
		110	if (NOT_LONE_DASH(argv[1])) {
		111	close(STDIN_FILENO); /* == 0 */
		112	xopen(argv[1], O_RDONLY); /* fd will be 0 */
		113	}
		114	}
		115
		116	/* Read in words separated by <blank>s */
		117	a = NULL;
		118	line = NULL;
		119	linesz = 0;
		120	while ((len = getline(&line, &linesz, stdin)) != -1) {
		121	char *s = line;
		122	while (*(s = skip_whitespace(s)) != '\0') {
		123	struct node *b;
		124	char *word;
		125
		126	word = s;
		127	s = skip_non_whitespace(s);
		128	if (*s)
		129	*s++ = '\0';
		130
		131	/* Create nodes and edges for each word pair */
		132	b = get_node(word);
		133	if (!a) {
		134	a = b;
		135	} else {
		136	if (a != b)
		137	add_edge(a, b);
		138	a = NULL;
		139	}
		140	}
		141	}
		142	// Most other tools do not check for input read error (treat them as EOF)
		143	// die_if_ferror(in, input_filename);
		144	if (a)
		145	bb_simple_error_msg_and_die("odd input");
		146	free(line);
		147
		148	/*
		149	* Kahn's algorithm:
		150	* - find a node that has no incoming edges, print and remove it
		151	* - repeat until the graph is empty
		152	* - if any nodes are left, they form cycles.
		153	*/
		154	cycles = 0;
		155	while (G.nodes_len) {
		156	struct node *n;
		157	unsigned i;
		158
		159	/* Search for first node with no incoming edges */
		160	for (i = 0; i < G.nodes_len; i++) {
		161	if (!G.nodes[i]->in_count)
		162	break;
		163	}
		164	if (i == G.nodes_len) {
		165	/* Must be a cycle; arbitraily break it at node 0 */
		166	cycles++;
		167	i = 0;
		168	#ifndef TINY
		169	bb_error_msg("cycle at %s", G.nodes[i]->name);
		170	#endif
		171	}
		172
		173	/* Remove the node (need no longer maintain sort) */
		174	n = G.nodes[i];
		175	G.nodes[i] = G.nodes[--G.nodes_len];
		176
		177	/* And remove its outgoing edges */
		178	for (i = 0; i < n->out_count; i++)
		179	n->out[i]->in_count--;
		180	free(n->out);
		181
		182	puts(n->name);
		183	free(n);
		184	}
		185	free(G.nodes);
		186
		187	fflush_stdout_and_exit(cycles ? 1 : 0);
		188	}


diff --git a/docs/posix_conformance.txt b/docs/posix_conformance.txt index 5e107d74d..8edbe3e15 100644 --- a/docs/posix_conformance.txt +++ b/docs/posix_conformance.txt
@@ -24,7 +24,7 @@ POSIX Tools not supported:
24	gencat, getconf, iconv, join, link, locale, localedef, lp, m4,	24	gencat, getconf, iconv, join, link, locale, localedef, lp, m4,
25	mailx, newgrp, nl, pathchk, pax, pr, qalter, qdel, qhold, qmove,	25	mailx, newgrp, nl, pathchk, pax, pr, qalter, qdel, qhold, qmove,
26	qmsg, qrerun, qrls, qselect, qsig, qstat, qsub, tabs, talk, tput,	26	qmsg, qrerun, qrls, qselect, qsig, qstat, qsub, tabs, talk, tput,
27	tsort, unlink, uucp, uustat, uux	27	unlink, uucp, uustat, uux
28		28
29	POSIX Tools not supported (DEVELOPMENT):	29	POSIX Tools not supported (DEVELOPMENT):
30	admin, cflow, ctags, cxref, delta, fort77, get, lex, make, nm, prs, rmdel,	30	admin, cflow, ctags, cxref, delta, fort77, get, lex, make, nm, prs, rmdel,


diff --git a/testsuite/tsort.tests b/testsuite/tsort.tests new file mode 100755 index 000000000..c6fe78272 --- /dev/null +++ b/testsuite/tsort.tests
@@ -0,0 +1,110 @@
		1	#!/bin/sh
		2
		3	# SUSv3 compliant sort tests.
		4	# Public Domain, David Leonard 2022
		5
		6	. ./testing.sh
		7
		8	# name cmd expected ./input stdin
		9	testing "" "tsort" "a\n" "" "a a\n"
		10	testing "" "tsort -" "a\n" "" "a a\n"
		11	testing "" "tsort input" "a\n" "a a\n" ""
		12	testing "tsort input (w/o eol)" "tsort input" "a\n" "a a" ""
		13	testing "" "tsort /dev/null" "" "" ""
		14
		15	testing "tsort empty" tsort "" "" ""
		16	testing "tsort blank" tsort "" "" "\n"
		17	testing "tsort blanks" tsort "" "" "\n\n \t\n "
		18
		19	# simple inputs having exactly one solution
		20	testing "tsort 1-edge" tsort "a\nb\n" "" "a b\n"
		21	testing "tsort 2-edge" tsort "a\nb\nc\n" "" "a b b c\n"
		22
		23
		24	# The following test helper accommodates future variable output because, as
		25	# tsort is allowed to emit any total ordering that satisfies its input,
		26	# should the implementation changes, these tests will remain valid.
		27	#
		28	# The idea is to verify that:
		29	# - each input word is present EXACTLY ONCE in tsort's output
		30	# - for each input pair 'a b', the occurrence of 'a' APPEARS BEFORE 'b'
		31	# - the exit code is 0
		32
		33	tsort_test () {
		34	fail=
		35	name="$1"; shift
		36	args="$*"
		37	if [ $VERBOSE ]; then
		38	echo "============"
		39	echo "echo \"$args\" \| tsort >actual"
		40	fi
		41	echo "$args" \| tsort >actual
		42	ec=$?
		43	if [ $ec -ne 0 ]; then
		44	fail "tsort exit $ec, expected 0"
		45	fi
		46	while [ $# -ne 0 ]; do
		47	a=$1; shift
		48	b=$1; shift
		49	aline=$(grep -nxF "$a" <actual \| cut -d: -f1)
		50	bline=$(grep -nxF "$b" <actual \| cut -d: -f1)
		51	case $aline in
		52	"") fail "word $a missing from output ($args)";;
		53	" ") fail "word $a duplicated ($args)";;
		54	esac
		55	case $bline in
		56	"") fail "word $b missing from output ($args)";;
		57	" ") fail "word $b duplicated ($args)";;
		58	esac
		59	if [ $aline -gt $bline ]; then
		60	fail "$a appears after $b ($args)"
		61	fi
		62	done
		63	if [ $fail ] && [ $VERBOSE ]; then
		64	echo "exit $ec, actual:"
		65	cat actual
		66	fi
		67	rm actual
		68	report "$name"
		69	}
		70
		71	# Test that erroneous input causes an unsuccessful exit code
		72	# we don't test the output error message
		73	tsort_test_err () {
		74	fail=
		75	name="$1"; shift
		76	echo "$*" \| tsort >/dev/null 2>/dev/null
		77	ec=$?
		78	if [ $ec -eq 0 ]; then
		79	fail "$name: unexpected exit 0 ($*)"
		80	fi
		81	report "$name"
		82	}
		83
		84	fail () {
		85	[ $VERBOSE ] && echo "ERROR: $*"
		86	fail=1
		87	}
		88
		89	report () {
		90	if [ $fail ]; then
		91	FAILCOUNT=$(($FAILCOUNT + 1))
		92	echo "FAIL: $*"
		93	else
		94	echo "PASS: $*"
		95	fi
		96	}
		97
		98	tsort_test "tsort empty2"
		99	tsort_test "tsort singleton" a a
		100	tsort_test "tsort simple" a b b c
		101	tsort_test "tsort 2singleton" a a b b
		102	tsort_test "tsort medium" a b a b b c
		103	tsort_test "tsort std.example" a b c c d e g g f g e f h h
		104	tsort_test "tsort prefixes" a aa aa aaa aaaa aaaaa a aaaaa
		105
		106	tsort_test_err "tsort odd" a
		107	tsort_test_err "tsort odd2" a b c
		108	tsort_test_err "tsort cycle" a b b a
		109
		110	exit $FAILCOUNT