diff options
Diffstat (limited to 'coreutils/shuf.c')
-rw-r--r-- | coreutils/shuf.c | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/coreutils/shuf.c b/coreutils/shuf.c new file mode 100644 index 000000000..6d0a68fc1 --- /dev/null +++ b/coreutils/shuf.c | |||
@@ -0,0 +1,153 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | ||
3 | * shuf: Write a random permutation of the input lines to standard output. | ||
4 | * | ||
5 | * Copyright (C) 2014 by Bartosz Golaszewski <bartekgola@gmail.com> | ||
6 | * | ||
7 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. | ||
8 | */ | ||
9 | |||
10 | //config:config SHUF | ||
11 | //config: bool "shuf" | ||
12 | //config: default y | ||
13 | //config: help | ||
14 | //config: Generate random permutations | ||
15 | |||
16 | //kbuild:lib-$(CONFIG_SHUF) += shuf.o | ||
17 | //applet:IF_SHUF(APPLET_NOEXEC(shuf, shuf, BB_DIR_USR_BIN, BB_SUID_DROP, shuf)) | ||
18 | |||
19 | //usage:#define shuf_trivial_usage | ||
20 | //usage: "[-e|-i L-H] [-n NUM] [-o FILE] [-z] [FILE|ARG...]" | ||
21 | //usage:#define shuf_full_usage "\n\n" | ||
22 | //usage: "Randomly permute lines\n" | ||
23 | //usage: "\n -e Treat ARGs as lines" | ||
24 | //usage: "\n -i L-H Treat numbers L-H as lines" | ||
25 | //usage: "\n -n NUM Output at most NUM lines" | ||
26 | //usage: "\n -o FILE Write to FILE, not standard output" | ||
27 | //usage: "\n -z End lines with zero byte, not newline" | ||
28 | |||
29 | #include "libbb.h" | ||
30 | |||
31 | /* This is a NOEXEC applet. Be very careful! */ | ||
32 | |||
/*
 * Option bit flags tested against the getopt32() result in shuf_main().
 * The bit positions follow the order of the option letters in OPT_STR.
 */
enum {
	OPT_e = 1 << 0, /* -e: take lines from the command-line arguments */
	OPT_i = 1 << 1, /* -i L-H: take lines from a numeric range */
	OPT_n = 1 << 2, /* -n NUM: limit the number of output lines */
	OPT_o = 1 << 3, /* -o FILE: write to FILE */
	OPT_z = 1 << 4, /* -z: end output lines with a zero byte */
};
/* Option letters; a trailing ':' marks an option that takes an argument */
#define OPT_STR "ei:n:o:z"
39 | |||
/*
 * Shuffle an array of lines in place using the Fisher-Yates algorithm.
 *
 * lines:    array of numlines entries. Entries are only swapped, never
 *           dereferenced, so they need not be valid string pointers.
 * numlines: number of entries; 0 or 1 leaves the array untouched.
 *
 * The rand() generator is reseeded from monotonic_us() on each call that
 * actually shuffles.
 */
static void shuffle_lines(char **lines, unsigned numlines)
{
	unsigned i;
	unsigned r;
	char *tmp;

	/* Nothing to permute; also keeps numlines-1 below from wrapping */
	if (numlines < 2)
		return;

	srand(monotonic_us());

	for (i = numlines-1; i > 0; i--) {
		r = rand();
		/* RAND_MAX can be as small as 32767 */
		if (i > RAND_MAX)
			/* shift as unsigned: a signed shift could overflow */
			r ^= (unsigned)rand() << 15;
		/*
		 * Pick r from [0, i] inclusive. Using "% i" would forbid
		 * r == i, so no element could ever keep its position and
		 * only cyclic permutations would be generated.
		 */
		r %= i + 1;
		tmp = lines[i];
		lines[i] = lines[r];
		lines[r] = tmp;
	}
}
62 | |||
63 | int shuf_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | ||
64 | int shuf_main(int argc, char **argv) | ||
65 | { | ||
66 | unsigned opts; | ||
67 | char *opt_i_str, *opt_n_str, *opt_o_str; | ||
68 | unsigned i; | ||
69 | char **lines; | ||
70 | unsigned numlines; | ||
71 | char eol; | ||
72 | |||
73 | opt_complementary = "e--i:i--e"; /* mutually exclusive */ | ||
74 | opts = getopt32(argv, OPT_STR, &opt_i_str, &opt_n_str, &opt_o_str); | ||
75 | |||
76 | argc -= optind; | ||
77 | argv += optind; | ||
78 | |||
79 | /* Prepare lines for shuffling - either: */ | ||
80 | if (opts & OPT_e) { | ||
81 | /* make lines from command-line arguments */ | ||
82 | numlines = argc; | ||
83 | lines = argv; | ||
84 | } else | ||
85 | if (opts & OPT_i) { | ||
86 | /* create a range of numbers */ | ||
87 | char *dash; | ||
88 | unsigned lo, hi; | ||
89 | |||
90 | dash = strchr(opt_i_str, '-'); | ||
91 | if (!dash) { | ||
92 | bb_error_msg_and_die("bad range '%s'", opt_i_str); | ||
93 | } | ||
94 | *dash = '\0'; | ||
95 | lo = xatou(opt_i_str); | ||
96 | hi = xatou(dash + 1); | ||
97 | *dash = '-'; | ||
98 | if (hi < lo) { | ||
99 | bb_error_msg_and_die("bad range '%s'", opt_i_str); | ||
100 | } | ||
101 | |||
102 | numlines = (hi+1) - lo; | ||
103 | lines = xmalloc(numlines * sizeof(lines[0])); | ||
104 | for (i = 0; i < numlines; i++) { | ||
105 | lines[i] = (char*)(uintptr_t)lo; | ||
106 | lo++; | ||
107 | } | ||
108 | } else { | ||
109 | /* default - read lines from stdin or the input file */ | ||
110 | FILE *fp; | ||
111 | |||
112 | if (argc > 1) | ||
113 | bb_show_usage(); | ||
114 | |||
115 | fp = xfopen_stdin(argv[0] ? argv[0] : "-"); | ||
116 | lines = NULL; | ||
117 | numlines = 0; | ||
118 | for (;;) { | ||
119 | char *line = xmalloc_fgetline(fp); | ||
120 | if (!line) | ||
121 | break; | ||
122 | lines = xrealloc_vector(lines, 6, numlines); | ||
123 | lines[numlines++] = line; | ||
124 | } | ||
125 | fclose_if_not_stdin(fp); | ||
126 | } | ||
127 | |||
128 | if (numlines != 0) | ||
129 | shuffle_lines(lines, numlines); | ||
130 | |||
131 | if (opts & OPT_o) | ||
132 | xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO); | ||
133 | |||
134 | if (opts & OPT_n) { | ||
135 | unsigned maxlines; | ||
136 | maxlines = xatou(opt_n_str); | ||
137 | if (numlines > maxlines) | ||
138 | numlines = maxlines; | ||
139 | } | ||
140 | |||
141 | eol = '\n'; | ||
142 | if (opts & OPT_z) | ||
143 | eol = '\0'; | ||
144 | |||
145 | for (i = 0; i < numlines; i++) { | ||
146 | if (opts & OPT_i) | ||
147 | printf("%u%c", (unsigned)(uintptr_t)lines[i], eol); | ||
148 | else | ||
149 | printf("%s%c", lines[i], eol); | ||
150 | } | ||
151 | |||
152 | fflush_stdout_and_exit(EXIT_SUCCESS); | ||
153 | } | ||