aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ron Yorston <rmy@pobox.com> 2021-08-07 09:41:49 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2021-08-22 15:40:21 +0200
commit: 8817e285b7ce071a27c366a2a602d3ca162b08ba (patch)
tree: f3f6e68561420ba8e2f13ea25f81257ed38452f1
parent: 74c4f356aee9c64978a881e5760055d0e3510a6a (diff)
downloadbusybox-w32-8817e285b7ce071a27c366a2a602d3ca162b08ba.tar.gz
busybox-w32-8817e285b7ce071a27c366a2a602d3ca162b08ba.tar.bz2
busybox-w32-8817e285b7ce071a27c366a2a602d3ca162b08ba.zip
shuf: speed-up when limited output is requested
A user noted that the following command was slower than they expected:

    busybox shuf -i "1500000000-$(date +%s)" -n 5

At time of writing the range contains 128 million values. On my system this takes 7.7s whereas 'shuf' from coreutils takes a handful of milliseconds.

Optimise BusyBox 'shuf' for cases where -n is specified by stopping shuffling once the required number of lines have been processed. On my system the time for the example is reduced to 0.4s.

    function                                             old     new   delta
    shuf_main                                            520     540     +20
    ------------------------------------------------------------------------------
    (add/remove: 0/0 grow/shrink: 1/0 up/down: 20/0)               Total: 20 bytes

v2: Code shrink. Since outlines <= numlines:
- the loop in shuffle_lines() only needs to test the value of outlines;
- shuffle_lines() can be called unconditionally.
Update timing to allow for the 13 million seconds elapsed since v1.

Signed-off-by: Ron Yorston <rmy@pobox.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- coreutils/shuf.c 29
1 file changed, 15 insertions, 14 deletions
diff --git a/coreutils/shuf.c b/coreutils/shuf.c
index fdbd3e9b2..3b2ba93cf 100644
--- a/coreutils/shuf.c
+++ b/coreutils/shuf.c
@@ -24,7 +24,7 @@
24//usage: "\n -i L-H Treat numbers L-H as lines" 24//usage: "\n -i L-H Treat numbers L-H as lines"
25//usage: "\n -n NUM Output at most NUM lines" 25//usage: "\n -n NUM Output at most NUM lines"
26//usage: "\n -o FILE Write to FILE, not standard output" 26//usage: "\n -o FILE Write to FILE, not standard output"
27//usage: "\n -z End lines with zero byte, not newline" 27//usage: "\n -z NUL terminated output"
28 28
29#include "libbb.h" 29#include "libbb.h"
30 30
@@ -39,8 +39,10 @@
39 39
40/* 40/*
41 * Use the Fisher-Yates shuffle algorithm on an array of lines. 41 * Use the Fisher-Yates shuffle algorithm on an array of lines.
42 * If the required number of output lines is less than the total
43 * we can stop shuffling early.
42 */ 44 */
43static void shuffle_lines(char **lines, unsigned numlines) 45static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
44{ 46{
45 unsigned i; 47 unsigned i;
46 unsigned r; 48 unsigned r;
@@ -48,7 +50,7 @@ static void shuffle_lines(char **lines, unsigned numlines)
48 50
49 srand(monotonic_us()); 51 srand(monotonic_us());
50 52
51 for (i = numlines-1; i > 0; i--) { 53 for (i = numlines-1; outlines > 0; i--, outlines--) {
52 r = rand(); 54 r = rand();
53 /* RAND_MAX can be as small as 32767 */ 55 /* RAND_MAX can be as small as 32767 */
54 if (i > RAND_MAX) 56 if (i > RAND_MAX)
@@ -67,7 +69,7 @@ int shuf_main(int argc, char **argv)
67 char *opt_i_str, *opt_n_str, *opt_o_str; 69 char *opt_i_str, *opt_n_str, *opt_o_str;
68 unsigned i; 70 unsigned i;
69 char **lines; 71 char **lines;
70 unsigned numlines; 72 unsigned numlines, outlines;
71 char eol; 73 char eol;
72 74
73 opts = getopt32(argv, "^" 75 opts = getopt32(argv, "^"
@@ -128,24 +130,23 @@ int shuf_main(int argc, char **argv)
128 fclose_if_not_stdin(fp); 130 fclose_if_not_stdin(fp);
129 } 131 }
130 132
131 if (numlines != 0) 133 outlines = numlines;
132 shuffle_lines(lines, numlines); 134 if (opts & OPT_n) {
135 outlines = xatou(opt_n_str);
136 if (outlines > numlines)
137 outlines = numlines;
138 }
139
140 shuffle_lines(lines, numlines, outlines);
133 141
134 if (opts & OPT_o) 142 if (opts & OPT_o)
135 xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO); 143 xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO);
136 144
137 if (opts & OPT_n) {
138 unsigned maxlines;
139 maxlines = xatou(opt_n_str);
140 if (numlines > maxlines)
141 numlines = maxlines;
142 }
143
144 eol = '\n'; 145 eol = '\n';
145 if (opts & OPT_z) 146 if (opts & OPT_z)
146 eol = '\0'; 147 eol = '\0';
147 148
148 for (i = 0; i < numlines; i++) { 149 for (i = numlines - outlines; i < numlines; i++) {
149 if (opts & OPT_i) 150 if (opts & OPT_i)
150 printf("%u%c", (unsigned)(uintptr_t)lines[i], eol); 151 printf("%u%c", (unsigned)(uintptr_t)lines[i], eol);
151 else 152 else