diff options
Diffstat (limited to 'coreutils/shuf.c')
-rw-r--r-- | coreutils/shuf.c | 69 |
1 files changed, 33 insertions, 36 deletions
diff --git a/coreutils/shuf.c b/coreutils/shuf.c index 71b27f497..81b0df453 100644 --- a/coreutils/shuf.c +++ b/coreutils/shuf.c | |||
@@ -44,21 +44,25 @@ | |||
44 | */ | 44 | */ |
45 | static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines) | 45 | static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines) |
46 | { | 46 | { |
47 | unsigned i; | ||
48 | unsigned r; | ||
49 | char *tmp; | ||
50 | |||
51 | srand(monotonic_us()); | 47 | srand(monotonic_us()); |
52 | 48 | ||
53 | for (i = numlines - 1; outlines > 0; i--, outlines--) { | 49 | while (outlines != 0) { |
54 | r = rand(); | 50 | char *tmp; |
51 | unsigned r = rand(); | ||
55 | /* RAND_MAX can be as small as 32767 */ | 52 | /* RAND_MAX can be as small as 32767 */ |
56 | if (i > RAND_MAX) | 53 | if (numlines > RAND_MAX) |
57 | r ^= rand() << 15; | 54 | r ^= rand() << 15; |
58 | r %= i + 1; | 55 | r %= numlines; |
59 | tmp = lines[i]; | 56 | //TODO: the above method is seriously non-uniform when numlines is very large. |
60 | lines[i] = lines[r]; | 57 | //For example, with numlines of 0xf0000000, |
58 | //values of (r % numlines) in [0, 0x0fffffff] range | ||
59 | //are more likely: e.g. r=1 and r=0xf0000001 both map to 1, | ||
60 | //whereas only one value, r=0xefffffff, maps to 0xefffffff. | ||
61 | numlines--; | ||
62 | tmp = lines[numlines]; | ||
63 | lines[numlines] = lines[r]; | ||
61 | lines[r] = tmp; | 64 | lines[r] = tmp; |
65 | outlines--; | ||
62 | } | 66 | } |
63 | } | 67 | } |
64 | 68 | ||
@@ -67,9 +71,10 @@ int shuf_main(int argc, char **argv) | |||
67 | { | 71 | { |
68 | unsigned opts; | 72 | unsigned opts; |
69 | char *opt_i_str, *opt_n_str, *opt_o_str; | 73 | char *opt_i_str, *opt_n_str, *opt_o_str; |
70 | unsigned i; | ||
71 | char **lines; | 74 | char **lines; |
75 | unsigned long long lo = lo; | ||
72 | unsigned numlines, outlines; | 76 | unsigned numlines, outlines; |
77 | unsigned i; | ||
73 | char eol; | 78 | char eol; |
74 | 79 | ||
75 | opts = getopt32(argv, "^" | 80 | opts = getopt32(argv, "^" |
@@ -89,8 +94,8 @@ int shuf_main(int argc, char **argv) | |||
89 | } else | 94 | } else |
90 | if (opts & OPT_i) { | 95 | if (opts & OPT_i) { |
91 | /* create a range of numbers */ | 96 | /* create a range of numbers */ |
97 | unsigned long long hi; | ||
92 | char *dash; | 98 | char *dash; |
93 | uintptr_t lo, hi; | ||
94 | 99 | ||
95 | if (argv[0]) | 100 | if (argv[0]) |
96 | bb_show_usage(); | 101 | bb_show_usage(); |
@@ -100,27 +105,24 @@ int shuf_main(int argc, char **argv) | |||
100 | bb_error_msg_and_die("bad range '%s'", opt_i_str); | 105 | bb_error_msg_and_die("bad range '%s'", opt_i_str); |
101 | } | 106 | } |
102 | *dash = '\0'; | 107 | *dash = '\0'; |
103 | if (sizeof(lo) == sizeof(int)) { | 108 | lo = xatoull(opt_i_str); |
104 | lo = xatou(opt_i_str); | 109 | hi = xatoull(dash + 1); |
105 | hi = xatou(dash + 1); | ||
106 | } else | ||
107 | if (sizeof(lo) == sizeof(long)) { | ||
108 | lo = xatoul(opt_i_str); | ||
109 | hi = xatoul(dash + 1); | ||
110 | } else { | ||
111 | lo = xatoull(opt_i_str); | ||
112 | hi = xatoull(dash + 1); | ||
113 | } | ||
114 | *dash = '-'; | 110 | *dash = '-'; |
115 | if (hi < lo) { | 111 | if (hi < lo) |
116 | bb_error_msg_and_die("bad range '%s'", opt_i_str); | 112 | bb_error_msg_and_die("bad range '%s'", opt_i_str); |
113 | hi -= lo; | ||
114 | if (sizeof(size_t) > sizeof(numlines)) { | ||
115 | if (hi >= UINT_MAX) | ||
116 | bb_error_msg_and_die("bad range '%s'", opt_i_str); | ||
117 | } else { | ||
118 | if (hi >= UINT_MAX / sizeof(lines[0])) | ||
119 | bb_error_msg_and_die("bad range '%s'", opt_i_str); | ||
117 | } | 120 | } |
118 | 121 | ||
119 | numlines = (hi+1) - lo; | 122 | numlines = hi + 1; |
120 | lines = xmalloc(numlines * sizeof(lines[0])); | 123 | lines = xmalloc((size_t)numlines * sizeof(lines[0])); |
121 | for (i = 0; i < numlines; i++) { | 124 | for (i = 0; i < numlines; i++) { |
122 | lines[i] = (char*)lo; | 125 | lines[i] = (char*)(uintptr_t)i; |
123 | lo++; | ||
124 | } | 126 | } |
125 | } else { | 127 | } else { |
126 | /* default - read lines from stdin or the input file */ | 128 | /* default - read lines from stdin or the input file */ |
@@ -163,14 +165,9 @@ int shuf_main(int argc, char **argv) | |||
163 | eol = '\0'; | 165 | eol = '\0'; |
164 | 166 | ||
165 | for (i = numlines - outlines; i < numlines; i++) { | 167 | for (i = numlines - outlines; i < numlines; i++) { |
166 | if (opts & OPT_i) { | 168 | if (opts & OPT_i) |
167 | if (sizeof(lines[0]) == sizeof(int)) | 169 | printf("%"LL_FMT"u%c", lo + (uintptr_t)lines[i], eol); |
168 | printf("%u%c", (unsigned)(uintptr_t)lines[i], eol); | 170 | else |
169 | else if (sizeof(lines[0]) == sizeof(long)) | ||
170 | printf("%lu%c", (unsigned long)(uintptr_t)lines[i], eol); | ||
171 | else | ||
172 | printf("%"LL_FMT"u%c", (unsigned long long)(uintptr_t)lines[i], eol); | ||
173 | } else | ||
174 | printf("%s%c", lines[i], eol); | 171 | printf("%s%c", lines[i], eol); |
175 | } | 172 | } |
176 | 173 | ||