diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2017-04-09 22:54:57 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2017-04-09 22:54:57 +0200 |
commit | 1d232fd44026deae94c88200766152aec9d6c5e5 (patch) | |
tree | 2512e5d107d26346c583b5ff25fd8c74cef69cd8 /coreutils | |
parent | c1b5b2a19016804f4330b36d9b69a64ccd59cb1c (diff) | |
download | busybox-w32-1d232fd44026deae94c88200766152aec9d6c5e5.tar.gz busybox-w32-1d232fd44026deae94c88200766152aec9d6c5e5.tar.bz2 busybox-w32-1d232fd44026deae94c88200766152aec9d6c5e5.zip |
factor: 30% faster code (estimate max possible factor just once)
function old new delta
factorize - 161 +161
isqrt_odd - 102 +102
factor_main 281 110 -171
------------------------------------------------------------------------------
(add/remove: 2/0 grow/shrink: 0/1 up/down: 263/-171) Total: 92 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'coreutils')
-rw-r--r-- | coreutils/factor.c | 106 |
1 files changed, 82 insertions, 24 deletions
diff --git a/coreutils/factor.c b/coreutils/factor.c index 0f838a735..e01190150 100644 --- a/coreutils/factor.c +++ b/coreutils/factor.c | |||
@@ -20,65 +20,111 @@ | |||
20 | 20 | ||
21 | #include "libbb.h" | 21 | #include "libbb.h" |
22 | 22 | ||
23 | #if 0 | ||
24 | # define dbg(...) bb_error_msg(__VA_ARGS__) | ||
25 | #else | ||
26 | # define dbg(...) ((void)0) | ||
27 | #endif | ||
28 | |||
29 | typedef unsigned long long wide_t; | ||
30 | #define WIDE_BITS (unsigned)(sizeof(wide_t)*8) | ||
31 | #define TOPMOST_WIDE_BIT ((wide_t)1 << (WIDE_BITS-1)) | ||
32 | |||
23 | #if ULLONG_MAX == (UINT_MAX * UINT_MAX + 2 * UINT_MAX) | 33 | #if ULLONG_MAX == (UINT_MAX * UINT_MAX + 2 * UINT_MAX) |
24 | /* "unsigned" is half as wide as ullong */ | 34 | /* "unsigned" is half as wide as ullong */ |
25 | typedef unsigned half_t; | 35 | typedef unsigned half_t; |
36 | #define HALF_MAX UINT_MAX | ||
26 | #define HALF_FMT "" | 37 | #define HALF_FMT "" |
27 | #elif ULLONG_MAX == (ULONG_MAX * ULONG_MAX + 2 * ULONG_MAX) | 38 | #elif ULLONG_MAX == (ULONG_MAX * ULONG_MAX + 2 * ULONG_MAX) |
28 | /* long is half as wide as ullong */ | 39 | /* long is half as wide as ullong */ |
29 | typedef unsigned long half_t; | 40 | typedef unsigned long half_t; |
41 | #define HALF_MAX ULONG_MAX | ||
30 | #define HALF_FMT "l" | 42 | #define HALF_FMT "l" |
31 | #else | 43 | #else |
32 | #error Cant find an integer type which is half as wide as ullong | 44 | #error Cant find an integer type which is half as wide as ullong |
33 | #endif | 45 | #endif |
34 | 46 | ||
35 | static void factorize(const char *numstr) | 47 | /* Returns such x that x+1 > sqrt(N) */ |
48 | static inline half_t isqrt(wide_t N) | ||
36 | { | 49 | { |
37 | unsigned long long N, factor2; | 50 | wide_t x; |
38 | half_t factor; | 51 | unsigned c; |
39 | unsigned count3; | 52 | |
53 | // Never called with N < 1 | ||
54 | // if (N == 0) | ||
55 | // return 0; | ||
56 | // | ||
57 | /* Count leading zeros */ | ||
58 | c = 0; | ||
59 | while (!(N & TOPMOST_WIDE_BIT)) { | ||
60 | c++; | ||
61 | N <<= 1; | ||
62 | } | ||
63 | N >>= c; | ||
40 | 64 | ||
41 | /* Coreutils compat */ | 65 | /* Make x > sqrt(n) */ |
42 | numstr = skip_whitespace(numstr); | 66 | x = (wide_t)1 << ((WIDE_BITS + 1 - c) / 2); |
43 | if (*numstr == '+') | 67 | dbg("x:%llx", x); |
44 | numstr++; | 68 | for (;;) { |
69 | wide_t y = (x + N/x) / 2; | ||
70 | dbg("y:%llx y^2:%llx (y+1)^2:%llx]", y, y*y, (y+1)*(y+1)); | ||
71 | if (y >= x) { | ||
72 | /* Handle degenerate case N = 0xffffffffff...fffffff */ | ||
73 | if (y == (wide_t)HALF_MAX + 1) | ||
74 | y--; | ||
75 | dbg("isqrt(%llx)=%llx"HALF_FMT, N, y); | ||
76 | return y; | ||
77 | } | ||
78 | x = y; | ||
79 | } | ||
80 | } | ||
45 | 81 | ||
46 | N = bb_strtoull(numstr, NULL, 10); | 82 | static NOINLINE half_t isqrt_odd(wide_t N) |
47 | if (errno) | 83 | { |
48 | bb_show_usage(); | 84 | half_t s = isqrt(N); |
85 | if (s && !(s & 1)) /* even? */ | ||
86 | s--; | ||
87 | return s; | ||
88 | } | ||
49 | 89 | ||
50 | printf("%llu:", N); | 90 | static NOINLINE void factorize(wide_t N) |
91 | { | ||
92 | wide_t factor2; | ||
93 | half_t factor; | ||
94 | half_t max_factor; | ||
95 | unsigned count3; | ||
51 | 96 | ||
52 | if (N < 4) | 97 | if (N < 4) |
53 | goto end; | 98 | goto end; |
99 | |||
54 | while (!(N & 1)) { | 100 | while (!(N & 1)) { |
55 | printf(" 2"); | 101 | printf(" 2"); |
56 | N >>= 1; | 102 | N >>= 1; |
57 | } | 103 | } |
58 | 104 | ||
105 | max_factor = isqrt_odd(N); | ||
59 | count3 = 3; | 106 | count3 = 3; |
60 | factor = 3; | 107 | factor = 3; |
61 | factor2 = 3 * 3; | 108 | factor2 = 3 * 3; |
62 | for (;;) { | 109 | for (;;) { |
63 | unsigned long long nfactor2; | ||
64 | |||
65 | while ((N % factor) == 0) { | 110 | while ((N % factor) == 0) { |
66 | N = N / factor; | 111 | N = N / factor; |
67 | printf(" %u"HALF_FMT"", factor); | 112 | printf(" %u"HALF_FMT"", factor); |
113 | max_factor = isqrt_odd(N); | ||
68 | } | 114 | } |
69 | next_factor: | 115 | next_factor: |
70 | /* (f + 2)^2 = f^2 + 4*f + 4 = f^2 + 4*(f+1) */ | 116 | if (factor >= max_factor) |
71 | nfactor2 = factor2 + 4 * (factor + 1); | ||
72 | if (nfactor2 < factor2) /* overflow? */ | ||
73 | break; | ||
74 | factor2 = nfactor2; | ||
75 | if (factor2 > N) | ||
76 | break; | 117 | break; |
118 | /* (f + 2)^2 = f^2 + 4*f + 4 = f^2 + 4*(f+1) */ | ||
119 | factor2 = factor2 + 4 * (factor + 1); | ||
120 | /* overflow is impossible due to max_factor check */ | ||
121 | /* (factor2 > N) is impossible due to max_factor check */ | ||
77 | factor += 2; | 122 | factor += 2; |
78 | /* Rudimentary wheel sieving: skip multiples of 3: | 123 | /* Rudimentary wheel sieving: skip multiples of 3: |
79 | * Every third odd number is divisible by three and thus isn't a prime: | 124 | * Every third odd number is divisible by three and thus isn't a prime: |
80 | * 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37... | 125 | * 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37... |
81 | * ^ ^ ^ ^ ^ ^ ^ _ ^ ^ _ ^ (^primes) | 126 | * ^ ^ ^ ^ ^ ^ ^ _ ^ ^ _ ^ |
127 | * (^ = primes, _ = would-be-primes-if-not-divisible-by-5) | ||
82 | */ | 128 | */ |
83 | --count3; | 129 | --count3; |
84 | if (count3 == 0) { | 130 | if (count3 == 0) { |
@@ -105,8 +151,20 @@ int factor_main(int argc UNUSED_PARAM, char **argv) | |||
105 | bb_show_usage(); | 151 | bb_show_usage(); |
106 | 152 | ||
107 | do { | 153 | do { |
108 | factorize(*argv); | 154 | wide_t N; |
155 | const char *numstr; | ||
156 | |||
157 | /* Coreutils compat */ | ||
158 | numstr = skip_whitespace(*argv); | ||
159 | if (*numstr == '+') | ||
160 | numstr++; | ||
161 | |||
162 | N = bb_strtoull(numstr, NULL, 10); | ||
163 | if (errno) | ||
164 | bb_show_usage(); | ||
165 | printf("%llu:", N); | ||
166 | factorize(N); | ||
109 | } while (*++argv); | 167 | } while (*++argv); |
110 | 168 | ||
111 | fflush_stdout_and_exit(EXIT_SUCCESS); | 169 | return EXIT_SUCCESS; |
112 | } | 170 | } |