diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2020-12-22 20:08:40 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2020-12-22 20:24:30 +0100 |
commit | 6452c300366cf0e322227ad39991ec2fdfd6beed (patch) | |
tree | d2086c915ebeddea5775aa66e0dc07dae41addc2 | |
parent | 96717d9fb4560ad98a737108f83c7b247ef04674 (diff) | |
download | busybox-w32-6452c300366cf0e322227ad39991ec2fdfd6beed.tar.gz busybox-w32-6452c300366cf0e322227ad39991ec2fdfd6beed.tar.bz2 busybox-w32-6452c300366cf0e322227ad39991ec2fdfd6beed.zip |
factor: detect squares
If we have a square, the speedup can be extremely large
(in best case example below, it's ~40000 times faster):
$ time ./busybox_old factor 18446743988964486098
18446743988964486098: 2 3037000493 3037000493
real 0m4.246s
$ time ./busybox factor 18446743988964486098
18446743988964486098: 2 3037000493 3037000493
real 0m0.000s
function old new delta
isqrt_odd - 57 +57
print_w - 36 +36
factorize 218 236 +18
print_h - 7 +7
factorize_numstr 65 72 +7
------------------------------------------------------------------------------
(add/remove: 3/0 grow/shrink: 2/0 up/down: 125/0) Total: 125 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | coreutils/factor.c | 42 | ||||
-rwxr-xr-x | testsuite/factor.tests | 23 |
2 files changed, 62 insertions, 3 deletions
diff --git a/coreutils/factor.c b/coreutils/factor.c index 27f26fe3f..729bd36a0 100644 --- a/coreutils/factor.c +++ b/coreutils/factor.c | |||
@@ -85,7 +85,8 @@ static const uint64_t packed_wheel[] = { | |||
85 | #undef R | 85 | #undef R |
86 | #define WHEEL_START 5 | 86 | #define WHEEL_START 5 |
87 | #define WHEEL_SIZE (5 + 24 * 20) | 87 | #define WHEEL_SIZE (5 + 24 * 20) |
88 | #define wheel_tab ((uint8_t*)&bb_common_bufsiz1) | 88 | #define square_count (((uint8_t*)&bb_common_bufsiz1)[0]) |
89 | #define wheel_tab (((uint8_t*)&bb_common_bufsiz1) + 1) | ||
89 | /* | 90 | /* |
90 | * Why, you ask? | 91 | * Why, you ask? |
91 | * plain byte array: | 92 | * plain byte array: |
@@ -119,9 +120,39 @@ static void unpack_wheel(void) | |||
119 | } | 120 | } |
120 | } | 121 | } |
121 | 122 | ||
123 | /* Prevent inlining, factorize() needs all help it can get with reducing register pressure */ | ||
124 | static NOINLINE void print_w(wide_t n) | ||
125 | { | ||
126 | unsigned rep = square_count; | ||
127 | do | ||
128 | printf(" %llu", n); | ||
129 | while (--rep != 0); | ||
130 | } | ||
131 | static NOINLINE void print_h(half_t n) | ||
132 | { | ||
133 | print_w(n); | ||
134 | } | ||
135 | |||
136 | static void factorize(wide_t N); | ||
137 | |||
122 | static half_t isqrt_odd(wide_t N) | 138 | static half_t isqrt_odd(wide_t N) |
123 | { | 139 | { |
124 | half_t s = isqrt(N); | 140 | half_t s = isqrt(N); |
141 | /* s^2 is <= N, (s+1)^2 > N */ | ||
142 | |||
143 | /* If s^2 in fact is EQUAL to N, it's very lucky. | ||
144 | * Examples: | ||
145 | * factor 18446743988964486098 = 2 * 3037000493 * 3037000493 | ||
146 | * factor 18446743902517389507 = 3 * 2479700513 * 2479700513 | ||
147 | */ | ||
148 | if ((wide_t)s * s == N) { | ||
149 | /* factorize sqrt(N), printing each factor twice */ | ||
150 | square_count *= 2; | ||
151 | factorize(s); | ||
152 | /* Let caller know we recursed */ | ||
153 | return 0; | ||
154 | } | ||
155 | |||
125 | /* Subtract 1 from even s, odd s won't change: */ | 156 | /* Subtract 1 from even s, odd s won't change: */ |
126 | /* (doesnt work for zero, but we know that s != 0 here) */ | 157 | /* (doesnt work for zero, but we know that s != 0 here) */ |
127 | s = (s - 1) | 1; | 158 | s = (s - 1) | 1; |
@@ -152,6 +183,8 @@ static NOINLINE void factorize(wide_t N) | |||
152 | * factor 18446744073709551557 (0xffffffffffffffc5). | 183 | * factor 18446744073709551557 (0xffffffffffffffc5). |
153 | */ | 184 | */ |
154 | max_factor = isqrt_odd(N); | 185 | max_factor = isqrt_odd(N); |
186 | if (!max_factor) | ||
187 | return; /* square was detected and recursively factored */ | ||
155 | factor = 2; | 188 | factor = 2; |
156 | w = 0; | 189 | w = 0; |
157 | for (;;) { | 190 | for (;;) { |
@@ -162,8 +195,10 @@ static NOINLINE void factorize(wide_t N) | |||
162 | */ | 195 | */ |
163 | while ((N % factor) == 0) { /* not likely */ | 196 | while ((N % factor) == 0) { /* not likely */ |
164 | N = N / factor; | 197 | N = N / factor; |
165 | printf(" %"HALF_FMT"u", factor); | 198 | print_h(factor); |
166 | max_factor = isqrt_odd(N); | 199 | max_factor = isqrt_odd(N); |
200 | if (!max_factor) | ||
201 | return; /* square was detected */ | ||
167 | } | 202 | } |
168 | if (factor >= max_factor) | 203 | if (factor >= max_factor) |
169 | break; | 204 | break; |
@@ -178,7 +213,7 @@ static NOINLINE void factorize(wide_t N) | |||
178 | } | 213 | } |
179 | end: | 214 | end: |
180 | if (N > 1) | 215 | if (N > 1) |
181 | printf(" %llu", N); | 216 | print_w(N); |
182 | bb_putchar('\n'); | 217 | bb_putchar('\n'); |
183 | } | 218 | } |
184 | 219 | ||
@@ -193,6 +228,7 @@ static void factorize_numstr(const char *numstr) | |||
193 | if (errno) | 228 | if (errno) |
194 | bb_show_usage(); | 229 | bb_show_usage(); |
195 | printf("%llu:", N); | 230 | printf("%llu:", N); |
231 | square_count = 1; | ||
196 | factorize(N); | 232 | factorize(N); |
197 | } | 233 | } |
198 | 234 | ||
diff --git a/testsuite/factor.tests b/testsuite/factor.tests index 2cf4a54ce..e404e29c1 100755 --- a/testsuite/factor.tests +++ b/testsuite/factor.tests | |||
@@ -45,4 +45,27 @@ testing "factor \$((2*3*5*7*11*13*17*19*23*29*31*37*41*43*47))" \ | |||
45 | "614889782588491410: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47\n" \ | 45 | "614889782588491410: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47\n" \ |
46 | "" "" | 46 | "" "" |
47 | 47 | ||
48 | # Test that square-detection code is not buggy | ||
49 | testing "factor 2 * 3037000493 * 3037000493" \ | ||
50 | "factor 18446743988964486098" \ | ||
51 | "18446743988964486098: 2 3037000493 3037000493\n" \ | ||
52 | "" "" | ||
53 | testing "factor 3 * 2479700513 * 2479700513" \ | ||
54 | "factor 18446743902517389507" \ | ||
55 | "18446743902517389507: 3 2479700513 2479700513\n" \ | ||
56 | "" "" | ||
57 | # including square-of-square cases: | ||
58 | testing "factor 3 * 37831 * 37831 * 37831 * 37831" \ | ||
59 | "factor 6144867742934288163" \ | ||
60 | "6144867742934288163: 3 37831 37831 37831 37831\n" \ | ||
61 | "" "" | ||
62 | testing "factor 3 * 13^16" \ | ||
63 | "factor 1996249827549539523" \ | ||
64 | "1996249827549539523: 3 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \ | ||
65 | "" "" | ||
66 | testing "factor 13^16" \ | ||
67 | "factor 665416609183179841" \ | ||
68 | "665416609183179841: 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \ | ||
69 | "" "" | ||
70 | |||
48 | exit $FAILCOUNT | 71 | exit $FAILCOUNT |