aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2020-12-22 20:08:40 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2020-12-22 20:24:30 +0100
commit6452c300366cf0e322227ad39991ec2fdfd6beed (patch)
treed2086c915ebeddea5775aa66e0dc07dae41addc2
parent96717d9fb4560ad98a737108f83c7b247ef04674 (diff)
downloadbusybox-w32-6452c300366cf0e322227ad39991ec2fdfd6beed.tar.gz
busybox-w32-6452c300366cf0e322227ad39991ec2fdfd6beed.tar.bz2
busybox-w32-6452c300366cf0e322227ad39991ec2fdfd6beed.zip
factor: detect squares
If we have a square, the speedup can be extremely large (in best case example below, it's ~40000 times faster): $ time ./busybox_old factor 18446743988964486098 18446743988964486098: 2 3037000493 3037000493 real 0m4.246s $ time ./busybox factor 18446743988964486098 18446743988964486098: 2 3037000493 3037000493 real 0m0.000s function old new delta isqrt_odd - 57 +57 print_w - 36 +36 factorize 218 236 +18 print_h - 7 +7 factorize_numstr 65 72 +7 ------------------------------------------------------------------------------ (add/remove: 3/0 grow/shrink: 2/0 up/down: 125/0) Total: 125 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--coreutils/factor.c42
-rwxr-xr-xtestsuite/factor.tests23
2 files changed, 62 insertions, 3 deletions
diff --git a/coreutils/factor.c b/coreutils/factor.c
index 27f26fe3f..729bd36a0 100644
--- a/coreutils/factor.c
+++ b/coreutils/factor.c
@@ -85,7 +85,8 @@ static const uint64_t packed_wheel[] = {
85#undef R 85#undef R
86#define WHEEL_START 5 86#define WHEEL_START 5
87#define WHEEL_SIZE (5 + 24 * 20) 87#define WHEEL_SIZE (5 + 24 * 20)
88#define wheel_tab ((uint8_t*)&bb_common_bufsiz1) 88#define square_count (((uint8_t*)&bb_common_bufsiz1)[0])
89#define wheel_tab (((uint8_t*)&bb_common_bufsiz1) + 1)
89/* 90/*
90 * Why, you ask? 91 * Why, you ask?
91 * plain byte array: 92 * plain byte array:
@@ -119,9 +120,39 @@ static void unpack_wheel(void)
119 } 120 }
120} 121}
121 122
123/* Prevent inlining, factorize() needs all help it can get with reducing register pressure */
124static NOINLINE void print_w(wide_t n)
125{
126 unsigned rep = square_count;
127 do
128 printf(" %llu", n);
129 while (--rep != 0);
130}
131static NOINLINE void print_h(half_t n)
132{
133 print_w(n);
134}
135
136static void factorize(wide_t N);
137
122static half_t isqrt_odd(wide_t N) 138static half_t isqrt_odd(wide_t N)
123{ 139{
124 half_t s = isqrt(N); 140 half_t s = isqrt(N);
141 /* s^2 is <= N, (s+1)^2 > N */
142
143 /* If s^2 in fact is EQUAL to N, it's very lucky.
144 * Examples:
145 * factor 18446743988964486098 = 2 * 3037000493 * 3037000493
146 * factor 18446743902517389507 = 3 * 2479700513 * 2479700513
147 */
148 if ((wide_t)s * s == N) {
149 /* factorize sqrt(N), printing each factor twice */
150 square_count *= 2;
151 factorize(s);
152 /* Let caller know we recursed */
153 return 0;
154 }
155
125 /* Subtract 1 from even s, odd s won't change: */ 156 /* Subtract 1 from even s, odd s won't change: */
126 /* (doesnt work for zero, but we know that s != 0 here) */ 157 /* (doesnt work for zero, but we know that s != 0 here) */
127 s = (s - 1) | 1; 158 s = (s - 1) | 1;
@@ -152,6 +183,8 @@ static NOINLINE void factorize(wide_t N)
152 * factor 18446744073709551557 (0xffffffffffffffc5). 183 * factor 18446744073709551557 (0xffffffffffffffc5).
153 */ 184 */
154 max_factor = isqrt_odd(N); 185 max_factor = isqrt_odd(N);
186 if (!max_factor)
187 return; /* square was detected and recursively factored */
155 factor = 2; 188 factor = 2;
156 w = 0; 189 w = 0;
157 for (;;) { 190 for (;;) {
@@ -162,8 +195,10 @@ static NOINLINE void factorize(wide_t N)
162 */ 195 */
163 while ((N % factor) == 0) { /* not likely */ 196 while ((N % factor) == 0) { /* not likely */
164 N = N / factor; 197 N = N / factor;
165 printf(" %"HALF_FMT"u", factor); 198 print_h(factor);
166 max_factor = isqrt_odd(N); 199 max_factor = isqrt_odd(N);
200 if (!max_factor)
201 return; /* square was detected */
167 } 202 }
168 if (factor >= max_factor) 203 if (factor >= max_factor)
169 break; 204 break;
@@ -178,7 +213,7 @@ static NOINLINE void factorize(wide_t N)
178 } 213 }
179 end: 214 end:
180 if (N > 1) 215 if (N > 1)
181 printf(" %llu", N); 216 print_w(N);
182 bb_putchar('\n'); 217 bb_putchar('\n');
183} 218}
184 219
@@ -193,6 +228,7 @@ static void factorize_numstr(const char *numstr)
193 if (errno) 228 if (errno)
194 bb_show_usage(); 229 bb_show_usage();
195 printf("%llu:", N); 230 printf("%llu:", N);
231 square_count = 1;
196 factorize(N); 232 factorize(N);
197} 233}
198 234
diff --git a/testsuite/factor.tests b/testsuite/factor.tests
index 2cf4a54ce..e404e29c1 100755
--- a/testsuite/factor.tests
+++ b/testsuite/factor.tests
@@ -45,4 +45,27 @@ testing "factor \$((2*3*5*7*11*13*17*19*23*29*31*37*41*43*47))" \
45 "614889782588491410: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47\n" \ 45 "614889782588491410: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47\n" \
46 "" "" 46 "" ""
47 47
48# Test that square-detection code is not buggy
49testing "factor 2 * 3037000493 * 3037000493" \
50 "factor 18446743988964486098" \
51 "18446743988964486098: 2 3037000493 3037000493\n" \
52 "" ""
53testing "factor 3 * 2479700513 * 2479700513" \
54 "factor 18446743902517389507" \
55 "18446743902517389507: 3 2479700513 2479700513\n" \
56 "" ""
57# including square-of-square cases:
58testing "factor 3 * 37831 * 37831 * 37831 * 37831" \
59 "factor 6144867742934288163" \
60 "6144867742934288163: 3 37831 37831 37831 37831\n" \
61 "" ""
62testing "factor 3 * 13^16" \
63 "factor 1996249827549539523" \
64 "1996249827549539523: 3 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \
65 "" ""
66testing "factor 13^16" \
67 "factor 665416609183179841" \
68 "665416609183179841: 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \
69 "" ""
70
48exit $FAILCOUNT 71exit $FAILCOUNT