diff options
| author | Denys Vlasenko <vda.linux@googlemail.com> | 2020-12-22 20:08:40 +0100 |
|---|---|---|
| committer | Denys Vlasenko <vda.linux@googlemail.com> | 2020-12-22 20:24:30 +0100 |
| commit | 6452c300366cf0e322227ad39991ec2fdfd6beed (patch) | |
| tree | d2086c915ebeddea5775aa66e0dc07dae41addc2 /coreutils | |
| parent | 96717d9fb4560ad98a737108f83c7b247ef04674 (diff) | |
| download | busybox-w32-6452c300366cf0e322227ad39991ec2fdfd6beed.tar.gz busybox-w32-6452c300366cf0e322227ad39991ec2fdfd6beed.tar.bz2 busybox-w32-6452c300366cf0e322227ad39991ec2fdfd6beed.zip | |
factor: detect squares
If we have a square, the speedup can be extremely large
(in best case example below, it's ~40000 times faster):
$ time ./busybox_old factor 18446743988964486098
18446743988964486098: 2 3037000493 3037000493
real 0m4.246s
$ time ./busybox factor 18446743988964486098
18446743988964486098: 2 3037000493 3037000493
real 0m0.000s
function old new delta
isqrt_odd - 57 +57
print_w - 36 +36
factorize 218 236 +18
print_h - 7 +7
factorize_numstr 65 72 +7
------------------------------------------------------------------------------
(add/remove: 3/0 grow/shrink: 2/0 up/down: 125/0) Total: 125 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'coreutils')
| -rw-r--r-- | coreutils/factor.c | 42 |
1 files changed, 39 insertions, 3 deletions
diff --git a/coreutils/factor.c b/coreutils/factor.c index 27f26fe3f..729bd36a0 100644 --- a/coreutils/factor.c +++ b/coreutils/factor.c | |||
| @@ -85,7 +85,8 @@ static const uint64_t packed_wheel[] = { | |||
| 85 | #undef R | 85 | #undef R |
| 86 | #define WHEEL_START 5 | 86 | #define WHEEL_START 5 |
| 87 | #define WHEEL_SIZE (5 + 24 * 20) | 87 | #define WHEEL_SIZE (5 + 24 * 20) |
| 88 | #define wheel_tab ((uint8_t*)&bb_common_bufsiz1) | 88 | #define square_count (((uint8_t*)&bb_common_bufsiz1)[0]) |
| 89 | #define wheel_tab (((uint8_t*)&bb_common_bufsiz1) + 1) | ||
| 89 | /* | 90 | /* |
| 90 | * Why, you ask? | 91 | * Why, you ask? |
| 91 | * plain byte array: | 92 | * plain byte array: |
| @@ -119,9 +120,39 @@ static void unpack_wheel(void) | |||
| 119 | } | 120 | } |
| 120 | } | 121 | } |
| 121 | 122 | ||
| 123 | /* Prevent inlining, factorize() needs all help it can get with reducing register pressure */ | ||
| 124 | static NOINLINE void print_w(wide_t n) | ||
| 125 | { | ||
| 126 | unsigned rep = square_count; | ||
| 127 | do | ||
| 128 | printf(" %llu", n); | ||
| 129 | while (--rep != 0); | ||
| 130 | } | ||
| 131 | static NOINLINE void print_h(half_t n) | ||
| 132 | { | ||
| 133 | print_w(n); | ||
| 134 | } | ||
| 135 | |||
| 136 | static void factorize(wide_t N); | ||
| 137 | |||
| 122 | static half_t isqrt_odd(wide_t N) | 138 | static half_t isqrt_odd(wide_t N) |
| 123 | { | 139 | { |
| 124 | half_t s = isqrt(N); | 140 | half_t s = isqrt(N); |
| 141 | /* s^2 is <= N, (s+1)^2 > N */ | ||
| 142 | |||
| 143 | /* If s^2 in fact is EQUAL to N, it's very lucky. | ||
| 144 | * Examples: | ||
| 145 | * factor 18446743988964486098 = 2 * 3037000493 * 3037000493 | ||
| 146 | * factor 18446743902517389507 = 3 * 2479700513 * 2479700513 | ||
| 147 | */ | ||
| 148 | if ((wide_t)s * s == N) { | ||
| 149 | /* factorize sqrt(N), printing each factor twice */ | ||
| 150 | square_count *= 2; | ||
| 151 | factorize(s); | ||
| 152 | /* Let caller know we recursed */ | ||
| 153 | return 0; | ||
| 154 | } | ||
| 155 | |||
| 125 | /* Subtract 1 from even s, odd s won't change: */ | 156 | /* Subtract 1 from even s, odd s won't change: */ |
| 126 | /* (doesnt work for zero, but we know that s != 0 here) */ | 157 | /* (doesnt work for zero, but we know that s != 0 here) */ |
| 127 | s = (s - 1) | 1; | 158 | s = (s - 1) | 1; |
| @@ -152,6 +183,8 @@ static NOINLINE void factorize(wide_t N) | |||
| 152 | * factor 18446744073709551557 (0xffffffffffffffc5). | 183 | * factor 18446744073709551557 (0xffffffffffffffc5). |
| 153 | */ | 184 | */ |
| 154 | max_factor = isqrt_odd(N); | 185 | max_factor = isqrt_odd(N); |
| 186 | if (!max_factor) | ||
| 187 | return; /* square was detected and recursively factored */ | ||
| 155 | factor = 2; | 188 | factor = 2; |
| 156 | w = 0; | 189 | w = 0; |
| 157 | for (;;) { | 190 | for (;;) { |
| @@ -162,8 +195,10 @@ static NOINLINE void factorize(wide_t N) | |||
| 162 | */ | 195 | */ |
| 163 | while ((N % factor) == 0) { /* not likely */ | 196 | while ((N % factor) == 0) { /* not likely */ |
| 164 | N = N / factor; | 197 | N = N / factor; |
| 165 | printf(" %"HALF_FMT"u", factor); | 198 | print_h(factor); |
| 166 | max_factor = isqrt_odd(N); | 199 | max_factor = isqrt_odd(N); |
| 200 | if (!max_factor) | ||
| 201 | return; /* square was detected */ | ||
| 167 | } | 202 | } |
| 168 | if (factor >= max_factor) | 203 | if (factor >= max_factor) |
| 169 | break; | 204 | break; |
| @@ -178,7 +213,7 @@ static NOINLINE void factorize(wide_t N) | |||
| 178 | } | 213 | } |
| 179 | end: | 214 | end: |
| 180 | if (N > 1) | 215 | if (N > 1) |
| 181 | printf(" %llu", N); | 216 | print_w(N); |
| 182 | bb_putchar('\n'); | 217 | bb_putchar('\n'); |
| 183 | } | 218 | } |
| 184 | 219 | ||
| @@ -193,6 +228,7 @@ static void factorize_numstr(const char *numstr) | |||
| 193 | if (errno) | 228 | if (errno) |
| 194 | bb_show_usage(); | 229 | bb_show_usage(); |
| 195 | printf("%llu:", N); | 230 | printf("%llu:", N); |
| 231 | square_count = 1; | ||
| 196 | factorize(N); | 232 | factorize(N); |
| 197 | } | 233 | } |
| 198 | 234 | ||
