diff options
| -rw-r--r-- | coreutils/factor.c | 2 | ||||
| -rw-r--r-- | include/libbb.h | 7 | ||||
| -rw-r--r-- | libbb/isqrt.c | 58 |
3 files changed, 62 insertions, 5 deletions
diff --git a/coreutils/factor.c b/coreutils/factor.c index 90e9ed761..4df523f28 100644 --- a/coreutils/factor.c +++ b/coreutils/factor.c | |||
| @@ -136,7 +136,7 @@ static void factorize(wide_t N); | |||
| 136 | 136 | ||
| 137 | static half_t isqrt_odd(wide_t N) | 137 | static half_t isqrt_odd(wide_t N) |
| 138 | { | 138 | { |
| 139 | half_t s = isqrt(N); | 139 | half_t s = isqrt_ull(N); |
| 140 | /* s^2 is <= N, (s+1)^2 > N */ | 140 | /* s^2 is <= N, (s+1)^2 > N */ |
| 141 | 141 | ||
| 142 | /* If s^2 in fact is EQUAL to N, it's very lucky. | 142 | /* If s^2 in fact is EQUAL to N, it's very lucky. |
diff --git a/include/libbb.h b/include/libbb.h index aacff42ee..7cca925b9 100644 --- a/include/libbb.h +++ b/include/libbb.h | |||
| @@ -408,7 +408,12 @@ unsigned FAST_FUNC bb_popcnt_long(unsigned long m); | |||
| 408 | #define bb_popcnt_long(m) bb_popcnt_32(m) | 408 | #define bb_popcnt_long(m) bb_popcnt_32(m) |
| 409 | #endif | 409 | #endif |
| 410 | 410 | ||
| 411 | unsigned long FAST_FUNC isqrt(unsigned long long N); | 411 | unsigned FAST_FUNC isqrt(unsigned long N); |
| 412 | #if LLONG_MAX > LONG_MAX | ||
| 413 | unsigned long FAST_FUNC isqrt_ull(unsigned long long N); | ||
| 414 | #else | ||
| 415 | # define isqrt_ull(N) isqrt(N) | ||
| 416 | #endif | ||
| 412 | 417 | ||
| 413 | unsigned long long monotonic_ns(void) FAST_FUNC; | 418 | unsigned long long monotonic_ns(void) FAST_FUNC; |
| 414 | unsigned long long monotonic_us(void) FAST_FUNC; | 419 | unsigned long long monotonic_us(void) FAST_FUNC; |
diff --git a/libbb/isqrt.c b/libbb/isqrt.c index 817b7d036..507c7d637 100644 --- a/libbb/isqrt.c +++ b/libbb/isqrt.c | |||
| @@ -9,15 +9,42 @@ | |||
| 9 | #ifndef ISQRT_TEST | 9 | #ifndef ISQRT_TEST |
| 10 | # include "libbb.h" | 10 | # include "libbb.h" |
| 11 | #else | 11 | #else |
| 12 | /* gcc -DISQRT_TEST -Wall -O2 isqrt.c -oisqrt && ./isqrt $((RANDOM*RANDOM)) */ | 12 | // gcc -DISQRT_TEST -Wall -Os -ffunction-sections isqrt.c -S -fverbose-asm |
| 13 | // gcc -DISQRT_TEST -Wall -Os -ffunction-sections isqrt.c -c | ||
| 14 | // gcc -DISQRT_TEST -Wall -Os -ffunction-sections isqrt.c -oisqrt && ./isqrt $((RANDOM*RANDOM)) | ||
| 13 | # include <stdlib.h> | 15 | # include <stdlib.h> |
| 14 | # include <stdio.h> | 16 | # include <stdio.h> |
| 15 | # include <time.h> | 17 | # include <time.h> |
| 18 | # include <limits.h> | ||
| 16 | # define FAST_FUNC /* nothing */ | 19 | # define FAST_FUNC /* nothing */ |
| 17 | #endif | 20 | #endif |
| 18 | 21 | ||
| 19 | /* Returns such x that x+1 > sqrt(N) */ | 22 | /* Returns such x that x+1 > sqrt(N) */ |
| 20 | unsigned long FAST_FUNC isqrt(unsigned long long N) | 23 | |
| 24 | /* Stolen from kernel code */ | ||
| 25 | unsigned FAST_FUNC isqrt(unsigned long x) | ||
| 26 | { | ||
| 27 | unsigned long y = 0; | ||
| 28 | unsigned long m = 1UL << (8*sizeof(x) - 2); | ||
| 29 | // ^^^^^ should be m = 1UL << ((fls(x) - 1) & ~1UL); | ||
| 30 | // but calculating fls() would take time and code | ||
| 31 | |||
| 32 | do { | ||
| 33 | unsigned long b = y + m; | ||
| 34 | y >>= 1; | ||
| 35 | if (x >= b) { | ||
| 36 | x -= b; | ||
| 37 | y += m; | ||
| 38 | } | ||
| 39 | m >>= 2; | ||
| 40 | } while (m); | ||
| 41 | return y; | ||
| 42 | } | ||
| 43 | |||
| 44 | #if LLONG_MAX > LONG_MAX | ||
| 45 | # if defined(i386) | ||
| 46 | /* Smaller code on this register-starved arch */ | ||
| 47 | unsigned long FAST_FUNC isqrt_ull(unsigned long long N) | ||
| 21 | { | 48 | { |
| 22 | unsigned long x; | 49 | unsigned long x; |
| 23 | unsigned shift; | 50 | unsigned shift; |
| @@ -33,15 +60,40 @@ unsigned long FAST_FUNC isqrt(unsigned long long N) | |||
| 33 | } while ((int)shift >= 0); | 60 | } while ((int)shift >= 0); |
| 34 | return x; | 61 | return x; |
| 35 | } | 62 | } |
| 63 | # else | ||
| 64 | /* Stolen from kernel code */ | ||
| 65 | unsigned long FAST_FUNC isqrt_ull(unsigned long long x) | ||
| 66 | { | ||
| 67 | unsigned long long y = 0; | ||
| 68 | unsigned long long m = 1ULL << (8*sizeof(x) - 2); | ||
| 69 | // ^^^^^ should be m = 1ULL << ((fls(x) - 1) & ~1ULL); | ||
| 70 | // but calculating fls() would take time and code | ||
| 71 | |||
| 72 | do { | ||
| 73 | unsigned long long b = y + m; | ||
| 74 | y >>= 1; | ||
| 75 | if (x >= b) { | ||
| 76 | x -= b; | ||
| 77 | y += m; | ||
| 78 | } | ||
| 79 | m >>= 2; | ||
| 80 | } while (m); | ||
| 81 | return y; | ||
| 82 | } | ||
| 83 | # endif | ||
| 84 | #endif /* wide version needed */ | ||
| 36 | 85 | ||
| 37 | #ifdef ISQRT_TEST | 86 | #ifdef ISQRT_TEST |
| 87 | # if LLONG_MAX == LONG_MAX | ||
| 88 | # define isqrt_ull(N) isqrt(N) | ||
| 89 | # endif | ||
| 38 | int main(int argc, char **argv) | 90 | int main(int argc, char **argv) |
| 39 | { | 91 | { |
| 40 | unsigned long long n = argv[1] ? strtoull(argv[1], NULL, 0) : time(NULL); | 92 | unsigned long long n = argv[1] ? strtoull(argv[1], NULL, 0) : time(NULL); |
| 41 | for (;;) { | 93 | for (;;) { |
| 42 | unsigned long h; | 94 | unsigned long h; |
| 43 | n--; | 95 | n--; |
| 44 | h = isqrt(n); | 96 | h = isqrt_ull(n); |
| 45 | if (!(n & 0xffff)) | 97 | if (!(n & 0xffff)) |
| 46 | printf("isqrt(%llx)=%lx\n", n, h); | 98 | printf("isqrt(%llx)=%lx\n", n, h); |
| 47 | if ((unsigned long long)h * h > n) { | 99 | if ((unsigned long long)h * h > n) { |
