diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2024-05-31 12:01:43 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2024-05-31 16:03:23 +0200 |
commit | 2075553a1b0eb843d4ee73a039b3e30bed9274e0 (patch) | |
tree | 904f192fa9d7c920c9dde6a7bc26b208c3b03312 /libbb/popcnt.c | |
parent | 5a68a246e750359819d63bcff5ef97dd9c7788fc (diff) | |
download | busybox-w32-2075553a1b0eb843d4ee73a039b3e30bed9274e0.tar.gz busybox-w32-2075553a1b0eb843d4ee73a039b3e30bed9274e0.tar.bz2 busybox-w32-2075553a1b0eb843d4ee73a039b3e30bed9274e0.zip |
libbb: add bit counting function, use where appropriate
Although "naive" counting function is not too slow and is smaller,
using it on e.g. each of 1024 words of CPU mask feels wrong.
function old new delta
bb_popcnt_32 - 52 +52
get_prefix 323 321 -2
nproc_main 206 199 -7
d4_run_script 739 731 -8
ipcalc_main 533 507 -26
------------------------------------------------------------------------------
(add/remove: 2/0 grow/shrink: 0/4 up/down: 52/-43) Total: 9 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb/popcnt.c')
-rw-r--r-- | libbb/popcnt.c | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/libbb/popcnt.c b/libbb/popcnt.c new file mode 100644 index 000000000..fe8cd240f --- /dev/null +++ b/libbb/popcnt.c | |||
@@ -0,0 +1,46 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | ||
3 | * Utility routines. | ||
4 | * | ||
5 | * Copyright (C) 2024 Denys Vlasenko | ||
6 | * | ||
7 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
8 | */ | ||
9 | #include "libbb.h" | ||
10 | |||
11 | unsigned FAST_FUNC bb_popcnt_32(uint32_t m) | ||
12 | { | ||
13 | /* replace each 2 bit group with the count of set bits in it */ | ||
14 | /* 00->00 01->01 10->01 11->10 */ | ||
15 | m = m - ((m >> 1) & 0x55555555); | ||
16 | /* in each 4 bit group, add two 2-bit counts */ | ||
17 | m = (m & 0x33333333) + ((m >> 2) & 0x33333333); | ||
18 | /* in each 8 bit group, add two 4-bit counts (in fact, 3-bit, 0nnn with n=0..4) */ | ||
19 | m = (m + (m >> 4)) & 0x0f0f0f0f; | ||
20 | #if 1 /* assume 32*32->32 multiply is fast */ | ||
21 | m = m * 0x01010101; /* top byte = m + (m<<8) + (m<<16) + (m<<24) */ | ||
22 | return m >> 24; | ||
23 | #else | ||
24 | /* 0000aaaa0000bbbb0000cccc0000dddd */ | ||
25 | /* + 0000aaaa0000bbbb0000cccc */ | ||
26 | /* = 0000xxxx000_a+b_000xxxxx000_c+d_ (we don't care about x bits) */ | ||
27 | m += m >> 8; /* in each 16-bit group, lowest 5 bits is the count */ | ||
28 | /* 0000xxxx000_a+b_000xxxxx000_c+d_ */ | ||
29 | /* + 0000xxxx000_a+b_ */ | ||
30 | /* = 0000xxxx000xxxxx00xxxxxx00a+b+cd */ | ||
31 | m += m >> 16; /* in each 32-bit group, lowest 6 bits is the count */ | ||
32 | return m & 0x3f; /* clear x bits */ | ||
33 | #endif | ||
34 | } | ||
35 | |||
36 | #if ULONG_MAX > 0xffffffff | ||
37 | unsigned FAST_FUNC bb_popcnt_long(unsigned long m) | ||
38 | { | ||
39 | BUILD_BUG_ON(sizeof(m) != 8); | ||
40 | /* 64-bit version of bb_popcnt_32 exists, but it uses 64-bit constants, | ||
41 | * which are awkward to generate on assembly level on most CPUs. | ||
42 | * For now, just add two 32-bit counts: | ||
43 | */ | ||
44 | return bb_popcnt_32((uint32_t)m) + bb_popcnt_32((uint32_t)(m >> 32)); | ||
45 | } | ||
46 | #endif | ||