aboutsummaryrefslogtreecommitdiff
path: root/libbb/popcnt.c
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2024-05-31 12:01:43 +0200
committerDenys Vlasenko <vda.linux@googlemail.com>2024-05-31 16:03:23 +0200
commit2075553a1b0eb843d4ee73a039b3e30bed9274e0 (patch)
tree904f192fa9d7c920c9dde6a7bc26b208c3b03312 /libbb/popcnt.c
parent5a68a246e750359819d63bcff5ef97dd9c7788fc (diff)
downloadbusybox-w32-2075553a1b0eb843d4ee73a039b3e30bed9274e0.tar.gz
busybox-w32-2075553a1b0eb843d4ee73a039b3e30bed9274e0.tar.bz2
busybox-w32-2075553a1b0eb843d4ee73a039b3e30bed9274e0.zip
libbb: add bit counting function, use where appropriate
Although "naive" counting function is not too slow and is smaller, using it on e.g. each of 1024 words of CPU mask feels wrong. function old new delta bb_popcnt_32 - 52 +52 get_prefix 323 321 -2 nproc_main 206 199 -7 d4_run_script 739 731 -8 ipcalc_main 533 507 -26 ------------------------------------------------------------------------------ (add/remove: 2/0 grow/shrink: 0/4 up/down: 52/-43) Total: 9 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb/popcnt.c')
-rw-r--r--libbb/popcnt.c46
1 files changed, 46 insertions, 0 deletions
diff --git a/libbb/popcnt.c b/libbb/popcnt.c
new file mode 100644
index 000000000..fe8cd240f
--- /dev/null
+++ b/libbb/popcnt.c
@@ -0,0 +1,46 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Utility routines.
4 *
5 * Copyright (C) 2024 Denys Vlasenko
6 *
7 * Licensed under GPLv2, see file LICENSE in this source tree.
8 */
9#include "libbb.h"
10
11unsigned FAST_FUNC bb_popcnt_32(uint32_t m)
12{
13 /* replace each 2 bit group with the count of set bits in it */
14 /* 00->00 01->01 10->01 11->10 */
15 m = m - ((m >> 1) & 0x55555555);
16 /* in each 4 bit group, add two 2-bit counts */
17 m = (m & 0x33333333) + ((m >> 2) & 0x33333333);
18 /* in each 8 bit group, add two 4-bit counts (in fact, 3-bit, 0nnn with n=0..4) */
19 m = (m + (m >> 4)) & 0x0f0f0f0f;
20#if 1 /* assume 32*32->32 multiply is fast */
21 m = m * 0x01010101; /* top byte = m + (m<<8) + (m<<16) + (m<<24) */
22 return m >> 24;
23#else
24 /* 0000aaaa0000bbbb0000cccc0000dddd */
25 /* + 0000aaaa0000bbbb0000cccc */
26 /* = 0000xxxx000_a+b_000xxxxx000_c+d_ (we don't care about x bits) */
27 m += m >> 8; /* in each 16-bit group, lowest 5 bits is the count */
28 /* 0000xxxx000_a+b_000xxxxx000_c+d_ */
29 /* + 0000xxxx000_a+b_ */
30 /* = 0000xxxx000xxxxx00xxxxxx00a+b+cd */
31 m += m >> 16; /* in each 32-bit group, lowest 6 bits is the count */
32 return m & 0x3f; /* clear x bits */
33#endif
34}
35
36#if ULONG_MAX > 0xffffffff
37unsigned FAST_FUNC bb_popcnt_long(unsigned long m)
38{
39 BUILD_BUG_ON(sizeof(m) != 8);
40 /* 64-bit version of bb_popcnt_32 exists, but it uses 64-bit constants,
41 * which are awkward to generate on assembly level on most CPUs.
42 * For now, just add two 32-bit counts:
43 */
44 return bb_popcnt_32((uint32_t)m) + bb_popcnt_32((uint32_t)(m >> 32));
45}
46#endif