From 2075553a1b0eb843d4ee73a039b3e30bed9274e0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 31 May 2024 12:01:43 +0200 Subject: libbb: add bit counting function, use where appropriate Although "naive" counting function is not too slow and is smaller, using it on e.g. each of 1024 words of CPU mask feels wrong. function old new delta bb_popcnt_32 - 52 +52 get_prefix 323 321 -2 nproc_main 206 199 -7 d4_run_script 739 731 -8 ipcalc_main 533 507 -26 ------------------------------------------------------------------------------ (add/remove: 2/0 grow/shrink: 0/4 up/down: 52/-43) Total: 9 bytes Signed-off-by: Denys Vlasenko --- coreutils/nproc.c | 9 +++------ include/libbb.h | 7 +++++++ libbb/Kbuild.src | 1 + libbb/popcnt.c | 46 +++++++++++++++++++++++++++++++++++++++++++ networking/ifupdown.c | 8 +------- networking/ipcalc.c | 21 +------------------- networking/libiproute/utils.c | 11 +++++------ networking/udhcp/dhcpc.c | 15 ++------------ 8 files changed, 66 insertions(+), 52 deletions(-) create mode 100644 libbb/popcnt.c diff --git a/coreutils/nproc.c b/coreutils/nproc.c index a0d818c59..df63bf57a 100644 --- a/coreutils/nproc.c +++ b/coreutils/nproc.c @@ -56,13 +56,10 @@ int nproc_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM) unsigned long *mask = get_malloc_cpu_affinity(0, &sz); sz /= sizeof(long); for (i = 0; i < sz; i++) { - unsigned long m = mask[i]; - while (m) { - if (m & 1) - count++; - m >>= 1; - } + if (mask[i] != 0) /* most mask[i] are usually 0 */ + count += bb_popcnt_long(mask[i]); } + IF_FEATURE_CLEAN_UP(free(mask);) } IF_LONG_OPTS(count -= ignore;) diff --git a/include/libbb.h b/include/libbb.h index 67d29f843..6914b5882 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -397,6 +397,13 @@ extern int *BB_GLOBAL_CONST bb_errno; uint64_t bb_bswap_64(uint64_t x) FAST_FUNC; #endif +unsigned FAST_FUNC bb_popcnt_32(uint32_t m); +#if ULONG_MAX > 0xffffffff +unsigned FAST_FUNC bb_popcnt_long(unsigned_long m); +#else +#define bb_popcnt_long(m) bb_popcnt_32(m) +#endif + unsigned long FAST_FUNC isqrt(unsigned long long N); unsigned long long monotonic_ns(void) FAST_FUNC; diff --git a/libbb/Kbuild.src b/libbb/Kbuild.src index a0e2a6da7..cb8d2c2ec 100644 --- a/libbb/Kbuild.src +++ b/libbb/Kbuild.src @@ -72,6 +72,7 @@ lib-y += perror_nomsg.o lib-y += perror_nomsg_and_die.o lib-y += pidfile.o lib-y += platform.o +lib-y += popcnt.o lib-y += printable.o lib-y += printable_string.o lib-y += print_flags.o diff --git a/libbb/popcnt.c b/libbb/popcnt.c new file mode 100644 index 000000000..fe8cd240f --- /dev/null +++ b/libbb/popcnt.c @@ -0,0 +1,46 @@ +/* vi: set sw=4 ts=4: */ +/* + * Utility routines. + * + * Copyright (C) 2024 Denys Vlasenko + * + * Licensed under GPLv2, see file LICENSE in this source tree. + */ +#include "libbb.h" + +unsigned FAST_FUNC bb_popcnt_32(uint32_t m) +{ + /* replace each 2 bit group with the count of set bits in it */ + /* 00->00 01->01 10->01 11->10 */ + m = m - ((m >> 1) & 0x55555555); + /* in each 4 bit group, add two 2-bit counts */ + m = (m & 0x33333333) + ((m >> 2) & 0x33333333); + /* in each 8 bit group, add two 4-bit counts (in fact, 3-bit, 0nnn with n=0..4) */ + m = (m + (m >> 4)) & 0x0f0f0f0f; +#if 1 /* assume 32*32->32 multiply is fast */ + m = m * 0x01010101; /* top byte = m + (m<<8) + (m<<16) + (m<<24) */ + return m >> 24; +#else + /* 0000aaaa0000bbbb0000cccc0000dddd */ + /* + 0000aaaa0000bbbb0000cccc */ + /* = 0000xxxx000_a+b_000xxxxx000_c+d_ (we don't care about x bits) */ + m += m >> 8; /* in each 16-bit group, lowest 5 bits is the count */ + /* 0000xxxx000_a+b_000xxxxx000_c+d_ */ + /* + 0000xxxx000_a+b_ */ + /* = 0000xxxx000xxxxx00xxxxxx00a+b+cd */ + m += m >> 16; /* in each 32-bit group, lowest 6 bits is the count */ + return m & 0x3f; /* clear x bits */ +#endif +} + +#if ULONG_MAX > 0xffffffff +unsigned FAST_FUNC bb_popcnt_long(unsigned long m) +{ + BUILD_BUG_ON(sizeof(m) != 8); + /* 64-bit version of bb_popcnt_32 exists, but it uses 64-bit constants, + * which are awkward to generate on assembly level on most CPUs. + * For now, just add two 32-bit counts: + */ + return bb_popcnt_32((uint32_t)m) + bb_popcnt_32((uint32_t)(m >> 32)); +} +#endif diff --git a/networking/ifupdown.c b/networking/ifupdown.c index 6c4ae27f2..9c3640be7 100644 --- a/networking/ifupdown.c +++ b/networking/ifupdown.c @@ -306,7 +306,6 @@ static int count_netmask_bits(const char *dotted_quad) // d = ~d; /* 11110000 -> 00001111 */ /* Shorter version */ - int result; struct in_addr ip; unsigned d; @@ -316,12 +315,7 @@ static int count_netmask_bits(const char *dotted_quad) d = ~d; /* 11110000 -> 00001111 */ if (d & (d+1)) /* check that it is in 00001111 form */ return -1; /* no it is not */ - result = 32; - while (d) { - d >>= 1; - result--; - } - return result; + return bb_popcnt_32(~d); } # endif diff --git a/networking/ipcalc.c b/networking/ipcalc.c index 92e7b289d..3ec473c6b 100644 --- a/networking/ipcalc.c +++ b/networking/ipcalc.c @@ -71,25 +71,6 @@ static unsigned long get_netmask(unsigned long ipaddr) return 0; } -#if ENABLE_FEATURE_IPCALC_FANCY -static int get_prefix(unsigned long netmask) -{ - unsigned long msk = 0x80000000; - int ret = 0; - - netmask = htonl(netmask); - while (msk) { - if (netmask & msk) - ret++; - msk >>= 1; - } - return ret; -} -#else -int get_prefix(unsigned long netmask); -#endif - - #define NETMASK 0x01 #define BROADCAST 0x02 #define NETWORK 0x04 @@ -210,7 +191,7 @@ int ipcalc_main(int argc UNUSED_PARAM, char **argv) if (ENABLE_FEATURE_IPCALC_FANCY) { if (opt & NETPREFIX) { - printf("PREFIX=%i\n", get_prefix(netmask)); + printf("PREFIX=%i\n", bb_popcnt_32(netmask)); } if (opt & HOSTNAME) { diff --git a/networking/libiproute/utils.c b/networking/libiproute/utils.c index 4ce230356..3cce4a06e 100644 --- a/networking/libiproute/utils.c +++ b/networking/libiproute/utils.c @@ -175,14 +175,13 @@ static void get_prefix_1(inet_prefix *dst, char *arg, int family) if (netmask_pfx.family == AF_INET) { /* fill in prefix length of dotted quad */ uint32_t mask = ntohl(netmask_pfx.data[0]); - uint32_t host = ~mask; + uint32_t inv = ~mask; - /* a valid netmask must be 2^n - 1 */ - if (host & (host + 1)) - goto bad; + /* a valid netmask must be 11..10..00 */ + if (inv & (inv + 1)) + goto bad; /* inv is not 00..01..11 */ - for (plen = 0; mask; mask <<= 1) - ++plen; + plen = bb_popcnt_32(mask); if (plen > dst->bitlen) goto bad; /* dst->flags |= PREFIXLEN_SPECIFIED; */ diff --git a/networking/udhcp/dhcpc.c b/networking/udhcp/dhcpc.c index 07e2eadfe..e44086c2e 100644 --- a/networking/udhcp/dhcpc.c +++ b/networking/udhcp/dhcpc.c @@ -150,18 +150,6 @@ static int sprint_nip(char *dest, const char *pre, const uint8_t *ip) return sprintf(dest, "%s%u.%u.%u.%u", pre, ip[0], ip[1], ip[2], ip[3]); } -/* really simple implementation, just count the bits */ -static int mton(uint32_t mask) -{ - int i = 0; - mask = ntohl(mask); /* 111110000-like bit pattern */ - while (mask) { - i++; - mask <<= 1; - } - return i; -} - #if ENABLE_FEATURE_UDHCPC_SANITIZEOPT /* Check if a given name represents a valid DNS name */ /* See RFC1035, 2.3.1 */ @@ -508,7 +496,8 @@ static void fill_envp(struct dhcp_packet *packet) /* Generate extra envvar for DHCP_SUBNET, $mask */ uint32_t subnet; move_from_unaligned32(subnet, opt_item->data); - putenvp(xasprintf("mask=%u", mton(subnet))); +//FIXME: we do not check that subnet has bit pattern 11..10..0 + putenvp(xasprintf("mask=%u", bb_popcnt_32(subnet))); } } else { unsigned ofs; -- cgit v1.2.3-55-g6feb