diff options
| author | Denys Vlasenko <vda.linux@googlemail.com> | 2025-07-09 08:21:47 +0200 |
|---|---|---|
| committer | Denys Vlasenko <vda.linux@googlemail.com> | 2025-07-09 08:21:47 +0200 |
| commit | 11d4c08d7541408e4fbb7daaaf63aba1d07685ea (patch) | |
| tree | 43cc06ffb4161c8e538a447e337393812120380f /libbb | |
| parent | c305c81c94a086fb09444b1ea6f31fb911c25ec0 (diff) | |
| download | busybox-w32-11d4c08d7541408e4fbb7daaaf63aba1d07685ea.tar.gz busybox-w32-11d4c08d7541408e4fbb7daaaf63aba1d07685ea.tar.bz2 busybox-w32-11d4c08d7541408e4fbb7daaaf63aba1d07685ea.zip | |
libbb/bitops.c: add inlining comment
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
| -rw-r--r-- | libbb/bitops.c | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/libbb/bitops.c b/libbb/bitops.c index 5f239676c..467e1a2d9 100644 --- a/libbb/bitops.c +++ b/libbb/bitops.c | |||
| @@ -58,6 +58,26 @@ void FAST_FUNC xorbuf16_aligned_long(void *dst, const void *src) | |||
| 58 | # endif | 58 | # endif |
| 59 | #endif | 59 | #endif |
| 60 | } | 60 | } |
| 61 | // The above can be inlined in libbb.h, in a way where compiler | ||
| 62 | // is even free to use better addressing modes than (%reg), and | ||
| 63 | // to keep the result in a register | ||
| 64 | // (to not store it to memory after each XOR): | ||
| 65 | //#if defined(__SSE__) | ||
| 66 | //#include <xmmintrin.h> | ||
| 67 | //^^^ or just: typedef float __m128_u __attribute__((__vector_size__(16),__may_alias__,__aligned__(1))); | ||
| 68 | //static ALWAYS_INLINE void xorbuf16_aligned_long(void *dst, const void *src) | ||
| 69 | //{ | ||
| 70 | // __m128_u xmm0, xmm1; | ||
| 71 | // asm volatile( | ||
| 72 | //"\n xorps %1,%0" | ||
| 73 | // : "=x" (xmm0), "=x" (xmm1) | ||
| 74 | // : "0" (*(__m128_u*)dst), "1" (*(__m128_u*)src) | ||
| 75 | // ); | ||
| 76 | // *(__m128_u*)dst = xmm0; // this store may be optimized out! | ||
| 77 | //} | ||
| 78 | //#endif | ||
| 79 | // but I don't trust gcc optimizer enough to not generate some monstrosity. | ||
| 80 | // See GMULT() function in TLS code as an example. | ||
| 61 | 81 | ||
| 62 | void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2) | 82 | void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2) |
| 63 | { | 83 | { |
