From 8289b346265ef63a809c68b608cd7689bbf0a342 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 8 Feb 2026 08:30:03 +0100 Subject: tls: document PSTM_64BIT + PSTM_X86_64 optimizations better Signed-off-by: Denys Vlasenko --- networking/tls.h | 18 +++++++++--------- networking/tls_pstm_montgomery_reduce.c | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/networking/tls.h b/networking/tls.h index 9751d30ff..167f8baf1 100644 --- a/networking/tls.h +++ b/networking/tls.h @@ -10,7 +10,6 @@ */ #include "libbb.h" - /* Config tweaks */ #define HAVE_NATIVE_INT64 #undef USE_1024_KEY_SPEED_OPTIMIZATIONS @@ -31,16 +30,18 @@ # define PSTM_32BIT # define PSTM_X86 #endif -//#if defined(__GNUC__) && defined(__x86_64__) -// /* PSTM_X86_64 works correctly, but +782 bytes. */ -// /* Looks like most of the growth is because of PSTM_64BIT. */ +#if defined(__GNUC__) && defined(__x86_64__) + /* PSTM_64BIT + PSTM_X86_64 works correctly, but: + * +928 bytes if PSTM_64BIT but !PSTM_X86_64 + * +1003 bytes with INNERMUL8 (loop unrolling in pstm_montgomery_reduce()) + * +664 bytes without INNERMUL8 + */ //# define PSTM_64BIT //# define PSTM_X86_64 -//#endif +#endif //#if SOME_COND #define PSTM_MIPS, #define PSTM_32BIT //#if SOME_COND #define PSTM_ARM, #define PSTM_32BIT - #define PS_SUCCESS 0 #define PS_FAILURE -1 #define PS_ARG_FAIL -6 /* Failure due to bad function param */ @@ -51,14 +52,14 @@ #define PS_TRUE 1 #define PS_FALSE 0 +#undef ENDIAN_BIG +#undef ENDIAN_LITTLE #if BB_BIG_ENDIAN # define ENDIAN_BIG 1 -# undef ENDIAN_LITTLE //#???? ENDIAN_32BITWORD // controls only STORE32L, which we don't use #else # define ENDIAN_LITTLE 1 -# undef ENDIAN_BIG #endif typedef uint64_t uint64; @@ -98,7 +99,6 @@ void tls_get_random(void *buf, unsigned len) FAST_FUNC; #undef min #define min(x, y) ((x) < (y) ? (x) : (y)) - #include "tls_pstm.h" #include "tls_aes.h" #include "tls_aesgcm.h" diff --git a/networking/tls_pstm_montgomery_reduce.c b/networking/tls_pstm_montgomery_reduce.c index 4181a0590..e63e590db 100644 --- a/networking/tls_pstm_montgomery_reduce.c +++ b/networking/tls_pstm_montgomery_reduce.c @@ -135,7 +135,7 @@ asm( \ :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ : "%rax", "%rdx", "cc") -#define INNERMUL8 \ +#define INNERMUL8_disabled_for_bbox \ asm( \ "movq 0(%5),%%rax \n\t" \ "movq 0(%2),%%r10 \n\t" \ @@ -398,7 +398,7 @@ int32 FAST_FUNC pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m, _c = c + x; tmpm = m->dp; y = 0; -#ifdef PSTM_X86_64 +#ifdef INNERMUL8 //bbox: PSTM_X86_64 for (; y < (pa & ~7); y += 8) { INNERMUL8; _c += 8; -- cgit v1.2.3-55-g6feb