diff options
| author | Denys Vlasenko <vda.linux@googlemail.com> | 2026-02-08 08:30:03 +0100 |
|---|---|---|
| committer | Denys Vlasenko <vda.linux@googlemail.com> | 2026-02-08 08:30:03 +0100 |
| commit | 8289b346265ef63a809c68b608cd7689bbf0a342 (patch) | |
| tree | 92dd224298e2b198fd4bc66e46fa99ef607761e1 | |
| parent | 91075de0a7bd987fa376dbbd624f52a11c6078e3 (diff) | |
| download | busybox-w32-8289b346265ef63a809c68b608cd7689bbf0a342.tar.gz busybox-w32-8289b346265ef63a809c68b608cd7689bbf0a342.tar.bz2 busybox-w32-8289b346265ef63a809c68b608cd7689bbf0a342.zip | |
tls: document PSTM_64BIT + PSTM_X86_64 optimizations better
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
| -rw-r--r-- | networking/tls.h | 18 | ||||
| -rw-r--r-- | networking/tls_pstm_montgomery_reduce.c | 4 |
2 files changed, 11 insertions, 11 deletions
diff --git a/networking/tls.h b/networking/tls.h
index 9751d30ff..167f8baf1 100644
--- a/networking/tls.h
+++ b/networking/tls.h
| |||
| @@ -10,7 +10,6 @@ | |||
| 10 | */ | 10 | */ |
| 11 | #include "libbb.h" | 11 | #include "libbb.h" |
| 12 | 12 | ||
| 13 | |||
| 14 | /* Config tweaks */ | 13 | /* Config tweaks */ |
| 15 | #define HAVE_NATIVE_INT64 | 14 | #define HAVE_NATIVE_INT64 |
| 16 | #undef USE_1024_KEY_SPEED_OPTIMIZATIONS | 15 | #undef USE_1024_KEY_SPEED_OPTIMIZATIONS |
| @@ -31,16 +30,18 @@ | |||
| 31 | # define PSTM_32BIT | 30 | # define PSTM_32BIT |
| 32 | # define PSTM_X86 | 31 | # define PSTM_X86 |
| 33 | #endif | 32 | #endif |
| 34 | //#if defined(__GNUC__) && defined(__x86_64__) | 33 | #if defined(__GNUC__) && defined(__x86_64__) |
| 35 | // /* PSTM_X86_64 works correctly, but +782 bytes. */ | 34 | /* PSTM_64BIT + PSTM_X86_64 works correctly, but: |
| 36 | // /* Looks like most of the growth is because of PSTM_64BIT. */ | 35 | * +928 bytes if PSTM_64BIT but !PSTM_X86_64 |
| 36 | * +1003 bytes with INNERMUL8 (loop unrolling in pstm_montgomery_reduce()) | ||
| 37 | * +664 bytes without INNERMUL8 | ||
| 38 | */ | ||
| 37 | //# define PSTM_64BIT | 39 | //# define PSTM_64BIT |
| 38 | //# define PSTM_X86_64 | 40 | //# define PSTM_X86_64 |
| 39 | //#endif | 41 | #endif |
| 40 | //#if SOME_COND #define PSTM_MIPS, #define PSTM_32BIT | 42 | //#if SOME_COND #define PSTM_MIPS, #define PSTM_32BIT |
| 41 | //#if SOME_COND #define PSTM_ARM, #define PSTM_32BIT | 43 | //#if SOME_COND #define PSTM_ARM, #define PSTM_32BIT |
| 42 | 44 | ||
| 43 | |||
| 44 | #define PS_SUCCESS 0 | 45 | #define PS_SUCCESS 0 |
| 45 | #define PS_FAILURE -1 | 46 | #define PS_FAILURE -1 |
| 46 | #define PS_ARG_FAIL -6 /* Failure due to bad function param */ | 47 | #define PS_ARG_FAIL -6 /* Failure due to bad function param */ |
| @@ -51,14 +52,14 @@ | |||
| 51 | #define PS_TRUE 1 | 52 | #define PS_TRUE 1 |
| 52 | #define PS_FALSE 0 | 53 | #define PS_FALSE 0 |
| 53 | 54 | ||
| 55 | #undef ENDIAN_BIG | ||
| 56 | #undef ENDIAN_LITTLE | ||
| 54 | #if BB_BIG_ENDIAN | 57 | #if BB_BIG_ENDIAN |
| 55 | # define ENDIAN_BIG 1 | 58 | # define ENDIAN_BIG 1 |
| 56 | # undef ENDIAN_LITTLE | ||
| 57 | //#???? ENDIAN_32BITWORD | 59 | //#???? ENDIAN_32BITWORD |
| 58 | // controls only STORE32L, which we don't use | 60 | // controls only STORE32L, which we don't use |
| 59 | #else | 61 | #else |
| 60 | # define ENDIAN_LITTLE 1 | 62 | # define ENDIAN_LITTLE 1 |
| 61 | # undef ENDIAN_BIG | ||
| 62 | #endif | 63 | #endif |
| 63 | 64 | ||
| 64 | typedef uint64_t uint64; | 65 | typedef uint64_t uint64; |
| @@ -98,7 +99,6 @@ void tls_get_random(void *buf, unsigned len) FAST_FUNC; | |||
| 98 | #undef min | 99 | #undef min |
| 99 | #define min(x, y) ((x) < (y) ? (x) : (y)) | 100 | #define min(x, y) ((x) < (y) ? (x) : (y)) |
| 100 | 101 | ||
| 101 | |||
| 102 | #include "tls_pstm.h" | 102 | #include "tls_pstm.h" |
| 103 | #include "tls_aes.h" | 103 | #include "tls_aes.h" |
| 104 | #include "tls_aesgcm.h" | 104 | #include "tls_aesgcm.h" |
diff --git a/networking/tls_pstm_montgomery_reduce.c b/networking/tls_pstm_montgomery_reduce.c
index 4181a0590..e63e590db 100644
--- a/networking/tls_pstm_montgomery_reduce.c
+++ b/networking/tls_pstm_montgomery_reduce.c
| |||
| @@ -135,7 +135,7 @@ asm( \ | |||
| 135 | :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ | 135 | :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ |
| 136 | : "%rax", "%rdx", "cc") | 136 | : "%rax", "%rdx", "cc") |
| 137 | 137 | ||
| 138 | #define INNERMUL8 \ | 138 | #define INNERMUL8_disabled_for_bbox \ |
| 139 | asm( \ | 139 | asm( \ |
| 140 | "movq 0(%5),%%rax \n\t" \ | 140 | "movq 0(%5),%%rax \n\t" \ |
| 141 | "movq 0(%2),%%r10 \n\t" \ | 141 | "movq 0(%2),%%r10 \n\t" \ |
| @@ -398,7 +398,7 @@ int32 FAST_FUNC pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m, | |||
| 398 | _c = c + x; | 398 | _c = c + x; |
| 399 | tmpm = m->dp; | 399 | tmpm = m->dp; |
| 400 | y = 0; | 400 | y = 0; |
| 401 | #ifdef PSTM_X86_64 | 401 | #ifdef INNERMUL8 //bbox: PSTM_X86_64 |
| 402 | for (; y < (pa & ~7); y += 8) { | 402 | for (; y < (pa & ~7); y += 8) { |
| 403 | INNERMUL8; | 403 | INNERMUL8; |
| 404 | _c += 8; | 404 | _c += 8; |
