From 80e676664e1d7a0b07f14bff44f93d1fef709cf4 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 00:44:19 +0200 Subject: libbb: add yescrypt password hashing support It seems to work, but not at all optimized for size. The extra copy of sha256 code needs to be removed. The yescrypt code in libbb/yescrypt/* is adapted from libxcrypt-4.4.38 with minimal edits, hopefully making it easier to track backports by resetting the tree to this commit, then comparing changes in upstream libxcrypt to the tree. function old new delta blockmix_xor_save - 7050 +7050 static.blockmix_xor - 6475 +6475 blockmix - 3390 +3390 SHA256_Transform - 3083 +3083 yescrypt_kdf_body - 1724 +1724 PBKDF2_SHA256 - 1003 +1003 smix1 - 960 +960 yescrypt_r - 890 +890 salsa20 - 804 +804 smix - 790 +790 smix2 - 659 +659 blockmix_salsa8_xor - 601 +601 yescrypt_kdf - 479 +479 blockmix_salsa8 - 415 +415 Krnd - 256 +256 _HMAC_SHA256_Init - 213 +213 _SHA256_Update - 198 +198 _SHA256_Final - 195 +195 decode64_uint32 - 166 +166 encode64 - 153 +153 decode64 - 136 +136 libcperciva_HMAC_SHA256_Buf - 132 +132 SHA256_Pad_Almost - 131 +131 salsa20_simd_unshuffle - 101 +101 salsa20_simd_shuffle - 101 +101 yes_crypt - 90 +90 libcperciva_SHA256_Buf - 86 +86 crypt_make_rand64encoded - 85 +85 static.atoi64_partial - 77 +77 alloc_region - 72 +72 ascii64 - 65 +65 PAD - 64 +64 _HMAC_SHA256_Final - 55 +55 static.cpu_to_be32_vect - 51 +51 free_region - 47 +47 libcperciva_SHA256_Init - 37 +37 yescrypt_init_local - 34 +34 crypt_make_pw_salt 92 125 +33 initial_state - 32 +32 .rodata 105771 105803 +32 atoi64 - 25 +25 explicit_bzero - 22 +22 pw_encrypt 920 941 +21 yescrypt_free_local - 9 +9 crypt_make_salt 85 - -85 ------------------------------------------------------------------------------ (add/remove: 43/1 grow/shrink: 3/0 up/down: 31042/-85) Total: 30957 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 
'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index e765e18eb..9a0a2f916 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1806,18 +1806,24 @@ extern char *pw_encrypt(const char *clear, const char *salt, int cleanup) FAST_F extern int obscure(const char *old, const char *newval, const struct passwd *pwdp) FAST_FUNC; /* * rnd is additional random input. New one is returned. - * Useful if you call crypt_make_salt many times in a row: - * rnd = crypt_make_salt(buf1, 4, 0); - * rnd = crypt_make_salt(buf2, 4, rnd); - * rnd = crypt_make_salt(buf3, 4, rnd); + * Useful if you call crypt_make_rand64encoded many times in a row: + * rnd = crypt_make_rand64encoded(buf1, 4, 0); + * rnd = crypt_make_rand64encoded(buf2, 4, rnd); + * rnd = crypt_make_rand64encoded(buf3, 4, rnd); * (otherwise we risk having same salt generated) */ -extern int crypt_make_salt(char *p, int cnt /*, int rnd*/) FAST_FUNC; -/* "$N$" + sha_salt_16_bytes + NUL */ -#define MAX_PW_SALT_LEN (3 + 16 + 1) +extern int crypt_make_rand64encoded(char *p, int cnt /*, int rnd*/) FAST_FUNC; +/* Size of char salt[] to hold randomly-generated salt string + * sha256/512: + * "$5$" + * "$6$" + * #define MAX_PW_SALT_LEN (3 + 16 + 1) + * yescrypt: + * "$y$j9T$" + */ +#define MAX_PW_SALT_LEN (7 + 24 + 1) extern char* crypt_make_pw_salt(char p[MAX_PW_SALT_LEN], const char *algo) FAST_FUNC; - /* Returns number of lines changed, or -1 on error */ #if !(ENABLE_FEATURE_ADDUSER_TO_GROUP || ENABLE_FEATURE_DEL_USER_FROM_GROUP) #define update_passwd(filename, username, data, member) \ -- cgit v1.2.3-55-g6feb From 62abd47815f0ee2f6c0ea6549fabe6d5c307ef8d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 10:50:46 +0200 Subject: Move "sha256-hash a memory array and produce the digest" helper to libbb Signed-off-by: Denys Vlasenko --- include/libbb.h | 1 + libbb/yescrypt/alg-sha256.c | 26 +++++--------------------- libbb/yescrypt/alg-sha256.h | 6 ------ libbb/yescrypt/alg-yescrypt-kdf.c | 2 +- 4 
files changed, 7 insertions(+), 28 deletions(-) (limited to 'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index 9a0a2f916..270a9d593 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2233,6 +2233,7 @@ enum { SHA512_OUTSIZE = 64, SHA3_OUTSIZE = 28, }; +void FAST_FUNC sha256_block(const void *in, size_t len, uint8_t hash[32]); extern uint32_t *global_crc32_table; uint32_t *crc32_filltable(uint32_t *tbl256, int endian) FAST_FUNC; diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index 038ac0ddb..315c094a2 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -25,38 +25,22 @@ * SUCH DAMAGE. */ -/** - * SHA256_Buf(in, len, digest): - * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. - */ -void -SHA256_Buf(const void * in, size_t len, uint8_t digest[32]) -{ - sha256_ctx_t ctx; - sha256_begin(&ctx); - sha256_hash(&ctx, in, len); - sha256_end(&ctx, digest); -} - /** * HMAC_SHA256_Init(ctx, K, Klen): * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from * ${K}. */ static void -HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) +HMAC_SHA256_Init(HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen) { uint8_t pad[64]; uint8_t khash[32]; - const uint8_t * K = _K; + const uint8_t *K = _K; size_t i; /* If Klen > 64, the key is really SHA256(K). */ if (Klen > 64) { -// SHA256_Init(&ctx->ictx); -// _SHA256_Update(&ctx->ictx, K, Klen, tmp32); -// _SHA256_Final(khash, &ctx->ictx, tmp32); - SHA256_Buf(K, Klen, khash); + sha256_block(K, Klen, khash); K = khash; Klen = 32; } @@ -81,7 +65,7 @@ HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. */ static void -HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len) +HMAC_SHA256_Update(HMAC_SHA256_CTX *ctx, const void *in, size_t len) { /* Feed data to the inner SHA256 operation. 
*/ sha256_hash(&ctx->ictx, in, len); @@ -93,7 +77,7 @@ HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len) * buffer ${digest}. */ static void -HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx) +HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX *ctx) { uint8_t ihash[32]; diff --git a/libbb/yescrypt/alg-sha256.h b/libbb/yescrypt/alg-sha256.h index 8a4968267..6d2cc0a04 100644 --- a/libbb/yescrypt/alg-sha256.h +++ b/libbb/yescrypt/alg-sha256.h @@ -34,12 +34,6 @@ #define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf #define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX -/** - * SHA256_Buf(in, len, digest): - * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. - */ -extern void SHA256_Buf(const void *, size_t, uint8_t[32]); - /* Context structure for HMAC-SHA256 operations. */ typedef struct { sha256_ctx_t ictx; diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 01a66a6a8..5c1f1006a 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -945,7 +945,7 @@ static int yescrypt_kdf32_body( size_t clen = /*buflen:*/32; if (clen > sizeof(dk)) clen = sizeof(dk); - SHA256_Buf(sha256, sizeof(sha256), dk); + sha256_block(sha256, sizeof(sha256), dk); memcpy(buf32, dk, clen); } } -- cgit v1.2.3-55-g6feb From 0893bc3bac8705b22679ad77f39ee56d3ba728c9 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 11:12:13 +0200 Subject: libbb/yescrypt: code shrink static.PBKDF2_SHA256 - 189 +189 HMAC_SHA256_Init - 159 +159 HMAC_SHA256_Buf - 58 +58 HMAC_SHA256_Final - 53 +53 i2a64 - 42 +42 yescrypt_r 1221 1215 -6 yescrypt_kdf32_body 1064 1046 -18 i64c 42 - -42 libcperciva_HMAC_SHA256_Final 53 - -53 libcperciva_HMAC_SHA256_Buf 58 - -58 ascii64 65 - -65 libcperciva_HMAC_SHA256_Init 159 - -159 PBKDF2_SHA256 386 - -386 ------------------------------------------------------------------------------ (add/remove: 5/6 grow/shrink: 0/2 up/down: 501/-787) Total: 
-286 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 2 ++ libbb/pw_encrypt.c | 12 +++++------- libbb/pw_encrypt_des.c | 8 ++++---- libbb/yescrypt/alg-sha256.c | 2 +- libbb/yescrypt/alg-sha256.h | 18 ------------------ libbb/yescrypt/alg-yescrypt-common.c | 2 +- libbb/yescrypt/y.c | 6 ------ 7 files changed, 13 insertions(+), 37 deletions(-) (limited to 'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index 270a9d593..e88499a80 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2189,6 +2189,8 @@ char *decode_base64(char *dst, const char **pp_src) FAST_FUNC; char *decode_base32(char *dst, const char **pp_src) FAST_FUNC; void read_base64(FILE *src_stream, FILE *dst_stream, int flags) FAST_FUNC; +int FAST_FUNC i2a64(int i); + typedef struct md5_ctx_t { uint8_t wbuffer[64]; /* always correctly aligned for uint64_t */ void (*process_block)(struct md5_ctx_t*) FAST_FUNC; diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 1d530974e..97dee7229 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -13,11 +13,10 @@ #endif #include "libbb.h" -/* static const uint8_t ascii64[] ALIGN1 = +/* 0..63 -> * "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; */ - -static int i64c(int i) +int FAST_FUNC i2a64(int i) { i &= 0x3f; if (i == 0) @@ -45,8 +44,8 @@ int FAST_FUNC crypt_make_rand64encoded(char *p, int cnt /*, int x */) * It has no problem with visibly alternating lowest bit * but is also weak in cryptographic sense + needs div, * which needs more code (and slower) on many CPUs */ - *p++ = i64c(x >> 16); - *p++ = i64c(x >> 22); + *p++ = i2a64(x >> 16); + *p++ = i2a64(x >> 22); } while (--cnt); *p = '\0'; return x; @@ -120,8 +119,7 @@ static char* to64(char *s, unsigned v, int n) { while (--n >= 0) { - /* *s++ = ascii64[v & 0x3f]; */ - *s++ = i64c(v); + *s++ = i2a64(v); v >>= 6; } return s; diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index fe8237cfe..c836ab684 100644 --- a/libbb/pw_encrypt_des.c +++ 
b/libbb/pw_encrypt_des.c @@ -703,10 +703,10 @@ to64_msb_first(char *s, unsigned v) *s++ = ascii64[(v >> 6) & 0x3f]; /* bits 11..6 */ *s = ascii64[v & 0x3f]; /* bits 5..0 */ #endif - *s++ = i64c(v >> 18); /* bits 23..18 */ - *s++ = i64c(v >> 12); /* bits 17..12 */ - *s++ = i64c(v >> 6); /* bits 11..6 */ - *s = i64c(v); /* bits 5..0 */ + *s++ = i2a64(v >> 18); /* bits 23..18 */ + *s++ = i2a64(v >> 12); /* bits 17..12 */ + *s++ = i2a64(v >> 6); /* bits 11..6 */ + *s = i2a64(v); /* bits 5..0 */ } static char * diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index a1d4275e6..a17028b6b 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -107,7 +107,7 @@ HMAC_SHA256_Buf(const void *K, size_t Klen, const void *in, size_t len, * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). */ -void +static void PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen) diff --git a/libbb/yescrypt/alg-sha256.h b/libbb/yescrypt/alg-sha256.h index 6d2cc0a04..862f49dbe 100644 --- a/libbb/yescrypt/alg-sha256.h +++ b/libbb/yescrypt/alg-sha256.h @@ -24,26 +24,8 @@ * SUCH DAMAGE. */ -/* - * Use #defines in order to avoid namespace collisions with anyone else's - * SHA256 code (e.g., the code in OpenSSL). - */ -#define HMAC_SHA256_Init libcperciva_HMAC_SHA256_Init -#define HMAC_SHA256_Update libcperciva_HMAC_SHA256_Update -#define HMAC_SHA256_Final libcperciva_HMAC_SHA256_Final -#define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf -#define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX - /* Context structure for HMAC-SHA256 operations. 
*/ typedef struct { sha256_ctx_t ictx; sha256_ctx_t octx; } HMAC_SHA256_CTX; - -/** - * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): - * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and - * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). - */ -extern void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t, - uint64_t, uint8_t *, size_t); diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index da7fa5e0f..7a1e92cab 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -88,7 +88,7 @@ static uint8_t *encode64_uint32_fixed( for (bits = 0; bits < srcbits; bits += 6) { if (dstlen < 2) return NULL; - *dst++ = itoa64[src & 0x3f]; + *dst++ = i2a64(src); dstlen--; src >>= 6; } diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c index 042c439a0..2c6afd4f8 100644 --- a/libbb/yescrypt/y.c +++ b/libbb/yescrypt/y.c @@ -124,12 +124,6 @@ VECTOR_TO_CPU(be,32); VECTOR_TO_CPU(be,64); -const unsigned char ascii64[65] = - "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; -/* 0000000000111111111122222222223333333333444444444455555555556666 */ -/* 0123456789012345678901234567890123456789012345678901234567890123 */ -#define itoa64 ascii64 - #define YESCRYPT_INTERNAL #include "alg-sha256.h" #include "alg-yescrypt.h" -- cgit v1.2.3-55-g6feb From 23b5527f5c400a300c56afa36a6a5abaa81adbb7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 19:08:57 +0200 Subject: libbb/yescrypt: use common ASCII char-to-64 conversion code function old new delta a2i64 - 46 +46 yescrypt_r 1215 1235 +20 decode64_uint32 167 177 +10 atoi64 25 - -25 ascii_to_bin 53 - -53 static.atoi64_partial 77 - -77 ------------------------------------------------------------------------------ (add/remove: 1/3 grow/shrink: 2/0 up/down: 76/-155) Total: -79 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 1 + libbb/pw_encrypt.c | 25 
+++++++++++++++++++++++++ libbb/pw_encrypt_des.c | 23 ++--------------------- libbb/yescrypt/alg-yescrypt-common.c | 22 +++------------------- 4 files changed, 31 insertions(+), 40 deletions(-) (limited to 'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index e88499a80..1c23d2f66 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2190,6 +2190,7 @@ char *decode_base32(char *dst, const char **pp_src) FAST_FUNC; void read_base64(FILE *src_stream, FILE *dst_stream, int flags) FAST_FUNC; int FAST_FUNC i2a64(int i); +int FAST_FUNC a2i64(char c); typedef struct md5_ctx_t { uint8_t wbuffer[64]; /* always correctly aligned for uint64_t */ diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 71f7731fd..af84606bf 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -30,6 +30,31 @@ int FAST_FUNC i2a64(int i) return ('a' - 38 + i); } +/* Returns >=64 for invalid chars */ +int FAST_FUNC a2i64(char c) +{ + unsigned char ch = c; + if (ch >= 'a') + /* "a..z" to 38..63 */ + /* anything after "z": positive int >= 64 */ + return (ch - 'a' + 38); + + if (ch > 'Z') + /* after "Z" but before "a": positive byte >= 64 */ + return ch; + + if (ch >= 'A') + /* "A..Z" to 12..37 */ + return (ch - 'A' + 12); + + if (ch > '9') + return 64; + + /* "./0123456789" to 0,1,2..11 */ + /* anything before "." becomes positive byte >= 64 */ + return (unsigned char)(ch - '.'); +} + int FAST_FUNC crypt_make_rand64encoded(char *p, int cnt /*, int x */) { /* was: x += ... 
*/ diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index c836ab684..8b5edaaed 100644 --- a/libbb/pw_encrypt_des.c +++ b/libbb/pw_encrypt_des.c @@ -200,25 +200,6 @@ static const uint32_t bits32[32] ALIGN4 = { static const uint8_t bits8[8] ALIGN1 = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; -static int -ascii_to_bin(char ch) -{ - if (ch > 'z') - return 0; - if (ch >= 'a') - return (ch - 'a' + 38); - if (ch > 'Z') - return 0; - if (ch >= 'A') - return (ch - 'A' + 12); - if (ch > '9') - return 0; - if (ch >= '.') - return (ch - '.'); - return 0; -} - - /* Static stuff that stays resident and doesn't change after * being initialized, and therefore doesn't need to be made * reentrant. */ @@ -740,8 +721,8 @@ des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], */ output[0] = salt_str[0]; output[1] = salt_str[1]; - salt = (ascii_to_bin(salt_str[1]) << 6) - | ascii_to_bin(salt_str[0]); + salt = (a2i64(salt_str[1]) << 6) + | a2i64(salt_str[0]); setup_salt(ctx, salt); /* set ctx->saltbits for do_des() */ /* Do it. */ diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 7a1e92cab..b9a5c51ac 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -23,22 +23,6 @@ * yescrypt_params_t field, and convert salt ti binary - * both of these are negligible compared to main hashing operation */ -static NOINLINE uint32_t atoi64(uint8_t src) -{ - static const uint8_t atoi64_partial[77] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 64, 64, 64, 64, 64, 64, 64, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 64, 64, 64, 64, 64, 64, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 - }; - if (src >= '.' 
&& src <= 'z') - return atoi64_partial[src - '.']; - return 64; -} - static NOINLINE const uint8_t *decode64_uint32( uint32_t *dst, const uint8_t *src, uint32_t val) @@ -49,7 +33,7 @@ static NOINLINE const uint8_t *decode64_uint32( if (!src) /* prevous decode failed already? */ goto fail; - c = atoi64(*src++); + c = a2i64(*src++); if (c > 63) goto fail; @@ -64,7 +48,7 @@ static NOINLINE const uint8_t *decode64_uint32( val += (c - start) << bits; while (--chars) { - c = atoi64(*src++); + c = a2i64(*src++); if (c > 63) goto fail; bits -= 6; @@ -138,7 +122,7 @@ static const uint8_t *decode64( while (dstpos <= *dstlen && srclen) { uint32_t value = 0, bits = 0; while (srclen--) { - uint32_t c = atoi64(*src); + uint32_t c = a2i64(*src); if (c > 63) { srclen = 0; break; -- cgit v1.2.3-55-g6feb From 53de6e6150ea5538930e1963eb87ada153093ea0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 22:43:28 +0200 Subject: libbb/yescrypt: use common ascii64 encoding routine function old new delta num2str64_lsb_first 33 46 +13 yescrypt_r 1235 1133 -102 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 13/-102) Total: -89 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 1 + libbb/yescrypt/alg-yescrypt-common.c | 98 +++++++++++++----------------------- libbb/yescrypt/alg-yescrypt.h | 4 +- 3 files changed, 37 insertions(+), 66 deletions(-) (limited to 'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index 1c23d2f66..b761b1091 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2191,6 +2191,7 @@ void read_base64(FILE *src_stream, FILE *dst_stream, int flags) FAST_FUNC; int FAST_FUNC i2a64(int i); int FAST_FUNC a2i64(char c); +char* FAST_FUNC num2str64_lsb_first(char *s, unsigned v, int n); typedef struct md5_ctx_t { uint8_t wbuffer[64]; /* always correctly aligned for uint64_t */ diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c 
index 435eaecca..5bdf1893e 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -19,8 +19,8 @@ */ /* Not inlining: - * decode64 fuinctions are only used to read - * yescrypt_params_t field, and convert salt ti binary - + * de/encode64 functions are only used to read + * yescrypt_params_t field, and convert salt to binary - * both of these are negligible compared to main hashing operation */ static NOINLINE const uint8_t *decode64_uint32( @@ -63,56 +63,6 @@ fail: return NULL; } -static uint8_t *encode64_uint32_fixed( - uint8_t *dst, size_t dstlen, - uint32_t src, uint32_t srcbits) -{ - uint32_t bits; - - for (bits = 0; bits < srcbits; bits += 6) { - if (dstlen < 2) - return NULL; - *dst++ = i2a64(src); - dstlen--; - src >>= 6; - } - - if (src || dstlen < 1) - return NULL; - - *dst = 0; /* NUL terminate just in case */ - - return dst; -} - -static uint8_t *encode64( - uint8_t *dst, size_t dstlen, - const uint8_t *src, size_t srclen) -{ - size_t i; - - for (i = 0; i < srclen; ) { - uint8_t *dnext; - uint32_t value = 0, bits = 0; - do { - value |= (uint32_t)src[i++] << bits; - bits += 8; - } while (bits < 24 && i < srclen); - dnext = encode64_uint32_fixed(dst, dstlen, value, bits); - if (!dnext) - return NULL; - dstlen -= dnext - dst; - dst = dnext; - } - - if (dstlen < 1) - return NULL; - - *dst = 0; /* NUL terminate just in case */ - - return dst; -} - static const uint8_t *decode64( uint8_t *dst, size_t *dstlen, const uint8_t *src, size_t srclen) @@ -156,21 +106,43 @@ static const uint8_t *decode64( *dstlen = dstpos; return src; } - fail: *dstlen = 0; return NULL; } -uint8_t *yescrypt_r( +static char *encode64( + char *dst, size_t dstlen, + const uint8_t *src, size_t srclen) +{ + while (srclen) { + uint32_t value = 0, b = 0; + do { + value |= (uint32_t)(*src++ << b); + b += 8; + srclen--; + } while (srclen && b < 24); + + b >>= 3; /* number of bits to number of bytes */ + b++; /* 1, 2 or 3 bytes will become 2, 3 or 4 
ascii64 chars */ + dstlen -= b; + if ((ssize_t)dstlen <= 0) + return NULL; + dst = num2str64_lsb_first(dst, value, b); + } + *dst = '\0'; + return dst; +} + +char *yescrypt_r( const uint8_t *passwd, size_t passwdlen, const uint8_t *setting, - uint8_t *buf, size_t buflen) + char *buf, size_t buflen) { yescrypt_ctx_t yctx[1]; unsigned char hashbin32[32]; + char *dst; const uint8_t *src, *saltstr, *saltend; - uint8_t *dst; size_t need, prefixlen, saltstrlen; uint32_t flavor, N_log2; @@ -241,11 +213,11 @@ uint8_t *yescrypt_r( goto fail; yctx->param.NROM = (uint64_t)1 << NROM_log2; } + if (!src) + goto fail; + if (*src != '$') + goto fail; } - if (!src) - goto fail; - if (*src != '$') - goto fail; saltstr = src + 1; src = (uint8_t *)strchrnul((char *)saltstr, '$'); @@ -268,16 +240,14 @@ uint8_t *yescrypt_r( dst = mempcpy(buf, setting, prefixlen); *dst++ = '$'; dst = encode64(dst, buflen - (dst - buf), hashbin32, sizeof(hashbin32)); - if (!dst || dst >= buf + buflen) + if (!dst) goto fail; - - *dst = 0; /* NUL termination */ ret: free_region(yctx->local); explicit_bzero(yctx, sizeof(yctx)); explicit_bzero(hashbin32, sizeof(hashbin32)); return buf; -fail: + fail: buf = NULL; goto ret; } diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 5b442c2c9..996af333f 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -151,8 +151,8 @@ typedef struct { * * MT-safe as long as local and buf are local to the thread. 
*/ -extern uint8_t *yescrypt_r( +extern char *yescrypt_r( const uint8_t *passwd, size_t passwdlen, const uint8_t *setting, - uint8_t *buf, size_t buflen + char *buf, size_t buflen ); -- cgit v1.2.3-55-g6feb From 1a0913d57ce8287703cfe666d9240e3a147ea30d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 07:44:01 +0200 Subject: libbb: factor out HMAC code from TLS function old new delta hmac_block - 88 +88 hmac_peek_hash - 61 +61 hmac_end - 50 +50 hmac_begin 140 177 +37 hmac_hash_v - 30 +30 .rodata 105799 105787 -12 hmac_sha_precomputed 54 - -54 hmac_sha_precomputed_v 69 - -69 hmac 83 - -83 ------------------------------------------------------------------------------ (add/remove: 5/3 grow/shrink: 1/1 up/down: 266/-218) Total: 48 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 40 +++++++++++--- libbb/hash_hmac.c | 99 +++++++++++++++++++++++++++++++++ networking/tls.c | 161 ++++++++++-------------------------------------------- 3 files changed, 161 insertions(+), 139 deletions(-) create mode 100644 libbb/hash_hmac.c (limited to 'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index b761b1091..3f60acaa0 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2193,6 +2193,16 @@ int FAST_FUNC i2a64(int i); int FAST_FUNC a2i64(char c); char* FAST_FUNC num2str64_lsb_first(char *s, unsigned v, int n); +enum { + /* how many bytes XYZ_end() fills */ + MD5_OUTSIZE = 16, + SHA1_OUTSIZE = 20, + SHA256_OUTSIZE = 32, + SHA512_OUTSIZE = 64, + SHA3_OUTSIZE = 28, + /* size of input block */ + SHA2_INSIZE = 64, +}; typedef struct md5_ctx_t { uint8_t wbuffer[64]; /* always correctly aligned for uint64_t */ void (*process_block)(struct md5_ctx_t*) FAST_FUNC; @@ -2226,18 +2236,32 @@ unsigned sha512_end(sha512_ctx_t *ctx, void *resbuf) FAST_FUNC; void sha3_begin(sha3_ctx_t *ctx) FAST_FUNC; void sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len) FAST_FUNC; unsigned sha3_end(sha3_ctx_t *ctx, void *resbuf) FAST_FUNC; +void FAST_FUNC 
sha256_block(const void *in, size_t len, uint8_t hash[32]); /* TLS benefits from knowing that sha1 and sha256 share these. Give them "agnostic" names too */ typedef struct md5_ctx_t md5sha_ctx_t; #define md5sha_hash md5_hash #define sha_end sha1_end -enum { - MD5_OUTSIZE = 16, - SHA1_OUTSIZE = 20, - SHA256_OUTSIZE = 32, - SHA512_OUTSIZE = 64, - SHA3_OUTSIZE = 28, -}; -void FAST_FUNC sha256_block(const void *in, size_t len, uint8_t hash[32]); + +/* RFC 2104 HMAC (hash-based message authentication code) */ +typedef struct hmac_ctx { + md5sha_ctx_t hashed_key_xor_ipad; + md5sha_ctx_t hashed_key_xor_opad; +} hmac_ctx_t; +#define HMAC_ONLY_SHA256 (!ENABLE_FEATURE_TLS_SHA1) +typedef void md5sha_begin_func(md5sha_ctx_t *ctx) FAST_FUNC; +#if HMAC_ONLY_SHA256 +#define hmac_begin(ctx,key,key_size,begin) \ + hmac_begin(ctx,key,key_size) +#endif +void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size, md5sha_begin_func *begin); +static ALWAYS_INLINE void hmac_hash(hmac_ctx_t *ctx, const void *in, size_t len) +{ + md5sha_hash(&ctx->hashed_key_xor_ipad, in, len); +} +unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out); +/* HMAC helpers for TLS: */ +void FAST_FUNC hmac_hash_v(hmac_ctx_t *ctx, va_list va); +unsigned FAST_FUNC hmac_peek_hash(hmac_ctx_t *ctx, uint8_t *out, ...); extern uint32_t *global_crc32_table; uint32_t *crc32_filltable(uint32_t *tbl256, int endian) FAST_FUNC; diff --git a/libbb/hash_hmac.c b/libbb/hash_hmac.c new file mode 100644 index 000000000..8cf936949 --- /dev/null +++ b/libbb/hash_hmac.c @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2025 Denys Vlasenko + * + * Licensed under GPLv2, see file LICENSE in this source tree. 
+ */ +//kbuild:lib-$(CONFIG_TLS) += hash_hmac.o +//kbuild:lib-$(CONFIG_USE_BB_CRYPT_YES) += hash_hmac.o + +#include "libbb.h" + +// RFC 2104: +// HMAC(key, text) based on a hash H (say, sha256) is: +// ipad = [0x36 x INSIZE] +// opad = [0x5c x INSIZE] +// HMAC(key, text) = H((key XOR opad) + H((key XOR ipad) + text)) +// +// H(key XOR opad) and H(key XOR ipad) can be precomputed +// if we often need HMAC hmac with the same key. +// +// text is often given in disjoint pieces. +void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size, md5sha_begin_func *begin) +{ +#if HMAC_ONLY_SHA256 +#define begin sha256_begin +#endif + uint8_t key_xor_ipad[SHA2_INSIZE]; + uint8_t key_xor_opad[SHA2_INSIZE]; + unsigned i; + + // "The authentication key can be of any length up to INSIZE, the + // block length of the hash function. Applications that use keys longer + // than INSIZE bytes will first hash the key using H and then use the + // resultant OUTSIZE byte string as the actual key to HMAC." + if (key_size > SHA2_INSIZE) { + uint8_t tempkey[SHA1_OUTSIZE < SHA256_OUTSIZE ? 
SHA256_OUTSIZE : SHA1_OUTSIZE]; + /* use ctx->hashed_key_xor_ipad as scratch ctx */ + begin(&ctx->hashed_key_xor_ipad); + md5sha_hash(&ctx->hashed_key_xor_ipad, key, key_size); + key_size = sha_end(&ctx->hashed_key_xor_ipad, tempkey); + key = tempkey; + } + + for (i = 0; i < key_size; i++) { + key_xor_ipad[i] = key[i] ^ 0x36; + key_xor_opad[i] = key[i] ^ 0x5c; + } + for (; i < SHA2_INSIZE; i++) { + key_xor_ipad[i] = 0x36; + key_xor_opad[i] = 0x5c; + } + + begin(&ctx->hashed_key_xor_ipad); + begin(&ctx->hashed_key_xor_opad); + md5sha_hash(&ctx->hashed_key_xor_ipad, key_xor_ipad, SHA2_INSIZE); + md5sha_hash(&ctx->hashed_key_xor_opad, key_xor_opad, SHA2_INSIZE); +} +#undef begin + +unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out) +{ + unsigned len = sha_end(&ctx->hashed_key_xor_ipad, out); + /* out = H((key XOR opad) + out) */ + md5sha_hash(&ctx->hashed_key_xor_opad, out, len); + return sha_end(&ctx->hashed_key_xor_opad, out); +} + +/* TLS helpers */ + +void FAST_FUNC hmac_hash_v( + hmac_ctx_t *ctx, + va_list va) +{ + uint8_t *in; + + /* ctx->hashed_key_xor_ipad contains unclosed "H((key XOR ipad) +" state */ + /* ctx->hashed_key_xor_opad contains unclosed "H((key XOR opad) +" state */ + + /* calculate out = H((key XOR ipad) + text) */ + while ((in = va_arg(va, uint8_t*)) != NULL) { + unsigned size = va_arg(va, unsigned); + md5sha_hash(&ctx->hashed_key_xor_ipad, in, size); + } +} + +/* Using HMAC state, make a copy of it (IOW: without affecting this state!) + * hash in the list of (ptr,size) blocks, and finish the HMAC to out[] buffer. + * This function is useful for TLS PRF. + */ +unsigned FAST_FUNC hmac_peek_hash(hmac_ctx_t *ctx, uint8_t *out, ...) 
+{ + hmac_ctx_t tmpctx = *ctx; /* struct copy */ + va_list va; + + va_start(va, out); + hmac_hash_v(&tmpctx, va); + va_end(va); + + return hmac_end(&tmpctx, out); +} diff --git a/networking/tls.c b/networking/tls.c index 8d074c058..b8caf1e4b 100644 --- a/networking/tls.c +++ b/networking/tls.c @@ -188,8 +188,6 @@ #define TLS_MAX_OUTBUF (1 << 14) enum { - SHA_INSIZE = 64, - AES128_KEYSIZE = 16, AES256_KEYSIZE = 32, @@ -393,128 +391,6 @@ static void hash_handshake(tls_state_t *tls, const char *fmt, const void *buffer # define TLS_MAC_SIZE(tls) (tls)->MAC_size #endif -// RFC 2104: -// HMAC(key, text) based on a hash H (say, sha256) is: -// ipad = [0x36 x INSIZE] -// opad = [0x5c x INSIZE] -// HMAC(key, text) = H((key XOR opad) + H((key XOR ipad) + text)) -// -// H(key XOR opad) and H(key XOR ipad) can be precomputed -// if we often need HMAC hmac with the same key. -// -// text is often given in disjoint pieces. -typedef struct hmac_precomputed { - md5sha_ctx_t hashed_key_xor_ipad; - md5sha_ctx_t hashed_key_xor_opad; -} hmac_precomputed_t; - -typedef void md5sha_begin_func(md5sha_ctx_t *ctx) FAST_FUNC; -#if !ENABLE_FEATURE_TLS_SHA1 -#define hmac_begin(pre,key,key_size,begin) \ - hmac_begin(pre,key,key_size) -#define begin sha256_begin -#endif -static void hmac_begin(hmac_precomputed_t *pre, uint8_t *key, unsigned key_size, md5sha_begin_func *begin) -{ - uint8_t key_xor_ipad[SHA_INSIZE]; - uint8_t key_xor_opad[SHA_INSIZE]; -// uint8_t tempkey[SHA1_OUTSIZE < SHA256_OUTSIZE ? SHA256_OUTSIZE : SHA1_OUTSIZE]; - unsigned i; - - // "The authentication key can be of any length up to INSIZE, the - // block length of the hash function. Applications that use keys longer - // than INSIZE bytes will first hash the key using H and then use the - // resultant OUTSIZE byte string as the actual key to HMAC." - if (key_size > SHA_INSIZE) { - bb_simple_error_msg_and_die("HMAC key>64"); //does not happen (yet?) 
-// md5sha_ctx_t ctx; -// begin(&ctx); -// md5sha_hash(&ctx, key, key_size); -// key_size = sha_end(&ctx, tempkey); -// //key = tempkey; - right? RIGHT? why does it work without this? -// // because SHA_INSIZE is 64, but hmac() is always called with -// // key_size = tls->MAC_size = SHA1/256_OUTSIZE (20 or 32), -// // and prf_hmac_sha256() -> hmac_sha256() key sizes are: -// // - RSA_PREMASTER_SIZE is 48 -// // - CURVE25519_KEYSIZE is 32 -// // - master_secret[] is 48 - } - - for (i = 0; i < key_size; i++) { - key_xor_ipad[i] = key[i] ^ 0x36; - key_xor_opad[i] = key[i] ^ 0x5c; - } - for (; i < SHA_INSIZE; i++) { - key_xor_ipad[i] = 0x36; - key_xor_opad[i] = 0x5c; - } - - begin(&pre->hashed_key_xor_ipad); - begin(&pre->hashed_key_xor_opad); - md5sha_hash(&pre->hashed_key_xor_ipad, key_xor_ipad, SHA_INSIZE); - md5sha_hash(&pre->hashed_key_xor_opad, key_xor_opad, SHA_INSIZE); -} -#undef begin - -static unsigned hmac_sha_precomputed_v( - hmac_precomputed_t *pre, - uint8_t *out, - va_list va) -{ - uint8_t *text; - unsigned len; - - /* pre->hashed_key_xor_ipad contains unclosed "H((key XOR ipad) +" state */ - /* pre->hashed_key_xor_opad contains unclosed "H((key XOR opad) +" state */ - - /* calculate out = H((key XOR ipad) + text) */ - while ((text = va_arg(va, uint8_t*)) != NULL) { - unsigned text_size = va_arg(va, unsigned); - md5sha_hash(&pre->hashed_key_xor_ipad, text, text_size); - } - len = sha_end(&pre->hashed_key_xor_ipad, out); - - /* out = H((key XOR opad) + out) */ - md5sha_hash(&pre->hashed_key_xor_opad, out, len); - return sha_end(&pre->hashed_key_xor_opad, out); -} - -static unsigned hmac_sha_precomputed(hmac_precomputed_t *pre_init, uint8_t *out, ...) -{ - hmac_precomputed_t pre; - va_list va; - unsigned len; - - va_start(va, out); - pre = *pre_init; /* struct copy */ - len = hmac_sha_precomputed_v(&pre, out, va); - va_end(va); - return len; -} - -#if !ENABLE_FEATURE_TLS_SHA1 -#define hmac(tls,out,key,key_size,...) 
\ - hmac(out,key,key_size, __VA_ARGS__) -#endif -static unsigned hmac(tls_state_t *tls, uint8_t *out, uint8_t *key, unsigned key_size, ...) -{ - hmac_precomputed_t pre; - va_list va; - unsigned len; - - va_start(va, key_size); - - hmac_begin(&pre, key, key_size, - (ENABLE_FEATURE_TLS_SHA1 && tls->MAC_size == SHA1_OUTSIZE) - ? sha1_begin - : sha256_begin - ); - len = hmac_sha_precomputed_v(&pre, out, va); - - va_end(va); - return len; -} - // RFC 5246: // 5. HMAC and the Pseudorandom Function //... @@ -559,7 +435,7 @@ static void prf_hmac_sha256(/*tls_state_t *tls,*/ const char *label, uint8_t *seed, unsigned seed_size) { - hmac_precomputed_t pre; + hmac_ctx_t ctx; uint8_t a[TLS_MAX_MAC_SIZE]; uint8_t *out_p = outbuf; unsigned label_size = strlen(label); @@ -569,26 +445,26 @@ static void prf_hmac_sha256(/*tls_state_t *tls,*/ #define SEED label, label_size, seed, seed_size #define A a, MAC_size - hmac_begin(&pre, secret, secret_size, sha256_begin); + hmac_begin(&ctx, secret, secret_size, sha256_begin); /* A(1) = HMAC_hash(secret, seed) */ - hmac_sha_precomputed(&pre, a, SEED, NULL); + hmac_peek_hash(&ctx, a, SEED, NULL); for (;;) { /* HMAC_hash(secret, A(1) + seed) */ if (outbuf_size <= MAC_size) { /* Last, possibly incomplete, block */ /* (use a[] as temp buffer) */ - hmac_sha_precomputed(&pre, a, A, SEED, NULL); + hmac_peek_hash(&ctx, a, A, SEED, NULL); memcpy(out_p, a, outbuf_size); return; } /* Not last block. Store directly to result buffer */ - hmac_sha_precomputed(&pre, out_p, A, SEED, NULL); + hmac_peek_hash(&ctx, out_p, A, SEED, NULL); out_p += MAC_size; outbuf_size -= MAC_size; /* A(2) = HMAC_hash(secret, A(1)) */ - hmac_sha_precomputed(&pre, a, A, NULL); + hmac_peek_hash(&ctx, a, A, NULL); } #undef A #undef SECRET @@ -655,6 +531,29 @@ static void *tls_get_zeroed_outbuf(tls_state_t *tls, int len) return record; } +/* Calculate the HMAC over the list of blocks */ +#if !ENABLE_FEATURE_TLS_SHA1 +#define hmac_block(tls,out,key,key_size,...) 
\ + hmac_block(out,key,key_size, __VA_ARGS__) +#endif +static unsigned hmac_block(tls_state_t *tls, uint8_t *out, uint8_t *key, unsigned key_size, ...) +{ + hmac_ctx_t ctx; + va_list va; + + hmac_begin(&ctx, key, key_size, + (ENABLE_FEATURE_TLS_SHA1 && tls->MAC_size == SHA1_OUTSIZE) + ? sha1_begin + : sha256_begin + ); + + va_start(va, key_size); + hmac_hash_v(&ctx, va); + va_end(va); + + return hmac_end(&ctx, out); +} + static void xwrite_encrypted_and_hmac_signed(tls_state_t *tls, unsigned size, unsigned type) { uint8_t *buf = tls->outbuf + OUTBUF_PFX; @@ -676,7 +575,7 @@ static void xwrite_encrypted_and_hmac_signed(tls_state_t *tls, unsigned size, un xhdr->len16_lo = size & 0xff; /* Calculate MAC signature */ - hmac(tls, buf + size, /* result */ + hmac_block(tls, buf + size, /* result */ tls->client_write_MAC_key, TLS_MAC_SIZE(tls), &tls->write_seq64_be, sizeof(tls->write_seq64_be), xhdr, RECHDR_LEN, -- cgit v1.2.3-55-g6feb From c11730490ad68737120d569b9760e2c35e28977e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 08:21:44 +0200 Subject: libbb/yescrypt: remove redundant SHA256 HMAC implementation function old new delta hmac_blocks - 88 +88 static.PBKDF2_SHA256 176 213 +37 yescrypt_kdf32_body 1046 1052 +6 static.smix 759 762 +3 hmac_block 88 64 -24 HMAC_SHA256_Final 53 - -53 HMAC_SHA256_Buf 58 - -58 HMAC_SHA256_Init 159 - -159 ------------------------------------------------------------------------------ (add/remove: 1/3 grow/shrink: 3/1 up/down: 134/-294) Total: -160 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 10 ++++- libbb/hash_hmac.c | 10 ++++- libbb/yescrypt/alg-sha256.c | 94 ++++----------------------------------- libbb/yescrypt/alg-yescrypt-kdf.c | 24 +++++++--- networking/tls.c | 8 ++-- 5 files changed, 48 insertions(+), 98 deletions(-) (limited to 'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index 3f60acaa0..cbf723f7e 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2253,12 +2253,20 @@ 
typedef void md5sha_begin_func(md5sha_ctx_t *ctx) FAST_FUNC; #define hmac_begin(ctx,key,key_size,begin) \ hmac_begin(ctx,key,key_size) #endif -void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size, md5sha_begin_func *begin); +void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, const uint8_t *key, unsigned key_size, md5sha_begin_func *begin); static ALWAYS_INLINE void hmac_hash(hmac_ctx_t *ctx, const void *in, size_t len) { md5sha_hash(&ctx->hashed_key_xor_ipad, in, len); } unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out); +#if HMAC_ONLY_SHA256 +#define hmac_block(key,key_size,begin,in,sz,out) \ + hmac_block(key,key_size,in,sz,out) +#endif +unsigned FAST_FUNC hmac_block(const uint8_t *key, unsigned key_size, + md5sha_begin_func *begin, + const void *in, unsigned sz, + uint8_t *out); /* HMAC helpers for TLS: */ void FAST_FUNC hmac_hash_v(hmac_ctx_t *ctx, va_list va); unsigned FAST_FUNC hmac_peek_hash(hmac_ctx_t *ctx, uint8_t *out, ...); diff --git a/libbb/hash_hmac.c b/libbb/hash_hmac.c index 8cf936949..9e48e0f51 100644 --- a/libbb/hash_hmac.c +++ b/libbb/hash_hmac.c @@ -18,7 +18,7 @@ // if we often need HMAC hmac with the same key. // // text is often given in disjoint pieces. 
-void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size, md5sha_begin_func *begin) +void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, const uint8_t *key, unsigned key_size, md5sha_begin_func *begin) { #if HMAC_ONLY_SHA256 #define begin sha256_begin @@ -64,6 +64,14 @@ unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out) return sha_end(&ctx->hashed_key_xor_opad, out); } +unsigned FAST_FUNC hmac_block(const uint8_t *key, unsigned key_size, md5sha_begin_func *begin, const void *in, unsigned sz, uint8_t *out) +{ + hmac_ctx_t ctx; + hmac_begin(&ctx, key, key_size, begin); + hmac_hash(&ctx, in, sz); + return hmac_end(&ctx, out); +} + /* TLS helpers */ void FAST_FUNC hmac_hash_v( diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index f56b905ad..1ccffa1e5 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -25,82 +25,6 @@ * SUCH DAMAGE. */ -/** - * HMAC_SHA256_Init(ctx, K, Klen): - * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from - * ${K}. - */ -static void -HMAC_SHA256_Init(HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen) -{ - uint8_t pad[64]; - uint8_t khash[32]; - const uint8_t *K = _K; - size_t i; - - /* If Klen > 64, the key is really SHA256(K). */ - if (Klen > 64) { - sha256_block(K, Klen, khash); - K = khash; - Klen = 32; - } - - /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ - sha256_begin(&ctx->ictx); - memset(pad, 0x36, 64); - for (i = 0; i < Klen; i++) - pad[i] ^= K[i]; - sha256_hash(&ctx->ictx, pad, 64); - - /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */ - sha256_begin(&ctx->octx); - memset(pad, 0x5c, 64); - for (i = 0; i < Klen; i++) - pad[i] ^= K[i]; - sha256_hash(&ctx->octx, pad, 64); -} - -/** - * HMAC_SHA256_Update(ctx, in, len): - * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. 
- */ -static void -HMAC_SHA256_Update(HMAC_SHA256_CTX *ctx, const void *in, size_t len) -{ - /* Feed data to the inner SHA256 operation. */ - sha256_hash(&ctx->ictx, in, len); -} - -/** - * HMAC_SHA256_Final(ctx, digest): - * Output the HMAC-SHA256 of the data input to the context ${ctx} into the - * buffer ${digest}. - */ -static void -HMAC_SHA256_Final(HMAC_SHA256_CTX *ctx, void *digest) -{ - /* Finish the inner SHA256 operation. */ - sha256_end(&ctx->ictx, digest); /* using digest[] as scratch space */ - /* Feed the inner hash to the outer SHA256 operation. */ - sha256_hash(&ctx->octx, digest, 32); /* using digest[] as scratch space */ - /* Finish the outer SHA256 operation. */ - sha256_end(&ctx->octx, digest); -} - -/** - * HMAC_SHA256_Buf(K, Klen, in, len, digest): - * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of - * length ${Klen}, and write the result to ${digest}. - */ -static void -HMAC_SHA256_Buf(const void *K, size_t Klen, const void *in, size_t len, void *digest) -{ - HMAC_SHA256_CTX ctx; - HMAC_SHA256_Init(&ctx, K, Klen); - HMAC_SHA256_Update(&ctx, in, len); - HMAC_SHA256_Final(&ctx, digest); -} - /** * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and @@ -111,15 +35,15 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen) { - HMAC_SHA256_CTX Phctx, PShctx, hctx; + hmac_ctx_t Phctx, PShctx; size_t i; /* Compute HMAC state after processing P. */ - HMAC_SHA256_Init(&Phctx, passwd, passwdlen); + hmac_begin(&Phctx, passwd, passwdlen, sha256_begin); /* Compute HMAC state after processing P and S. */ - memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX)); - HMAC_SHA256_Update(&PShctx, salt, saltlen); + PShctx = Phctx; + hmac_hash(&PShctx, salt, saltlen); /* Iterate through the blocks. 
*/ for (i = 0; dkLen != 0; i++) { @@ -134,18 +58,16 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, ivec = SWAP_BE32((uint32_t)(i + 1)); /* Compute U_1 = PRF(P, S || INT(i)). */ - hctx = PShctx; - HMAC_SHA256_Update(&hctx, &ivec, 4); - HMAC_SHA256_Final(&hctx, T); + hmac_peek_hash(&PShctx, (void*)T, &ivec, 4, NULL); +//TODO: the above is a vararg function, might incur some ABI pain +//does libbb need a non-vararg version with just one (buf,len)? if (c > 1) { /* T_i = U_1 ... */ memcpy(U, T, 32); for (j = 2; j <= c; j++) { /* Compute U_j. */ - hctx = Phctx; - HMAC_SHA256_Update(&hctx, U, 32); - HMAC_SHA256_Final(&hctx, U); + hmac_peek_hash(&Phctx, (void*)U, U, 32, NULL); /* ... xor U_j ... */ for (k = 0; k < 32 / 8; k++) T[k] ^= U[k]; diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 27ef2caa4..f1f06621e 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -735,8 +735,12 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, ctx_i->S0 = Si + Sbytes / 3 * 2; ctx_i->w = 0; if (i == 0) - HMAC_SHA256_Buf(Bp + (128 * r - 64), 64, - passwd, 32, passwd); + hmac_block( + /* key,len: */ Bp + (128 * r - 64), 64, + /* hash fn: */ sha256_begin, + /* in,len: */ passwd, 32, + /* outbuf: */ passwd + ); } smix1(Bp, r, Np, flags, Vp, NROM, VROM, XYp, ctx_i); smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, @@ -907,9 +911,12 @@ static int yescrypt_kdf32_body( S = (uint8_t *)XY + XY_size; if (flags) { - HMAC_SHA256_Buf("yescrypt-prehash", - (flags & YESCRYPT_PREHASH) ? 16 : 8, - passwd, passwdlen, sha256); + hmac_block( + /* key,len: */ (const void*)"yescrypt-prehash", (flags & YESCRYPT_PREHASH) ? 
16 : 8, + /* hash fn: */ sha256_begin, + /* in,len: */ passwd, passwdlen, + /* outbuf: */ sha256 + ); passwd = sha256; passwdlen = sizeof(sha256); } @@ -946,7 +953,12 @@ static int yescrypt_kdf32_body( */ if (flags && !(flags & YESCRYPT_PREHASH)) { /* Compute ClientKey */ - HMAC_SHA256_Buf(dkp, sizeof(dk), "Client Key", 10, sha256); + hmac_block( + /* key,len: */ dkp, sizeof(dk), + /* hash fn: */ sha256_begin, + /* in,len: */ "Client Key", 10, + /* outbuf: */ sha256 + ); /* Compute StoredKey */ { size_t clen = /*buflen:*/32; diff --git a/networking/tls.c b/networking/tls.c index b8caf1e4b..098cf7cac 100644 --- a/networking/tls.c +++ b/networking/tls.c @@ -533,10 +533,10 @@ static void *tls_get_zeroed_outbuf(tls_state_t *tls, int len) /* Calculate the HMAC over the list of blocks */ #if !ENABLE_FEATURE_TLS_SHA1 -#define hmac_block(tls,out,key,key_size,...) \ - hmac_block(out,key,key_size, __VA_ARGS__) +#define hmac_blocks(tls,out,key,key_size,...) \ + hmac_blocks(out,key,key_size, __VA_ARGS__) #endif -static unsigned hmac_block(tls_state_t *tls, uint8_t *out, uint8_t *key, unsigned key_size, ...) +static unsigned hmac_blocks(tls_state_t *tls, uint8_t *out, uint8_t *key, unsigned key_size, ...) 
{ hmac_ctx_t ctx; va_list va; @@ -575,7 +575,7 @@ static void xwrite_encrypted_and_hmac_signed(tls_state_t *tls, unsigned size, un xhdr->len16_lo = size & 0xff; /* Calculate MAC signature */ - hmac_block(tls, buf + size, /* result */ + hmac_blocks(tls, buf + size, /* result */ tls->client_write_MAC_key, TLS_MAC_SIZE(tls), &tls->write_seq64_be, sizeof(tls->write_seq64_be), xhdr, RECHDR_LEN, -- cgit v1.2.3-55-g6feb From 75758c73608f3c6be9ea2d338a199a8aa11c51e2 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 17:08:32 +0200 Subject: libbb/yescrypt: accept longer salts (up to 84 chars) function old new delta cryptpw_main 214 223 +9 chpasswd_main 347 356 +9 passwd_main 931 934 +3 yescrypt_r 1084 1056 -28 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/1 up/down: 21/-28) Total: -7 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 5 ++- libbb/yescrypt/alg-yescrypt-common.c | 76 +++++++++++++++++++++++++++++++++++- libbb/yescrypt/alg-yescrypt.h | 5 +++ testsuite/cryptpw.tests | 4 ++ 4 files changed, 86 insertions(+), 4 deletions(-) (limited to 'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index cbf723f7e..544ca3155 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1819,9 +1819,10 @@ extern int crypt_make_rand64encoded(char *p, int cnt /*, int rnd*/) FAST_FUNC; * "$6$" * #define MAX_PW_SALT_LEN (3 + 16 + 1) * yescrypt: - * "$y$j9T$" + * "$y$" "$" + * (84 chars are ascii64-encoded 64 binary bytes) */ -#define MAX_PW_SALT_LEN (7 + 24 + 1) +#define MAX_PW_SALT_LEN (3 + 8*6 + 1 + 84 + 1) extern char* crypt_make_pw_salt(char p[MAX_PW_SALT_LEN], const char *algo) FAST_FUNC; /* Returns number of lines changed, or -1 on error */ diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index db6e098c7..1e896df64 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -63,6 +63,75 @@ fail: return 
NULL; } +#if 1 +static const uint8_t *decode64( + uint8_t *dst, size_t *dstlen, + const uint8_t *src, size_t srclen) +{ + size_t dstpos = 0; + + dbg_dec64("src:'%s' len:%d", src, (int)srclen); + for (;;) { + uint32_t c, value = 0; + int bits = 0; + while (srclen != 0) { + srclen--; + c = a2i64(*src); + if (c > 63) { /* bad ascii64 char, stop decoding at it */ + srclen = 0; + break; + } + src++; + value |= c << bits; + bits += 6; + if (bits == 24) /* got 4 chars */ + goto store; + } + /* we read entire src, or met a non-ascii64 char (such as "$") */ + if (bits == 0) + break; + /* else: we got last, partial bit block - store it */ + store: + dbg_dec64(" storing bits:%d v:%08x", bits, (int)SWAP_BE32(value)); //BE to see lsb first + while (dstpos < *dstlen) { + if (srclen == 0 && value == 0) { + /* Example: mkpasswd PWD '$y$j9T$123': + * the "123" is bits:18 value:03,51,00 + * is considered to be 2 bytes, not 3! + * + * '$y$j9T$zzz' in upstream fails outright (3rd byte isn't zero). + * IOW: for upstream, validity of salt depends on VALUE, + * not just size of salt. Which is a bug. + * The '$y$j9T$zzz.' salt is the same + * (it adds 6 zero msbits) but upstream works with it, + * thus '$y$j9T$zzz' should work too and give the same result. + */ + goto end; + } + dstpos++; + *dst++ = value; + value >>= 8; + bits -= 8; + if (bits <= 0) /* can get negative, if we e.g. 
had 6 bits */ + goto next; + } + dbg_dec64(" ERR: bits:%d dst[] is too small", bits); + goto fail; + next: + if (srclen == 0) + break; + } + end: + /* here, srclen is 0, no need to check */ + *dstlen = dstpos; + dbg_dec64("dec64: OK, dst[%d]", (int)dstpos); + return src; +fail: + *dstlen = 0; + return NULL; +} +#else +/* Buggy (and larger) original code */ static const uint8_t *decode64( uint8_t *dst, size_t *dstlen, const uint8_t *src, size_t srclen) @@ -87,6 +156,7 @@ static const uint8_t *decode64( break; if (bits < 12) /* must have at least one full byte */ goto fail; + dbg_dec64(" storing bits:%d v:%08x", (int)bits, (int)SWAP_BE32(value)); //BE to see lsb first while (dstpos++ < *dstlen) { *dst++ = value; value >>= 8; @@ -104,12 +174,14 @@ static const uint8_t *decode64( if (!srclen && dstpos <= *dstlen) { *dstlen = dstpos; + dbg_dec64("dec64: OK, dst[%d]", (int)dstpos); return src; } fail: - *dstlen = 0; + /* *dstlen = 0; - not needed, caller detects error by seeing NULL */ return NULL; } +#endif static char *encode64( char *dst, size_t dstlen, @@ -189,7 +261,7 @@ char *yescrypt_r( goto fail; if (*src != '$') { src = decode64_uint32(&u32, src, 1); - dbg("yescrypt has extended params:0x%x", (unsigned)have); + dbg("yescrypt has extended params:0x%x", (unsigned)u32); if (u32 & 1) src = decode64_uint32(&yctx->param.p, src, 2); if (u32 & 2) diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 97475d89f..4554e3de3 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -33,6 +33,11 @@ # else # define dbg(...) bb_error_msg(__VA_ARGS__) # endif +# if 1 +# define dbg_dec64(...) ((void)0) +# else +# define dbg_dec64(...) 
bb_error_msg(__VA_ARGS__) +# endif #endif /** diff --git a/testsuite/cryptpw.tests b/testsuite/cryptpw.tests index 739fb4e9f..ef04e20d7 100755 --- a/testsuite/cryptpw.tests +++ b/testsuite/cryptpw.tests @@ -73,6 +73,10 @@ testing 'cryptpw yescrypt with 3-char salt' \ 'cryptpw -m yescrypt qweRTY123@-+ j9T\$123' \ '$y$j9T$123$A34DMIGUbUIo3bjx66Wtk2IFoREMIw6d49it25KQh2D\n' \ '' '' +testing 'cryptpw yescrypt with 84-char salt (max size)' \ + 'cryptpw -m yescrypt qweRTY123@-+ j9T\$123456789012345678901234567890123456789012345678901234567890123456789012345678901234' \ + '$y$j9T$123456789012345678901234567890123456789012345678901234567890123456789012345678901234$ubrUuPCpI97LIMlVMt/A0Mhs/kBK2UBJYcQSxEZSlz4\n' \ + '' '' testing 'cryptpw yescrypt implicit' \ 'cryptpw qweRTY123@-+ \$y\$j9T\$123456789012345678901234' \ '$y$j9T$123456789012345678901234$AKxw5OX/T4jD.v./IW.5tE/j7izNjw06fg3OvH1LsN9\n' \ -- cgit v1.2.3-55-g6feb From c305c81c94a086fb09444b1ea6f31fb911c25ec0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 9 Jul 2025 06:51:04 +0200 Subject: libbb: introduce and use block-XOR functions On x86_64, they can be done in 16-byte blocks 64-bit: function old new delta xorbuf_3 - 84 +84 xorbuf64_3_aligned64 - 58 +58 smix1 687 712 +25 xwrite_encrypted 520 534 +14 xorbuf16_aligned_long - 13 +13 tls_xread_record 733 742 +9 xorbuf 21 13 -8 xorbuf_aligned_AES_BLOCK_SIZE 15 - -15 blockmix 814 762 -52 blockmix_salsa8 317 198 -119 blockmix_xor_save 1620 1499 -121 blockmix_xor 1543 1322 -221 ------------------------------------------------------------------------------ (add/remove: 4/1 grow/shrink: 3/5 up/down: 203/-536) Total: -333 bytes 32-bit: function old new delta xorbuf_3 - 76 +76 xorbuf64_3_aligned64 - 36 +36 xorbuf16_aligned_long - 23 +23 xwrite_encrypted 499 507 +8 tls_xread_record 646 650 +4 xorbuf 22 11 -11 xorbuf_aligned_AES_BLOCK_SIZE 23 - -23 blockmix 1083 938 -145 blockmix_salsa8 415 210 -205 blockmix_salsa8_xor 601 163 -438 blockmix_xor 2103 1533 -570 
blockmix_xor_save 2614 1859 -755 ------------------------------------------------------------------------------ (add/remove: 4/1 grow/shrink: 2/6 up/down: 147/-2147) Total: -2000 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 10 ++++ libbb/bitops.c | 108 ++++++++++++++++++++++++++++++++++++++ libbb/yescrypt/alg-sha256.c | 1 + libbb/yescrypt/alg-yescrypt-kdf.c | 7 +++ networking/tls.c | 39 +++----------- networking/tls.h | 5 +- networking/tls_aesgcm.c | 5 +- 7 files changed, 137 insertions(+), 38 deletions(-) create mode 100644 libbb/bitops.c (limited to 'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index 544ca3155..79427fb31 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1113,6 +1113,16 @@ char *bin2hex(char *dst, const char *src, int count) FAST_FUNC; /* Reverse */ char* hex2bin(char *dst, const char *src, int count) FAST_FUNC; +void FAST_FUNC xorbuf_3(void *dst, const void *src1, const void *src2, unsigned count); +void FAST_FUNC xorbuf(void* buf, const void* mask, unsigned count); +void FAST_FUNC xorbuf16_aligned_long(void* buf, const void* mask); +void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2); +#if BB_UNALIGNED_MEMACCESS_OK +# define xorbuf16(buf,mask) xorbuf16_aligned_long(buf,mask) +#else +void FAST_FUNC xorbuf16(void* buf, const void* mask); +#endif + /* Generate a UUID */ void generate_uuid(uint8_t *buf) FAST_FUNC; diff --git a/libbb/bitops.c b/libbb/bitops.c new file mode 100644 index 000000000..5f239676c --- /dev/null +++ b/libbb/bitops.c @@ -0,0 +1,108 @@ +/* + * Utility routines. + * + * Copyright (C) 2025 by Denys Vlasenko + * + * Licensed under GPLv2, see file LICENSE in this source tree. 
+ */ +//kbuild:lib-y += bitops.o + +#include "libbb.h" + +void FAST_FUNC xorbuf_3(void *dst, const void *src1, const void *src2, unsigned count) +{ + uint8_t *d = dst; + const uint8_t *s1 = src1; + const uint8_t *s2 = src2; +#if BB_UNALIGNED_MEMACCESS_OK + while (count >= sizeof(long)) { + *(long*)d = *(long*)s1 ^ *(long*)s2; + count -= sizeof(long); + d += sizeof(long); + s1 += sizeof(long); + s2 += sizeof(long); + } +#endif + while (count--) + *d++ = *s1++ ^ *s2++; +} + +void FAST_FUNC xorbuf(void *dst, const void *src, unsigned count) +{ + xorbuf_3(dst, dst, src, count); +} + +void FAST_FUNC xorbuf16_aligned_long(void *dst, const void *src) +{ +#if defined(__SSE__) /* any x86_64 has it */ + asm volatile( +"\n movups (%0),%%xmm0" +"\n movups (%1),%%xmm1" // can't just xorps(%1),%%xmm0: +"\n xorps %%xmm1,%%xmm0" // SSE requires 16-byte alignment +"\n movups %%xmm0,(%0)" +"\n" + : "=r" (dst), "=r" (src) + : "0" (dst), "1" (src) + : "xmm0", "xmm1", "memory" + ); +#else + unsigned long *d = dst; + const unsigned long *s = src; + d[0] ^= s[0]; +# if LONG_MAX <= 0x7fffffffffffffff + d[1] ^= s[1]; +# if LONG_MAX == 0x7fffffff + d[2] ^= s[2]; + d[3] ^= s[3]; +# endif +# endif +#endif +} + +void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2) +{ +#if defined(__SSE__) /* any x86_64 has it */ + asm volatile( +"\n movups 0*16(%1),%%xmm0" +"\n movups 0*16(%2),%%xmm1" // can't just xorps(%2),%%xmm0: +"\n xorps %%xmm1,%%xmm0" // SSE requires 16-byte alignment, we have only 8-byte +"\n movups %%xmm0,0*16(%0)" +"\n movups 1*16(%1),%%xmm0" +"\n movups 1*16(%2),%%xmm1" +"\n xorps %%xmm1,%%xmm0" +"\n movups %%xmm0,1*16(%0)" +"\n movups 2*16(%1),%%xmm0" +"\n movups 2*16(%2),%%xmm1" +"\n xorps %%xmm1,%%xmm0" +"\n movups %%xmm0,2*16(%0)" +"\n movups 3*16(%1),%%xmm0" +"\n movups 3*16(%2),%%xmm1" +"\n xorps %%xmm1,%%xmm0" +"\n movups %%xmm0,3*16(%0)" +"\n" + : "=r" (dst), "=r" (src1), "=r" (src2) + : "0" (dst), "1" (src1), "2" (src2) + : "xmm0", "xmm1", 
"memory" + ); +#else + long *d = dst; + const long *s1 = src1; + const long *s2 = src2; + unsigned count = 64 / sizeof(long); + do { + *d++ = *s1++ ^ *s2++; + } while (--count != 0); +#endif +} + +#if !BB_UNALIGNED_MEMACCESS_OK +void FAST_FUNC xorbuf16(void *dst, const void *src) +{ +#define p_aligned(a) (((uintptr_t)(a) & (sizeof(long)-1)) == 0) + if (p_aligned(src) && p_aligned(dst)) { + xorbuf16_aligned_long(dst, src); + return; + } + xorbuf_3(dst, dst, src, 16); +} +#endif diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index 25446406b..20e8d1ee4 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -72,6 +72,7 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, /* ... xor U_j ... */ for (k = 0; k < 32 / 8; k++) T[k] ^= U[k]; + //TODO: xorbuf32_aligned_long(T, U); } } diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index f421db111..112862ec9 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -180,6 +180,7 @@ static void salsa20(salsa20_blk_t *restrict B, #define SALSA20_2(out) \ salsa20(&X, &out, 1) +#if 0 #define XOR(out, in1, in2) \ do { \ (out).d[0] = (in1).d[0] ^ (in2).d[0]; \ @@ -191,6 +192,12 @@ do { \ (out).d[6] = (in1).d[6] ^ (in2).d[6]; \ (out).d[7] = (in1).d[7] ^ (in2).d[7]; \ } while (0) +#else +#define XOR(out, in1, in2) \ +do { \ + xorbuf64_3_aligned64(&(out).d, &(in1).d, &(in2).d); \ +} while (0) +#endif #define XOR_X(in) XOR(X, X, in) #define XOR_X_2(in1, in2) XOR(X, in1, in2) diff --git a/networking/tls.c b/networking/tls.c index 098cf7cac..ac6f0767f 100644 --- a/networking/tls.c +++ b/networking/tls.c @@ -333,34 +333,6 @@ void FAST_FUNC tls_get_random(void *buf, unsigned len) xfunc_die(); } -static void xorbuf3(void *dst, const void *src1, const void *src2, unsigned count) -{ - uint8_t *d = dst; - const uint8_t *s1 = src1; - const uint8_t* s2 = src2; - while (count--) - *d++ = *s1++ ^ *s2++; -} - -void FAST_FUNC 
xorbuf(void *dst, const void *src, unsigned count) -{ - xorbuf3(dst, dst, src, count); -} - -void FAST_FUNC xorbuf_aligned_AES_BLOCK_SIZE(void *dst, const void *src) -{ - unsigned long *d = dst; - const unsigned long *s = src; - d[0] ^= s[0]; -#if ULONG_MAX <= 0xffffffffffffffff - d[1] ^= s[1]; - #if ULONG_MAX == 0xffffffff - d[2] ^= s[2]; - d[3] ^= s[3]; - #endif -#endif -} - #if !TLS_DEBUG_HASH # define hash_handshake(tls, fmt, buffer, len) \ hash_handshake(tls, buffer, len) @@ -764,8 +736,13 @@ static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned ty cnt++; COUNTER(nonce) = htonl(cnt); /* yes, first cnt here is 2 (!) */ aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch); - n = remaining > AES_BLOCK_SIZE ? AES_BLOCK_SIZE : remaining; - xorbuf(buf, scratch, n); + if (remaining >= AES_BLOCK_SIZE) { + n = AES_BLOCK_SIZE; + xorbuf_AES_BLOCK_SIZE(buf, scratch); + } else { + n = remaining; + xorbuf(buf, scratch, n); + } buf += n; remaining -= n; } @@ -923,7 +900,7 @@ static void tls_aesgcm_decrypt(tls_state_t *tls, uint8_t *buf, int size) COUNTER(nonce) = htonl(cnt); /* yes, first cnt here is 2 (!) */ aes_encrypt_one_block(&tls->aes_decrypt, nonce, scratch); n = remaining > AES_BLOCK_SIZE ? 
AES_BLOCK_SIZE : remaining; - xorbuf3(buf, scratch, buf + 8, n); + xorbuf_3(buf, scratch, buf + 8, n); buf += n; remaining -= n; } diff --git a/networking/tls.h b/networking/tls.h index 0173b87b2..9751d30ff 100644 --- a/networking/tls.h +++ b/networking/tls.h @@ -82,10 +82,9 @@ typedef int16_t int16; void tls_get_random(void *buf, unsigned len) FAST_FUNC; -void xorbuf(void* buf, const void* mask, unsigned count) FAST_FUNC; - #define ALIGNED_long ALIGNED(sizeof(long)) -void xorbuf_aligned_AES_BLOCK_SIZE(void* buf, const void* mask) FAST_FUNC; +#define xorbuf_aligned_AES_BLOCK_SIZE(dst,src) xorbuf16_aligned_long(dst,src) +#define xorbuf_AES_BLOCK_SIZE(dst,src) xorbuf16(dst,src) #define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS) diff --git a/networking/tls_aesgcm.c b/networking/tls_aesgcm.c index 5ddcdd2ad..9c2381a57 100644 --- a/networking/tls_aesgcm.c +++ b/networking/tls_aesgcm.c @@ -167,10 +167,7 @@ void FAST_FUNC aesgcm_GHASH(byte* h, blocks = cSz / AES_BLOCK_SIZE; partial = cSz % AES_BLOCK_SIZE; while (blocks--) { - if (BB_UNALIGNED_MEMACCESS_OK) // c is not guaranteed to be aligned - xorbuf_aligned_AES_BLOCK_SIZE(x, c); - else - xorbuf(x, c, AES_BLOCK_SIZE); + xorbuf_AES_BLOCK_SIZE(x, c); GMULT(x, h); c += AES_BLOCK_SIZE; } -- cgit v1.2.3-55-g6feb From dde90f1d4746a2efef20fc227cd38bbcb58d546c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 17 Jul 2025 17:34:14 +0200 Subject: libbb.h: increase MAX_PW_SALT_LEN Signed-off-by: Denys Vlasenko --- include/libbb.h | 12 ++++++------ loginutils/cryptpw.c | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/libbb.h') diff --git a/include/libbb.h b/include/libbb.h index 79427fb31..4f44680aa 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1825,14 +1825,14 @@ extern int obscure(const char *old, const char *newval, const struct passwd *pwd extern int crypt_make_rand64encoded(char *p, int cnt /*, int rnd*/) FAST_FUNC; /* Size of char salt[] to 
hold randomly-generated salt string * sha256/512: - * "$5$" <sha_salt_16_chars><NUL> - * "$6$" <sha_salt_16_chars><NUL> - * #define MAX_PW_SALT_LEN (3 + 16 + 1) + * "$5$" ["rounds=999999999$"] "<sha_salt_16_chars><NUL>" + * "$6$" ["rounds=999999999$"] "<sha_salt_16_chars><NUL>" + * #define MAX_PW_SALT_LEN (3 + sizeof("rounds=999999999$")-1 + 16 + 1) * yescrypt: - * "$y$" <up_to_8_params_of_up_to_6_chars_each> "$" <up_to_84_chars_salt><NUL> - * (84 chars are ascii64-encoded 64 binary bytes) + * "$y$" <up_to_8_params_of_up_to_6_chars_each> "$" <up_to_86_chars_salt><NUL> + * (86 chars are ascii64-encoded 64 binary bytes) */ -#define MAX_PW_SALT_LEN (3 + 8*6 + 1 + 84 + 1) +#define MAX_PW_SALT_LEN (3 + 8*6 + 1 + 86 + 1) extern char* crypt_make_pw_salt(char p[MAX_PW_SALT_LEN], const char *algo) FAST_FUNC; /* Returns number of lines changed, or -1 on error */ diff --git a/loginutils/cryptpw.c b/loginutils/cryptpw.c index 1c338540f..c0f6280cd 100644 --- a/loginutils/cryptpw.c +++ b/loginutils/cryptpw.c @@ -84,8 +84,7 @@ to cryptpw. -a option (alias for -m) came from cryptpw. int cryptpw_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int cryptpw_main(int argc UNUSED_PARAM, char **argv) { - /* Supports: cryptpw -m sha256 PASS 'rounds=999999999$SALT' */ - char salt[MAX_PW_SALT_LEN + sizeof("rounds=999999999$")]; + char salt[MAX_PW_SALT_LEN]; char *salt_ptr; char *password; const char *opt_m, *opt_S; -- cgit v1.2.3-55-g6feb