From 80e676664e1d7a0b07f14bff44f93d1fef709cf4 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 00:44:19 +0200 Subject: libbb: add yescrypt password hashing support It seems to work, but is not at all optimized for size. The extra copy of the sha256 code needs to be removed. The yescrypt code in libbb/yescrypt/* is adapted from libxcrypt-4.4.38 with minimal edits, hopefully making it easier to track backports by resetting the tree to this commit, then comparing changes in upstream libxcrypt to the tree. function old new delta blockmix_xor_save - 7050 +7050 static.blockmix_xor - 6475 +6475 blockmix - 3390 +3390 SHA256_Transform - 3083 +3083 yescrypt_kdf_body - 1724 +1724 PBKDF2_SHA256 - 1003 +1003 smix1 - 960 +960 yescrypt_r - 890 +890 salsa20 - 804 +804 smix - 790 +790 smix2 - 659 +659 blockmix_salsa8_xor - 601 +601 yescrypt_kdf - 479 +479 blockmix_salsa8 - 415 +415 Krnd - 256 +256 _HMAC_SHA256_Init - 213 +213 _SHA256_Update - 198 +198 _SHA256_Final - 195 +195 decode64_uint32 - 166 +166 encode64 - 153 +153 decode64 - 136 +136 libcperciva_HMAC_SHA256_Buf - 132 +132 SHA256_Pad_Almost - 131 +131 salsa20_simd_unshuffle - 101 +101 salsa20_simd_shuffle - 101 +101 yes_crypt - 90 +90 libcperciva_SHA256_Buf - 86 +86 crypt_make_rand64encoded - 85 +85 static.atoi64_partial - 77 +77 alloc_region - 72 +72 ascii64 - 65 +65 PAD - 64 +64 _HMAC_SHA256_Final - 55 +55 static.cpu_to_be32_vect - 51 +51 free_region - 47 +47 libcperciva_SHA256_Init - 37 +37 yescrypt_init_local - 34 +34 crypt_make_pw_salt 92 125 +33 initial_state - 32 +32 .rodata 105771 105803 +32 atoi64 - 25 +25 explicit_bzero - 22 +22 pw_encrypt 920 941 +21 yescrypt_free_local - 9 +9 crypt_make_salt 85 - -85 ------------------------------------------------------------------------------ (add/remove: 43/1 grow/shrink: 3/0 up/down: 31042/-85) Total: 30957 bytes Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt.c | 53 +- libbb/yescrypt/Kbuild.src | 9 + libbb/yescrypt/README | 4 + libbb/yescrypt/alg-sha256.c | 608 
++++++++++++++++++ libbb/yescrypt/alg-sha256.h | 113 ++++ libbb/yescrypt/alg-yescrypt-common.c | 714 +++++++++++++++++++++ libbb/yescrypt/alg-yescrypt-kdf.c | 1070 ++++++++++++++++++++++++++++++++ libbb/yescrypt/alg-yescrypt-platform.c | 65 ++ libbb/yescrypt/alg-yescrypt.h | 355 +++++++++++ libbb/yescrypt/y.c | 139 +++++ 10 files changed, 3126 insertions(+), 4 deletions(-) create mode 100644 libbb/yescrypt/Kbuild.src create mode 100644 libbb/yescrypt/README create mode 100644 libbb/yescrypt/alg-sha256.c create mode 100644 libbb/yescrypt/alg-sha256.h create mode 100644 libbb/yescrypt/alg-yescrypt-common.c create mode 100644 libbb/yescrypt/alg-yescrypt-kdf.c create mode 100644 libbb/yescrypt/alg-yescrypt-platform.c create mode 100644 libbb/yescrypt/alg-yescrypt.h create mode 100644 libbb/yescrypt/y.c (limited to 'libbb') diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 3463fd95b..1d530974e 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -31,7 +31,7 @@ static int i64c(int i) return ('a' - 38 + i); } -int FAST_FUNC crypt_make_salt(char *p, int cnt /*, int x */) +int FAST_FUNC crypt_make_rand64encoded(char *p, int cnt /*, int x */) { /* was: x += ... 
*/ unsigned x = getpid() + monotonic_us(); @@ -68,11 +68,49 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) #if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_SHA if ((algo[0]|0x20) == 's') { /* sha */ salt[1] = '5' + (strcasecmp(algo, "sha512") == 0); - len = 16/2; + len = 16 / 2; + } +#endif +#if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_YES + if ((algo[0]|0x20) == 'y') { /* yescrypt */ + salt[1] = 'y'; + len = 24 / 2; +// The "j9T$" below is the default "yescrypt parameters" encoded by yescrypt_encode_params_r(): +// +//shadow-4.17.4/src/passwd.c +// salt = crypt_make_rand64encoded(NULL, NULL); +//shadow-4.17.4/lib/salt.c +//const char *crypt_make_rand64encoded(const char *meth, void *arg) +// if (streq(method, "YESCRYPT")) { +// MAGNUM(result, 'y'); +// salt_len = YESCRYPT_SALT_SIZE; // 24 +// rounds = YESCRYPT_get_salt_cost(arg); // always Y_COST_DEFAULT == 5 for NULL arg +// YESCRYPT_salt_cost_to_buf(result, rounds); // always "j9T$" +// char *retval = crypt_gensalt(result, rounds, NULL, 0); +//libxcrypt-4.4.38/lib/crypt-yescrypt.c +//void gensalt_yescrypt_rn (unsigned long count, +// const uint8_t *rbytes, size_t nrbytes, +// uint8_t *output, size_t o_size) +// yescrypt_params_t params = { +// .flags = YESCRYPT_DEFAULTS, +// .p = 1, +// }; +// if (count < 3) ... else +// params.r = 32; // N in 4KiB +// params.N = 1ULL << (count + 7); // 3 -> 1024, 4 -> 2048, ... 11 -> 262144 +// yescrypt_encode_params_r(&params, rbytes, nrbytes, outbuf, o_size) // always "$y$j9T$" + salt_ptr = stpcpy(salt_ptr, "j9T$"); + crypt_make_rand64encoded(salt_ptr, len); /* appends 2*len random chars */ + /* For "mkpasswd -m yescrypt PASS j9T$" use case, + * "j9T$" is considered part of salt, + * need to return pointer to 'j'. Without -4, + * we'd end up using "j9T$j9T$" as salt. 
+ */ + return salt_ptr - 4; } #endif } - crypt_make_salt(salt_ptr, len); + crypt_make_rand64encoded(salt_ptr, len); /* appends 2*len random chars */ return salt_ptr; } @@ -99,6 +137,9 @@ to64(char *s, unsigned v, int n) #if ENABLE_USE_BB_CRYPT_SHA #include "pw_encrypt_sha.c" #endif +#if ENABLE_USE_BB_CRYPT_YES +#include "pw_encrypt_yes.c" +#endif /* Other advanced crypt ids (TODO?): */ /* $2$ or $2a$: Blowfish */ @@ -109,10 +150,14 @@ static struct des_ctx *des_ctx; /* my_crypt returns malloc'ed data */ static char *my_crypt(const char *key, const char *salt) { - /* MD5 or SHA? */ + /* "$x$...." string? */ if (salt[0] == '$' && salt[1] && salt[2] == '$') { if (salt[1] == '1') return md5_crypt(xzalloc(MD5_OUT_BUFSIZE), (unsigned char*)key, (unsigned char*)salt); +#if ENABLE_USE_BB_CRYPT_YES + if (salt[1] == 'y') + return yes_crypt(key, salt); +#endif #if ENABLE_USE_BB_CRYPT_SHA if (salt[1] == '5' || salt[1] == '6') return sha_crypt((char*)key, (char*)salt); diff --git a/libbb/yescrypt/Kbuild.src b/libbb/yescrypt/Kbuild.src new file mode 100644 index 000000000..a61211a29 --- /dev/null +++ b/libbb/yescrypt/Kbuild.src @@ -0,0 +1,9 @@ +# Makefile for busybox +# +# Copyright (C) 2025 by Denys Vlasenko +# +# Licensed under GPLv2, see file LICENSE in this source tree. + +lib-y:= + +INSERT diff --git a/libbb/yescrypt/README b/libbb/yescrypt/README new file mode 100644 index 000000000..c1011c56a --- /dev/null +++ b/libbb/yescrypt/README @@ -0,0 +1,4 @@ +The yescrypt code in this directory is adapted from libxcrypt-4.4.38 +with minimal edits, hopefully making it easier to track +backports by resetting the tree to the commit which created this file, +then comparing changes in upstream libxcrypt to the tree. 
diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c new file mode 100644 index 000000000..0c1b846be --- /dev/null +++ b/libbb/yescrypt/alg-sha256.c @@ -0,0 +1,608 @@ +/*- + * Copyright 2005-2016 Colin Percival + * Copyright 2016-2018,2021 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(__GNUC__) +#define restrict __restrict +#else +#define restrict +#endif + +/* SHA256 round constants. 
*/ +static const uint32_t Krnd[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#if 1 /* Explicit caching/reuse of common subexpression between rounds */ +#define Maj(x, y, z) (y ^ ((x_xor_y = x ^ y) & y_xor_z)) +#else /* Let the compiler cache/reuse or not */ +#define Maj(x, y, z) (y ^ ((x ^ y) & (y ^ z))) +#endif +#define SHR(x, n) (x >> n) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + h += S1(e) + Ch(e, f, g) + k; \ + d += h; \ + h += S0(a) + Maj(a, b, c); \ + y_xor_z = x_xor_y; + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i, ii) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i + ii] + Krnd[i + ii]) + +/* Message schedule computation */ +#define MSCH(W, ii, i) \ + W[i + ii + 16] = s1(W[i + ii + 
14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii] + +/* + * SHA256 block compression function. The 256-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +static void +SHA256_Transform(uint32_t state[static restrict 8], + const uint8_t block[static restrict 64], + uint32_t W[static restrict 64], uint32_t S[static restrict 8]) +{ + int i; + + /* 1. Prepare the first part of the message schedule W. */ + be32dec_vect(W, block, 16); + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. */ + for (i = 0; i <= 48; i += 16) { + uint32_t x_xor_y, y_xor_z = S[(65 - i) % 8] ^ S[(66 - i) % 8]; + RNDr(S, W, 0, i); + RNDr(S, W, 1, i); + RNDr(S, W, 2, i); + RNDr(S, W, 3, i); + RNDr(S, W, 4, i); + RNDr(S, W, 5, i); + RNDr(S, W, 6, i); + RNDr(S, W, 7, i); + RNDr(S, W, 8, i); + RNDr(S, W, 9, i); + RNDr(S, W, 10, i); + RNDr(S, W, 11, i); + RNDr(S, W, 12, i); + RNDr(S, W, 13, i); + RNDr(S, W, 14, i); + RNDr(S, W, 15, i); + + if (i == 48) + break; + + MSCH(W, 0, i); + MSCH(W, 1, i); + MSCH(W, 2, i); + MSCH(W, 3, i); + MSCH(W, 4, i); + MSCH(W, 5, i); + MSCH(W, 6, i); + MSCH(W, 7, i); + MSCH(W, 8, i); + MSCH(W, 9, i); + MSCH(W, 10, i); + MSCH(W, 11, i); + MSCH(W, 12, i); + MSCH(W, 13, i); + MSCH(W, 14, i); + MSCH(W, 15, i); + } + + /* 4. Mix local working variables into global state. */ + state[0] += S[0]; + state[1] += S[1]; + state[2] += S[2]; + state[3] += S[3]; + state[4] += S[4]; + state[5] += S[5]; + state[6] += S[6]; + state[7] += S[7]; +} + +static const uint8_t PAD[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Add padding and terminating bit-count. */ +static void +SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72]) +{ + size_t r; + + /* Figure out how many bytes we have buffered. 
*/ + r = (ctx->count >> 3) & 0x3f; + + /* Pad to 56 mod 64, transforming if we finish a block en route. */ + if (r < 56) { + /* Pad to 56 mod 64. */ + memcpy(&ctx->buf[r], PAD, 56 - r); + } else { + /* Finish the current block and mix. */ + memcpy(&ctx->buf[r], PAD, 64 - r); + SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); + + /* The start of the final block is all zeroes. */ + memset(&ctx->buf[0], 0, 56); + } + + /* Add the terminating bit-count. */ + be64enc(&ctx->buf[56], ctx->count); + + /* Mix in the final block. */ + SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); +} + +/* Magic initialization constants. */ +static const uint32_t initial_state[8] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 +}; + +/** + * SHA256_Init(ctx): + * Initialize the SHA256 context ${ctx}. + */ +void +SHA256_Init(SHA256_CTX * ctx) +{ + + /* Zero bits processed so far. */ + ctx->count = 0; + + /* Initialize state. */ + memcpy(ctx->state, initial_state, sizeof(initial_state)); +} + +/** + * SHA256_Update(ctx, in, len): + * Input ${len} bytes from ${in} into the SHA256 context ${ctx}. + */ +static void +_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len, + uint32_t tmp32[static restrict 72]) +{ + uint32_t r; + const uint8_t * src = in; + + /* Return immediately if we have nothing to do. */ + if (len == 0) + return; + + /* Number of bytes left in the buffer from previous updates. */ + r = (ctx->count >> 3) & 0x3f; + + /* Update number of bits. */ + ctx->count += (uint64_t)(len) << 3; + + /* Handle the case where we don't need to perform any transforms. */ + if (len < 64 - r) { + memcpy(&ctx->buf[r], src, len); + return; + } + + /* Finish the current block. */ + memcpy(&ctx->buf[r], src, 64 - r); + SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); + src += 64 - r; + len -= 64 - r; + + /* Perform complete blocks. 
*/ + while (len >= 64) { + SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]); + src += 64; + len -= 64; + } + + /* Copy left over data into buffer. */ + memcpy(ctx->buf, src, len); +} + +/* Wrapper function for intermediate-values sanitization. */ +void +SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len) +{ + uint32_t tmp32[72]; + + /* Call the real function. */ + _SHA256_Update(ctx, in, len, tmp32); + + /* Clean the stack. */ + explicit_bzero(tmp32, 288); +} + +/** + * SHA256_Final(digest, ctx): + * Output the SHA256 hash of the data input to the context ${ctx} into the + * buffer ${digest}. + */ +static void +_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx, + uint32_t tmp32[static restrict 72]) +{ + + /* Add padding. */ + SHA256_Pad(ctx, tmp32); + + /* Write the hash. */ + be32enc_vect(digest, ctx->state, 8); +} + +/* Wrapper function for intermediate-values sanitization. */ +void +SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx) +{ + uint32_t tmp32[72]; + + /* Call the real function. */ + _SHA256_Final(digest, ctx, tmp32); + + /* Clear the context state. */ + explicit_bzero(ctx, sizeof(SHA256_CTX)); + + /* Clean the stack. */ + explicit_bzero(tmp32, 288); +} + +/** + * SHA256_Buf(in, len, digest): + * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. + */ +void +SHA256_Buf(const void * in, size_t len, uint8_t digest[32]) +{ + SHA256_CTX ctx; + uint32_t tmp32[72]; + + SHA256_Init(&ctx); + _SHA256_Update(&ctx, in, len, tmp32); + _SHA256_Final(digest, &ctx, tmp32); + + /* Clean the stack. */ + explicit_bzero(&ctx, sizeof(SHA256_CTX)); + explicit_bzero(tmp32, 288); +} + +/** + * HMAC_SHA256_Init(ctx, K, Klen): + * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from + * ${K}. 
+ */ +static void +_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen, + uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64], + uint8_t khash[static restrict 32]) +{ + const uint8_t * K = _K; + size_t i; + + /* If Klen > 64, the key is really SHA256(K). */ + if (Klen > 64) { + SHA256_Init(&ctx->ictx); + _SHA256_Update(&ctx->ictx, K, Klen, tmp32); + _SHA256_Final(khash, &ctx->ictx, tmp32); + K = khash; + Klen = 32; + } + + /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ + SHA256_Init(&ctx->ictx); + memset(pad, 0x36, 64); + for (i = 0; i < Klen; i++) + pad[i] ^= K[i]; + _SHA256_Update(&ctx->ictx, pad, 64, tmp32); + + /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */ + SHA256_Init(&ctx->octx); + memset(pad, 0x5c, 64); + for (i = 0; i < Klen; i++) + pad[i] ^= K[i]; + _SHA256_Update(&ctx->octx, pad, 64, tmp32); +} + +/* Wrapper function for intermediate-values sanitization. */ +void +HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) +{ + uint32_t tmp32[72]; + uint8_t pad[64]; + uint8_t khash[32]; + + /* Call the real function. */ + _HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash); + + /* Clean the stack. */ + explicit_bzero(tmp32, 288); + explicit_bzero(khash, 32); + explicit_bzero(pad, 64); +} + +/** + * HMAC_SHA256_Update(ctx, in, len): + * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. + */ +static void +_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len, + uint32_t tmp32[static restrict 72]) +{ + + /* Feed data to the inner SHA256 operation. */ + _SHA256_Update(&ctx->ictx, in, len, tmp32); +} + +/* Wrapper function for intermediate-values sanitization. */ +void +HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len) +{ + uint32_t tmp32[72]; + + /* Call the real function. */ + _HMAC_SHA256_Update(ctx, in, len, tmp32); + + /* Clean the stack. 
*/ + explicit_bzero(tmp32, 288); +} + +/** + * HMAC_SHA256_Final(digest, ctx): + * Output the HMAC-SHA256 of the data input to the context ${ctx} into the + * buffer ${digest}. + */ +static void +_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx, + uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32]) +{ + + /* Finish the inner SHA256 operation. */ + _SHA256_Final(ihash, &ctx->ictx, tmp32); + + /* Feed the inner hash to the outer SHA256 operation. */ + _SHA256_Update(&ctx->octx, ihash, 32, tmp32); + + /* Finish the outer SHA256 operation. */ + _SHA256_Final(digest, &ctx->octx, tmp32); +} + +/* Wrapper function for intermediate-values sanitization. */ +void +HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx) +{ + uint32_t tmp32[72]; + uint8_t ihash[32]; + + /* Call the real function. */ + _HMAC_SHA256_Final(digest, ctx, tmp32, ihash); + + /* Clear the context state. */ + explicit_bzero(ctx, sizeof(HMAC_SHA256_CTX)); + + /* Clean the stack. */ + explicit_bzero(tmp32, 288); + explicit_bzero(ihash, 32); +} + +/** + * HMAC_SHA256_Buf(K, Klen, in, len, digest): + * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of + * length ${Klen}, and write the result to ${digest}. + */ +void +HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len, + uint8_t digest[32]) +{ + HMAC_SHA256_CTX ctx; + uint32_t tmp32[72]; + uint8_t tmp8[96]; + + _HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]); + _HMAC_SHA256_Update(&ctx, in, len, tmp32); + _HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]); + + /* Clean the stack. */ + explicit_bzero(&ctx, sizeof(HMAC_SHA256_CTX)); + explicit_bzero(tmp32, 288); + explicit_bzero(tmp8, 96); +} + +/* Add padding and terminating bit-count, but don't invoke Transform yet. 
*/ +static int +SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8], + uint32_t tmp32[static restrict 72]) +{ + uint32_t r; + + r = (ctx->count >> 3) & 0x3f; + if (r >= 56) + return -1; + + /* + * Convert length to a vector of bytes -- we do this now rather + * than later because the length will change after we pad. + */ + be64enc(len, ctx->count); + + /* Add 1--56 bytes so that the resulting length is 56 mod 64. */ + _SHA256_Update(ctx, PAD, 56 - r, tmp32); + + /* Add the terminating bit-count. */ + ctx->buf[63] = len[7]; + _SHA256_Update(ctx, len, 7, tmp32); + + return 0; +} + +/** + * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): + * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and + * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). + */ +void +PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, + size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen) +{ + HMAC_SHA256_CTX Phctx, PShctx, hctx; + uint32_t tmp32[72]; + union { + uint8_t tmp8[96]; + uint32_t state[8]; + } u; + size_t i; + uint8_t ivec[4]; + uint8_t U[32]; + uint8_t T[32]; + uint64_t j; + int k; + size_t clen; + + /* Sanity-check. */ + assert(dkLen <= 32 * (size_t)(UINT32_MAX)); + + if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) { + uint32_t oldcount; + uint8_t * ivecp; + + /* Compute HMAC state after processing P and S. */ + _HMAC_SHA256_Init(&hctx, passwd, passwdlen, + tmp32, &u.tmp8[0], &u.tmp8[64]); + _HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32); + + /* Prepare ictx padding. */ + oldcount = hctx.ictx.count & (0x3f << 3); + _HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32); + if ((hctx.ictx.count & (0x3f << 3)) < oldcount || + SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32)) + goto generic; /* Can't happen due to saltlen check */ + ivecp = hctx.ictx.buf + (oldcount >> 3); + + /* Prepare octx padding. 
*/ + hctx.octx.count += 32 << 3; + SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32); + + /* Iterate through the blocks. */ + for (i = 0; i * 32 < dkLen; i++) { + /* Generate INT(i + 1). */ + be32enc(ivecp, (uint32_t)(i + 1)); + + /* Compute U_1 = PRF(P, S || INT(i)). */ + memcpy(u.state, hctx.ictx.state, sizeof(u.state)); + SHA256_Transform(u.state, hctx.ictx.buf, + &tmp32[0], &tmp32[64]); + be32enc_vect(hctx.octx.buf, u.state, 8); + memcpy(u.state, hctx.octx.state, sizeof(u.state)); + SHA256_Transform(u.state, hctx.octx.buf, + &tmp32[0], &tmp32[64]); + be32enc_vect(&buf[i * 32], u.state, 8); + } + + goto cleanup; + } + +generic: + /* Compute HMAC state after processing P. */ + _HMAC_SHA256_Init(&Phctx, passwd, passwdlen, + tmp32, &u.tmp8[0], &u.tmp8[64]); + + /* Compute HMAC state after processing P and S. */ + memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX)); + _HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32); + + /* Iterate through the blocks. */ + for (i = 0; i * 32 < dkLen; i++) { + /* Generate INT(i + 1). */ + be32enc(ivec, (uint32_t)(i + 1)); + + /* Compute U_1 = PRF(P, S || INT(i)). */ + memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX)); + _HMAC_SHA256_Update(&hctx, ivec, 4, tmp32); + _HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8); + + if (c > 1) { + /* T_i = U_1 ... */ + memcpy(U, T, 32); + + for (j = 2; j <= c; j++) { + /* Compute U_j. */ + memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX)); + _HMAC_SHA256_Update(&hctx, U, 32, tmp32); + _HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8); + + /* ... xor U_j ... */ + for (k = 0; k < 32; k++) + T[k] ^= U[k]; + } + } + + /* Copy as many bytes as necessary into buf. */ + clen = dkLen - i * 32; + if (clen > 32) + clen = 32; + memcpy(&buf[i * 32], T, clen); + } + + /* Clean the stack. 
*/ + explicit_bzero(&Phctx, sizeof(HMAC_SHA256_CTX)); + explicit_bzero(&PShctx, sizeof(HMAC_SHA256_CTX)); + explicit_bzero(U, 32); + explicit_bzero(T, 32); + +cleanup: + explicit_bzero(&hctx, sizeof(HMAC_SHA256_CTX)); + explicit_bzero(tmp32, 288); + explicit_bzero(&u, sizeof(u)); +} diff --git a/libbb/yescrypt/alg-sha256.h b/libbb/yescrypt/alg-sha256.h new file mode 100644 index 000000000..1e75307d3 --- /dev/null +++ b/libbb/yescrypt/alg-sha256.h @@ -0,0 +1,113 @@ +/*- + * Copyright 2005-2016 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Use #defines in order to avoid namespace collisions with anyone else's + * SHA256 code (e.g., the code in OpenSSL). 
+ */ +#define SHA256_Init libcperciva_SHA256_Init +#define SHA256_Update libcperciva_SHA256_Update +#define SHA256_Final libcperciva_SHA256_Final +#define SHA256_Buf libcperciva_SHA256_Buf +#define SHA256_CTX libcperciva_SHA256_CTX +#define HMAC_SHA256_Init libcperciva_HMAC_SHA256_Init +#define HMAC_SHA256_Update libcperciva_HMAC_SHA256_Update +#define HMAC_SHA256_Final libcperciva_HMAC_SHA256_Final +#define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf +#define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX + +/* Context structure for SHA256 operations. */ +typedef struct { + uint32_t state[8]; + uint64_t count; + uint8_t buf[64]; +} SHA256_CTX; + +/** + * SHA256_Init(ctx): + * Initialize the SHA256 context ${ctx}. + */ +extern void SHA256_Init(SHA256_CTX *); + +/** + * SHA256_Update(ctx, in, len): + * Input ${len} bytes from ${in} into the SHA256 context ${ctx}. + */ +extern void SHA256_Update(SHA256_CTX *, const void *, size_t); + +/** + * SHA256_Final(digest, ctx): + * Output the SHA256 hash of the data input to the context ${ctx} into the + * buffer ${digest}. + */ +extern void SHA256_Final(uint8_t[32], SHA256_CTX *); + +/** + * SHA256_Buf(in, len, digest): + * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. + */ +extern void SHA256_Buf(const void *, size_t, uint8_t[32]); + +/* Context structure for HMAC-SHA256 operations. */ +typedef struct { + SHA256_CTX ictx; + SHA256_CTX octx; +} HMAC_SHA256_CTX; + +/** + * HMAC_SHA256_Init(ctx, K, Klen): + * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from + * ${K}. + */ +extern void HMAC_SHA256_Init(HMAC_SHA256_CTX *, const void *, size_t); + +/** + * HMAC_SHA256_Update(ctx, in, len): + * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. + */ +extern void HMAC_SHA256_Update(HMAC_SHA256_CTX *, const void *, size_t); + +/** + * HMAC_SHA256_Final(digest, ctx): + * Output the HMAC-SHA256 of the data input to the context ${ctx} into the + * buffer ${digest}. 
+ */ +extern void HMAC_SHA256_Final(uint8_t[32], HMAC_SHA256_CTX *); + +/** + * HMAC_SHA256_Buf(K, Klen, in, len, digest): + * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of + * length ${Klen}, and write the result to ${digest}. + */ +extern void HMAC_SHA256_Buf(const void *, size_t, const void *, size_t, uint8_t[32]); + +/** + * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): + * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and + * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). + */ +extern void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t, + uint64_t, uint8_t *, size_t); diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c new file mode 100644 index 000000000..75b59d1cf --- /dev/null +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -0,0 +1,714 @@ +/*- + * Copyright 2013-2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if 0 //UNUSED +static uint8_t *encode64_uint32(uint8_t *dst, size_t dstlen, + uint32_t src, uint32_t min) +{ + uint32_t start = 0, end = 47, chars = 1, bits = 0; + + if (src < min) + return NULL; + src -= min; + + do { + uint32_t count = (end + 1 - start) << bits; + if (src < count) + break; + if (start >= 63) + return NULL; + start = end + 1; + end = start + (62 - end) / 2; + src -= count; + chars++; + bits += 6; + } while (1); + + if (dstlen <= chars) /* require room for a NUL terminator */ + return NULL; + + *dst++ = itoa64[start + (src >> bits)]; + + while (--chars) { + bits -= 6; + *dst++ = itoa64[(src >> bits) & 0x3f]; + } + + *dst = 0; /* NUL terminate just in case */ + + return dst; +} +#endif //UNUSED + +static inline uint32_t atoi64(uint8_t src) +{ + static const uint8_t atoi64_partial[77] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 64, 64, 64, 64, 64, 64, 64, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 64, 64, 64, 64, 64, 64, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 + }; + if (src >= '.' 
&& src <= 'z') + return atoi64_partial[src - '.']; + return 64; +} + +static const uint8_t *decode64_uint32(uint32_t *dst, + const uint8_t *src, uint32_t min) +{ + uint32_t start = 0, end = 47, chars = 1, bits = 0; + uint32_t c; + + c = atoi64(*src++); + if (c > 63) + goto fail; + + *dst = min; + while (c > end) { + *dst += (end + 1 - start) << bits; + start = end + 1; + end = start + (62 - end) / 2; + chars++; + bits += 6; + } + + *dst += (c - start) << bits; + + while (--chars) { + c = atoi64(*src++); + if (c > 63) + goto fail; + bits -= 6; + *dst += c << bits; + } + + return src; + +fail: + *dst = 0; + return NULL; +} + +static uint8_t *encode64_uint32_fixed(uint8_t *dst, size_t dstlen, + uint32_t src, uint32_t srcbits) +{ + uint32_t bits; + + for (bits = 0; bits < srcbits; bits += 6) { + if (dstlen < 2) + return NULL; + *dst++ = itoa64[src & 0x3f]; + dstlen--; + src >>= 6; + } + + if (src || dstlen < 1) + return NULL; + + *dst = 0; /* NUL terminate just in case */ + + return dst; +} + +uint8_t *encode64(uint8_t *dst, size_t dstlen, + const uint8_t *src, size_t srclen) +{ + size_t i; + + for (i = 0; i < srclen; ) { + uint8_t *dnext; + uint32_t value = 0, bits = 0; + do { + value |= (uint32_t)src[i++] << bits; + bits += 8; + } while (bits < 24 && i < srclen); + dnext = encode64_uint32_fixed(dst, dstlen, value, bits); + if (!dnext) + return NULL; + dstlen -= dnext - dst; + dst = dnext; + } + + if (dstlen < 1) + return NULL; + + *dst = 0; /* NUL terminate just in case */ + + return dst; +} + +static const uint8_t *decode64_uint32_fixed(uint32_t *dst, uint32_t dstbits, + const uint8_t *src) +{ + uint32_t bits; + + *dst = 0; + for (bits = 0; bits < dstbits; bits += 6) { + uint32_t c = atoi64(*src++); + if (c > 63) { + *dst = 0; + return NULL; + } + *dst |= c << bits; + } + + return src; +} + +const uint8_t *decode64(uint8_t *dst, size_t *dstlen, + const uint8_t *src, size_t srclen) +{ + size_t dstpos = 0; + + while (dstpos <= *dstlen && srclen) { + uint32_t value = 
0, bits = 0; + while (srclen--) { + uint32_t c = atoi64(*src); + if (c > 63) { + srclen = 0; + break; + } + src++; + value |= c << bits; + bits += 6; + if (bits >= 24) + break; + } + if (!bits) + break; + if (bits < 12) /* must have at least one full byte */ + goto fail; + while (dstpos++ < *dstlen) { + *dst++ = value; + value >>= 8; + bits -= 8; + if (bits < 8) { /* 2 or 4 */ + if (value) /* must be 0 */ + goto fail; + bits = 0; + break; + } + } + if (bits) + goto fail; + } + + if (!srclen && dstpos <= *dstlen) { + *dstlen = dstpos; + return src; + } + +fail: + *dstlen = 0; + return NULL; +} + +#if 0 //UNUSED //KEY: +typedef enum { ENC = 1, DEC = -1 } encrypt_dir_t; + +static void memxor(unsigned char *dst, unsigned char *src, size_t size) +{ + while (size--) + *dst++ ^= *src++; +} + +static void yescrypt_sha256_cipher(unsigned char *data, size_t datalen, + const yescrypt_binary_t *key, encrypt_dir_t dir) +{ + SHA256_CTX ctx; + unsigned char f[32 + 4]; + size_t halflen, which; + unsigned char mask, round, target; + + if (!datalen) + return; + if (datalen > 64) + datalen = 64; + + halflen = datalen >> 1; + + which = 0; /* offset to half we are working on (0 or halflen) */ + mask = 0x0f; /* current half's extra nibble mask if datalen is odd */ + + round = 0; + target = 5; /* 6 rounds due to Jacques Patarin's CRYPTO 2004 paper */ + + if (dir == DEC) { + which = halflen; /* even round count, so swap the halves */ + mask ^= 0xff; + + round = target; + target = 0; + } + + f[32] = 0; + f[33] = sizeof(*key); + f[34] = datalen; + + do { + SHA256_Init(&ctx); + f[35] = round; + SHA256_Update(&ctx, &f[32], 4); + SHA256_Update(&ctx, key, sizeof(*key)); + SHA256_Update(&ctx, &data[which], halflen); + if (datalen & 1) { + f[0] = data[datalen - 1] & mask; + SHA256_Update(&ctx, f, 1); + } + SHA256_Final(f, &ctx); + which ^= halflen; + memxor(&data[which], f, halflen); + if (datalen & 1) { + mask ^= 0xff; + data[datalen - 1] ^= f[halflen] & mask; + } + if (round == target) + break; 
+ round += dir; + } while (1); + + /* ctx is presumably zeroized by SHA256_Final() */ + explicit_bzero(f, sizeof(f)); +} +#endif //UNUSED //KEY: + +uint8_t *yescrypt_r( + const yescrypt_shared_t *shared, + yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *setting, + //KEY const yescrypt_binary_t *key, + uint8_t *buf, size_t buflen) +{ + unsigned char saltbin[64], hashbin[32]; + const uint8_t *src, *saltstr, *salt; + uint8_t *dst; + size_t need, prefixlen, saltstrlen, saltlen; + yescrypt_params_t params = { .p = 1 }; + + if (setting[0] != '$' || + (setting[1] != '7' && setting[1] != 'y') || + setting[2] != '$') + return NULL; + src = setting + 3; + + if (setting[1] == '7') { + uint32_t N_log2 = atoi64(*src++); + if (N_log2 < 1 || N_log2 > 63) + return NULL; + params.N = (uint64_t)1 << N_log2; + + src = decode64_uint32_fixed(&params.r, 30, src); + if (!src) + return NULL; + + src = decode64_uint32_fixed(&params.p, 30, src); + if (!src) + return NULL; + + //KEY:if (key) + //KEY: return NULL; + } else { + uint32_t flavor, N_log2; + + src = decode64_uint32(&flavor, src, 0); + if (!src) + return NULL; + + if (flavor < YESCRYPT_RW) { + params.flags = flavor; + } else if (flavor <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { + params.flags = YESCRYPT_RW + ((flavor - YESCRYPT_RW) << 2); + } else { + return NULL; + } + + src = decode64_uint32(&N_log2, src, 1); + if (!src || N_log2 > 63) + return NULL; + params.N = (uint64_t)1 << N_log2; + + src = decode64_uint32(&params.r, src, 1); + if (!src) + return NULL; + + if (*src != '$') { + uint32_t have; + + src = decode64_uint32(&have, src, 1); + if (!src) + return NULL; + + if (have & 1) { + src = decode64_uint32(&params.p, src, 2); + if (!src) + return NULL; + } + + if (have & 2) { + src = decode64_uint32(&params.t, src, 1); + if (!src) + return NULL; + } + + if (have & 4) { + src = decode64_uint32(&params.g, src, 1); + if (!src) + return NULL; + } + + if (have & 8) { + uint32_t NROM_log2; + src = 
decode64_uint32(&NROM_log2, src, 1); + if (!src || NROM_log2 > 63) + return NULL; + params.NROM = (uint64_t)1 << NROM_log2; + } + } + + if (*src++ != '$') + return NULL; + } + + prefixlen = src - setting; + + saltstr = src; + src = (uint8_t *)strrchr((char *)saltstr, '$'); + if (src) + saltstrlen = src - saltstr; + else + saltstrlen = strlen((char *)saltstr); + + if (setting[1] == '7') { + salt = saltstr; + saltlen = saltstrlen; + } else { + const uint8_t *saltend; + + saltlen = sizeof(saltbin); + saltend = decode64(saltbin, &saltlen, saltstr, saltstrlen); + + if (!saltend || (size_t)(saltend - saltstr) != saltstrlen) + goto fail; + + salt = saltbin; + + //KEY:if (key) + //KEY: yescrypt_sha256_cipher(saltbin, saltlen, key, ENC); + } + + need = prefixlen + saltstrlen + 1 + HASH_LEN + 1; + if (need > buflen || need < saltstrlen) + goto fail; + + if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, + &params, hashbin, sizeof(hashbin))) + goto fail; + + //KEY:if (key) { + //KEY: explicit_bzero(saltbin, sizeof(saltbin)); + //KEY: yescrypt_sha256_cipher(hashbin, sizeof(hashbin), key, ENC); + //KEY:} + + dst = buf; + memcpy(dst, setting, prefixlen + saltstrlen); + dst += prefixlen + saltstrlen; + *dst++ = '$'; + + dst = encode64(dst, buflen - (dst - buf), hashbin, sizeof(hashbin)); + explicit_bzero(hashbin, sizeof(hashbin)); + if (!dst || dst >= buf + buflen) + return NULL; + + *dst = 0; /* NUL termination */ + + return buf; + +fail: + explicit_bzero(saltbin, sizeof(saltbin)); + explicit_bzero(hashbin, sizeof(hashbin)); + return NULL; +} + + + + + + + + + +int yescrypt_free_shared(yescrypt_shared_t *shared) +{ + return free_region(shared); +} + +int yescrypt_init_local(yescrypt_local_t *local) +{ + init_region(local); + return 0; +} + +int yescrypt_free_local(yescrypt_local_t *local) +{ + return free_region(local); +} + +#if 0 //UNUSED +uint8_t *yescrypt(const uint8_t *passwd, const uint8_t *setting) +{ + /* prefix, '$', hash, NUL */ + static uint8_t 
buf[PREFIX_LEN + 1 + HASH_LEN + 1]; + yescrypt_local_t local; + uint8_t *retval; + + if (yescrypt_init_local(&local)) + return NULL; + retval = yescrypt_r( + /* const yescrypt_shared_t *shared */ NULL, + &local, + passwd, strlen((char *)passwd), + setting, + //KEY:/* const yescrypt_binary_t *key */ NULL, + buf, sizeof(buf)); + if (yescrypt_free_local(&local)) + return NULL; + return retval; +} + +uint8_t *yescrypt_reencrypt(uint8_t *hash, + const yescrypt_binary_t *from_key, + const yescrypt_binary_t *to_key) +{ + uint8_t *retval = NULL, *saltstart, *hashstart; + const uint8_t *hashend; + unsigned char saltbin[64], hashbin[32]; + size_t saltstrlen, saltlen = 0, hashlen; + + if (strncmp((char *)hash, "$y$", 3)) + return NULL; + + saltstart = NULL; + hashstart = (uint8_t *)strrchr((char *)hash, '$'); + if (hashstart) { + if (hashstart > (uint8_t *)hash) { + saltstart = hashstart - 1; + while (*saltstart != '$' && saltstart > hash) + saltstart--; + if (*saltstart == '$') + saltstart++; + } + hashstart++; + } else { + hashstart = hash; + } + saltstrlen = saltstart ? 
(hashstart - 1 - saltstart) : 0; + if (saltstrlen > BYTES2CHARS(64) || + strlen((char *)hashstart) != HASH_LEN) + return NULL; + + if (saltstrlen) { + const uint8_t *saltend; + saltlen = sizeof(saltbin); + saltend = decode64(saltbin, &saltlen, saltstart, saltstrlen); + if (!saltend || *saltend != '$' || saltlen < 1 || saltlen > 64) + goto out; + + if (from_key) + yescrypt_sha256_cipher(saltbin, saltlen, from_key, ENC); + if (to_key) + yescrypt_sha256_cipher(saltbin, saltlen, to_key, DEC); + } + + hashlen = sizeof(hashbin); + hashend = decode64(hashbin, &hashlen, hashstart, HASH_LEN); + if (!hashend || *hashend || hashlen != sizeof(hashbin)) + goto out; + + if (from_key) + yescrypt_sha256_cipher(hashbin, hashlen, from_key, DEC); + if (to_key) + yescrypt_sha256_cipher(hashbin, hashlen, to_key, ENC); + + if (saltstrlen) { + if (!encode64(saltstart, saltstrlen + 1, saltbin, saltlen)) + goto out; /* can't happen */ + *(saltstart + saltstrlen) = '$'; + } + + if (!encode64(hashstart, HASH_LEN + 1, hashbin, hashlen)) + goto out; /* can't happen */ + + retval = hash; + +out: + explicit_bzero(saltbin, sizeof(saltbin)); + explicit_bzero(hashbin, sizeof(hashbin)); + + return retval; +} + +static uint32_t N2log2(uint64_t N) +{ + uint32_t N_log2; + + if (N < 2) + return 0; + + N_log2 = 2; + while (N >> N_log2 != 0) + N_log2++; + N_log2--; + + if (N >> N_log2 != 1) + return 0; + + return N_log2; +} + +uint8_t *yescrypt_encode_params_r(const yescrypt_params_t *params, + const uint8_t *src, size_t srclen, + uint8_t *buf, size_t buflen) +{ + uint32_t flavor, N_log2, NROM_log2, have; + uint8_t *dst; + + if (srclen > SIZE_MAX / 16) + return NULL; + + if (params->flags < YESCRYPT_RW) { + flavor = params->flags; + } else if ((params->flags & YESCRYPT_MODE_MASK) == YESCRYPT_RW && + params->flags <= (YESCRYPT_RW | YESCRYPT_RW_FLAVOR_MASK)) { + flavor = YESCRYPT_RW + (params->flags >> 2); + } else { + return NULL; + } + + N_log2 = N2log2(params->N); + if (!N_log2) + return NULL; + + 
NROM_log2 = N2log2(params->NROM); + if (params->NROM && !NROM_log2) + return NULL; + + if ((uint64_t)params->r * (uint64_t)params->p >= (1U << 30)) + return NULL; + + dst = buf; + *dst++ = '$'; + *dst++ = 'y'; + *dst++ = '$'; + + dst = encode64_uint32(dst, buflen - (dst - buf), flavor, 0); + if (!dst) + return NULL; + + dst = encode64_uint32(dst, buflen - (dst - buf), N_log2, 1); + if (!dst) + return NULL; + + dst = encode64_uint32(dst, buflen - (dst - buf), params->r, 1); + if (!dst) + return NULL; + + have = 0; + if (params->p != 1) + have |= 1; + if (params->t) + have |= 2; + if (params->g) + have |= 4; + if (NROM_log2) + have |= 8; + + if (have) { + dst = encode64_uint32(dst, buflen - (dst - buf), have, 1); + if (!dst) + return NULL; + } + + if (params->p != 1) { + dst = encode64_uint32(dst, buflen - (dst - buf), params->p, 2); + if (!dst) + return NULL; + } + + if (params->t) { + dst = encode64_uint32(dst, buflen - (dst - buf), params->t, 1); + if (!dst) + return NULL; + } + + if (params->g) { + dst = encode64_uint32(dst, buflen - (dst - buf), params->g, 1); + if (!dst) + return NULL; + } + + if (NROM_log2) { + dst = encode64_uint32(dst, buflen - (dst - buf), NROM_log2, 1); + if (!dst) + return NULL; + } + + if (dst >= buf + buflen) + return NULL; + + *dst++ = '$'; + + dst = encode64(dst, buflen - (dst - buf), src, srclen); + if (!dst || dst >= buf + buflen) + return NULL; + + *dst = 0; /* NUL termination */ + + return buf; +} + +uint8_t *yescrypt_encode_params(const yescrypt_params_t *params, + const uint8_t *src, size_t srclen) +{ + /* prefix, NUL */ + static uint8_t buf[PREFIX_LEN + 1]; + return yescrypt_encode_params_r(params, src, srclen, buf, sizeof(buf)); +} + +int crypto_scrypt(const uint8_t *passwd, size_t passwdlen, + const uint8_t *salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, + uint8_t *buf, size_t buflen) +{ + yescrypt_local_t local; + yescrypt_params_t params = { .flags = 0, .N = N, .r = r, .p = p }; + int retval; + + if 
(yescrypt_init_local(&local)) + return -1; + retval = yescrypt_kdf(NULL, &local, + passwd, passwdlen, salt, saltlen, &params, buf, buflen); + if (yescrypt_free_local(&local)) + return -1; + return retval; +} +#endif //UNUSED diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c new file mode 100644 index 000000000..3e98ffb0a --- /dev/null +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -0,0 +1,1070 @@ +/*- + * Copyright 2009 Colin Percival + * Copyright 2012-2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. 
+ */ + +#if __STDC_VERSION__ >= 199901L +/* Have restrict */ +#elif defined(__GNUC__) +#define restrict __restrict +#else +#define restrict +#endif + +#ifdef __GNUC__ +#define unlikely(exp) __builtin_expect(exp, 0) +#else +#define unlikely(exp) (exp) +#endif + +#if 0 //def __SSE__ +... +#else /* !defined(__SSE2__) */ + +typedef union { + uint32_t w[16]; + uint64_t d[8]; +} salsa20_blk_t; + +static inline void salsa20_simd_shuffle(const salsa20_blk_t *Bin, + salsa20_blk_t *Bout) +{ +#define COMBINE(out, in1, in2) \ + Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); + COMBINE(0, 0, 2) + COMBINE(1, 5, 7) + COMBINE(2, 2, 4) + COMBINE(3, 7, 1) + COMBINE(4, 4, 6) + COMBINE(5, 1, 3) + COMBINE(6, 6, 0) + COMBINE(7, 3, 5) +#undef COMBINE +} + +static inline void salsa20_simd_unshuffle(const salsa20_blk_t *Bin, + salsa20_blk_t *Bout) +{ +#define UNCOMBINE(out, in1, in2) \ + Bout->w[out * 2] = Bin->d[in1]; \ + Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; + UNCOMBINE(0, 0, 6) + UNCOMBINE(1, 5, 3) + UNCOMBINE(2, 2, 0) + UNCOMBINE(3, 7, 5) + UNCOMBINE(4, 4, 2) + UNCOMBINE(5, 1, 7) + UNCOMBINE(6, 6, 4) + UNCOMBINE(7, 3, 1) +#undef UNCOMBINE +} +#define DECL_X \ + salsa20_blk_t X; +#define DECL_Y \ + salsa20_blk_t Y; + +#define COPY(out, in) \ + (out).d[0] = (in).d[0]; \ + (out).d[1] = (in).d[1]; \ + (out).d[2] = (in).d[2]; \ + (out).d[3] = (in).d[3]; \ + (out).d[4] = (in).d[4]; \ + (out).d[5] = (in).d[5]; \ + (out).d[6] = (in).d[6]; \ + (out).d[7] = (in).d[7]; + +#define READ_X(in) COPY(X, in) +#define WRITE_X(out) COPY(out, X) + +/** + * salsa20(B): + * Apply the Salsa20 core to the provided block. 
+ */ +static inline void salsa20(salsa20_blk_t *restrict B, + salsa20_blk_t *restrict Bout, uint32_t doublerounds) +{ + salsa20_blk_t X; +#define x X.w + + salsa20_simd_unshuffle(B, &X); + + do { +#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b)))) + /* Operate on columns */ + x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); + x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); + + x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); + x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); + + x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); + x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); + + x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); + x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); + + /* Operate on rows */ + x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); + x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); + + x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); + x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); + + x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); + x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); + + x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); + x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); +#undef R + } while (--doublerounds); +#undef x + + { + uint32_t i; + salsa20_simd_shuffle(&X, Bout); + for (i = 0; i < 16; i += 4) { + B->w[i] = Bout->w[i] += B->w[i]; + B->w[i + 1] = Bout->w[i + 1] += B->w[i + 1]; + B->w[i + 2] = Bout->w[i + 2] += B->w[i + 2]; + B->w[i + 3] = Bout->w[i + 3] += B->w[i + 3]; + } + } + +#if 0 + /* Too expensive */ + explicit_bzero(&X, sizeof(X)); +#endif +} + +/** + * Apply the Salsa20/2 core to the block provided in X. 
+ */ +#define SALSA20_2(out) \ + salsa20(&X, &out, 1); + +#define XOR(out, in1, in2) \ + (out).d[0] = (in1).d[0] ^ (in2).d[0]; \ + (out).d[1] = (in1).d[1] ^ (in2).d[1]; \ + (out).d[2] = (in1).d[2] ^ (in2).d[2]; \ + (out).d[3] = (in1).d[3] ^ (in2).d[3]; \ + (out).d[4] = (in1).d[4] ^ (in2).d[4]; \ + (out).d[5] = (in1).d[5] ^ (in2).d[5]; \ + (out).d[6] = (in1).d[6] ^ (in2).d[6]; \ + (out).d[7] = (in1).d[7] ^ (in2).d[7]; + +#define XOR_X(in) XOR(X, X, in) +#define XOR_X_2(in1, in2) XOR(X, in1, in2) +#define XOR_X_WRITE_XOR_Y_2(out, in) \ + XOR(Y, out, in) \ + COPY(out, Y) \ + XOR(X, X, Y) + +/** + * Apply the Salsa20/8 core to the block provided in X ^ in. + */ +#define SALSA20_8_XOR_MEM(in, out) \ + XOR_X(in); \ + salsa20(&X, &out, 4); + +#define INTEGERIFY (uint32_t)X.d[0] + +#endif /* !defined(__SSE2__) */ + +/** + * blockmix_salsa8(Bin, Bout, r): + * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r + * bytes in length; the output Bout must also be the same size. + */ +static void blockmix_salsa8(const salsa20_blk_t *restrict Bin, + salsa20_blk_t *restrict Bout, size_t r) +{ + size_t i; + DECL_X + + READ_X(Bin[r * 2 - 1]) + for (i = 0; i < r; i++) { + SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]) + SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]) + } +} + +static uint32_t blockmix_salsa8_xor(const salsa20_blk_t *restrict Bin1, + const salsa20_blk_t *restrict Bin2, salsa20_blk_t *restrict Bout, + size_t r) +{ + size_t i; + DECL_X + +#ifdef PREFETCH + PREFETCH(&Bin2[r * 2 - 1], _MM_HINT_T0) + for (i = 0; i < r - 1; i++) { + PREFETCH(&Bin2[i * 2], _MM_HINT_T0) + PREFETCH(&Bin2[i * 2 + 1], _MM_HINT_T0) + } + PREFETCH(&Bin2[i * 2], _MM_HINT_T0) +#endif + + XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]) + for (i = 0; i < r; i++) { + XOR_X(Bin1[i * 2]) + SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]) + XOR_X(Bin1[i * 2 + 1]) + SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]) + } + + return INTEGERIFY; +} + +/* This is tunable */ +#define Swidth 8 + +/* Not tunable in this 
implementation, hard-coded in a few places */ +#define PWXsimple 2 +#define PWXgather 4 + +/* Derived values. Not tunable except via Swidth above. */ +#define PWXbytes (PWXgather * PWXsimple * 8) +#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8) +#define Smask (((1 << Swidth) - 1) * PWXsimple * 8) +#define Smask2 (((uint64_t)Smask << 32) | Smask) + +#define DECL_SMASK2REG /* empty */ +#define FORCE_REGALLOC_3 /* empty */ +#define MAYBE_MEMORY_BARRIER /* empty */ + +#if 0 //def __SSE2__ +... +#else /* !defined(__SSE2__) */ + +#define PWXFORM_SIMD(x0, x1) { \ + uint64_t x = x0 & Smask2; \ + uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \ + uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \ + x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \ + x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \ +} + +#define PWXFORM_ROUND \ + PWXFORM_SIMD(X.d[0], X.d[1]) \ + PWXFORM_SIMD(X.d[2], X.d[3]) \ + PWXFORM_SIMD(X.d[4], X.d[5]) \ + PWXFORM_SIMD(X.d[6], X.d[7]) +#endif + +/* + * This offset helps address the 256-byte write block via the single-byte + * displacements encodable in x86(-64) instructions. It is needed because the + * displacements are signed. Without it, we'd get 4-byte displacements for + * half of the writes. Setting it to 0x80 instead of 0x7c would avoid needing + * a displacement for one of the writes, but then the LEA instruction would + * need a 4-byte displacement. 
+ */ +#define PWXFORM_WRITE_OFFSET 0x7c + +#define PWXFORM_WRITE \ + WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)) \ + Sw += 64; + +#define PWXFORM { \ + uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \ + FORCE_REGALLOC_3 \ + MAYBE_MEMORY_BARRIER \ + PWXFORM_ROUND \ + PWXFORM_ROUND PWXFORM_WRITE \ + PWXFORM_ROUND PWXFORM_WRITE \ + PWXFORM_ROUND PWXFORM_WRITE \ + PWXFORM_ROUND PWXFORM_WRITE \ + PWXFORM_ROUND \ + w = (w + 64 * 4) & Smask2; \ + { \ + uint8_t *Stmp = S2; \ + S2 = S1; \ + S1 = S0; \ + S0 = Stmp; \ + } \ +} + +typedef struct { + uint8_t *S0, *S1, *S2; + size_t w; +} pwxform_ctx_t; + +#define Salloc (Sbytes + ((sizeof(pwxform_ctx_t) + 63) & ~63U)) + +/** + * blockmix_pwxform(Bin, Bout, r, S): + * Compute Bout = BlockMix_pwxform{salsa20/2, r, S}(Bin). The input Bin must + * be 128r bytes in length; the output Bout must also be the same size. + */ +static void blockmix(const salsa20_blk_t *restrict Bin, + salsa20_blk_t *restrict Bout, size_t r, pwxform_ctx_t *restrict ctx) +{ + uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; + size_t w = ctx->w; + size_t i; + DECL_X + + /* Convert count of 128-byte blocks to max index of 64-byte block */ + r = r * 2 - 1; + + READ_X(Bin[r]) + + DECL_SMASK2REG + + i = 0; + do { + XOR_X(Bin[i]) + PWXFORM + if (unlikely(i >= r)) + break; + WRITE_X(Bout[i]) + i++; + } while (1); + + ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; + ctx->w = w; + + SALSA20_2(Bout[i]) +} + +static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, + const salsa20_blk_t *restrict Bin2, salsa20_blk_t *Bout, + size_t r, int Bin2_in_ROM, pwxform_ctx_t *restrict ctx) +{ + uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; + size_t w = ctx->w; + size_t i; + DECL_X + + /* Convert count of 128-byte blocks to max index of 64-byte block */ + r = r * 2 - 1; + +#ifdef PREFETCH + if (Bin2_in_ROM) { + PREFETCH(&Bin2[r], _MM_HINT_NTA) + for (i = 0; i < r; i++) { + PREFETCH(&Bin2[i], _MM_HINT_NTA) + } + } else { + PREFETCH(&Bin2[r], _MM_HINT_T0) + for (i = 
0; i < r; i++) { + PREFETCH(&Bin2[i], _MM_HINT_T0) + } + } +#else + (void)Bin2_in_ROM; /* unused */ +#endif + + XOR_X_2(Bin1[r], Bin2[r]) + + DECL_SMASK2REG + + i = 0; + r--; + do { + XOR_X(Bin1[i]) + XOR_X(Bin2[i]) + PWXFORM + WRITE_X(Bout[i]) + + XOR_X(Bin1[i + 1]) + XOR_X(Bin2[i + 1]) + PWXFORM + + if (unlikely(i >= r)) + break; + + WRITE_X(Bout[i + 1]) + + i += 2; + } while (1); + i++; + + ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; + ctx->w = w; + + SALSA20_2(Bout[i]) + + return INTEGERIFY; +} + +static uint32_t blockmix_xor_save(salsa20_blk_t *restrict Bin1out, + salsa20_blk_t *restrict Bin2, + size_t r, pwxform_ctx_t *restrict ctx) +{ + uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; + size_t w = ctx->w; + size_t i; + DECL_X + DECL_Y + + /* Convert count of 128-byte blocks to max index of 64-byte block */ + r = r * 2 - 1; + +#ifdef PREFETCH + PREFETCH(&Bin2[r], _MM_HINT_T0) + for (i = 0; i < r; i++) { + PREFETCH(&Bin2[i], _MM_HINT_T0) + } +#endif + + XOR_X_2(Bin1out[r], Bin2[r]) + + DECL_SMASK2REG + + i = 0; + r--; + do { + XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]) + PWXFORM + WRITE_X(Bin1out[i]) + + XOR_X_WRITE_XOR_Y_2(Bin2[i + 1], Bin1out[i + 1]) + PWXFORM + + if (unlikely(i >= r)) + break; + + WRITE_X(Bin1out[i + 1]) + + i += 2; + } while (1); + i++; + + ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; + ctx->w = w; + + SALSA20_2(Bin1out[i]) + + return INTEGERIFY; +} + +/** + * integerify(B, r): + * Return the result of parsing B_{2r-1} as a little-endian integer. + */ +static inline uint32_t integerify(const salsa20_blk_t *B, size_t r) +{ +/* + * Our 64-bit words are in host byte order, which is why we don't just read + * w[0] here (would be wrong on big-endian). Also, our 32-bit words are + * SIMD-shuffled (so the next 32 bits would be part of d[6]), but currently + * this does not matter as we only care about the least significant 32 bits. 
+ */ + return (uint32_t)B[2 * r - 1].d[0]; +} + +/** + * smix1(B, r, N, flags, V, NROM, VROM, XY, ctx): + * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage XY must be 128r+64 bytes in length. N must be even and at least 4. + * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY + * to a multiple of at least 16 bytes. + */ +static void smix1(uint8_t *B, size_t r, uint32_t N, yescrypt_flags_t flags, + salsa20_blk_t *V, uint32_t NROM, const salsa20_blk_t *VROM, + salsa20_blk_t *XY, pwxform_ctx_t *ctx) +{ + size_t s = 2 * r; + salsa20_blk_t *X = V, *Y = &V[s]; + uint32_t i, j; + + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64]; + salsa20_blk_t *tmp = Y; + salsa20_blk_t *dst = &X[i]; + size_t k; + for (k = 0; k < 16; k++) + tmp->w[k] = le32dec((const uint8_t *) &src->w[k]); + salsa20_simd_shuffle(tmp, dst); + } + + if (VROM) { + uint32_t n; + const salsa20_blk_t *V_j; + + V_j = &VROM[(NROM - 1) * s]; + j = blockmix_xor(X, V_j, Y, r, 1, ctx) & (NROM - 1); + V_j = &VROM[j * s]; + X = Y + s; + j = blockmix_xor(Y, V_j, X, r, 1, ctx); + + for (n = 2; n < N; n <<= 1) { + uint32_t m = (n < N / 2) ? n : (N - 1 - n); + for (i = 1; i < m; i += 2) { + j &= n - 1; + j += i - 1; + V_j = &V[j * s]; + Y = X + s; + j = blockmix_xor(X, V_j, Y, r, 0, ctx) & (NROM - 1); + V_j = &VROM[j * s]; + X = Y + s; + j = blockmix_xor(Y, V_j, X, r, 1, ctx); + } + } + n >>= 1; + + j &= n - 1; + j += N - 2 - n; + V_j = &V[j * s]; + Y = X + s; + j = blockmix_xor(X, V_j, Y, r, 0, ctx) & (NROM - 1); + V_j = &VROM[j * s]; + blockmix_xor(Y, V_j, XY, r, 1, ctx); + } else if (flags & YESCRYPT_RW) { + uint32_t n; + salsa20_blk_t *V_j; + + blockmix(X, Y, r, ctx); + X = Y + s; + blockmix(Y, X, r, ctx); + j = integerify(X, r); + + for (n = 2; n < N; n <<= 1) { + uint32_t m = (n < N / 2) ? 
n : (N - 1 - n); + for (i = 1; i < m; i += 2) { + Y = X + s; + j &= n - 1; + j += i - 1; + V_j = &V[j * s]; + j = blockmix_xor(X, V_j, Y, r, 0, ctx); + j &= n - 1; + j += i; + V_j = &V[j * s]; + X = Y + s; + j = blockmix_xor(Y, V_j, X, r, 0, ctx); + } + } + n >>= 1; + + j &= n - 1; + j += N - 2 - n; + V_j = &V[j * s]; + Y = X + s; + j = blockmix_xor(X, V_j, Y, r, 0, ctx); + j &= n - 1; + j += N - 1 - n; + V_j = &V[j * s]; + blockmix_xor(Y, V_j, XY, r, 0, ctx); + } else { + N -= 2; + do { + blockmix_salsa8(X, Y, r); + X = Y + s; + blockmix_salsa8(Y, X, r); + Y = X + s; + } while ((N -= 2)); + + blockmix_salsa8(X, Y, r); + blockmix_salsa8(Y, XY, r); + } + + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = &XY[i]; + salsa20_blk_t *tmp = &XY[s]; + salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; + size_t k; + for (k = 0; k < 16; k++) + le32enc((uint8_t *)&tmp->w[k], src->w[k]); + salsa20_simd_unshuffle(tmp, dst); + } +} + +/** + * smix2(B, r, N, Nloop, flags, V, NROM, VROM, XY, ctx): + * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage XY must be 256r bytes in length. N must be a power of 2 and at + * least 2. Nloop must be even. The array V must be aligned to a multiple of + * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes. 
+ */ +static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, + yescrypt_flags_t flags, salsa20_blk_t *V, uint32_t NROM, + const salsa20_blk_t *VROM, salsa20_blk_t *XY, pwxform_ctx_t *ctx) +{ + size_t s = 2 * r; + salsa20_blk_t *X = XY, *Y = &XY[s]; + uint32_t i, j; + + if (Nloop == 0) + return; + + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64]; + salsa20_blk_t *tmp = Y; + salsa20_blk_t *dst = &X[i]; + size_t k; + for (k = 0; k < 16; k++) + tmp->w[k] = le32dec((const uint8_t *)&src->w[k]); + salsa20_simd_shuffle(tmp, dst); + } + + j = integerify(X, r) & (N - 1); + +/* + * Normally, VROM implies YESCRYPT_RW, but we check for these separately + * because our SMix resets YESCRYPT_RW for the smix2() calls operating on the + * entire V when p > 1. + */ + if (VROM && (flags & YESCRYPT_RW)) { + do { + salsa20_blk_t *V_j = &V[j * s]; + const salsa20_blk_t *VROM_j; + j = blockmix_xor_save(X, V_j, r, ctx) & (NROM - 1); + VROM_j = &VROM[j * s]; + j = blockmix_xor(X, VROM_j, X, r, 1, ctx) & (N - 1); + } while (Nloop -= 2); + } else if (VROM) { + do { + const salsa20_blk_t *V_j = &V[j * s]; + j = blockmix_xor(X, V_j, X, r, 0, ctx) & (NROM - 1); + V_j = &VROM[j * s]; + j = blockmix_xor(X, V_j, X, r, 1, ctx) & (N - 1); + } while (Nloop -= 2); + } else if (flags & YESCRYPT_RW) { + do { + salsa20_blk_t *V_j = &V[j * s]; + j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1); + V_j = &V[j * s]; + j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1); + } while (Nloop -= 2); + } else if (ctx) { + do { + const salsa20_blk_t *V_j = &V[j * s]; + j = blockmix_xor(X, V_j, X, r, 0, ctx) & (N - 1); + V_j = &V[j * s]; + j = blockmix_xor(X, V_j, X, r, 0, ctx) & (N - 1); + } while (Nloop -= 2); + } else { + do { + const salsa20_blk_t *V_j = &V[j * s]; + j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1); + V_j = &V[j * s]; + j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1); + } while (Nloop -= 2); + } + + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t 
*src = &X[i]; + salsa20_blk_t *tmp = Y; + salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; + size_t k; + for (k = 0; k < 16; k++) + le32enc((uint8_t *)&tmp->w[k], src->w[k]); + salsa20_simd_unshuffle(tmp, dst); + } +} + +/** + * p2floor(x): + * Largest power of 2 not greater than argument. + */ +static uint64_t p2floor(uint64_t x) +{ + uint64_t y; + while ((y = x & (x - 1))) + x = y; + return x; +} + +/** + * smix(B, r, N, p, t, flags, V, NROM, VROM, XY, S, passwd): + * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the + * temporary storage V must be 128rN bytes in length; the temporary storage + * XY must be 256r or 256rp bytes in length (the larger size is required with + * OpenMP-enabled builds). N must be a power of 2 and at least 4. The array V + * must be aligned to a multiple of 64 bytes, and arrays B and XY to a multiple + * of at least 16 bytes (aligning them to 64 bytes as well saves cache lines + * and helps avoid false sharing in OpenMP-enabled builds when p > 1, but it + * might also result in cache bank conflicts). 
+ */ +static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, + yescrypt_flags_t flags, + salsa20_blk_t *V, uint32_t NROM, const salsa20_blk_t *VROM, + salsa20_blk_t *XY, uint8_t *S, uint8_t *passwd) +{ + size_t s = 2 * r; + uint32_t Nchunk; + uint64_t Nloop_all, Nloop_rw; + uint32_t i; + + Nchunk = N / p; + Nloop_all = Nchunk; + if (flags & YESCRYPT_RW) { + if (t <= 1) { + if (t) + Nloop_all *= 2; /* 2/3 */ + Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */ + } else { + Nloop_all *= t - 1; + } + } else if (t) { + if (t == 1) + Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */ + Nloop_all *= t; + } + + Nloop_rw = 0; + if (flags & YESCRYPT_INIT_SHARED) + Nloop_rw = Nloop_all; + else if (flags & YESCRYPT_RW) + Nloop_rw = Nloop_all / p; + + Nchunk &= ~(uint32_t)1; /* round down to even */ + Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */ + Nloop_rw++; Nloop_rw &= ~(uint64_t)1; /* round up to even */ + + for (i = 0; i < p; i++) { + uint32_t Vchunk = i * Nchunk; + uint32_t Np = (i < p - 1) ? 
Nchunk : (N - Vchunk); + uint8_t *Bp = &B[128 * r * i]; + salsa20_blk_t *Vp = &V[Vchunk * s]; + salsa20_blk_t *XYp = XY; + pwxform_ctx_t *ctx_i = NULL; + if (flags & YESCRYPT_RW) { + uint8_t *Si = S + i * Salloc; + smix1(Bp, 1, Sbytes / 128, 0 /* no flags */, + (salsa20_blk_t *)Si, 0, NULL, XYp, NULL); + ctx_i = (pwxform_ctx_t *)(Si + Sbytes); + ctx_i->S2 = Si; + ctx_i->S1 = Si + Sbytes / 3; + ctx_i->S0 = Si + Sbytes / 3 * 2; + ctx_i->w = 0; + if (i == 0) + HMAC_SHA256_Buf(Bp + (128 * r - 64), 64, + passwd, 32, passwd); + } + smix1(Bp, r, Np, flags, Vp, NROM, VROM, XYp, ctx_i); + smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, + NROM, VROM, XYp, ctx_i); + } + + if (Nloop_all > Nloop_rw) { + for (i = 0; i < p; i++) { + uint8_t *Bp = &B[128 * r * i]; + salsa20_blk_t *XYp = XY; + pwxform_ctx_t *ctx_i = NULL; + if (flags & YESCRYPT_RW) { + uint8_t *Si = S + i * Salloc; + ctx_i = (pwxform_ctx_t *)(Si + Sbytes); + } + smix2(Bp, r, N, Nloop_all - Nloop_rw, + flags & (yescrypt_flags_t)~YESCRYPT_RW, + V, NROM, VROM, XYp, ctx_i); + } + } +} + +/** + * yescrypt_kdf_body(shared, local, passwd, passwdlen, salt, saltlen, + * flags, N, r, p, t, NROM, buf, buflen): + * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, + * p, buflen), or a revision of scrypt as requested by flags and shared, and + * write the result into buf. + * + * shared and flags may request special modes as described in yescrypt.h. + * + * local is the thread-local data structure, allowing to preserve and reuse a + * memory allocation across calls, thereby reducing its overhead. + * + * t controls computation time while not affecting peak memory usage. + * + * Return 0 on success; or -1 on error. + * + * This optimized implementation currently limits N to the range from 4 to + * 2^31, but other implementations might not. 
+ */
+static int yescrypt_kdf_body(const yescrypt_shared_t *shared,
+    yescrypt_local_t *local,
+    const uint8_t *passwd, size_t passwdlen,
+    const uint8_t *salt, size_t saltlen,
+    yescrypt_flags_t flags, uint64_t N, uint32_t r, uint32_t p, uint32_t t,
+    uint64_t NROM,
+    uint8_t *buf, size_t buflen)
+{
+	yescrypt_region_t tmp; /* scratch region; only allocated on the YESCRYPT_INIT_SHARED path */
+	const salsa20_blk_t *VROM;
+	size_t B_size, V_size, XY_size, need;
+	uint8_t *B, *S;
+	salsa20_blk_t *V, *XY;
+	uint8_t sha256[32];
+	uint8_t dk[sizeof(sha256)], *dkp = buf;
+
+	/* Sanity-check parameters */
+	switch (flags & YESCRYPT_MODE_MASK) {
+	case 0: /* classic scrypt - can't have anything non-standard */
+		if (flags || t || NROM)
+			goto out_EINVAL;
+		break;
+	case YESCRYPT_WORM:
+		if (flags != YESCRYPT_WORM || NROM)
+			goto out_EINVAL;
+		break;
+	case YESCRYPT_RW:
+		if (flags != (flags & YESCRYPT_KNOWN_FLAGS))
+			goto out_EINVAL;
+#if PWXsimple == 2 && PWXgather == 4 && Sbytes == 12288
+		if ((flags & YESCRYPT_RW_FLAVOR_MASK) ==
+		    (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 |
+		    YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K))
+			break; /* the one flavor this build accepts */
+#else
+#error "Unsupported pwxform settings"
+#endif
+		/* FALLTHRU */
+	default:
+		goto out_EINVAL;
+	}
+#if SIZE_MAX > UINT32_MAX
+	if (buflen > (((uint64_t)1 << 32) - 1) * 32)
+		goto out_EINVAL;
+#endif
+	if ((uint64_t)r * (uint64_t)p >= 1 << 30) /* r * p must stay below 2^30 */
+		goto out_EINVAL;
+	if (N > UINT32_MAX)
+		goto out_EINVAL;
+	if ((N & (N - 1)) != 0 || N <= 3 || r < 1 || p < 1) /* N: power of two, at least 4 */
+		goto out_EINVAL;
+	if (r > SIZE_MAX / 256 / p ||
+	    N > SIZE_MAX / 128 / r) /* keeps 256 * r * p and 128 * r * N within size_t */
+		goto out_EINVAL;
+	if (flags & YESCRYPT_RW) {
+		/* p cannot be greater than SIZE_MAX/Salloc on 64-bit systems,
+		   but it can on 32-bit systems.  */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+		if (N / p <= 3 || p > SIZE_MAX / Salloc)
+			goto out_EINVAL;
+#pragma GCC diagnostic pop
+	}
+
+	VROM = NULL;
+	if (shared) {
+		uint64_t expected_size = (size_t)128 * r * NROM;
+		if ((NROM & (NROM - 1)) != 0 ||
+		    NROM <= 1 || NROM > UINT32_MAX ||
+		    shared->aligned_size < expected_size)
+			goto out_EINVAL;
+		if (!(flags & YESCRYPT_INIT_SHARED)) {
+			uint64_t *tag = (uint64_t *)
+			    ((uint8_t *)shared->aligned + expected_size - 48); /* the two ROM tags start 48 bytes before the end of the ROM */
+			if (tag[0] != YESCRYPT_ROM_TAG1 || tag[1] != YESCRYPT_ROM_TAG2)
+				goto out_EINVAL;
+		}
+		VROM = shared->aligned;
+	} else {
+		if (NROM)
+			goto out_EINVAL;
+	}
+
+	/* Allocate memory */
+	V = NULL;
+	V_size = (size_t)128 * r * N;
+	need = V_size;
+	if (flags & YESCRYPT_INIT_SHARED) { /* here V is the whole of local; B, XY and S go into tmp below */
+		if (local->aligned_size < need) {
+			if (local->base || local->aligned ||
+			    local->base_size || local->aligned_size) /* a too-small existing region is rejected, not resized */
+				goto out_EINVAL;
+			if (!alloc_region(local, need))
+				return -1;
+		}
+		if (flags & YESCRYPT_ALLOC_ONLY)
+			return -2; /* expected "failure" */
+		V = (salsa20_blk_t *)local->aligned;
+		need = 0;
+	}
+	B_size = (size_t)128 * r * p;
+	need += B_size;
+	if (need < B_size) /* the sum wrapped around */
+		goto out_EINVAL;
+	XY_size = (size_t)256 * r;
+	need += XY_size;
+	if (need < XY_size)
+		goto out_EINVAL;
+	if (flags & YESCRYPT_RW) {
+		size_t S_size = (size_t)Salloc * p;
+		need += S_size;
+		if (need < S_size)
+			goto out_EINVAL;
+	}
+	if (flags & YESCRYPT_INIT_SHARED) {
+		if (!alloc_region(&tmp, need))
+			return -1;
+		B = (uint8_t *)tmp.aligned;
+		XY = (salsa20_blk_t *)((uint8_t *)B + B_size);
+	} else {
+		init_region(&tmp); /* so that free_region(&tmp) at the end has nothing to release */
+		if (local->aligned_size < need) {
+			if (free_region(local))
+				return -1;
+			if (!alloc_region(local, need))
+				return -1;
+		}
+		if (flags & YESCRYPT_ALLOC_ONLY)
+			return -3; /* expected "failure" */
+		B = (uint8_t *)local->aligned; /* layout inside local: B, then V, then XY (then S, see below) */
+		V = (salsa20_blk_t *)((uint8_t *)B + B_size);
+		XY = (salsa20_blk_t *)((uint8_t *)V + V_size);
+	}
+	S = NULL;
+	if (flags & YESCRYPT_RW)
+		S = (uint8_t *)XY + XY_size;
+
+	if (flags) {
+		HMAC_SHA256_Buf("yescrypt-prehash",
+		    (flags & YESCRYPT_PREHASH) ? 16 : 8,
+		    passwd, passwdlen, sha256); /* length 16 takes the whole literal, length 8 only its leading "yescrypt" */
+		passwd = sha256;
+		passwdlen = sizeof(sha256);
+	}
+
+	PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size);
+
+	if (flags)
+		memcpy(sha256, B, sizeof(sha256)); /* passwd points at sha256 here, so the PBKDF2 calls below see these 32 bytes too */
+
+	if (p == 1 || (flags & YESCRYPT_RW)) {
+		smix(B, r, N, p, t, flags, V, NROM, VROM, XY, S, sha256);
+	} else {
+		uint32_t i;
+		for (i = 0; i < p; i++) { /* otherwise each 128 * r byte slice of B is mixed on its own with p = 1, reusing V and XY */
+			smix(&B[(size_t)128 * r * i], r, N, 1, t, flags, V,
+			    NROM, VROM, XY, NULL, NULL);
+		}
+	}
+
+	dkp = buf;
+	if (flags && buflen < sizeof(dk)) { /* buf is shorter than 32 bytes: derive a full-size key into dk for the ClientKey step */
+		PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, dk, sizeof(dk));
+		dkp = dk;
+	}
+
+	PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen);
+
+	/*
+	 * Except when computing classic scrypt, allow all computation so far
+	 * to be performed on the client.  The final steps below match those of
+	 * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so
+	 * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of
+	 * SCRAM's use of SHA-1) would be usable with yescrypt hashes.
+	 */
+	if (flags && !(flags & YESCRYPT_PREHASH)) {
+		/* Compute ClientKey */
+		HMAC_SHA256_Buf(dkp, sizeof(dk), "Client Key", 10, sha256);
+		/* Compute StoredKey */
+		{
+			size_t clen = buflen;
+			if (clen > sizeof(dk))
+				clen = sizeof(dk);
+			SHA256_Buf(sha256, sizeof(sha256), dk);
+			memcpy(buf, dk, clen); /* at most 32 bytes of buf are replaced; anything beyond keeps the PBKDF2 output */
+		}
+	}
+
+	if (flags) {
+		explicit_bzero(sha256, sizeof(sha256));
+		explicit_bzero(dk, sizeof(dk));
+	}
+
+	if (free_region(&tmp)) {
+		explicit_bzero(buf, buflen); /* must preserve errno */
+		return -1;
+	}
+
+	/* Success! */
+	return 0;
+
+out_EINVAL:
+	errno = EINVAL;
+	return -1;
+}
+
+/**
+ * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params,
+ *     buf, buflen):
+ * Compute scrypt or its revision as requested by the parameters.
The inputs
+ * to this function are the same as those for yescrypt_kdf_body() above, with
+ * the addition of g, which controls hash upgrades (0 for no upgrades so far).
+ *
+ * A non-zero g is currently rejected with EINVAL.
+ *
+ * For YESCRYPT_RW without YESCRYPT_INIT_SHARED, when N / p >= 0x100 and
+ * N / p * r >= 0x20000, a cheaper pass (N >> 6, t = 0, YESCRYPT_PREHASH set)
+ * runs first and its 32-byte result replaces passwd for the main computation.
+ *
+ * Return 0 on success; otherwise the non-zero result of the step that failed
+ * (-1 with errno set for the checks made in this function).
+ */
+int yescrypt_kdf(const yescrypt_shared_t *shared,
+    yescrypt_local_t *local,
+    const uint8_t *passwd, size_t passwdlen,
+    const uint8_t *salt, size_t saltlen,
+    const yescrypt_params_t *params,
+    uint8_t *buf, size_t buflen)
+{
+	yescrypt_flags_t flags = params->flags;
+	uint64_t N = params->N;
+	uint32_t r = params->r;
+	uint32_t p = params->p;
+	uint32_t t = params->t;
+	uint32_t g = params->g;
+	uint64_t NROM = params->NROM;
+	uint8_t dk[32]; /* output of the prehash pass, if one is made */
+	int retval;
+
+	/* Support for hash upgrades has been temporarily removed */
+	if (g) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	if ((flags & (YESCRYPT_RW | YESCRYPT_INIT_SHARED)) == YESCRYPT_RW
+	 && p >= 1
+	 && N / p >= 0x100
+	 && N / p * r >= 0x20000
+	) {
+		if (yescrypt_kdf_body(shared, local,
+		    passwd, passwdlen, salt, saltlen,
+		    flags | YESCRYPT_ALLOC_ONLY, N, r, p, t, NROM,
+		    buf, buflen) != -3 /* -3 is the ALLOC_ONLY "memory is ready" result for non-INIT_SHARED calls */
+		) {
+			errno = EINVAL; /* NOTE(review): this replaces whatever errno the callee set (e.g. ENOMEM from alloc_region) - confirm that is intended */
+			return -1;
+		}
+		retval = yescrypt_kdf_body(shared, local,
+				passwd, passwdlen, salt, saltlen,
+				flags | YESCRYPT_PREHASH, N >> 6, r, p, 0, NROM,
+				dk, sizeof(dk));
+		if (retval)
+			return retval;
+		passwd = dk;
+		passwdlen = sizeof(dk);
+	}
+
+	retval = yescrypt_kdf_body(shared, local,
+			passwd, passwdlen, salt, saltlen,
+			flags, N, r, p, t, NROM, buf, buflen);
+#ifndef SKIP_MEMZERO
+	if (passwd == dk) /* true only when the prehash pass above ran */
+		explicit_bzero(dk, sizeof(dk));
+#endif
+	return retval;
+}
diff --git a/libbb/yescrypt/alg-yescrypt-platform.c b/libbb/yescrypt/alg-yescrypt-platform.c
new file mode 100644
index 000000000..09809c4b0
--- /dev/null
+++ b/libbb/yescrypt/alg-yescrypt-platform.c
@@ -0,0 +1,65 @@
+/*-
+ * Copyright 2013-2018,2022 Alexander Peslyak
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * alloc_region(region, size):
+ * Obtain size usable bytes that start on a 64-byte boundary and record the
+ * allocation in region.  Returns the aligned pointer, or NULL on failure, in
+ * which case all four fields of region are cleared.
+ */
+static void *alloc_region(yescrypt_region_t *region, size_t size)
+{
+	uint8_t *raw = NULL;
+	uint8_t *start = NULL;
+
+#if 0 //def MAP_ANON - use mmap, possibly
+//(if defined(MAP_HUGETLB) && defined(MAP_HUGE_2MB)) using 2MB pages
+#else /* mmap not available */
+	if (size + 63 >= size) {
+		/* 63 spare bytes always leave room to reach a multiple of 64 */
+		raw = malloc(size + 63);
+		if (raw != NULL) {
+			uintptr_t over = ((uintptr_t)raw + 63) & 63;
+			start = raw + 63 - over;
+		}
+	} else {
+		/* size + 63 wrapped around */
+		errno = ENOMEM;
+	}
+#endif
+	region->base = raw;
+	region->aligned = start;
+	if (raw != NULL) {
+		region->base_size = size;
+		region->aligned_size = size;
+	} else {
+		region->base_size = 0;
+		region->aligned_size = 0;
+	}
+	return start;
+}
+
+/* Put region into the "nothing allocated" state. */
+static inline void init_region(yescrypt_region_t *region)
+{
+	region->base = NULL;
+	region->aligned = NULL;
+	region->base_size = 0;
+	region->aligned_size = 0;
+}
+
+/*
+ * Release the memory owned by region, if any, and reset it.
+ * Returns 0 (the disabled munmap variant could return -1).
+ */
+static int free_region(yescrypt_region_t *region)
+{
+	if (region->base != NULL) {
+#if 0 //def MAP_ANON
+		if (munmap(region->base, region->base_size))
+			return -1;
+#else
+		free(region->base);
+#endif
+	}
+	init_region(region);
+	return 0;
+}
diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h
new file mode 100644
index 000000000..e3c7e6398
--- /dev/null
+++ b/libbb/yescrypt/alg-yescrypt.h
@@ -0,0 +1,355 @@
+/*-
+ * Copyright 2009 Colin Percival
+ * Copyright 2013-2018 Alexander Peslyak
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ */
+
+#if 0 //UNUSED
+/**
+ * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
+ * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
+ * p, buflen) and write the result into buf.  The parameters r, p, and buflen
+ * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32.  The parameter N
+ * must be a power of 2 greater than 1.
+ *
+ * Return 0 on success; or -1 on error.
+ *
+ * MT-safe as long as buf is local to the thread.
+ */
+extern int crypto_scrypt(const uint8_t *passwd, size_t passwdlen,
+    const uint8_t *salt, size_t saltlen,
+    uint64_t N, uint32_t r, uint32_t p, uint8_t *buf, size_t buflen);
+#endif
+
+/**
+ * Internal type used by the memory allocator.  Please do not use it directly.
+ * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since
+ * they might differ from each other in a future version.
+ */
+typedef struct {
+	void *base, *aligned; /* base: pointer obtained from the allocator; aligned: 64-byte-aligned start within it */
+	size_t base_size, aligned_size; /* both are 0 while nothing is allocated */
+} yescrypt_region_t;
+
+/**
+ * Types for shared (ROM) and thread-local (RAM) data structures.
+ */
+typedef yescrypt_region_t yescrypt_shared_t;
+typedef yescrypt_region_t yescrypt_local_t;
+
+/**
+ * Two 64-bit tags placed 48 bytes before the end of a ROM in host byte
+ * endianness (and followed by 32 bytes of the ROM digest).
+ */
+#define YESCRYPT_ROM_TAG1 0x7470797263736579 /* "yescrypt" */
+#define YESCRYPT_ROM_TAG2 0x687361684d4f522d /* "-ROMhash" */
+
+/**
+ * Type and possible values for the flags argument of yescrypt_kdf(),
+ * yescrypt_encode_params_r(), yescrypt_encode_params().  Most of these may be
+ * OR'ed together, except that YESCRYPT_WORM stands on its own.
+ * Please refer to the description of yescrypt_kdf() below for the meaning of
+ * these flags.
+ */
+typedef uint32_t yescrypt_flags_t;
+/* Public */
+#define YESCRYPT_WORM			1
+#define YESCRYPT_RW			0x002
+#define YESCRYPT_ROUNDS_3		0x000 /* ROUNDS_*: bit 2 */
+#define YESCRYPT_ROUNDS_6		0x004
+#define YESCRYPT_GATHER_1		0x000 /* GATHER_*: bits 3-4 */
+#define YESCRYPT_GATHER_2		0x008
+#define YESCRYPT_GATHER_4		0x010
+#define YESCRYPT_GATHER_8		0x018
+#define YESCRYPT_SIMPLE_1		0x000 /* SIMPLE_*: bits 5-6 */
+#define YESCRYPT_SIMPLE_2		0x020
+#define YESCRYPT_SIMPLE_4		0x040
+#define YESCRYPT_SIMPLE_8		0x060
+#define YESCRYPT_SBOX_6K		0x000 /* SBOX_*: bits 7-9 */
+#define YESCRYPT_SBOX_12K		0x080
+#define YESCRYPT_SBOX_24K		0x100
+#define YESCRYPT_SBOX_48K		0x180
+#define YESCRYPT_SBOX_96K		0x200
+#define YESCRYPT_SBOX_192K		0x280
+#define YESCRYPT_SBOX_384K		0x300
+#define YESCRYPT_SBOX_768K		0x380
+/* Only valid for yescrypt_init_shared() */
+#define YESCRYPT_SHARED_PREALLOCATED	0x10000
+#ifdef YESCRYPT_INTERNAL
+/* Private */
+#define YESCRYPT_MODE_MASK		0x003 /* covers YESCRYPT_WORM and YESCRYPT_RW */
+#define YESCRYPT_RW_FLAVOR_MASK		0x3fc /* covers the ROUNDS, GATHER, SIMPLE and SBOX fields (bits 2-9) */
+#define YESCRYPT_INIT_SHARED		0x01000000
+#define YESCRYPT_ALLOC_ONLY		0x08000000
+#define YESCRYPT_PREHASH		0x10000000
+#endif
+
+#define YESCRYPT_RW_DEFAULTS \
+	(YESCRYPT_RW | \
+	YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | \
+	YESCRYPT_SBOX_12K)
+
+#define YESCRYPT_DEFAULTS YESCRYPT_RW_DEFAULTS /* the same flavor bits are the only ones yescrypt_kdf_body() accepts with YESCRYPT_RW */
+
+#ifdef YESCRYPT_INTERNAL
+#define YESCRYPT_KNOWN_FLAGS \
+	(YESCRYPT_MODE_MASK | YESCRYPT_RW_FLAVOR_MASK | \
+	YESCRYPT_SHARED_PREALLOCATED | \
+	YESCRYPT_INIT_SHARED | YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH)
+#endif
+
+/**
+ * yescrypt parameters combined into one struct.
N, r, p are the same as in
+ * classic scrypt, except that the meaning of p changes when YESCRYPT_RW is
+ * set.  flags, t, g, NROM are special to yescrypt.
+ */
+typedef struct {
+	yescrypt_flags_t flags; /* OR of the YESCRYPT_* bits defined above */
+	uint64_t N;
+	uint32_t r, p, t, g; /* g must currently be 0: yescrypt_kdf() rejects anything else with EINVAL */
+	uint64_t NROM; /* must be 0 when no shared (ROM) region is passed */
+} yescrypt_params_t;
+
+/**
+ * A 256-bit yescrypt hash, or a hash encryption key (which may itself have
+ * been derived as a yescrypt hash of a human-specified key string).
+ */
+typedef union {
+	unsigned char uc[32];
+	uint64_t u64[4];
+} yescrypt_binary_t;
+
+/* How many chars base-64 encoded bytes require? (6 bits per char, rounded up) */
+#define BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6)
+/* The /etc/passwd-style hash is "<prefix>$<hash><NUL>" */
+/*
+ * "$y$", up to 8 params of up to 6 chars each, '$', salt
+ * Alternatively, but that's smaller:
+ * "$7$", 3 params encoded as 1+5+5 chars, salt
+ */
+#define PREFIX_LEN (3 + 8 * 6 + 1 + BYTES2CHARS(32))
+
+#define HASH_SIZE sizeof(yescrypt_binary_t) /* bytes */
+#define HASH_LEN BYTES2CHARS(HASH_SIZE) /* 43 chars for the 32-byte hash */
+
+
+/**
+ * yescrypt_init_shared(shared, seed, seedlen, params):
+ * Optionally allocate memory for and initialize the shared (ROM) data
+ * structure.  The parameters flags, NROM, r, p, and t specify how the ROM is
+ * to be initialized, and seed and seedlen specify the initial seed affecting
+ * the data with which the ROM is filled.
+ *
+ * Return 0 on success; or -1 on error.
+ *
+ * If bit YESCRYPT_SHARED_PREALLOCATED in flags is set, then memory for the
+ * ROM is assumed to have been preallocated by the caller, with shared->aligned
+ * being the start address of the ROM and shared->aligned_size being its size
+ * (which must be sufficient for NROM, r, p).  This may be used e.g. when the
+ * ROM is to be placed in a SysV shared memory segment allocated by the caller.
+ *
+ * MT-safe as long as shared is local to the thread.
+ */ +extern int yescrypt_init_shared(yescrypt_shared_t *shared, + const uint8_t *seed, size_t seedlen, const yescrypt_params_t *params); + +/** + * yescrypt_digest_shared(shared): + * Extract the previously stored message digest of the provided yescrypt ROM. + * + * Return pointer to the message digest on success; or NULL on error. + * + * MT-unsafe. + */ +extern yescrypt_binary_t *yescrypt_digest_shared(yescrypt_shared_t *shared); + +/** + * yescrypt_free_shared(shared): + * Free memory that had been allocated with yescrypt_init_shared(). + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as shared is local to the thread. + */ +extern int yescrypt_free_shared(yescrypt_shared_t *shared); + +/** + * yescrypt_init_local(local): + * Initialize the thread-local (RAM) data structure. Actual memory allocation + * is currently fully postponed until a call to yescrypt_kdf() or yescrypt_r(). + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as local is local to the thread. + */ +extern int yescrypt_init_local(yescrypt_local_t *local); + +/** + * yescrypt_free_local(local): + * Free memory that may have been allocated for an initialized thread-local + * (RAM) data structure. + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as local is local to the thread. + */ +extern int yescrypt_free_local(yescrypt_local_t *local); + +/** + * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params, + * buf, buflen): + * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, + * p, buflen), or a revision of scrypt as requested by flags and shared, and + * write the result into buf. The parameters N, r, p, and buflen must satisfy + * the same conditions as with crypto_scrypt(). t controls computation time + * while not affecting peak memory usage (t = 0 is optimal unless higher N*r + * is not affordable while higher t is). g controls hash upgrades (g = 0 for + * no upgrades so far). 
shared and flags may request special modes. local is + * the thread-local data structure, allowing to preserve and reuse a memory + * allocation across calls, thereby reducing processing overhead. + * + * Return 0 on success; or -1 on error. + * + * Classic scrypt is available by setting shared = NULL, flags = 0, and t = 0. + * + * Setting YESCRYPT_WORM enables only minimal deviations from classic scrypt: + * support for the t parameter, and pre- and post-hashing. + * + * Setting YESCRYPT_RW fully enables yescrypt. As a side effect of differences + * between the algorithms, it also prevents p > 1 from growing the threads' + * combined processing time and memory allocation (like it did with classic + * scrypt and YESCRYPT_WORM), treating p as a divider rather than a multiplier. + * + * Passing a shared structure, with ROM contents previously computed by + * yescrypt_init_shared(), enables the use of ROM and requires YESCRYPT_RW. + * + * In order to allow for initialization of the ROM to be split into a separate + * program (or separate invocation of the same program), the shared->aligned + * and shared->aligned_size fields may optionally be set by the caller directly + * (e.g., to a mapped SysV shm segment), without using yescrypt_init_shared(). + * + * local must be initialized with yescrypt_init_local(). + * + * MT-safe as long as local and buf are local to the thread. + */ +extern int yescrypt_kdf(const yescrypt_shared_t *shared, + yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *salt, size_t saltlen, + const yescrypt_params_t *params, + uint8_t *buf, size_t buflen); + +/** + * yescrypt_r(shared, local, passwd, passwdlen, setting, key, buf, buflen): + * Compute and encode an scrypt or enhanced scrypt hash of passwd given the + * parameters and salt value encoded in setting. If shared is not NULL, a ROM + * is used and YESCRYPT_RW is required. 
Otherwise, whether to compute classic + * scrypt, YESCRYPT_WORM (a slight deviation from classic scrypt), or + * YESCRYPT_RW (time-memory tradeoff discouraging modification) is determined + * by the setting string. shared (if not NULL) and local must be initialized + * as described above for yescrypt_kdf(). buf must be large enough (as + * indicated by buflen) to hold the encoded hash string. + * + * Return the encoded hash string on success; or NULL on error. + * + * MT-safe as long as local and buf are local to the thread. + */ +extern uint8_t *yescrypt_r(const yescrypt_shared_t *shared, + yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *setting, + //KEY: const yescrypt_binary_t *key, + uint8_t *buf, size_t buflen); + +#if 0 //UNUSED +/** + * yescrypt(passwd, setting): + * Compute and encode an scrypt or enhanced scrypt hash of passwd given the + * parameters and salt value encoded in setting. Whether to compute classic + * scrypt, YESCRYPT_WORM (a slight deviation from classic scrypt), or + * YESCRYPT_RW (time-memory tradeoff discouraging modification) is determined + * by the setting string. + * + * Return the encoded hash string on success; or NULL on error. + * + * This is a crypt(3)-like interface, which is simpler to use than + * yescrypt_r(), but it is not MT-safe, it does not allow for the use of a ROM, + * and it is slower than yescrypt_r() for repeated calls because it allocates + * and frees memory on each call. + * + * MT-unsafe. + */ +extern uint8_t *yescrypt(const uint8_t *passwd, const uint8_t *setting); + +/** + * yescrypt_reencrypt(hash, from_key, to_key): + * Re-encrypt a yescrypt hash from one key to another. Either key may be NULL + * to indicate unencrypted hash. The encoded hash string is modified in-place. + * + * Return the hash pointer on success; or NULL on error (in which case the hash + * string is left unmodified). + * + * MT-safe as long as hash is local to the thread. 
+ */ +extern uint8_t *yescrypt_reencrypt(uint8_t *hash, + const yescrypt_binary_t *from_key, + const yescrypt_binary_t *to_key); + +/** + * yescrypt_encode_params_r(params, src, srclen, buf, buflen): + * Generate a setting string for use with yescrypt_r() and yescrypt() by + * encoding into it the parameters flags, N, r, p, t, g, and a salt given by + * src (of srclen bytes). buf must be large enough (as indicated by buflen) + * to hold the setting string. + * + * Return the setting string on success; or NULL on error. + * + * MT-safe as long as buf is local to the thread. + */ +extern uint8_t *yescrypt_encode_params_r(const yescrypt_params_t *params, + const uint8_t *src, size_t srclen, + uint8_t *buf, size_t buflen); + + +/** + * yescrypt_encode_params(params, src, srclen): + * Generate a setting string for use with yescrypt_r() and yescrypt(). This + * function is the same as yescrypt_encode_params_r() except that it uses a + * static buffer and thus is not MT-safe. + * + * Return the setting string on success; or NULL on error. + * + * MT-unsafe. 
+ */
+extern uint8_t *yescrypt_encode_params(const yescrypt_params_t *params,
+    const uint8_t *src, size_t srclen);
+#endif
+
+extern const uint8_t *decode64(uint8_t *dst, size_t *dstlen,
+    const uint8_t *src, size_t srclen);
+extern uint8_t *encode64(uint8_t *dst, size_t dstlen,
+    const uint8_t *src, size_t srclen);
diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c
new file mode 100644
index 000000000..042c439a0
--- /dev/null
+++ b/libbb/yescrypt/y.c
@@ -0,0 +1,139 @@
+//kbuild:lib-$(CONFIG_USE_BB_CRYPT_YES) += y.o
+
+#include /* NOTE(review): the header name was lost when this page was extracted - restore it from the repository */
+
+#include /* NOTE(review): header name lost in extraction as well */
+
+static inline void
+cpu_to_le32 (unsigned char *buf, uint32_t n) /* store n into buf[0..3], least significant byte first */
+{
+  buf[0] = (unsigned char)((n & 0x000000FFu) >>  0);
+  buf[1] = (unsigned char)((n & 0x0000FF00u) >>  8);
+  buf[2] = (unsigned char)((n & 0x00FF0000u) >> 16);
+  buf[3] = (unsigned char)((n & 0xFF000000u) >> 24);
+}
+static inline uint32_t
+le32_to_cpu (const unsigned char *buf) /* read buf[0..3] as a 32-bit value, least significant byte first */
+{
+  return ((((uint32_t)buf[0]) <<  0) |
+          (((uint32_t)buf[1]) <<  8) |
+          (((uint32_t)buf[2]) << 16) |
+          (((uint32_t)buf[3]) << 24) );
+}
+
+/* Alternative names used in code derived from Colin Percival's
+   cryptography libraries.  */
+#define le32enc cpu_to_le32
+#define le32dec le32_to_cpu
+#define le64enc cpu_to_le64 /* NOTE(review): no cpu_to_le64() is defined in the visible part of this file - presumably the alias is unused; confirm */
+#define le64dec le64_to_cpu /* NOTE(review): likewise, no le64_to_cpu() is visible here */
+
+#define be32enc cpu_to_be32
+#define be32dec be32_to_cpu
+#define be64enc cpu_to_be64
+#define be64dec be64_to_cpu
+
+#define be32enc_vect cpu_to_be32_vect
+#define be32dec_vect be32_to_cpu_vect
+#define be64enc_vect cpu_to_be64_vect
+#define be64dec_vect be64_to_cpu_vect
+
+
+//USED ONLY BY SHA256 for be32_to_cpu_vect():
+static inline void
+cpu_to_be32(unsigned char *buf, uint32_t n) /* store n into buf[0..3], most significant byte first */
+{
+  buf[0] = (unsigned char)((n & 0xFF000000u) >> 24);
+  buf[1] = (unsigned char)((n & 0x00FF0000u) >> 16);
+  buf[2] = (unsigned char)((n & 0x0000FF00u) >>  8);
+  buf[3] = (unsigned char)((n & 0x000000FFu) >>  0);
+}
+static inline void
+cpu_to_be64 (unsigned char *buf, uint64_t n) /* store n into buf[0..7], most significant byte first */
+{
+  buf[0] = (unsigned char)((n & 0xFF00000000000000ull) >> 56);
+  buf[1] = (unsigned char)((n & 0x00FF000000000000ull) >> 48);
+  buf[2] = (unsigned char)((n & 0x0000FF0000000000ull) >> 40);
+  buf[3] = (unsigned char)((n & 0x000000FF00000000ull) >> 32);
+  buf[4] = (unsigned char)((n & 0x00000000FF000000ull) >> 24);
+  buf[5] = (unsigned char)((n & 0x0000000000FF0000ull) >> 16);
+  buf[6] = (unsigned char)((n & 0x000000000000FF00ull) >>  8);
+  buf[7] = (unsigned char)((n & 0x00000000000000FFull) >>  0);
+}
+static inline uint32_t
+be32_to_cpu (const unsigned char *buf) /* read buf[0..3] as a 32-bit value, most significant byte first */
+{
+  return ((((uint32_t)buf[0]) << 24) |
+          (((uint32_t)buf[1]) << 16) |
+          (((uint32_t)buf[2]) <<  8) |
+          (((uint32_t)buf[3]) <<  0) );
+}
+static inline uint64_t
+be64_to_cpu (const unsigned char *buf) /* read buf[0..7] as a 64-bit value, most significant byte first */
+{
+  return ((((uint64_t)buf[0]) << 56) |
+          (((uint64_t)buf[1]) << 48) |
+          (((uint64_t)buf[2]) << 40) |
+          (((uint64_t)buf[3]) << 32) |
+          (((uint64_t)buf[4]) << 24) |
+          (((uint64_t)buf[5]) << 16) |
+          (((uint64_t)buf[6]) <<  8) |
+          (((uint64_t)buf[7]) <<  0) );
+}
+/* Template: Define a function named cpu_to_<END><BITS>_vect that
+   takes a vector SRC of LEN integers, each of type uint<BITS>_t, and
+   writes them to the buffer DST in the endianness
defined by END.
+   Caution: LEN is the number of vector elements, not the total size
+   of the buffers.  */
+#define VECTOR_CPU_TO(end, bits) VECTOR_CPU_TO_(end##bits, uint##bits##_t)
+#define VECTOR_CPU_TO_(prim, stype)                                     \
+  static inline void                                                    \
+  cpu_to_##prim##_vect(uint8_t *dst, const stype *src, size_t len)      \
+  {                                                                     \
+    while (len)                                                         \
+      {                                                                 \
+        cpu_to_##prim(dst, *src);                                       \
+        src += 1;                                                       \
+        dst += sizeof(stype);                                           \
+        len -= 1;                                                       \
+      }                                                                 \
+  } struct _swallow_semicolon
+/* Template: Define a function named <END><BITS>_to_cpu_vect that
+   reads a vector of LEN integers, each of type uint<BITS>_t, from the
+   buffer SRC, in the endianness defined by END, and writes them to
+   the vector DST.  Caution: LEN is the number of vector elements, not
+   the total size of the buffers.  */
+#define VECTOR_TO_CPU(end, bits) VECTOR_TO_CPU_(end##bits, uint##bits##_t)
+#define VECTOR_TO_CPU_(prim, dtype)                                     \
+  static inline void                                                    \
+  prim##_to_cpu_vect(dtype *dst, const uint8_t *src, size_t len)        \
+  {                                                                     \
+    while (len)                                                         \
+      {                                                                 \
+        *dst = prim##_to_cpu(src);                                      \
+        src += sizeof(dtype);                                           \
+        dst += 1;                                                       \
+        len -= 1;                                                       \
+      }                                                                 \
+  } struct _swallow_semicolon
+/* These are the vectorized endianness-conversion functions that are
+   presently used.  Add more as necessary.  Each template ends in an
+   incomplete struct declaration so that the ';' after every use
+   below is consumed by a declaration.  */
+VECTOR_CPU_TO(be,32);
+VECTOR_CPU_TO(be,64);
+VECTOR_TO_CPU(be,32);
+VECTOR_TO_CPU(be,64);
+
+
+const unsigned char ascii64[65] =
+  "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+/* 0000000000111111111122222222223333333333444444444455555555556666 */
+/* 0123456789012345678901234567890123456789012345678901234567890123 */
+#define itoa64 ascii64
+
+#define YESCRYPT_INTERNAL /* enables the private flag definitions in alg-yescrypt.h */
+#include "alg-sha256.h"
+#include "alg-yescrypt.h"
+#include "alg-sha256.c"
+#include "alg-yescrypt-platform.c"
+#include "alg-yescrypt-kdf.c"
+#include "alg-yescrypt-common.c"
-- 
cgit v1.2.3-55-g6feb


From 444a91abc93fe1b3d98e9a18a63f0a2056290208 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko
Date: Sun, 6 Jul 2025 00:55:08 +0200
Subject: libbb/yescrypt: remove obviously unused, commented-out parts

No code changes

Signed-off-by: Denys Vlasenko
---
 libbb/yescrypt/alg-yescrypt-common.c | 364 -----------------------------------
 libbb/yescrypt/alg-yescrypt.h | 81 --------
 2 files changed, 445 deletions(-)

(limited to 'libbb')

diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c
index 75b59d1cf..7c519b01d 100644
--- a/libbb/yescrypt/alg-yescrypt-common.c
+++ b/libbb/yescrypt/alg-yescrypt-common.c
@@ -18,45 +18,6 @@
  * SUCH DAMAGE.
*/ -#if 0 //UNUSED -static uint8_t *encode64_uint32(uint8_t *dst, size_t dstlen, - uint32_t src, uint32_t min) -{ - uint32_t start = 0, end = 47, chars = 1, bits = 0; - - if (src < min) - return NULL; - src -= min; - - do { - uint32_t count = (end + 1 - start) << bits; - if (src < count) - break; - if (start >= 63) - return NULL; - start = end + 1; - end = start + (62 - end) / 2; - src -= count; - chars++; - bits += 6; - } while (1); - - if (dstlen <= chars) /* require room for a NUL terminator */ - return NULL; - - *dst++ = itoa64[start + (src >> bits)]; - - while (--chars) { - bits -= 6; - *dst++ = itoa64[(src >> bits) & 0x3f]; - } - - *dst = 0; /* NUL terminate just in case */ - - return dst; -} -#endif //UNUSED - static inline uint32_t atoi64(uint8_t src) { static const uint8_t atoi64_partial[77] = { @@ -223,81 +184,11 @@ fail: return NULL; } -#if 0 //UNUSED //KEY: -typedef enum { ENC = 1, DEC = -1 } encrypt_dir_t; - -static void memxor(unsigned char *dst, unsigned char *src, size_t size) -{ - while (size--) - *dst++ ^= *src++; -} - -static void yescrypt_sha256_cipher(unsigned char *data, size_t datalen, - const yescrypt_binary_t *key, encrypt_dir_t dir) -{ - SHA256_CTX ctx; - unsigned char f[32 + 4]; - size_t halflen, which; - unsigned char mask, round, target; - - if (!datalen) - return; - if (datalen > 64) - datalen = 64; - - halflen = datalen >> 1; - - which = 0; /* offset to half we are working on (0 or halflen) */ - mask = 0x0f; /* current half's extra nibble mask if datalen is odd */ - - round = 0; - target = 5; /* 6 rounds due to Jacques Patarin's CRYPTO 2004 paper */ - - if (dir == DEC) { - which = halflen; /* even round count, so swap the halves */ - mask ^= 0xff; - - round = target; - target = 0; - } - - f[32] = 0; - f[33] = sizeof(*key); - f[34] = datalen; - - do { - SHA256_Init(&ctx); - f[35] = round; - SHA256_Update(&ctx, &f[32], 4); - SHA256_Update(&ctx, key, sizeof(*key)); - SHA256_Update(&ctx, &data[which], halflen); - if (datalen & 1) { - f[0] 
= data[datalen - 1] & mask; - SHA256_Update(&ctx, f, 1); - } - SHA256_Final(f, &ctx); - which ^= halflen; - memxor(&data[which], f, halflen); - if (datalen & 1) { - mask ^= 0xff; - data[datalen - 1] ^= f[halflen] & mask; - } - if (round == target) - break; - round += dir; - } while (1); - - /* ctx is presumably zeroized by SHA256_Final() */ - explicit_bzero(f, sizeof(f)); -} -#endif //UNUSED //KEY: - uint8_t *yescrypt_r( const yescrypt_shared_t *shared, yescrypt_local_t *local, const uint8_t *passwd, size_t passwdlen, const uint8_t *setting, - //KEY const yescrypt_binary_t *key, uint8_t *buf, size_t buflen) { unsigned char saltbin[64], hashbin[32]; @@ -325,9 +216,6 @@ uint8_t *yescrypt_r( src = decode64_uint32_fixed(&params.p, 30, src); if (!src) return NULL; - - //KEY:if (key) - //KEY: return NULL; } else { uint32_t flavor, N_log2; @@ -425,11 +313,6 @@ uint8_t *yescrypt_r( &params, hashbin, sizeof(hashbin))) goto fail; - //KEY:if (key) { - //KEY: explicit_bzero(saltbin, sizeof(saltbin)); - //KEY: yescrypt_sha256_cipher(hashbin, sizeof(hashbin), key, ENC); - //KEY:} - dst = buf; memcpy(dst, setting, prefixlen + saltstrlen); dst += prefixlen + saltstrlen; @@ -450,14 +333,6 @@ fail: return NULL; } - - - - - - - - int yescrypt_free_shared(yescrypt_shared_t *shared) { return free_region(shared); @@ -473,242 +348,3 @@ int yescrypt_free_local(yescrypt_local_t *local) { return free_region(local); } - -#if 0 //UNUSED -uint8_t *yescrypt(const uint8_t *passwd, const uint8_t *setting) -{ - /* prefix, '$', hash, NUL */ - static uint8_t buf[PREFIX_LEN + 1 + HASH_LEN + 1]; - yescrypt_local_t local; - uint8_t *retval; - - if (yescrypt_init_local(&local)) - return NULL; - retval = yescrypt_r( - /* const yescrypt_shared_t *shared */ NULL, - &local, - passwd, strlen((char *)passwd), - setting, - //KEY:/* const yescrypt_binary_t *key */ NULL, - buf, sizeof(buf)); - if (yescrypt_free_local(&local)) - return NULL; - return retval; -} - -uint8_t *yescrypt_reencrypt(uint8_t *hash, - const
yescrypt_binary_t *from_key, - const yescrypt_binary_t *to_key) -{ - uint8_t *retval = NULL, *saltstart, *hashstart; - const uint8_t *hashend; - unsigned char saltbin[64], hashbin[32]; - size_t saltstrlen, saltlen = 0, hashlen; - - if (strncmp((char *)hash, "$y$", 3)) - return NULL; - - saltstart = NULL; - hashstart = (uint8_t *)strrchr((char *)hash, '$'); - if (hashstart) { - if (hashstart > (uint8_t *)hash) { - saltstart = hashstart - 1; - while (*saltstart != '$' && saltstart > hash) - saltstart--; - if (*saltstart == '$') - saltstart++; - } - hashstart++; - } else { - hashstart = hash; - } - saltstrlen = saltstart ? (hashstart - 1 - saltstart) : 0; - if (saltstrlen > BYTES2CHARS(64) || - strlen((char *)hashstart) != HASH_LEN) - return NULL; - - if (saltstrlen) { - const uint8_t *saltend; - saltlen = sizeof(saltbin); - saltend = decode64(saltbin, &saltlen, saltstart, saltstrlen); - if (!saltend || *saltend != '$' || saltlen < 1 || saltlen > 64) - goto out; - - if (from_key) - yescrypt_sha256_cipher(saltbin, saltlen, from_key, ENC); - if (to_key) - yescrypt_sha256_cipher(saltbin, saltlen, to_key, DEC); - } - - hashlen = sizeof(hashbin); - hashend = decode64(hashbin, &hashlen, hashstart, HASH_LEN); - if (!hashend || *hashend || hashlen != sizeof(hashbin)) - goto out; - - if (from_key) - yescrypt_sha256_cipher(hashbin, hashlen, from_key, DEC); - if (to_key) - yescrypt_sha256_cipher(hashbin, hashlen, to_key, ENC); - - if (saltstrlen) { - if (!encode64(saltstart, saltstrlen + 1, saltbin, saltlen)) - goto out; /* can't happen */ - *(saltstart + saltstrlen) = '$'; - } - - if (!encode64(hashstart, HASH_LEN + 1, hashbin, hashlen)) - goto out; /* can't happen */ - - retval = hash; - -out: - explicit_bzero(saltbin, sizeof(saltbin)); - explicit_bzero(hashbin, sizeof(hashbin)); - - return retval; -} - -static uint32_t N2log2(uint64_t N) -{ - uint32_t N_log2; - - if (N < 2) - return 0; - - N_log2 = 2; - while (N >> N_log2 != 0) - N_log2++; - N_log2--; - - if (N >> N_log2 != 
1) - return 0; - - return N_log2; -} - -uint8_t *yescrypt_encode_params_r(const yescrypt_params_t *params, - const uint8_t *src, size_t srclen, - uint8_t *buf, size_t buflen) -{ - uint32_t flavor, N_log2, NROM_log2, have; - uint8_t *dst; - - if (srclen > SIZE_MAX / 16) - return NULL; - - if (params->flags < YESCRYPT_RW) { - flavor = params->flags; - } else if ((params->flags & YESCRYPT_MODE_MASK) == YESCRYPT_RW && - params->flags <= (YESCRYPT_RW | YESCRYPT_RW_FLAVOR_MASK)) { - flavor = YESCRYPT_RW + (params->flags >> 2); - } else { - return NULL; - } - - N_log2 = N2log2(params->N); - if (!N_log2) - return NULL; - - NROM_log2 = N2log2(params->NROM); - if (params->NROM && !NROM_log2) - return NULL; - - if ((uint64_t)params->r * (uint64_t)params->p >= (1U << 30)) - return NULL; - - dst = buf; - *dst++ = '$'; - *dst++ = 'y'; - *dst++ = '$'; - - dst = encode64_uint32(dst, buflen - (dst - buf), flavor, 0); - if (!dst) - return NULL; - - dst = encode64_uint32(dst, buflen - (dst - buf), N_log2, 1); - if (!dst) - return NULL; - - dst = encode64_uint32(dst, buflen - (dst - buf), params->r, 1); - if (!dst) - return NULL; - - have = 0; - if (params->p != 1) - have |= 1; - if (params->t) - have |= 2; - if (params->g) - have |= 4; - if (NROM_log2) - have |= 8; - - if (have) { - dst = encode64_uint32(dst, buflen - (dst - buf), have, 1); - if (!dst) - return NULL; - } - - if (params->p != 1) { - dst = encode64_uint32(dst, buflen - (dst - buf), params->p, 2); - if (!dst) - return NULL; - } - - if (params->t) { - dst = encode64_uint32(dst, buflen - (dst - buf), params->t, 1); - if (!dst) - return NULL; - } - - if (params->g) { - dst = encode64_uint32(dst, buflen - (dst - buf), params->g, 1); - if (!dst) - return NULL; - } - - if (NROM_log2) { - dst = encode64_uint32(dst, buflen - (dst - buf), NROM_log2, 1); - if (!dst) - return NULL; - } - - if (dst >= buf + buflen) - return NULL; - - *dst++ = '$'; - - dst = encode64(dst, buflen - (dst - buf), src, srclen); - if (!dst || dst >= buf 
+ buflen) - return NULL; - - *dst = 0; /* NUL termination */ - - return buf; -} - -uint8_t *yescrypt_encode_params(const yescrypt_params_t *params, - const uint8_t *src, size_t srclen) -{ - /* prefix, NUL */ - static uint8_t buf[PREFIX_LEN + 1]; - return yescrypt_encode_params_r(params, src, srclen, buf, sizeof(buf)); -} - -int crypto_scrypt(const uint8_t *passwd, size_t passwdlen, - const uint8_t *salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, - uint8_t *buf, size_t buflen) -{ - yescrypt_local_t local; - yescrypt_params_t params = { .flags = 0, .N = N, .r = r, .p = p }; - int retval; - - if (yescrypt_init_local(&local)) - return -1; - retval = yescrypt_kdf(NULL, &local, - passwd, passwdlen, salt, saltlen, &params, buf, buflen); - if (yescrypt_free_local(&local)) - return -1; - return retval; -} -#endif //UNUSED diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index e3c7e6398..9755ac420 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -28,23 +28,6 @@ * online backup system. */ -#if 0 //UNUSED -/** - * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): - * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, - * p, buflen) and write the result into buf. The parameters r, p, and buflen - * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N - * must be a power of 2 greater than 1. - * - * Return 0 on success; or -1 on error. - * - * MT-safe as long as buf is local to the thread. - */ -extern int crypto_scrypt(const uint8_t *passwd, size_t passwdlen, - const uint8_t *salt, size_t saltlen, - uint64_t N, uint32_t r, uint32_t p, uint8_t *buf, size_t buflen); -#endif - /** * Internal type used by the memory allocator. Please do not use it directly.
* Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since @@ -285,70 +268,6 @@ extern uint8_t *yescrypt_r(const yescrypt_shared_t *shared, //KEY: const yescrypt_binary_t *key, uint8_t *buf, size_t buflen); -#if 0 //UNUSED -/** - * yescrypt(passwd, setting): - * Compute and encode an scrypt or enhanced scrypt hash of passwd given the - * parameters and salt value encoded in setting. Whether to compute classic - * scrypt, YESCRYPT_WORM (a slight deviation from classic scrypt), or - * YESCRYPT_RW (time-memory tradeoff discouraging modification) is determined - * by the setting string. - * - * Return the encoded hash string on success; or NULL on error. - * - * This is a crypt(3)-like interface, which is simpler to use than - * yescrypt_r(), but it is not MT-safe, it does not allow for the use of a ROM, - * and it is slower than yescrypt_r() for repeated calls because it allocates - * and frees memory on each call. - * - * MT-unsafe. - */ -extern uint8_t *yescrypt(const uint8_t *passwd, const uint8_t *setting); - -/** - * yescrypt_reencrypt(hash, from_key, to_key): - * Re-encrypt a yescrypt hash from one key to another. Either key may be NULL - * to indicate unencrypted hash. The encoded hash string is modified in-place. - * - * Return the hash pointer on success; or NULL on error (in which case the hash - * string is left unmodified). - * - * MT-safe as long as hash is local to the thread. - */ -extern uint8_t *yescrypt_reencrypt(uint8_t *hash, - const yescrypt_binary_t *from_key, - const yescrypt_binary_t *to_key); - -/** - * yescrypt_encode_params_r(params, src, srclen, buf, buflen): - * Generate a setting string for use with yescrypt_r() and yescrypt() by - * encoding into it the parameters flags, N, r, p, t, g, and a salt given by - * src (of srclen bytes). buf must be large enough (as indicated by buflen) - * to hold the setting string. - * - * Return the setting string on success; or NULL on error. 
- * - * MT-safe as long as buf is local to the thread. - */ -extern uint8_t *yescrypt_encode_params_r(const yescrypt_params_t *params, - const uint8_t *src, size_t srclen, - uint8_t *buf, size_t buflen); - - -/** - * yescrypt_encode_params(params, src, srclen): - * Generate a setting string for use with yescrypt_r() and yescrypt(). This - * function is the same as yescrypt_encode_params_r() except that it uses a - * static buffer and thus is not MT-safe. - * - * Return the setting string on success; or NULL on error. - * - * MT-unsafe. - */ -extern uint8_t *yescrypt_encode_params(const yescrypt_params_t *params, - const uint8_t *src, size_t srclen); -#endif - extern const uint8_t *decode64(uint8_t *dst, size_t *dstlen, const uint8_t *src, size_t srclen); extern uint8_t *encode64(uint8_t *dst, size_t dstlen, -- cgit v1.2.3-55-g6feb From be327bed9da573275e874af106fb52effb846dc1 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 01:03:49 +0200 Subject: libbb/yescrypt: remove unused yescrypt_shared_t function old new delta static.smix - 755 +755 static.smix1 - 631 +631 static.smix2 - 452 +452 yescrypt_free_local 9 49 +40 yes_crypt 90 87 -3 yescrypt_r 890 879 -11 yescrypt_kdf 479 449 -30 free_region 47 - -47 yescrypt_kdf_body 1724 1467 -257 smix2 659 - -659 smix 790 - -790 smix1 960 - -960 ------------------------------------------------------------------------------ (add/remove: 3/4 grow/shrink: 1/4 up/down: 1878/-2757) Total: -879 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 8 +----- libbb/yescrypt/alg-yescrypt-kdf.c | 29 +++++---------------- libbb/yescrypt/alg-yescrypt.h | 50 ++---------------------------------- 3 files changed, 10 insertions(+), 77 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 7c519b01d..9d7231f09 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -185,7 +185,6 @@ fail: } 
uint8_t *yescrypt_r( - const yescrypt_shared_t *shared, yescrypt_local_t *local, const uint8_t *passwd, size_t passwdlen, const uint8_t *setting, @@ -309,7 +308,7 @@ uint8_t *yescrypt_r( if (need > buflen || need < saltstrlen) goto fail; - if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, + if (yescrypt_kdf(local, passwd, passwdlen, salt, saltlen, &params, hashbin, sizeof(hashbin))) goto fail; @@ -333,11 +332,6 @@ fail: return NULL; } -int yescrypt_free_shared(yescrypt_shared_t *shared) -{ - return free_region(shared); -} - int yescrypt_init_local(yescrypt_local_t *local) { init_region(local); diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 3e98ffb0a..da23c1b59 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -800,7 +800,7 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, * This optimized implementation currently limits N to the range from 4 to * 2^31, but other implementations might not.
*/ -static int yescrypt_kdf_body(const yescrypt_shared_t *shared, +static int yescrypt_kdf_body( yescrypt_local_t *local, const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, @@ -865,23 +865,8 @@ static int yescrypt_kdf_body(const yescrypt_shared_t *shared, } VROM = NULL; - if (shared) { - uint64_t expected_size = (size_t)128 * r * NROM; - if ((NROM & (NROM - 1)) != 0 || - NROM <= 1 || NROM > UINT32_MAX || - shared->aligned_size < expected_size) - goto out_EINVAL; - if (!(flags & YESCRYPT_INIT_SHARED)) { - uint64_t *tag = (uint64_t *) - ((uint8_t *)shared->aligned + expected_size - 48); - if (tag[0] != YESCRYPT_ROM_TAG1 || tag[1] != YESCRYPT_ROM_TAG2) - goto out_EINVAL; - } - VROM = shared->aligned; - } else { - if (NROM) - goto out_EINVAL; - } + if (NROM) + goto out_EINVAL; /* Allocate memory */ V = NULL; @@ -1013,7 +998,7 @@ out_EINVAL: * to this function are the same as those for yescrypt_kdf_body() above, with * the addition of g, which controls hash upgrades (0 for no upgrades so far). 
*/ -int yescrypt_kdf(const yescrypt_shared_t *shared, +int yescrypt_kdf( yescrypt_local_t *local, const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, @@ -1041,7 +1026,7 @@ int yescrypt_kdf(const yescrypt_shared_t *shared, && N / p >= 0x100 && N / p * r >= 0x20000 ) { - if (yescrypt_kdf_body(shared, local, + if (yescrypt_kdf_body(local, passwd, passwdlen, salt, saltlen, flags | YESCRYPT_ALLOC_ONLY, N, r, p, t, NROM, buf, buflen) != -3 @@ -1049,7 +1034,7 @@ int yescrypt_kdf(const yescrypt_shared_t *shared, errno = EINVAL; return -1; } - retval = yescrypt_kdf_body(shared, local, + retval = yescrypt_kdf_body(local, passwd, passwdlen, salt, saltlen, flags | YESCRYPT_PREHASH, N >> 6, r, p, 0, NROM, dk, sizeof(dk)); @@ -1059,7 +1044,7 @@ int yescrypt_kdf(const yescrypt_shared_t *shared, passwdlen = sizeof(dk); } - retval = yescrypt_kdf_body(shared, local, + retval = yescrypt_kdf_body(local, passwd, passwdlen, salt, saltlen, flags, N, r, p, t, NROM, buf, buflen); #ifndef SKIP_MEMZERO diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 9755ac420..09638e3e1 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -41,7 +41,6 @@ typedef struct { /** * Types for shared (ROM) and thread-local (RAM) data structures. 
*/ -typedef yescrypt_region_t yescrypt_shared_t; typedef yescrypt_region_t yescrypt_local_t; /** @@ -80,8 +79,6 @@ typedef uint32_t yescrypt_flags_t; #define YESCRYPT_SBOX_192K 0x280 #define YESCRYPT_SBOX_384K 0x300 #define YESCRYPT_SBOX_768K 0x380 -/* Only valid for yescrypt_init_shared() */ -#define YESCRYPT_SHARED_PREALLOCATED 0x10000 #ifdef YESCRYPT_INTERNAL /* Private */ #define YESCRYPT_MODE_MASK 0x003 @@ -101,7 +98,6 @@ typedef uint32_t yescrypt_flags_t; #ifdef YESCRYPT_INTERNAL #define YESCRYPT_KNOWN_FLAGS \ (YESCRYPT_MODE_MASK | YESCRYPT_RW_FLAVOR_MASK | \ - YESCRYPT_SHARED_PREALLOCATED | \ YESCRYPT_INIT_SHARED | YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH) #endif @@ -139,47 +135,6 @@ typedef union { #define HASH_SIZE sizeof(yescrypt_binary_t) /* bytes */ #define HASH_LEN BYTES2CHARS(HASH_SIZE) - -/** - * yescrypt_init_shared(shared, seed, seedlen, params): - * Optionally allocate memory for and initialize the shared (ROM) data - * structure. The parameters flags, NROM, r, p, and t specify how the ROM is - * to be initialized, and seed and seedlen specify the initial seed affecting - * the data with which the ROM is filled. - * - * Return 0 on success; or -1 on error. - * - * If bit YESCRYPT_SHARED_PREALLOCATED in flags is set, then memory for the - * ROM is assumed to have been preallocated by the caller, with shared->aligned - * being the start address of the ROM and shared->aligned_size being its size - * (which must be sufficient for NROM, r, p). This may be used e.g. when the - * ROM is to be placed in a SysV shared memory segment allocated by the caller. - * - * MT-safe as long as shared is local to the thread. - */ -extern int yescrypt_init_shared(yescrypt_shared_t *shared, - const uint8_t *seed, size_t seedlen, const yescrypt_params_t *params); - -/** - * yescrypt_digest_shared(shared): - * Extract the previously stored message digest of the provided yescrypt ROM. - * - * Return pointer to the message digest on success; or NULL on error. 
- * - * MT-unsafe. - */ -extern yescrypt_binary_t *yescrypt_digest_shared(yescrypt_shared_t *shared); - -/** - * yescrypt_free_shared(shared): - * Free memory that had been allocated with yescrypt_init_shared(). - * - * Return 0 on success; or -1 on error. - * - * MT-safe as long as shared is local to the thread. - */ -extern int yescrypt_free_shared(yescrypt_shared_t *shared); - /** * yescrypt_init_local(local): * Initialize the thread-local (RAM) data structure. Actual memory allocation @@ -239,7 +194,7 @@ extern int yescrypt_free_local(yescrypt_local_t *local); * * MT-safe as long as local and buf are local to the thread. */ -extern int yescrypt_kdf(const yescrypt_shared_t *shared, +extern int yescrypt_kdf( yescrypt_local_t *local, const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, @@ -261,11 +216,10 @@ extern int yescrypt_kdf(const yescrypt_shared_t *shared, * * MT-safe as long as local and buf are local to the thread. */ -extern uint8_t *yescrypt_r(const yescrypt_shared_t *shared, +extern uint8_t *yescrypt_r( yescrypt_local_t *local, const uint8_t *passwd, size_t passwdlen, const uint8_t *setting, - //KEY: const yescrypt_binary_t *key, uint8_t *buf, size_t buflen); extern const uint8_t *decode64(uint8_t *dst, size_t *dstlen, -- cgit v1.2.3-55-g6feb From 10196929da47461d1d684cc65662cbaf591412fe Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 01:10:53 +0200 Subject: libbb/yescrypt: remove unreachable support for scrypt hash ("$7$...") function old new delta decode64 136 174 +38 decode64_uint32 166 201 +35 atoi64 25 - -25 yescrypt_r 879 692 -187 ------------------------------------------------------------------------------ (add/remove: 0/1 grow/shrink: 2/1 up/down: 73/-212) Total: -139 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 144 ++++++++++++----------------------- 1 file changed, 49 insertions(+), 95 deletions(-) (limited to 'libbb') diff --git 
a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 9d7231f09..bf2934bc9 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -118,24 +118,6 @@ uint8_t *encode64(uint8_t *dst, size_t dstlen, return dst; } -static const uint8_t *decode64_uint32_fixed(uint32_t *dst, uint32_t dstbits, - const uint8_t *src) -{ - uint32_t bits; - - *dst = 0; - for (bits = 0; bits < dstbits; bits += 6) { - uint32_t c = atoi64(*src++); - if (c > 63) { - *dst = 0; - return NULL; - } - *dst |= c << bits; - } - - return src; -} - const uint8_t *decode64(uint8_t *dst, size_t *dstlen, const uint8_t *src, size_t srclen) { @@ -192,91 +174,73 @@ uint8_t *yescrypt_r( { unsigned char saltbin[64], hashbin[32]; const uint8_t *src, *saltstr, *salt; + const uint8_t *saltend; uint8_t *dst; size_t need, prefixlen, saltstrlen, saltlen; + uint32_t flavor, N_log2; yescrypt_params_t params = { .p = 1 }; - if (setting[0] != '$' || - (setting[1] != '7' && setting[1] != 'y') || - setting[2] != '$') - return NULL; + /* we assume setting starts with "$y$" (caller must ensure this) */ src = setting + 3; - if (setting[1] == '7') { - uint32_t N_log2 = atoi64(*src++); - if (N_log2 < 1 || N_log2 > 63) - return NULL; - params.N = (uint64_t)1 << N_log2; - - src = decode64_uint32_fixed(&params.r, 30, src); - if (!src) - return NULL; + src = decode64_uint32(&flavor, src, 0); + if (!src) + return NULL; - src = decode64_uint32_fixed(&params.p, 30, src); - if (!src) - return NULL; + if (flavor < YESCRYPT_RW) { + params.flags = flavor; + } else if (flavor <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { + params.flags = YESCRYPT_RW + ((flavor - YESCRYPT_RW) << 2); } else { - uint32_t flavor, N_log2; + return NULL; + } - src = decode64_uint32(&flavor, src, 0); - if (!src) - return NULL; + src = decode64_uint32(&N_log2, src, 1); + if (!src || N_log2 > 63) + return NULL; + params.N = (uint64_t)1 << N_log2; - if (flavor < YESCRYPT_RW) { - params.flags = flavor;
- } else if (flavor <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { - params.flags = YESCRYPT_RW + ((flavor - YESCRYPT_RW) << 2); - } else { - return NULL; - } + src = decode64_uint32(&params.r, src, 1); + if (!src) + return NULL; - src = decode64_uint32(&N_log2, src, 1); - if (!src || N_log2 > 63) - return NULL; - params.N = (uint64_t)1 << N_log2; + if (*src != '$') { + uint32_t have; - src = decode64_uint32(&params.r, src, 1); + src = decode64_uint32(&have, src, 1); if (!src) return NULL; - if (*src != '$') { - uint32_t have; - - src = decode64_uint32(&have, src, 1); + if (have & 1) { + src = decode64_uint32(&params.p, src, 2); if (!src) return NULL; + } - if (have & 1) { - src = decode64_uint32(&params.p, src, 2); - if (!src) - return NULL; - } - - if (have & 2) { - src = decode64_uint32(&params.t, src, 1); - if (!src) - return NULL; - } - - if (have & 4) { - src = decode64_uint32(&params.g, src, 1); - if (!src) - return NULL; - } + if (have & 2) { + src = decode64_uint32(&params.t, src, 1); + if (!src) + return NULL; + } - if (have & 8) { - uint32_t NROM_log2; - src = decode64_uint32(&NROM_log2, src, 1); - if (!src || NROM_log2 > 63) - return NULL; - params.NROM = (uint64_t)1 << NROM_log2; - } + if (have & 4) { + src = decode64_uint32(&params.g, src, 1); + if (!src) + return NULL; } - if (*src++ != '$') - return NULL; + if (have & 8) { + uint32_t NROM_log2; + src = decode64_uint32(&NROM_log2, src, 1); + if (!src || NROM_log2 > 63) + return NULL; + params.NROM = (uint64_t)1 << NROM_log2; + } } + if (*src++ != '$') + return NULL; + prefixlen = src - setting; saltstr = src; @@ -286,23 +250,13 @@ uint8_t *yescrypt_r( else saltstrlen = strlen((char *)saltstr); - if (setting[1] == '7') { - salt = saltstr; - saltlen = saltstrlen; - } else { - const uint8_t *saltend; - - saltlen = sizeof(saltbin); - saltend = decode64(saltbin, &saltlen, saltstr, saltstrlen); - - if (!saltend || (size_t)(saltend - saltstr) != saltstrlen) - goto fail; + saltlen = sizeof(saltbin); + saltend = decode64(saltbin,
&saltlen, saltstr, saltstrlen); - salt = saltbin; + if (!saltend || (size_t)(saltend - saltstr) != saltstrlen) + goto fail; - //KEY:if (key) - //KEY: yescrypt_sha256_cipher(saltbin, saltlen, key, ENC); - } + salt = saltbin; need = prefixlen + saltstrlen + 1 + HASH_LEN + 1; if (need > buflen || need < saltstrlen) -- cgit v1.2.3-55-g6feb From bf8231d11ddd49abcded8b18b63ddeaea49c7fc8 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 01:29:14 +0200 Subject: libbb/yescrypt: remove YESCRYPT_INIT_SHARED flag and code using it It's only used by libxcrypt-4.4.38/lib/alg-yescrypt-opt.c code (optimized code version?) which wasn't ported to busybox function old new delta yescrypt_kdf 449 442 -7 static.smix 755 739 -16 alloc_region 72 - -72 yescrypt_kdf_body 1467 1239 -228 ------------------------------------------------------------------------------ (add/remove: 0/1 grow/shrink: 0/3 up/down: 0/-323) Total: -323 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 46 ++++++++++----------------------------- libbb/yescrypt/alg-yescrypt.h | 3 +-- 2 files changed, 13 insertions(+), 36 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index da23c1b59..6c1108514 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -731,9 +731,7 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, } Nloop_rw = 0; - if (flags & YESCRYPT_INIT_SHARED) - Nloop_rw = Nloop_all; - else if (flags & YESCRYPT_RW) + if (flags & YESCRYPT_RW) Nloop_rw = Nloop_all / p; Nchunk &= ~(uint32_t)1; /* round down to even */ @@ -872,19 +870,6 @@ static int yescrypt_kdf_body( V = NULL; V_size = (size_t)128 * r * N; need = V_size; - if (flags & YESCRYPT_INIT_SHARED) { - if (local->aligned_size < need) { - if (local->base || local->aligned || - local->base_size || local->aligned_size) - goto out_EINVAL; - if (!alloc_region(local, need)) - return -1; - } - if 
(flags & YESCRYPT_ALLOC_ONLY) - return -2; /* expected "failure" */ - V = (salsa20_blk_t *)local->aligned; - need = 0; - } B_size = (size_t)128 * r * p; need += B_size; if (need < B_size) @@ -899,25 +884,18 @@ static int yescrypt_kdf_body( if (need < S_size) goto out_EINVAL; } - if (flags & YESCRYPT_INIT_SHARED) { - if (!alloc_region(&tmp, need)) + init_region(&tmp); + if (local->aligned_size < need) { + if (free_region(local)) + return -1; + if (!alloc_region(local, need)) return -1; - B = (uint8_t *)tmp.aligned; - XY = (salsa20_blk_t *)((uint8_t *)B + B_size); - } else { - init_region(&tmp); - if (local->aligned_size < need) { - if (free_region(local)) - return -1; - if (!alloc_region(local, need)) - return -1; - } - if (flags & YESCRYPT_ALLOC_ONLY) - return -3; /* expected "failure" */ - B = (uint8_t *)local->aligned; - V = (salsa20_blk_t *)((uint8_t *)B + B_size); - XY = (salsa20_blk_t *)((uint8_t *)V + V_size); } + if (flags & YESCRYPT_ALLOC_ONLY) + return -3; /* expected "failure" */ + B = (uint8_t *)local->aligned; + V = (salsa20_blk_t *)((uint8_t *)B + B_size); + XY = (salsa20_blk_t *)((uint8_t *)V + V_size); S = NULL; if (flags & YESCRYPT_RW) S = (uint8_t *)XY + XY_size; @@ -1021,7 +999,7 @@ int yescrypt_kdf( return -1; } - if ((flags & (YESCRYPT_RW | YESCRYPT_INIT_SHARED)) == YESCRYPT_RW + if ((flags & YESCRYPT_RW) && p >= 1 && N / p >= 0x100 && N / p * r >= 0x20000 diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 09638e3e1..fb791e899 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -83,7 +83,6 @@ typedef uint32_t yescrypt_flags_t; /* Private */ #define YESCRYPT_MODE_MASK 0x003 #define YESCRYPT_RW_FLAVOR_MASK 0x3fc -#define YESCRYPT_INIT_SHARED 0x01000000 #define YESCRYPT_ALLOC_ONLY 0x08000000 #define YESCRYPT_PREHASH 0x10000000 #endif @@ -98,7 +97,7 @@ typedef uint32_t yescrypt_flags_t; #ifdef YESCRYPT_INTERNAL #define YESCRYPT_KNOWN_FLAGS \ (YESCRYPT_MODE_MASK | YESCRYPT_RW_FLAVOR_MASK | \ 
- YESCRYPT_INIT_SHARED | YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH) + YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH) #endif /** -- cgit v1.2.3-55-g6feb From 3d9b965b4af3f497560cc17a371f0f45d0feeb87 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 01:35:23 +0200 Subject: libbb/yescrypt: remove unused yescrypt_binary_t Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index fb791e899..bb30da8a8 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -112,15 +112,6 @@ typedef struct { uint64_t NROM; } yescrypt_params_t; -/** - * A 256-bit yescrypt hash, or a hash encryption key (which may itself have - * been derived as a yescrypt hash of a human-specified key string). - */ -typedef union { - unsigned char uc[32]; - uint64_t u64[4]; -} yescrypt_binary_t; - /* How many chars base-64 encoded bytes require? 
*/ #define BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6) /* The /etc/passwd-style hash is "<prefix>$<hash>" */ @@ -131,7 +122,7 @@ typedef union { */ #define PREFIX_LEN (3 + 8 * 6 + 1 + BYTES2CHARS(32)) -#define HASH_SIZE sizeof(yescrypt_binary_t) /* bytes */ +#define HASH_SIZE 32 #define HASH_LEN BYTES2CHARS(HASH_SIZE) /** -- cgit v1.2.3-55-g6feb From e189464fa7dac7c6b6ae5ee80d55ab05dc67b6d7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 02:39:07 +0200 Subject: libbb/yescrypt: make some functions static function old new delta static.yescrypt_kdf_body - 1166 +1166 yescrypt_r 692 1342 +650 atoi64 - 25 +25 decode64_uint32 201 166 -35 encode64 153 - -153 decode64 174 - -174 yescrypt_kdf 442 - -442 yescrypt_kdf_body 1239 - -1239 ------------------------------------------------------------------------------ (add/remove: 2/4 grow/shrink: 1/1 up/down: 1841/-2043) Total: -202 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 27 ++++++++++++++++++--------- libbb/yescrypt/alg-yescrypt-kdf.c | 1 + libbb/yescrypt/alg-yescrypt.h | 9 +++------ 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index bf2934bc9..c04e074bd 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -18,7 +18,12 @@ * SUCH DAMAGE.
*/ -static inline uint32_t atoi64(uint8_t src) +/* Not inlining: + * decode64 functions are only used to read + * yescrypt_params_t fields, and convert salt to binary - + * both of these are negligible compared to main hashing operation + */ +static NOINLINE uint32_t atoi64(uint8_t src) { static const uint8_t atoi64_partial[77] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, @@ -34,8 +39,9 @@ static inline uint32_t atoi64(uint8_t src) return 64; } -static const uint8_t *decode64_uint32(uint32_t *dst, - const uint8_t *src, uint32_t min) +static NOINLINE const uint8_t *decode64_uint32( + uint32_t *dst, + const uint8_t *src, uint32_t min) { uint32_t start = 0, end = 47, chars = 1, bits = 0; uint32_t c; @@ -70,8 +76,9 @@ fail: return NULL; } -static uint8_t *encode64_uint32_fixed(uint8_t *dst, size_t dstlen, - uint32_t src, uint32_t srcbits) +static uint8_t *encode64_uint32_fixed( + uint8_t *dst, size_t dstlen, + uint32_t src, uint32_t srcbits) { uint32_t bits; @@ -91,8 +98,9 @@ static uint8_t *encode64_uint32_fixed(uint8_t *dst, size_t dstlen, return dst; } -uint8_t *encode64(uint8_t *dst, size_t dstlen, - const uint8_t *src, size_t srclen) +static uint8_t *encode64( + uint8_t *dst, size_t dstlen, + const uint8_t *src, size_t srclen) { size_t i; @@ -118,8 +126,9 @@ uint8_t *encode64(uint8_t *dst, size_t dstlen, return dst; } -const uint8_t *decode64(uint8_t *dst, size_t *dstlen, - const uint8_t *src, size_t srclen) +static const uint8_t *decode64( + uint8_t *dst, size_t *dstlen, + const uint8_t *src, size_t srclen) { size_t dstpos = 0; diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 6c1108514..93938e69c 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -976,6 +976,7 @@ out_EINVAL: * to this function are the same as those for yescrypt_kdf_body() above, with * the addition of g, which controls hash upgrades (0 for no upgrades so far).
*/ +static int yescrypt_kdf( yescrypt_local_t *local, const uint8_t *passwd, size_t passwdlen, diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index bb30da8a8..7f2b7f471 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -184,12 +184,14 @@ extern int yescrypt_free_local(yescrypt_local_t *local); * * MT-safe as long as local and buf are local to the thread. */ -extern int yescrypt_kdf( +#ifdef YESCRYPT_INTERNAL +static int yescrypt_kdf( yescrypt_local_t *local, const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, const yescrypt_params_t *params, uint8_t *buf, size_t buflen); +#endif /** * yescrypt_r(shared, local, passwd, passwdlen, setting, key, buf, buflen): @@ -211,8 +213,3 @@ extern uint8_t *yescrypt_r( const uint8_t *passwd, size_t passwdlen, const uint8_t *setting, uint8_t *buf, size_t buflen); - -extern const uint8_t *decode64(uint8_t *dst, size_t *dstlen, - const uint8_t *src, size_t srclen); -extern uint8_t *encode64(uint8_t *dst, size_t dstlen, - const uint8_t *src, size_t srclen); -- cgit v1.2.3-55-g6feb From 9b6e03fd094d0947b04e25617d1d322bfadf7466 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 03:03:23 +0200 Subject: libbb/yescrypt: remove unused yescrypt_region_t tmp Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 93938e69c..3ee9bfa43 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -806,7 +806,6 @@ static int yescrypt_kdf_body( uint64_t NROM, uint8_t *buf, size_t buflen) { - yescrypt_region_t tmp; const salsa20_blk_t *VROM; size_t B_size, V_size, XY_size, need; uint8_t *B, *S; @@ -884,7 +883,6 @@ static int yescrypt_kdf_body( if (need < S_size) goto out_EINVAL; } - init_region(&tmp); if (local->aligned_size < need) { if 
(free_region(local)) return -1; @@ -956,11 +954,6 @@ static int yescrypt_kdf_body( explicit_bzero(dk, sizeof(dk)); } - if (free_region(&tmp)) { - explicit_bzero(buf, buflen); /* must preserve errno */ - return -1; - } - /* Success! */ return 0; -- cgit v1.2.3-55-g6feb From 1524540613427b22cc9acf9e8fb1748f5dd425f7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 03:22:46 +0200 Subject: libbb/yescrypt: remove unused variable Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index c04e074bd..388bf1a12 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -182,8 +182,7 @@ uint8_t *yescrypt_r( uint8_t *buf, size_t buflen) { unsigned char saltbin[64], hashbin[32]; - const uint8_t *src, *saltstr, *salt; - const uint8_t *saltend; + const uint8_t *src, *saltstr, *saltend; uint8_t *dst; size_t need, prefixlen, saltstrlen, saltlen; uint32_t flavor, N_log2; @@ -265,19 +264,15 @@ uint8_t *yescrypt_r( if (!saltend || (size_t)(saltend - saltstr) != saltstrlen) goto fail; - salt = saltbin; - need = prefixlen + saltstrlen + 1 + HASH_LEN + 1; if (need > buflen || need < saltstrlen) goto fail; - if (yescrypt_kdf(local, passwd, passwdlen, salt, saltlen, + if (yescrypt_kdf(local, passwd, passwdlen, saltbin, saltlen, &params, hashbin, sizeof(hashbin))) goto fail; - dst = buf; - memcpy(dst, setting, prefixlen + saltstrlen); - dst += prefixlen + saltstrlen; + dst = mempcpy(buf, setting, prefixlen + saltstrlen); *dst++ = '$'; dst = encode64(dst, buflen - (dst - buf), hashbin, sizeof(hashbin)); -- cgit v1.2.3-55-g6feb From 1c977008e0b84f0d277367901b981c0d958dbdce Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 03:40:06 +0200 Subject: libbb/yescrypt: code shrink in parameter decoding function old new delta decode64_uint32
166 167 +1 yescrypt_r 1342 1311 -31 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 1/-31) Total: -30 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 48 +++++++++++++----------------------- 1 file changed, 17 insertions(+), 31 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 388bf1a12..65c92f22f 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -41,33 +41,36 @@ static NOINLINE uint32_t atoi64(uint8_t src) static NOINLINE const uint8_t *decode64_uint32( uint32_t *dst, - const uint8_t *src, uint32_t min) + const uint8_t *src, uint32_t val) { uint32_t start = 0, end = 47, chars = 1, bits = 0; uint32_t c; + if (!src) /* prevous decode failed already? */ + goto fail; + c = atoi64(*src++); if (c > 63) goto fail; - *dst = min; while (c > end) { - *dst += (end + 1 - start) << bits; + val += (end + 1 - start) << bits; start = end + 1; end = start + (62 - end) / 2; chars++; bits += 6; } - *dst += (c - start) << bits; + val += (c - start) << bits; while (--chars) { c = atoi64(*src++); if (c > 63) goto fail; bits -= 6; - *dst += c << bits; + val += c << bits; } + *dst = val; return src; @@ -192,8 +195,8 @@ uint8_t *yescrypt_r( src = setting + 3; src = decode64_uint32(&flavor, src, 0); - if (!src) - return NULL; + //if (!src) + // return NULL; if (flavor < YESCRYPT_RW) { params.flags = flavor; @@ -204,48 +207,32 @@ uint8_t *yescrypt_r( } src = decode64_uint32(&N_log2, src, 1); - if (!src || N_log2 > 63) + if (/*!src ||*/ N_log2 > 63) return NULL; params.N = (uint64_t)1 << N_log2; src = decode64_uint32(¶ms.r, src, 1); if (!src) return NULL; - if (*src != '$') { uint32_t have; - src = decode64_uint32(&have, src, 1); - if (!src) - return NULL; - - if (have & 1) { + if (have & 1) src = decode64_uint32(¶ms.p, src, 2); - if (!src) - return NULL; - } - - if 
(have & 2) { + if (have & 2) src = decode64_uint32(¶ms.t, src, 1); - if (!src) - return NULL; - } - - if (have & 4) { + if (have & 4) src = decode64_uint32(¶ms.g, src, 1); - if (!src) - return NULL; - } - if (have & 8) { uint32_t NROM_log2; src = decode64_uint32(&NROM_log2, src, 1); - if (!src || NROM_log2 > 63) + if (/*!src ||*/ NROM_log2 > 63) return NULL; params.NROM = (uint64_t)1 << NROM_log2; } } - + if (!src) + return NULL; if (*src++ != '$') return NULL; @@ -274,7 +261,6 @@ uint8_t *yescrypt_r( dst = mempcpy(buf, setting, prefixlen + saltstrlen); *dst++ = '$'; - dst = encode64(dst, buflen - (dst - buf), hashbin, sizeof(hashbin)); explicit_bzero(hashbin, sizeof(hashbin)); if (!dst || dst >= buf + buflen) -- cgit v1.2.3-55-g6feb From 85d3c48217bcbbe670aa466705fee703b95cbc36 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 03:59:59 +0200 Subject: libbb/yescrypt: code shrink in salt decoding function old new delta yescrypt_r 1311 1288 -23 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 65c92f22f..8d75fa051 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -233,33 +233,29 @@ uint8_t *yescrypt_r( } if (!src) return NULL; - if (*src++ != '$') + if (*src != '$') return NULL; - prefixlen = src - setting; - - saltstr = src; - src = (uint8_t *)strrchr((char *)saltstr, '$'); - if (src) - saltstrlen = src - saltstr; - else - saltstrlen = strlen((char *)saltstr); + saltstr = src + 1; + src = (uint8_t *)strchrnul((char *)saltstr, '$'); + prefixlen = src - setting; /* len("$y$$") */ + saltstrlen = src - saltstr; /* len("") */ + /* src points to end of salt ('$' or NUL byte), won't be used past this point */ saltlen = sizeof(saltbin); saltend = decode64(saltbin, &saltlen, saltstr, saltstrlen); 
+ if (saltend != saltstr + saltstrlen) + goto fail; /* saltbin[] is too small, or bad char during decode */ - if (!saltend || (size_t)(saltend - saltstr) != saltstrlen) - goto fail; - - need = prefixlen + saltstrlen + 1 + HASH_LEN + 1; - if (need > buflen || need < saltstrlen) + need = prefixlen + 1 + HASH_LEN + 1; + if (need > buflen || need < prefixlen) goto fail; if (yescrypt_kdf(local, passwd, passwdlen, saltbin, saltlen, ¶ms, hashbin, sizeof(hashbin))) goto fail; - dst = mempcpy(buf, setting, prefixlen + saltstrlen); + dst = mempcpy(buf, setting, prefixlen); *dst++ = '$'; dst = encode64(dst, buflen - (dst - buf), hashbin, sizeof(hashbin)); explicit_bzero(hashbin, sizeof(hashbin)); -- cgit v1.2.3-55-g6feb From 736589f877be576274503fdcd066aff4b5c12949 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 06:05:08 +0200 Subject: libbb/yescrypt: reduce the number of function parameters function old new delta yescrypt_kdf32_body - 1077 +1077 static.smix 739 753 +14 yescrypt_init_local 34 - -34 yes_crypt 87 50 -37 yescrypt_free_local 49 - -49 yescrypt_r 1288 1217 -71 static.yescrypt_kdf_body 1166 - -1166 ------------------------------------------------------------------------------ (add/remove: 1/3 grow/shrink: 1/2 up/down: 1091/-1357) Total: -266 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 74 ++++++++++------------- libbb/yescrypt/alg-yescrypt-kdf.c | 104 ++++++++++++++++----------------- libbb/yescrypt/alg-yescrypt-platform.c | 37 +++++------- libbb/yescrypt/alg-yescrypt.h | 87 ++++++++++++--------------- 4 files changed, 136 insertions(+), 166 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 8d75fa051..ebc531b08 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -179,62 +179,64 @@ fail: } uint8_t *yescrypt_r( - yescrypt_local_t *local, const uint8_t *passwd, size_t passwdlen, const uint8_t 
*setting, uint8_t *buf, size_t buflen) { - unsigned char saltbin[64], hashbin[32]; + yescrypt_ctx_t yctx[1]; + unsigned char hashbin32[32]; const uint8_t *src, *saltstr, *saltend; uint8_t *dst; - size_t need, prefixlen, saltstrlen, saltlen; + size_t need, prefixlen, saltstrlen; uint32_t flavor, N_log2; - yescrypt_params_t params = { .p = 1 }; + + memset(yctx, 0, sizeof(yctx)); + yctx->param.p = 1; /* we assume setting starts with "$y$" (caller must ensure this) */ src = setting + 3; src = decode64_uint32(&flavor, src, 0); //if (!src) - // return NULL; + // goto fail; if (flavor < YESCRYPT_RW) { - params.flags = flavor; + yctx->param.flags = flavor; } else if (flavor <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { - params.flags = YESCRYPT_RW + ((flavor - YESCRYPT_RW) << 2); + yctx->param.flags = YESCRYPT_RW + ((flavor - YESCRYPT_RW) << 2); } else { - return NULL; + goto fail; } src = decode64_uint32(&N_log2, src, 1); if (/*!src ||*/ N_log2 > 63) - return NULL; - params.N = (uint64_t)1 << N_log2; + goto fail; + yctx->param.N = (uint64_t)1 << N_log2; - src = decode64_uint32(¶ms.r, src, 1); + src = decode64_uint32(&yctx->param.r, src, 1); if (!src) - return NULL; + goto fail; if (*src != '$') { uint32_t have; src = decode64_uint32(&have, src, 1); if (have & 1) - src = decode64_uint32(¶ms.p, src, 2); + src = decode64_uint32(&yctx->param.p, src, 2); if (have & 2) - src = decode64_uint32(¶ms.t, src, 1); + src = decode64_uint32(&yctx->param.t, src, 1); if (have & 4) - src = decode64_uint32(¶ms.g, src, 1); + src = decode64_uint32(&yctx->param.g, src, 1); if (have & 8) { uint32_t NROM_log2; src = decode64_uint32(&NROM_log2, src, 1); if (/*!src ||*/ NROM_log2 > 63) - return NULL; - params.NROM = (uint64_t)1 << NROM_log2; + goto fail; + yctx->param.NROM = (uint64_t)1 << NROM_log2; } } if (!src) - return NULL; + goto fail; if (*src != '$') - return NULL; + goto fail; saltstr = src + 1; src = (uint8_t *)strchrnul((char *)saltstr, '$'); @@ -242,8 +244,8 @@ uint8_t 
*yescrypt_r( saltstrlen = src - saltstr; /* len("") */ /* src points to end of salt ('$' or NUL byte), won't be used past this point */ - saltlen = sizeof(saltbin); - saltend = decode64(saltbin, &saltlen, saltstr, saltstrlen); + yctx->saltlen = sizeof(yctx->salt); + saltend = decode64(yctx->salt, &yctx->saltlen, saltstr, saltstrlen); if (saltend != saltstr + saltstrlen) goto fail; /* saltbin[] is too small, or bad char during decode */ @@ -251,34 +253,22 @@ uint8_t *yescrypt_r( if (need > buflen || need < prefixlen) goto fail; - if (yescrypt_kdf(local, passwd, passwdlen, saltbin, saltlen, - ¶ms, hashbin, sizeof(hashbin))) + if (yescrypt_kdf32(yctx, passwd, passwdlen, hashbin32)) goto fail; dst = mempcpy(buf, setting, prefixlen); *dst++ = '$'; - dst = encode64(dst, buflen - (dst - buf), hashbin, sizeof(hashbin)); - explicit_bzero(hashbin, sizeof(hashbin)); + dst = encode64(dst, buflen - (dst - buf), hashbin32, sizeof(hashbin32)); if (!dst || dst >= buf + buflen) - return NULL; + goto fail; *dst = 0; /* NUL termination */ - + ret: + free_region(yctx->local); + explicit_bzero(yctx, sizeof(yctx)); + explicit_bzero(hashbin32, sizeof(hashbin32)); return buf; - fail: - explicit_bzero(saltbin, sizeof(saltbin)); - explicit_bzero(hashbin, sizeof(hashbin)); - return NULL; -} - -int yescrypt_init_local(yescrypt_local_t *local) -{ - init_region(local); - return 0; -} - -int yescrypt_free_local(yescrypt_local_t *local) -{ - return free_region(local); + buf = NULL; + goto ret; } diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 3ee9bfa43..2b84564b9 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -798,29 +798,27 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, * This optimized implementation currently limits N to the range from 4 to * 2^31, but other implementations might not. 
*/ -static int yescrypt_kdf_body( - yescrypt_local_t *local, - const uint8_t *passwd, size_t passwdlen, - const uint8_t *salt, size_t saltlen, - yescrypt_flags_t flags, uint64_t N, uint32_t r, uint32_t p, uint32_t t, - uint64_t NROM, - uint8_t *buf, size_t buflen) +static int yescrypt_kdf32_body( + yescrypt_ctx_t *yctx, + const uint8_t *passwd, size_t passwdlen, + yescrypt_flags_t flags, uint64_t N, uint32_t t, + uint8_t *buf32) { const salsa20_blk_t *VROM; size_t B_size, V_size, XY_size, need; uint8_t *B, *S; salsa20_blk_t *V, *XY; uint8_t sha256[32]; - uint8_t dk[sizeof(sha256)], *dkp = buf; + uint8_t dk[sizeof(sha256)], *dkp = buf32; /* Sanity-check parameters */ switch (flags & YESCRYPT_MODE_MASK) { case 0: /* classic scrypt - can't have anything non-standard */ - if (flags || t || NROM) + if (flags || t || yctx->param.NROM) goto out_EINVAL; break; case YESCRYPT_WORM: - if (flags != YESCRYPT_WORM || NROM) + if (flags != YESCRYPT_WORM || yctx->param.NROM) goto out_EINVAL; break; case YESCRYPT_RW: @@ -842,6 +840,9 @@ static int yescrypt_kdf_body( if (buflen > (((uint64_t)1 << 32) - 1) * 32) goto out_EINVAL; #endif + { + const uint32_t r = yctx->param.r; + const uint32_t p = yctx->param.p; if ((uint64_t)r * (uint64_t)p >= 1 << 30) goto out_EINVAL; if (N > UINT32_MAX) @@ -862,7 +863,7 @@ static int yescrypt_kdf_body( } VROM = NULL; - if (NROM) + if (yctx->param.NROM) goto out_EINVAL; /* Allocate memory */ @@ -883,15 +884,14 @@ static int yescrypt_kdf_body( if (need < S_size) goto out_EINVAL; } - if (local->aligned_size < need) { - if (free_region(local)) - return -1; - if (!alloc_region(local, need)) - return -1; + if (yctx->local->aligned_size < need) { + free_region(yctx->local); + alloc_region(yctx->local, need); + dbg("allocated local:%u", need); } if (flags & YESCRYPT_ALLOC_ONLY) return -3; /* expected "failure" */ - B = (uint8_t *)local->aligned; + B = (uint8_t *)yctx->local->aligned; V = (salsa20_blk_t *)((uint8_t *)B + B_size); XY = (salsa20_blk_t 
*)((uint8_t *)V + V_size); S = NULL; @@ -906,28 +906,28 @@ static int yescrypt_kdf_body( passwdlen = sizeof(sha256); } - PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size); + PBKDF2_SHA256(passwd, passwdlen, yctx->salt, yctx->saltlen, 1, B, B_size); if (flags) memcpy(sha256, B, sizeof(sha256)); if (p == 1 || (flags & YESCRYPT_RW)) { - smix(B, r, N, p, t, flags, V, NROM, VROM, XY, S, sha256); + smix(B, r, N, p, t, flags, V, yctx->param.NROM, VROM, XY, S, sha256); } else { uint32_t i; for (i = 0; i < p; i++) { smix(&B[(size_t)128 * r * i], r, N, 1, t, flags, V, - NROM, VROM, XY, NULL, NULL); + yctx->param.NROM, VROM, XY, NULL, NULL); } } - dkp = buf; - if (flags && buflen < sizeof(dk)) { + dkp = buf32; + if (flags && /*buflen:*/32 < sizeof(dk)) { PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, dk, sizeof(dk)); dkp = dk; } - PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen); + PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf32, /*buflen:*/32); /* * Except when computing classic scrypt, allow all computation so far @@ -941,11 +941,11 @@ static int yescrypt_kdf_body( HMAC_SHA256_Buf(dkp, sizeof(dk), "Client Key", 10, sha256); /* Compute StoredKey */ { - size_t clen = buflen; + size_t clen = /*buflen:*/32; if (clen > sizeof(dk)) clen = sizeof(dk); SHA256_Buf(sha256, sizeof(sha256), dk); - memcpy(buf, dk, clen); + memcpy(buf32, dk, clen); } } @@ -960,6 +960,7 @@ static int yescrypt_kdf_body( out_EINVAL: errno = EINVAL; return -1; + } } /** @@ -970,21 +971,18 @@ out_EINVAL: * the addition of g, which controls hash upgrades (0 for no upgrades so far). 
*/ static -int yescrypt_kdf( - yescrypt_local_t *local, +int yescrypt_kdf32( + yescrypt_ctx_t *yctx, const uint8_t *passwd, size_t passwdlen, - const uint8_t *salt, size_t saltlen, - const yescrypt_params_t *params, - uint8_t *buf, size_t buflen) + uint8_t *buf32) { - yescrypt_flags_t flags = params->flags; - uint64_t N = params->N; - uint32_t r = params->r; - uint32_t p = params->p; - uint32_t t = params->t; - uint32_t g = params->g; - uint64_t NROM = params->NROM; - uint8_t dk[32]; + yescrypt_flags_t flags = yctx->param.flags; + uint64_t N = yctx->param.N; + uint32_t r = yctx->param.r; + uint32_t p = yctx->param.p; + uint32_t t = yctx->param.t; + uint32_t g = yctx->param.g; + uint8_t dk32[32]; int retval; /* Support for hash upgrades has been temporarily removed */ @@ -998,30 +996,30 @@ int yescrypt_kdf( && N / p >= 0x100 && N / p * r >= 0x20000 ) { - if (yescrypt_kdf_body(local, - passwd, passwdlen, salt, saltlen, - flags | YESCRYPT_ALLOC_ONLY, N, r, p, t, NROM, - buf, buflen) != -3 + if (yescrypt_kdf32_body(yctx, + passwd, passwdlen, + flags | YESCRYPT_ALLOC_ONLY, N, t, + buf32) != -3 ) { errno = EINVAL; return -1; } - retval = yescrypt_kdf_body(local, - passwd, passwdlen, salt, saltlen, - flags | YESCRYPT_PREHASH, N >> 6, r, p, 0, NROM, - dk, sizeof(dk)); + retval = yescrypt_kdf32_body(yctx, + passwd, passwdlen, + flags | YESCRYPT_PREHASH, N >> 6, 0, + dk32); if (retval) return retval; - passwd = dk; - passwdlen = sizeof(dk); + passwd = dk32; + passwdlen = sizeof(dk32); } - retval = yescrypt_kdf_body(local, - passwd, passwdlen, salt, saltlen, - flags, N, r, p, t, NROM, buf, buflen); + retval = yescrypt_kdf32_body(yctx, + passwd, passwdlen, + flags, N, t, buf32); #ifndef SKIP_MEMZERO - if (passwd == dk) - explicit_bzero(dk, sizeof(dk)); + if (passwd == dk32) + explicit_bzero(dk32, sizeof(dk32)); #endif return retval; } diff --git a/libbb/yescrypt/alg-yescrypt-platform.c b/libbb/yescrypt/alg-yescrypt-platform.c index 09809c4b0..41627df2d 100644 --- 
a/libbb/yescrypt/alg-yescrypt-platform.c +++ b/libbb/yescrypt/alg-yescrypt-platform.c @@ -18,7 +18,7 @@ * SUCH DAMAGE. */ -static void *alloc_region(yescrypt_region_t *region, size_t size) +static void alloc_region(yescrypt_region_t *region, size_t size) { size_t base_size = size; uint8_t *base, *aligned; @@ -27,39 +27,32 @@ static void *alloc_region(yescrypt_region_t *region, size_t size) //(if defined(MAP_HUGETLB) && defined(MAP_HUGE_2MB)) using 2MB pages #else /* mmap not available */ base = aligned = NULL; - if (size + 63 < size) { - errno = ENOMEM; - } else { - base = malloc(size + 63); - if (base) { - aligned = base + 63; - aligned -= (uintptr_t)aligned & 63; - } + if (size + 63 < size) + bb_die_memory_exhausted(); + base = malloc(size + 63); + if (base) { + aligned = base + 63; + aligned -= (uintptr_t)aligned & 63; } #endif region->base = base; region->aligned = aligned; region->base_size = base ? base_size : 0; region->aligned_size = base ? size : 0; - return aligned; } -static inline void init_region(yescrypt_region_t *region) +static void free_region(yescrypt_region_t *region) { - region->base = region->aligned = NULL; - region->base_size = region->aligned_size = 0; -} - -static int free_region(yescrypt_region_t *region) -{ - if (region->base) { #if 0 //def MAP_ANON + if (region->base) { if (munmap(region->base, region->base_size)) return -1; + } #else - free(region->base); + free(region->base); #endif - } - init_region(region); - return 0; + region->base = NULL; + region->aligned = NULL; + region->base_size = 0; + region->aligned_size = 0; } diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 7f2b7f471..cac1959f9 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -27,21 +27,13 @@ * This file was originally written by Colin Percival as part of the Tarsnap * online backup system. */ - -/** - * Internal type used by the memory allocator. Please do not use it directly. 
- * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since - * they might differ from each other in a future version. - */ -typedef struct { - void *base, *aligned; - size_t base_size, aligned_size; -} yescrypt_region_t; - -/** - * Types for shared (ROM) and thread-local (RAM) data structures. - */ -typedef yescrypt_region_t yescrypt_local_t; +#ifdef YESCRYPT_INTERNAL +# if 1 +# define dbg(...) ((void)0) +# else +# define dbg(...) bb_error_msg(__VA_ARGS__) +# endif +#endif /** * Two 64-bit tags placed 48 bytes to the end of a ROM in host byte endianness @@ -100,6 +92,16 @@ typedef uint32_t yescrypt_flags_t; YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH) #endif +/** + * Internal type used by the memory allocator. Please do not use it directly. + * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since + * they might differ from each other in a future version. + */ +typedef struct { + void *base, *aligned; + size_t base_size, aligned_size; +} yescrypt_region_t; + /** * yescrypt parameters combined into one struct. N, r, p are the same as in * classic scrypt, except that the meaning of p changes when YESCRYPT_RW is @@ -112,6 +114,19 @@ typedef struct { uint64_t NROM; } yescrypt_params_t; +typedef struct { + yescrypt_params_t param; + + /* salt in binary form */ + /* stored here to cut down on the amont of function paramaters */ + unsigned char salt[64]; + size_t saltlen; + + /* used by the memory allocator */ + //yescrypt_region_t shared[1]; + yescrypt_region_t local[1]; +} yescrypt_ctx_t; + /* How many chars base-64 encoded bytes require? */ #define BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6) /* The /etc/passwd-style hash is "$" */ @@ -125,28 +140,6 @@ typedef struct { #define HASH_SIZE 32 #define HASH_LEN BYTES2CHARS(HASH_SIZE) -/** - * yescrypt_init_local(local): - * Initialize the thread-local (RAM) data structure. Actual memory allocation - * is currently fully postponed until a call to yescrypt_kdf() or yescrypt_r(). 
- * - * Return 0 on success; or -1 on error. - * - * MT-safe as long as local is local to the thread. - */ -extern int yescrypt_init_local(yescrypt_local_t *local); - -/** - * yescrypt_free_local(local): - * Free memory that may have been allocated for an initialized thread-local - * (RAM) data structure. - * - * Return 0 on success; or -1 on error. - * - * MT-safe as long as local is local to the thread. - */ -extern int yescrypt_free_local(yescrypt_local_t *local); - /** * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params, * buf, buflen): @@ -180,17 +173,13 @@ extern int yescrypt_free_local(yescrypt_local_t *local); * and shared->aligned_size fields may optionally be set by the caller directly * (e.g., to a mapped SysV shm segment), without using yescrypt_init_shared(). * - * local must be initialized with yescrypt_init_local(). - * * MT-safe as long as local and buf are local to the thread. */ #ifdef YESCRYPT_INTERNAL -static int yescrypt_kdf( - yescrypt_local_t *local, - const uint8_t *passwd, size_t passwdlen, - const uint8_t *salt, size_t saltlen, - const yescrypt_params_t *params, - uint8_t *buf, size_t buflen); +static int yescrypt_kdf32( + yescrypt_ctx_t *yctx, + const uint8_t *passwd, size_t passwdlen, + uint8_t *buf32); #endif /** @@ -209,7 +198,7 @@ static int yescrypt_kdf( * MT-safe as long as local and buf are local to the thread. */ extern uint8_t *yescrypt_r( - yescrypt_local_t *local, - const uint8_t *passwd, size_t passwdlen, - const uint8_t *setting, - uint8_t *buf, size_t buflen); + const uint8_t *passwd, size_t passwdlen, + const uint8_t *setting, + uint8_t *buf, size_t buflen +); -- cgit v1.2.3-55-g6feb From 8be6dafe94690f24829901680c4768847a41b8f2 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 06:50:18 +0200 Subject: libbb/yescrypt: use mmap for allocation, it's large - 16Mbytes This automatically gives it alignment sufficient for any future SIMD optimizations. 
function old new delta yescrypt_r 1217 1221 +4 yescrypt_kdf32_body 1077 1059 -18 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 4/-18) Total: -14 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 22 +++++++++++++++++ libbb/yescrypt/alg-yescrypt-kdf.c | 3 ++- libbb/yescrypt/alg-yescrypt-platform.c | 43 ++++++++++++---------------------- libbb/yescrypt/alg-yescrypt.h | 13 ++++------ 4 files changed, 43 insertions(+), 38 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index ebc531b08..a54e21ce2 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -197,6 +197,9 @@ uint8_t *yescrypt_r( src = setting + 3; src = decode64_uint32(&flavor, src, 0); + dbg("yescrypt flavor=0x%x YESCRYPT_RW:%u", + (unsigned)flavor, !!(flavor & YESCRYPT_RW) + ); //if (!src) // goto fail; @@ -204,6 +207,22 @@ uint8_t *yescrypt_r( yctx->param.flags = flavor; } else if (flavor <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { yctx->param.flags = YESCRYPT_RW + ((flavor - YESCRYPT_RW) << 2); + dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags); + dbg(" YESCRYPT_RW:%u" , !!(yctx->param.flags & YESCRYPT_RW )); + dbg(" YESCRYPT_ROUNDS_6:%u" , !!(yctx->param.flags & YESCRYPT_ROUNDS_6 )); + dbg(" YESCRYPT_GATHER_2:%u" , !!(yctx->param.flags & YESCRYPT_GATHER_2 )); + dbg(" YESCRYPT_GATHER_4:%u" , !!(yctx->param.flags & YESCRYPT_GATHER_4 )); + dbg(" YESCRYPT_GATHER_8:%u" , !!(yctx->param.flags & YESCRYPT_GATHER_8 )); + dbg(" YESCRYPT_SIMPLE_2:%u" , !!(yctx->param.flags & YESCRYPT_SIMPLE_2 )); + dbg(" YESCRYPT_SIMPLE_4:%u" , !!(yctx->param.flags & YESCRYPT_SIMPLE_4 )); + dbg(" YESCRYPT_SIMPLE_8:%u" , !!(yctx->param.flags & YESCRYPT_SIMPLE_8 )); + dbg(" YESCRYPT_SBOX_12K:%u" , !!(yctx->param.flags & YESCRYPT_SBOX_12K )); + dbg(" YESCRYPT_SBOX_24K:%u" , !!(yctx->param.flags & 
YESCRYPT_SBOX_24K )); + dbg(" YESCRYPT_SBOX_48K:%u" , !!(yctx->param.flags & YESCRYPT_SBOX_48K )); + dbg(" YESCRYPT_SBOX_96K:%u" , !!(yctx->param.flags & YESCRYPT_SBOX_96K )); + dbg(" YESCRYPT_SBOX_192K:%u", !!(yctx->param.flags & YESCRYPT_SBOX_192K)); + dbg(" YESCRYPT_SBOX_384K:%u", !!(yctx->param.flags & YESCRYPT_SBOX_384K)); + dbg(" YESCRYPT_SBOX_768K:%u", !!(yctx->param.flags & YESCRYPT_SBOX_768K)); } else { goto fail; } @@ -212,13 +231,16 @@ uint8_t *yescrypt_r( if (/*!src ||*/ N_log2 > 63) goto fail; yctx->param.N = (uint64_t)1 << N_log2; + dbg("yctx->param.N=%llu (1<<%u)", (unsigned long long)yctx->param.N, (unsigned)N_log2); src = decode64_uint32(&yctx->param.r, src, 1); if (!src) goto fail; + dbg("yctx->param.r=%u", yctx->param.r); if (*src != '$') { uint32_t have; src = decode64_uint32(&have, src, 1); + dbg("yescrypt has extended params:0x%x", (unsigned)have); if (have & 1) src = decode64_uint32(&yctx->param.p, src, 2); if (have & 2) diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 2b84564b9..01a66a6a8 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -887,7 +887,8 @@ static int yescrypt_kdf32_body( if (yctx->local->aligned_size < need) { free_region(yctx->local); alloc_region(yctx->local, need); - dbg("allocated local:%u", need); + dbg("allocated local:%u 0x%x", need, need); + /* standard "j9T" params allocate 16Mbytes here */ } if (flags & YESCRYPT_ALLOC_ONLY) return -3; /* expected "failure" */ diff --git a/libbb/yescrypt/alg-yescrypt-platform.c b/libbb/yescrypt/alg-yescrypt-platform.c index 41627df2d..8dd5feb55 100644 --- a/libbb/yescrypt/alg-yescrypt-platform.c +++ b/libbb/yescrypt/alg-yescrypt-platform.c @@ -20,39 +20,26 @@ static void alloc_region(yescrypt_region_t *region, size_t size) { - size_t base_size = size; - uint8_t *base, *aligned; - -#if 0 //def MAP_ANON - use mmap, possibly -//(if defined(MAP_HUGETLB) && defined(MAP_HUGE_2MB)) using 2MB pages -#else /* mmap 
not available */ - base = aligned = NULL; - if (size + 63 < size) + int flags = +# ifdef MAP_NOCORE /* huh? */ + MAP_NOCORE | +# endif + MAP_ANON | MAP_PRIVATE; + uint8_t *base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (base == MAP_FAILED) bb_die_memory_exhausted(); - base = malloc(size + 63); - if (base) { - aligned = base + 63; - aligned -= (uintptr_t)aligned & 63; - } -#endif - region->base = base; - region->aligned = aligned; - region->base_size = base ? base_size : 0; - region->aligned_size = base ? size : 0; + //region->base = base; + //region->base_size = size; + region->aligned = base; + region->aligned_size = size; } static void free_region(yescrypt_region_t *region) { -#if 0 //def MAP_ANON - if (region->base) { - if (munmap(region->base, region->base_size)) - return -1; - } -#else - free(region->base); -#endif - region->base = NULL; + if (region->aligned) + munmap(region->aligned, region->aligned_size); + //region->base = NULL; + //region->base_size = 0; region->aligned = NULL; - region->base_size = 0; region->aligned_size = 0; } diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index cac1959f9..ebd705cf0 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -35,13 +35,6 @@ # endif #endif -/** - * Two 64-bit tags placed 48 bytes to the end of a ROM in host byte endianness - * (and followed by 32 bytes of the ROM digest). - */ -#define YESCRYPT_ROM_TAG1 0x7470797263736579 /* "yescrypt" */ -#define YESCRYPT_ROM_TAG2 0x687361684d4f522d /* "-ROMhash" */ - /** * Type and possible values for the flags argument of yescrypt_kdf(), * yescrypt_encode_params_r(), yescrypt_encode_params(). Most of these may be @@ -98,8 +91,10 @@ typedef uint32_t yescrypt_flags_t; * they might differ from each other in a future version. 
*/ typedef struct { - void *base, *aligned; - size_t base_size, aligned_size; +// void *base; + void *aligned; +// size_t base_size; + size_t aligned_size; } yescrypt_region_t; /** -- cgit v1.2.3-55-g6feb From ffac25d056beda59d8a8f6506325f6b4b33cf5ad Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 07:43:04 +0200 Subject: libbb/yescrypt: comment tweaks Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 13 ++++++++----- libbb/yescrypt/alg-yescrypt.h | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index a54e21ce2..da7fa5e0f 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -197,15 +197,15 @@ uint8_t *yescrypt_r( src = setting + 3; src = decode64_uint32(&flavor, src, 0); - dbg("yescrypt flavor=0x%x YESCRYPT_RW:%u", - (unsigned)flavor, !!(flavor & YESCRYPT_RW) - ); + /* "j9T" returns: 0x2f */ + dbg("yescrypt flavor=0x%x YESCRYPT_RW:%u", (unsigned)flavor, !!(flavor & YESCRYPT_RW)); //if (!src) // goto fail; if (flavor < YESCRYPT_RW) { yctx->param.flags = flavor; } else if (flavor <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { + /* "j9T" sets flags to 0xb6 */ yctx->param.flags = YESCRYPT_RW + ((flavor - YESCRYPT_RW) << 2); dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags); dbg(" YESCRYPT_RW:%u" , !!(yctx->param.flags & YESCRYPT_RW )); @@ -231,12 +231,15 @@ uint8_t *yescrypt_r( if (/*!src ||*/ N_log2 > 63) goto fail; yctx->param.N = (uint64_t)1 << N_log2; + /* "j9T" sets to 4096 (1<<12) */ dbg("yctx->param.N=%llu (1<<%u)", (unsigned long long)yctx->param.N, (unsigned)N_log2); src = decode64_uint32(&yctx->param.r, src, 1); + /* "j9T" sets to 32 */ + dbg("yctx->param.r=%u", yctx->param.r); + if (!src) goto fail; - dbg("yctx->param.r=%u", yctx->param.r); if (*src != '$') { uint32_t have; src = decode64_uint32(&have, src, 1); @@ -269,7 +272,7 @@ uint8_t 
*yescrypt_r( yctx->saltlen = sizeof(yctx->salt); saltend = decode64(yctx->salt, &yctx->saltlen, saltstr, saltstrlen); if (saltend != saltstr + saltstrlen) - goto fail; /* saltbin[] is too small, or bad char during decode */ + goto fail; /* salt[] is too small, or bad char during decode */ need = prefixlen + 1 + HASH_LEN + 1; if (need > buflen || need < prefixlen) diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index ebd705cf0..edabbc222 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -113,7 +113,7 @@ typedef struct { yescrypt_params_t param; /* salt in binary form */ - /* stored here to cut down on the amont of function paramaters */ + /* stored here to cut down on the amount of function paramaters */ unsigned char salt[64]; size_t saltlen; -- cgit v1.2.3-55-g6feb From 4e5a6b6dbb77f735c4f10b61dd32173ccc3a842a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 10:35:22 +0200 Subject: libbb/yescrypt: remove extra sha256 implementation function old new delta libcperciva_HMAC_SHA256_Init - 159 +159 libcperciva_HMAC_SHA256_Final - 56 +56 SHA256_Buf - 40 +40 static.smix 753 759 +6 yescrypt_kdf32_body 1059 1060 +1 .rodata 105803 105799 -4 initial_state 32 - -32 libcperciva_SHA256_Init 37 - -37 static.cpu_to_be32_vect 51 - -51 _HMAC_SHA256_Final 55 - -55 PAD 64 - -64 libcperciva_HMAC_SHA256_Buf 132 58 -74 libcperciva_SHA256_Buf 86 - -86 SHA256_Pad_Almost 131 - -131 _SHA256_Final 195 - -195 _SHA256_Update 198 - -198 _HMAC_SHA256_Init 213 - -213 Krnd 256 - -256 PBKDF2_SHA256 1003 386 -617 SHA256_Transform 3083 - -3083 ------------------------------------------------------------------------------ (add/remove: 3/12 grow/shrink: 2/3 up/down: 262/-5096) Total: -4834 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-sha256.c | 498 ++++---------------------------------------- libbb/yescrypt/alg-sha256.h | 62 +----- 2 files changed, 39 insertions(+), 521 deletions(-) (limited to 'libbb') diff 
--git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index 0c1b846be..038ac0ddb 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -25,281 +25,6 @@ * SUCH DAMAGE. */ -#if defined(__GNUC__) -#define restrict __restrict -#else -#define restrict -#endif - -/* SHA256 round constants. */ -static const uint32_t Krnd[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -/* Elementary functions used by SHA256 */ -#define Ch(x, y, z) ((x & (y ^ z)) ^ z) -#if 1 /* Explicit caching/reuse of common subexpression between rounds */ -#define Maj(x, y, z) (y ^ ((x_xor_y = x ^ y) & y_xor_z)) -#else /* Let the compiler cache/reuse or not */ -#define Maj(x, y, z) (y ^ ((x ^ y) & (y ^ z))) -#endif -#define SHR(x, n) (x >> n) -#define ROTR(x, n) ((x >> n) | (x << (32 - n))) -#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) -#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) -#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) -#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) - -/* SHA256 round function */ -#define RND(a, b, c, d, e, f, g, h, k) \ - h += S1(e) + Ch(e, f, g) + k; \ - d += h; \ - h += S0(a) + Maj(a, b, c); \ - y_xor_z = x_xor_y; - -/* Adjusted round function for rotating 
state */ -#define RNDr(S, W, i, ii) \ - RND(S[(64 - i) % 8], S[(65 - i) % 8], \ - S[(66 - i) % 8], S[(67 - i) % 8], \ - S[(68 - i) % 8], S[(69 - i) % 8], \ - S[(70 - i) % 8], S[(71 - i) % 8], \ - W[i + ii] + Krnd[i + ii]) - -/* Message schedule computation */ -#define MSCH(W, ii, i) \ - W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii] - -/* - * SHA256 block compression function. The 256-bit state is transformed via - * the 512-bit input block to produce a new state. - */ -static void -SHA256_Transform(uint32_t state[static restrict 8], - const uint8_t block[static restrict 64], - uint32_t W[static restrict 64], uint32_t S[static restrict 8]) -{ - int i; - - /* 1. Prepare the first part of the message schedule W. */ - be32dec_vect(W, block, 16); - - /* 2. Initialize working variables. */ - memcpy(S, state, 32); - - /* 3. Mix. */ - for (i = 0; i <= 48; i += 16) { - uint32_t x_xor_y, y_xor_z = S[(65 - i) % 8] ^ S[(66 - i) % 8]; - RNDr(S, W, 0, i); - RNDr(S, W, 1, i); - RNDr(S, W, 2, i); - RNDr(S, W, 3, i); - RNDr(S, W, 4, i); - RNDr(S, W, 5, i); - RNDr(S, W, 6, i); - RNDr(S, W, 7, i); - RNDr(S, W, 8, i); - RNDr(S, W, 9, i); - RNDr(S, W, 10, i); - RNDr(S, W, 11, i); - RNDr(S, W, 12, i); - RNDr(S, W, 13, i); - RNDr(S, W, 14, i); - RNDr(S, W, 15, i); - - if (i == 48) - break; - - MSCH(W, 0, i); - MSCH(W, 1, i); - MSCH(W, 2, i); - MSCH(W, 3, i); - MSCH(W, 4, i); - MSCH(W, 5, i); - MSCH(W, 6, i); - MSCH(W, 7, i); - MSCH(W, 8, i); - MSCH(W, 9, i); - MSCH(W, 10, i); - MSCH(W, 11, i); - MSCH(W, 12, i); - MSCH(W, 13, i); - MSCH(W, 14, i); - MSCH(W, 15, i); - } - - /* 4. Mix local working variables into global state. 
*/ - state[0] += S[0]; - state[1] += S[1]; - state[2] += S[2]; - state[3] += S[3]; - state[4] += S[4]; - state[5] += S[5]; - state[6] += S[6]; - state[7] += S[7]; -} - -static const uint8_t PAD[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* Add padding and terminating bit-count. */ -static void -SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72]) -{ - size_t r; - - /* Figure out how many bytes we have buffered. */ - r = (ctx->count >> 3) & 0x3f; - - /* Pad to 56 mod 64, transforming if we finish a block en route. */ - if (r < 56) { - /* Pad to 56 mod 64. */ - memcpy(&ctx->buf[r], PAD, 56 - r); - } else { - /* Finish the current block and mix. */ - memcpy(&ctx->buf[r], PAD, 64 - r); - SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); - - /* The start of the final block is all zeroes. */ - memset(&ctx->buf[0], 0, 56); - } - - /* Add the terminating bit-count. */ - be64enc(&ctx->buf[56], ctx->count); - - /* Mix in the final block. */ - SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); -} - -/* Magic initialization constants. */ -static const uint32_t initial_state[8] = { - 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, - 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 -}; - -/** - * SHA256_Init(ctx): - * Initialize the SHA256 context ${ctx}. - */ -void -SHA256_Init(SHA256_CTX * ctx) -{ - - /* Zero bits processed so far. */ - ctx->count = 0; - - /* Initialize state. */ - memcpy(ctx->state, initial_state, sizeof(initial_state)); -} - -/** - * SHA256_Update(ctx, in, len): - * Input ${len} bytes from ${in} into the SHA256 context ${ctx}. - */ -static void -_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len, - uint32_t tmp32[static restrict 72]) -{ - uint32_t r; - const uint8_t * src = in; - - /* Return immediately if we have nothing to do. 
*/ - if (len == 0) - return; - - /* Number of bytes left in the buffer from previous updates. */ - r = (ctx->count >> 3) & 0x3f; - - /* Update number of bits. */ - ctx->count += (uint64_t)(len) << 3; - - /* Handle the case where we don't need to perform any transforms. */ - if (len < 64 - r) { - memcpy(&ctx->buf[r], src, len); - return; - } - - /* Finish the current block. */ - memcpy(&ctx->buf[r], src, 64 - r); - SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); - src += 64 - r; - len -= 64 - r; - - /* Perform complete blocks. */ - while (len >= 64) { - SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]); - src += 64; - len -= 64; - } - - /* Copy left over data into buffer. */ - memcpy(ctx->buf, src, len); -} - -/* Wrapper function for intermediate-values sanitization. */ -void -SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len) -{ - uint32_t tmp32[72]; - - /* Call the real function. */ - _SHA256_Update(ctx, in, len, tmp32); - - /* Clean the stack. */ - explicit_bzero(tmp32, 288); -} - -/** - * SHA256_Final(digest, ctx): - * Output the SHA256 hash of the data input to the context ${ctx} into the - * buffer ${digest}. - */ -static void -_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx, - uint32_t tmp32[static restrict 72]) -{ - - /* Add padding. */ - SHA256_Pad(ctx, tmp32); - - /* Write the hash. */ - be32enc_vect(digest, ctx->state, 8); -} - -/* Wrapper function for intermediate-values sanitization. */ -void -SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx) -{ - uint32_t tmp32[72]; - - /* Call the real function. */ - _SHA256_Final(digest, ctx, tmp32); - - /* Clear the context state. */ - explicit_bzero(ctx, sizeof(SHA256_CTX)); - - /* Clean the stack. */ - explicit_bzero(tmp32, 288); -} - /** * SHA256_Buf(in, len, digest): * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. 
@@ -307,16 +32,10 @@ SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx) void SHA256_Buf(const void * in, size_t len, uint8_t digest[32]) { - SHA256_CTX ctx; - uint32_t tmp32[72]; - - SHA256_Init(&ctx); - _SHA256_Update(&ctx, in, len, tmp32); - _SHA256_Final(digest, &ctx, tmp32); - - /* Clean the stack. */ - explicit_bzero(&ctx, sizeof(SHA256_CTX)); - explicit_bzero(tmp32, 288); + sha256_ctx_t ctx; + sha256_begin(&ctx); + sha256_hash(&ctx, in, len); + sha256_end(&ctx, digest); } /** @@ -325,52 +44,36 @@ SHA256_Buf(const void * in, size_t len, uint8_t digest[32]) * ${K}. */ static void -_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen, - uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64], - uint8_t khash[static restrict 32]) +HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) { + uint8_t pad[64]; + uint8_t khash[32]; const uint8_t * K = _K; size_t i; /* If Klen > 64, the key is really SHA256(K). */ if (Klen > 64) { - SHA256_Init(&ctx->ictx); - _SHA256_Update(&ctx->ictx, K, Klen, tmp32); - _SHA256_Final(khash, &ctx->ictx, tmp32); +// SHA256_Init(&ctx->ictx); +// _SHA256_Update(&ctx->ictx, K, Klen, tmp32); +// _SHA256_Final(khash, &ctx->ictx, tmp32); + SHA256_Buf(K, Klen, khash); K = khash; Klen = 32; } /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ - SHA256_Init(&ctx->ictx); + sha256_begin(&ctx->ictx); memset(pad, 0x36, 64); for (i = 0; i < Klen; i++) pad[i] ^= K[i]; - _SHA256_Update(&ctx->ictx, pad, 64, tmp32); + sha256_hash(&ctx->ictx, pad, 64); /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */ - SHA256_Init(&ctx->octx); + sha256_begin(&ctx->octx); memset(pad, 0x5c, 64); for (i = 0; i < Klen; i++) pad[i] ^= K[i]; - _SHA256_Update(&ctx->octx, pad, 64, tmp32); -} - -/* Wrapper function for intermediate-values sanitization. 
*/ -void -HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) -{ - uint32_t tmp32[72]; - uint8_t pad[64]; - uint8_t khash[32]; - - /* Call the real function. */ - _HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash); - - /* Clean the stack. */ - explicit_bzero(tmp32, 288); - explicit_bzero(khash, 32); - explicit_bzero(pad, 64); + sha256_hash(&ctx->octx, pad, 64); } /** @@ -378,25 +81,10 @@ HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. */ static void -_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len, - uint32_t tmp32[static restrict 72]) -{ - - /* Feed data to the inner SHA256 operation. */ - _SHA256_Update(&ctx->ictx, in, len, tmp32); -} - -/* Wrapper function for intermediate-values sanitization. */ -void HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len) { - uint32_t tmp32[72]; - - /* Call the real function. */ - _HMAC_SHA256_Update(ctx, in, len, tmp32); - - /* Clean the stack. */ - explicit_bzero(tmp32, 288); + /* Feed data to the inner SHA256 operation. */ + sha256_hash(&ctx->ictx, in, len); } /** @@ -405,36 +93,16 @@ HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len) * buffer ${digest}. */ static void -_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx, - uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32]) +HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx) { + uint8_t ihash[32]; /* Finish the inner SHA256 operation. */ - _SHA256_Final(ihash, &ctx->ictx, tmp32); - + sha256_end(&ctx->ictx, ihash); /* Feed the inner hash to the outer SHA256 operation. */ - _SHA256_Update(&ctx->octx, ihash, 32, tmp32); - + sha256_hash(&ctx->octx, ihash, 32); /* Finish the outer SHA256 operation. */ - _SHA256_Final(digest, &ctx->octx, tmp32); -} - -/* Wrapper function for intermediate-values sanitization. 
*/ -void -HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx) -{ - uint32_t tmp32[72]; - uint8_t ihash[32]; - - /* Call the real function. */ - _HMAC_SHA256_Final(digest, ctx, tmp32, ihash); - - /* Clear the context state. */ - explicit_bzero(ctx, sizeof(HMAC_SHA256_CTX)); - - /* Clean the stack. */ - explicit_bzero(tmp32, 288); - explicit_bzero(ihash, 32); + sha256_end(&ctx->octx, digest); } /** @@ -442,49 +110,14 @@ HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx) * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of * length ${Klen}, and write the result to ${digest}. */ -void -HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len, - uint8_t digest[32]) +static void +HMAC_SHA256_Buf(const void *K, size_t Klen, const void *in, size_t len, + uint8_t digest[32]) { HMAC_SHA256_CTX ctx; - uint32_t tmp32[72]; - uint8_t tmp8[96]; - - _HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]); - _HMAC_SHA256_Update(&ctx, in, len, tmp32); - _HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]); - - /* Clean the stack. */ - explicit_bzero(&ctx, sizeof(HMAC_SHA256_CTX)); - explicit_bzero(tmp32, 288); - explicit_bzero(tmp8, 96); -} - -/* Add padding and terminating bit-count, but don't invoke Transform yet. */ -static int -SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8], - uint32_t tmp32[static restrict 72]) -{ - uint32_t r; - - r = (ctx->count >> 3) & 0x3f; - if (r >= 56) - return -1; - - /* - * Convert length to a vector of bytes -- we do this now rather - * than later because the length will change after we pad. - */ - be64enc(len, ctx->count); - - /* Add 1--56 bytes so that the resulting length is 56 mod 64. */ - _SHA256_Update(ctx, PAD, 56 - r, tmp32); - - /* Add the terminating bit-count. 
*/ - ctx->buf[63] = len[7]; - _SHA256_Update(ctx, len, 7, tmp32); - - return 0; + HMAC_SHA256_Init(&ctx, K, Klen); + HMAC_SHA256_Update(&ctx, in, len); + HMAC_SHA256_Final(digest, &ctx); } /** @@ -493,15 +126,11 @@ SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8], * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). */ void -PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, - size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen) +PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, + const uint8_t *salt, size_t saltlen, + uint64_t c, uint8_t *buf, size_t dkLen) { HMAC_SHA256_CTX Phctx, PShctx, hctx; - uint32_t tmp32[72]; - union { - uint8_t tmp8[96]; - uint32_t state[8]; - } u; size_t i; uint8_t ivec[4]; uint8_t U[32]; @@ -513,54 +142,12 @@ PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, /* Sanity-check. */ assert(dkLen <= 32 * (size_t)(UINT32_MAX)); - if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) { - uint32_t oldcount; - uint8_t * ivecp; - - /* Compute HMAC state after processing P and S. */ - _HMAC_SHA256_Init(&hctx, passwd, passwdlen, - tmp32, &u.tmp8[0], &u.tmp8[64]); - _HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32); - - /* Prepare ictx padding. */ - oldcount = hctx.ictx.count & (0x3f << 3); - _HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32); - if ((hctx.ictx.count & (0x3f << 3)) < oldcount || - SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32)) - goto generic; /* Can't happen due to saltlen check */ - ivecp = hctx.ictx.buf + (oldcount >> 3); - - /* Prepare octx padding. */ - hctx.octx.count += 32 << 3; - SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32); - - /* Iterate through the blocks. */ - for (i = 0; i * 32 < dkLen; i++) { - /* Generate INT(i + 1). */ - be32enc(ivecp, (uint32_t)(i + 1)); - - /* Compute U_1 = PRF(P, S || INT(i)). 
*/ - memcpy(u.state, hctx.ictx.state, sizeof(u.state)); - SHA256_Transform(u.state, hctx.ictx.buf, - &tmp32[0], &tmp32[64]); - be32enc_vect(hctx.octx.buf, u.state, 8); - memcpy(u.state, hctx.octx.state, sizeof(u.state)); - SHA256_Transform(u.state, hctx.octx.buf, - &tmp32[0], &tmp32[64]); - be32enc_vect(&buf[i * 32], u.state, 8); - } - - goto cleanup; - } - -generic: /* Compute HMAC state after processing P. */ - _HMAC_SHA256_Init(&Phctx, passwd, passwdlen, - tmp32, &u.tmp8[0], &u.tmp8[64]); + HMAC_SHA256_Init(&Phctx, passwd, passwdlen); /* Compute HMAC state after processing P and S. */ memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX)); - _HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32); + HMAC_SHA256_Update(&PShctx, salt, saltlen); /* Iterate through the blocks. */ for (i = 0; i * 32 < dkLen; i++) { @@ -569,8 +156,8 @@ generic: /* Compute U_1 = PRF(P, S || INT(i)). */ memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX)); - _HMAC_SHA256_Update(&hctx, ivec, 4, tmp32); - _HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8); + HMAC_SHA256_Update(&hctx, ivec, 4); + HMAC_SHA256_Final(T, &hctx); if (c > 1) { /* T_i = U_1 ... */ @@ -579,8 +166,8 @@ generic: for (j = 2; j <= c; j++) { /* Compute U_j. */ memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX)); - _HMAC_SHA256_Update(&hctx, U, 32, tmp32); - _HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8); + HMAC_SHA256_Update(&hctx, U, 32); + HMAC_SHA256_Final(U, &hctx); /* ... xor U_j ... */ for (k = 0; k < 32; k++) @@ -594,15 +181,4 @@ generic: clen = 32; memcpy(&buf[i * 32], T, clen); } - - /* Clean the stack. 
*/ - explicit_bzero(&Phctx, sizeof(HMAC_SHA256_CTX)); - explicit_bzero(&PShctx, sizeof(HMAC_SHA256_CTX)); - explicit_bzero(U, 32); - explicit_bzero(T, 32); - -cleanup: - explicit_bzero(&hctx, sizeof(HMAC_SHA256_CTX)); - explicit_bzero(tmp32, 288); - explicit_bzero(&u, sizeof(u)); } diff --git a/libbb/yescrypt/alg-sha256.h b/libbb/yescrypt/alg-sha256.h index 1e75307d3..8a4968267 100644 --- a/libbb/yescrypt/alg-sha256.h +++ b/libbb/yescrypt/alg-sha256.h @@ -28,43 +28,12 @@ * Use #defines in order to avoid namespace collisions with anyone else's * SHA256 code (e.g., the code in OpenSSL). */ -#define SHA256_Init libcperciva_SHA256_Init -#define SHA256_Update libcperciva_SHA256_Update -#define SHA256_Final libcperciva_SHA256_Final -#define SHA256_Buf libcperciva_SHA256_Buf -#define SHA256_CTX libcperciva_SHA256_CTX #define HMAC_SHA256_Init libcperciva_HMAC_SHA256_Init #define HMAC_SHA256_Update libcperciva_HMAC_SHA256_Update #define HMAC_SHA256_Final libcperciva_HMAC_SHA256_Final #define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf #define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX -/* Context structure for SHA256 operations. */ -typedef struct { - uint32_t state[8]; - uint64_t count; - uint8_t buf[64]; -} SHA256_CTX; - -/** - * SHA256_Init(ctx): - * Initialize the SHA256 context ${ctx}. - */ -extern void SHA256_Init(SHA256_CTX *); - -/** - * SHA256_Update(ctx, in, len): - * Input ${len} bytes from ${in} into the SHA256 context ${ctx}. - */ -extern void SHA256_Update(SHA256_CTX *, const void *, size_t); - -/** - * SHA256_Final(digest, ctx): - * Output the SHA256 hash of the data input to the context ${ctx} into the - * buffer ${digest}. - */ -extern void SHA256_Final(uint8_t[32], SHA256_CTX *); - /** * SHA256_Buf(in, len, digest): * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. @@ -73,37 +42,10 @@ extern void SHA256_Buf(const void *, size_t, uint8_t[32]); /* Context structure for HMAC-SHA256 operations. 
*/ typedef struct { - SHA256_CTX ictx; - SHA256_CTX octx; + sha256_ctx_t ictx; + sha256_ctx_t octx; } HMAC_SHA256_CTX; -/** - * HMAC_SHA256_Init(ctx, K, Klen): - * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from - * ${K}. - */ -extern void HMAC_SHA256_Init(HMAC_SHA256_CTX *, const void *, size_t); - -/** - * HMAC_SHA256_Update(ctx, in, len): - * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. - */ -extern void HMAC_SHA256_Update(HMAC_SHA256_CTX *, const void *, size_t); - -/** - * HMAC_SHA256_Final(digest, ctx): - * Output the HMAC-SHA256 of the data input to the context ${ctx} into the - * buffer ${digest}. - */ -extern void HMAC_SHA256_Final(uint8_t[32], HMAC_SHA256_CTX *); - -/** - * HMAC_SHA256_Buf(K, Klen, in, len, digest): - * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of - * length ${Klen}, and write the result to ${digest}. - */ -extern void HMAC_SHA256_Buf(const void *, size_t, const void *, size_t, uint8_t[32]); - /** * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and -- cgit v1.2.3-55-g6feb From 62abd47815f0ee2f6c0ea6549fabe6d5c307ef8d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 10:50:46 +0200 Subject: Move "sha256-hash a memory array and produce the digest" helper to libbb Signed-off-by: Denys Vlasenko --- include/libbb.h | 1 + libbb/yescrypt/alg-sha256.c | 26 +++++--------------------- libbb/yescrypt/alg-sha256.h | 6 ------ libbb/yescrypt/alg-yescrypt-kdf.c | 2 +- 4 files changed, 7 insertions(+), 28 deletions(-) (limited to 'libbb') diff --git a/include/libbb.h b/include/libbb.h index 9a0a2f916..270a9d593 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2233,6 +2233,7 @@ enum { SHA512_OUTSIZE = 64, SHA3_OUTSIZE = 28, }; +void FAST_FUNC sha256_block(const void *in, size_t len, uint8_t hash[32]); extern uint32_t *global_crc32_table; uint32_t 
*crc32_filltable(uint32_t *tbl256, int endian) FAST_FUNC; diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index 038ac0ddb..315c094a2 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -25,38 +25,22 @@ * SUCH DAMAGE. */ -/** - * SHA256_Buf(in, len, digest): - * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. - */ -void -SHA256_Buf(const void * in, size_t len, uint8_t digest[32]) -{ - sha256_ctx_t ctx; - sha256_begin(&ctx); - sha256_hash(&ctx, in, len); - sha256_end(&ctx, digest); -} - /** * HMAC_SHA256_Init(ctx, K, Klen): * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from * ${K}. */ static void -HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) +HMAC_SHA256_Init(HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen) { uint8_t pad[64]; uint8_t khash[32]; - const uint8_t * K = _K; + const uint8_t *K = _K; size_t i; /* If Klen > 64, the key is really SHA256(K). */ if (Klen > 64) { -// SHA256_Init(&ctx->ictx); -// _SHA256_Update(&ctx->ictx, K, Klen, tmp32); -// _SHA256_Final(khash, &ctx->ictx, tmp32); - SHA256_Buf(K, Klen, khash); + sha256_block(K, Klen, khash); K = khash; Klen = 32; } @@ -81,7 +65,7 @@ HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. */ static void -HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len) +HMAC_SHA256_Update(HMAC_SHA256_CTX *ctx, const void *in, size_t len) { /* Feed data to the inner SHA256 operation. */ sha256_hash(&ctx->ictx, in, len); @@ -93,7 +77,7 @@ HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len) * buffer ${digest}. 
*/ static void -HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx) +HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX *ctx) { uint8_t ihash[32]; diff --git a/libbb/yescrypt/alg-sha256.h b/libbb/yescrypt/alg-sha256.h index 8a4968267..6d2cc0a04 100644 --- a/libbb/yescrypt/alg-sha256.h +++ b/libbb/yescrypt/alg-sha256.h @@ -34,12 +34,6 @@ #define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf #define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX -/** - * SHA256_Buf(in, len, digest): - * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. - */ -extern void SHA256_Buf(const void *, size_t, uint8_t[32]); - /* Context structure for HMAC-SHA256 operations. */ typedef struct { sha256_ctx_t ictx; diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 01a66a6a8..5c1f1006a 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -945,7 +945,7 @@ static int yescrypt_kdf32_body( size_t clen = /*buflen:*/32; if (clen > sizeof(dk)) clen = sizeof(dk); - SHA256_Buf(sha256, sizeof(sha256), dk); + sha256_block(sha256, sizeof(sha256), dk); memcpy(buf32, dk, clen); } } -- cgit v1.2.3-55-g6feb From b21cd481db85a193677bc956ab14c6418711d8e4 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 11:02:03 +0200 Subject: libbb/yescrypt: code shrink function old new delta libcperciva_HMAC_SHA256_Final 56 53 -3 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-sha256.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index 315c094a2..a1d4275e6 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -72,19 +72,17 @@ HMAC_SHA256_Update(HMAC_SHA256_CTX *ctx, const void *in, size_t len) } /** - * HMAC_SHA256_Final(digest, ctx): + * HMAC_SHA256_Final(ctx, digest): * Output the HMAC-SHA256 of the data input to the context ${ctx} into the * buffer 
${digest}. */ static void -HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX *ctx) +HMAC_SHA256_Final(HMAC_SHA256_CTX *ctx, uint8_t digest[32]) { - uint8_t ihash[32]; - /* Finish the inner SHA256 operation. */ - sha256_end(&ctx->ictx, ihash); + sha256_end(&ctx->ictx, digest); /* using digest[] as scratch space */ /* Feed the inner hash to the outer SHA256 operation. */ - sha256_hash(&ctx->octx, ihash, 32); + sha256_hash(&ctx->octx, digest, 32); /* using digest[] as scratch space */ /* Finish the outer SHA256 operation. */ sha256_end(&ctx->octx, digest); } @@ -101,7 +99,7 @@ HMAC_SHA256_Buf(const void *K, size_t Klen, const void *in, size_t len, HMAC_SHA256_CTX ctx; HMAC_SHA256_Init(&ctx, K, Klen); HMAC_SHA256_Update(&ctx, in, len); - HMAC_SHA256_Final(digest, &ctx); + HMAC_SHA256_Final(&ctx, digest); } /** @@ -141,18 +139,16 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, /* Compute U_1 = PRF(P, S || INT(i)). */ memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX)); HMAC_SHA256_Update(&hctx, ivec, 4); - HMAC_SHA256_Final(T, &hctx); + HMAC_SHA256_Final(&hctx, T); if (c > 1) { /* T_i = U_1 ... */ memcpy(U, T, 32); - for (j = 2; j <= c; j++) { /* Compute U_j. */ memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX)); HMAC_SHA256_Update(&hctx, U, 32); - HMAC_SHA256_Final(U, &hctx); - + HMAC_SHA256_Final(&hctx, U); /* ... xor U_j ... 
*/ for (k = 0; k < 32; k++) T[k] ^= U[k]; -- cgit v1.2.3-55-g6feb From 0893bc3bac8705b22679ad77f39ee56d3ba728c9 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 11:12:13 +0200 Subject: libbb/yescrypt: code shrink static.PBKDF2_SHA256 - 189 +189 HMAC_SHA256_Init - 159 +159 HMAC_SHA256_Buf - 58 +58 HMAC_SHA256_Final - 53 +53 i2a64 - 42 +42 yescrypt_r 1221 1215 -6 yescrypt_kdf32_body 1064 1046 -18 i64c 42 - -42 libcperciva_HMAC_SHA256_Final 53 - -53 libcperciva_HMAC_SHA256_Buf 58 - -58 ascii64 65 - -65 libcperciva_HMAC_SHA256_Init 159 - -159 PBKDF2_SHA256 386 - -386 ------------------------------------------------------------------------------ (add/remove: 5/6 grow/shrink: 0/2 up/down: 501/-787) Total: -286 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 2 ++ libbb/pw_encrypt.c | 12 +++++------- libbb/pw_encrypt_des.c | 8 ++++---- libbb/yescrypt/alg-sha256.c | 2 +- libbb/yescrypt/alg-sha256.h | 18 ------------------ libbb/yescrypt/alg-yescrypt-common.c | 2 +- libbb/yescrypt/y.c | 6 ------ 7 files changed, 13 insertions(+), 37 deletions(-) (limited to 'libbb') diff --git a/include/libbb.h b/include/libbb.h index 270a9d593..e88499a80 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2189,6 +2189,8 @@ char *decode_base64(char *dst, const char **pp_src) FAST_FUNC; char *decode_base32(char *dst, const char **pp_src) FAST_FUNC; void read_base64(FILE *src_stream, FILE *dst_stream, int flags) FAST_FUNC; +int FAST_FUNC i2a64(int i); + typedef struct md5_ctx_t { uint8_t wbuffer[64]; /* always correctly aligned for uint64_t */ void (*process_block)(struct md5_ctx_t*) FAST_FUNC; diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 1d530974e..97dee7229 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -13,11 +13,10 @@ #endif #include "libbb.h" -/* static const uint8_t ascii64[] ALIGN1 = +/* 0..63 -> * "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; */ - -static int i64c(int i) +int FAST_FUNC i2a64(int i) { i &= 
0x3f; if (i == 0) @@ -45,8 +44,8 @@ int FAST_FUNC crypt_make_rand64encoded(char *p, int cnt /*, int x */) * It has no problem with visibly alternating lowest bit * but is also weak in cryptographic sense + needs div, * which needs more code (and slower) on many CPUs */ - *p++ = i64c(x >> 16); - *p++ = i64c(x >> 22); + *p++ = i2a64(x >> 16); + *p++ = i2a64(x >> 22); } while (--cnt); *p = '\0'; return x; @@ -120,8 +119,7 @@ static char* to64(char *s, unsigned v, int n) { while (--n >= 0) { - /* *s++ = ascii64[v & 0x3f]; */ - *s++ = i64c(v); + *s++ = i2a64(v); v >>= 6; } return s; diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index fe8237cfe..c836ab684 100644 --- a/libbb/pw_encrypt_des.c +++ b/libbb/pw_encrypt_des.c @@ -703,10 +703,10 @@ to64_msb_first(char *s, unsigned v) *s++ = ascii64[(v >> 6) & 0x3f]; /* bits 11..6 */ *s = ascii64[v & 0x3f]; /* bits 5..0 */ #endif - *s++ = i64c(v >> 18); /* bits 23..18 */ - *s++ = i64c(v >> 12); /* bits 17..12 */ - *s++ = i64c(v >> 6); /* bits 11..6 */ - *s = i64c(v); /* bits 5..0 */ + *s++ = i2a64(v >> 18); /* bits 23..18 */ + *s++ = i2a64(v >> 12); /* bits 17..12 */ + *s++ = i2a64(v >> 6); /* bits 11..6 */ + *s = i2a64(v); /* bits 5..0 */ } static char * diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index a1d4275e6..a17028b6b 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -107,7 +107,7 @@ HMAC_SHA256_Buf(const void *K, size_t Klen, const void *in, size_t len, * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). 
*/ -void +static void PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen) diff --git a/libbb/yescrypt/alg-sha256.h b/libbb/yescrypt/alg-sha256.h index 6d2cc0a04..862f49dbe 100644 --- a/libbb/yescrypt/alg-sha256.h +++ b/libbb/yescrypt/alg-sha256.h @@ -24,26 +24,8 @@ * SUCH DAMAGE. */ -/* - * Use #defines in order to avoid namespace collisions with anyone else's - * SHA256 code (e.g., the code in OpenSSL). - */ -#define HMAC_SHA256_Init libcperciva_HMAC_SHA256_Init -#define HMAC_SHA256_Update libcperciva_HMAC_SHA256_Update -#define HMAC_SHA256_Final libcperciva_HMAC_SHA256_Final -#define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf -#define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX - /* Context structure for HMAC-SHA256 operations. */ typedef struct { sha256_ctx_t ictx; sha256_ctx_t octx; } HMAC_SHA256_CTX; - -/** - * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): - * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and - * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). 
- */ -extern void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t, - uint64_t, uint8_t *, size_t); diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index da7fa5e0f..7a1e92cab 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -88,7 +88,7 @@ static uint8_t *encode64_uint32_fixed( for (bits = 0; bits < srcbits; bits += 6) { if (dstlen < 2) return NULL; - *dst++ = itoa64[src & 0x3f]; + *dst++ = i2a64(src); dstlen--; src >>= 6; } diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c index 042c439a0..2c6afd4f8 100644 --- a/libbb/yescrypt/y.c +++ b/libbb/yescrypt/y.c @@ -124,12 +124,6 @@ VECTOR_TO_CPU(be,32); VECTOR_TO_CPU(be,64); -const unsigned char ascii64[65] = - "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; -/* 0000000000111111111122222222223333333333444444444455555555556666 */ -/* 0123456789012345678901234567890123456789012345678901234567890123 */ -#define itoa64 ascii64 - #define YESCRYPT_INTERNAL #include "alg-sha256.h" #include "alg-yescrypt.h" -- cgit v1.2.3-55-g6feb From 9c4cd75d12894cd9139d549dfe445f18572ee7d1 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 11:16:43 +0200 Subject: restore mangled comment Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'libbb') diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 97dee7229..71f7731fd 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -77,9 +77,9 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) // The "j9T$" below is the default "yescrypt parameters" encoded by yescrypt_encode_params_r(): // //shadow-4.17.4/src/passwd.c -// salt = crypt_make_rand64encoded(NULL, NULL); +// salt = crypt_make_salt(NULL, NULL); //shadow-4.17.4/lib/salt.c -//const char *crypt_make_rand64encoded(const char *meth, void *arg) +//const char *crypt_make_salt(const char *meth, 
void *arg) // if (streq(method, "YESCRYPT")) { // MAGNUM(result, 'y'); // salt_len = YESCRYPT_SALT_SIZE; // 24 -- cgit v1.2.3-55-g6feb From b8f76001662ad4b3073945589a0cb270b627c994 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 18:27:16 +0200 Subject: libbb/yescrypt: more efficient byteswapping function old new delta static.smix1 631 604 -27 static.smix2 452 420 -32 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-59) Total: -59 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-sha256.c | 10 ++-- libbb/yescrypt/alg-yescrypt-kdf.c | 8 +-- libbb/yescrypt/y.c | 122 -------------------------------------- 3 files changed, 8 insertions(+), 132 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index a17028b6b..28d8c5296 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -114,16 +114,12 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, { HMAC_SHA256_CTX Phctx, PShctx, hctx; size_t i; - uint8_t ivec[4]; uint8_t U[32]; uint8_t T[32]; uint64_t j; int k; size_t clen; - /* Sanity-check. */ - assert(dkLen <= 32 * (size_t)(UINT32_MAX)); - /* Compute HMAC state after processing P. */ HMAC_SHA256_Init(&Phctx, passwd, passwdlen); @@ -133,12 +129,14 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, /* Iterate through the blocks. */ for (i = 0; i * 32 < dkLen; i++) { + uint32_t ivec; + /* Generate INT(i + 1). */ - be32enc(ivec, (uint32_t)(i + 1)); + ivec = SWAP_BE32((uint32_t)(i + 1)); /* Compute U_1 = PRF(P, S || INT(i)). 
*/ memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX)); - HMAC_SHA256_Update(&hctx, ivec, 4); + HMAC_SHA256_Update(&hctx, &ivec, 4); HMAC_SHA256_Final(&hctx, T); if (c > 1) { diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 5c1f1006a..ee8fb408e 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -501,7 +501,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, yescrypt_flags_t flags, salsa20_blk_t *dst = &X[i]; size_t k; for (k = 0; k < 16; k++) - tmp->w[k] = le32dec((const uint8_t *) &src->w[k]); + tmp->w[k] = SWAP_LE32(src->w[k]); salsa20_simd_shuffle(tmp, dst); } @@ -591,7 +591,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, yescrypt_flags_t flags, salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; size_t k; for (k = 0; k < 16; k++) - le32enc((uint8_t *)&tmp->w[k], src->w[k]); + tmp->w[k] = SWAP_LE32(src->w[k]); salsa20_simd_unshuffle(tmp, dst); } } @@ -621,7 +621,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, salsa20_blk_t *dst = &X[i]; size_t k; for (k = 0; k < 16; k++) - tmp->w[k] = le32dec((const uint8_t *)&src->w[k]); + tmp->w[k] = SWAP_LE32(src->w[k]); salsa20_simd_shuffle(tmp, dst); } @@ -676,7 +676,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; size_t k; for (k = 0; k < 16; k++) - le32enc((uint8_t *)&tmp->w[k], src->w[k]); + tmp->w[k] = SWAP_LE32(src->w[k]); salsa20_simd_unshuffle(tmp, dst); } } diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c index 2c6afd4f8..92c6eb7a8 100644 --- a/libbb/yescrypt/y.c +++ b/libbb/yescrypt/y.c @@ -2,128 +2,6 @@ #include -#include - -static inline void -cpu_to_le32 (unsigned char *buf, uint32_t n) -{ - buf[0] = (unsigned char)((n & 0x000000FFu) >> 0); - buf[1] = (unsigned char)((n & 0x0000FF00u) >> 8); - buf[2] = (unsigned char)((n & 0x00FF0000u) >> 16); - buf[3] = (unsigned char)((n & 0xFF000000u) >> 24); -} -static inline uint32_t 
-le32_to_cpu (const unsigned char *buf) -{ - return ((((uint32_t)buf[0]) << 0) | - (((uint32_t)buf[1]) << 8) | - (((uint32_t)buf[2]) << 16) | - (((uint32_t)buf[3]) << 24) ); -} - -/* Alternative names used in code derived from Colin Percival's - cryptography libraries. */ -#define le32enc cpu_to_le32 -#define le32dec le32_to_cpu -#define le64enc cpu_to_le64 -#define le64dec le64_to_cpu - -#define be32enc cpu_to_be32 -#define be32dec be32_to_cpu -#define be64enc cpu_to_be64 -#define be64dec be64_to_cpu - -#define be32enc_vect cpu_to_be32_vect -#define be32dec_vect be32_to_cpu_vect -#define be64enc_vect cpu_to_be64_vect -#define be64dec_vect be64_to_cpu_vect - - -//USED ONY BY SHA256 for be32_to_cpu_vect(): -static inline void -cpu_to_be32(unsigned char *buf, uint32_t n) -{ - buf[0] = (unsigned char)((n & 0xFF000000u) >> 24); - buf[1] = (unsigned char)((n & 0x00FF0000u) >> 16); - buf[2] = (unsigned char)((n & 0x0000FF00u) >> 8); - buf[3] = (unsigned char)((n & 0x000000FFu) >> 0); -} -static inline void -cpu_to_be64 (unsigned char *buf, uint64_t n) -{ - buf[0] = (unsigned char)((n & 0xFF00000000000000ull) >> 56); - buf[1] = (unsigned char)((n & 0x00FF000000000000ull) >> 48); - buf[2] = (unsigned char)((n & 0x0000FF0000000000ull) >> 40); - buf[3] = (unsigned char)((n & 0x000000FF00000000ull) >> 32); - buf[4] = (unsigned char)((n & 0x00000000FF000000ull) >> 24); - buf[5] = (unsigned char)((n & 0x0000000000FF0000ull) >> 16); - buf[6] = (unsigned char)((n & 0x000000000000FF00ull) >> 8); - buf[7] = (unsigned char)((n & 0x00000000000000FFull) >> 0); -} -static inline uint32_t -be32_to_cpu (const unsigned char *buf) -{ - return ((((uint32_t)buf[0]) << 24) | - (((uint32_t)buf[1]) << 16) | - (((uint32_t)buf[2]) << 8) | - (((uint32_t)buf[3]) << 0) ); -} -static inline uint64_t -be64_to_cpu (const unsigned char *buf) -{ - return ((((uint64_t)buf[0]) << 56) | - (((uint64_t)buf[1]) << 48) | - (((uint64_t)buf[2]) << 40) | - (((uint64_t)buf[3]) << 32) | - (((uint64_t)buf[4]) << 24) 
| - (((uint64_t)buf[5]) << 16) | - (((uint64_t)buf[6]) << 8) | - (((uint64_t)buf[7]) << 0) ); -} -/* Template: Define a function named cpu_to__vect that - takes a vector SRC of LEN integers, each of type uint_t, and - writes them to the buffer DST in the endianness defined by END. - Caution: LEN is the number of vector elements, not the total size - of the buffers. */ -#define VECTOR_CPU_TO(end, bits) VECTOR_CPU_TO_(end##bits, uint##bits##_t) -#define VECTOR_CPU_TO_(prim, stype) \ - static inline void \ - cpu_to_##prim##_vect(uint8_t *dst, const stype *src, size_t len) \ - { \ - while (len) \ - { \ - cpu_to_##prim(dst, *src); \ - src += 1; \ - dst += sizeof(stype); \ - len -= 1; \ - } \ - } struct _swallow_semicolon -/* Template: Define a function named _to_cpu_vect that - reads a vector of LEN integers, each of type uint_t, from the - buffer SRC, in the endianness defined by END, and writes them to - the vector DST. Caution: LEN is the number of vector elements, not - the total size of the buffers. */ -#define VECTOR_TO_CPU(end, bits) VECTOR_TO_CPU_(end##bits, uint##bits##_t) -#define VECTOR_TO_CPU_(prim, dtype) \ - static inline void \ - prim##_to_cpu_vect(dtype *dst, const uint8_t *src, size_t len) \ - { \ - while (len) \ - { \ - *dst = prim##_to_cpu(src); \ - src += sizeof(dtype); \ - dst += 1; \ - len -= 1; \ - } \ - } struct _swallow_semicolon -/* These are the vectorized endianness-conversion functions that are - presently used. Add more as necessary. 
*/ -VECTOR_CPU_TO(be,32); -VECTOR_CPU_TO(be,64); -VECTOR_TO_CPU(be,32); -VECTOR_TO_CPU(be,64); - - #define YESCRYPT_INTERNAL #include "alg-sha256.h" #include "alg-yescrypt.h" -- cgit v1.2.3-55-g6feb From f5af8aefeae7fd73167403dd407e9a14aead5948 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 18:35:37 +0200 Subject: libbb/yescrypt: shrink PBKDF2 function old new delta static.PBKDF2_SHA256 189 176 -13 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-sha256.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index 28d8c5296..f56b905ad 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -77,7 +77,7 @@ HMAC_SHA256_Update(HMAC_SHA256_CTX *ctx, const void *in, size_t len) * buffer ${digest}. */ static void -HMAC_SHA256_Final(HMAC_SHA256_CTX *ctx, uint8_t digest[32]) +HMAC_SHA256_Final(HMAC_SHA256_CTX *ctx, void *digest) { /* Finish the inner SHA256 operation. */ sha256_end(&ctx->ictx, digest); /* using digest[] as scratch space */ @@ -93,8 +93,7 @@ HMAC_SHA256_Final(HMAC_SHA256_CTX *ctx, uint8_t digest[32]) * length ${Klen}, and write the result to ${digest}. */ static void -HMAC_SHA256_Buf(const void *K, size_t Klen, const void *in, size_t len, - uint8_t digest[32]) +HMAC_SHA256_Buf(const void *K, size_t Klen, const void *in, size_t len, void *digest) { HMAC_SHA256_CTX ctx; HMAC_SHA256_Init(&ctx, K, Klen); @@ -114,11 +113,6 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, { HMAC_SHA256_CTX Phctx, PShctx, hctx; size_t i; - uint8_t U[32]; - uint8_t T[32]; - uint64_t j; - int k; - size_t clen; /* Compute HMAC state after processing P. */ HMAC_SHA256_Init(&Phctx, passwd, passwdlen); @@ -128,14 +122,19 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, HMAC_SHA256_Update(&PShctx, salt, saltlen); /* Iterate through the blocks. 
*/ - for (i = 0; i * 32 < dkLen; i++) { + for (i = 0; dkLen != 0; i++) { + uint64_t U[32 / 8]; + uint64_t T[32 / 8]; + uint64_t j; uint32_t ivec; + size_t clen; + int k; /* Generate INT(i + 1). */ ivec = SWAP_BE32((uint32_t)(i + 1)); /* Compute U_1 = PRF(P, S || INT(i)). */ - memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX)); + hctx = PShctx; HMAC_SHA256_Update(&hctx, &ivec, 4); HMAC_SHA256_Final(&hctx, T); @@ -144,19 +143,20 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, memcpy(U, T, 32); for (j = 2; j <= c; j++) { /* Compute U_j. */ - memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX)); + hctx = Phctx; HMAC_SHA256_Update(&hctx, U, 32); HMAC_SHA256_Final(&hctx, U); /* ... xor U_j ... */ - for (k = 0; k < 32; k++) + for (k = 0; k < 32 / 8; k++) T[k] ^= U[k]; } } /* Copy as many bytes as necessary into buf. */ - clen = dkLen - i * 32; + clen = dkLen; if (clen > 32) clen = 32; - memcpy(&buf[i * 32], T, clen); + buf = mempcpy(buf, T, clen); + dkLen -= clen; } } -- cgit v1.2.3-55-g6feb From 23b5527f5c400a300c56afa36a6a5abaa81adbb7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 19:08:57 +0200 Subject: libbb/yescrypt: use common ASCII char-to-64 conversion code function old new delta a2i64 - 46 +46 yescrypt_r 1215 1235 +20 decode64_uint32 167 177 +10 atoi64 25 - -25 ascii_to_bin 53 - -53 static.atoi64_partial 77 - -77 ------------------------------------------------------------------------------ (add/remove: 1/3 grow/shrink: 2/0 up/down: 76/-155) Total: -79 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 1 + libbb/pw_encrypt.c | 25 +++++++++++++++++++++++++ libbb/pw_encrypt_des.c | 23 ++--------------------- libbb/yescrypt/alg-yescrypt-common.c | 22 +++------------------- 4 files changed, 31 insertions(+), 40 deletions(-) (limited to 'libbb') diff --git a/include/libbb.h b/include/libbb.h index e88499a80..1c23d2f66 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2190,6 +2190,7 @@ char *decode_base32(char *dst, const char 
**pp_src) FAST_FUNC; void read_base64(FILE *src_stream, FILE *dst_stream, int flags) FAST_FUNC; int FAST_FUNC i2a64(int i); +int FAST_FUNC a2i64(char c); typedef struct md5_ctx_t { uint8_t wbuffer[64]; /* always correctly aligned for uint64_t */ diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 71f7731fd..af84606bf 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -30,6 +30,31 @@ int FAST_FUNC i2a64(int i) return ('a' - 38 + i); } +/* Returns >=64 for invalid chars */ +int FAST_FUNC a2i64(char c) +{ + unsigned char ch = c; + if (ch >= 'a') + /* "a..z" to 38..63 */ + /* anything after "z": positive int >= 64 */ + return (ch - 'a' + 38); + + if (ch > 'Z') + /* after "Z" but before "a": positive byte >= 64 */ + return ch; + + if (ch >= 'A') + /* "A..Z" to 12..37 */ + return (ch - 'A' + 12); + + if (ch > '9') + return 64; + + /* "./0123456789" to 0,1,2..11 */ + /* anything before "." becomes positive byte >= 64 */ + return (unsigned char)(ch - '.'); +} + int FAST_FUNC crypt_make_rand64encoded(char *p, int cnt /*, int x */) { /* was: x += ... */ diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index c836ab684..8b5edaaed 100644 --- a/libbb/pw_encrypt_des.c +++ b/libbb/pw_encrypt_des.c @@ -200,25 +200,6 @@ static const uint32_t bits32[32] ALIGN4 = { static const uint8_t bits8[8] ALIGN1 = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; -static int -ascii_to_bin(char ch) -{ - if (ch > 'z') - return 0; - if (ch >= 'a') - return (ch - 'a' + 38); - if (ch > 'Z') - return 0; - if (ch >= 'A') - return (ch - 'A' + 12); - if (ch > '9') - return 0; - if (ch >= '.') - return (ch - '.'); - return 0; -} - - /* Static stuff that stays resident and doesn't change after * being initialized, and therefore doesn't need to be made * reentrant. 
*/ @@ -740,8 +721,8 @@ des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], */ output[0] = salt_str[0]; output[1] = salt_str[1]; - salt = (ascii_to_bin(salt_str[1]) << 6) - | ascii_to_bin(salt_str[0]); + salt = (a2i64(salt_str[1]) << 6) + | a2i64(salt_str[0]); setup_salt(ctx, salt); /* set ctx->saltbits for do_des() */ /* Do it. */ diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 7a1e92cab..b9a5c51ac 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -23,22 +23,6 @@ * yescrypt_params_t field, and convert salt ti binary - * both of these are negligible compared to main hashing operation */ -static NOINLINE uint32_t atoi64(uint8_t src) -{ - static const uint8_t atoi64_partial[77] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 64, 64, 64, 64, 64, 64, 64, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 64, 64, 64, 64, 64, 64, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 - }; - if (src >= '.' && src <= 'z') - return atoi64_partial[src - '.']; - return 64; -} - static NOINLINE const uint8_t *decode64_uint32( uint32_t *dst, const uint8_t *src, uint32_t val) @@ -49,7 +33,7 @@ static NOINLINE const uint8_t *decode64_uint32( if (!src) /* prevous decode failed already? 
*/ goto fail; - c = atoi64(*src++); + c = a2i64(*src++); if (c > 63) goto fail; @@ -64,7 +48,7 @@ static NOINLINE const uint8_t *decode64_uint32( val += (c - start) << bits; while (--chars) { - c = atoi64(*src++); + c = a2i64(*src++); if (c > 63) goto fail; bits -= 6; @@ -138,7 +122,7 @@ static const uint8_t *decode64( while (dstpos <= *dstlen && srclen) { uint32_t value = 0, bits = 0; while (srclen--) { - uint32_t c = atoi64(*src); + uint32_t c = a2i64(*src); if (c > 63) { srclen = 0; break; -- cgit v1.2.3-55-g6feb From 82bbbd2e538c29a585a6cb00be2e999f50bf865c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 19:14:09 +0200 Subject: libbb: code shrink in DES crypt function old new delta pw_encrypt 941 945 +4 bits32 128 - -128 ------------------------------------------------------------------------------ (add/remove: 0/1 grow/shrink: 1/0 up/down: 4/-128) Total: -124 bytes Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt_des.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'libbb') diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index 8b5edaaed..38c76a15c 100644 --- a/libbb/pw_encrypt_des.c +++ b/libbb/pw_encrypt_des.c @@ -186,17 +186,6 @@ static const uint8_t pbox[32] ALIGN1 = { 2, 8, 24, 14, 32, 27, 3, 9, 19, 13, 30, 6, 22, 11, 4, 25 }; -static const uint32_t bits32[32] ALIGN4 = { - 0x80000000, 0x40000000, 0x20000000, 0x10000000, - 0x08000000, 0x04000000, 0x02000000, 0x01000000, - 0x00800000, 0x00400000, 0x00200000, 0x00100000, - 0x00080000, 0x00040000, 0x00020000, 0x00010000, - 0x00008000, 0x00004000, 0x00002000, 0x00001000, - 0x00000800, 0x00000400, 0x00000200, 0x00000100, - 0x00000080, 0x00000040, 0x00000020, 0x00000010, - 0x00000008, 0x00000004, 0x00000002, 0x00000001 -}; - static const uint8_t bits8[8] ALIGN1 = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; @@ -335,11 +324,18 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx) int i, j, b, k, inbit, obit; uint32_t p; 
const uint32_t *bits28, *bits24; + uint32_t bits32[32]; if (!ctx) ctx = xmalloc(sizeof(*ctx)); const_ctx = cctx; + p = 0x80000000U; + for (i = 0; p; i++) { + bits32[i] = p; + p >>= 1; + } + #if USE_REPETITIVE_SPEEDUP old_rawkey0 = old_rawkey1 = 0; old_salt = 0; -- cgit v1.2.3-55-g6feb From 447eb6bf71cf9d73a23f15c11b62e9e886e712ff Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 19:30:58 +0200 Subject: libbb: in DES crypt, error out on invalid salt chars function old new delta des_crypt 1308 1327 +19 Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt_des.c | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) (limited to 'libbb') diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index 38c76a15c..bfa039bb5 100644 --- a/libbb/pw_encrypt_des.c +++ b/libbb/pw_encrypt_des.c @@ -674,12 +674,6 @@ do_des(struct des_ctx *ctx, /*uint32_t l_in, uint32_t r_in,*/ uint32_t *l_out, u static void to64_msb_first(char *s, unsigned v) { -#if 0 - *s++ = ascii64[(v >> 18) & 0x3f]; /* bits 23..18 */ - *s++ = ascii64[(v >> 12) & 0x3f]; /* bits 17..12 */ - *s++ = ascii64[(v >> 6) & 0x3f]; /* bits 11..6 */ - *s = ascii64[v & 0x3f]; /* bits 5..0 */ -#endif *s++ = i2a64(v >> 18); /* bits 23..18 */ *s++ = i2a64(v >> 12); /* bits 17..12 */ *s++ = i2a64(v >> 6); /* bits 11..6 */ @@ -717,34 +711,19 @@ des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], */ output[0] = salt_str[0]; output[1] = salt_str[1]; - salt = (a2i64(salt_str[1]) << 6) - | a2i64(salt_str[0]); + + salt = a2i64(salt_str[0]); + if (salt >= 64) + return NULL; /* bad salt char */ + salt |= (a2i64(salt_str[1]) << 6); + if (salt >= (64 << 6)) + return NULL; /* bad salt char */ setup_salt(ctx, salt); /* set ctx->saltbits for do_des() */ /* Do it. */ do_des(ctx, /*0, 0,*/ &r0, &r1, 25 /* count */); /* Now encode the result. 
*/ -#if 0 -{ - uint32_t l = (r0 >> 8); - q = (uint8_t *)output + 2; - *q++ = ascii64[(l >> 18) & 0x3f]; /* bits 31..26 of r0 */ - *q++ = ascii64[(l >> 12) & 0x3f]; /* bits 25..20 of r0 */ - *q++ = ascii64[(l >> 6) & 0x3f]; /* bits 19..14 of r0 */ - *q++ = ascii64[l & 0x3f]; /* bits 13..8 of r0 */ - l = ((r0 << 16) | (r1 >> 16)); - *q++ = ascii64[(l >> 18) & 0x3f]; /* bits 7..2 of r0 */ - *q++ = ascii64[(l >> 12) & 0x3f]; /* bits 1..2 of r0 and 31..28 of r1 */ - *q++ = ascii64[(l >> 6) & 0x3f]; /* bits 27..22 of r1 */ - *q++ = ascii64[l & 0x3f]; /* bits 21..16 of r1 */ - l = r1 << 2; - *q++ = ascii64[(l >> 12) & 0x3f]; /* bits 15..10 of r1 */ - *q++ = ascii64[(l >> 6) & 0x3f]; /* bits 9..4 of r1 */ - *q++ = ascii64[l & 0x3f]; /* bits 3..0 of r1 + 00 */ - *q = 0; -} -#else /* Each call takes low-order 24 bits and stores 4 chars */ /* bits 31..8 of r0 */ to64_msb_first(output + 2, (r0 >> 8)); @@ -754,7 +733,6 @@ des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], to64_msb_first(output + 10, (r1 << 8)); /* extra zero byte is encoded as '.', fixing it */ output[13] = '\0'; -#endif return output; } -- cgit v1.2.3-55-g6feb From aebe6f71320a168504ca56cf9064d6cad744d770 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 19:47:29 +0200 Subject: libbb/yescrypt: remove inlines (which gcc detects anyway) and dead ifdefs Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 93 +++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 44 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index ee8fb408e..329e30d1f 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -42,17 +42,14 @@ #define unlikely(exp) (exp) #endif -#if 0 //def __SSE__ -... 
-#else /* !defined(__SSE2__) */ - typedef union { uint32_t w[16]; uint64_t d[8]; } salsa20_blk_t; -static inline void salsa20_simd_shuffle(const salsa20_blk_t *Bin, - salsa20_blk_t *Bout) +static void salsa20_simd_shuffle( + const salsa20_blk_t *Bin, + salsa20_blk_t *Bout) { #define COMBINE(out, in1, in2) \ Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); @@ -67,8 +64,9 @@ static inline void salsa20_simd_shuffle(const salsa20_blk_t *Bin, #undef COMBINE } -static inline void salsa20_simd_unshuffle(const salsa20_blk_t *Bin, - salsa20_blk_t *Bout) +static void salsa20_simd_unshuffle( + const salsa20_blk_t *Bin, + salsa20_blk_t *Bout) { #define UNCOMBINE(out, in1, in2) \ Bout->w[out * 2] = Bin->d[in1]; \ @@ -83,6 +81,7 @@ static inline void salsa20_simd_unshuffle(const salsa20_blk_t *Bin, UNCOMBINE(7, 3, 1) #undef UNCOMBINE } + #define DECL_X \ salsa20_blk_t X; #define DECL_Y \ @@ -105,8 +104,9 @@ static inline void salsa20_simd_unshuffle(const salsa20_blk_t *Bin, * salsa20(B): * Apply the Salsa20 core to the provided block. */ -static inline void salsa20(salsa20_blk_t *restrict B, - salsa20_blk_t *restrict Bout, uint32_t doublerounds) +static void salsa20(salsa20_blk_t *restrict B, + salsa20_blk_t *restrict Bout, + uint32_t doublerounds) { salsa20_blk_t X; #define x X.w @@ -193,15 +193,15 @@ static inline void salsa20(salsa20_blk_t *restrict B, #define INTEGERIFY (uint32_t)X.d[0] -#endif /* !defined(__SSE2__) */ - /** * blockmix_salsa8(Bin, Bout, r): * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r * bytes in length; the output Bout must also be the same size. 
*/ -static void blockmix_salsa8(const salsa20_blk_t *restrict Bin, - salsa20_blk_t *restrict Bout, size_t r) +static void blockmix_salsa8( + const salsa20_blk_t *restrict Bin, + salsa20_blk_t *restrict Bout, + size_t r) { size_t i; DECL_X @@ -213,9 +213,11 @@ static void blockmix_salsa8(const salsa20_blk_t *restrict Bin, } } -static uint32_t blockmix_salsa8_xor(const salsa20_blk_t *restrict Bin1, - const salsa20_blk_t *restrict Bin2, salsa20_blk_t *restrict Bout, - size_t r) +static uint32_t blockmix_salsa8_xor( + const salsa20_blk_t *restrict Bin1, + const salsa20_blk_t *restrict Bin2, + salsa20_blk_t *restrict Bout, + size_t r) { size_t i; DECL_X @@ -257,10 +259,6 @@ static uint32_t blockmix_salsa8_xor(const salsa20_blk_t *restrict Bin1, #define FORCE_REGALLOC_3 /* empty */ #define MAYBE_MEMORY_BARRIER /* empty */ -#if 0 //def __SSE2__ -... -#else /* !defined(__SSE2__) */ - #define PWXFORM_SIMD(x0, x1) { \ uint64_t x = x0 & Smask2; \ uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \ @@ -274,7 +272,6 @@ static uint32_t blockmix_salsa8_xor(const salsa20_blk_t *restrict Bin1, PWXFORM_SIMD(X.d[2], X.d[3]) \ PWXFORM_SIMD(X.d[4], X.d[5]) \ PWXFORM_SIMD(X.d[6], X.d[7]) -#endif /* * This offset helps address the 256-byte write block via the single-byte @@ -321,8 +318,11 @@ typedef struct { * Compute Bout = BlockMix_pwxform{salsa20/2, r, S}(Bin). The input Bin must * be 128r bytes in length; the output Bout must also be the same size. 
*/ -static void blockmix(const salsa20_blk_t *restrict Bin, - salsa20_blk_t *restrict Bout, size_t r, pwxform_ctx_t *restrict ctx) +static void blockmix( + const salsa20_blk_t *restrict Bin, + salsa20_blk_t *restrict Bout, + size_t r, + pwxform_ctx_t *restrict ctx) { uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; size_t w = ctx->w; @@ -353,8 +353,10 @@ static void blockmix(const salsa20_blk_t *restrict Bin, } static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, - const salsa20_blk_t *restrict Bin2, salsa20_blk_t *Bout, - size_t r, int Bin2_in_ROM, pwxform_ctx_t *restrict ctx) + const salsa20_blk_t *restrict Bin2, + salsa20_blk_t *Bout, + size_t r, int Bin2_in_ROM, + pwxform_ctx_t *restrict ctx) { uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; size_t w = ctx->w; @@ -413,9 +415,11 @@ static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, return INTEGERIFY; } -static uint32_t blockmix_xor_save(salsa20_blk_t *restrict Bin1out, - salsa20_blk_t *restrict Bin2, - size_t r, pwxform_ctx_t *restrict ctx) +static uint32_t blockmix_xor_save( + salsa20_blk_t *restrict Bin1out, + salsa20_blk_t *restrict Bin2, + size_t r, + pwxform_ctx_t *restrict ctx) { uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; size_t w = ctx->w; @@ -426,13 +430,6 @@ static uint32_t blockmix_xor_save(salsa20_blk_t *restrict Bin1out, /* Convert count of 128-byte blocks to max index of 64-byte block */ r = r * 2 - 1; -#ifdef PREFETCH - PREFETCH(&Bin2[r], _MM_HINT_T0) - for (i = 0; i < r; i++) { - PREFETCH(&Bin2[i], _MM_HINT_T0) - } -#endif - XOR_X_2(Bin1out[r], Bin2[r]) DECL_SMASK2REG @@ -487,9 +484,12 @@ static inline uint32_t integerify(const salsa20_blk_t *B, size_t r) * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY * to a multiple of at least 16 bytes. 
*/ -static void smix1(uint8_t *B, size_t r, uint32_t N, yescrypt_flags_t flags, - salsa20_blk_t *V, uint32_t NROM, const salsa20_blk_t *VROM, - salsa20_blk_t *XY, pwxform_ctx_t *ctx) +static void smix1(uint8_t *B, size_t r, uint32_t N, + yescrypt_flags_t flags, + salsa20_blk_t *V, + uint32_t NROM, const salsa20_blk_t *VROM, + salsa20_blk_t *XY, + pwxform_ctx_t *ctx) { size_t s = 2 * r; salsa20_blk_t *X = V, *Y = &V[s]; @@ -605,8 +605,11 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, yescrypt_flags_t flags, * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes. */ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, - yescrypt_flags_t flags, salsa20_blk_t *V, uint32_t NROM, - const salsa20_blk_t *VROM, salsa20_blk_t *XY, pwxform_ctx_t *ctx) + yescrypt_flags_t flags, + salsa20_blk_t *V, + uint32_t NROM, const salsa20_blk_t *VROM, + salsa20_blk_t *XY, + pwxform_ctx_t *ctx) { size_t s = 2 * r; salsa20_blk_t *X = XY, *Y = &XY[s]; @@ -705,9 +708,11 @@ static uint64_t p2floor(uint64_t x) * might also result in cache bank conflicts). 
*/ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, - yescrypt_flags_t flags, - salsa20_blk_t *V, uint32_t NROM, const salsa20_blk_t *VROM, - salsa20_blk_t *XY, uint8_t *S, uint8_t *passwd) + yescrypt_flags_t flags, + salsa20_blk_t *V, + uint32_t NROM, const salsa20_blk_t *VROM, + salsa20_blk_t *XY, + uint8_t *S, uint8_t *passwd) { size_t s = 2 * r; uint32_t Nchunk; -- cgit v1.2.3-55-g6feb From 0574928c85630c28a48d08b01078031c356cf1cf Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 20:08:53 +0200 Subject: libbb/yescrypt: remove unused prefetching code Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 61 ++++++++++++--------------------------- 1 file changed, 18 insertions(+), 43 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 329e30d1f..781e1f0bb 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -48,8 +48,8 @@ typedef union { } salsa20_blk_t; static void salsa20_simd_shuffle( - const salsa20_blk_t *Bin, - salsa20_blk_t *Bout) + const salsa20_blk_t *Bin, + salsa20_blk_t *Bout) { #define COMBINE(out, in1, in2) \ Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); @@ -222,15 +222,6 @@ static uint32_t blockmix_salsa8_xor( size_t i; DECL_X -#ifdef PREFETCH - PREFETCH(&Bin2[r * 2 - 1], _MM_HINT_T0) - for (i = 0; i < r - 1; i++) { - PREFETCH(&Bin2[i * 2], _MM_HINT_T0) - PREFETCH(&Bin2[i * 2 + 1], _MM_HINT_T0) - } - PREFETCH(&Bin2[i * 2], _MM_HINT_T0) -#endif - XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]) for (i = 0; i < r; i++) { XOR_X(Bin1[i * 2]) @@ -355,7 +346,7 @@ static void blockmix( static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, const salsa20_blk_t *restrict Bin2, salsa20_blk_t *Bout, - size_t r, int Bin2_in_ROM, + size_t r, pwxform_ctx_t *restrict ctx) { uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; @@ -366,22 +357,6 @@ static uint32_t blockmix_xor(const 
salsa20_blk_t *Bin1, /* Convert count of 128-byte blocks to max index of 64-byte block */ r = r * 2 - 1; -#ifdef PREFETCH - if (Bin2_in_ROM) { - PREFETCH(&Bin2[r], _MM_HINT_NTA) - for (i = 0; i < r; i++) { - PREFETCH(&Bin2[i], _MM_HINT_NTA) - } - } else { - PREFETCH(&Bin2[r], _MM_HINT_T0) - for (i = 0; i < r; i++) { - PREFETCH(&Bin2[i], _MM_HINT_T0) - } - } -#else - (void)Bin2_in_ROM; /* unused */ -#endif - XOR_X_2(Bin1[r], Bin2[r]) DECL_SMASK2REG @@ -510,10 +485,10 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, const salsa20_blk_t *V_j; V_j = &VROM[(NROM - 1) * s]; - j = blockmix_xor(X, V_j, Y, r, 1, ctx) & (NROM - 1); + j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1); V_j = &VROM[j * s]; X = Y + s; - j = blockmix_xor(Y, V_j, X, r, 1, ctx); + j = blockmix_xor(Y, V_j, X, r, ctx); for (n = 2; n < N; n <<= 1) { uint32_t m = (n < N / 2) ? n : (N - 1 - n); @@ -522,10 +497,10 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, j += i - 1; V_j = &V[j * s]; Y = X + s; - j = blockmix_xor(X, V_j, Y, r, 0, ctx) & (NROM - 1); + j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1); V_j = &VROM[j * s]; X = Y + s; - j = blockmix_xor(Y, V_j, X, r, 1, ctx); + j = blockmix_xor(Y, V_j, X, r, ctx); } } n >>= 1; @@ -534,9 +509,9 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, j += N - 2 - n; V_j = &V[j * s]; Y = X + s; - j = blockmix_xor(X, V_j, Y, r, 0, ctx) & (NROM - 1); + j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1); V_j = &VROM[j * s]; - blockmix_xor(Y, V_j, XY, r, 1, ctx); + blockmix_xor(Y, V_j, XY, r, ctx); } else if (flags & YESCRYPT_RW) { uint32_t n; salsa20_blk_t *V_j; @@ -553,12 +528,12 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, j &= n - 1; j += i - 1; V_j = &V[j * s]; - j = blockmix_xor(X, V_j, Y, r, 0, ctx); + j = blockmix_xor(X, V_j, Y, r, ctx); j &= n - 1; j += i; V_j = &V[j * s]; X = Y + s; - j = blockmix_xor(Y, V_j, X, r, 0, ctx); + j = blockmix_xor(Y, V_j, X, r, ctx); } } n >>= 1; @@ -567,11 +542,11 @@ static void smix1(uint8_t *B, 
size_t r, uint32_t N, j += N - 2 - n; V_j = &V[j * s]; Y = X + s; - j = blockmix_xor(X, V_j, Y, r, 0, ctx); + j = blockmix_xor(X, V_j, Y, r, ctx); j &= n - 1; j += N - 1 - n; V_j = &V[j * s]; - blockmix_xor(Y, V_j, XY, r, 0, ctx); + blockmix_xor(Y, V_j, XY, r, ctx); } else { N -= 2; do { @@ -641,14 +616,14 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, const salsa20_blk_t *VROM_j; j = blockmix_xor_save(X, V_j, r, ctx) & (NROM - 1); VROM_j = &VROM[j * s]; - j = blockmix_xor(X, VROM_j, X, r, 1, ctx) & (N - 1); + j = blockmix_xor(X, VROM_j, X, r, ctx) & (N - 1); } while (Nloop -= 2); } else if (VROM) { do { const salsa20_blk_t *V_j = &V[j * s]; - j = blockmix_xor(X, V_j, X, r, 0, ctx) & (NROM - 1); + j = blockmix_xor(X, V_j, X, r, ctx) & (NROM - 1); V_j = &VROM[j * s]; - j = blockmix_xor(X, V_j, X, r, 1, ctx) & (N - 1); + j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1); } while (Nloop -= 2); } else if (flags & YESCRYPT_RW) { do { @@ -660,9 +635,9 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, } else if (ctx) { do { const salsa20_blk_t *V_j = &V[j * s]; - j = blockmix_xor(X, V_j, X, r, 0, ctx) & (N - 1); + j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1); V_j = &V[j * s]; - j = blockmix_xor(X, V_j, X, r, 0, ctx) & (N - 1); + j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1); } while (Nloop -= 2); } else { do { -- cgit v1.2.3-55-g6feb From f3a1b7be72a2f2699ba25ecdb2c93b90e109f725 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 20:59:00 +0200 Subject: libbb/yescrypt: remove one NOINLINE, add copyright headers, merge two source files function old new delta pw_encrypt 945 974 +29 yes_crypt 50 - -50 ------------------------------------------------------------------------------ (add/remove: 0/1 grow/shrink: 1/0 up/down: 29/-50) Total: -21 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 2 +- libbb/yescrypt/alg-yescrypt-kdf.c | 27 ++++++++++++++++++ libbb/yescrypt/alg-yescrypt-platform.c 
| 45 ------------------------------ libbb/yescrypt/alg-yescrypt.h | 51 ++++------------------------------ libbb/yescrypt/y.c | 10 +++++-- 5 files changed, 41 insertions(+), 94 deletions(-) delete mode 100644 libbb/yescrypt/alg-yescrypt-platform.c (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index b9a5c51ac..435eaecca 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -258,7 +258,7 @@ uint8_t *yescrypt_r( if (saltend != saltstr + saltstrlen) goto fail; /* salt[] is too small, or bad char during decode */ - need = prefixlen + 1 + HASH_LEN + 1; + need = prefixlen + 1 + YESCRYPT_HASH_LEN + 1; if (need > buflen || need < prefixlen) goto fail; diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 781e1f0bb..27ef2caa4 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -759,6 +759,33 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, } } +/* Allocator code */ + +static void alloc_region(yescrypt_region_t *region, size_t size) +{ + int flags = +# ifdef MAP_NOCORE /* huh? 
*/ + MAP_NOCORE | +# endif + MAP_ANON | MAP_PRIVATE; + uint8_t *base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (base == MAP_FAILED) + bb_die_memory_exhausted(); + //region->base = base; + //region->base_size = size; + region->aligned = base; + region->aligned_size = size; +} + +static void free_region(yescrypt_region_t *region) +{ + if (region->aligned) + munmap(region->aligned, region->aligned_size); + //region->base = NULL; + //region->base_size = 0; + region->aligned = NULL; + region->aligned_size = 0; +} /** * yescrypt_kdf_body(shared, local, passwd, passwdlen, salt, saltlen, * flags, N, r, p, t, NROM, buf, buflen): diff --git a/libbb/yescrypt/alg-yescrypt-platform.c b/libbb/yescrypt/alg-yescrypt-platform.c deleted file mode 100644 index 8dd5feb55..000000000 --- a/libbb/yescrypt/alg-yescrypt-platform.c +++ /dev/null @@ -1,45 +0,0 @@ -/*- - * Copyright 2013-2018,2022 Alexander Peslyak - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -static void alloc_region(yescrypt_region_t *region, size_t size) -{ - int flags = -# ifdef MAP_NOCORE /* huh? 
*/ - MAP_NOCORE | -# endif - MAP_ANON | MAP_PRIVATE; - uint8_t *base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); - if (base == MAP_FAILED) - bb_die_memory_exhausted(); - //region->base = base; - //region->base_size = size; - region->aligned = base; - region->aligned_size = size; -} - -static void free_region(yescrypt_region_t *region) -{ - if (region->aligned) - munmap(region->aligned, region->aligned_size); - //region->base = NULL; - //region->base_size = 0; - region->aligned = NULL; - region->aligned_size = 0; -} diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index edabbc222..5b442c2c9 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -64,6 +64,7 @@ typedef uint32_t yescrypt_flags_t; #define YESCRYPT_SBOX_192K 0x280 #define YESCRYPT_SBOX_384K 0x300 #define YESCRYPT_SBOX_768K 0x380 + #ifdef YESCRYPT_INTERNAL /* Private */ #define YESCRYPT_MODE_MASK 0x003 @@ -123,59 +124,17 @@ typedef struct { } yescrypt_ctx_t; /* How many chars base-64 encoded bytes require? */ -#define BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6) +#define YESCRYPT_BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6) /* The /etc/passwd-style hash is "$" */ /* * "$y$", up to 8 params of up to 6 chars each, '$', salt * Alternatively, but that's smaller: * "$7$", 3 params encoded as 1+5+5 chars, salt */ -#define PREFIX_LEN (3 + 8 * 6 + 1 + BYTES2CHARS(32)) - -#define HASH_SIZE 32 -#define HASH_LEN BYTES2CHARS(HASH_SIZE) +#define YESCRYPT_PREFIX_LEN (3 + 8 * 6 + 1 + YESCRYPT_BYTES2CHARS(32)) -/** - * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params, - * buf, buflen): - * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, - * p, buflen), or a revision of scrypt as requested by flags and shared, and - * write the result into buf. The parameters N, r, p, and buflen must satisfy - * the same conditions as with crypto_scrypt(). 
t controls computation time - * while not affecting peak memory usage (t = 0 is optimal unless higher N*r - * is not affordable while higher t is). g controls hash upgrades (g = 0 for - * no upgrades so far). shared and flags may request special modes. local is - * the thread-local data structure, allowing to preserve and reuse a memory - * allocation across calls, thereby reducing processing overhead. - * - * Return 0 on success; or -1 on error. - * - * Classic scrypt is available by setting shared = NULL, flags = 0, and t = 0. - * - * Setting YESCRYPT_WORM enables only minimal deviations from classic scrypt: - * support for the t parameter, and pre- and post-hashing. - * - * Setting YESCRYPT_RW fully enables yescrypt. As a side effect of differences - * between the algorithms, it also prevents p > 1 from growing the threads' - * combined processing time and memory allocation (like it did with classic - * scrypt and YESCRYPT_WORM), treating p as a divider rather than a multiplier. - * - * Passing a shared structure, with ROM contents previously computed by - * yescrypt_init_shared(), enables the use of ROM and requires YESCRYPT_RW. - * - * In order to allow for initialization of the ROM to be split into a separate - * program (or separate invocation of the same program), the shared->aligned - * and shared->aligned_size fields may optionally be set by the caller directly - * (e.g., to a mapped SysV shm segment), without using yescrypt_init_shared(). - * - * MT-safe as long as local and buf are local to the thread. 
- */ -#ifdef YESCRYPT_INTERNAL -static int yescrypt_kdf32( - yescrypt_ctx_t *yctx, - const uint8_t *passwd, size_t passwdlen, - uint8_t *buf32); -#endif +#define YESCRYPT_HASH_SIZE 32 +#define YESCRYPT_HASH_LEN YESCRYPT_BYTES2CHARS(YESCRYPT_HASH_SIZE) /** * yescrypt_r(shared, local, passwd, passwdlen, setting, key, buf, buflen): diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c index 92c6eb7a8..e7d447531 100644 --- a/libbb/yescrypt/y.c +++ b/libbb/yescrypt/y.c @@ -1,11 +1,17 @@ +/* + * The compilation unit for yescrypt-related code. + * + * Copyright (C) 2025 by Denys Vlasenko + * + * Licensed under GPLv2, see file LICENSE in this source tree. + */ //kbuild:lib-$(CONFIG_USE_BB_CRYPT_YES) += y.o -#include +#include "libbb.h" #define YESCRYPT_INTERNAL #include "alg-sha256.h" #include "alg-yescrypt.h" #include "alg-sha256.c" -#include "alg-yescrypt-platform.c" #include "alg-yescrypt-kdf.c" #include "alg-yescrypt-common.c" -- cgit v1.2.3-55-g6feb From 5e17e3c6f49cef45a86ed9438941ca2d4f6ae906 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 21:59:03 +0200 Subject: libbb: shuffle ascii64 code around, shrink i2a64() function old new delta num2str64_4chars_msb_first - 55 +55 num2str64_lsb_first - 33 +33 i2a64 42 25 -17 to64 33 - -33 to64_msb_first 55 - -55 ------------------------------------------------------------------------------ (add/remove: 2/2 grow/shrink: 0/1 up/down: 88/-105) Total: -17 bytes Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt.c | 74 +------------------------------------------------- libbb/pw_encrypt_des.c | 15 ++-------- libbb/pw_encrypt_md5.c | 4 +-- libbb/pw_encrypt_sha.c | 2 +- 4 files changed, 7 insertions(+), 88 deletions(-) (limited to 'libbb') diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index af84606bf..4acc33039 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -13,68 +13,7 @@ #endif #include "libbb.h" -/* 0..63 -> - * "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - */ -int 
FAST_FUNC i2a64(int i) -{ - i &= 0x3f; - if (i == 0) - return '.'; - if (i == 1) - return '/'; - if (i < 12) - return ('0' - 2 + i); - if (i < 38) - return ('A' - 12 + i); - return ('a' - 38 + i); -} - -/* Returns >=64 for invalid chars */ -int FAST_FUNC a2i64(char c) -{ - unsigned char ch = c; - if (ch >= 'a') - /* "a..z" to 38..63 */ - /* anything after "z": positive int >= 64 */ - return (ch - 'a' + 38); - - if (ch > 'Z') - /* after "Z" but before "a": positive byte >= 64 */ - return ch; - - if (ch >= 'A') - /* "A..Z" to 12..37 */ - return (ch - 'A' + 12); - - if (ch > '9') - return 64; - - /* "./0123456789" to 0,1,2..11 */ - /* anything before "." becomes positive byte >= 64 */ - return (unsigned char)(ch - '.'); -} - -int FAST_FUNC crypt_make_rand64encoded(char *p, int cnt /*, int x */) -{ - /* was: x += ... */ - unsigned x = getpid() + monotonic_us(); - do { - /* x = (x*1664525 + 1013904223) % 2^32 generator is lame - * (low-order bit is not "random", etc...), - * but for our purposes it is good enough */ - x = x*1664525 + 1013904223; - /* BTW, Park and Miller's "minimal standard generator" is - * x = x*16807 % ((2^31)-1) - * It has no problem with visibly alternating lowest bit - * but is also weak in cryptographic sense + needs div, - * which needs more code (and slower) on many CPUs */ - *p++ = i2a64(x >> 16); - *p++ = i2a64(x >> 22); - } while (--cnt); - *p = '\0'; - return x; -} +#include "pw_ascii64.c" char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) { @@ -139,17 +78,6 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) } #if ENABLE_USE_BB_CRYPT - -static char* -to64(char *s, unsigned v, int n) -{ - while (--n >= 0) { - *s++ = i2a64(v); - v >>= 6; - } - return s; -} - /* * DES and MD5 crypt implementations are taken from uclibc. * They were modified to not use static buffers. 
diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index bfa039bb5..ca8aa9bcc 100644 --- a/libbb/pw_encrypt_des.c +++ b/libbb/pw_encrypt_des.c @@ -671,15 +671,6 @@ do_des(struct des_ctx *ctx, /*uint32_t l_in, uint32_t r_in,*/ uint32_t *l_out, u #define DES_OUT_BUFSIZE 21 -static void -to64_msb_first(char *s, unsigned v) -{ - *s++ = i2a64(v >> 18); /* bits 23..18 */ - *s++ = i2a64(v >> 12); /* bits 17..12 */ - *s++ = i2a64(v >> 6); /* bits 11..6 */ - *s = i2a64(v); /* bits 5..0 */ -} - static char * NOINLINE des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], @@ -726,11 +717,11 @@ des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], /* Now encode the result. */ /* Each call takes low-order 24 bits and stores 4 chars */ /* bits 31..8 of r0 */ - to64_msb_first(output + 2, (r0 >> 8)); + num2str64_4chars_msb_first(output + 2, (r0 >> 8)); /* bits 7..0 of r0 and 31..16 of r1 */ - to64_msb_first(output + 6, (r0 << 16) | (r1 >> 16)); + num2str64_4chars_msb_first(output + 6, (r0 << 16) | (r1 >> 16)); /* bits 15..0 of r1 and two zero bits (plus extra zero byte) */ - to64_msb_first(output + 10, (r1 << 8)); + num2str64_4chars_msb_first(output + 10, (r1 << 8)); /* extra zero byte is encoded as '.', fixing it */ output[13] = '\0'; diff --git a/libbb/pw_encrypt_md5.c b/libbb/pw_encrypt_md5.c index 1e52ecaea..92d039f96 100644 --- a/libbb/pw_encrypt_md5.c +++ b/libbb/pw_encrypt_md5.c @@ -149,9 +149,9 @@ md5_crypt(char result[MD5_OUT_BUFSIZE], const unsigned char *pw, const unsigned final[16] = final[5]; for (i = 0; i < 5; i++) { unsigned l = (final[i] << 16) | (final[i+6] << 8) | final[i+12]; - p = to64(p, l, 4); + p = num2str64_lsb_first(p, l, 4); } - p = to64(p, final[11], 2); + p = num2str64_lsb_first(p, final[11], 2); *p = '\0'; /* Don't leave anything around in vm they could use. 
*/ diff --git a/libbb/pw_encrypt_sha.c b/libbb/pw_encrypt_sha.c index 5457d7ab6..516293920 100644 --- a/libbb/pw_encrypt_sha.c +++ b/libbb/pw_encrypt_sha.c @@ -198,7 +198,7 @@ sha_crypt(/*const*/ char *key_data, /*const*/ char *salt_data) #define b64_from_24bit(B2, B1, B0, N) \ do { \ unsigned w = ((B2) << 16) | ((B1) << 8) | (B0); \ - resptr = to64(resptr, w, N); \ + resptr = num2str64_lsb_first(resptr, w, N); \ } while (0) if (_32or64 == 32) { /* sha256 */ unsigned i = 0; -- cgit v1.2.3-55-g6feb From 53de6e6150ea5538930e1963eb87ada153093ea0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 6 Jul 2025 22:43:28 +0200 Subject: libbb/yescrypt: use common ascii64 encoding routine function old new delta num2str64_lsb_first 33 46 +13 yescrypt_r 1235 1133 -102 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/1 up/down: 13/-102) Total: -89 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 1 + libbb/yescrypt/alg-yescrypt-common.c | 98 +++++++++++++----------------------- libbb/yescrypt/alg-yescrypt.h | 4 +- 3 files changed, 37 insertions(+), 66 deletions(-) (limited to 'libbb') diff --git a/include/libbb.h b/include/libbb.h index 1c23d2f66..b761b1091 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2191,6 +2191,7 @@ void read_base64(FILE *src_stream, FILE *dst_stream, int flags) FAST_FUNC; int FAST_FUNC i2a64(int i); int FAST_FUNC a2i64(char c); +char* FAST_FUNC num2str64_lsb_first(char *s, unsigned v, int n); typedef struct md5_ctx_t { uint8_t wbuffer[64]; /* always correctly aligned for uint64_t */ diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 435eaecca..5bdf1893e 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -19,8 +19,8 @@ */ /* Not inlining: - * decode64 fuinctions are only used to read - * yescrypt_params_t field, and convert salt ti binary - + * de/encode64 functions are only used to read 
+ * yescrypt_params_t field, and convert salt to binary - * both of these are negligible compared to main hashing operation */ static NOINLINE const uint8_t *decode64_uint32( @@ -63,56 +63,6 @@ fail: return NULL; } -static uint8_t *encode64_uint32_fixed( - uint8_t *dst, size_t dstlen, - uint32_t src, uint32_t srcbits) -{ - uint32_t bits; - - for (bits = 0; bits < srcbits; bits += 6) { - if (dstlen < 2) - return NULL; - *dst++ = i2a64(src); - dstlen--; - src >>= 6; - } - - if (src || dstlen < 1) - return NULL; - - *dst = 0; /* NUL terminate just in case */ - - return dst; -} - -static uint8_t *encode64( - uint8_t *dst, size_t dstlen, - const uint8_t *src, size_t srclen) -{ - size_t i; - - for (i = 0; i < srclen; ) { - uint8_t *dnext; - uint32_t value = 0, bits = 0; - do { - value |= (uint32_t)src[i++] << bits; - bits += 8; - } while (bits < 24 && i < srclen); - dnext = encode64_uint32_fixed(dst, dstlen, value, bits); - if (!dnext) - return NULL; - dstlen -= dnext - dst; - dst = dnext; - } - - if (dstlen < 1) - return NULL; - - *dst = 0; /* NUL terminate just in case */ - - return dst; -} - static const uint8_t *decode64( uint8_t *dst, size_t *dstlen, const uint8_t *src, size_t srclen) @@ -156,21 +106,43 @@ static const uint8_t *decode64( *dstlen = dstpos; return src; } - fail: *dstlen = 0; return NULL; } -uint8_t *yescrypt_r( +static char *encode64( + char *dst, size_t dstlen, + const uint8_t *src, size_t srclen) +{ + while (srclen) { + uint32_t value = 0, b = 0; + do { + value |= (uint32_t)(*src++ << b); + b += 8; + srclen--; + } while (srclen && b < 24); + + b >>= 3; /* number of bits to number of bytes */ + b++; /* 1, 2 or 3 bytes will become 2, 3 or 4 ascii64 chars */ + dstlen -= b; + if ((ssize_t)dstlen <= 0) + return NULL; + dst = num2str64_lsb_first(dst, value, b); + } + *dst = '\0'; + return dst; +} + +char *yescrypt_r( const uint8_t *passwd, size_t passwdlen, const uint8_t *setting, - uint8_t *buf, size_t buflen) + char *buf, size_t buflen) { yescrypt_ctx_t 
yctx[1]; unsigned char hashbin32[32]; + char *dst; const uint8_t *src, *saltstr, *saltend; - uint8_t *dst; size_t need, prefixlen, saltstrlen; uint32_t flavor, N_log2; @@ -241,11 +213,11 @@ uint8_t *yescrypt_r( goto fail; yctx->param.NROM = (uint64_t)1 << NROM_log2; } + if (!src) + goto fail; + if (*src != '$') + goto fail; } - if (!src) - goto fail; - if (*src != '$') - goto fail; saltstr = src + 1; src = (uint8_t *)strchrnul((char *)saltstr, '$'); @@ -268,16 +240,14 @@ uint8_t *yescrypt_r( dst = mempcpy(buf, setting, prefixlen); *dst++ = '$'; dst = encode64(dst, buflen - (dst - buf), hashbin32, sizeof(hashbin32)); - if (!dst || dst >= buf + buflen) + if (!dst) goto fail; - - *dst = 0; /* NUL termination */ ret: free_region(yctx->local); explicit_bzero(yctx, sizeof(yctx)); explicit_bzero(hashbin32, sizeof(hashbin32)); return buf; -fail: + fail: buf = NULL; goto ret; } diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 5b442c2c9..996af333f 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -151,8 +151,8 @@ typedef struct { * * MT-safe as long as local and buf are local to the thread. */ -extern uint8_t *yescrypt_r( +extern char *yescrypt_r( const uint8_t *passwd, size_t passwdlen, const uint8_t *setting, - uint8_t *buf, size_t buflen + char *buf, size_t buflen ); -- cgit v1.2.3-55-g6feb From a2a5db41a3ace98e7eff53aca231053be7717c66 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 00:04:10 +0200 Subject: libbb/pw_ascii64.c: add forgotten source file Signed-off-by: Denys Vlasenko --- libbb/pw_ascii64.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 libbb/pw_ascii64.c (limited to 'libbb') diff --git a/libbb/pw_ascii64.c b/libbb/pw_ascii64.c new file mode 100644 index 000000000..3993932ca --- /dev/null +++ b/libbb/pw_ascii64.c @@ -0,0 +1,91 @@ +/* vi: set sw=4 ts=4: */ +/* + * Utility routines. 
+ * + * Copyright (C) 1999-2004 by Erik Andersen + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +/* Returns >=64 for invalid chars */ +int FAST_FUNC a2i64(char c) +{ + unsigned char ch = c; + if (ch >= 'a') + /* "a..z" to 38..63 */ + /* anything after "z": positive int >= 64 */ + return (ch - 'a' + 38); + + if (ch > 'Z') + /* after "Z" but before "a": positive byte >= 64 */ + return ch; + + if (ch >= 'A') + /* "A..Z" to 12..37 */ + return (ch - 'A' + 12); + + if (ch > '9') + return 64; + + /* "./0123456789" to 0,1,2..11 */ + /* anything before "." becomes positive byte >= 64 */ + return (unsigned char)(ch - '.'); +} + +/* 0..63 -> + * "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + */ +int FAST_FUNC i2a64(int i) +{ + i &= 0x3f; + + i += '.'; + /* the above maps 0..11 to "./0123456789": + * ASCII codes of "./" are ('0'-2) and ('0'-1) */ + + if (i > '9') + i += ('A' - '9' - 1); + if (i > 'Z') + i += ('a' - 'Z' - 1); + return i; +} + +char* FAST_FUNC +num2str64_lsb_first(char *s, unsigned v, int n) +{ + while (--n >= 0) { + *s++ = i2a64(v); + v >>= 6; + } + return s; +} + +static void +num2str64_4chars_msb_first(char *s, unsigned v) +{ + *s++ = i2a64(v >> 18); /* bits 23..18 */ + *s++ = i2a64(v >> 12); /* bits 17..12 */ + *s++ = i2a64(v >> 6); /* bits 11..6 */ + *s = i2a64(v); /* bits 5..0 */ +} + +int FAST_FUNC crypt_make_rand64encoded(char *p, int cnt /*, int x */) +{ + /* was: x += ... 
*/ + unsigned x = getpid() + monotonic_us(); + do { + /* x = (x*1664525 + 1013904223) % 2^32 generator is lame + * (low-order bit is not "random", etc...), + * but for our purposes it is good enough */ + x = x*1664525 + 1013904223; + /* BTW, Park and Miller's "minimal standard generator" is + * x = x*16807 % ((2^31)-1) + * It has no problem with visibly alternating lowest bit + * but is also weak in cryptographic sense + needs div, + * which needs more code (and slower) on many CPUs */ + *p++ = i2a64(x >> 16); + *p++ = i2a64(x >> 22); + } while (--cnt); + *p = '\0'; + return x; +} -- cgit v1.2.3-55-g6feb From e2091c98425e2e1615db7dcad9556c928ea5123d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 04:57:16 +0200 Subject: libbb: add two more forgotten source files Signed-off-by: Denys Vlasenko --- libbb/hash_sha256_block.c | 19 +++++++++++++++++++ libbb/pw_encrypt_yes.c | 24 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 libbb/hash_sha256_block.c create mode 100644 libbb/pw_encrypt_yes.c (limited to 'libbb') diff --git a/libbb/hash_sha256_block.c b/libbb/hash_sha256_block.c new file mode 100644 index 000000000..3c4366321 --- /dev/null +++ b/libbb/hash_sha256_block.c @@ -0,0 +1,19 @@ +/* vi: set sw=4 ts=4: */ +/* + * Utility routines. + * + * Copyright (C) 2025 Denys Vlasenko + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +//kbuild:lib-y += hash_sha256_block.o +#include "libbb.h" + +void FAST_FUNC +sha256_block(const void *in, size_t len, uint8_t hash[32]) +{ + sha256_ctx_t ctx; + sha256_begin(&ctx); + sha256_hash(&ctx, in, len); + sha256_end(&ctx, hash); +} diff --git a/libbb/pw_encrypt_yes.c b/libbb/pw_encrypt_yes.c new file mode 100644 index 000000000..50bd06418 --- /dev/null +++ b/libbb/pw_encrypt_yes.c @@ -0,0 +1,24 @@ +/* + * Utility routines. + * + * Copyright (C) 2025 by Denys Vlasenko + * + * Licensed under GPLv2, see file LICENSE in this source tree. 
+ */ +#include "yescrypt/alg-yescrypt.h" + +static char * +yes_crypt(const char *passwd, const char *salt_data) +{ + /* prefix, '$', hash, NUL */ + char buf[YESCRYPT_PREFIX_LEN + 1 + YESCRYPT_HASH_LEN + 1]; + char *retval; + + retval = yescrypt_r( + (const uint8_t *)passwd, strlen(passwd), + (const uint8_t *)salt_data, + buf, sizeof(buf)); + /* The returned value is either buf[], or NULL on error */ + + return xstrdup(retval); +} -- cgit v1.2.3-55-g6feb From 1a0913d57ce8287703cfe666d9240e3a147ea30d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 07:44:01 +0200 Subject: libbb: factor out HMAC code from TLS function old new delta hmac_block - 88 +88 hmac_peek_hash - 61 +61 hmac_end - 50 +50 hmac_begin 140 177 +37 hmac_hash_v - 30 +30 .rodata 105799 105787 -12 hmac_sha_precomputed 54 - -54 hmac_sha_precomputed_v 69 - -69 hmac 83 - -83 ------------------------------------------------------------------------------ (add/remove: 5/3 grow/shrink: 1/1 up/down: 266/-218) Total: 48 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 40 +++++++++++--- libbb/hash_hmac.c | 99 +++++++++++++++++++++++++++++++++ networking/tls.c | 161 ++++++++++-------------------------------------------- 3 files changed, 161 insertions(+), 139 deletions(-) create mode 100644 libbb/hash_hmac.c (limited to 'libbb') diff --git a/include/libbb.h b/include/libbb.h index b761b1091..3f60acaa0 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2193,6 +2193,16 @@ int FAST_FUNC i2a64(int i); int FAST_FUNC a2i64(char c); char* FAST_FUNC num2str64_lsb_first(char *s, unsigned v, int n); +enum { + /* how many bytes XYZ_end() fills */ + MD5_OUTSIZE = 16, + SHA1_OUTSIZE = 20, + SHA256_OUTSIZE = 32, + SHA512_OUTSIZE = 64, + SHA3_OUTSIZE = 28, + /* size of input block */ + SHA2_INSIZE = 64, +}; typedef struct md5_ctx_t { uint8_t wbuffer[64]; /* always correctly aligned for uint64_t */ void (*process_block)(struct md5_ctx_t*) FAST_FUNC; @@ -2226,18 +2236,32 @@ unsigned 
sha512_end(sha512_ctx_t *ctx, void *resbuf) FAST_FUNC; void sha3_begin(sha3_ctx_t *ctx) FAST_FUNC; void sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len) FAST_FUNC; unsigned sha3_end(sha3_ctx_t *ctx, void *resbuf) FAST_FUNC; +void FAST_FUNC sha256_block(const void *in, size_t len, uint8_t hash[32]); /* TLS benefits from knowing that sha1 and sha256 share these. Give them "agnostic" names too */ typedef struct md5_ctx_t md5sha_ctx_t; #define md5sha_hash md5_hash #define sha_end sha1_end -enum { - MD5_OUTSIZE = 16, - SHA1_OUTSIZE = 20, - SHA256_OUTSIZE = 32, - SHA512_OUTSIZE = 64, - SHA3_OUTSIZE = 28, -}; -void FAST_FUNC sha256_block(const void *in, size_t len, uint8_t hash[32]); + +/* RFC 2104 HMAC (hash-based message authentication code) */ +typedef struct hmac_ctx { + md5sha_ctx_t hashed_key_xor_ipad; + md5sha_ctx_t hashed_key_xor_opad; +} hmac_ctx_t; +#define HMAC_ONLY_SHA256 (!ENABLE_FEATURE_TLS_SHA1) +typedef void md5sha_begin_func(md5sha_ctx_t *ctx) FAST_FUNC; +#if HMAC_ONLY_SHA256 +#define hmac_begin(ctx,key,key_size,begin) \ + hmac_begin(ctx,key,key_size) +#endif +void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size, md5sha_begin_func *begin); +static ALWAYS_INLINE void hmac_hash(hmac_ctx_t *ctx, const void *in, size_t len) +{ + md5sha_hash(&ctx->hashed_key_xor_ipad, in, len); +} +unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out); +/* HMAC helpers for TLS: */ +void FAST_FUNC hmac_hash_v(hmac_ctx_t *ctx, va_list va); +unsigned FAST_FUNC hmac_peek_hash(hmac_ctx_t *ctx, uint8_t *out, ...); extern uint32_t *global_crc32_table; uint32_t *crc32_filltable(uint32_t *tbl256, int endian) FAST_FUNC; diff --git a/libbb/hash_hmac.c b/libbb/hash_hmac.c new file mode 100644 index 000000000..8cf936949 --- /dev/null +++ b/libbb/hash_hmac.c @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2025 Denys Vlasenko + * + * Licensed under GPLv2, see file LICENSE in this source tree. 
+ */ +//kbuild:lib-$(CONFIG_TLS) += hash_hmac.o +//kbuild:lib-$(CONFIG_USE_BB_CRYPT_YES) += hash_hmac.o + +#include "libbb.h" + +// RFC 2104: +// HMAC(key, text) based on a hash H (say, sha256) is: +// ipad = [0x36 x INSIZE] +// opad = [0x5c x INSIZE] +// HMAC(key, text) = H((key XOR opad) + H((key XOR ipad) + text)) +// +// H(key XOR opad) and H(key XOR ipad) can be precomputed +// if we often need HMAC hmac with the same key. +// +// text is often given in disjoint pieces. +void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size, md5sha_begin_func *begin) +{ +#if HMAC_ONLY_SHA256 +#define begin sha256_begin +#endif + uint8_t key_xor_ipad[SHA2_INSIZE]; + uint8_t key_xor_opad[SHA2_INSIZE]; + unsigned i; + + // "The authentication key can be of any length up to INSIZE, the + // block length of the hash function. Applications that use keys longer + // than INSIZE bytes will first hash the key using H and then use the + // resultant OUTSIZE byte string as the actual key to HMAC." + if (key_size > SHA2_INSIZE) { + uint8_t tempkey[SHA1_OUTSIZE < SHA256_OUTSIZE ? 
SHA256_OUTSIZE : SHA1_OUTSIZE]; + /* use ctx->hashed_key_xor_ipad as scratch ctx */ + begin(&ctx->hashed_key_xor_ipad); + md5sha_hash(&ctx->hashed_key_xor_ipad, key, key_size); + key_size = sha_end(&ctx->hashed_key_xor_ipad, tempkey); + key = tempkey; + } + + for (i = 0; i < key_size; i++) { + key_xor_ipad[i] = key[i] ^ 0x36; + key_xor_opad[i] = key[i] ^ 0x5c; + } + for (; i < SHA2_INSIZE; i++) { + key_xor_ipad[i] = 0x36; + key_xor_opad[i] = 0x5c; + } + + begin(&ctx->hashed_key_xor_ipad); + begin(&ctx->hashed_key_xor_opad); + md5sha_hash(&ctx->hashed_key_xor_ipad, key_xor_ipad, SHA2_INSIZE); + md5sha_hash(&ctx->hashed_key_xor_opad, key_xor_opad, SHA2_INSIZE); +} +#undef begin + +unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out) +{ + unsigned len = sha_end(&ctx->hashed_key_xor_ipad, out); + /* out = H((key XOR opad) + out) */ + md5sha_hash(&ctx->hashed_key_xor_opad, out, len); + return sha_end(&ctx->hashed_key_xor_opad, out); +} + +/* TLS helpers */ + +void FAST_FUNC hmac_hash_v( + hmac_ctx_t *ctx, + va_list va) +{ + uint8_t *in; + + /* ctx->hashed_key_xor_ipad contains unclosed "H((key XOR ipad) +" state */ + /* ctx->hashed_key_xor_opad contains unclosed "H((key XOR opad) +" state */ + + /* calculate out = H((key XOR ipad) + text) */ + while ((in = va_arg(va, uint8_t*)) != NULL) { + unsigned size = va_arg(va, unsigned); + md5sha_hash(&ctx->hashed_key_xor_ipad, in, size); + } +} + +/* Using HMAC state, make a copy of it (IOW: without affecting this state!) + * hash in the list of (ptr,size) blocks, and finish the HMAC to out[] buffer. + * This function is useful for TLS PRF. + */ +unsigned FAST_FUNC hmac_peek_hash(hmac_ctx_t *ctx, uint8_t *out, ...) 
+{ + hmac_ctx_t tmpctx = *ctx; /* struct copy */ + va_list va; + + va_start(va, out); + hmac_hash_v(&tmpctx, va); + va_end(va); + + return hmac_end(&tmpctx, out); +} diff --git a/networking/tls.c b/networking/tls.c index 8d074c058..b8caf1e4b 100644 --- a/networking/tls.c +++ b/networking/tls.c @@ -188,8 +188,6 @@ #define TLS_MAX_OUTBUF (1 << 14) enum { - SHA_INSIZE = 64, - AES128_KEYSIZE = 16, AES256_KEYSIZE = 32, @@ -393,128 +391,6 @@ static void hash_handshake(tls_state_t *tls, const char *fmt, const void *buffer # define TLS_MAC_SIZE(tls) (tls)->MAC_size #endif -// RFC 2104: -// HMAC(key, text) based on a hash H (say, sha256) is: -// ipad = [0x36 x INSIZE] -// opad = [0x5c x INSIZE] -// HMAC(key, text) = H((key XOR opad) + H((key XOR ipad) + text)) -// -// H(key XOR opad) and H(key XOR ipad) can be precomputed -// if we often need HMAC hmac with the same key. -// -// text is often given in disjoint pieces. -typedef struct hmac_precomputed { - md5sha_ctx_t hashed_key_xor_ipad; - md5sha_ctx_t hashed_key_xor_opad; -} hmac_precomputed_t; - -typedef void md5sha_begin_func(md5sha_ctx_t *ctx) FAST_FUNC; -#if !ENABLE_FEATURE_TLS_SHA1 -#define hmac_begin(pre,key,key_size,begin) \ - hmac_begin(pre,key,key_size) -#define begin sha256_begin -#endif -static void hmac_begin(hmac_precomputed_t *pre, uint8_t *key, unsigned key_size, md5sha_begin_func *begin) -{ - uint8_t key_xor_ipad[SHA_INSIZE]; - uint8_t key_xor_opad[SHA_INSIZE]; -// uint8_t tempkey[SHA1_OUTSIZE < SHA256_OUTSIZE ? SHA256_OUTSIZE : SHA1_OUTSIZE]; - unsigned i; - - // "The authentication key can be of any length up to INSIZE, the - // block length of the hash function. Applications that use keys longer - // than INSIZE bytes will first hash the key using H and then use the - // resultant OUTSIZE byte string as the actual key to HMAC." - if (key_size > SHA_INSIZE) { - bb_simple_error_msg_and_die("HMAC key>64"); //does not happen (yet?) 
-// md5sha_ctx_t ctx; -// begin(&ctx); -// md5sha_hash(&ctx, key, key_size); -// key_size = sha_end(&ctx, tempkey); -// //key = tempkey; - right? RIGHT? why does it work without this? -// // because SHA_INSIZE is 64, but hmac() is always called with -// // key_size = tls->MAC_size = SHA1/256_OUTSIZE (20 or 32), -// // and prf_hmac_sha256() -> hmac_sha256() key sizes are: -// // - RSA_PREMASTER_SIZE is 48 -// // - CURVE25519_KEYSIZE is 32 -// // - master_secret[] is 48 - } - - for (i = 0; i < key_size; i++) { - key_xor_ipad[i] = key[i] ^ 0x36; - key_xor_opad[i] = key[i] ^ 0x5c; - } - for (; i < SHA_INSIZE; i++) { - key_xor_ipad[i] = 0x36; - key_xor_opad[i] = 0x5c; - } - - begin(&pre->hashed_key_xor_ipad); - begin(&pre->hashed_key_xor_opad); - md5sha_hash(&pre->hashed_key_xor_ipad, key_xor_ipad, SHA_INSIZE); - md5sha_hash(&pre->hashed_key_xor_opad, key_xor_opad, SHA_INSIZE); -} -#undef begin - -static unsigned hmac_sha_precomputed_v( - hmac_precomputed_t *pre, - uint8_t *out, - va_list va) -{ - uint8_t *text; - unsigned len; - - /* pre->hashed_key_xor_ipad contains unclosed "H((key XOR ipad) +" state */ - /* pre->hashed_key_xor_opad contains unclosed "H((key XOR opad) +" state */ - - /* calculate out = H((key XOR ipad) + text) */ - while ((text = va_arg(va, uint8_t*)) != NULL) { - unsigned text_size = va_arg(va, unsigned); - md5sha_hash(&pre->hashed_key_xor_ipad, text, text_size); - } - len = sha_end(&pre->hashed_key_xor_ipad, out); - - /* out = H((key XOR opad) + out) */ - md5sha_hash(&pre->hashed_key_xor_opad, out, len); - return sha_end(&pre->hashed_key_xor_opad, out); -} - -static unsigned hmac_sha_precomputed(hmac_precomputed_t *pre_init, uint8_t *out, ...) -{ - hmac_precomputed_t pre; - va_list va; - unsigned len; - - va_start(va, out); - pre = *pre_init; /* struct copy */ - len = hmac_sha_precomputed_v(&pre, out, va); - va_end(va); - return len; -} - -#if !ENABLE_FEATURE_TLS_SHA1 -#define hmac(tls,out,key,key_size,...) 
\ - hmac(out,key,key_size, __VA_ARGS__) -#endif -static unsigned hmac(tls_state_t *tls, uint8_t *out, uint8_t *key, unsigned key_size, ...) -{ - hmac_precomputed_t pre; - va_list va; - unsigned len; - - va_start(va, key_size); - - hmac_begin(&pre, key, key_size, - (ENABLE_FEATURE_TLS_SHA1 && tls->MAC_size == SHA1_OUTSIZE) - ? sha1_begin - : sha256_begin - ); - len = hmac_sha_precomputed_v(&pre, out, va); - - va_end(va); - return len; -} - // RFC 5246: // 5. HMAC and the Pseudorandom Function //... @@ -559,7 +435,7 @@ static void prf_hmac_sha256(/*tls_state_t *tls,*/ const char *label, uint8_t *seed, unsigned seed_size) { - hmac_precomputed_t pre; + hmac_ctx_t ctx; uint8_t a[TLS_MAX_MAC_SIZE]; uint8_t *out_p = outbuf; unsigned label_size = strlen(label); @@ -569,26 +445,26 @@ static void prf_hmac_sha256(/*tls_state_t *tls,*/ #define SEED label, label_size, seed, seed_size #define A a, MAC_size - hmac_begin(&pre, secret, secret_size, sha256_begin); + hmac_begin(&ctx, secret, secret_size, sha256_begin); /* A(1) = HMAC_hash(secret, seed) */ - hmac_sha_precomputed(&pre, a, SEED, NULL); + hmac_peek_hash(&ctx, a, SEED, NULL); for (;;) { /* HMAC_hash(secret, A(1) + seed) */ if (outbuf_size <= MAC_size) { /* Last, possibly incomplete, block */ /* (use a[] as temp buffer) */ - hmac_sha_precomputed(&pre, a, A, SEED, NULL); + hmac_peek_hash(&ctx, a, A, SEED, NULL); memcpy(out_p, a, outbuf_size); return; } /* Not last block. Store directly to result buffer */ - hmac_sha_precomputed(&pre, out_p, A, SEED, NULL); + hmac_peek_hash(&ctx, out_p, A, SEED, NULL); out_p += MAC_size; outbuf_size -= MAC_size; /* A(2) = HMAC_hash(secret, A(1)) */ - hmac_sha_precomputed(&pre, a, A, NULL); + hmac_peek_hash(&ctx, a, A, NULL); } #undef A #undef SECRET @@ -655,6 +531,29 @@ static void *tls_get_zeroed_outbuf(tls_state_t *tls, int len) return record; } +/* Calculate the HMAC over the list of blocks */ +#if !ENABLE_FEATURE_TLS_SHA1 +#define hmac_block(tls,out,key,key_size,...) 
\ + hmac_block(out,key,key_size, __VA_ARGS__) +#endif +static unsigned hmac_block(tls_state_t *tls, uint8_t *out, uint8_t *key, unsigned key_size, ...) +{ + hmac_ctx_t ctx; + va_list va; + + hmac_begin(&ctx, key, key_size, + (ENABLE_FEATURE_TLS_SHA1 && tls->MAC_size == SHA1_OUTSIZE) + ? sha1_begin + : sha256_begin + ); + + va_start(va, key_size); + hmac_hash_v(&ctx, va); + va_end(va); + + return hmac_end(&ctx, out); +} + static void xwrite_encrypted_and_hmac_signed(tls_state_t *tls, unsigned size, unsigned type) { uint8_t *buf = tls->outbuf + OUTBUF_PFX; @@ -676,7 +575,7 @@ static void xwrite_encrypted_and_hmac_signed(tls_state_t *tls, unsigned size, un xhdr->len16_lo = size & 0xff; /* Calculate MAC signature */ - hmac(tls, buf + size, /* result */ + hmac_block(tls, buf + size, /* result */ tls->client_write_MAC_key, TLS_MAC_SIZE(tls), &tls->write_seq64_be, sizeof(tls->write_seq64_be), xhdr, RECHDR_LEN, -- cgit v1.2.3-55-g6feb From c11730490ad68737120d569b9760e2c35e28977e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 08:21:44 +0200 Subject: libbb/yescrypt: remove redundant SHA256 HMAC implementation function old new delta hmac_blocks - 88 +88 static.PBKDF2_SHA256 176 213 +37 yescrypt_kdf32_body 1046 1052 +6 static.smix 759 762 +3 hmac_block 88 64 -24 HMAC_SHA256_Final 53 - -53 HMAC_SHA256_Buf 58 - -58 HMAC_SHA256_Init 159 - -159 ------------------------------------------------------------------------------ (add/remove: 1/3 grow/shrink: 3/1 up/down: 134/-294) Total: -160 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 10 ++++- libbb/hash_hmac.c | 10 ++++- libbb/yescrypt/alg-sha256.c | 94 ++++----------------------------------- libbb/yescrypt/alg-yescrypt-kdf.c | 24 +++++++--- networking/tls.c | 8 ++-- 5 files changed, 48 insertions(+), 98 deletions(-) (limited to 'libbb') diff --git a/include/libbb.h b/include/libbb.h index 3f60acaa0..cbf723f7e 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -2253,12 +2253,20 @@ typedef void 
md5sha_begin_func(md5sha_ctx_t *ctx) FAST_FUNC; #define hmac_begin(ctx,key,key_size,begin) \ hmac_begin(ctx,key,key_size) #endif -void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size, md5sha_begin_func *begin); +void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, const uint8_t *key, unsigned key_size, md5sha_begin_func *begin); static ALWAYS_INLINE void hmac_hash(hmac_ctx_t *ctx, const void *in, size_t len) { md5sha_hash(&ctx->hashed_key_xor_ipad, in, len); } unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out); +#if HMAC_ONLY_SHA256 +#define hmac_block(key,key_size,begin,in,sz,out) \ + hmac_block(key,key_size,in,sz,out) +#endif +unsigned FAST_FUNC hmac_block(const uint8_t *key, unsigned key_size, + md5sha_begin_func *begin, + const void *in, unsigned sz, + uint8_t *out); /* HMAC helpers for TLS: */ void FAST_FUNC hmac_hash_v(hmac_ctx_t *ctx, va_list va); unsigned FAST_FUNC hmac_peek_hash(hmac_ctx_t *ctx, uint8_t *out, ...); diff --git a/libbb/hash_hmac.c b/libbb/hash_hmac.c index 8cf936949..9e48e0f51 100644 --- a/libbb/hash_hmac.c +++ b/libbb/hash_hmac.c @@ -18,7 +18,7 @@ // if we often need HMAC hmac with the same key. // // text is often given in disjoint pieces. 
-void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size, md5sha_begin_func *begin) +void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, const uint8_t *key, unsigned key_size, md5sha_begin_func *begin) { #if HMAC_ONLY_SHA256 #define begin sha256_begin @@ -64,6 +64,14 @@ unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out) return sha_end(&ctx->hashed_key_xor_opad, out); } +unsigned FAST_FUNC hmac_block(const uint8_t *key, unsigned key_size, md5sha_begin_func *begin, const void *in, unsigned sz, uint8_t *out) +{ + hmac_ctx_t ctx; + hmac_begin(&ctx, key, key_size, begin); + hmac_hash(&ctx, in, sz); + return hmac_end(&ctx, out); +} + /* TLS helpers */ void FAST_FUNC hmac_hash_v( diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index f56b905ad..1ccffa1e5 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -25,82 +25,6 @@ * SUCH DAMAGE. */ -/** - * HMAC_SHA256_Init(ctx, K, Klen): - * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from - * ${K}. - */ -static void -HMAC_SHA256_Init(HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen) -{ - uint8_t pad[64]; - uint8_t khash[32]; - const uint8_t *K = _K; - size_t i; - - /* If Klen > 64, the key is really SHA256(K). */ - if (Klen > 64) { - sha256_block(K, Klen, khash); - K = khash; - Klen = 32; - } - - /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ - sha256_begin(&ctx->ictx); - memset(pad, 0x36, 64); - for (i = 0; i < Klen; i++) - pad[i] ^= K[i]; - sha256_hash(&ctx->ictx, pad, 64); - - /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */ - sha256_begin(&ctx->octx); - memset(pad, 0x5c, 64); - for (i = 0; i < Klen; i++) - pad[i] ^= K[i]; - sha256_hash(&ctx->octx, pad, 64); -} - -/** - * HMAC_SHA256_Update(ctx, in, len): - * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. 
- */ -static void -HMAC_SHA256_Update(HMAC_SHA256_CTX *ctx, const void *in, size_t len) -{ - /* Feed data to the inner SHA256 operation. */ - sha256_hash(&ctx->ictx, in, len); -} - -/** - * HMAC_SHA256_Final(ctx, digest): - * Output the HMAC-SHA256 of the data input to the context ${ctx} into the - * buffer ${digest}. - */ -static void -HMAC_SHA256_Final(HMAC_SHA256_CTX *ctx, void *digest) -{ - /* Finish the inner SHA256 operation. */ - sha256_end(&ctx->ictx, digest); /* using digest[] as scratch space */ - /* Feed the inner hash to the outer SHA256 operation. */ - sha256_hash(&ctx->octx, digest, 32); /* using digest[] as scratch space */ - /* Finish the outer SHA256 operation. */ - sha256_end(&ctx->octx, digest); -} - -/** - * HMAC_SHA256_Buf(K, Klen, in, len, digest): - * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of - * length ${Klen}, and write the result to ${digest}. - */ -static void -HMAC_SHA256_Buf(const void *K, size_t Klen, const void *in, size_t len, void *digest) -{ - HMAC_SHA256_CTX ctx; - HMAC_SHA256_Init(&ctx, K, Klen); - HMAC_SHA256_Update(&ctx, in, len); - HMAC_SHA256_Final(&ctx, digest); -} - /** * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and @@ -111,15 +35,15 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt, size_t saltlen, uint64_t c, uint8_t *buf, size_t dkLen) { - HMAC_SHA256_CTX Phctx, PShctx, hctx; + hmac_ctx_t Phctx, PShctx; size_t i; /* Compute HMAC state after processing P. */ - HMAC_SHA256_Init(&Phctx, passwd, passwdlen); + hmac_begin(&Phctx, passwd, passwdlen, sha256_begin); /* Compute HMAC state after processing P and S. */ - memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX)); - HMAC_SHA256_Update(&PShctx, salt, saltlen); + PShctx = Phctx; + hmac_hash(&PShctx, salt, saltlen); /* Iterate through the blocks. 
*/ for (i = 0; dkLen != 0; i++) { @@ -134,18 +58,16 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, ivec = SWAP_BE32((uint32_t)(i + 1)); /* Compute U_1 = PRF(P, S || INT(i)). */ - hctx = PShctx; - HMAC_SHA256_Update(&hctx, &ivec, 4); - HMAC_SHA256_Final(&hctx, T); + hmac_peek_hash(&PShctx, (void*)T, &ivec, 4, NULL); +//TODO: the above is a vararg function, might incur some ABI pain +//does libbb need a non-vararg version with just one (buf,len)? if (c > 1) { /* T_i = U_1 ... */ memcpy(U, T, 32); for (j = 2; j <= c; j++) { /* Compute U_j. */ - hctx = Phctx; - HMAC_SHA256_Update(&hctx, U, 32); - HMAC_SHA256_Final(&hctx, U); + hmac_peek_hash(&Phctx, (void*)U, U, 32, NULL); /* ... xor U_j ... */ for (k = 0; k < 32 / 8; k++) T[k] ^= U[k]; diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 27ef2caa4..f1f06621e 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -735,8 +735,12 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, ctx_i->S0 = Si + Sbytes / 3 * 2; ctx_i->w = 0; if (i == 0) - HMAC_SHA256_Buf(Bp + (128 * r - 64), 64, - passwd, 32, passwd); + hmac_block( + /* key,len: */ Bp + (128 * r - 64), 64, + /* hash fn: */ sha256_begin, + /* in,len: */ passwd, 32, + /* outbuf: */ passwd + ); } smix1(Bp, r, Np, flags, Vp, NROM, VROM, XYp, ctx_i); smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, @@ -907,9 +911,12 @@ static int yescrypt_kdf32_body( S = (uint8_t *)XY + XY_size; if (flags) { - HMAC_SHA256_Buf("yescrypt-prehash", - (flags & YESCRYPT_PREHASH) ? 16 : 8, - passwd, passwdlen, sha256); + hmac_block( + /* key,len: */ (const void*)"yescrypt-prehash", (flags & YESCRYPT_PREHASH) ? 
16 : 8, + /* hash fn: */ sha256_begin, + /* in,len: */ passwd, passwdlen, + /* outbuf: */ sha256 + ); passwd = sha256; passwdlen = sizeof(sha256); } @@ -946,7 +953,12 @@ static int yescrypt_kdf32_body( */ if (flags && !(flags & YESCRYPT_PREHASH)) { /* Compute ClientKey */ - HMAC_SHA256_Buf(dkp, sizeof(dk), "Client Key", 10, sha256); + hmac_block( + /* key,len: */ dkp, sizeof(dk), + /* hash fn: */ sha256_begin, + /* in,len: */ "Client Key", 10, + /* outbuf: */ sha256 + ); /* Compute StoredKey */ { size_t clen = /*buflen:*/32; diff --git a/networking/tls.c b/networking/tls.c index b8caf1e4b..098cf7cac 100644 --- a/networking/tls.c +++ b/networking/tls.c @@ -533,10 +533,10 @@ static void *tls_get_zeroed_outbuf(tls_state_t *tls, int len) /* Calculate the HMAC over the list of blocks */ #if !ENABLE_FEATURE_TLS_SHA1 -#define hmac_block(tls,out,key,key_size,...) \ - hmac_block(out,key,key_size, __VA_ARGS__) +#define hmac_blocks(tls,out,key,key_size,...) \ + hmac_blocks(out,key,key_size, __VA_ARGS__) #endif -static unsigned hmac_block(tls_state_t *tls, uint8_t *out, uint8_t *key, unsigned key_size, ...) +static unsigned hmac_blocks(tls_state_t *tls, uint8_t *out, uint8_t *key, unsigned key_size, ...) 
{ hmac_ctx_t ctx; va_list va; @@ -575,7 +575,7 @@ static void xwrite_encrypted_and_hmac_signed(tls_state_t *tls, unsigned size, un xhdr->len16_lo = size & 0xff; /* Calculate MAC signature */ - hmac_block(tls, buf + size, /* result */ + hmac_blocks(tls, buf + size, /* result */ tls->client_write_MAC_key, TLS_MAC_SIZE(tls), &tls->write_seq64_be, sizeof(tls->write_seq64_be), xhdr, RECHDR_LEN, -- cgit v1.2.3-55-g6feb From f464be22bd63bf0326bc14a755cbac282fad159a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 10:39:14 +0200 Subject: libbb/yescrypt: make it possible to set constant parameters, and set YESCRYPT_RW function old new delta yescrypt_kdf32_body 1052 1420 +368 yescrypt_r 1133 1084 -49 static.smix 762 - -762 ------------------------------------------------------------------------------ (add/remove: 0/1 grow/shrink: 1/1 up/down: 368/-811) Total: -443 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 61 ++++++++---------- libbb/yescrypt/alg-yescrypt-kdf.c | 58 ++++++++--------- libbb/yescrypt/alg-yescrypt.h | 117 +++++++++++++++++++++++++---------- testsuite/cryptpw.tests | 2 +- 4 files changed, 142 insertions(+), 96 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 5bdf1893e..db6e098c7 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -144,7 +144,7 @@ char *yescrypt_r( char *dst; const uint8_t *src, *saltstr, *saltend; size_t need, prefixlen, saltstrlen; - uint32_t flavor, N_log2; + uint32_t u32; memset(yctx, 0, sizeof(yctx)); yctx->param.p = 1; @@ -152,43 +152,34 @@ char *yescrypt_r( /* we assume setting starts with "$y$" (caller must ensure this) */ src = setting + 3; - src = decode64_uint32(&flavor, src, 0); + src = decode64_uint32(&yctx->param.flags, src, 0); /* "j9T" returns: 0x2f */ - dbg("yescrypt flavor=0x%x YESCRYPT_RW:%u", (unsigned)flavor, !!(flavor & YESCRYPT_RW)); //if (!src) // 
goto fail; - if (flavor < YESCRYPT_RW) { - yctx->param.flags = flavor; - } else if (flavor <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { + if (yctx->param.flags < YESCRYPT_RW) { + dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags); + goto fail; // bbox: we don't support scrypt - only yescrypt + } else if (yctx->param.flags <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { /* "j9T" sets flags to 0xb6 */ - yctx->param.flags = YESCRYPT_RW + ((flavor - YESCRYPT_RW) << 2); + yctx->param.flags = YESCRYPT_RW + ((yctx->param.flags - YESCRYPT_RW) << 2); dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags); - dbg(" YESCRYPT_RW:%u" , !!(yctx->param.flags & YESCRYPT_RW )); - dbg(" YESCRYPT_ROUNDS_6:%u" , !!(yctx->param.flags & YESCRYPT_ROUNDS_6 )); - dbg(" YESCRYPT_GATHER_2:%u" , !!(yctx->param.flags & YESCRYPT_GATHER_2 )); - dbg(" YESCRYPT_GATHER_4:%u" , !!(yctx->param.flags & YESCRYPT_GATHER_4 )); - dbg(" YESCRYPT_GATHER_8:%u" , !!(yctx->param.flags & YESCRYPT_GATHER_8 )); - dbg(" YESCRYPT_SIMPLE_2:%u" , !!(yctx->param.flags & YESCRYPT_SIMPLE_2 )); - dbg(" YESCRYPT_SIMPLE_4:%u" , !!(yctx->param.flags & YESCRYPT_SIMPLE_4 )); - dbg(" YESCRYPT_SIMPLE_8:%u" , !!(yctx->param.flags & YESCRYPT_SIMPLE_8 )); - dbg(" YESCRYPT_SBOX_12K:%u" , !!(yctx->param.flags & YESCRYPT_SBOX_12K )); - dbg(" YESCRYPT_SBOX_24K:%u" , !!(yctx->param.flags & YESCRYPT_SBOX_24K )); - dbg(" YESCRYPT_SBOX_48K:%u" , !!(yctx->param.flags & YESCRYPT_SBOX_48K )); - dbg(" YESCRYPT_SBOX_96K:%u" , !!(yctx->param.flags & YESCRYPT_SBOX_96K )); - dbg(" YESCRYPT_SBOX_192K:%u", !!(yctx->param.flags & YESCRYPT_SBOX_192K)); - dbg(" YESCRYPT_SBOX_384K:%u", !!(yctx->param.flags & YESCRYPT_SBOX_384K)); - dbg(" YESCRYPT_SBOX_768K:%u", !!(yctx->param.flags & YESCRYPT_SBOX_768K)); + dbg(" YESCRYPT_RW:%u", !!(yctx->param.flags & YESCRYPT_RW)); + dbg((yctx->param.flags & YESCRYPT_RW_FLAVOR_MASK) == + (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K) + ? 
" YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K" + : " flags are not standard" + ); } else { goto fail; } - src = decode64_uint32(&N_log2, src, 1); - if (/*!src ||*/ N_log2 > 63) + src = decode64_uint32(&u32, src, 1); + if (/*!src ||*/ u32 > 63) goto fail; - yctx->param.N = (uint64_t)1 << N_log2; + yctx->param.N = (uint64_t)1 << u32; /* "j9T" sets to 4096 (1<<12) */ - dbg("yctx->param.N=%llu (1<<%u)", (unsigned long long)yctx->param.N, (unsigned)N_log2); + dbg("yctx->param.N=%llu (1<<%u)", (unsigned long long)yctx->param.N, (unsigned)u32); src = decode64_uint32(&yctx->param.r, src, 1); /* "j9T" sets to 32 */ @@ -197,21 +188,19 @@ char *yescrypt_r( if (!src) goto fail; if (*src != '$') { - uint32_t have; - src = decode64_uint32(&have, src, 1); + src = decode64_uint32(&u32, src, 1); dbg("yescrypt has extended params:0x%x", (unsigned)have); - if (have & 1) + if (u32 & 1) src = decode64_uint32(&yctx->param.p, src, 2); - if (have & 2) + if (u32 & 2) src = decode64_uint32(&yctx->param.t, src, 1); - if (have & 4) + if (u32 & 4) src = decode64_uint32(&yctx->param.g, src, 1); - if (have & 8) { - uint32_t NROM_log2; - src = decode64_uint32(&NROM_log2, src, 1); - if (/*!src ||*/ NROM_log2 > 63) + if (u32 & 8) { + src = decode64_uint32(&u32, src, 1); + if (/*!src ||*/ u32 > 63) goto fail; - yctx->param.NROM = (uint64_t)1 << NROM_log2; + yctx->param.NROM = (uint64_t)1 << u32; } if (!src) goto fail; diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index f1f06621e..13ae62b7c 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -460,7 +460,7 @@ static inline uint32_t integerify(const salsa20_blk_t *B, size_t r) * to a multiple of at least 16 bytes. 
*/ static void smix1(uint8_t *B, size_t r, uint32_t N, - yescrypt_flags_t flags, + uint32_t flags, salsa20_blk_t *V, uint32_t NROM, const salsa20_blk_t *VROM, salsa20_blk_t *XY, @@ -513,6 +513,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, V_j = &VROM[j * s]; blockmix_xor(Y, V_j, XY, r, ctx); } else if (flags & YESCRYPT_RW) { +//can't use flags___YESCRYPT_RW, smix1() may be called with flags = 0 uint32_t n; salsa20_blk_t *V_j; @@ -580,7 +581,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes. */ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, - yescrypt_flags_t flags, + uint32_t flags, salsa20_blk_t *V, uint32_t NROM, const salsa20_blk_t *VROM, salsa20_blk_t *XY, @@ -610,6 +611,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, * because our SMix resets YESCRYPT_RW for the smix2() calls operating on the * entire V when p > 1. */ +//and this is why bbox can't use flags___YESCRYPT_RW in this function if (VROM && (flags & YESCRYPT_RW)) { do { salsa20_blk_t *V_j = &V[j * s]; @@ -683,7 +685,7 @@ static uint64_t p2floor(uint64_t x) * might also result in cache bank conflicts). 
*/ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, - yescrypt_flags_t flags, + uint32_t flags, salsa20_blk_t *V, uint32_t NROM, const salsa20_blk_t *VROM, salsa20_blk_t *XY, @@ -696,7 +698,7 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, Nchunk = N / p; Nloop_all = Nchunk; - if (flags & YESCRYPT_RW) { + if (flags___YESCRYPT_RW) { if (t <= 1) { if (t) Nloop_all *= 2; /* 2/3 */ @@ -711,7 +713,7 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, } Nloop_rw = 0; - if (flags & YESCRYPT_RW) + if (flags___YESCRYPT_RW) Nloop_rw = Nloop_all / p; Nchunk &= ~(uint32_t)1; /* round down to even */ @@ -725,7 +727,7 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, salsa20_blk_t *Vp = &V[Vchunk * s]; salsa20_blk_t *XYp = XY; pwxform_ctx_t *ctx_i = NULL; - if (flags & YESCRYPT_RW) { + if (flags___YESCRYPT_RW) { uint8_t *Si = S + i * Salloc; smix1(Bp, 1, Sbytes / 128, 0 /* no flags */, (salsa20_blk_t *)Si, 0, NULL, XYp, NULL); @@ -752,12 +754,12 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, uint8_t *Bp = &B[128 * r * i]; salsa20_blk_t *XYp = XY; pwxform_ctx_t *ctx_i = NULL; - if (flags & YESCRYPT_RW) { + if (flags___YESCRYPT_RW) { uint8_t *Si = S + i * Salloc; ctx_i = (pwxform_ctx_t *)(Si + Sbytes); } smix2(Bp, r, N, Nloop_all - Nloop_rw, - flags & (yescrypt_flags_t)~YESCRYPT_RW, + flags & (uint32_t)~YESCRYPT_RW, V, NROM, VROM, XYp, ctx_i); } } @@ -812,7 +814,7 @@ static void free_region(yescrypt_region_t *region) static int yescrypt_kdf32_body( yescrypt_ctx_t *yctx, const uint8_t *passwd, size_t passwdlen, - yescrypt_flags_t flags, uint64_t N, uint32_t t, + uint32_t flags, uint64_t N, uint32_t t, uint8_t *buf32) { const salsa20_blk_t *VROM; @@ -823,13 +825,13 @@ static int yescrypt_kdf32_body( uint8_t dk[sizeof(sha256)], *dkp = buf32; /* Sanity-check parameters */ - switch (flags & YESCRYPT_MODE_MASK) { + switch 
(flags___YESCRYPT_MODE_MASK) { case 0: /* classic scrypt - can't have anything non-standard */ - if (flags || t || yctx->param.NROM) + if (flags || t || YCTX_param_NROM) goto out_EINVAL; break; case YESCRYPT_WORM: - if (flags != YESCRYPT_WORM || yctx->param.NROM) + if (flags != YESCRYPT_WORM || YCTX_param_NROM) goto out_EINVAL; break; case YESCRYPT_RW: @@ -852,8 +854,8 @@ static int yescrypt_kdf32_body( goto out_EINVAL; #endif { - const uint32_t r = yctx->param.r; - const uint32_t p = yctx->param.p; + const uint32_t r = YCTX_param_r; + const uint32_t p = YCTX_param_p; if ((uint64_t)r * (uint64_t)p >= 1 << 30) goto out_EINVAL; if (N > UINT32_MAX) @@ -863,7 +865,7 @@ static int yescrypt_kdf32_body( if (r > SIZE_MAX / 256 / p || N > SIZE_MAX / 128 / r) goto out_EINVAL; - if (flags & YESCRYPT_RW) { + if (flags___YESCRYPT_RW) { /* p cannot be greater than SIZE_MAX/Salloc on 64-bit systems, but it can on 32-bit systems. */ #pragma GCC diagnostic push @@ -874,7 +876,7 @@ static int yescrypt_kdf32_body( } VROM = NULL; - if (yctx->param.NROM) + if (YCTX_param_NROM) goto out_EINVAL; /* Allocate memory */ @@ -889,7 +891,7 @@ static int yescrypt_kdf32_body( need += XY_size; if (need < XY_size) goto out_EINVAL; - if (flags & YESCRYPT_RW) { + if (flags___YESCRYPT_RW) { size_t S_size = (size_t)Salloc * p; need += S_size; if (need < S_size) @@ -907,7 +909,7 @@ static int yescrypt_kdf32_body( V = (salsa20_blk_t *)((uint8_t *)B + B_size); XY = (salsa20_blk_t *)((uint8_t *)V + V_size); S = NULL; - if (flags & YESCRYPT_RW) + if (flags___YESCRYPT_RW) S = (uint8_t *)XY + XY_size; if (flags) { @@ -926,13 +928,13 @@ static int yescrypt_kdf32_body( if (flags) memcpy(sha256, B, sizeof(sha256)); - if (p == 1 || (flags & YESCRYPT_RW)) { - smix(B, r, N, p, t, flags, V, yctx->param.NROM, VROM, XY, S, sha256); + if (p == 1 || (flags___YESCRYPT_RW)) { + smix(B, r, N, p, t, flags, V, YCTX_param_NROM, VROM, XY, S, sha256); } else { uint32_t i; for (i = 0; i < p; i++) { smix(&B[(size_t)128 * r * i], 
r, N, 1, t, flags, V, - yctx->param.NROM, VROM, XY, NULL, NULL); + YCTX_param_NROM, VROM, XY, NULL, NULL); } } @@ -996,12 +998,12 @@ int yescrypt_kdf32( const uint8_t *passwd, size_t passwdlen, uint8_t *buf32) { - yescrypt_flags_t flags = yctx->param.flags; - uint64_t N = yctx->param.N; - uint32_t r = yctx->param.r; - uint32_t p = yctx->param.p; - uint32_t t = yctx->param.t; - uint32_t g = yctx->param.g; + uint32_t flags = YCTX_param_flags; + uint64_t N = YCTX_param_N; + uint32_t r = YCTX_param_r; + uint32_t p = YCTX_param_p; + uint32_t t = YCTX_param_t; + uint32_t g = YCTX_param_g; uint8_t dk32[32]; int retval; @@ -1011,7 +1013,7 @@ int yescrypt_kdf32( return -1; } - if ((flags & YESCRYPT_RW) + if ((flags___YESCRYPT_RW) && p >= 1 && N / p >= 0x100 && N / p * r >= 0x20000 diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 996af333f..97475d89f 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -42,28 +42,32 @@ * Please refer to the description of yescrypt_kdf() below for the meaning of * these flags. 
*/ -typedef uint32_t yescrypt_flags_t; +/* yescrypt flags: + * bits pos: 7654321076543210 + * ss r w + * sbox gg y + */ /* Public */ #define YESCRYPT_WORM 1 #define YESCRYPT_RW 0x002 -#define YESCRYPT_ROUNDS_3 0x000 -#define YESCRYPT_ROUNDS_6 0x004 -#define YESCRYPT_GATHER_1 0x000 -#define YESCRYPT_GATHER_2 0x008 -#define YESCRYPT_GATHER_4 0x010 -#define YESCRYPT_GATHER_8 0x018 -#define YESCRYPT_SIMPLE_1 0x000 -#define YESCRYPT_SIMPLE_2 0x020 -#define YESCRYPT_SIMPLE_4 0x040 -#define YESCRYPT_SIMPLE_8 0x060 -#define YESCRYPT_SBOX_6K 0x000 -#define YESCRYPT_SBOX_12K 0x080 -#define YESCRYPT_SBOX_24K 0x100 -#define YESCRYPT_SBOX_48K 0x180 -#define YESCRYPT_SBOX_96K 0x200 -#define YESCRYPT_SBOX_192K 0x280 -#define YESCRYPT_SBOX_384K 0x300 -#define YESCRYPT_SBOX_768K 0x380 +#define YESCRYPT_ROUNDS_3 0x000 //r=0 +#define YESCRYPT_ROUNDS_6 0x004 //r=1 +#define YESCRYPT_GATHER_1 0x000 //gg=00 +#define YESCRYPT_GATHER_2 0x008 //gg=01 +#define YESCRYPT_GATHER_4 0x010 //gg=10 +#define YESCRYPT_GATHER_8 0x018 //gg=11 +#define YESCRYPT_SIMPLE_1 0x000 //ss=00 +#define YESCRYPT_SIMPLE_2 0x020 //ss=01 +#define YESCRYPT_SIMPLE_4 0x040 //ss=10 +#define YESCRYPT_SIMPLE_8 0x060 //ss=11 +#define YESCRYPT_SBOX_6K 0x000 //sbox=0000 +#define YESCRYPT_SBOX_12K 0x080 //sbox=0001 +#define YESCRYPT_SBOX_24K 0x100 //sbox=0010 +#define YESCRYPT_SBOX_48K 0x180 //sbox=0011 +#define YESCRYPT_SBOX_96K 0x200 //sbox=0100 +#define YESCRYPT_SBOX_192K 0x280 //sbox=0101 +#define YESCRYPT_SBOX_384K 0x300 //sbox=0110 +#define YESCRYPT_SBOX_768K 0x380 //sbox=0111 #ifdef YESCRYPT_INTERNAL /* Private */ @@ -86,6 +90,19 @@ typedef uint32_t yescrypt_flags_t; YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH) #endif +/* How many chars base-64 encoded bytes require? 
*/ +#define YESCRYPT_BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6) +/* The /etc/passwd-style hash is "$" */ +/* + * "$y$", up to 8 params of up to 6 chars each, '$', salt + * Alternatively, but that's smaller: + * "$7$", 3 params encoded as 1+5+5 chars, salt + */ +#define YESCRYPT_PREFIX_LEN (3 + 8 * 6 + 1 + YESCRYPT_BYTES2CHARS(32)) + +#define YESCRYPT_HASH_SIZE 32 +#define YESCRYPT_HASH_LEN YESCRYPT_BYTES2CHARS(YESCRYPT_HASH_SIZE) + /** * Internal type used by the memory allocator. Please do not use it directly. * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since @@ -104,7 +121,7 @@ typedef struct { * set. flags, t, g, NROM are special to yescrypt. */ typedef struct { - yescrypt_flags_t flags; + uint32_t flags; uint64_t N; uint32_t r, p, t, g; uint64_t NROM; @@ -123,18 +140,56 @@ typedef struct { yescrypt_region_t local[1]; } yescrypt_ctx_t; -/* How many chars base-64 encoded bytes require? */ -#define YESCRYPT_BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6) -/* The /etc/passwd-style hash is "$" */ -/* - * "$y$", up to 8 params of up to 6 chars each, '$', salt - * Alternatively, but that's smaller: - * "$7$", 3 params encoded as 1+5+5 chars, salt - */ -#define YESCRYPT_PREFIX_LEN (3 + 8 * 6 + 1 + YESCRYPT_BYTES2CHARS(32)) +// How much we can save by forcing the "standard" value by commenting out the next line: +// 160 bytes +//#define YCTX_param_flags yctx->param.flags +// 260 bytes +//#define flags___YESCRYPT_RW (flags & YESCRYPT_RW) +// 140 bytes +//#define flags___YESCRYPT_MODE_MASK (flags & YESCRYPT_MODE_MASK) +// ^^^^ forcing the above since the code already requires (checks for) this +// 50 bytes +#define YCTX_param_N yctx->param.N +// -100 bytes (negative!!!) 
+#define YCTX_param_r yctx->param.r +// 400 bytes +#define YCTX_param_p yctx->param.p +// 130 bytes +#define YCTX_param_t yctx->param.t +// 2 bytes +#define YCTX_param_g yctx->param.g +// 1 bytes +// ^^^^ this looks wrong, compiler should be able to constant-propagate the fact that NROM code is dead +#define YCTX_param_NROM yctx->param.NROM -#define YESCRYPT_HASH_SIZE 32 -#define YESCRYPT_HASH_LEN YESCRYPT_BYTES2CHARS(YESCRYPT_HASH_SIZE) +// standard ("j9T") values: +#ifndef YCTX_param_flags +#define YCTX_param_flags (YESCRYPT_RW | YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K) +#endif +#ifndef flags___YESCRYPT_RW +#define flags___YESCRYPT_RW ((void)flags, YESCRYPT_RW) +#endif +#ifndef flags___YESCRYPT_MODE_MASK +#define flags___YESCRYPT_MODE_MASK ((void)flags, YESCRYPT_RW) +#endif +#ifndef YCTX_param_N +#define YCTX_param_N 4096 +#endif +#ifndef YCTX_param_r +#define YCTX_param_r 32 +#endif +#ifndef YCTX_param_p +#define YCTX_param_p 1 +#endif +#ifndef YCTX_param_t +#define YCTX_param_t 0 +#endif +#ifndef YCTX_param_g +#define YCTX_param_g 0 +#endif +#ifndef YCTX_param_NROM +#define YCTX_param_NROM 0 +#endif /** * yescrypt_r(shared, local, passwd, passwdlen, setting, key, buf, buflen): diff --git a/testsuite/cryptpw.tests b/testsuite/cryptpw.tests index a17123218..739fb4e9f 100755 --- a/testsuite/cryptpw.tests +++ b/testsuite/cryptpw.tests @@ -60,7 +60,7 @@ testing 'cryptpw yescrypt' \ 'cryptpw -m yescrypt qweRTY123@-+ j9T\$123456789012345678901234' \ '$y$j9T$123456789012345678901234$AKxw5OX/T4jD.v./IW.5tE/j7izNjw06fg3OvH1LsN9\n' \ '' '' -testing 'cryptpw yescrypt with non-standard cost 4 instead of 5 (j8T instead of j9T)' \ +testing 'cryptpw yescrypt with non-standard N=2048 instead of 4096 (j8T instead of j9T)' \ 'cryptpw -m yescrypt qweRTY123@-+ j8T\$123456789012345678901234' \ '$y$j8T$123456789012345678901234$JQUUfopCxlfZNE8f.THJwbOkhy.XtB3GIjo9HUVioWB\n' \ '' '' -- cgit v1.2.3-55-g6feb From 
63025e8bca426f1013ac0ae7ef103bf8a7f71622 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 10:50:20 +0200 Subject: libbb/yescrypt: remove forgotten SHA256 HMAC header file Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-sha256.h | 31 ------------------------------- libbb/yescrypt/y.c | 1 - 2 files changed, 32 deletions(-) delete mode 100644 libbb/yescrypt/alg-sha256.h (limited to 'libbb') diff --git a/libbb/yescrypt/alg-sha256.h b/libbb/yescrypt/alg-sha256.h deleted file mode 100644 index 862f49dbe..000000000 --- a/libbb/yescrypt/alg-sha256.h +++ /dev/null @@ -1,31 +0,0 @@ -/*- - * Copyright 2005-2016 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -/* Context structure for HMAC-SHA256 operations. */ -typedef struct { - sha256_ctx_t ictx; - sha256_ctx_t octx; -} HMAC_SHA256_CTX; diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c index e7d447531..d5ab8903f 100644 --- a/libbb/yescrypt/y.c +++ b/libbb/yescrypt/y.c @@ -10,7 +10,6 @@ #include "libbb.h" #define YESCRYPT_INTERNAL -#include "alg-sha256.h" #include "alg-yescrypt.h" #include "alg-sha256.c" #include "alg-yescrypt-kdf.c" -- cgit v1.2.3-55-g6feb From 75758c73608f3c6be9ea2d338a199a8aa11c51e2 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 17:08:32 +0200 Subject: libbb/yescrypt: accept longer salts (up to 84 chars) function old new delta cryptpw_main 214 223 +9 chpasswd_main 347 356 +9 passwd_main 931 934 +3 yescrypt_r 1084 1056 -28 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/1 up/down: 21/-28) Total: -7 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 5 ++- libbb/yescrypt/alg-yescrypt-common.c | 76 +++++++++++++++++++++++++++++++++++- libbb/yescrypt/alg-yescrypt.h | 5 +++ testsuite/cryptpw.tests | 4 ++ 4 files changed, 86 insertions(+), 4 deletions(-) (limited to 'libbb') diff --git a/include/libbb.h b/include/libbb.h index cbf723f7e..544ca3155 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1819,9 +1819,10 @@ extern int crypt_make_rand64encoded(char *p, int cnt /*, int rnd*/) FAST_FUNC; * "$6$" * #define MAX_PW_SALT_LEN (3 + 16 + 1) * yescrypt: - * "$y$j9T$" + * "$y$" "$" + * (84 chars are ascii64-encoded 64 binary bytes) */ -#define MAX_PW_SALT_LEN (7 + 24 + 1) +#define MAX_PW_SALT_LEN (3 + 8*6 + 1 + 84 + 1) extern char* crypt_make_pw_salt(char p[MAX_PW_SALT_LEN], const char *algo) FAST_FUNC; /* Returns number of lines changed, or -1 on error */ diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index db6e098c7..1e896df64 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ 
b/libbb/yescrypt/alg-yescrypt-common.c @@ -63,6 +63,75 @@ fail: return NULL; } +#if 1 +static const uint8_t *decode64( + uint8_t *dst, size_t *dstlen, + const uint8_t *src, size_t srclen) +{ + size_t dstpos = 0; + + dbg_dec64("src:'%s' len:%d", src, (int)srclen); + for (;;) { + uint32_t c, value = 0; + int bits = 0; + while (srclen != 0) { + srclen--; + c = a2i64(*src); + if (c > 63) { /* bad ascii64 char, stop decoding at it */ + srclen = 0; + break; + } + src++; + value |= c << bits; + bits += 6; + if (bits == 24) /* got 4 chars */ + goto store; + } + /* we read entire src, or met a non-ascii64 char (such as "$") */ + if (bits == 0) + break; + /* else: we got last, partial bit block - store it */ + store: + dbg_dec64(" storing bits:%d v:%08x", bits, (int)SWAP_BE32(value)); //BE to see lsb first + while (dstpos < *dstlen) { + if (srclen == 0 && value == 0) { + /* Example: mkpasswd PWD '$y$j9T$123': + * the "123" is bits:18 value:03,51,00 + * is considered to be 2 bytes, not 3! + * + * '$y$j9T$zzz' in upstream fails outright (3rd byte isn't zero). + * IOW: for upstream, validity of salt depends on VALUE, + * not just size of salt. Which is a bug. + * The '$y$j9T$zzz.' salt is the same + * (it adds 6 zero msbits) but upstream works with it, + * thus '$y$j9T$zzz' should work too and give the same result. + */ + goto end; + } + dstpos++; + *dst++ = value; + value >>= 8; + bits -= 8; + if (bits <= 0) /* can get negative, if we e.g. 
had 6 bits */ + goto next; + } + dbg_dec64(" ERR: bits:%d dst[] is too small", bits); + goto fail; + next: + if (srclen == 0) + break; + } + end: + /* here, srclen is 0, no need to check */ + *dstlen = dstpos; + dbg_dec64("dec64: OK, dst[%d]", (int)dstpos); + return src; +fail: + *dstlen = 0; + return NULL; +} +#else +/* Buggy (and larger) original code */ static const uint8_t *decode64( uint8_t *dst, size_t *dstlen, const uint8_t *src, size_t srclen) @@ -87,6 +156,7 @@ static const uint8_t *decode64( break; if (bits < 12) /* must have at least one full byte */ goto fail; + dbg_dec64(" storing bits:%d v:%08x", (int)bits, (int)SWAP_BE32(value)); //BE to see lsb first while (dstpos++ < *dstlen) { *dst++ = value; value >>= 8; @@ -104,12 +174,14 @@ static const uint8_t *decode64( if (!srclen && dstpos <= *dstlen) { *dstlen = dstpos; + dbg_dec64("dec64: OK, dst[%d]", (int)dstpos); return src; } fail: - *dstlen = 0; + /* *dstlen = 0; - not needed, caller detects error by seeing NULL */ return NULL; } +#endif static char *encode64( char *dst, size_t dstlen, @@ -189,7 +261,7 @@ char *yescrypt_r( goto fail; if (*src != '$') { src = decode64_uint32(&u32, src, 1); - dbg("yescrypt has extended params:0x%x", (unsigned)have); + dbg("yescrypt has extended params:0x%x", (unsigned)u32); if (u32 & 1) src = decode64_uint32(&yctx->param.p, src, 2); if (u32 & 2) diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 97475d89f..4554e3de3 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -33,6 +33,11 @@ # else # define dbg(...) bb_error_msg(__VA_ARGS__) # endif +# if 1 +# define dbg_dec64(...) ((void)0) +# else +# define dbg_dec64(...) 
bb_error_msg(__VA_ARGS__) +# endif #endif /** diff --git a/testsuite/cryptpw.tests b/testsuite/cryptpw.tests index 739fb4e9f..ef04e20d7 100755 --- a/testsuite/cryptpw.tests +++ b/testsuite/cryptpw.tests @@ -73,6 +73,10 @@ testing 'cryptpw yescrypt with 3-char salt' \ 'cryptpw -m yescrypt qweRTY123@-+ j9T\$123' \ '$y$j9T$123$A34DMIGUbUIo3bjx66Wtk2IFoREMIw6d49it25KQh2D\n' \ '' '' +testing 'cryptpw yescrypt with 84-char salt (max size)' \ + 'cryptpw -m yescrypt qweRTY123@-+ j9T\$123456789012345678901234567890123456789012345678901234567890123456789012345678901234' \ + '$y$j9T$123456789012345678901234567890123456789012345678901234567890123456789012345678901234$ubrUuPCpI97LIMlVMt/A0Mhs/kBK2UBJYcQSxEZSlz4\n' \ + '' '' testing 'cryptpw yescrypt implicit' \ 'cryptpw qweRTY123@-+ \$y\$j9T\$123456789012345678901234' \ '$y$j9T$123456789012345678901234$AKxw5OX/T4jD.v./IW.5tE/j7izNjw06fg3OvH1LsN9\n' \ -- cgit v1.2.3-55-g6feb From 479d8db99faa005783fc483d0830314d6156e51f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 17:26:47 +0200 Subject: libbb/yescrypt: fix salts ending in dots (corresponding to binary zeros) Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 2 +- testsuite/cryptpw.tests | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 1e896df64..262fe82fb 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -94,7 +94,7 @@ static const uint8_t *decode64( store: dbg_dec64(" storing bits:%d v:%08x", bits, (int)SWAP_BE32(value)); //BE to see lsb first while (dstpos < *dstlen) { - if (srclen == 0 && value == 0) { + if (srclen == 0 && value == 0 && bits < 8) { /* Example: mkpasswd PWD '$y$j9T$123': * the "123" is bits:18 value:03,51,00 * is considered to be 2 bytes, not 3! 
diff --git a/testsuite/cryptpw.tests b/testsuite/cryptpw.tests index ef04e20d7..ab8f32d8e 100755 --- a/testsuite/cryptpw.tests +++ b/testsuite/cryptpw.tests @@ -65,6 +65,9 @@ testing 'cryptpw yescrypt with non-standard N=2048 instead of 4096 (j8T instead '$y$j8T$123456789012345678901234$JQUUfopCxlfZNE8f.THJwbOkhy.XtB3GIjo9HUVioWB\n' \ '' '' # mkpasswd-5.6.2 allows short salts for yescrypt +# ...but there is a catch. Not all of them. +# The "partial" (not fitting in whole bytes) ascii64-encoded salt +# is a special case. For example, "$zzz" would not even work in upstream. testing 'cryptpw yescrypt with empty salt' \ 'cryptpw -m yescrypt qweRTY123@-+ j9T\$' \ '$y$j9T$$hpeksL94GXNRwnA00L3c8WFy0khFAUbCpBSak.N3Bp.\n' \ @@ -73,6 +76,23 @@ testing 'cryptpw yescrypt with 3-char salt' \ 'cryptpw -m yescrypt qweRTY123@-+ j9T\$123' \ '$y$j9T$123$A34DMIGUbUIo3bjx66Wtk2IFoREMIw6d49it25KQh2D\n' \ '' '' +# "." is not allowed in mkpasswd-5.6.2 +# .................................... +# ".." is decoded into one zero byte (not two) +testing 'cryptpw yescrypt with 2-char salt ".."' \ + 'cryptpw -m yescrypt qweRTY123@-+ j9T\$..' \ + '$y$j9T$..$yVHeOayxOGg6cHL3.dg10u7T.qSgySfLN3uhSVSLNn/\n' \ + '' '' +# "..." is decoded into two zero bytes (not three, not one) +testing 'cryptpw yescrypt with 3-char salt "..."' \ + 'cryptpw -m yescrypt qweRTY123@-+ j9T\$...' \ + '$y$j9T$...$xHvJ5USZ7hFyXYbOijtEOMfZRS23cWIxu2eIBXRymA5\n' \ + '' '' +# "...." is decoded into three zero bytes (no surprises here) +testing 'cryptpw yescrypt with 4-char salt "...."' \ + 'cryptpw -m yescrypt qweRTY123@-+ j9T\$....' 
\ + '$y$j9T$....$wOnauYL2/NEtr6YQi9pi8AtV7L57sEbVOAnWJIcP9q2\n' \ + '' '' testing 'cryptpw yescrypt with 84-char salt (max size)' \ 'cryptpw -m yescrypt qweRTY123@-+ j9T\$123456789012345678901234567890123456789012345678901234567890123456789012345678901234' \ '$y$j9T$123456789012345678901234567890123456789012345678901234567890123456789012345678901234$ubrUuPCpI97LIMlVMt/A0Mhs/kBK2UBJYcQSxEZSlz4\n' \ -- cgit v1.2.3-55-g6feb From 5e9b95ff40de7e9b3395e51d456647ae99b2e8b2 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 17:42:35 +0200 Subject: cryptpw: -m sha512crypt must also be accepted Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt.c | 2 +- testsuite/cryptpw.tests | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'libbb') diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 4acc33039..3b2fea00d 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -30,7 +30,7 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) *salt_ptr++ = '$'; #if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_SHA if ((algo[0]|0x20) == 's') { /* sha */ - salt[1] = '5' + (strcasecmp(algo, "sha512") == 0); + salt[1] = '5' + (strncasecmp(algo, "sha512", 6) == 0); len = 16 / 2; } #endif diff --git a/testsuite/cryptpw.tests b/testsuite/cryptpw.tests index ab8f32d8e..beac35efe 100755 --- a/testsuite/cryptpw.tests +++ b/testsuite/cryptpw.tests @@ -49,6 +49,10 @@ testing 'cryptpw sha512' \ 'cryptpw -m sha512 QWErty 123456789012345678901234567890' \ '$6$1234567890123456$KB7QqxFyqmJSWyQYcCuGeFukgz1bPQoipWZf7.9L7z3k8UNTXa6UikbKcUGDc2ANn7DOGmDaroxDgpK16w/RE0\n' \ '' '' +testing 'cryptpw sha512crypt' \ + 'cryptpw -m sha512crypt QWErty 123456789012345678901234567890' \ + '$6$1234567890123456$KB7QqxFyqmJSWyQYcCuGeFukgz1bPQoipWZf7.9L7z3k8UNTXa6UikbKcUGDc2ANn7DOGmDaroxDgpK16w/RE0\n' \ + '' '' testing 'cryptpw sha512 rounds=99999' \ 'cryptpw -m sha512 QWErty rounds=99999\$123456789012345678901234567890' \ 
'$6$rounds=99999$1234567890123456$BfF6gD6ZjUmwawH5QaAglYAxtU./yvsz0fcQ464l49aMI2DZW3j5ri28CrxK7riPWNpLuUpfaIdY751SBYKUH.\n' \ -- cgit v1.2.3-55-g6feb From e3484095f9fd48d771df4e49db8b6384b5219c21 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 18:03:10 +0200 Subject: libbb/yescrypt: code shrink function old new delta yescrypt_r 1063 1048 -15 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 262fe82fb..99e8b1277 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -126,8 +126,8 @@ static const uint8_t *decode64( *dstlen = dstpos; dbg_dec64("dec64: OK, dst[%d]", (int)dstpos); return src; -fail: - *dstlen = 0; + fail: + /* *dstlen = 0; - not needed, caller detects error by seeing NULL */ return NULL; } #else @@ -177,7 +177,7 @@ static const uint8_t *decode64( dbg_dec64("dec64: OK, dst[%d]", (int)dstpos); return src; } -fail: + fail: /* *dstlen = 0; - not needed, caller detects error by seeing NULL */ return NULL; } -- cgit v1.2.3-55-g6feb From 67b98c7b1b4e0eeeacdd3086baf24248c8ceeee9 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 18:28:56 +0200 Subject: libbb/yescrypt: no need to find salt-terminating "$" twice function old new delta yescrypt_r 1048 1029 -19 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 99e8b1277..dfcf32145 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -66,7 +66,7 @@ fail: #if 1 static const uint8_t *decode64( uint8_t *dst, size_t *dstlen, - const uint8_t *src, size_t srclen) + const uint8_t *src) { size_t 
dstpos = 0; @@ -74,11 +74,9 @@ static const uint8_t *decode64( for (;;) { uint32_t c, value = 0; int bits = 0; - while (srclen != 0) { - srclen--; + while (*src && *src != '$') { c = a2i64(*src); if (c > 63) { /* bad ascii64 char, stop decoding at it */ - srclen = 0; break; } src++; @@ -94,7 +92,7 @@ static const uint8_t *decode64( store: dbg_dec64(" storing bits:%d v:%08x", bits, (int)SWAP_BE32(value)); //BE to see lsb first while (dstpos < *dstlen) { - if (srclen == 0 && value == 0 && bits < 8) { + if (!(*src && *src != '$') && value == 0 && bits < 8) { /* Example: mkpasswd PWD '$y$j9T$123': * the "123" is bits:18 value:03,51,00 * is considered to be 2 bytes, not 3! @@ -118,7 +116,7 @@ static const uint8_t *decode64( dbg_dec64(" ERR: bits:%d dst[] is too small", bits); goto fail; next: - if (srclen == 0) + if (!*src || *src == '$') break; } end: @@ -214,8 +212,8 @@ char *yescrypt_r( yescrypt_ctx_t yctx[1]; unsigned char hashbin32[32]; char *dst; - const uint8_t *src, *saltstr, *saltend; - size_t need, prefixlen, saltstrlen; + const uint8_t *src, *saltend; + size_t need, prefixlen; uint32_t u32; memset(yctx, 0, sizeof(yctx)); @@ -280,17 +278,13 @@ char *yescrypt_r( goto fail; } - saltstr = src + 1; - src = (uint8_t *)strchrnul((char *)saltstr, '$'); - prefixlen = src - setting; /* len("$y$$") */ - saltstrlen = src - saltstr; /* len("") */ - /* src points to end of salt ('$' or NUL byte), won't be used past this point */ - yctx->saltlen = sizeof(yctx->salt); - saltend = decode64(yctx->salt, &yctx->saltlen, saltstr, saltstrlen); - if (saltend != saltstr + saltstrlen) + src++; /* now points to salt */ + saltend = decode64(yctx->salt, &yctx->saltlen, src); + if (!saltend || (*saltend != '\0' && *saltend != '$')) goto fail; /* salt[] is too small, or bad char during decode */ + prefixlen = saltend - setting; need = prefixlen + 1 + YESCRYPT_HASH_LEN + 1; if (need > buflen || need < prefixlen) goto fail; -- cgit v1.2.3-55-g6feb From 
7798f651a48926636944d556a158a9b569a56367 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 21:36:31 +0200 Subject: add libbb/yescrypt/PARAMETERS Signed-off-by: Denys Vlasenko --- libbb/yescrypt/PARAMETERS | 196 +++++++++++++++++++++++++++++++++++ libbb/yescrypt/alg-sha256.c | 9 +- libbb/yescrypt/alg-yescrypt-common.c | 3 +- 3 files changed, 202 insertions(+), 6 deletions(-) create mode 100644 libbb/yescrypt/PARAMETERS (limited to 'libbb') diff --git a/libbb/yescrypt/PARAMETERS b/libbb/yescrypt/PARAMETERS new file mode 100644 index 000000000..d9f5d24e6 --- /dev/null +++ b/libbb/yescrypt/PARAMETERS @@ -0,0 +1,196 @@ + Optimal yescrypt configuration. + +yescrypt is very flexible, but configuring it optimally is complicated. +Here are some guidelines to simplify near-optimal configuration. We +start by listing the parameters and their typical values, and then give +currently recommended parameter sets by use case. + + + Parameters and their typical values. + +Set flags (yescrypt flavor) to YESCRYPT_DEFAULTS to use the currently +recommended flavor. (Other flags values exist for compatibility and for +specialized cases where you think you know what you're doing.) + +Set N (block count) based on target memory usage and running time, as +well as on the value of r (block size in 128 byte units). N must be a +power of two. + +Set r (block size) to 8 (so that N is in KiB, which is convenient) or to +another small value (if more optimal or for fine-tuning of the total +size and/or running time). Reasonable values for r are from 8 to 96. + +Set p (parallelism) to 1 meaning no thread-level parallelism within one +computation of yescrypt. (Use of thread-level parallelism within +yescrypt makes sense for ROM initialization and for key derivation at +high memory usage, but usually not for password hashing where +parallelism is available through concurrent authentication attempts. +Don't use p > 1 unnecessarily.) 
+ +Set t (time) to 0 to use the optimal running time for a given memory +usage. This will allow you to maximize the memory usage (the value of +N*r) while staying within your running time constraints. (Non-zero t +makes sense in specialized cases where you can't afford higher memory +usage but can afford more time.) + +Set g (upgrades) to 0 because there have been no hash upgrades yet. + +Set NROM (block count of ROM) to 0 unless you use a ROM (see below). +NROM must be a power of two. + + + Password hashing for user authentication, no ROM. + +Small and fast (memory usage 2 MiB, performance like bcrypt cost 2^5 - +latency 2-3 ms and throughput 10,000+ per second on a 16-core server): + +flags = YESCRYPT_DEFAULTS, N = 2048, r = 8, p = 1, t = 0, g = 0, NROM = 0 + +Large and slow (memory usage 16 MiB, performance like bcrypt cost 2^8 - +latency 10-30 ms and throughput 1000+ per second on a 16-core server): + +flags = YESCRYPT_DEFAULTS, N = 4096, r = 32, p = 1, t = 0, g = 0, NROM = 0 + +Of course, even heavier and slower settings are possible, if affordable. +Simply double the value of N as many times as needed. Since N must be a +power of two, you may use r (in the range of 8 to 32) or/and t (in the +range of 0 to 2) for fine-tuning the running time, but first bring N to +the maximum you can afford. If this feels too complicated, just use one +of the two parameter sets given above (preferably the second) as-is. + + + Password hashing for user authentication, with ROM. + +It's similar to the above, except that you need to adjust r, set NROM, +and initialize the ROM. + +First decide on a ROM size, such as making it a large portion of your +dedicated authentication servers' RAM sizes. Since NROM (block count) +must be a power of two, you might need to choose r (block size) based on +how your desired ROM size corresponds to a power of two. Also tuning +for performance on current hardware, you'll likely end up with r in the +range from slightly below 16 to 32. 
For example, to use 15/16 of a +server's 256 GiB RAM as ROM (thus, making it 240 GiB), you could use +r=15 or r=30. To use 23/24 of a server's 384 GiB RAM as ROM (thus, +making it 368 GiB), you'd use r=23. Then set NROM to your desired ROM +size in KiB divided by 128*r. Note that these examples might (or might +not) be too extreme, leaving little memory for the rest of the system. +You could as well opt for 7/8 with r=14 or 11/12 with r=11 or r=22. + +Note that higher r may make placing of ROM in e.g. NVMe flash memory +instead of in RAM more reasonable (or less unreasonable) than it would +have been with a lower r. If this is a concern as it relates to +possible attacks and you do not intend to ever do it defensively, you +might want to keep r lower (e.g., prefer r=15 over r=30 in the example +above, even if 30 performs slightly faster). + +Your adjustments to r, if you deviate from powers of two, will also +result in weirder memory usage per hash. Like 1.75 MiB at r=14 instead +of 2 MiB at r=8 that you would have used without a ROM. That's OK. + +For ROM initialization, which you do with yescrypt_init_shared(), use +the same r and NROM that you'd later use for password hashing, choose p +based on your servers' physical and/or logical CPU count (maybe +considering eventual upgrades as you won't be able to change this later, +but without going unnecessarily high - e.g., p=28, p=56, or p=112 make +sense on servers that currently have 28 physical / 56 logical CPUs), and +set the rest of the parameters to: + +flags = YESCRYPT_DEFAULTS, N = 0, t = 0, g = 0 + +N is set to 0 because it isn't relevant during ROM initialization (you +can use different values of N for hashing passwords with the same ROM). + +To keep the ROM in e.g. SysV shared memory and reuse it across your +authentication service restarts, you'd need to allocate the memory and +set the flags to "YESCRYPT_DEFAULTS | YESCRYPT_SHARED_PREALLOCATED". 
+ +For actual password hashing, you'd use your chosen values for N, r, +NROM, and set the rest of the parameters to: + +flags = YESCRYPT_DEFAULTS, p = 1, t = 0, g = 0 + +Note that although you'd use a large p for ROM initialization, you +should use p=1 for actual password hashing like you would without a ROM. + +Do not forget to pass the ROM into the actual password hashing (and keep +r and NROM set accordingly). + +Since N must be a power of two and r is dependent on ROM size, you may +use t (in the range of 0 to 2) for fine-tuning the running time, but +first bring N to the maximum you can afford. + +If this feels too complicated, or even if it doesn't, please consider +engaging Openwall for your yescrypt deployment. We'd be happy to help. + + + Password-based key derivation. + +(Or rather passphrase-based.) + +Use settings similar to those for password hashing without a ROM, but +adjusted for higher memory usage and running time, and optionally with +thread-level parallelism. + +Small and fast (memory usage 128 MiB, running time under 100 ms on a +fast desktop): + +flags = YESCRYPT_DEFAULTS, N = 32768, r = 32, p = 1, t = 0, g = 0, NROM = 0 + +Large and fast (memory usage 1 GiB, running time under 200 ms on a fast +quad-core desktop not including memory allocation overhead, under 250 ms +with the overhead included), but requires build with OpenMP support (or +otherwise will run as slow as yet be weaker than its p=1 alternative): + +flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 0, g = 0, NROM = 0 + +Large and slower (memory usage 1 GiB, running time under 300 ms on a +fast quad-core desktop not including memory allocation overhead, under +350 ms with the overhead included), also requires build with OpenMP +support (or otherwise will run slower than the p=1 alternative below): + +flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 2, g = 0, NROM = 0 + +Large and slow (memory usage 1 GiB, running time under 600 ms on a fast +desktop not including 
memory allocation overhead, under 650 ms with the +overhead included): + +flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 1, t = 0, g = 0, NROM = 0 + +Just like with password hashing, even heavier and slower settings are +possible, if affordable, and you achieve them by adjusting N, r, t in +the same way and in the same preferred ranges (please see the section on +password hashing without a ROM, above). Unlike with password hashing, +it makes some sense to go above t=2 if you expect that your users might +not be able to afford more memory but can afford more time. However, +increasing the memory usage provides better protection, and we don't +recommend forcing your users to wait for more than 1 second as they +could as well type more characters in that time. If this feels too +complicated, just use one of the above parameter sets as-is. + + + Amortization of memory allocation overhead. + +It takes a significant fraction of yescrypt's total running time to +allocate memory from the operating system, especially considering that +the kernel zeroizes the memory before handing it over to your program. + +Unless you naturally need to compute yescrypt just once per process, you +may achieve greater efficiency by fully using advanced yescrypt APIs +that let you preserve and reuse the memory allocation across yescrypt +invocations. This is done by reusing the structure pointed to by the +"yescrypt_local_t *local" argument of yescrypt_r() or yescrypt_kdf() +without calling yescrypt_free_local() in between the repeated invocations +of yescrypt. + + + YESCRYPT_DEFAULTS macro. + +Please note that the value of the YESCRYPT_DEFAULTS macro might change +later, so if you use the macro like it's recommended here then for +results reproducible across versions you might need to store its value +somewhere along with the hashes or the encrypted data.
+ +If you use yescrypt's standard hash string encoding, then yescrypt +already encodes and decodes this value for you, so you don't need to +worry about this. diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index 1ccffa1e5..25446406b 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -36,7 +36,7 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, uint64_t c, uint8_t *buf, size_t dkLen) { hmac_ctx_t Phctx, PShctx; - size_t i; + uint32_t i; /* Compute HMAC state after processing P. */ hmac_begin(&Phctx, passwd, passwdlen, sha256_begin); @@ -46,7 +46,7 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, hmac_hash(&PShctx, salt, saltlen); /* Iterate through the blocks. */ - for (i = 0; dkLen != 0; i++) { + for (i = 0; dkLen != 0; ) { uint64_t U[32 / 8]; uint64_t T[32 / 8]; uint64_t j; @@ -54,8 +54,9 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, size_t clen; int k; - /* Generate INT(i + 1). */ - ivec = SWAP_BE32((uint32_t)(i + 1)); + /* Generate INT(i). */ + i++; + ivec = SWAP_BE32(i); /* Compute U_1 = PRF(P, S || INT(i)). */ hmac_peek_hash(&PShctx, (void*)T, &ivec, 4, NULL); diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index dfcf32145..5d8be587a 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -92,7 +92,7 @@ static const uint8_t *decode64( store: dbg_dec64(" storing bits:%d v:%08x", bits, (int)SWAP_BE32(value)); //BE to see lsb first while (dstpos < *dstlen) { - if (!(*src && *src != '$') && value == 0 && bits < 8) { + if ((!*src || *src == '$') && value == 0 && bits < 8) { /* Example: mkpasswd PWD '$y$j9T$123': * the "123" is bits:18 value:03,51,00 * is considered to be 2 bytes, not 3! 
@@ -120,7 +120,6 @@ static const uint8_t *decode64( break; } end: - /* here, srclen is 0, no need to check */ *dstlen = dstpos; dbg_dec64("dec64: OK, dst[%d]", (int)dstpos); return src; -- cgit v1.2.3-55-g6feb From e5d3a87633eac2a8a17d909b98a1e6dd21f80489 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 21:52:39 +0200 Subject: libbb/yescrypt: 64-bit build fixes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 13ae62b7c..d24b05150 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -823,6 +823,7 @@ static int yescrypt_kdf32_body( salsa20_blk_t *V, *XY; uint8_t sha256[32]; uint8_t dk[sizeof(sha256)], *dkp = buf32; + uint32_t r, p; /* Sanity-check parameters */ switch (flags___YESCRYPT_MODE_MASK) { @@ -849,13 +850,9 @@ static int yescrypt_kdf32_body( default: goto out_EINVAL; } -#if SIZE_MAX > UINT32_MAX - if (buflen > (((uint64_t)1 << 32) - 1) * 32) - goto out_EINVAL; -#endif - { - const uint32_t r = YCTX_param_r; - const uint32_t p = YCTX_param_p; + + r = YCTX_param_r; + p = YCTX_param_p; if ((uint64_t)r * (uint64_t)p >= 1 << 30) goto out_EINVAL; if (N > UINT32_MAX) @@ -982,7 +979,6 @@ static int yescrypt_kdf32_body( out_EINVAL: errno = EINVAL; return -1; - } } /** -- cgit v1.2.3-55-g6feb From f8e9bd30d73f2acf6818da71a2ba44748151b716 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 22:34:31 +0200 Subject: libbb/yescrypt: disable unrolling in two places Also, make many define macros safer function old new delta blockmix 2300 814 -1486 blockmix_xor 4606 1543 -3063 blockmix_xor_save 4737 1620 -3117 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-7666) Total: -7666 bytes Signed-off-by: Denys Vlasenko --- 
libbb/yescrypt/alg-yescrypt-kdf.c | 255 ++++++++++++++++++++++++-------------- 1 file changed, 159 insertions(+), 96 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index d24b05150..ab095eae1 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -42,6 +42,15 @@ #define unlikely(exp) (exp) #endif +// Not a size win if 0 +#define UNROLL_COPY 1 + +// -5324 bytes if 0: +#define UNROLL_PWXFORM_ROUND 0 +// -4864 bytes if 0: +#define UNROLL_PWXFORM 0 +// both 0: -7666 bytes + typedef union { uint32_t w[16]; uint64_t d[8]; @@ -52,15 +61,17 @@ static void salsa20_simd_shuffle( salsa20_blk_t *Bout) { #define COMBINE(out, in1, in2) \ - Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); - COMBINE(0, 0, 2) - COMBINE(1, 5, 7) - COMBINE(2, 2, 4) - COMBINE(3, 7, 1) - COMBINE(4, 4, 6) - COMBINE(5, 1, 3) - COMBINE(6, 6, 0) - COMBINE(7, 3, 5) +do { \ + Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); \ +} while (0) + COMBINE(0, 0, 2); + COMBINE(1, 5, 7); + COMBINE(2, 2, 4); + COMBINE(3, 7, 1); + COMBINE(4, 4, 6); + COMBINE(5, 1, 3); + COMBINE(6, 6, 0); + COMBINE(7, 3, 5); #undef COMBINE } @@ -69,25 +80,29 @@ static void salsa20_simd_unshuffle( salsa20_blk_t *Bout) { #define UNCOMBINE(out, in1, in2) \ +do { \ Bout->w[out * 2] = Bin->d[in1]; \ - Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; - UNCOMBINE(0, 0, 6) - UNCOMBINE(1, 5, 3) - UNCOMBINE(2, 2, 0) - UNCOMBINE(3, 7, 5) - UNCOMBINE(4, 4, 2) - UNCOMBINE(5, 1, 7) - UNCOMBINE(6, 6, 4) - UNCOMBINE(7, 3, 1) + Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; \ +} while (0) + UNCOMBINE(0, 0, 6); + UNCOMBINE(1, 5, 3); + UNCOMBINE(2, 2, 0); + UNCOMBINE(3, 7, 5); + UNCOMBINE(4, 4, 2); + UNCOMBINE(5, 1, 7); + UNCOMBINE(6, 6, 4); + UNCOMBINE(7, 3, 1); #undef UNCOMBINE } #define DECL_X \ - salsa20_blk_t X; + salsa20_blk_t X #define DECL_Y \ - salsa20_blk_t Y; + salsa20_blk_t Y +#if UNROLL_COPY #define 
COPY(out, in) \ +do { \ (out).d[0] = (in).d[0]; \ (out).d[1] = (in).d[1]; \ (out).d[2] = (in).d[2]; \ @@ -95,9 +110,17 @@ static void salsa20_simd_unshuffle( (out).d[4] = (in).d[4]; \ (out).d[5] = (in).d[5]; \ (out).d[6] = (in).d[6]; \ - (out).d[7] = (in).d[7]; + (out).d[7] = (in).d[7]; \ +} while (0) +#else +#define COPY(out, in) \ +do { \ + for (int copyi=0; copyi<8; copyi++) \ + (out).d[copyi] = (in).d[copyi]; \ +} while (0) +#endif -#define READ_X(in) COPY(X, in) +#define READ_X(in) COPY(X, in) #define WRITE_X(out) COPY(out, X) /** @@ -154,7 +177,6 @@ static void salsa20(salsa20_blk_t *restrict B, B->w[i + 3] = Bout->w[i + 3] += B->w[i + 3]; } } - #if 0 /* Too expensive */ explicit_bzero(&X, sizeof(X)); @@ -165,9 +187,10 @@ static void salsa20(salsa20_blk_t *restrict B, * Apply the Salsa20/2 core to the block provided in X. */ #define SALSA20_2(out) \ - salsa20(&X, &out, 1); + salsa20(&X, &out, 1) #define XOR(out, in1, in2) \ +do { \ (out).d[0] = (in1).d[0] ^ (in2).d[0]; \ (out).d[1] = (in1).d[1] ^ (in2).d[1]; \ (out).d[2] = (in1).d[2] ^ (in2).d[2]; \ @@ -175,23 +198,28 @@ static void salsa20(salsa20_blk_t *restrict B, (out).d[4] = (in1).d[4] ^ (in2).d[4]; \ (out).d[5] = (in1).d[5] ^ (in2).d[5]; \ (out).d[6] = (in1).d[6] ^ (in2).d[6]; \ - (out).d[7] = (in1).d[7] ^ (in2).d[7]; + (out).d[7] = (in1).d[7] ^ (in2).d[7]; \ +} while (0) -#define XOR_X(in) XOR(X, X, in) +#define XOR_X(in) XOR(X, X, in) #define XOR_X_2(in1, in2) XOR(X, in1, in2) #define XOR_X_WRITE_XOR_Y_2(out, in) \ - XOR(Y, out, in) \ - COPY(out, Y) \ - XOR(X, X, Y) +do { \ + XOR(Y, out, in); \ + COPY(out, Y); \ + XOR(X, X, Y); \ +} while (0) /** * Apply the Salsa20/8 core to the block provided in X ^ in. 
*/ #define SALSA20_8_XOR_MEM(in, out) \ +do { \ XOR_X(in); \ - salsa20(&X, &out, 4); + salsa20(&X, &out, 4); \ +} while (0) -#define INTEGERIFY (uint32_t)X.d[0] +#define INTEGERIFY ((uint32_t)X.d[0]) /** * blockmix_salsa8(Bin, Bout, r): @@ -204,12 +232,12 @@ static void blockmix_salsa8( size_t r) { size_t i; - DECL_X + DECL_X; - READ_X(Bin[r * 2 - 1]) + READ_X(Bin[r * 2 - 1]); for (i = 0; i < r; i++) { - SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]) - SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]) + SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]); + SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]); } } @@ -220,14 +248,14 @@ static uint32_t blockmix_salsa8_xor( size_t r) { size_t i; - DECL_X + DECL_X; - XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]) + XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]); for (i = 0; i < r; i++) { - XOR_X(Bin1[i * 2]) - SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]) - XOR_X(Bin1[i * 2 + 1]) - SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]) + XOR_X(Bin1[i * 2]); + SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]); + XOR_X(Bin1[i * 2 + 1]); + SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]); } return INTEGERIFY; @@ -242,27 +270,38 @@ static uint32_t blockmix_salsa8_xor( /* Derived values. Not tunable except via Swidth above. 
*/ #define PWXbytes (PWXgather * PWXsimple * 8) -#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8) -#define Smask (((1 << Swidth) - 1) * PWXsimple * 8) -#define Smask2 (((uint64_t)Smask << 32) | Smask) +#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8) +#define Smask (((1 << Swidth) - 1) * PWXsimple * 8) +#define Smask2 (((uint64_t)Smask << 32) | Smask) -#define DECL_SMASK2REG /* empty */ -#define FORCE_REGALLOC_3 /* empty */ -#define MAYBE_MEMORY_BARRIER /* empty */ +#define DECL_SMASK2REG do {} while (0) +#define FORCE_REGALLOC_3 do {} while (0) +#define MAYBE_MEMORY_BARRIER do {} while (0) -#define PWXFORM_SIMD(x0, x1) { \ +#define PWXFORM_SIMD(x0, x1) \ +do { \ uint64_t x = x0 & Smask2; \ uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \ uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \ x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \ x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \ -} +} while (0) +#if UNROLL_PWXFORM_ROUND +#define PWXFORM_ROUND \ +do { \ + PWXFORM_SIMD(X.d[0], X.d[1]); \ + PWXFORM_SIMD(X.d[2], X.d[3]); \ + PWXFORM_SIMD(X.d[4], X.d[5]); \ + PWXFORM_SIMD(X.d[6], X.d[7]); \ +} while (0) +#else #define PWXFORM_ROUND \ - PWXFORM_SIMD(X.d[0], X.d[1]) \ - PWXFORM_SIMD(X.d[2], X.d[3]) \ - PWXFORM_SIMD(X.d[4], X.d[5]) \ - PWXFORM_SIMD(X.d[6], X.d[7]) +do { \ + for (int pwxi=0; pwxi<8; pwxi+=2) \ + PWXFORM_SIMD(X.d[pwxi], X.d[pwxi + 1]); \ +} while (0) +#endif /* * This offset helps address the 256-byte write block via the single-byte @@ -275,19 +314,23 @@ static uint32_t blockmix_salsa8_xor( #define PWXFORM_WRITE_OFFSET 0x7c #define PWXFORM_WRITE \ - WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)) \ - Sw += 64; - -#define PWXFORM { \ +do { \ + WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)); \ + Sw += 64; \ +} while (0) + +#if UNROLL_PWXFORM +#define PWXFORM \ +do { \ uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \ - FORCE_REGALLOC_3 \ - MAYBE_MEMORY_BARRIER \ - PWXFORM_ROUND \ - PWXFORM_ROUND PWXFORM_WRITE \ - PWXFORM_ROUND 
PWXFORM_WRITE \ - PWXFORM_ROUND PWXFORM_WRITE \ - PWXFORM_ROUND PWXFORM_WRITE \ - PWXFORM_ROUND \ + FORCE_REGALLOC_3; \ + MAYBE_MEMORY_BARRIER; \ + PWXFORM_ROUND; \ + PWXFORM_ROUND; PWXFORM_WRITE; \ + PWXFORM_ROUND; PWXFORM_WRITE; \ + PWXFORM_ROUND; PWXFORM_WRITE; \ + PWXFORM_ROUND; PWXFORM_WRITE; \ + PWXFORM_ROUND; \ w = (w + 64 * 4) & Smask2; \ { \ uint8_t *Stmp = S2; \ @@ -295,7 +338,27 @@ static uint32_t blockmix_salsa8_xor( S1 = S0; \ S0 = Stmp; \ } \ -} +} while (0) +#else +#define PWXFORM \ +do { \ + uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \ + FORCE_REGALLOC_3; \ + MAYBE_MEMORY_BARRIER; \ + PWXFORM_ROUND; \ + for (int pwxj=0; pwxj<4; pwxj++) {\ + PWXFORM_ROUND; PWXFORM_WRITE; \ + } \ + PWXFORM_ROUND; \ + w = (w + 64 * 4) & Smask2; \ + { \ + uint8_t *Stmp = S2; \ + S2 = S1; \ + S1 = S0; \ + S0 = Stmp; \ + } \ +} while (0) +#endif typedef struct { uint8_t *S0, *S1, *S2; @@ -318,29 +381,29 @@ static void blockmix( uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; size_t w = ctx->w; size_t i; - DECL_X + DECL_X; /* Convert count of 128-byte blocks to max index of 64-byte block */ r = r * 2 - 1; - READ_X(Bin[r]) + READ_X(Bin[r]); - DECL_SMASK2REG + DECL_SMASK2REG; i = 0; do { - XOR_X(Bin[i]) - PWXFORM + XOR_X(Bin[i]); + PWXFORM; if (unlikely(i >= r)) break; - WRITE_X(Bout[i]) + WRITE_X(Bout[i]); i++; } while (1); ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; ctx->w = w; - SALSA20_2(Bout[i]) + SALSA20_2(Bout[i]); } static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, @@ -352,31 +415,31 @@ static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; size_t w = ctx->w; size_t i; - DECL_X + DECL_X; /* Convert count of 128-byte blocks to max index of 64-byte block */ r = r * 2 - 1; - XOR_X_2(Bin1[r], Bin2[r]) + XOR_X_2(Bin1[r], Bin2[r]); - DECL_SMASK2REG + DECL_SMASK2REG; i = 0; r--; do { - XOR_X(Bin1[i]) - XOR_X(Bin2[i]) - PWXFORM - WRITE_X(Bout[i]) + XOR_X(Bin1[i]); + XOR_X(Bin2[i]); + PWXFORM; + 
WRITE_X(Bout[i]); - XOR_X(Bin1[i + 1]) - XOR_X(Bin2[i + 1]) - PWXFORM + XOR_X(Bin1[i + 1]); + XOR_X(Bin2[i + 1]); + PWXFORM; if (unlikely(i >= r)) break; - WRITE_X(Bout[i + 1]) + WRITE_X(Bout[i + 1]); i += 2; } while (1); @@ -385,7 +448,7 @@ static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; ctx->w = w; - SALSA20_2(Bout[i]) + SALSA20_2(Bout[i]); return INTEGERIFY; } @@ -399,30 +462,30 @@ static uint32_t blockmix_xor_save( uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; size_t w = ctx->w; size_t i; - DECL_X - DECL_Y + DECL_X; + DECL_Y; /* Convert count of 128-byte blocks to max index of 64-byte block */ r = r * 2 - 1; - XOR_X_2(Bin1out[r], Bin2[r]) + XOR_X_2(Bin1out[r], Bin2[r]); - DECL_SMASK2REG + DECL_SMASK2REG; i = 0; r--; do { - XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]) - PWXFORM - WRITE_X(Bin1out[i]) + XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]); + PWXFORM; + WRITE_X(Bin1out[i]); - XOR_X_WRITE_XOR_Y_2(Bin2[i + 1], Bin1out[i + 1]) - PWXFORM + XOR_X_WRITE_XOR_Y_2(Bin2[i + 1], Bin1out[i + 1]); + PWXFORM; if (unlikely(i >= r)) break; - WRITE_X(Bin1out[i + 1]) + WRITE_X(Bin1out[i + 1]); i += 2; } while (1); @@ -431,7 +494,7 @@ static uint32_t blockmix_xor_save( ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; ctx->w = w; - SALSA20_2(Bin1out[i]) + SALSA20_2(Bin1out[i]); return INTEGERIFY; } -- cgit v1.2.3-55-g6feb From 8466c3e78fa10d1a3e2bf1a75657fd6d1f4aec30 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 7 Jul 2025 23:07:58 +0200 Subject: libbb/yescrypt: madvise(MADV_HUGEPAGE) our usually very large allocation Nearly 2x faster run when buffer is gigabytes in size function old new delta yescrypt_kdf32_body 1386 1406 +20 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index ab095eae1..4c2cfe849 100644 ---
a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -832,14 +832,25 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, static void alloc_region(yescrypt_region_t *region, size_t size) { + uint8_t *base; int flags = # ifdef MAP_NOCORE /* huh? */ MAP_NOCORE | # endif MAP_ANON | MAP_PRIVATE; - uint8_t *base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + + base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); if (base == MAP_FAILED) bb_die_memory_exhausted(); + +#if defined(MADV_HUGEPAGE) + /* Reduces mkpasswd qweRTY123@-+ '$y$jHT$123' + * (which allocates 4 Gbytes) + * run time from 10.543s to 5.635s + * Seen on linux-5.18.0. + */ + madvise(base, size, MADV_HUGEPAGE); +#endif //region->base = base; //region->base_size = size; region->aligned = base; @@ -960,7 +971,7 @@ static int yescrypt_kdf32_body( if (yctx->local->aligned_size < need) { free_region(yctx->local); alloc_region(yctx->local, need); - dbg("allocated local:%u 0x%x", need, need); + dbg("allocated local:%lu 0x%lx", (long)need, (long)need); /* standard "j9T" params allocate 16Mbytes here */ } if (flags & YESCRYPT_ALLOC_ONLY) -- cgit v1.2.3-55-g6feb From d18ac080e4bb7d63e0ec0dea16bacc6ac455f390 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 8 Jul 2025 00:14:24 +0200 Subject: libbb/yescrypt: code shrink Setting EINVAL in errno is not necessary, just error return works. 
function old new delta yescrypt_kdf32_body 1434 1423 -11 yescrypt_r 1029 990 -39 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-50) Total: -50 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 4 ++- libbb/yescrypt/alg-yescrypt-kdf.c | 64 ++++++++++++++++++++++++++---------- 2 files changed, 49 insertions(+), 19 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 5d8be587a..e5d045058 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -288,8 +288,10 @@ char *yescrypt_r( if (need > buflen || need < prefixlen) goto fail; - if (yescrypt_kdf32(yctx, passwd, passwdlen, hashbin32)) + if (yescrypt_kdf32(yctx, passwd, passwdlen, hashbin32)) { + dbg("error in yescrypt_kdf32"); goto fail; + } dst = mempcpy(buf, setting, prefixlen); *dst++ = '$'; diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 4c2cfe849..c998de51d 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -927,22 +927,42 @@ static int yescrypt_kdf32_body( r = YCTX_param_r; p = YCTX_param_p; - if ((uint64_t)r * (uint64_t)p >= 1 << 30) + if ((uint64_t)r * (uint64_t)p >= 1 << 30) { + dbg("r * n >= 2^30"); goto out_EINVAL; - if (N > UINT32_MAX) + } + if (N > UINT32_MAX) { + dbg("N > 0x%lx", (long)UINT32_MAX); goto out_EINVAL; - if ((N & (N - 1)) != 0 || N <= 3 || r < 1 || p < 1) + } + if ((N & (N - 1)) != 0 +//TODO: ^^^^^^^^^^^^^^^^^^^^^^ do not check this, code guarantees power-of-2 + || N <= 3 + || r < 1 + || p < 1 + ) { + dbg("bad N, r or p"); goto out_EINVAL; - if (r > SIZE_MAX / 256 / p || - N > SIZE_MAX / 128 / r) + } + if (r > SIZE_MAX / 256 / p + || N > SIZE_MAX / 128 / r + ) { + /* 32-bit testcase: mkpasswd qweRTY123@-+ '$y$jHT$123' + * (works on 64-bit, needs buffer > 4Gbytes) + */ + dbg("r > SIZE_MAX / 256 / 
p? %c", "NY"[r > SIZE_MAX / 256 / p]); + dbg("N > SIZE_MAX / 128 / r? %c", "NY"[N > SIZE_MAX / 128 / r]); goto out_EINVAL; + } if (flags___YESCRYPT_RW) { /* p cannot be greater than SIZE_MAX/Salloc on 64-bit systems, but it can on 32-bit systems. */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wtype-limits" - if (N / p <= 3 || p > SIZE_MAX / Salloc) + if (N / p <= 3 || p > SIZE_MAX / Salloc) { + dbg("bad p:%ld", (long)p); goto out_EINVAL; + } #pragma GCC diagnostic pop } @@ -956,17 +976,23 @@ static int yescrypt_kdf32_body( need = V_size; B_size = (size_t)128 * r * p; need += B_size; - if (need < B_size) + if (need < B_size) { + dbg("integer overflow at += B_size(%lu)", (long)B_size); goto out_EINVAL; + } XY_size = (size_t)256 * r; need += XY_size; - if (need < XY_size) + if (need < XY_size) { + dbg("integer overflow at += XY_size(%lu)", (long)XY_size); goto out_EINVAL; + } if (flags___YESCRYPT_RW) { size_t S_size = (size_t)Salloc * p; need += S_size; - if (need < S_size) + if (need < S_size) { + dbg("integer overflow at += S_size(%lu)", (long)S_size); goto out_EINVAL; + } } if (yctx->local->aligned_size < need) { free_region(yctx->local); @@ -1050,8 +1076,8 @@ static int yescrypt_kdf32_body( /* Success! 
*/ return 0; -out_EINVAL: - errno = EINVAL; + out_EINVAL: + //bbox does not need this: errno = EINVAL; return -1; } @@ -1079,7 +1105,7 @@ int yescrypt_kdf32( /* Support for hash upgrades has been temporarily removed */ if (g) { - errno = EINVAL; + //bbox does not need this: errno = EINVAL; return -1; } @@ -1093,15 +1119,17 @@ int yescrypt_kdf32( flags | YESCRYPT_ALLOC_ONLY, N, t, buf32) != -3 ) { - errno = EINVAL; + dbg("yescrypt_kdf32_body: not -3"); return -1; } retval = yescrypt_kdf32_body(yctx, passwd, passwdlen, flags | YESCRYPT_PREHASH, N >> 6, 0, dk32); - if (retval) + if (retval) { + dbg("yescrypt_kdf32_body(PREHASH):%d", retval); return retval; + } passwd = dk32; passwdlen = sizeof(dk32); } @@ -1109,9 +1137,9 @@ int yescrypt_kdf32( retval = yescrypt_kdf32_body(yctx, passwd, passwdlen, flags, N, t, buf32); -#ifndef SKIP_MEMZERO - if (passwd == dk32) - explicit_bzero(dk32, sizeof(dk32)); -#endif + + explicit_bzero(dk32, sizeof(dk32)); + + dbg("yescrypt_kdf32_body:%d", retval); return retval; } -- cgit v1.2.3-55-g6feb From 78bd8a44b75cf39ea17c8a586ba35776d835551f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 8 Jul 2025 03:36:17 +0200 Subject: libbb/yescrypt: explain and shrink decode64_uint32() function old new delta decode64_uint32 177 141 -36 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 87 +++++++++++++++++++++++++++++++----- libbb/yescrypt/alg-yescrypt.h | 2 + 2 files changed, 79 insertions(+), 10 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index e5d045058..c85b2a0b9 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -27,41 +27,106 @@ static NOINLINE const uint8_t *decode64_uint32( uint32_t *dst, const uint8_t *src, uint32_t val) { - uint32_t start = 0, end = 47, chars = 1, bits = 0; + uint32_t start = 0, end = 47, bits = 0; uint32_t c; - if (!src) /* prevous decode failed already? 
*/ + if (!src) /* previous decode failed already? */ goto fail; c = a2i64(*src++); if (c > 63) goto fail; +// The encoding of number N: +// start = 0 end = 47 +// If N < 48, it is encoded verbatim, else +// N -= 48 +// start = end+1 = 48 +// end += (64-end)/2 = 55 +// If N < (end+1-start)<<6 = 8<<6, it is encoded as 48+(N>>6)|low6bits (that is, 48...55|<6bit>), else +// N -= 8<<6 +// start = end+1 = 56 +// end += (64-end)/2 = 59 +// If N < (end+1-start)<<2*6 = 4<<12, it is encoded as 56+(N>>2*6)|low12bits (that is, 56...59|<6bit>|<6bit>), else +// ...same for 60..61|<6bit>|<6bit>|<6bit> +// .......same for 62|<6bit>|<6bit>|<6bit>|<6bit> +// .......same for 63|<6bit>|<6bit>|<6bit>|<6bit>|<6bit> + dbg_dec64("c:%d val:0x%08x", (int)c, (unsigned)val); while (c > end) { + dbg_dec64("c:%d > end:%d", (int)c, (int)end); val += (end + 1 - start) << bits; + dbg_dec64("val+=0x%08x", (int)((end + 1 - start) << bits)); + dbg_dec64(" val:0x%08x", (unsigned)val); start = end + 1; - end = start + (62 - end) / 2; - chars++; + end += (64 - end) / 2; bits += 6; + dbg_dec64("start=%d", (int)start); + dbg_dec64("end=%d", (int)end); + dbg_dec64("bits=%d", (int)bits); } val += (c - start) << bits; + dbg_dec64("final val+=0x%08x", (int)((c - start) << bits)); + dbg_dec64(" val:0x%08x", (unsigned)val); - while (--chars) { + while (bits != 0) { c = a2i64(*src++); if (c > 63) goto fail; bits -= 6; val += c << bits; + dbg_dec64("low bits val+=0x%08x", (int)(c << bits)); + dbg_dec64(" val:0x%08x", (unsigned)val); } + ret: *dst = val; - return src; + fail: + val = 0; + src = NULL; + goto ret; +} -fail: - *dst = 0; - return NULL; +#if TEST_DECODE64 +static void test_decode64_uint32(void) +{ + const uint8_t *src, *end; + uint32_t u32; + int a = 48; + int b = 8<<6; // 0x0200 + int c = 4<<12; // 0x04000 + int d = 2<<18; // 0x080000 + int e = 1<<24; // 0x1000000 + + src = (void*)"wzzz"; + end = decode64_uint32(&u32, src, 0); + if (u32 != 0x0003ffff+c+b+a) bb_error_msg_and_die("Incorrect decode 
'%s':0x%08x", src, (unsigned)u32); + if (end != src + 4) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end); + src = (void*)"xzzz"; + end = decode64_uint32(&u32, src, 0); + if (u32 != 0x0007ffff+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32); + if (end != src + 4) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end); + // Note how the last representable "x---" encoding, 0x7ffff, is exactly d-1! + // And if we now increment it, we get: + src = (void*)"y...."; + end = decode64_uint32(&u32, src, 0); + if (u32 != 0x00000000+d+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32); + if (end != src + 5) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end); + src = (void*)"yzzzz"; + end = decode64_uint32(&u32, src, 0); + if (u32 != 0x00ffffff+d+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32); + if (end != src + 5) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end); + + src = (void*)"zzzzzz"; + end = decode64_uint32(&u32, src, 0); + if (u32 != 0x3fffffff+e+d+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32); + if (end != src + 6) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end); + + bb_error_msg("test_decode64_uint32() OK"); } +#else +# define test_decode64_uint32() ((void)0) +#endif #if 1 static const uint8_t *decode64( @@ -70,7 +135,7 @@ static const uint8_t *decode64( { size_t dstpos = 0; - dbg_dec64("src:'%s' len:%d", src, (int)srclen); + dbg_dec64("src:'%s'", src); for (;;) { uint32_t c, value = 0; int bits = 0; @@ -215,6 +280,8 @@ char *yescrypt_r( size_t need, prefixlen; uint32_t u32; + test_decode64_uint32(); + memset(yctx, 0, sizeof(yctx)); yctx->param.p = 1; diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 4554e3de3..e558cfdc5 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ 
-28,6 +28,7 @@ * online backup system. */ #ifdef YESCRYPT_INTERNAL + # if 1 # define dbg(...) ((void)0) # else @@ -39,6 +40,7 @@ # define dbg_dec64(...) bb_error_msg(__VA_ARGS__) # endif #endif +#define TEST_DECODE64 0 /** * Type and possible values for the flags argument of yescrypt_kdf(), -- cgit v1.2.3-55-g6feb From d0f0874d573f33f9f14372a4513f22f76c559479 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 8 Jul 2025 05:25:31 +0200 Subject: libbb/yescrypt: disable code which accepts unusual yescrypt parameters Almost any reasonable yescrypt hashes in /etc/shadow should only ever use "jXY" parameters which set N and r. Fancy multi-byte-encoded wide integers are not needed for that. function old new delta static.yescrypt_kdf32_body - 899 +899 static.PBKDF2_SHA256 213 219 +6 decode64_uint32 141 - -141 yescrypt_r 990 805 -185 yescrypt_kdf32_body 1423 - -1423 ------------------------------------------------------------------------------ (add/remove: 1/2 grow/shrink: 1/1 up/down: 905/-1749) Total: -844 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 25 +++++++- libbb/yescrypt/alg-yescrypt.h | 120 ++++++++++++++++++++--------------- 2 files changed, 92 insertions(+), 53 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index c85b2a0b9..e48be6581 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -18,6 +18,23 @@ * SUCH DAMAGE. */ +#if RESTRICTED_PARAMS + +#define decode64_uint32(dst, src, min) \ +({ \ + uint32_t d32 = a2i64(*(src)); \ + if (d32 > 47) \ + goto fail; \ + *(dst) = d32 + (min); \ + ++src; \ +}) +#define test_decode64_uint32() ((void)0) +#define FULL_PARAMS(...) + +#else + +#define FULL_PARAMS(...) 
__VA_ARGS__ + /* Not inlining: * de/encode64 functions are only used to read * yescrypt_params_t field, and convert salt to binary - @@ -128,6 +145,8 @@ static void test_decode64_uint32(void) # define test_decode64_uint32() ((void)0) #endif +#endif /* !RESTRICTED_PARAMS */ + #if 1 static const uint8_t *decode64( uint8_t *dst, size_t *dstlen, @@ -283,7 +302,7 @@ char *yescrypt_r( test_decode64_uint32(); memset(yctx, 0, sizeof(yctx)); - yctx->param.p = 1; + FULL_PARAMS(yctx->param.p = 1;) /* we assume setting starts with "$y$" (caller must ensure this) */ src = setting + 3; @@ -324,6 +343,9 @@ char *yescrypt_r( if (!src) goto fail; if (*src != '$') { +#if RESTRICTED_PARAMS + goto fail; +#else src = decode64_uint32(&u32, src, 1); dbg("yescrypt has extended params:0x%x", (unsigned)u32); if (u32 & 1) @@ -342,6 +364,7 @@ char *yescrypt_r( goto fail; if (*src != '$') goto fail; +#endif } yctx->saltlen = sizeof(yctx->salt); diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index e558cfdc5..a1d540c08 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -29,6 +29,8 @@ */ #ifdef YESCRYPT_INTERNAL +// busybox debug and size-reduction configuration + # if 1 # define dbg(...) ((void)0) # else @@ -42,6 +44,68 @@ #endif #define TEST_DECODE64 0 +/* Only accept one-char parameters in hash, and only first three? + * Almost any reasonable yescrypt hashes in /etc/shadow should + * only ever use "jXY" parameters which set N and r. + * Fancy multi-byte-encoded wide integers are not needed for that. + */ +#define RESTRICTED_PARAMS 1 +/* Note: if you enable the above, please also enable + * YCTX_param_p, YCTX_param_t, YCTX_param_g, YCTX_param_NROM + * optimizations. 
+ */ + +// How much we save by forcing "standard" value by commenting the next line: +// 160 bytes +//#define YCTX_param_flags yctx->param.flags +// 260 bytes +//#define flags___YESCRYPT_RW (flags & YESCRYPT_RW) +// 140 bytes +//#define flags___YESCRYPT_MODE_MASK (flags & YESCRYPT_MODE_MASK) +// ^^^^ forcing the above since the code already requires (checks for) this +// 50 bytes +#define YCTX_param_N yctx->param.N +// -100 bytes (negative!!!) +#define YCTX_param_r yctx->param.r +// 400 bytes +//#define YCTX_param_p yctx->param.p +// 130 bytes +//#define YCTX_param_t yctx->param.t +// 2 bytes +//#define YCTX_param_g yctx->param.g +// 1 bytes +// ^^^^ this looks wrong, compiler should be able to constant-propagate the fact that NROM code is dead +//#define YCTX_param_NROM yctx->param.NROM + +#ifndef YCTX_param_flags +#define YCTX_param_flags (YESCRYPT_RW | YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K) +#endif +#ifndef flags___YESCRYPT_RW +#define flags___YESCRYPT_RW ((void)flags, YESCRYPT_RW) +#endif +#ifndef flags___YESCRYPT_MODE_MASK +#define flags___YESCRYPT_MODE_MASK ((void)flags, YESCRYPT_RW) +#endif +// standard ("j9T") values: +#ifndef YCTX_param_N +#define YCTX_param_N 4096 +#endif +#ifndef YCTX_param_r +#define YCTX_param_r 32 +#endif +#ifndef YCTX_param_p +#define YCTX_param_p 1 +#endif +#ifndef YCTX_param_t +#define YCTX_param_t 0 +#endif +#ifndef YCTX_param_g +#define YCTX_param_g 0 +#endif +#ifndef YCTX_param_NROM +#define YCTX_param_NROM 0 +#endif + /** * Type and possible values for the flags argument of yescrypt_kdf(), * yescrypt_encode_params_r(), yescrypt_encode_params(). 
Most of these may be @@ -129,9 +193,12 @@ typedef struct { */ typedef struct { uint32_t flags; + uint32_t r; uint64_t N; - uint32_t r, p, t, g; +#if !RESTRICTED_PARAMS + uint32_t p, t, g; uint64_t NROM; +#endif } yescrypt_params_t; typedef struct { @@ -147,57 +214,6 @@ typedef struct { yescrypt_region_t local[1]; } yescrypt_ctx_t; -// How much can save by forcing "standard" value by commenting the next line: -// 160 bytes -//#define YCTX_param_flags yctx->param.flags -// 260 bytes -//#define flags___YESCRYPT_RW (flags & YESCRYPT_RW) -// 140 bytes -//#define flags___YESCRYPT_MODE_MASK (flags & YESCRYPT_MODE_MASK) -// ^^^^ forcing the above since the code already requires (checks for) this -// 50 bytes -#define YCTX_param_N yctx->param.N -// -100 bytes (negative!!!) -#define YCTX_param_r yctx->param.r -// 400 bytes -#define YCTX_param_p yctx->param.p -// 130 bytes -#define YCTX_param_t yctx->param.t -// 2 bytes -#define YCTX_param_g yctx->param.g -// 1 bytes -// ^^^^ this looks wrong, compiler should be able to constant-propagate the fact that NROM code is dead -#define YCTX_param_NROM yctx->param.NROM - -// standard ("j9T") values: -#ifndef YCTX_param_flags -#define YCTX_param_flags (YESCRYPT_RW | YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K) -#endif -#ifndef flags___YESCRYPT_RW -#define flags___YESCRYPT_RW ((void)flags, YESCRYPT_RW) -#endif -#ifndef flags___YESCRYPT_MODE_MASK -#define flags___YESCRYPT_MODE_MASK ((void)flags, YESCRYPT_RW) -#endif -#ifndef YCTX_param_N -#define YCTX_param_N 4096 -#endif -#ifndef YCTX_param_r -#define YCTX_param_r 32 -#endif -#ifndef YCTX_param_p -#define YCTX_param_p 1 -#endif -#ifndef YCTX_param_t -#define YCTX_param_t 0 -#endif -#ifndef YCTX_param_g -#define YCTX_param_g 0 -#endif -#ifndef YCTX_param_NROM -#define YCTX_param_NROM 0 -#endif - /** * yescrypt_r(shared, local, passwd, passwdlen, setting, key, buf, buflen): * Compute and encode an scrypt or enhanced scrypt hash of passwd given the -- 
cgit v1.2.3-55-g6feb From 093070879476bad95595ab6352bb23f565b85347 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 8 Jul 2025 06:51:17 +0200 Subject: libbb/yescrypt: disable NROM code function old new delta smix1 - 595 +595 smix2 - 414 +414 static.yescrypt_kdf32_body 899 847 -52 static.smix2 420 - -420 static.smix1 604 - -604 ------------------------------------------------------------------------------ (add/remove: 2/2 grow/shrink: 0/1 up/down: 1009/-1076) Total: -67 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 43 +++++++++++++++++++++++++-------------- libbb/yescrypt/alg-yescrypt.h | 34 ++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 27 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index c998de51d..f421db111 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -42,15 +42,6 @@ #define unlikely(exp) (exp) #endif -// Not a size win if 0 -#define UNROLL_COPY 1 - -// -5324 bytes if 0: -#define UNROLL_PWXFORM_ROUND 0 -// -4864 bytes if 0: -#define UNROLL_PWXFORM 0 -// both 0: -7666 bytes - typedef union { uint32_t w[16]; uint64_t d[8]; @@ -100,7 +91,7 @@ do { \ #define DECL_Y \ salsa20_blk_t Y -#if UNROLL_COPY +#if KDF_UNROLL_COPY #define COPY(out, in) \ do { \ (out).d[0] = (in).d[0]; \ @@ -287,7 +278,7 @@ do { \ x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \ } while (0) -#if UNROLL_PWXFORM_ROUND +#if KDF_UNROLL_PWXFORM_ROUND #define PWXFORM_ROUND \ do { \ PWXFORM_SIMD(X.d[0], X.d[1]); \ @@ -319,7 +310,7 @@ do { \ Sw += 64; \ } while (0) -#if UNROLL_PWXFORM +#if KDF_UNROLL_PWXFORM #define PWXFORM \ do { \ uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \ @@ -522,6 +513,10 @@ static inline uint32_t integerify(const salsa20_blk_t *B, size_t r) * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY * to a multiple of at least 16 bytes. 
*/ +#if DISABLE_NROM_CODE +#define smix1(B,r,N,flags,V,NROM,VROM,XY,ctx) \ + smix1(B,r,N,flags,V,XY,ctx) +#endif static void smix1(uint8_t *B, size_t r, uint32_t N, uint32_t flags, salsa20_blk_t *V, @@ -529,6 +524,10 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, salsa20_blk_t *XY, pwxform_ctx_t *ctx) { +#if DISABLE_NROM_CODE + uint32_t NROM = 0; + const salsa20_blk_t *VROM = NULL; +#endif size_t s = 2 * r; salsa20_blk_t *X = V, *Y = &V[s]; uint32_t i, j; @@ -643,6 +642,10 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, * least 2. Nloop must be even. The array V must be aligned to a multiple of * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes. */ +#if DISABLE_NROM_CODE +#define smix2(B,r,N,Nloop,flags,V,NROM,VROM,XY,ctx) \ + smix2(B,r,N,Nloop,flags,V,XY,ctx) +#endif static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, uint32_t flags, salsa20_blk_t *V, @@ -650,6 +653,10 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, salsa20_blk_t *XY, pwxform_ctx_t *ctx) { +#if DISABLE_NROM_CODE + uint32_t NROM = 0; + const salsa20_blk_t *VROM = NULL; +#endif size_t s = 2 * r; salsa20_blk_t *X = XY, *Y = &XY[s]; uint32_t i, j; @@ -747,6 +754,10 @@ static uint64_t p2floor(uint64_t x) * and helps avoid false sharing in OpenMP-enabled builds when p > 1, but it * might also result in cache bank conflicts). 
*/ +#if DISABLE_NROM_CODE +#define smix(B,r,N,p,t,flags,V,NROM,VROM,XY,S,passwd) \ + smix(B,r,N,p,t,flags,V,XY,S,passwd) +#endif static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, uint32_t flags, salsa20_blk_t *V, @@ -891,7 +902,9 @@ static int yescrypt_kdf32_body( uint32_t flags, uint64_t N, uint32_t t, uint8_t *buf32) { +#if !DISABLE_NROM_CODE const salsa20_blk_t *VROM; +#endif size_t B_size, V_size, XY_size, need; uint8_t *B, *S; salsa20_blk_t *V, *XY; @@ -935,9 +948,7 @@ static int yescrypt_kdf32_body( dbg("N > 0x%lx", (long)UINT32_MAX); goto out_EINVAL; } - if ((N & (N - 1)) != 0 -//TODO: ^^^^^^^^^^^^^^^^^^^^^^ do not check this, code guarantees power-of-2 - || N <= 3 + if (N <= 3 || r < 1 || p < 1 ) { @@ -966,9 +977,11 @@ static int yescrypt_kdf32_body( #pragma GCC diagnostic pop } +#if !DISABLE_NROM_CODE VROM = NULL; if (YCTX_param_NROM) goto out_EINVAL; +#endif /* Allocate memory */ V = NULL; diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index a1d540c08..2a9434809 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -27,10 +27,10 @@ * This file was originally written by Colin Percival as part of the Tarsnap * online backup system. */ -#ifdef YESCRYPT_INTERNAL // busybox debug and size-reduction configuration +#ifdef YESCRYPT_INTERNAL # if 1 # define dbg(...) ((void)0) # else @@ -41,19 +41,20 @@ # else # define dbg_dec64(...) bb_error_msg(__VA_ARGS__) # endif +# define TEST_DECODE64 0 #endif -#define TEST_DECODE64 0 -/* Only accept one-char parameters in hash, and only first three? - * Almost any reasonable yescrypt hashes in /etc/shadow should - * only ever use "jXY" parameters which set N and r. - * Fancy multi-byte-encoded wide integers are not needed for that. - */ -#define RESTRICTED_PARAMS 1 -/* Note: if you enable the above, please also enable - * YCTX_param_p, YCTX_param_t, YCTX_param_g, YCTX_param_NROM - * optimizations. 
- */ + +// Only accept one-char parameters in salt, and only first three? +// Almost any reasonable yescrypt hashes in /etc/shadow should +// only ever use "jXY" parameters which set N and r. +// Fancy multi-byte-encoded wide integers are not needed for that. +#define RESTRICTED_PARAMS 1 +// Note: if you enable the above, please also enable +// YCTX_param_p, YCTX_param_t, YCTX_param_g, YCTX_param_NROM +// optimizations, and DISABLE_NROM_CODE. + +#define DISABLE_NROM_CODE 1 // How much we save by forcing "standard" value by commenting the next line: // 160 bytes @@ -106,6 +107,15 @@ #define YCTX_param_NROM 0 #endif +// "Faster, or smaller code" knobs: +// Not a size win if disabled, so keeping it 1: +#define KDF_UNROLL_COPY 1 +// -5324 bytes if 0: +#define KDF_UNROLL_PWXFORM_ROUND 0 +// -4864 bytes if 0: +#define KDF_UNROLL_PWXFORM 0 +// both 0: -7666 bytes + /** * Type and possible values for the flags argument of yescrypt_kdf(), * yescrypt_encode_params_r(), yescrypt_encode_params(). Most of these may be -- cgit v1.2.3-55-g6feb From 7b313c34651038f39d98ff105102b16a6be94e1c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 8 Jul 2025 10:04:47 +0200 Subject: libbb: code shrink in sha_crypt() Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt_sha.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'libbb') diff --git a/libbb/pw_encrypt_sha.c b/libbb/pw_encrypt_sha.c index 516293920..695a5c07f 100644 --- a/libbb/pw_encrypt_sha.c +++ b/libbb/pw_encrypt_sha.c @@ -84,8 +84,7 @@ sha_crypt(/*const*/ char *key_data, /*const*/ char *salt_data) as a scratch space later. 
*/ salt_data = xstrndup(salt_data, salt_len); /* add "salt$" to result */ - strcpy(resptr, salt_data); - resptr += salt_len; + resptr = stpcpy(resptr, salt_data); *resptr++ = '$'; /* key data doesn't need much processing */ key_len = strlen(key_data); -- cgit v1.2.3-55-g6feb From c305c81c94a086fb09444b1ea6f31fb911c25ec0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 9 Jul 2025 06:51:04 +0200 Subject: libbb: introduce and use block-XOR functions On x86_64, they can be done in 16-byte blocks 64-bit: function old new delta xorbuf_3 - 84 +84 xorbuf64_3_aligned64 - 58 +58 smix1 687 712 +25 xwrite_encrypted 520 534 +14 xorbuf16_aligned_long - 13 +13 tls_xread_record 733 742 +9 xorbuf 21 13 -8 xorbuf_aligned_AES_BLOCK_SIZE 15 - -15 blockmix 814 762 -52 blockmix_salsa8 317 198 -119 blockmix_xor_save 1620 1499 -121 blockmix_xor 1543 1322 -221 ------------------------------------------------------------------------------ (add/remove: 4/1 grow/shrink: 3/5 up/down: 203/-536) Total: -333 bytes 32-bit: function old new delta xorbuf_3 - 76 +76 xorbuf64_3_aligned64 - 36 +36 xorbuf16_aligned_long - 23 +23 xwrite_encrypted 499 507 +8 tls_xread_record 646 650 +4 xorbuf 22 11 -11 xorbuf_aligned_AES_BLOCK_SIZE 23 - -23 blockmix 1083 938 -145 blockmix_salsa8 415 210 -205 blockmix_salsa8_xor 601 163 -438 blockmix_xor 2103 1533 -570 blockmix_xor_save 2614 1859 -755 ------------------------------------------------------------------------------ (add/remove: 4/1 grow/shrink: 2/6 up/down: 147/-2147) Total: -2000 bytes Signed-off-by: Denys Vlasenko --- include/libbb.h | 10 ++++ libbb/bitops.c | 108 ++++++++++++++++++++++++++++++++++++++ libbb/yescrypt/alg-sha256.c | 1 + libbb/yescrypt/alg-yescrypt-kdf.c | 7 +++ networking/tls.c | 39 +++----------- networking/tls.h | 5 +- networking/tls_aesgcm.c | 5 +- 7 files changed, 137 insertions(+), 38 deletions(-) create mode 100644 libbb/bitops.c (limited to 'libbb') diff --git a/include/libbb.h b/include/libbb.h index 544ca3155..79427fb31 
100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1113,6 +1113,16 @@ char *bin2hex(char *dst, const char *src, int count) FAST_FUNC; /* Reverse */ char* hex2bin(char *dst, const char *src, int count) FAST_FUNC; +void FAST_FUNC xorbuf_3(void *dst, const void *src1, const void *src2, unsigned count); +void FAST_FUNC xorbuf(void* buf, const void* mask, unsigned count); +void FAST_FUNC xorbuf16_aligned_long(void* buf, const void* mask); +void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2); +#if BB_UNALIGNED_MEMACCESS_OK +# define xorbuf16(buf,mask) xorbuf16_aligned_long(buf,mask) +#else +void FAST_FUNC xorbuf16(void* buf, const void* mask); +#endif + /* Generate a UUID */ void generate_uuid(uint8_t *buf) FAST_FUNC; diff --git a/libbb/bitops.c b/libbb/bitops.c new file mode 100644 index 000000000..5f239676c --- /dev/null +++ b/libbb/bitops.c @@ -0,0 +1,108 @@ +/* + * Utility routines. + * + * Copyright (C) 2025 by Denys Vlasenko + * + * Licensed under GPLv2, see file LICENSE in this source tree. 
+ */ +//kbuild:lib-y += bitops.o + +#include "libbb.h" + +void FAST_FUNC xorbuf_3(void *dst, const void *src1, const void *src2, unsigned count) +{ + uint8_t *d = dst; + const uint8_t *s1 = src1; + const uint8_t *s2 = src2; +#if BB_UNALIGNED_MEMACCESS_OK + while (count >= sizeof(long)) { + *(long*)d = *(long*)s1 ^ *(long*)s2; + count -= sizeof(long); + d += sizeof(long); + s1 += sizeof(long); + s2 += sizeof(long); + } +#endif + while (count--) + *d++ = *s1++ ^ *s2++; +} + +void FAST_FUNC xorbuf(void *dst, const void *src, unsigned count) +{ + xorbuf_3(dst, dst, src, count); +} + +void FAST_FUNC xorbuf16_aligned_long(void *dst, const void *src) +{ +#if defined(__SSE__) /* any x86_64 has it */ + asm volatile( +"\n movups (%0),%%xmm0" +"\n movups (%1),%%xmm1" // can't just xorps(%1),%%xmm0: +"\n xorps %%xmm1,%%xmm0" // SSE requires 16-byte alignment +"\n movups %%xmm0,(%0)" +"\n" + : "=r" (dst), "=r" (src) + : "0" (dst), "1" (src) + : "xmm0", "xmm1", "memory" + ); +#else + unsigned long *d = dst; + const unsigned long *s = src; + d[0] ^= s[0]; +# if LONG_MAX <= 0x7fffffffffffffff + d[1] ^= s[1]; +# if LONG_MAX == 0x7fffffff + d[2] ^= s[2]; + d[3] ^= s[3]; +# endif +# endif +#endif +} + +void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2) +{ +#if defined(__SSE__) /* any x86_64 has it */ + asm volatile( +"\n movups 0*16(%1),%%xmm0" +"\n movups 0*16(%2),%%xmm1" // can't just xorps(%2),%%xmm0: +"\n xorps %%xmm1,%%xmm0" // SSE requires 16-byte alignment, we have only 8-byte +"\n movups %%xmm0,0*16(%0)" +"\n movups 1*16(%1),%%xmm0" +"\n movups 1*16(%2),%%xmm1" +"\n xorps %%xmm1,%%xmm0" +"\n movups %%xmm0,1*16(%0)" +"\n movups 2*16(%1),%%xmm0" +"\n movups 2*16(%2),%%xmm1" +"\n xorps %%xmm1,%%xmm0" +"\n movups %%xmm0,2*16(%0)" +"\n movups 3*16(%1),%%xmm0" +"\n movups 3*16(%2),%%xmm1" +"\n xorps %%xmm1,%%xmm0" +"\n movups %%xmm0,3*16(%0)" +"\n" + : "=r" (dst), "=r" (src1), "=r" (src2) + : "0" (dst), "1" (src1), "2" (src2) + : "xmm0", "xmm1", 
"memory" + ); +#else + long *d = dst; + const long *s1 = src1; + const long *s2 = src2; + unsigned count = 64 / sizeof(long); + do { + *d++ = *s1++ ^ *s2++; + } while (--count != 0); +#endif +} + +#if !BB_UNALIGNED_MEMACCESS_OK +void FAST_FUNC xorbuf16(void *dst, const void *src) +{ +#define p_aligned(a) (((uintptr_t)(a) & (sizeof(long)-1)) == 0) + if (p_aligned(src) && p_aligned(dst)) { + xorbuf16_aligned_long(dst, src); + return; + } + xorbuf_3(dst, dst, src, 16); +} +#endif diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c index 25446406b..20e8d1ee4 100644 --- a/libbb/yescrypt/alg-sha256.c +++ b/libbb/yescrypt/alg-sha256.c @@ -72,6 +72,7 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, /* ... xor U_j ... */ for (k = 0; k < 32 / 8; k++) T[k] ^= U[k]; + //TODO: xorbuf32_aligned_long(T, U); } } diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index f421db111..112862ec9 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -180,6 +180,7 @@ static void salsa20(salsa20_blk_t *restrict B, #define SALSA20_2(out) \ salsa20(&X, &out, 1) +#if 0 #define XOR(out, in1, in2) \ do { \ (out).d[0] = (in1).d[0] ^ (in2).d[0]; \ @@ -191,6 +192,12 @@ do { \ (out).d[6] = (in1).d[6] ^ (in2).d[6]; \ (out).d[7] = (in1).d[7] ^ (in2).d[7]; \ } while (0) +#else +#define XOR(out, in1, in2) \ +do { \ + xorbuf64_3_aligned64(&(out).d, &(in1).d, &(in2).d); \ +} while (0) +#endif #define XOR_X(in) XOR(X, X, in) #define XOR_X_2(in1, in2) XOR(X, in1, in2) diff --git a/networking/tls.c b/networking/tls.c index 098cf7cac..ac6f0767f 100644 --- a/networking/tls.c +++ b/networking/tls.c @@ -333,34 +333,6 @@ void FAST_FUNC tls_get_random(void *buf, unsigned len) xfunc_die(); } -static void xorbuf3(void *dst, const void *src1, const void *src2, unsigned count) -{ - uint8_t *d = dst; - const uint8_t *s1 = src1; - const uint8_t* s2 = src2; - while (count--) - *d++ = *s1++ ^ *s2++; -} - -void FAST_FUNC 
xorbuf(void *dst, const void *src, unsigned count) -{ - xorbuf3(dst, dst, src, count); -} - -void FAST_FUNC xorbuf_aligned_AES_BLOCK_SIZE(void *dst, const void *src) -{ - unsigned long *d = dst; - const unsigned long *s = src; - d[0] ^= s[0]; -#if ULONG_MAX <= 0xffffffffffffffff - d[1] ^= s[1]; - #if ULONG_MAX == 0xffffffff - d[2] ^= s[2]; - d[3] ^= s[3]; - #endif -#endif -} - #if !TLS_DEBUG_HASH # define hash_handshake(tls, fmt, buffer, len) \ hash_handshake(tls, buffer, len) @@ -764,8 +736,13 @@ static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned ty cnt++; COUNTER(nonce) = htonl(cnt); /* yes, first cnt here is 2 (!) */ aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch); - n = remaining > AES_BLOCK_SIZE ? AES_BLOCK_SIZE : remaining; - xorbuf(buf, scratch, n); + if (remaining >= AES_BLOCK_SIZE) { + n = AES_BLOCK_SIZE; + xorbuf_AES_BLOCK_SIZE(buf, scratch); + } else { + n = remaining; + xorbuf(buf, scratch, n); + } buf += n; remaining -= n; } @@ -923,7 +900,7 @@ static void tls_aesgcm_decrypt(tls_state_t *tls, uint8_t *buf, int size) COUNTER(nonce) = htonl(cnt); /* yes, first cnt here is 2 (!) */ aes_encrypt_one_block(&tls->aes_decrypt, nonce, scratch); n = remaining > AES_BLOCK_SIZE ? 
AES_BLOCK_SIZE : remaining; - xorbuf3(buf, scratch, buf + 8, n); + xorbuf_3(buf, scratch, buf + 8, n); buf += n; remaining -= n; } diff --git a/networking/tls.h b/networking/tls.h index 0173b87b2..9751d30ff 100644 --- a/networking/tls.h +++ b/networking/tls.h @@ -82,10 +82,9 @@ typedef int16_t int16; void tls_get_random(void *buf, unsigned len) FAST_FUNC; -void xorbuf(void* buf, const void* mask, unsigned count) FAST_FUNC; - #define ALIGNED_long ALIGNED(sizeof(long)) -void xorbuf_aligned_AES_BLOCK_SIZE(void* buf, const void* mask) FAST_FUNC; +#define xorbuf_aligned_AES_BLOCK_SIZE(dst,src) xorbuf16_aligned_long(dst,src) +#define xorbuf_AES_BLOCK_SIZE(dst,src) xorbuf16(dst,src) #define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS) diff --git a/networking/tls_aesgcm.c b/networking/tls_aesgcm.c index 5ddcdd2ad..9c2381a57 100644 --- a/networking/tls_aesgcm.c +++ b/networking/tls_aesgcm.c @@ -167,10 +167,7 @@ void FAST_FUNC aesgcm_GHASH(byte* h, blocks = cSz / AES_BLOCK_SIZE; partial = cSz % AES_BLOCK_SIZE; while (blocks--) { - if (BB_UNALIGNED_MEMACCESS_OK) // c is not guaranteed to be aligned - xorbuf_aligned_AES_BLOCK_SIZE(x, c); - else - xorbuf(x, c, AES_BLOCK_SIZE); + xorbuf_AES_BLOCK_SIZE(x, c); GMULT(x, h); c += AES_BLOCK_SIZE; } -- cgit v1.2.3-55-g6feb From 11d4c08d7541408e4fbb7daaaf63aba1d07685ea Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 9 Jul 2025 08:21:47 +0200 Subject: libbb/bitops.c: add inlining comment Signed-off-by: Denys Vlasenko --- libbb/bitops.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'libbb') diff --git a/libbb/bitops.c b/libbb/bitops.c index 5f239676c..467e1a2d9 100644 --- a/libbb/bitops.c +++ b/libbb/bitops.c @@ -58,6 +58,26 @@ void FAST_FUNC xorbuf16_aligned_long(void *dst, const void *src) # endif #endif } +// The above can be inlined in libbb.h, in a way where compiler +// is even free to use better addressing modes than (%reg), and +// to keep the result in a 
register +// (to not store it to memory after each XOR): +//#if defined(__SSE__) +//#include <xmmintrin.h> +//^^^ or just: typedef float __m128_u attribute((__vector_size__(16),__may_alias__,__aligned__(1))); +//static ALWAYS_INLINE void xorbuf16_aligned_long(void *dst, const void *src) +//{ +// __m128_u xmm0, xmm1; +// asm volatile( +//"\n xorps %1,%0" +// : "=x" (xmm0), "=x" (xmm1) +// : "0" (*(__m128_u*)dst), "1" (*(__m128_u*)src) +// ); +// *(__m128_u*)dst = xmm0; // this store may be optimized out! +//} +//#endif +// but I don't trust gcc optimizer enough to not generate some monstrosity. +// See GMULT() function in TLS code as an example. void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2) { -- cgit v1.2.3-55-g6feb From 95f169f3bb07528c9302c604ff2a1f40b41a98d8 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 9 Jul 2025 10:38:11 +0200 Subject: libbb/yescrypt: code shrink function old new delta static.yescrypt_kdf32_body 847 823 -24 yescrypt_r 805 767 -38 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-62) Total: -62 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 19 ++++++++++++------- libbb/yescrypt/alg-yescrypt-kdf.c | 24 ++++++++++++++++-------- 2 files changed, 28 insertions(+), 15 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index e48be6581..1c063b895 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -23,8 +23,8 @@ #define decode64_uint32(dst, src, min) \ ({ \ uint32_t d32 = a2i64(*(src)); \ - if (d32 > 47) \ - goto fail; \ + if (d32 > 47) \ + goto fail; \ *(dst) = d32 + (min); \ ++src; \ }) @@ -292,8 +292,12 @@ char *yescrypt_r( const uint8_t *setting, char *buf, size_t buflen) { - yescrypt_ctx_t yctx[1]; - unsigned char hashbin32[32]; + struct { + yescrypt_ctx_t yctx[1]; + unsigned char hashbin32[32]; 
+ } u; +#define yctx u.yctx +#define hashbin32 u.hashbin32 char *dst; const uint8_t *src, *saltend; size_t need, prefixlen; @@ -375,7 +379,7 @@ char *yescrypt_r( prefixlen = saltend - setting; need = prefixlen + 1 + YESCRYPT_HASH_LEN + 1; - if (need > buflen || need < prefixlen) + if (need > buflen /*overflow is quite unlikely: || need < prefixlen*/) goto fail; if (yescrypt_kdf32(yctx, passwd, passwdlen, hashbin32)) { @@ -390,10 +394,11 @@ char *yescrypt_r( goto fail; ret: free_region(yctx->local); - explicit_bzero(yctx, sizeof(yctx)); - explicit_bzero(hashbin32, sizeof(hashbin32)); + explicit_bzero(&u, sizeof(u)); return buf; fail: buf = NULL; goto ret; +#undef yctx +#undef hashbin32 } diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 112862ec9..29d9efc07 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -915,8 +915,13 @@ static int yescrypt_kdf32_body( size_t B_size, V_size, XY_size, need; uint8_t *B, *S; salsa20_blk_t *V, *XY; - uint8_t sha256[32]; - uint8_t dk[sizeof(sha256)], *dkp = buf32; + struct { + uint8_t sha256[32]; + uint8_t dk[32]; + } u; +#define sha256 u.sha256 +#define dk u.dk + uint8_t *dkp = buf32; uint32_t r, p; /* Sanity-check parameters */ @@ -1083,15 +1088,16 @@ static int yescrypt_kdf32_body( size_t clen = /*buflen:*/32; if (clen > sizeof(dk)) clen = sizeof(dk); - sha256_block(sha256, sizeof(sha256), dk); - memcpy(buf32, dk, clen); + if (sizeof(dk) != 32) { /* not true, optimize it out */ + sha256_block(sha256, sizeof(sha256), dk); + memcpy(buf32, dk, clen); + } else { + sha256_block(sha256, sizeof(sha256), buf32); + } } } - if (flags) { - explicit_bzero(sha256, sizeof(sha256)); - explicit_bzero(dk, sizeof(dk)); - } + explicit_bzero(&u, sizeof(u)); /* Success! 
*/ return 0; @@ -1099,6 +1105,8 @@ static int yescrypt_kdf32_body( out_EINVAL: //bbox does not need this: errno = EINVAL; return -1; +#undef sha256 +#undef dk } /** -- cgit v1.2.3-55-g6feb From e62bfbcaed051146b83a096e778b5822069c160b Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 11 Jul 2025 08:56:10 +0200 Subject: libbb/yescrypt: code shrink function old new delta salsa20 684 650 -34 blockmix_salsa8 198 144 -54 blockmix 762 565 -197 blockmix_xor 1322 1028 -294 blockmix_xor_save 1499 1103 -396 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-975) Total: -975 bytes Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 9 +++------ libbb/yescrypt/alg-yescrypt.h | 8 ++++---- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 29d9efc07..1c254e2e2 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -106,8 +106,7 @@ do { \ #else #define COPY(out, in) \ do { \ - for (int copyi=0; copyi<8; copyi++) \ - (out).d[copyi] = (in).d[copyi]; \ + memcpy((out).d, (in).d, sizeof((in).d)); \ } while (0) #endif @@ -161,11 +160,9 @@ static void salsa20(salsa20_blk_t *restrict B, { uint32_t i; salsa20_simd_shuffle(&X, Bout); - for (i = 0; i < 16; i += 4) { + for (i = 0; i < 16; i++) { + // bbox: note: was unrolled x4 B->w[i] = Bout->w[i] += B->w[i]; - B->w[i + 1] = Bout->w[i + 1] += B->w[i + 1]; - B->w[i + 2] = Bout->w[i + 2] += B->w[i + 2]; - B->w[i + 3] = Bout->w[i + 3] += B->w[i + 3]; } } #if 0 diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 2a9434809..0b93945af 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -107,14 +107,14 @@ #define YCTX_param_NROM 0 #endif -// "Faster, or smaller code" knobs: -// Not a size win if disabled, so keeping it 1: -#define KDF_UNROLL_COPY 1 +// 
"Faster/smaller code" knobs: +// -941 bytes: +#define KDF_UNROLL_COPY 0 // -5324 bytes if 0: #define KDF_UNROLL_PWXFORM_ROUND 0 // -4864 bytes if 0: #define KDF_UNROLL_PWXFORM 0 -// both 0: -7666 bytes +// if both this ^^^^^^^^^^ and PWXFORM_ROUND set to 0: -7666 bytes /** * Type and possible values for the flags argument of yescrypt_kdf(), -- cgit v1.2.3-55-g6feb From abe0b45cd7098f0f83f727d4d4c4c09dc2b172f7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 15 Jul 2025 05:47:18 +0200 Subject: libbb/yescrypt: code shrink function old new delta blockmix_xor 1177 702 -475 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 1c254e2e2..f75361d96 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -421,26 +421,19 @@ static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, i = 0; r--; - do { + for (;;) { XOR_X(Bin1[i]); XOR_X(Bin2[i]); PWXFORM; - WRITE_X(Bout[i]); - - XOR_X(Bin1[i + 1]); - XOR_X(Bin2[i + 1]); - PWXFORM; - - if (unlikely(i >= r)) + if (unlikely(i > r)) break; + WRITE_X(Bout[i]); + i++; + } - WRITE_X(Bout[i + 1]); - - i += 2; - } while (1); - i++; - - ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; + ctx->S0 = S0; + ctx->S1 = S1; + ctx->S2 = S2; ctx->w = w; SALSA20_2(Bout[i]); -- cgit v1.2.3-55-g6feb From b823735b7eb6428e827cf463123d3caaa48804ff Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 17 Jul 2025 17:01:40 +0200 Subject: libbb/yescrypt: actually, largest allowed salt is 86 chars, support that function old new delta yescrypt_r 767 756 -11 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-common.c | 26 +++++++++++++++----------- libbb/yescrypt/alg-yescrypt.h | 2 +- testsuite/cryptpw.tests | 10 +++++++--- 3 files changed, 23 insertions(+), 15 deletions(-) (limited to 'libbb') diff --git 
a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c index 1c063b895..c51823787 100644 --- a/libbb/yescrypt/alg-yescrypt-common.c +++ b/libbb/yescrypt/alg-yescrypt-common.c @@ -152,13 +152,13 @@ static const uint8_t *decode64( uint8_t *dst, size_t *dstlen, const uint8_t *src) { - size_t dstpos = 0; + unsigned dstpos = 0; dbg_dec64("src:'%s'", src); for (;;) { uint32_t c, value = 0; int bits = 0; - while (*src && *src != '$') { + while (*src != '\0' && *src != '$') { c = a2i64(*src); if (c > 63) { /* bad ascii64 char, stop decoding at it */ break; @@ -174,9 +174,11 @@ static const uint8_t *decode64( break; /* else: we got last, partial bit block - store it */ store: - dbg_dec64(" storing bits:%d v:%08x", bits, (int)SWAP_BE32(value)); //BE to see lsb first - while (dstpos < *dstlen) { - if ((!*src || *src == '$') && value == 0 && bits < 8) { + dbg_dec64(" storing bits:%d dstpos:%u v:%08x", bits, dstpos, (int)SWAP_BE32(value)); //BE to see lsb first + for (;;) { + if ((*src == '\0' || *src == '$') + && value == 0 && bits < 8 + ) { /* Example: mkpasswd PWD '$y$j9T$123': * the "123" is bits:18 value:03,51,00 * is considered to be 2 bytes, not 3! @@ -190,17 +192,18 @@ static const uint8_t *decode64( */ goto end; } - dstpos++; + if (dstpos >= *dstlen) { + dbg_dec64(" ERR: bits:%d dstpos:%u dst[] is too small", bits, dstpos); + goto fail; + } *dst++ = value; + dstpos++; value >>= 8; bits -= 8; if (bits <= 0) /* can get negative, if we e.g. 
had 6 bits */ - goto next; + break; } - dbg_dec64(" ERR: bits:%d dst[] is too small", bits); - goto fail; - next: - if (!*src || *src == '$') + if (*src == '\0' || *src == '$') break; } end: @@ -376,6 +379,7 @@ char *yescrypt_r( saltend = decode64(yctx->salt, &yctx->saltlen, src); if (!saltend || (*saltend != '\0' && *saltend != '$')) goto fail; /* salt[] is too small, or bad char during decode */ + dbg_dec64("salt is %d ascii64 chars -> %d bytes (in binary)", (int)(saltend - src), (int)yctx->saltlen); prefixlen = saltend - setting; need = prefixlen + 1 + YESCRYPT_HASH_LEN + 1; diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 0b93945af..5051efbb4 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -139,7 +139,7 @@ #define YESCRYPT_GATHER_8 0x018 //gg=11 #define YESCRYPT_SIMPLE_1 0x000 //ss=00 #define YESCRYPT_SIMPLE_2 0x020 //ss=01 -#define YESCRYPT_SIMPLE_4 0x040 //ss=11 +#define YESCRYPT_SIMPLE_4 0x040 //ss=10 #define YESCRYPT_SIMPLE_8 0x060 //ss=11 #define YESCRYPT_SBOX_6K 0x000 //sbox=0000 #define YESCRYPT_SBOX_12K 0x080 //sbox=0001 diff --git a/testsuite/cryptpw.tests b/testsuite/cryptpw.tests index beac35efe..83bfde521 100755 --- a/testsuite/cryptpw.tests +++ b/testsuite/cryptpw.tests @@ -97,9 +97,13 @@ testing 'cryptpw yescrypt with 4-char salt "...."' \ 'cryptpw -m yescrypt qweRTY123@-+ j9T\$....' \ '$y$j9T$....$wOnauYL2/NEtr6YQi9pi8AtV7L57sEbVOAnWJIcP9q2\n' \ '' '' -testing 'cryptpw yescrypt with 84-char salt (max size)' \ - 'cryptpw -m yescrypt qweRTY123@-+ j9T\$123456789012345678901234567890123456789012345678901234567890123456789012345678901234' \ - '$y$j9T$123456789012345678901234567890123456789012345678901234567890123456789012345678901234$ubrUuPCpI97LIMlVMt/A0Mhs/kBK2UBJYcQSxEZSlz4\n' \ +# 84 chars = 21 4-char blocks which decode into 21*3 = 63 bytes. +# The last byte of the maximum allowed salt size has to come from an incomplete +# char block. E.g. "z/" encodes byte 0x7f. "z1" is 0xff. 
+# Anything larger (e.g. "z2") is an error (it encodes 0x13f). +testing 'cryptpw yescrypt with 86-char salt (max size)' \ + 'cryptpw -m yescrypt qweRTY123@-+ j9T\$123456789012345678901234567890123456789012345678901234567890123456789012345678901234z/' \ + '$y$j9T$123456789012345678901234567890123456789012345678901234567890123456789012345678901234z/$Exxe8IoPXiddFsqj7iqCanRf8FyquAoB0/uceLmLjG.\n' \ '' '' testing 'cryptpw yescrypt implicit' \ 'cryptpw qweRTY123@-+ \$y\$j9T\$123456789012345678901234' \ -- cgit v1.2.3-55-g6feb From 51b45ce28a6a5bbc035b200f170d520f94b7e59f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 19 Jul 2025 17:20:35 +0200 Subject: libbb/yescrypt: code shrink function old new delta blockmix_xor_save 1169 708 -461 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 53 ++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 28 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index f75361d96..01503c6e0 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -368,10 +368,10 @@ typedef struct { * be 128r bytes in length; the output Bout must also be the same size. 
*/ static void blockmix( - const salsa20_blk_t *restrict Bin, - salsa20_blk_t *restrict Bout, - size_t r, - pwxform_ctx_t *restrict ctx) + const salsa20_blk_t *restrict Bin, + salsa20_blk_t *restrict Bout, + size_t r, + pwxform_ctx_t *restrict ctx) { uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; size_t w = ctx->w; @@ -386,22 +386,25 @@ static void blockmix( DECL_SMASK2REG; i = 0; - do { + for (;;) { XOR_X(Bin[i]); PWXFORM; if (unlikely(i >= r)) break; WRITE_X(Bout[i]); i++; - } while (1); + } - ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; + ctx->S0 = S0; + ctx->S1 = S1; + ctx->S2 = S2; ctx->w = w; SALSA20_2(Bout[i]); } -static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, +static uint32_t blockmix_xor( + const salsa20_blk_t *Bin1, const salsa20_blk_t *restrict Bin2, salsa20_blk_t *Bout, size_t r, @@ -462,24 +465,18 @@ static uint32_t blockmix_xor_save( i = 0; r--; - do { + for (;;) { XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]); PWXFORM; - WRITE_X(Bin1out[i]); - - XOR_X_WRITE_XOR_Y_2(Bin2[i + 1], Bin1out[i + 1]); - PWXFORM; - - if (unlikely(i >= r)) + if (unlikely(i > r)) break; + WRITE_X(Bin1out[i]); + i++; + } - WRITE_X(Bin1out[i + 1]); - - i += 2; - } while (1); - i++; - - ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; + ctx->S0 = S0; + ctx->S1 = S1; + ctx->S2 = S2; ctx->w = w; SALSA20_2(Bin1out[i]); @@ -801,7 +798,7 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, if (flags___YESCRYPT_RW) { uint8_t *Si = S + i * Salloc; smix1(Bp, 1, Sbytes / 128, 0 /* no flags */, - (salsa20_blk_t *)Si, 0, NULL, XYp, NULL); + (salsa20_blk_t *)Si, 0, NULL, XYp, NULL); ctx_i = (pwxform_ctx_t *)(Si + Sbytes); ctx_i->S2 = Si; ctx_i->S1 = Si + Sbytes / 3; @@ -809,7 +806,7 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, ctx_i->w = 0; if (i == 0) hmac_block( - /* key,len: */ Bp + (128 * r - 64), 64, + /* key,len: */ Bp + (128 * r - 64), 64, /* hash fn: */ sha256_begin, /* in,len: */ passwd, 32, /* outbuf: */ passwd @@ -817,7 
+814,7 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, } smix1(Bp, r, Np, flags, Vp, NROM, VROM, XYp, ctx_i); smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, - NROM, VROM, XYp, ctx_i); + NROM, VROM, XYp, ctx_i); } if (Nloop_all > Nloop_rw) { @@ -830,8 +827,8 @@ static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, ctx_i = (pwxform_ctx_t *)(Si + Sbytes); } smix2(Bp, r, N, Nloop_all - Nloop_rw, - flags & (uint32_t)~YESCRYPT_RW, - V, NROM, VROM, XYp, ctx_i); + flags & (uint32_t)~YESCRYPT_RW, + V, NROM, VROM, XYp, ctx_i); } } } @@ -1046,7 +1043,7 @@ static int yescrypt_kdf32_body( uint32_t i; for (i = 0; i < p; i++) { smix(&B[(size_t)128 * r * i], r, N, 1, t, flags, V, - YCTX_param_NROM, VROM, XY, NULL, NULL); + YCTX_param_NROM, VROM, XY, NULL, NULL); } } -- cgit v1.2.3-55-g6feb From 6979467a62c4bc58eeede0436d06c0cd57649705 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 19 Jul 2025 18:17:24 +0200 Subject: cryptpw: fix detection of crypt algo from salt (was broken if default isn't DES) The symptom is: "cryptpw ... implicit" testsuite tests were failing if CONFIG_FEATURE_DEFAULT_PASSWD_ALGO is not "des". 
function old new delta cryptpw_main 223 283 +60 pw_encrypt 974 975 +1 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/0 up/down: 61/0) Total: 61 bytes Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt.c | 11 +++++------ loginutils/cryptpw.c | 34 +++++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 11 deletions(-) (limited to 'libbb') diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 3b2fea00d..56191b00e 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -37,9 +37,8 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) #if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_YES if ((algo[0]|0x20) == 'y') { /* yescrypt */ salt[1] = 'y'; - len = 24 / 2; + len = 22 / 2; // The "j9T$" below is the default "yescrypt parameters" encoded by yescrypt_encode_params_r(): -// //shadow-4.17.4/src/passwd.c // salt = crypt_make_salt(NULL, NULL); //shadow-4.17.4/lib/salt.c @@ -105,13 +104,13 @@ static char *my_crypt(const char *key, const char *salt) if (salt[0] == '$' && salt[1] && salt[2] == '$') { if (salt[1] == '1') return md5_crypt(xzalloc(MD5_OUT_BUFSIZE), (unsigned char*)key, (unsigned char*)salt); -#if ENABLE_USE_BB_CRYPT_YES - if (salt[1] == 'y') - return yes_crypt(key, salt); -#endif #if ENABLE_USE_BB_CRYPT_SHA if (salt[1] == '5' || salt[1] == '6') return sha_crypt((char*)key, (char*)salt); +#endif +#if ENABLE_USE_BB_CRYPT_YES + if (salt[1] == 'y') + return yes_crypt(key, salt); #endif } diff --git a/loginutils/cryptpw.c b/loginutils/cryptpw.c index c0f6280cd..666deff0b 100644 --- a/loginutils/cryptpw.c +++ b/loginutils/cryptpw.c @@ -99,7 +99,7 @@ int cryptpw_main(int argc UNUSED_PARAM, char **argv) ; #endif fd = STDIN_FILENO; - opt_m = CONFIG_FEATURE_DEFAULT_PASSWD_ALGO; + opt_m = NULL; opt_S = NULL; /* at most two non-option arguments; -P NUM */ getopt32long(argv, "^" "sP:+S:m:a:" "\0" "?2", @@ -113,10 +113,34 @@ int cryptpw_main(int argc 
UNUSED_PARAM, char **argv) if (argv[0] && !opt_S) opt_S = argv[1]; - salt_ptr = crypt_make_pw_salt(salt, opt_m); - if (opt_S) - /* put user's data after the "$N$" prefix */ - safe_strncpy(salt_ptr, opt_S, sizeof(salt) - (sizeof("$N$")-1)); + if (opt_S && !opt_S[0]) { + /* mkpasswd 5.6.2 compat: SALT of "" + * is treated as not specified + * (both forms: -S "" and argv[1] of "") + */ + opt_S = NULL; + } + + if (opt_m) { + /* "cryptpw -m ALGO PASSWORD [SALT]" */ + /* generate "$x$" algo prefix + random salt */ + salt_ptr = crypt_make_pw_salt(salt, opt_m); + if (opt_S) { + /* "cryptpw -m ALGO PASSWORD SALT" */ + /* put SALT data after the "$x$" prefix */ + safe_strncpy(salt_ptr, opt_S, sizeof(salt) - (sizeof("$N$")-1)); + } + } else { + if (!opt_S) { + /* "cryptpw PASSWORD" */ + /* generate random salt with default algo */ + crypt_make_pw_salt(salt, CONFIG_FEATURE_DEFAULT_PASSWD_ALGO); + } else { + /* "cryptpw PASSWORD '$x$SALT'" */ + /* use given salt; algo will be detected by pw_encrypt() */ + safe_strncpy(salt, opt_S, sizeof(salt)); + } + } xmove_fd(fd, STDIN_FILENO); -- cgit v1.2.3-55-g6feb From c6b9e763731545f8e7f3482a90128859496c2964 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 19 Jul 2025 18:42:32 +0200 Subject: libbb: crypt_make_pw_salt(): fix yescrypt's random salt last char function old new delta crypt_make_pw_salt 128 146 +18 Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt.c | 14 +++++++++++--- miscutils/crond.c | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'libbb') diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 56191b00e..93653de9f 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c @@ -17,7 +17,7 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) { - int len = 2/2; + int len = 2 / 2; char *salt_ptr = salt; /* Standard chpasswd uses uppercase algos ("MD5", not "md5"). 
@@ -36,8 +36,8 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) #endif #if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_YES if ((algo[0]|0x20) == 'y') { /* yescrypt */ + int rnd; salt[1] = 'y'; - len = 22 / 2; // The "j9T$" below is the default "yescrypt parameters" encoded by yescrypt_encode_params_r(): //shadow-4.17.4/src/passwd.c // salt = crypt_make_salt(NULL, NULL); @@ -61,8 +61,16 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) // params.r = 32; // N in 4KiB // params.N = 1ULL << (count + 7); // 3 -> 1024, 4 -> 2048, ... 11 -> 262144 // yescrypt_encode_params_r(&params, rbytes, nrbytes, outbuf, o_size) // always "$y$j9T$" + len = 22 / 2; salt_ptr = stpcpy(salt_ptr, "j9T$"); - crypt_make_rand64encoded(salt_ptr, len); /* appends 2*len random chars */ + /* append 2*len random chars */ + rnd = crypt_make_rand64encoded(salt_ptr, len); + /* fix up last char: it must be in 0..3 range (encoded as one of "./01"). + * IOW: salt_ptr[20..21] encode 16th random byte, must not be > 0xff. + * Without this, we can generate salts which are rejected + * by implementations with more strict salt length check. + */ + salt_ptr[21] = i2a64(rnd & 3); /* For "mkpasswd -m yescrypt PASS j9T$" use case, * "j9T$" is considered part of salt, * need to return pointer to 'j'. Without -4, diff --git a/miscutils/crond.c b/miscutils/crond.c index b3762d327..96131cae4 100644 --- a/miscutils/crond.c +++ b/miscutils/crond.c @@ -177,7 +177,7 @@ static void crondlog(unsigned level, const char *msg, va_list va) { if (level >= G.log_level) { /* - * We are called only for info meesages. + * We are called only for info messages. * Warnings/errors use plain bb_[p]error_msg's, which * need not touch syslog_level * (they are ok with LOG_ERR default). 
-- cgit v1.2.3-55-g6feb From 77a49a61b2957f532c4a736fd3ddf1154aecc176 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 20 Jul 2025 08:40:31 +0200 Subject: libbb/yescrypt: de-unroll salsa20() function old new delta salsa20 760 296 -464 Signed-off-by: Denys Vlasenko --- libbb/yescrypt/alg-yescrypt-kdf.c | 96 ++++++++++++++++++++++++++++++--------- libbb/yescrypt/alg-yescrypt.h | 3 +- 2 files changed, 76 insertions(+), 23 deletions(-) (limited to 'libbb') diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c index 01503c6e0..a9a1bd591 100644 --- a/libbb/yescrypt/alg-yescrypt-kdf.c +++ b/libbb/yescrypt/alg-yescrypt-kdf.c @@ -129,30 +129,82 @@ static void salsa20(salsa20_blk_t *restrict B, do { #define R(a,b) (((a) << (b)) | ((a) >> (32 - (b)))) /* Operate on columns */ - x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); - x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); - - x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); - x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); - - x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); - x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); - - x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); - x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); - +#if KDF_UNROLL_SALSA20 + x[ 4] ^= R(x[ 0]+x[12], 7); // x[j] ^= R(x[k]+x[l], CONST) + x[ 8] ^= R(x[ 4]+x[ 0], 9); + x[12] ^= R(x[ 8]+x[ 4],13); + x[ 0] ^= R(x[12]+x[ 8],18); + + x[ 9] ^= R(x[ 5]+x[ 1], 7); + x[13] ^= R(x[ 9]+x[ 5], 9); + x[ 1] ^= R(x[13]+x[ 9],13); + x[ 5] ^= R(x[ 1]+x[13],18); + + x[14] ^= R(x[10]+x[ 6], 7); + x[ 2] ^= R(x[14]+x[10], 9); + x[ 6] ^= R(x[ 2]+x[14],13); + x[10] ^= R(x[ 6]+x[ 2],18); + + x[ 3] ^= R(x[15]+x[11], 7); + x[ 7] ^= R(x[ 3]+x[15], 9); + x[11] ^= R(x[ 7]+x[ 3],13); + x[15] ^= R(x[11]+x[ 7],18); +#else + { + unsigned j, k, l; + j = 4; k = 0; l = 12; + for (;;) { + uint32_t t; + x[j] ^= ({ t = x[k] + x[l]; R(t, 7); }); l = k; k = j; j = (j+4) & 0xf; + x[j] ^= ({ t = 
x[k] + x[l]; R(t, 9); }); l = k; k = j; j = (j+4) & 0xf; + x[j] ^= ({ t = x[k] + x[l]; R(t,13); }); l = k; k = j; j = (j+4) & 0xf; + x[j] ^= ({ t = x[k] + x[l]; R(t,18); }); + if (j == 15) break; + l = j + 1; k = j + 5; j = (j+9) & 0xf; + } + } +#endif /* Operate on rows */ - x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); - x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); - - x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); - x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); - - x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); - x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); +#if KDF_UNROLL_SALSA20 +// i=0 n=0 + x[ 1] ^= R(x[ 0]+x[ 3], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3] + x[ 2] ^= R(x[ 1]+x[ 0], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3] + x[ 3] ^= R(x[ 2]+x[ 1],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3] + x[ 0] ^= R(x[ 3]+x[ 2],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3] +// i=4 n=1 ^^^j^^^ ^^^k^^^ ^^^l^^^ + x[ 6] ^= R(x[ 5]+x[ 4], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3] + x[ 7] ^= R(x[ 6]+x[ 5], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3] + x[ 4] ^= R(x[ 7]+x[ 6],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3] + x[ 5] ^= R(x[ 4]+x[ 7],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3] +// i=8 n=2 + x[11] ^= R(x[10]+x[ 9], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3] + x[ 8] ^= R(x[11]+x[10], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3] + x[ 9] ^= R(x[ 8]+x[11],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3] + x[10] ^= R(x[ 9]+x[ 8],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3] +// i=12 n=3 + x[12] ^= R(x[15]+x[14], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3] + x[13] ^= R(x[12]+x[15], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3] + x[14] ^= R(x[13]+x[12],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3] + x[15] ^= R(x[14]+x[13],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3] +#else + { + unsigned j, k, l; + uint32_t *xrow; + j = 1; k = 0; l = 3; + 
xrow = &x[0]; + for (;;) { + uint32_t t; + xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 7); }); l = k; k = j; j = (j+1) & 3; + xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 9); }); l = k; k = j; j = (j+1) & 3; + xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,13); }); l = k; k = j; j = (j+1) & 3; + xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,18); }); + if (j == 3) break; + l = j; k = j + 1; j = (j+2) & 3; + xrow += 4; + } + } +#endif - x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); - x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); #undef R } while (--doublerounds); #undef x diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h index 5051efbb4..b69843f5d 100644 --- a/libbb/yescrypt/alg-yescrypt.h +++ b/libbb/yescrypt/alg-yescrypt.h @@ -44,7 +44,6 @@ # define TEST_DECODE64 0 #endif - // Only accept one-char parameters in salt, and only first three? // Almost any reasonable yescrypt hashes in /etc/shadow should // only ever use "jXY" parameters which set N and r. @@ -115,6 +114,8 @@ // -4864 bytes if 0: #define KDF_UNROLL_PWXFORM 0 // if both this ^^^^^^^^^^ and PWXFORM_ROUND set to 0: -7666 bytes +// -464 bytes: +#define KDF_UNROLL_SALSA20 0 /** * Type and possible values for the flags argument of yescrypt_kdf(), -- cgit v1.2.3-55-g6feb From d23ad559044af2f706d7b7a8ede05107b4c10cf7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 21 Jul 2025 08:14:08 +0200 Subject: libbb: change define names in SHA-NI code to match kernel source more closely No code changes Signed-off-by: Denys Vlasenko --- libbb/hash_sha256_hwaccel_x86-32.S | 166 ++++++++++++++++++------------------- libbb/hash_sha256_hwaccel_x86-64.S | 166 ++++++++++++++++++------------------- 2 files changed, 166 insertions(+), 166 deletions(-) (limited to 'libbb') diff --git a/libbb/hash_sha256_hwaccel_x86-32.S b/libbb/hash_sha256_hwaccel_x86-32.S index a0e4a571a..332b7513f 100644 --- a/libbb/hash_sha256_hwaccel_x86-32.S +++ b/libbb/hash_sha256_hwaccel_x86-32.S @@ -34,10 +34,10 @@ 
#define MSG %xmm0 #define STATE0 %xmm1 #define STATE1 %xmm2 -#define MSGTMP0 %xmm3 -#define MSGTMP1 %xmm4 -#define MSGTMP2 %xmm5 -#define MSGTMP3 %xmm6 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 #define XMMTMP %xmm7 @@ -46,9 +46,9 @@ .balign 8 # allow decoders to fetch at least 2 first insns sha256_process_block64_shaNI: - movu128 76+0*16(%eax), XMMTMP /* ABCD (little-endian dword order) */ + movu128 76+0*16(%eax), XMMTMP /* ABCD (shown least-significant-dword-first) */ movu128 76+1*16(%eax), STATE1 /* EFGH */ -/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ +/* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */ mova128 STATE1, STATE0 /* --- -------------- ABCD -- EFGH */ shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */ @@ -61,7 +61,7 @@ sha256_process_block64_shaNI: /* Rounds 0-3 */ movu128 0*16(DATA_PTR), MSG pshufb XMMTMP, MSG - mova128 MSG, MSGTMP0 + mova128 MSG, MSG0 paddd 0*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 shuf128_32 $0x0E, MSG, MSG @@ -70,170 +70,170 @@ sha256_process_block64_shaNI: /* Rounds 4-7 */ movu128 1*16(DATA_PTR), MSG pshufb XMMTMP, MSG - mova128 MSG, MSGTMP1 + mova128 MSG, MSG1 paddd 1*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 + sha256msg1 MSG1, MSG0 /* Rounds 8-11 */ movu128 2*16(DATA_PTR), MSG pshufb XMMTMP, MSG - mova128 MSG, MSGTMP2 + mova128 MSG, MSG2 paddd 2*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 + sha256msg1 MSG2, MSG1 /* Rounds 12-15 */ movu128 3*16(DATA_PTR), MSG pshufb XMMTMP, MSG /* ...to here */ - mova128 MSG, MSGTMP3 + mova128 MSG, MSG3 paddd 3*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP3, XMMTMP - palignr $4, MSGTMP2, XMMTMP - paddd XMMTMP, 
MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 + mova128 MSG3, XMMTMP + palignr $4, MSG2, XMMTMP + paddd XMMTMP, MSG0 + sha256msg2 MSG3, MSG0 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 + sha256msg1 MSG3, MSG2 /* Rounds 16-19 */ - mova128 MSGTMP0, MSG + mova128 MSG0, MSG paddd 4*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP0, XMMTMP - palignr $4, MSGTMP3, XMMTMP - paddd XMMTMP, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 + mova128 MSG0, XMMTMP + palignr $4, MSG3, XMMTMP + paddd XMMTMP, MSG1 + sha256msg2 MSG0, MSG1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 + sha256msg1 MSG0, MSG3 /* Rounds 20-23 */ - mova128 MSGTMP1, MSG + mova128 MSG1, MSG paddd 5*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP1, XMMTMP - palignr $4, MSGTMP0, XMMTMP - paddd XMMTMP, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 + mova128 MSG1, XMMTMP + palignr $4, MSG0, XMMTMP + paddd XMMTMP, MSG2 + sha256msg2 MSG1, MSG2 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 + sha256msg1 MSG1, MSG0 /* Rounds 24-27 */ - mova128 MSGTMP2, MSG + mova128 MSG2, MSG paddd 6*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP2, XMMTMP - palignr $4, MSGTMP1, XMMTMP - paddd XMMTMP, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 + mova128 MSG2, XMMTMP + palignr $4, MSG1, XMMTMP + paddd XMMTMP, MSG3 + sha256msg2 MSG2, MSG3 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 + sha256msg1 MSG2, MSG1 /* Rounds 28-31 */ - mova128 MSGTMP3, MSG + mova128 MSG3, MSG paddd 7*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP3, XMMTMP - palignr $4, MSGTMP2, XMMTMP - paddd XMMTMP, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 + mova128 MSG3, XMMTMP + palignr $4, MSG2, XMMTMP + paddd XMMTMP, MSG0 + sha256msg2 MSG3, MSG0 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - 
sha256msg1 MSGTMP3, MSGTMP2 + sha256msg1 MSG3, MSG2 /* Rounds 32-35 */ - mova128 MSGTMP0, MSG + mova128 MSG0, MSG paddd 8*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP0, XMMTMP - palignr $4, MSGTMP3, XMMTMP - paddd XMMTMP, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 + mova128 MSG0, XMMTMP + palignr $4, MSG3, XMMTMP + paddd XMMTMP, MSG1 + sha256msg2 MSG0, MSG1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 + sha256msg1 MSG0, MSG3 /* Rounds 36-39 */ - mova128 MSGTMP1, MSG + mova128 MSG1, MSG paddd 9*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP1, XMMTMP - palignr $4, MSGTMP0, XMMTMP - paddd XMMTMP, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 + mova128 MSG1, XMMTMP + palignr $4, MSG0, XMMTMP + paddd XMMTMP, MSG2 + sha256msg2 MSG1, MSG2 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 + sha256msg1 MSG1, MSG0 /* Rounds 40-43 */ - mova128 MSGTMP2, MSG + mova128 MSG2, MSG paddd 10*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP2, XMMTMP - palignr $4, MSGTMP1, XMMTMP - paddd XMMTMP, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 + mova128 MSG2, XMMTMP + palignr $4, MSG1, XMMTMP + paddd XMMTMP, MSG3 + sha256msg2 MSG2, MSG3 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 + sha256msg1 MSG2, MSG1 /* Rounds 44-47 */ - mova128 MSGTMP3, MSG + mova128 MSG3, MSG paddd 11*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP3, XMMTMP - palignr $4, MSGTMP2, XMMTMP - paddd XMMTMP, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 + mova128 MSG3, XMMTMP + palignr $4, MSG2, XMMTMP + paddd XMMTMP, MSG0 + sha256msg2 MSG3, MSG0 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 + sha256msg1 MSG3, MSG2 /* Rounds 48-51 */ - mova128 MSGTMP0, MSG + mova128 MSG0, MSG paddd 12*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 
MSGTMP0, XMMTMP - palignr $4, MSGTMP3, XMMTMP - paddd XMMTMP, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 + mova128 MSG0, XMMTMP + palignr $4, MSG3, XMMTMP + paddd XMMTMP, MSG1 + sha256msg2 MSG0, MSG1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 + sha256msg1 MSG0, MSG3 /* Rounds 52-55 */ - mova128 MSGTMP1, MSG + mova128 MSG1, MSG paddd 13*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP1, XMMTMP - palignr $4, MSGTMP0, XMMTMP - paddd XMMTMP, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 + mova128 MSG1, XMMTMP + palignr $4, MSG0, XMMTMP + paddd XMMTMP, MSG2 + sha256msg2 MSG1, MSG2 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 /* Rounds 56-59 */ - mova128 MSGTMP2, MSG + mova128 MSG2, MSG paddd 14*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP2, XMMTMP - palignr $4, MSGTMP1, XMMTMP - paddd XMMTMP, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 + mova128 MSG2, XMMTMP + palignr $4, MSG1, XMMTMP + paddd XMMTMP, MSG3 + sha256msg2 MSG2, MSG3 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 /* Rounds 60-63 */ - mova128 MSGTMP3, MSG + mova128 MSG3, MSG paddd 15*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 shuf128_32 $0x0E, MSG, MSG @@ -241,7 +241,7 @@ sha256_process_block64_shaNI: /* Write hash values back in the correct order */ mova128 STATE0, XMMTMP -/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ +/* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */ /* --- -------------- HGDC -- FEBA */ shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */ shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */ diff --git a/libbb/hash_sha256_hwaccel_x86-64.S b/libbb/hash_sha256_hwaccel_x86-64.S index 172c2eae2..f8911968b 100644 --- a/libbb/hash_sha256_hwaccel_x86-64.S +++ b/libbb/hash_sha256_hwaccel_x86-64.S @@ -34,10 +34,10 @@ #define MSG %xmm0 #define STATE0 %xmm1 #define STATE1 %xmm2 -#define 
MSGTMP0 %xmm3 -#define MSGTMP1 %xmm4 -#define MSGTMP2 %xmm5 -#define MSGTMP3 %xmm6 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 #define XMMTMP %xmm7 @@ -49,9 +49,9 @@ .balign 8 # allow decoders to fetch at least 2 first insns sha256_process_block64_shaNI: - movu128 80+0*16(%rdi), XMMTMP /* ABCD (little-endian dword order) */ + movu128 80+0*16(%rdi), XMMTMP /* ABCD (shown least-significant-dword-first) */ movu128 80+1*16(%rdi), STATE1 /* EFGH */ -/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ +/* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */ mova128 STATE1, STATE0 /* --- -------------- ABCD -- EFGH */ shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */ @@ -68,7 +68,7 @@ sha256_process_block64_shaNI: /* Rounds 0-3 */ movu128 0*16(DATA_PTR), MSG pshufb XMMTMP, MSG - mova128 MSG, MSGTMP0 + mova128 MSG, MSG0 paddd 0*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 shuf128_32 $0x0E, MSG, MSG @@ -77,170 +77,170 @@ sha256_process_block64_shaNI: /* Rounds 4-7 */ movu128 1*16(DATA_PTR), MSG pshufb XMMTMP, MSG - mova128 MSG, MSGTMP1 + mova128 MSG, MSG1 paddd 1*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 + sha256msg1 MSG1, MSG0 /* Rounds 8-11 */ movu128 2*16(DATA_PTR), MSG pshufb XMMTMP, MSG - mova128 MSG, MSGTMP2 + mova128 MSG, MSG2 paddd 2*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 + sha256msg1 MSG2, MSG1 /* Rounds 12-15 */ movu128 3*16(DATA_PTR), MSG pshufb XMMTMP, MSG /* ...to here */ - mova128 MSG, MSGTMP3 + mova128 MSG, MSG3 paddd 3*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP3, XMMTMP - palignr $4, MSGTMP2, XMMTMP - paddd XMMTMP, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 + mova128 MSG3, XMMTMP + 
palignr $4, MSG2, XMMTMP + paddd XMMTMP, MSG0 + sha256msg2 MSG3, MSG0 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 + sha256msg1 MSG3, MSG2 /* Rounds 16-19 */ - mova128 MSGTMP0, MSG + mova128 MSG0, MSG paddd 4*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP0, XMMTMP - palignr $4, MSGTMP3, XMMTMP - paddd XMMTMP, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 + mova128 MSG0, XMMTMP + palignr $4, MSG3, XMMTMP + paddd XMMTMP, MSG1 + sha256msg2 MSG0, MSG1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 + sha256msg1 MSG0, MSG3 /* Rounds 20-23 */ - mova128 MSGTMP1, MSG + mova128 MSG1, MSG paddd 5*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP1, XMMTMP - palignr $4, MSGTMP0, XMMTMP - paddd XMMTMP, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 + mova128 MSG1, XMMTMP + palignr $4, MSG0, XMMTMP + paddd XMMTMP, MSG2 + sha256msg2 MSG1, MSG2 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 + sha256msg1 MSG1, MSG0 /* Rounds 24-27 */ - mova128 MSGTMP2, MSG + mova128 MSG2, MSG paddd 6*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP2, XMMTMP - palignr $4, MSGTMP1, XMMTMP - paddd XMMTMP, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 + mova128 MSG2, XMMTMP + palignr $4, MSG1, XMMTMP + paddd XMMTMP, MSG3 + sha256msg2 MSG2, MSG3 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 + sha256msg1 MSG2, MSG1 /* Rounds 28-31 */ - mova128 MSGTMP3, MSG + mova128 MSG3, MSG paddd 7*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP3, XMMTMP - palignr $4, MSGTMP2, XMMTMP - paddd XMMTMP, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 + mova128 MSG3, XMMTMP + palignr $4, MSG2, XMMTMP + paddd XMMTMP, MSG0 + sha256msg2 MSG3, MSG0 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 + sha256msg1 MSG3, MSG2 /* Rounds 32-35 
*/ - mova128 MSGTMP0, MSG + mova128 MSG0, MSG paddd 8*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP0, XMMTMP - palignr $4, MSGTMP3, XMMTMP - paddd XMMTMP, MSGTMP1 - sha256msg2 MSGTMP0, MSGTMP1 + mova128 MSG0, XMMTMP + palignr $4, MSG3, XMMTMP + paddd XMMTMP, MSG1 + sha256msg2 MSG0, MSG1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 + sha256msg1 MSG0, MSG3 /* Rounds 36-39 */ - mova128 MSGTMP1, MSG + mova128 MSG1, MSG paddd 9*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP1, XMMTMP - palignr $4, MSGTMP0, XMMTMP - paddd XMMTMP, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 + mova128 MSG1, XMMTMP + palignr $4, MSG0, XMMTMP + paddd XMMTMP, MSG2 + sha256msg2 MSG1, MSG2 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP1, MSGTMP0 + sha256msg1 MSG1, MSG0 /* Rounds 40-43 */ - mova128 MSGTMP2, MSG + mova128 MSG2, MSG paddd 10*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP2, XMMTMP - palignr $4, MSGTMP1, XMMTMP - paddd XMMTMP, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 + mova128 MSG2, XMMTMP + palignr $4, MSG1, XMMTMP + paddd XMMTMP, MSG3 + sha256msg2 MSG2, MSG3 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP2, MSGTMP1 + sha256msg1 MSG2, MSG1 /* Rounds 44-47 */ - mova128 MSGTMP3, MSG + mova128 MSG3, MSG paddd 11*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP3, XMMTMP - palignr $4, MSGTMP2, XMMTMP - paddd XMMTMP, MSGTMP0 - sha256msg2 MSGTMP3, MSGTMP0 + mova128 MSG3, XMMTMP + palignr $4, MSG2, XMMTMP + paddd XMMTMP, MSG0 + sha256msg2 MSG3, MSG0 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP3, MSGTMP2 + sha256msg1 MSG3, MSG2 /* Rounds 48-51 */ - mova128 MSGTMP0, MSG + mova128 MSG0, MSG paddd 12*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP0, XMMTMP - palignr $4, MSGTMP3, XMMTMP - paddd XMMTMP, MSGTMP1 
- sha256msg2 MSGTMP0, MSGTMP1 + mova128 MSG0, XMMTMP + palignr $4, MSG3, XMMTMP + paddd XMMTMP, MSG1 + sha256msg2 MSG0, MSG1 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 - sha256msg1 MSGTMP0, MSGTMP3 + sha256msg1 MSG0, MSG3 /* Rounds 52-55 */ - mova128 MSGTMP1, MSG + mova128 MSG1, MSG paddd 13*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP1, XMMTMP - palignr $4, MSGTMP0, XMMTMP - paddd XMMTMP, MSGTMP2 - sha256msg2 MSGTMP1, MSGTMP2 + mova128 MSG1, XMMTMP + palignr $4, MSG0, XMMTMP + paddd XMMTMP, MSG2 + sha256msg2 MSG1, MSG2 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 /* Rounds 56-59 */ - mova128 MSGTMP2, MSG + mova128 MSG2, MSG paddd 14*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - mova128 MSGTMP2, XMMTMP - palignr $4, MSGTMP1, XMMTMP - paddd XMMTMP, MSGTMP3 - sha256msg2 MSGTMP2, MSGTMP3 + mova128 MSG2, XMMTMP + palignr $4, MSG1, XMMTMP + paddd XMMTMP, MSG3 + sha256msg2 MSG2, MSG3 shuf128_32 $0x0E, MSG, MSG sha256rnds2 MSG, STATE1, STATE0 /* Rounds 60-63 */ - mova128 MSGTMP3, MSG + mova128 MSG3, MSG paddd 15*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 shuf128_32 $0x0E, MSG, MSG @@ -252,7 +252,7 @@ sha256_process_block64_shaNI: /* Write hash values back in the correct order */ mova128 STATE0, XMMTMP -/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ +/* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */ /* --- -------------- HGDC -- FEBA */ shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */ shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */ -- cgit v1.2.3-55-g6feb From ed22c5bd4a537f016321a888743a8b12f6bb15a9 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 22 Jul 2025 00:03:02 +0200 Subject: libbb: SHA-NI code shrink function old new delta sha256_process_block64_shaNI 673 657 -16 Signed-off-by: Denys Vlasenko --- libbb/hash_sha256_hwaccel_x86-32.S | 50 +++++++++++++++++++++++++------------- 
libbb/hash_sha256_hwaccel_x86-64.S | 50 +++++++++++++++++++++++++------------- 2 files changed, 66 insertions(+), 34 deletions(-) (limited to 'libbb') diff --git a/libbb/hash_sha256_hwaccel_x86-32.S b/libbb/hash_sha256_hwaccel_x86-32.S index 332b7513f..6362ae382 100644 --- a/libbb/hash_sha256_hwaccel_x86-32.S +++ b/libbb/hash_sha256_hwaccel_x86-32.S @@ -41,7 +41,7 @@ #define XMMTMP %xmm7 -#define SHUF(a,b,c,d) $(a+(b<<2)+(c<<4)+(d<<6)) +#define SHUF(a,b,c,d) $((a)+((b)<<2)+((c)<<4)+((d)<<6)) .balign 8 # allow decoders to fetch at least 2 first insns sha256_process_block64_shaNI: @@ -58,13 +58,29 @@ sha256_process_block64_shaNI: mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP movl $K256+8*16, SHA256CONSTANTS +// sha256rnds2 instruction uses only lower 64 bits of MSG. +// The code below needs to move upper 64 bits to lower 64 bits +// for the second sha256rnds2 invocation +// (what remains in upper bits does not matter). +// There are several ways to do it: +// movhlps MSG, MSG // abcd -> cdcd (3 bytes of code) +// shuf128_32 SHUF(2,3,n,n), MSG, MSG // abcd -> cdXX (4 bytes) +// punpckhqdq MSG, MSG // abcd -> cdcd (4 bytes) +// psrldq $8, MSG // abcd -> cd00 (5 bytes) +// palignr $8, MSG, MSG // abcd -> cdab (6 bytes, SSSE3 insn) +#define MOVE_UPPER64_DOWN(reg) movhlps reg, reg +//#define MOVE_UPPER64_DOWN(reg) shuf128_32 SHUF(2,3,0,0), reg, reg +//#define MOVE_UPPER64_DOWN(reg) punpckhqdq reg, reg +//#define MOVE_UPPER64_DOWN(reg) psrldq $8, reg +//#define MOVE_UPPER64_DOWN(reg) palignr $8, reg, reg + /* Rounds 0-3 */ movu128 0*16(DATA_PTR), MSG pshufb XMMTMP, MSG mova128 MSG, MSG0 paddd 0*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 /* Rounds 4-7 */ @@ -73,7 +89,7 @@ sha256_process_block64_shaNI: mova128 MSG, MSG1 paddd 1*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 
sha256msg1 MSG1, MSG0 @@ -83,7 +99,7 @@ sha256_process_block64_shaNI: mova128 MSG, MSG2 paddd 2*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG2, MSG1 @@ -98,7 +114,7 @@ sha256_process_block64_shaNI: palignr $4, MSG2, XMMTMP paddd XMMTMP, MSG0 sha256msg2 MSG3, MSG0 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG3, MSG2 @@ -110,7 +126,7 @@ sha256_process_block64_shaNI: palignr $4, MSG3, XMMTMP paddd XMMTMP, MSG1 sha256msg2 MSG0, MSG1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG0, MSG3 @@ -122,7 +138,7 @@ sha256_process_block64_shaNI: palignr $4, MSG0, XMMTMP paddd XMMTMP, MSG2 sha256msg2 MSG1, MSG2 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG1, MSG0 @@ -134,7 +150,7 @@ sha256_process_block64_shaNI: palignr $4, MSG1, XMMTMP paddd XMMTMP, MSG3 sha256msg2 MSG2, MSG3 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG2, MSG1 @@ -146,7 +162,7 @@ sha256_process_block64_shaNI: palignr $4, MSG2, XMMTMP paddd XMMTMP, MSG0 sha256msg2 MSG3, MSG0 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG3, MSG2 @@ -158,7 +174,7 @@ sha256_process_block64_shaNI: palignr $4, MSG3, XMMTMP paddd XMMTMP, MSG1 sha256msg2 MSG0, MSG1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG0, MSG3 @@ -170,7 +186,7 @@ sha256_process_block64_shaNI: palignr $4, MSG0, XMMTMP paddd XMMTMP, MSG2 sha256msg2 MSG1, MSG2 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG1, MSG0 @@ -182,7 +198,7 @@ sha256_process_block64_shaNI: palignr $4, MSG1, XMMTMP paddd XMMTMP, MSG3 sha256msg2 MSG2, MSG3 - shuf128_32 $0x0E, MSG, MSG + 
MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG2, MSG1 @@ -194,7 +210,7 @@ sha256_process_block64_shaNI: palignr $4, MSG2, XMMTMP paddd XMMTMP, MSG0 sha256msg2 MSG3, MSG0 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG3, MSG2 @@ -206,7 +222,7 @@ sha256_process_block64_shaNI: palignr $4, MSG3, XMMTMP paddd XMMTMP, MSG1 sha256msg2 MSG0, MSG1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG0, MSG3 @@ -218,7 +234,7 @@ sha256_process_block64_shaNI: palignr $4, MSG0, XMMTMP paddd XMMTMP, MSG2 sha256msg2 MSG1, MSG2 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 /* Rounds 56-59 */ @@ -229,14 +245,14 @@ sha256_process_block64_shaNI: palignr $4, MSG1, XMMTMP paddd XMMTMP, MSG3 sha256msg2 MSG2, MSG3 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 /* Rounds 60-63 */ mova128 MSG3, MSG paddd 15*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 /* Write hash values back in the correct order */ diff --git a/libbb/hash_sha256_hwaccel_x86-64.S b/libbb/hash_sha256_hwaccel_x86-64.S index f8911968b..92f00ebcd 100644 --- a/libbb/hash_sha256_hwaccel_x86-64.S +++ b/libbb/hash_sha256_hwaccel_x86-64.S @@ -44,7 +44,7 @@ #define SAVE0 %xmm8 #define SAVE1 %xmm9 -#define SHUF(a,b,c,d) $(a+(b<<2)+(c<<4)+(d<<6)) +#define SHUF(a,b,c,d) $((a)+((b)<<2)+((c)<<4)+((d)<<6)) .balign 8 # allow decoders to fetch at least 2 first insns sha256_process_block64_shaNI: @@ -65,13 +65,29 @@ sha256_process_block64_shaNI: mova128 STATE0, SAVE0 mova128 STATE1, SAVE1 +// sha256rnds2 instruction uses only lower 64 bits of MSG. +// The code below needs to move upper 64 bits to lower 64 bits +// for the second sha256rnds2 invocation +// (what remains in upper bits does not matter). 
+// There are several ways to do it: +// movhlps MSG, MSG // abcd -> cdcd (3 bytes of code) +// shuf128_32 SHUF(2,3,n,n), MSG, MSG // abcd -> cdXX (4 bytes) +// punpckhqdq MSG, MSG // abcd -> cdcd (4 bytes) +// psrldq $8, MSG // abcd -> cd00 (5 bytes) +// palignr $8, MSG, MSG // abcd -> cdab (6 bytes, SSSE3 insn) +#define MOVE_UPPER64_DOWN(reg) movhlps reg, reg +//#define MOVE_UPPER64_DOWN(reg) shuf128_32 SHUF(2,3,0,0), reg, reg +//#define MOVE_UPPER64_DOWN(reg) punpckhqdq reg, reg +//#define MOVE_UPPER64_DOWN(reg) psrldq $8, reg +//#define MOVE_UPPER64_DOWN(reg) palignr $8, reg, reg + /* Rounds 0-3 */ movu128 0*16(DATA_PTR), MSG pshufb XMMTMP, MSG mova128 MSG, MSG0 paddd 0*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 /* Rounds 4-7 */ @@ -80,7 +96,7 @@ sha256_process_block64_shaNI: mova128 MSG, MSG1 paddd 1*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG1, MSG0 @@ -90,7 +106,7 @@ sha256_process_block64_shaNI: mova128 MSG, MSG2 paddd 2*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG2, MSG1 @@ -105,7 +121,7 @@ sha256_process_block64_shaNI: palignr $4, MSG2, XMMTMP paddd XMMTMP, MSG0 sha256msg2 MSG3, MSG0 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG3, MSG2 @@ -117,7 +133,7 @@ sha256_process_block64_shaNI: palignr $4, MSG3, XMMTMP paddd XMMTMP, MSG1 sha256msg2 MSG0, MSG1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG0, MSG3 @@ -129,7 +145,7 @@ sha256_process_block64_shaNI: palignr $4, MSG0, XMMTMP paddd XMMTMP, MSG2 sha256msg2 MSG1, MSG2 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 
MSG1, MSG0 @@ -141,7 +157,7 @@ sha256_process_block64_shaNI: palignr $4, MSG1, XMMTMP paddd XMMTMP, MSG3 sha256msg2 MSG2, MSG3 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG2, MSG1 @@ -153,7 +169,7 @@ sha256_process_block64_shaNI: palignr $4, MSG2, XMMTMP paddd XMMTMP, MSG0 sha256msg2 MSG3, MSG0 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG3, MSG2 @@ -165,7 +181,7 @@ sha256_process_block64_shaNI: palignr $4, MSG3, XMMTMP paddd XMMTMP, MSG1 sha256msg2 MSG0, MSG1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG0, MSG3 @@ -177,7 +193,7 @@ sha256_process_block64_shaNI: palignr $4, MSG0, XMMTMP paddd XMMTMP, MSG2 sha256msg2 MSG1, MSG2 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG1, MSG0 @@ -189,7 +205,7 @@ sha256_process_block64_shaNI: palignr $4, MSG1, XMMTMP paddd XMMTMP, MSG3 sha256msg2 MSG2, MSG3 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG2, MSG1 @@ -201,7 +217,7 @@ sha256_process_block64_shaNI: palignr $4, MSG2, XMMTMP paddd XMMTMP, MSG0 sha256msg2 MSG3, MSG0 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG3, MSG2 @@ -213,7 +229,7 @@ sha256_process_block64_shaNI: palignr $4, MSG3, XMMTMP paddd XMMTMP, MSG1 sha256msg2 MSG0, MSG1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 sha256msg1 MSG0, MSG3 @@ -225,7 +241,7 @@ sha256_process_block64_shaNI: palignr $4, MSG0, XMMTMP paddd XMMTMP, MSG2 sha256msg2 MSG1, MSG2 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 /* Rounds 56-59 */ @@ -236,14 +252,14 @@ sha256_process_block64_shaNI: palignr $4, MSG1, XMMTMP paddd XMMTMP, MSG3 sha256msg2 MSG2, MSG3 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, 
STATE0 /* Rounds 60-63 */ mova128 MSG3, MSG paddd 15*16-8*16(SHA256CONSTANTS), MSG sha256rnds2 MSG, STATE0, STATE1 - shuf128_32 $0x0E, MSG, MSG + MOVE_UPPER64_DOWN(MSG) sha256rnds2 MSG, STATE1, STATE0 /* Add current hash values with previously saved */ -- cgit v1.2.3-55-g6feb From e022ff88d14fe916d74fd1311b657dedfec59425 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 22 Jul 2025 16:43:22 +0200 Subject: libbb: SHA-NI - document that unpckhpd also works, no code changes Signed-off-by: Denys Vlasenko --- libbb/hash_sha256_hwaccel_x86-32.S | 2 ++ libbb/hash_sha256_hwaccel_x86-64.S | 2 ++ 2 files changed, 4 insertions(+) (limited to 'libbb') diff --git a/libbb/hash_sha256_hwaccel_x86-32.S b/libbb/hash_sha256_hwaccel_x86-32.S index 6362ae382..8d84055e8 100644 --- a/libbb/hash_sha256_hwaccel_x86-32.S +++ b/libbb/hash_sha256_hwaccel_x86-32.S @@ -66,11 +66,13 @@ sha256_process_block64_shaNI: // movhlps MSG, MSG // abcd -> cdcd (3 bytes of code) // shuf128_32 SHUF(2,3,n,n), MSG, MSG // abcd -> cdXX (4 bytes) // punpckhqdq MSG, MSG // abcd -> cdcd (4 bytes) +// unpckhpd MSG, MSG // abcd -> cdcd (4 bytes) // psrldq $8, MSG // abcd -> cd00 (5 bytes) // palignr $8, MSG, MSG // abcd -> cdab (6 bytes, SSSE3 insn) #define MOVE_UPPER64_DOWN(reg) movhlps reg, reg //#define MOVE_UPPER64_DOWN(reg) shuf128_32 SHUF(2,3,0,0), reg, reg //#define MOVE_UPPER64_DOWN(reg) punpckhqdq reg, reg +//#define MOVE_UPPER64_DOWN(reg) unpckhpd reg, reg //#define MOVE_UPPER64_DOWN(reg) psrldq $8, reg //#define MOVE_UPPER64_DOWN(reg) palignr $8, reg, reg diff --git a/libbb/hash_sha256_hwaccel_x86-64.S b/libbb/hash_sha256_hwaccel_x86-64.S index 92f00ebcd..ee3abbd1f 100644 --- a/libbb/hash_sha256_hwaccel_x86-64.S +++ b/libbb/hash_sha256_hwaccel_x86-64.S @@ -73,11 +73,13 @@ sha256_process_block64_shaNI: // movhlps MSG, MSG // abcd -> cdcd (3 bytes of code) // shuf128_32 SHUF(2,3,n,n), MSG, MSG // abcd -> cdXX (4 bytes) // punpckhqdq MSG, MSG // abcd -> cdcd (4 bytes) +// unpckhpd MSG, MSG // abcd 
-> cdcd (4 bytes) // psrldq $8, MSG // abcd -> cd00 (5 bytes) // palignr $8, MSG, MSG // abcd -> cdab (6 bytes, SSSE3 insn) #define MOVE_UPPER64_DOWN(reg) movhlps reg, reg //#define MOVE_UPPER64_DOWN(reg) shuf128_32 SHUF(2,3,0,0), reg, reg //#define MOVE_UPPER64_DOWN(reg) punpckhqdq reg, reg +//#define MOVE_UPPER64_DOWN(reg) unpckhpd reg, reg //#define MOVE_UPPER64_DOWN(reg) psrldq $8, reg //#define MOVE_UPPER64_DOWN(reg) palignr $8, reg, reg -- cgit v1.2.3-55-g6feb