aboutsummaryrefslogtreecommitdiff
path: root/libbb
diff options
context:
space:
mode:
Diffstat (limited to 'libbb')
-rw-r--r--libbb/Config.src14
-rw-r--r--libbb/appletlib.c2
-rw-r--r--libbb/bitops.c128
-rw-r--r--libbb/const_hack.c29
-rw-r--r--libbb/dump.c16
-rw-r--r--libbb/getopt32.c17
-rw-r--r--libbb/hash_hmac.c154
-rw-r--r--libbb/hash_md5_sha.c78
-rw-r--r--libbb/hash_sha256_block.c19
-rw-r--r--libbb/hash_sha256_hwaccel_x86-32.S218
-rw-r--r--libbb/hash_sha256_hwaccel_x86-64.S218
-rw-r--r--libbb/lineedit.c112
-rw-r--r--libbb/poll_with_signals.c48
-rw-r--r--libbb/pw_ascii64.c91
-rw-r--r--libbb/pw_encrypt.c113
-rw-r--r--libbb/pw_encrypt_des.c88
-rw-r--r--libbb/pw_encrypt_md5.c4
-rw-r--r--libbb/pw_encrypt_sha.c5
-rw-r--r--libbb/pw_encrypt_yes.c24
-rw-r--r--libbb/read_key.c25
-rw-r--r--libbb/u_signal_names.c4
-rw-r--r--libbb/xfuncs.c2
-rw-r--r--libbb/yescrypt/Kbuild.src9
-rw-r--r--libbb/yescrypt/PARAMETERS196
-rw-r--r--libbb/yescrypt/README4
-rw-r--r--libbb/yescrypt/alg-sha256.c86
-rw-r--r--libbb/yescrypt/alg-yescrypt-common.c408
-rw-r--r--libbb/yescrypt/alg-yescrypt-kdf.c1212
-rw-r--r--libbb/yescrypt/alg-yescrypt.h247
-rw-r--r--libbb/yescrypt/y.c16
30 files changed, 3164 insertions, 423 deletions
diff --git a/libbb/Config.src b/libbb/Config.src
index 61b4601d6..eff327c2a 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -37,6 +37,14 @@ config PASSWORD_MINLEN
37 help 37 help
38 Minimum allowable password length. 38 Minimum allowable password length.
39 39
40config FEATURE_USE_CNG_API
41 bool "Use the Windows CNG API for checksums (Windows 10+ only)"
42 default n
43 depends on PLATFORM_MINGW32
44 help
45 Use the in-built Windows CNG API for checksums.
46 This reduces code size, but is only supported on Windows 10+.
47
40config MD5_SMALL 48config MD5_SMALL
41 int "MD5: Trade bytes for speed (0:fast, 3:slow)" 49 int "MD5: Trade bytes for speed (0:fast, 3:slow)"
42 default 1 # all "fast or small" options default to small 50 default 1 # all "fast or small" options default to small
@@ -67,6 +75,7 @@ config SHA1_SMALL
67config SHA1_HWACCEL 75config SHA1_HWACCEL
68 bool "SHA1: Use hardware accelerated instructions if possible" 76 bool "SHA1: Use hardware accelerated instructions if possible"
69 default y 77 default y
78 depends on !FEATURE_USE_CNG_API
70 help 79 help
71 On x86, this adds ~590 bytes of code. Throughput 80 On x86, this adds ~590 bytes of code. Throughput
72 is about twice as fast as fully-unrolled generic code. 81 is about twice as fast as fully-unrolled generic code.
@@ -74,6 +83,7 @@ config SHA1_HWACCEL
74config SHA256_HWACCEL 83config SHA256_HWACCEL
75 bool "SHA256: Use hardware accelerated instructions if possible" 84 bool "SHA256: Use hardware accelerated instructions if possible"
76 default y 85 default y
86 depends on !FEATURE_USE_CNG_API
77 help 87 help
78 On x86, this adds ~1k bytes of code. 88 On x86, this adds ~1k bytes of code.
79 89
@@ -182,8 +192,8 @@ config FEATURE_EDITING_VI
182config FEATURE_EDITING_HISTORY 192config FEATURE_EDITING_HISTORY
183 int "History size" 193 int "History size"
184 # Don't allow way too big values here, code uses fixed "char *history[N]" struct member 194 # Don't allow way too big values here, code uses fixed "char *history[N]" struct member
185 range 0 9999 195 range 0 2000
186 default 255 196 default 200
187 depends on FEATURE_EDITING 197 depends on FEATURE_EDITING
188 help 198 help
189 Specify command history size (0 - disable). 199 Specify command history size (0 - disable).
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index d6e042775..b1064d10a 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -933,7 +933,7 @@ int busybox_main(int argc UNUSED_PARAM, char **argv)
933 full_write1_str(" multi-call binary.\n"); /* reuse */ 933 full_write1_str(" multi-call binary.\n"); /* reuse */
934#endif 934#endif
935 full_write1_str( 935 full_write1_str(
936 "BusyBox is copyrighted by many authors between 1998-2024.\n" 936 "BusyBox is copyrighted by many authors between 1998-2025.\n"
937 "Licensed under GPLv2. See source distribution for detailed\n" 937 "Licensed under GPLv2. See source distribution for detailed\n"
938 "copyright notices.\n" 938 "copyright notices.\n"
939 "\n" 939 "\n"
diff --git a/libbb/bitops.c b/libbb/bitops.c
new file mode 100644
index 000000000..467e1a2d9
--- /dev/null
+++ b/libbb/bitops.c
@@ -0,0 +1,128 @@
1/*
2 * Utility routines.
3 *
4 * Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com>
5 *
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 */
8//kbuild:lib-y += bitops.o
9
10#include "libbb.h"
11
12void FAST_FUNC xorbuf_3(void *dst, const void *src1, const void *src2, unsigned count)
13{
14 uint8_t *d = dst;
15 const uint8_t *s1 = src1;
16 const uint8_t *s2 = src2;
17#if BB_UNALIGNED_MEMACCESS_OK
18 while (count >= sizeof(long)) {
19 *(long*)d = *(long*)s1 ^ *(long*)s2;
20 count -= sizeof(long);
21 d += sizeof(long);
22 s1 += sizeof(long);
23 s2 += sizeof(long);
24 }
25#endif
26 while (count--)
27 *d++ = *s1++ ^ *s2++;
28}
29
30void FAST_FUNC xorbuf(void *dst, const void *src, unsigned count)
31{
32 xorbuf_3(dst, dst, src, count);
33}
34
35void FAST_FUNC xorbuf16_aligned_long(void *dst, const void *src)
36{
37#if defined(__SSE__) /* any x86_64 has it */
38 asm volatile(
39"\n movups (%0),%%xmm0"
40"\n movups (%1),%%xmm1" // can't just xorps(%1),%%xmm0:
41"\n xorps %%xmm1,%%xmm0" // SSE requires 16-byte alignment
42"\n movups %%xmm0,(%0)"
43"\n"
44 : "=r" (dst), "=r" (src)
45 : "0" (dst), "1" (src)
46 : "xmm0", "xmm1", "memory"
47 );
48#else
49 unsigned long *d = dst;
50 const unsigned long *s = src;
51 d[0] ^= s[0];
52# if LONG_MAX <= 0x7fffffffffffffff
53 d[1] ^= s[1];
54# if LONG_MAX == 0x7fffffff
55 d[2] ^= s[2];
56 d[3] ^= s[3];
57# endif
58# endif
59#endif
60}
61// The above can be inlined in libbb.h, in a way where compiler
62// is even free to use better addressing modes than (%reg), and
63// to keep the result in a register
64// (to not store it to memory after each XOR):
65//#if defined(__SSE__)
66//#include <xmmintrin.h>
67//^^^ or just: typedef float __m128_u attribute((__vector_size__(16),__may_alias__,__aligned__(1)));
68//static ALWAYS_INLINE void xorbuf16_aligned_long(void *dst, const void *src)
69//{
70// __m128_u xmm0, xmm1;
71// asm volatile(
72//"\n xorps %1,%0"
73// : "=x" (xmm0), "=x" (xmm1)
74// : "0" (*(__m128_u*)dst), "1" (*(__m128_u*)src)
75// );
76// *(__m128_u*)dst = xmm0; // this store may be optimized out!
77//}
78//#endif
79// but I don't trust gcc optimizer enough to not generate some monstrosity.
80// See GMULT() function in TLS code as an example.
81
82void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2)
83{
84#if defined(__SSE__) /* any x86_64 has it */
85 asm volatile(
86"\n movups 0*16(%1),%%xmm0"
87"\n movups 0*16(%2),%%xmm1" // can't just xorps(%2),%%xmm0:
88"\n xorps %%xmm1,%%xmm0" // SSE requires 16-byte alignment, we have only 8-byte
89"\n movups %%xmm0,0*16(%0)"
90"\n movups 1*16(%1),%%xmm0"
91"\n movups 1*16(%2),%%xmm1"
92"\n xorps %%xmm1,%%xmm0"
93"\n movups %%xmm0,1*16(%0)"
94"\n movups 2*16(%1),%%xmm0"
95"\n movups 2*16(%2),%%xmm1"
96"\n xorps %%xmm1,%%xmm0"
97"\n movups %%xmm0,2*16(%0)"
98"\n movups 3*16(%1),%%xmm0"
99"\n movups 3*16(%2),%%xmm1"
100"\n xorps %%xmm1,%%xmm0"
101"\n movups %%xmm0,3*16(%0)"
102"\n"
103 : "=r" (dst), "=r" (src1), "=r" (src2)
104 : "0" (dst), "1" (src1), "2" (src2)
105 : "xmm0", "xmm1", "memory"
106 );
107#else
108 long *d = dst;
109 const long *s1 = src1;
110 const long *s2 = src2;
111 unsigned count = 64 / sizeof(long);
112 do {
113 *d++ = *s1++ ^ *s2++;
114 } while (--count != 0);
115#endif
116}
117
118#if !BB_UNALIGNED_MEMACCESS_OK
119void FAST_FUNC xorbuf16(void *dst, const void *src)
120{
121#define p_aligned(a) (((uintptr_t)(a) & (sizeof(long)-1)) == 0)
122 if (p_aligned(src) && p_aligned(dst)) {
123 xorbuf16_aligned_long(dst, src);
124 return;
125 }
126 xorbuf_3(dst, dst, src, 16);
127}
128#endif
diff --git a/libbb/const_hack.c b/libbb/const_hack.c
index 75163fede..1d175481b 100644
--- a/libbb/const_hack.c
+++ b/libbb/const_hack.c
@@ -9,18 +9,27 @@
9#include "libbb.h" 9#include "libbb.h"
10 10
11#if defined(__clang_major__) && __clang_major__ >= 9 11#if defined(__clang_major__) && __clang_major__ >= 9
12void FAST_FUNC XZALLOC_CONST_PTR(const void *pptr, size_t size) 12/* Clang/llvm drops assignment to "constant" storage. Silently.
13 * Needs serious convincing to not eliminate the store.
14 */
15static ALWAYS_INLINE void* not_const_pp(const void *p)
13{ 16{
14 ASSIGN_CONST_PTR(pptr, xzalloc(size)); 17 void *pp;
18 asm volatile (
19 "# forget that p points to const"
20 : /*outputs*/ "=r" (pp)
21 : /*inputs*/ "0" (p)
22 );
23 return pp;
15} 24}
16 25void FAST_FUNC ASSIGN_CONST_PTR(const void *pptr, void *v)
17# if ENABLE_PLATFORM_MINGW32 26{
18void FAST_FUNC ASSIGN_CONST_PTR(const void *pptr, const void *v) 27 *(void**)not_const_pp(pptr) = v;
28 barrier();
29}
30void FAST_FUNC XZALLOC_CONST_PTR(const void *pptr, size_t size)
19{ 31{
20 do { 32 *(void**)not_const_pp(pptr) = xzalloc(size);
21 *(void**)not_const_pp(pptr) = (void*)(v); 33 barrier();
22 barrier();
23 } while (0);
24} 34}
25# endif
26#endif 35#endif
diff --git a/libbb/dump.c b/libbb/dump.c
index ffc46f6a7..b2abe85af 100644
--- a/libbb/dump.c
+++ b/libbb/dump.c
@@ -204,9 +204,11 @@ static NOINLINE void rewrite(priv_dumper_t *dumper, FS *fs)
204 if (!e) 204 if (!e)
205 goto DO_BAD_CONV_CHAR; 205 goto DO_BAD_CONV_CHAR;
206 pr->flags = F_INT; 206 pr->flags = F_INT;
207 if (e > int_convs + 1) /* not d or i? */
208 pr->flags = F_UINT;
209 byte_count_str = "\010\004\002\001"; 207 byte_count_str = "\010\004\002\001";
208 if (e > int_convs + 1) { /* not d or i? */
209 pr->flags = F_UINT;
210 byte_count_str++;
211 }
210 goto DO_BYTE_COUNT; 212 goto DO_BYTE_COUNT;
211 } else 213 } else
212 if (strchr(int_convs, *p1)) { /* %d etc */ 214 if (strchr(int_convs, *p1)) { /* %d etc */
@@ -701,15 +703,21 @@ static NOINLINE void display(priv_dumper_t* dumper)
701 conv_u(pr, bp); 703 conv_u(pr, bp);
702 break; 704 break;
703 case F_UINT: { 705 case F_UINT: {
706 union {
707 uint16_t uval16;
708 uint32_t uval32;
709 } u;
704 unsigned value = (unsigned char)*bp; 710 unsigned value = (unsigned char)*bp;
705 switch (pr->bcnt) { 711 switch (pr->bcnt) {
706 case 1: 712 case 1:
707 break; 713 break;
708 case 2: 714 case 2:
709 move_from_unaligned16(value, bp); 715 move_from_unaligned16(u.uval16, bp);
716 value = u.uval16;
710 break; 717 break;
711 case 4: 718 case 4:
712 move_from_unaligned32(value, bp); 719 move_from_unaligned32(u.uval32, bp);
720 value = u.uval32;
713 break; 721 break;
714 /* case 8: no users yet */ 722 /* case 8: no users yet */
715 } 723 }
diff --git a/libbb/getopt32.c b/libbb/getopt32.c
index 56040e150..76d29d5eb 100644
--- a/libbb/getopt32.c
+++ b/libbb/getopt32.c
@@ -93,7 +93,7 @@ getopt32(char **argv, const char *applet_opts, ...)
93 93
94 "!" If the first character in the applet_opts string is a '!', 94 "!" If the first character in the applet_opts string is a '!',
95 report bad options, missing required options, 95 report bad options, missing required options,
96 inconsistent options with all-ones return value (instead of abort. 96 inconsistent options with all-ones return value instead of aborting.
97 97
98 "+" If the first character in the applet_opts string is a plus, 98 "+" If the first character in the applet_opts string is a plus,
99 then option processing will stop as soon as a non-option is 99 then option processing will stop as soon as a non-option is
@@ -265,7 +265,7 @@ Special characters:
265 for "long options only" cases, such as tar --exclude=PATTERN, 265 for "long options only" cases, such as tar --exclude=PATTERN,
266 wget --header=HDR cases. 266 wget --header=HDR cases.
267 267
268 "a?b" A "?" between an option and a group of options means that 268 "a?bc" A "?" between an option and a group of options means that
269 at least one of them is required to occur if the first option 269 at least one of them is required to occur if the first option
270 occurs in preceding command line arguments. 270 occurs in preceding command line arguments.
271 271
@@ -348,9 +348,6 @@ vgetopt32(char **argv, const char *applet_opts, const char *applet_long_options,
348 unsigned trigger; 348 unsigned trigger;
349 int min_arg = 0; 349 int min_arg = 0;
350 int max_arg = -1; 350 int max_arg = -1;
351 int spec_flgs = 0;
352
353#define SHOW_USAGE_IF_ERROR 1
354 351
355 on_off = complementary; 352 on_off = complementary;
356 memset(on_off, 0, sizeof(complementary)); 353 memset(on_off, 0, sizeof(complementary));
@@ -449,9 +446,7 @@ vgetopt32(char **argv, const char *applet_opts, const char *applet_long_options,
449 continue; 446 continue;
450 c = s[1]; 447 c = s[1];
451 if (*s == '?') { 448 if (*s == '?') {
452 if (c < '0' || c > '9') { 449 if (c >= '0' && c <= '9') {
453 spec_flgs |= SHOW_USAGE_IF_ERROR;
454 } else {
455 max_arg = c - '0'; 450 max_arg = c - '0';
456 s++; 451 s++;
457 } 452 }
@@ -465,8 +460,10 @@ vgetopt32(char **argv, const char *applet_opts, const char *applet_long_options,
465 continue; 460 continue;
466 } 461 }
467 if (*s == '=') { 462 if (*s == '=') {
468 min_arg = max_arg = c - '0'; 463 if (c >= '0' && c <= '9') {
469 s++; 464 min_arg = max_arg = c - '0';
465 s++;
466 }
470 continue; 467 continue;
471 } 468 }
472 for (on_off = complementary; on_off->opt_char; on_off++) 469 for (on_off = complementary; on_off->opt_char; on_off++)
diff --git a/libbb/hash_hmac.c b/libbb/hash_hmac.c
new file mode 100644
index 000000000..b3138029f
--- /dev/null
+++ b/libbb/hash_hmac.c
@@ -0,0 +1,154 @@
1/*
2 * Copyright (C) 2025 Denys Vlasenko
3 *
4 * Licensed under GPLv2, see file LICENSE in this source tree.
5 */
6//kbuild:lib-$(CONFIG_TLS) += hash_hmac.o
7//kbuild:lib-$(CONFIG_USE_BB_CRYPT_YES) += hash_hmac.o
8
9#include "libbb.h"
10
11// RFC 2104:
12// HMAC(key, text) based on a hash H (say, sha256) is:
13// ipad = [0x36 x INSIZE]
14// opad = [0x5c x INSIZE]
15// HMAC(key, text) = H((key XOR opad) + H((key XOR ipad) + text))
16//
17// H(key XOR opad) and H(key XOR ipad) can be precomputed
18// if we often need HMAC hmac with the same key.
19//
20// text is often given in disjoint pieces.
21#if !ENABLE_FEATURE_USE_CNG_API
22void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, const uint8_t *key, unsigned key_size, md5sha_begin_func *begin)
23{
24#if HMAC_ONLY_SHA256
25#define begin sha256_begin
26#endif
27 uint8_t key_xor_ipad[SHA2_INSIZE];
28 uint8_t key_xor_opad[SHA2_INSIZE];
29 unsigned i;
30
31 // "The authentication key can be of any length up to INSIZE, the
32 // block length of the hash function. Applications that use keys longer
33 // than INSIZE bytes will first hash the key using H and then use the
34 // resultant OUTSIZE byte string as the actual key to HMAC."
35 if (key_size > SHA2_INSIZE) {
36 uint8_t tempkey[SHA1_OUTSIZE < SHA256_OUTSIZE ? SHA256_OUTSIZE : SHA1_OUTSIZE];
37 /* use ctx->hashed_key_xor_ipad as scratch ctx */
38 begin(&ctx->hashed_key_xor_ipad);
39 md5sha_hash(&ctx->hashed_key_xor_ipad, key, key_size);
40 key_size = sha_end(&ctx->hashed_key_xor_ipad, tempkey);
41 key = tempkey;
42 }
43
44 for (i = 0; i < key_size; i++) {
45 key_xor_ipad[i] = key[i] ^ 0x36;
46 key_xor_opad[i] = key[i] ^ 0x5c;
47 }
48 for (; i < SHA2_INSIZE; i++) {
49 key_xor_ipad[i] = 0x36;
50 key_xor_opad[i] = 0x5c;
51 }
52
53 begin(&ctx->hashed_key_xor_ipad);
54 begin(&ctx->hashed_key_xor_opad);
55 md5sha_hash(&ctx->hashed_key_xor_ipad, key_xor_ipad, SHA2_INSIZE);
56 md5sha_hash(&ctx->hashed_key_xor_opad, key_xor_opad, SHA2_INSIZE);
57}
58#undef begin
59
60unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out)
61{
62 unsigned len = sha_end(&ctx->hashed_key_xor_ipad, out);
63 /* out = H((key XOR opad) + out) */
64 md5sha_hash(&ctx->hashed_key_xor_opad, out, len);
65 return sha_end(&ctx->hashed_key_xor_opad, out);
66}
67
68unsigned FAST_FUNC hmac_block(const uint8_t *key, unsigned key_size, md5sha_begin_func *begin, const void *in, unsigned sz, uint8_t *out)
69{
70 hmac_ctx_t ctx;
71 hmac_begin(&ctx, key, key_size, begin);
72 hmac_hash(&ctx, in, sz);
73 return hmac_end(&ctx, out);
74}
75
76/* TLS helpers */
77
78void FAST_FUNC hmac_hash_v(
79 hmac_ctx_t *ctx,
80 va_list va)
81{
82 uint8_t *in;
83
84 /* ctx->hashed_key_xor_ipad contains unclosed "H((key XOR ipad) +" state */
85 /* ctx->hashed_key_xor_opad contains unclosed "H((key XOR opad) +" state */
86
87 /* calculate out = H((key XOR ipad) + text) */
88 while ((in = va_arg(va, uint8_t*)) != NULL) {
89 unsigned size = va_arg(va, unsigned);
90 md5sha_hash(&ctx->hashed_key_xor_ipad, in, size);
91 }
92}
93#else
94void _hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size,
95 BCRYPT_ALG_HANDLE alg_handle) {
96 DWORD hash_object_length = 0;
97 ULONG _unused;
98 NTSTATUS status;
99
100 status = BCryptGetProperty(alg_handle, BCRYPT_OBJECT_LENGTH,
101 (PUCHAR)&hash_object_length, sizeof(DWORD), &_unused, 0);
102 mingw_die_if_error(status, "BCryptGetProperty");
103 status = BCryptGetProperty(alg_handle, BCRYPT_HASH_LENGTH,
104 (PUCHAR)&ctx->output_size, sizeof(DWORD), &_unused, 0);
105 mingw_die_if_error(status, "BCryptGetProperty");
106
107 ctx->hash_obj = xmalloc(hash_object_length);
108
109 status = BCryptCreateHash(alg_handle, &ctx->handle, ctx->hash_obj,
110 hash_object_length, key, key_size, BCRYPT_HASH_REUSABLE_FLAG);
111 mingw_die_if_error(status, "BCryptCreateHash");
112}
113
114unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out)
115{
116 NTSTATUS status;
117
118 status = BCryptFinishHash(ctx->handle, out, ctx->output_size, 0);
119 mingw_die_if_error(status, "BCryptFinishHash");
120
121 return ctx->output_size;
122}
123
124void FAST_FUNC hmac_hash_v(hmac_ctx_t *ctx, va_list va)
125{
126 uint8_t *in;
127
128 while ((in = va_arg(va, uint8_t*)) != NULL) {
129 unsigned size = va_arg(va, unsigned);
130 BCryptHashData(ctx->handle, in, size, 0);
131 }
132}
133
134void hmac_uninit(hmac_ctx_t *ctx) {
135 BCryptDestroyHash(ctx->handle);
136 free(ctx->hash_obj);
137}
138#endif
139
140/* Using HMAC state, make a copy of it (IOW: without affecting this state!)
141 * hash in the list of (ptr,size) blocks, and finish the HMAC to out[] buffer.
142 * This function is useful for TLS PRF.
143 */
144unsigned hmac_peek_hash(hmac_ctx_t *ctx, uint8_t *out, ...)
145{
146 hmac_ctx_t tmpctx = *ctx; /* struct copy */
147 va_list va;
148
149 va_start(va, out);
150 hmac_hash_v(&tmpctx, va);
151 va_end(va);
152
153 return hmac_end(&tmpctx, out);
154}
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 75a61c32c..22dd890bf 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -13,6 +13,82 @@
13 13
14#define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA) 14#define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA)
15 15
16#if ENABLE_FEATURE_USE_CNG_API
17# include <windows.h>
18# include <bcrypt.h>
19
20// these work on Windows >= 10
21# define BCRYPT_MD5_ALG_HANDLE ((BCRYPT_ALG_HANDLE) 0x00000021)
22# define BCRYPT_SHA1_ALG_HANDLE ((BCRYPT_ALG_HANDLE) 0x00000031)
23# define BCRYPT_SHA256_ALG_HANDLE ((BCRYPT_ALG_HANDLE) 0x00000041)
24# define BCRYPT_SHA512_ALG_HANDLE ((BCRYPT_ALG_HANDLE) 0x00000061)
25
26/* Initialize structure containing state of computation.
27 * (RFC 1321, 3.3: Step 3)
28 */
29
30static void generic_init(struct bcrypt_hash_ctx_t *ctx, BCRYPT_ALG_HANDLE alg_handle) {
31 DWORD hash_object_length = 0;
32 ULONG _unused;
33 NTSTATUS status;
34
35 status = BCryptGetProperty(alg_handle, BCRYPT_OBJECT_LENGTH, (PUCHAR)&hash_object_length, sizeof(DWORD), &_unused, 0);
36 mingw_die_if_error(status, "BCryptGetProperty");
37 status = BCryptGetProperty(alg_handle, BCRYPT_HASH_LENGTH, (PUCHAR)&ctx->output_size, sizeof(DWORD), &_unused, 0);
38 mingw_die_if_error(status, "BCryptGetProperty");
39
40
41 ctx->hash_obj = xmalloc(hash_object_length);
42
43 status = BCryptCreateHash(alg_handle, &ctx->handle, ctx->hash_obj, hash_object_length, NULL, 0, 0);
44 mingw_die_if_error(status, "BCryptCreateHash");
45}
46
47void FAST_FUNC md5_begin(md5_ctx_t *ctx)
48{
49 generic_init(ctx, BCRYPT_MD5_ALG_HANDLE);
50}
51
52void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
53{
54 generic_init(ctx, BCRYPT_SHA1_ALG_HANDLE);
55}
56
57/* Initialize structure containing state of computation.
58 (FIPS 180-2:5.3.2) */
59void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
60{
61 generic_init(ctx, BCRYPT_SHA256_ALG_HANDLE);
62}
63
64#if NEED_SHA512
65/* Initialize structure containing state of computation.
66 (FIPS 180-2:5.3.3) */
67void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
68{
69 generic_init(ctx, BCRYPT_SHA512_ALG_HANDLE);
70}
71#endif /* NEED_SHA512 */
72
73void FAST_FUNC generic_hash(struct bcrypt_hash_ctx_t *ctx, const void *buffer, size_t len)
74{
75 /*
76 for perf, no error checking here
77 */
78 /*NTSTATUS status = */ BCryptHashData(ctx->handle, (const PUCHAR)buffer, len, 0);
79 // mingw_die_if_error(status, "BCryptHashData");
80}
81
82unsigned FAST_FUNC generic_end(struct bcrypt_hash_ctx_t *ctx, void *resbuf)
83{
84 NTSTATUS status = BCryptFinishHash(ctx->handle, resbuf, ctx->output_size, 0);
85 mingw_die_if_error(status, "BCryptFinishHash");
86 BCryptDestroyHash(ctx->handle);
87 free(ctx->hash_obj);
88 return ctx->output_size;
89}
90#endif /* !ENABLE_FEATURE_USE_CNG_API */
91
16#if ENABLE_SHA1_HWACCEL || ENABLE_SHA256_HWACCEL 92#if ENABLE_SHA1_HWACCEL || ENABLE_SHA256_HWACCEL
17# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 93# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
18static void cpuid_eax_ebx_ecx(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) 94static void cpuid_eax_ebx_ecx(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
@@ -80,6 +156,7 @@ static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
80 return (x << n) | (x >> (64 - n)); 156 return (x << n) | (x >> (64 - n));
81} 157}
82 158
159#if !ENABLE_FEATURE_USE_CNG_API
83/* Process the remaining bytes in the buffer */ 160/* Process the remaining bytes in the buffer */
84static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed) 161static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
85{ 162{
@@ -1367,6 +1444,7 @@ unsigned FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
1367 return sizeof(ctx->hash); 1444 return sizeof(ctx->hash);
1368} 1445}
1369#endif /* NEED_SHA512 */ 1446#endif /* NEED_SHA512 */
1447#endif /* !ENABLE_FEATURE_USE_CNG_API */
1370 1448
1371 1449
1372/* 1450/*
diff --git a/libbb/hash_sha256_block.c b/libbb/hash_sha256_block.c
new file mode 100644
index 000000000..3c4366321
--- /dev/null
+++ b/libbb/hash_sha256_block.c
@@ -0,0 +1,19 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Utility routines.
4 *
5 * Copyright (C) 2025 Denys Vlasenko
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9//kbuild:lib-y += hash_sha256_block.o
10#include "libbb.h"
11
12void FAST_FUNC
13sha256_block(const void *in, size_t len, uint8_t hash[32])
14{
15 sha256_ctx_t ctx;
16 sha256_begin(&ctx);
17 sha256_hash(&ctx, in, len);
18 sha256_end(&ctx, hash);
19}
diff --git a/libbb/hash_sha256_hwaccel_x86-32.S b/libbb/hash_sha256_hwaccel_x86-32.S
index a0e4a571a..8d84055e8 100644
--- a/libbb/hash_sha256_hwaccel_x86-32.S
+++ b/libbb/hash_sha256_hwaccel_x86-32.S
@@ -34,21 +34,21 @@
34#define MSG %xmm0 34#define MSG %xmm0
35#define STATE0 %xmm1 35#define STATE0 %xmm1
36#define STATE1 %xmm2 36#define STATE1 %xmm2
37#define MSGTMP0 %xmm3 37#define MSG0 %xmm3
38#define MSGTMP1 %xmm4 38#define MSG1 %xmm4
39#define MSGTMP2 %xmm5 39#define MSG2 %xmm5
40#define MSGTMP3 %xmm6 40#define MSG3 %xmm6
41 41
42#define XMMTMP %xmm7 42#define XMMTMP %xmm7
43 43
44#define SHUF(a,b,c,d) $(a+(b<<2)+(c<<4)+(d<<6)) 44#define SHUF(a,b,c,d) $((a)+((b)<<2)+((c)<<4)+((d)<<6))
45 45
46 .balign 8 # allow decoders to fetch at least 2 first insns 46 .balign 8 # allow decoders to fetch at least 2 first insns
47sha256_process_block64_shaNI: 47sha256_process_block64_shaNI:
48 48
49 movu128 76+0*16(%eax), XMMTMP /* ABCD (little-endian dword order) */ 49 movu128 76+0*16(%eax), XMMTMP /* ABCD (shown least-significant-dword-first) */
50 movu128 76+1*16(%eax), STATE1 /* EFGH */ 50 movu128 76+1*16(%eax), STATE1 /* EFGH */
51/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ 51/* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */
52 mova128 STATE1, STATE0 52 mova128 STATE1, STATE0
53 /* --- -------------- ABCD -- EFGH */ 53 /* --- -------------- ABCD -- EFGH */
54 shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */ 54 shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */
@@ -58,190 +58,208 @@ sha256_process_block64_shaNI:
58 mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP 58 mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP
59 movl $K256+8*16, SHA256CONSTANTS 59 movl $K256+8*16, SHA256CONSTANTS
60 60
61// sha256rnds2 instruction uses only lower 64 bits of MSG.
62// The code below needs to move upper 64 bits to lower 64 bits
63// for the second sha256rnds2 invocation
64// (what remains in upper bits does not matter).
65// There are several ways to do it:
66// movhlps MSG, MSG // abcd -> cdcd (3 bytes of code)
67// shuf128_32 SHUF(2,3,n,n), MSG, MSG // abcd -> cdXX (4 bytes)
68// punpckhqdq MSG, MSG // abcd -> cdcd (4 bytes)
69// unpckhpd MSG, MSG // abcd -> cdcd (4 bytes)
70// psrldq $8, MSG // abcd -> cd00 (5 bytes)
71// palignr $8, MSG, MSG // abcd -> cdab (6 bytes, SSSE3 insn)
72#define MOVE_UPPER64_DOWN(reg) movhlps reg, reg
73//#define MOVE_UPPER64_DOWN(reg) shuf128_32 SHUF(2,3,0,0), reg, reg
74//#define MOVE_UPPER64_DOWN(reg) punpckhqdq reg, reg
75//#define MOVE_UPPER64_DOWN(reg) unpckhpd reg, reg
76//#define MOVE_UPPER64_DOWN(reg) psrldq $8, reg
77//#define MOVE_UPPER64_DOWN(reg) palignr $8, reg, reg
78
61 /* Rounds 0-3 */ 79 /* Rounds 0-3 */
62 movu128 0*16(DATA_PTR), MSG 80 movu128 0*16(DATA_PTR), MSG
63 pshufb XMMTMP, MSG 81 pshufb XMMTMP, MSG
64 mova128 MSG, MSGTMP0 82 mova128 MSG, MSG0
65 paddd 0*16-8*16(SHA256CONSTANTS), MSG 83 paddd 0*16-8*16(SHA256CONSTANTS), MSG
66 sha256rnds2 MSG, STATE0, STATE1 84 sha256rnds2 MSG, STATE0, STATE1
67 shuf128_32 $0x0E, MSG, MSG 85 MOVE_UPPER64_DOWN(MSG)
68 sha256rnds2 MSG, STATE1, STATE0 86 sha256rnds2 MSG, STATE1, STATE0
69 87
70 /* Rounds 4-7 */ 88 /* Rounds 4-7 */
71 movu128 1*16(DATA_PTR), MSG 89 movu128 1*16(DATA_PTR), MSG
72 pshufb XMMTMP, MSG 90 pshufb XMMTMP, MSG
73 mova128 MSG, MSGTMP1 91 mova128 MSG, MSG1
74 paddd 1*16-8*16(SHA256CONSTANTS), MSG 92 paddd 1*16-8*16(SHA256CONSTANTS), MSG
75 sha256rnds2 MSG, STATE0, STATE1 93 sha256rnds2 MSG, STATE0, STATE1
76 shuf128_32 $0x0E, MSG, MSG 94 MOVE_UPPER64_DOWN(MSG)
77 sha256rnds2 MSG, STATE1, STATE0 95 sha256rnds2 MSG, STATE1, STATE0
78 sha256msg1 MSGTMP1, MSGTMP0 96 sha256msg1 MSG1, MSG0
79 97
80 /* Rounds 8-11 */ 98 /* Rounds 8-11 */
81 movu128 2*16(DATA_PTR), MSG 99 movu128 2*16(DATA_PTR), MSG
82 pshufb XMMTMP, MSG 100 pshufb XMMTMP, MSG
83 mova128 MSG, MSGTMP2 101 mova128 MSG, MSG2
84 paddd 2*16-8*16(SHA256CONSTANTS), MSG 102 paddd 2*16-8*16(SHA256CONSTANTS), MSG
85 sha256rnds2 MSG, STATE0, STATE1 103 sha256rnds2 MSG, STATE0, STATE1
86 shuf128_32 $0x0E, MSG, MSG 104 MOVE_UPPER64_DOWN(MSG)
87 sha256rnds2 MSG, STATE1, STATE0 105 sha256rnds2 MSG, STATE1, STATE0
88 sha256msg1 MSGTMP2, MSGTMP1 106 sha256msg1 MSG2, MSG1
89 107
90 /* Rounds 12-15 */ 108 /* Rounds 12-15 */
91 movu128 3*16(DATA_PTR), MSG 109 movu128 3*16(DATA_PTR), MSG
92 pshufb XMMTMP, MSG 110 pshufb XMMTMP, MSG
93/* ...to here */ 111/* ...to here */
94 mova128 MSG, MSGTMP3 112 mova128 MSG, MSG3
95 paddd 3*16-8*16(SHA256CONSTANTS), MSG 113 paddd 3*16-8*16(SHA256CONSTANTS), MSG
96 sha256rnds2 MSG, STATE0, STATE1 114 sha256rnds2 MSG, STATE0, STATE1
97 mova128 MSGTMP3, XMMTMP 115 mova128 MSG3, XMMTMP
98 palignr $4, MSGTMP2, XMMTMP 116 palignr $4, MSG2, XMMTMP
99 paddd XMMTMP, MSGTMP0 117 paddd XMMTMP, MSG0
100 sha256msg2 MSGTMP3, MSGTMP0 118 sha256msg2 MSG3, MSG0
101 shuf128_32 $0x0E, MSG, MSG 119 MOVE_UPPER64_DOWN(MSG)
102 sha256rnds2 MSG, STATE1, STATE0 120 sha256rnds2 MSG, STATE1, STATE0
103 sha256msg1 MSGTMP3, MSGTMP2 121 sha256msg1 MSG3, MSG2
104 122
105 /* Rounds 16-19 */ 123 /* Rounds 16-19 */
106 mova128 MSGTMP0, MSG 124 mova128 MSG0, MSG
107 paddd 4*16-8*16(SHA256CONSTANTS), MSG 125 paddd 4*16-8*16(SHA256CONSTANTS), MSG
108 sha256rnds2 MSG, STATE0, STATE1 126 sha256rnds2 MSG, STATE0, STATE1
109 mova128 MSGTMP0, XMMTMP 127 mova128 MSG0, XMMTMP
110 palignr $4, MSGTMP3, XMMTMP 128 palignr $4, MSG3, XMMTMP
111 paddd XMMTMP, MSGTMP1 129 paddd XMMTMP, MSG1
112 sha256msg2 MSGTMP0, MSGTMP1 130 sha256msg2 MSG0, MSG1
113 shuf128_32 $0x0E, MSG, MSG 131 MOVE_UPPER64_DOWN(MSG)
114 sha256rnds2 MSG, STATE1, STATE0 132 sha256rnds2 MSG, STATE1, STATE0
115 sha256msg1 MSGTMP0, MSGTMP3 133 sha256msg1 MSG0, MSG3
116 134
117 /* Rounds 20-23 */ 135 /* Rounds 20-23 */
118 mova128 MSGTMP1, MSG 136 mova128 MSG1, MSG
119 paddd 5*16-8*16(SHA256CONSTANTS), MSG 137 paddd 5*16-8*16(SHA256CONSTANTS), MSG
120 sha256rnds2 MSG, STATE0, STATE1 138 sha256rnds2 MSG, STATE0, STATE1
121 mova128 MSGTMP1, XMMTMP 139 mova128 MSG1, XMMTMP
122 palignr $4, MSGTMP0, XMMTMP 140 palignr $4, MSG0, XMMTMP
123 paddd XMMTMP, MSGTMP2 141 paddd XMMTMP, MSG2
124 sha256msg2 MSGTMP1, MSGTMP2 142 sha256msg2 MSG1, MSG2
125 shuf128_32 $0x0E, MSG, MSG 143 MOVE_UPPER64_DOWN(MSG)
126 sha256rnds2 MSG, STATE1, STATE0 144 sha256rnds2 MSG, STATE1, STATE0
127 sha256msg1 MSGTMP1, MSGTMP0 145 sha256msg1 MSG1, MSG0
128 146
129 /* Rounds 24-27 */ 147 /* Rounds 24-27 */
130 mova128 MSGTMP2, MSG 148 mova128 MSG2, MSG
131 paddd 6*16-8*16(SHA256CONSTANTS), MSG 149 paddd 6*16-8*16(SHA256CONSTANTS), MSG
132 sha256rnds2 MSG, STATE0, STATE1 150 sha256rnds2 MSG, STATE0, STATE1
133 mova128 MSGTMP2, XMMTMP 151 mova128 MSG2, XMMTMP
134 palignr $4, MSGTMP1, XMMTMP 152 palignr $4, MSG1, XMMTMP
135 paddd XMMTMP, MSGTMP3 153 paddd XMMTMP, MSG3
136 sha256msg2 MSGTMP2, MSGTMP3 154 sha256msg2 MSG2, MSG3
137 shuf128_32 $0x0E, MSG, MSG 155 MOVE_UPPER64_DOWN(MSG)
138 sha256rnds2 MSG, STATE1, STATE0 156 sha256rnds2 MSG, STATE1, STATE0
139 sha256msg1 MSGTMP2, MSGTMP1 157 sha256msg1 MSG2, MSG1
140 158
141 /* Rounds 28-31 */ 159 /* Rounds 28-31 */
142 mova128 MSGTMP3, MSG 160 mova128 MSG3, MSG
143 paddd 7*16-8*16(SHA256CONSTANTS), MSG 161 paddd 7*16-8*16(SHA256CONSTANTS), MSG
144 sha256rnds2 MSG, STATE0, STATE1 162 sha256rnds2 MSG, STATE0, STATE1
145 mova128 MSGTMP3, XMMTMP 163 mova128 MSG3, XMMTMP
146 palignr $4, MSGTMP2, XMMTMP 164 palignr $4, MSG2, XMMTMP
147 paddd XMMTMP, MSGTMP0 165 paddd XMMTMP, MSG0
148 sha256msg2 MSGTMP3, MSGTMP0 166 sha256msg2 MSG3, MSG0
149 shuf128_32 $0x0E, MSG, MSG 167 MOVE_UPPER64_DOWN(MSG)
150 sha256rnds2 MSG, STATE1, STATE0 168 sha256rnds2 MSG, STATE1, STATE0
151 sha256msg1 MSGTMP3, MSGTMP2 169 sha256msg1 MSG3, MSG2
152 170
153 /* Rounds 32-35 */ 171 /* Rounds 32-35 */
154 mova128 MSGTMP0, MSG 172 mova128 MSG0, MSG
155 paddd 8*16-8*16(SHA256CONSTANTS), MSG 173 paddd 8*16-8*16(SHA256CONSTANTS), MSG
156 sha256rnds2 MSG, STATE0, STATE1 174 sha256rnds2 MSG, STATE0, STATE1
157 mova128 MSGTMP0, XMMTMP 175 mova128 MSG0, XMMTMP
158 palignr $4, MSGTMP3, XMMTMP 176 palignr $4, MSG3, XMMTMP
159 paddd XMMTMP, MSGTMP1 177 paddd XMMTMP, MSG1
160 sha256msg2 MSGTMP0, MSGTMP1 178 sha256msg2 MSG0, MSG1
161 shuf128_32 $0x0E, MSG, MSG 179 MOVE_UPPER64_DOWN(MSG)
162 sha256rnds2 MSG, STATE1, STATE0 180 sha256rnds2 MSG, STATE1, STATE0
163 sha256msg1 MSGTMP0, MSGTMP3 181 sha256msg1 MSG0, MSG3
164 182
165 /* Rounds 36-39 */ 183 /* Rounds 36-39 */
166 mova128 MSGTMP1, MSG 184 mova128 MSG1, MSG
167 paddd 9*16-8*16(SHA256CONSTANTS), MSG 185 paddd 9*16-8*16(SHA256CONSTANTS), MSG
168 sha256rnds2 MSG, STATE0, STATE1 186 sha256rnds2 MSG, STATE0, STATE1
169 mova128 MSGTMP1, XMMTMP 187 mova128 MSG1, XMMTMP
170 palignr $4, MSGTMP0, XMMTMP 188 palignr $4, MSG0, XMMTMP
171 paddd XMMTMP, MSGTMP2 189 paddd XMMTMP, MSG2
172 sha256msg2 MSGTMP1, MSGTMP2 190 sha256msg2 MSG1, MSG2
173 shuf128_32 $0x0E, MSG, MSG 191 MOVE_UPPER64_DOWN(MSG)
174 sha256rnds2 MSG, STATE1, STATE0 192 sha256rnds2 MSG, STATE1, STATE0
175 sha256msg1 MSGTMP1, MSGTMP0 193 sha256msg1 MSG1, MSG0
176 194
177 /* Rounds 40-43 */ 195 /* Rounds 40-43 */
178 mova128 MSGTMP2, MSG 196 mova128 MSG2, MSG
179 paddd 10*16-8*16(SHA256CONSTANTS), MSG 197 paddd 10*16-8*16(SHA256CONSTANTS), MSG
180 sha256rnds2 MSG, STATE0, STATE1 198 sha256rnds2 MSG, STATE0, STATE1
181 mova128 MSGTMP2, XMMTMP 199 mova128 MSG2, XMMTMP
182 palignr $4, MSGTMP1, XMMTMP 200 palignr $4, MSG1, XMMTMP
183 paddd XMMTMP, MSGTMP3 201 paddd XMMTMP, MSG3
184 sha256msg2 MSGTMP2, MSGTMP3 202 sha256msg2 MSG2, MSG3
185 shuf128_32 $0x0E, MSG, MSG 203 MOVE_UPPER64_DOWN(MSG)
186 sha256rnds2 MSG, STATE1, STATE0 204 sha256rnds2 MSG, STATE1, STATE0
187 sha256msg1 MSGTMP2, MSGTMP1 205 sha256msg1 MSG2, MSG1
188 206
189 /* Rounds 44-47 */ 207 /* Rounds 44-47 */
190 mova128 MSGTMP3, MSG 208 mova128 MSG3, MSG
191 paddd 11*16-8*16(SHA256CONSTANTS), MSG 209 paddd 11*16-8*16(SHA256CONSTANTS), MSG
192 sha256rnds2 MSG, STATE0, STATE1 210 sha256rnds2 MSG, STATE0, STATE1
193 mova128 MSGTMP3, XMMTMP 211 mova128 MSG3, XMMTMP
194 palignr $4, MSGTMP2, XMMTMP 212 palignr $4, MSG2, XMMTMP
195 paddd XMMTMP, MSGTMP0 213 paddd XMMTMP, MSG0
196 sha256msg2 MSGTMP3, MSGTMP0 214 sha256msg2 MSG3, MSG0
197 shuf128_32 $0x0E, MSG, MSG 215 MOVE_UPPER64_DOWN(MSG)
198 sha256rnds2 MSG, STATE1, STATE0 216 sha256rnds2 MSG, STATE1, STATE0
199 sha256msg1 MSGTMP3, MSGTMP2 217 sha256msg1 MSG3, MSG2
200 218
201 /* Rounds 48-51 */ 219 /* Rounds 48-51 */
202 mova128 MSGTMP0, MSG 220 mova128 MSG0, MSG
203 paddd 12*16-8*16(SHA256CONSTANTS), MSG 221 paddd 12*16-8*16(SHA256CONSTANTS), MSG
204 sha256rnds2 MSG, STATE0, STATE1 222 sha256rnds2 MSG, STATE0, STATE1
205 mova128 MSGTMP0, XMMTMP 223 mova128 MSG0, XMMTMP
206 palignr $4, MSGTMP3, XMMTMP 224 palignr $4, MSG3, XMMTMP
207 paddd XMMTMP, MSGTMP1 225 paddd XMMTMP, MSG1
208 sha256msg2 MSGTMP0, MSGTMP1 226 sha256msg2 MSG0, MSG1
209 shuf128_32 $0x0E, MSG, MSG 227 MOVE_UPPER64_DOWN(MSG)
210 sha256rnds2 MSG, STATE1, STATE0 228 sha256rnds2 MSG, STATE1, STATE0
211 sha256msg1 MSGTMP0, MSGTMP3 229 sha256msg1 MSG0, MSG3
212 230
213 /* Rounds 52-55 */ 231 /* Rounds 52-55 */
214 mova128 MSGTMP1, MSG 232 mova128 MSG1, MSG
215 paddd 13*16-8*16(SHA256CONSTANTS), MSG 233 paddd 13*16-8*16(SHA256CONSTANTS), MSG
216 sha256rnds2 MSG, STATE0, STATE1 234 sha256rnds2 MSG, STATE0, STATE1
217 mova128 MSGTMP1, XMMTMP 235 mova128 MSG1, XMMTMP
218 palignr $4, MSGTMP0, XMMTMP 236 palignr $4, MSG0, XMMTMP
219 paddd XMMTMP, MSGTMP2 237 paddd XMMTMP, MSG2
220 sha256msg2 MSGTMP1, MSGTMP2 238 sha256msg2 MSG1, MSG2
221 shuf128_32 $0x0E, MSG, MSG 239 MOVE_UPPER64_DOWN(MSG)
222 sha256rnds2 MSG, STATE1, STATE0 240 sha256rnds2 MSG, STATE1, STATE0
223 241
224 /* Rounds 56-59 */ 242 /* Rounds 56-59 */
225 mova128 MSGTMP2, MSG 243 mova128 MSG2, MSG
226 paddd 14*16-8*16(SHA256CONSTANTS), MSG 244 paddd 14*16-8*16(SHA256CONSTANTS), MSG
227 sha256rnds2 MSG, STATE0, STATE1 245 sha256rnds2 MSG, STATE0, STATE1
228 mova128 MSGTMP2, XMMTMP 246 mova128 MSG2, XMMTMP
229 palignr $4, MSGTMP1, XMMTMP 247 palignr $4, MSG1, XMMTMP
230 paddd XMMTMP, MSGTMP3 248 paddd XMMTMP, MSG3
231 sha256msg2 MSGTMP2, MSGTMP3 249 sha256msg2 MSG2, MSG3
232 shuf128_32 $0x0E, MSG, MSG 250 MOVE_UPPER64_DOWN(MSG)
233 sha256rnds2 MSG, STATE1, STATE0 251 sha256rnds2 MSG, STATE1, STATE0
234 252
235 /* Rounds 60-63 */ 253 /* Rounds 60-63 */
236 mova128 MSGTMP3, MSG 254 mova128 MSG3, MSG
237 paddd 15*16-8*16(SHA256CONSTANTS), MSG 255 paddd 15*16-8*16(SHA256CONSTANTS), MSG
238 sha256rnds2 MSG, STATE0, STATE1 256 sha256rnds2 MSG, STATE0, STATE1
239 shuf128_32 $0x0E, MSG, MSG 257 MOVE_UPPER64_DOWN(MSG)
240 sha256rnds2 MSG, STATE1, STATE0 258 sha256rnds2 MSG, STATE1, STATE0
241 259
242 /* Write hash values back in the correct order */ 260 /* Write hash values back in the correct order */
243 mova128 STATE0, XMMTMP 261 mova128 STATE0, XMMTMP
244/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ 262/* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */
245 /* --- -------------- HGDC -- FEBA */ 263 /* --- -------------- HGDC -- FEBA */
246 shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */ 264 shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */
247 shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */ 265 shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */
diff --git a/libbb/hash_sha256_hwaccel_x86-64.S b/libbb/hash_sha256_hwaccel_x86-64.S
index 172c2eae2..ee3abbd1f 100644
--- a/libbb/hash_sha256_hwaccel_x86-64.S
+++ b/libbb/hash_sha256_hwaccel_x86-64.S
@@ -34,24 +34,24 @@
34#define MSG %xmm0 34#define MSG %xmm0
35#define STATE0 %xmm1 35#define STATE0 %xmm1
36#define STATE1 %xmm2 36#define STATE1 %xmm2
37#define MSGTMP0 %xmm3 37#define MSG0 %xmm3
38#define MSGTMP1 %xmm4 38#define MSG1 %xmm4
39#define MSGTMP2 %xmm5 39#define MSG2 %xmm5
40#define MSGTMP3 %xmm6 40#define MSG3 %xmm6
41 41
42#define XMMTMP %xmm7 42#define XMMTMP %xmm7
43 43
44#define SAVE0 %xmm8 44#define SAVE0 %xmm8
45#define SAVE1 %xmm9 45#define SAVE1 %xmm9
46 46
47#define SHUF(a,b,c,d) $(a+(b<<2)+(c<<4)+(d<<6)) 47#define SHUF(a,b,c,d) $((a)+((b)<<2)+((c)<<4)+((d)<<6))
48 48
49 .balign 8 # allow decoders to fetch at least 2 first insns 49 .balign 8 # allow decoders to fetch at least 2 first insns
50sha256_process_block64_shaNI: 50sha256_process_block64_shaNI:
51 51
52 movu128 80+0*16(%rdi), XMMTMP /* ABCD (little-endian dword order) */ 52 movu128 80+0*16(%rdi), XMMTMP /* ABCD (shown least-significant-dword-first) */
53 movu128 80+1*16(%rdi), STATE1 /* EFGH */ 53 movu128 80+1*16(%rdi), STATE1 /* EFGH */
54/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ 54/* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */
55 mova128 STATE1, STATE0 55 mova128 STATE1, STATE0
56 /* --- -------------- ABCD -- EFGH */ 56 /* --- -------------- ABCD -- EFGH */
57 shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */ 57 shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */
@@ -65,185 +65,203 @@ sha256_process_block64_shaNI:
65 mova128 STATE0, SAVE0 65 mova128 STATE0, SAVE0
66 mova128 STATE1, SAVE1 66 mova128 STATE1, SAVE1
67 67
68// sha256rnds2 instruction uses only lower 64 bits of MSG.
69// The code below needs to move upper 64 bits to lower 64 bits
70// for the second sha256rnds2 invocation
71// (what remains in upper bits does not matter).
72// There are several ways to do it:
73// movhlps MSG, MSG // abcd -> cdcd (3 bytes of code)
74// shuf128_32 SHUF(2,3,n,n), MSG, MSG // abcd -> cdXX (4 bytes)
75// punpckhqdq MSG, MSG // abcd -> cdcd (4 bytes)
76// unpckhpd MSG, MSG // abcd -> cdcd (4 bytes)
77// psrldq $8, MSG // abcd -> cd00 (5 bytes)
78// palignr $8, MSG, MSG // abcd -> cdab (6 bytes, SSSE3 insn)
79#define MOVE_UPPER64_DOWN(reg) movhlps reg, reg
80//#define MOVE_UPPER64_DOWN(reg) shuf128_32 SHUF(2,3,0,0), reg, reg
81//#define MOVE_UPPER64_DOWN(reg) punpckhqdq reg, reg
82//#define MOVE_UPPER64_DOWN(reg) unpckhpd reg, reg
83//#define MOVE_UPPER64_DOWN(reg) psrldq $8, reg
84//#define MOVE_UPPER64_DOWN(reg) palignr $8, reg, reg
85
68 /* Rounds 0-3 */ 86 /* Rounds 0-3 */
69 movu128 0*16(DATA_PTR), MSG 87 movu128 0*16(DATA_PTR), MSG
70 pshufb XMMTMP, MSG 88 pshufb XMMTMP, MSG
71 mova128 MSG, MSGTMP0 89 mova128 MSG, MSG0
72 paddd 0*16-8*16(SHA256CONSTANTS), MSG 90 paddd 0*16-8*16(SHA256CONSTANTS), MSG
73 sha256rnds2 MSG, STATE0, STATE1 91 sha256rnds2 MSG, STATE0, STATE1
74 shuf128_32 $0x0E, MSG, MSG 92 MOVE_UPPER64_DOWN(MSG)
75 sha256rnds2 MSG, STATE1, STATE0 93 sha256rnds2 MSG, STATE1, STATE0
76 94
77 /* Rounds 4-7 */ 95 /* Rounds 4-7 */
78 movu128 1*16(DATA_PTR), MSG 96 movu128 1*16(DATA_PTR), MSG
79 pshufb XMMTMP, MSG 97 pshufb XMMTMP, MSG
80 mova128 MSG, MSGTMP1 98 mova128 MSG, MSG1
81 paddd 1*16-8*16(SHA256CONSTANTS), MSG 99 paddd 1*16-8*16(SHA256CONSTANTS), MSG
82 sha256rnds2 MSG, STATE0, STATE1 100 sha256rnds2 MSG, STATE0, STATE1
83 shuf128_32 $0x0E, MSG, MSG 101 MOVE_UPPER64_DOWN(MSG)
84 sha256rnds2 MSG, STATE1, STATE0 102 sha256rnds2 MSG, STATE1, STATE0
85 sha256msg1 MSGTMP1, MSGTMP0 103 sha256msg1 MSG1, MSG0
86 104
87 /* Rounds 8-11 */ 105 /* Rounds 8-11 */
88 movu128 2*16(DATA_PTR), MSG 106 movu128 2*16(DATA_PTR), MSG
89 pshufb XMMTMP, MSG 107 pshufb XMMTMP, MSG
90 mova128 MSG, MSGTMP2 108 mova128 MSG, MSG2
91 paddd 2*16-8*16(SHA256CONSTANTS), MSG 109 paddd 2*16-8*16(SHA256CONSTANTS), MSG
92 sha256rnds2 MSG, STATE0, STATE1 110 sha256rnds2 MSG, STATE0, STATE1
93 shuf128_32 $0x0E, MSG, MSG 111 MOVE_UPPER64_DOWN(MSG)
94 sha256rnds2 MSG, STATE1, STATE0 112 sha256rnds2 MSG, STATE1, STATE0
95 sha256msg1 MSGTMP2, MSGTMP1 113 sha256msg1 MSG2, MSG1
96 114
97 /* Rounds 12-15 */ 115 /* Rounds 12-15 */
98 movu128 3*16(DATA_PTR), MSG 116 movu128 3*16(DATA_PTR), MSG
99 pshufb XMMTMP, MSG 117 pshufb XMMTMP, MSG
100/* ...to here */ 118/* ...to here */
101 mova128 MSG, MSGTMP3 119 mova128 MSG, MSG3
102 paddd 3*16-8*16(SHA256CONSTANTS), MSG 120 paddd 3*16-8*16(SHA256CONSTANTS), MSG
103 sha256rnds2 MSG, STATE0, STATE1 121 sha256rnds2 MSG, STATE0, STATE1
104 mova128 MSGTMP3, XMMTMP 122 mova128 MSG3, XMMTMP
105 palignr $4, MSGTMP2, XMMTMP 123 palignr $4, MSG2, XMMTMP
106 paddd XMMTMP, MSGTMP0 124 paddd XMMTMP, MSG0
107 sha256msg2 MSGTMP3, MSGTMP0 125 sha256msg2 MSG3, MSG0
108 shuf128_32 $0x0E, MSG, MSG 126 MOVE_UPPER64_DOWN(MSG)
109 sha256rnds2 MSG, STATE1, STATE0 127 sha256rnds2 MSG, STATE1, STATE0
110 sha256msg1 MSGTMP3, MSGTMP2 128 sha256msg1 MSG3, MSG2
111 129
112 /* Rounds 16-19 */ 130 /* Rounds 16-19 */
113 mova128 MSGTMP0, MSG 131 mova128 MSG0, MSG
114 paddd 4*16-8*16(SHA256CONSTANTS), MSG 132 paddd 4*16-8*16(SHA256CONSTANTS), MSG
115 sha256rnds2 MSG, STATE0, STATE1 133 sha256rnds2 MSG, STATE0, STATE1
116 mova128 MSGTMP0, XMMTMP 134 mova128 MSG0, XMMTMP
117 palignr $4, MSGTMP3, XMMTMP 135 palignr $4, MSG3, XMMTMP
118 paddd XMMTMP, MSGTMP1 136 paddd XMMTMP, MSG1
119 sha256msg2 MSGTMP0, MSGTMP1 137 sha256msg2 MSG0, MSG1
120 shuf128_32 $0x0E, MSG, MSG 138 MOVE_UPPER64_DOWN(MSG)
121 sha256rnds2 MSG, STATE1, STATE0 139 sha256rnds2 MSG, STATE1, STATE0
122 sha256msg1 MSGTMP0, MSGTMP3 140 sha256msg1 MSG0, MSG3
123 141
124 /* Rounds 20-23 */ 142 /* Rounds 20-23 */
125 mova128 MSGTMP1, MSG 143 mova128 MSG1, MSG
126 paddd 5*16-8*16(SHA256CONSTANTS), MSG 144 paddd 5*16-8*16(SHA256CONSTANTS), MSG
127 sha256rnds2 MSG, STATE0, STATE1 145 sha256rnds2 MSG, STATE0, STATE1
128 mova128 MSGTMP1, XMMTMP 146 mova128 MSG1, XMMTMP
129 palignr $4, MSGTMP0, XMMTMP 147 palignr $4, MSG0, XMMTMP
130 paddd XMMTMP, MSGTMP2 148 paddd XMMTMP, MSG2
131 sha256msg2 MSGTMP1, MSGTMP2 149 sha256msg2 MSG1, MSG2
132 shuf128_32 $0x0E, MSG, MSG 150 MOVE_UPPER64_DOWN(MSG)
133 sha256rnds2 MSG, STATE1, STATE0 151 sha256rnds2 MSG, STATE1, STATE0
134 sha256msg1 MSGTMP1, MSGTMP0 152 sha256msg1 MSG1, MSG0
135 153
136 /* Rounds 24-27 */ 154 /* Rounds 24-27 */
137 mova128 MSGTMP2, MSG 155 mova128 MSG2, MSG
138 paddd 6*16-8*16(SHA256CONSTANTS), MSG 156 paddd 6*16-8*16(SHA256CONSTANTS), MSG
139 sha256rnds2 MSG, STATE0, STATE1 157 sha256rnds2 MSG, STATE0, STATE1
140 mova128 MSGTMP2, XMMTMP 158 mova128 MSG2, XMMTMP
141 palignr $4, MSGTMP1, XMMTMP 159 palignr $4, MSG1, XMMTMP
142 paddd XMMTMP, MSGTMP3 160 paddd XMMTMP, MSG3
143 sha256msg2 MSGTMP2, MSGTMP3 161 sha256msg2 MSG2, MSG3
144 shuf128_32 $0x0E, MSG, MSG 162 MOVE_UPPER64_DOWN(MSG)
145 sha256rnds2 MSG, STATE1, STATE0 163 sha256rnds2 MSG, STATE1, STATE0
146 sha256msg1 MSGTMP2, MSGTMP1 164 sha256msg1 MSG2, MSG1
147 165
148 /* Rounds 28-31 */ 166 /* Rounds 28-31 */
149 mova128 MSGTMP3, MSG 167 mova128 MSG3, MSG
150 paddd 7*16-8*16(SHA256CONSTANTS), MSG 168 paddd 7*16-8*16(SHA256CONSTANTS), MSG
151 sha256rnds2 MSG, STATE0, STATE1 169 sha256rnds2 MSG, STATE0, STATE1
152 mova128 MSGTMP3, XMMTMP 170 mova128 MSG3, XMMTMP
153 palignr $4, MSGTMP2, XMMTMP 171 palignr $4, MSG2, XMMTMP
154 paddd XMMTMP, MSGTMP0 172 paddd XMMTMP, MSG0
155 sha256msg2 MSGTMP3, MSGTMP0 173 sha256msg2 MSG3, MSG0
156 shuf128_32 $0x0E, MSG, MSG 174 MOVE_UPPER64_DOWN(MSG)
157 sha256rnds2 MSG, STATE1, STATE0 175 sha256rnds2 MSG, STATE1, STATE0
158 sha256msg1 MSGTMP3, MSGTMP2 176 sha256msg1 MSG3, MSG2
159 177
160 /* Rounds 32-35 */ 178 /* Rounds 32-35 */
161 mova128 MSGTMP0, MSG 179 mova128 MSG0, MSG
162 paddd 8*16-8*16(SHA256CONSTANTS), MSG 180 paddd 8*16-8*16(SHA256CONSTANTS), MSG
163 sha256rnds2 MSG, STATE0, STATE1 181 sha256rnds2 MSG, STATE0, STATE1
164 mova128 MSGTMP0, XMMTMP 182 mova128 MSG0, XMMTMP
165 palignr $4, MSGTMP3, XMMTMP 183 palignr $4, MSG3, XMMTMP
166 paddd XMMTMP, MSGTMP1 184 paddd XMMTMP, MSG1
167 sha256msg2 MSGTMP0, MSGTMP1 185 sha256msg2 MSG0, MSG1
168 shuf128_32 $0x0E, MSG, MSG 186 MOVE_UPPER64_DOWN(MSG)
169 sha256rnds2 MSG, STATE1, STATE0 187 sha256rnds2 MSG, STATE1, STATE0
170 sha256msg1 MSGTMP0, MSGTMP3 188 sha256msg1 MSG0, MSG3
171 189
172 /* Rounds 36-39 */ 190 /* Rounds 36-39 */
173 mova128 MSGTMP1, MSG 191 mova128 MSG1, MSG
174 paddd 9*16-8*16(SHA256CONSTANTS), MSG 192 paddd 9*16-8*16(SHA256CONSTANTS), MSG
175 sha256rnds2 MSG, STATE0, STATE1 193 sha256rnds2 MSG, STATE0, STATE1
176 mova128 MSGTMP1, XMMTMP 194 mova128 MSG1, XMMTMP
177 palignr $4, MSGTMP0, XMMTMP 195 palignr $4, MSG0, XMMTMP
178 paddd XMMTMP, MSGTMP2 196 paddd XMMTMP, MSG2
179 sha256msg2 MSGTMP1, MSGTMP2 197 sha256msg2 MSG1, MSG2
180 shuf128_32 $0x0E, MSG, MSG 198 MOVE_UPPER64_DOWN(MSG)
181 sha256rnds2 MSG, STATE1, STATE0 199 sha256rnds2 MSG, STATE1, STATE0
182 sha256msg1 MSGTMP1, MSGTMP0 200 sha256msg1 MSG1, MSG0
183 201
184 /* Rounds 40-43 */ 202 /* Rounds 40-43 */
185 mova128 MSGTMP2, MSG 203 mova128 MSG2, MSG
186 paddd 10*16-8*16(SHA256CONSTANTS), MSG 204 paddd 10*16-8*16(SHA256CONSTANTS), MSG
187 sha256rnds2 MSG, STATE0, STATE1 205 sha256rnds2 MSG, STATE0, STATE1
188 mova128 MSGTMP2, XMMTMP 206 mova128 MSG2, XMMTMP
189 palignr $4, MSGTMP1, XMMTMP 207 palignr $4, MSG1, XMMTMP
190 paddd XMMTMP, MSGTMP3 208 paddd XMMTMP, MSG3
191 sha256msg2 MSGTMP2, MSGTMP3 209 sha256msg2 MSG2, MSG3
192 shuf128_32 $0x0E, MSG, MSG 210 MOVE_UPPER64_DOWN(MSG)
193 sha256rnds2 MSG, STATE1, STATE0 211 sha256rnds2 MSG, STATE1, STATE0
194 sha256msg1 MSGTMP2, MSGTMP1 212 sha256msg1 MSG2, MSG1
195 213
196 /* Rounds 44-47 */ 214 /* Rounds 44-47 */
197 mova128 MSGTMP3, MSG 215 mova128 MSG3, MSG
198 paddd 11*16-8*16(SHA256CONSTANTS), MSG 216 paddd 11*16-8*16(SHA256CONSTANTS), MSG
199 sha256rnds2 MSG, STATE0, STATE1 217 sha256rnds2 MSG, STATE0, STATE1
200 mova128 MSGTMP3, XMMTMP 218 mova128 MSG3, XMMTMP
201 palignr $4, MSGTMP2, XMMTMP 219 palignr $4, MSG2, XMMTMP
202 paddd XMMTMP, MSGTMP0 220 paddd XMMTMP, MSG0
203 sha256msg2 MSGTMP3, MSGTMP0 221 sha256msg2 MSG3, MSG0
204 shuf128_32 $0x0E, MSG, MSG 222 MOVE_UPPER64_DOWN(MSG)
205 sha256rnds2 MSG, STATE1, STATE0 223 sha256rnds2 MSG, STATE1, STATE0
206 sha256msg1 MSGTMP3, MSGTMP2 224 sha256msg1 MSG3, MSG2
207 225
208 /* Rounds 48-51 */ 226 /* Rounds 48-51 */
209 mova128 MSGTMP0, MSG 227 mova128 MSG0, MSG
210 paddd 12*16-8*16(SHA256CONSTANTS), MSG 228 paddd 12*16-8*16(SHA256CONSTANTS), MSG
211 sha256rnds2 MSG, STATE0, STATE1 229 sha256rnds2 MSG, STATE0, STATE1
212 mova128 MSGTMP0, XMMTMP 230 mova128 MSG0, XMMTMP
213 palignr $4, MSGTMP3, XMMTMP 231 palignr $4, MSG3, XMMTMP
214 paddd XMMTMP, MSGTMP1 232 paddd XMMTMP, MSG1
215 sha256msg2 MSGTMP0, MSGTMP1 233 sha256msg2 MSG0, MSG1
216 shuf128_32 $0x0E, MSG, MSG 234 MOVE_UPPER64_DOWN(MSG)
217 sha256rnds2 MSG, STATE1, STATE0 235 sha256rnds2 MSG, STATE1, STATE0
218 sha256msg1 MSGTMP0, MSGTMP3 236 sha256msg1 MSG0, MSG3
219 237
220 /* Rounds 52-55 */ 238 /* Rounds 52-55 */
221 mova128 MSGTMP1, MSG 239 mova128 MSG1, MSG
222 paddd 13*16-8*16(SHA256CONSTANTS), MSG 240 paddd 13*16-8*16(SHA256CONSTANTS), MSG
223 sha256rnds2 MSG, STATE0, STATE1 241 sha256rnds2 MSG, STATE0, STATE1
224 mova128 MSGTMP1, XMMTMP 242 mova128 MSG1, XMMTMP
225 palignr $4, MSGTMP0, XMMTMP 243 palignr $4, MSG0, XMMTMP
226 paddd XMMTMP, MSGTMP2 244 paddd XMMTMP, MSG2
227 sha256msg2 MSGTMP1, MSGTMP2 245 sha256msg2 MSG1, MSG2
228 shuf128_32 $0x0E, MSG, MSG 246 MOVE_UPPER64_DOWN(MSG)
229 sha256rnds2 MSG, STATE1, STATE0 247 sha256rnds2 MSG, STATE1, STATE0
230 248
231 /* Rounds 56-59 */ 249 /* Rounds 56-59 */
232 mova128 MSGTMP2, MSG 250 mova128 MSG2, MSG
233 paddd 14*16-8*16(SHA256CONSTANTS), MSG 251 paddd 14*16-8*16(SHA256CONSTANTS), MSG
234 sha256rnds2 MSG, STATE0, STATE1 252 sha256rnds2 MSG, STATE0, STATE1
235 mova128 MSGTMP2, XMMTMP 253 mova128 MSG2, XMMTMP
236 palignr $4, MSGTMP1, XMMTMP 254 palignr $4, MSG1, XMMTMP
237 paddd XMMTMP, MSGTMP3 255 paddd XMMTMP, MSG3
238 sha256msg2 MSGTMP2, MSGTMP3 256 sha256msg2 MSG2, MSG3
239 shuf128_32 $0x0E, MSG, MSG 257 MOVE_UPPER64_DOWN(MSG)
240 sha256rnds2 MSG, STATE1, STATE0 258 sha256rnds2 MSG, STATE1, STATE0
241 259
242 /* Rounds 60-63 */ 260 /* Rounds 60-63 */
243 mova128 MSGTMP3, MSG 261 mova128 MSG3, MSG
244 paddd 15*16-8*16(SHA256CONSTANTS), MSG 262 paddd 15*16-8*16(SHA256CONSTANTS), MSG
245 sha256rnds2 MSG, STATE0, STATE1 263 sha256rnds2 MSG, STATE0, STATE1
246 shuf128_32 $0x0E, MSG, MSG 264 MOVE_UPPER64_DOWN(MSG)
247 sha256rnds2 MSG, STATE1, STATE0 265 sha256rnds2 MSG, STATE1, STATE0
248 266
249 /* Add current hash values with previously saved */ 267 /* Add current hash values with previously saved */
@@ -252,7 +270,7 @@ sha256_process_block64_shaNI:
252 270
253 /* Write hash values back in the correct order */ 271 /* Write hash values back in the correct order */
254 mova128 STATE0, XMMTMP 272 mova128 STATE0, XMMTMP
255/* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ 273/* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */
256 /* --- -------------- HGDC -- FEBA */ 274 /* --- -------------- HGDC -- FEBA */
257 shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */ 275 shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */
258 shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */ 276 shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 8e2b37853..c8a0f37fe 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -457,7 +457,7 @@ static void put_cur_glyph_and_inc_cursor(void)
457 * have automargin (IOW: it is moving cursor to next line 457 * have automargin (IOW: it is moving cursor to next line
458 * by itself (which is wrong for VT-10x terminals)), 458 * by itself (which is wrong for VT-10x terminals)),
459 * this will break things: there will be one extra empty line */ 459 * this will break things: there will be one extra empty line */
460 puts("\r"); /* + implicit '\n' */ 460 fputs("\r\n", stderr);
461#else 461#else
462 /* VT-10x terminals don't wrap cursor to next line when last char 462 /* VT-10x terminals don't wrap cursor to next line when last char
463 * on the line is printed - cursor stays "over" this char. 463 * on the line is printed - cursor stays "over" this char.
@@ -1302,9 +1302,10 @@ static void showfiles(void)
1302 ); 1302 );
1303 } 1303 }
1304 if (ENABLE_UNICODE_SUPPORT) 1304 if (ENABLE_UNICODE_SUPPORT)
1305 puts(printable_string(matches[n])); 1305 fputs(printable_string(matches[n]), stderr);
1306 else 1306 else
1307 puts(matches[n]); 1307 fputs(matches[n], stderr);
1308 bb_putchar_stderr('\n');
1308 } 1309 }
1309} 1310}
1310 1311
@@ -1595,8 +1596,8 @@ unsigned FAST_FUNC size_from_HISTFILESIZE(const char *hp)
1595# endif 1596# endif
1596 if (hp) { 1597 if (hp) {
1597 size = atoi(hp); 1598 size = atoi(hp);
1598 if (size <= 0) 1599 if (size < 0)
1599 return 1; 1600 return 0;
1600 if (size > MAX_HISTORY) 1601 if (size > MAX_HISTORY)
1601 return MAX_HISTORY; 1602 return MAX_HISTORY;
1602 } 1603 }
@@ -1690,18 +1691,21 @@ static void load_history(line_input_t *st_parm)
1690 /* NB: do not trash old history if file can't be opened */ 1691 /* NB: do not trash old history if file can't be opened */
1691 1692
1692 fp = fopen_for_read(st_parm->hist_file); 1693 fp = fopen_for_read(st_parm->hist_file);
1693 if (fp) { 1694 if (!fp)
1694 /* clean up old history */ 1695 return;
1695 for (idx = st_parm->cnt_history; idx > 0;) { 1696
1696 idx--; 1697 /* clean up old history */
1697 free(st_parm->history[idx]); 1698 for (idx = st_parm->cnt_history; idx > 0;) {
1698 st_parm->history[idx] = NULL; 1699 idx--;
1699 } 1700 free(st_parm->history[idx]);
1701 st_parm->history[idx] = NULL;
1702 }
1700 1703
1701 /* fill temp_h[], retaining only last MAX_HISTORY lines */ 1704 /* fill temp_h[], retaining only last max_history lines */
1702 memset(temp_h, 0, sizeof(temp_h)); 1705 memset(temp_h, 0, sizeof(temp_h));
1703 idx = 0; 1706 idx = 0;
1704 st_parm->cnt_history_in_file = 0; 1707 st_parm->cnt_history_in_file = 0;
1708 if (st_parm->max_history != 0) {
1705 while ((line = xmalloc_fgetline(fp)) != NULL) { 1709 while ((line = xmalloc_fgetline(fp)) != NULL) {
1706 if (line[0] == '\0') { 1710 if (line[0] == '\0') {
1707 free(line); 1711 free(line);
@@ -1714,34 +1718,34 @@ static void load_history(line_input_t *st_parm)
1714 if (idx == st_parm->max_history) 1718 if (idx == st_parm->max_history)
1715 idx = 0; 1719 idx = 0;
1716 } 1720 }
1717 fclose(fp); 1721 }
1718 1722 fclose(fp);
1719 /* find first non-NULL temp_h[], if any */
1720 if (st_parm->cnt_history_in_file) {
1721 while (temp_h[idx] == NULL) {
1722 idx++;
1723 if (idx == st_parm->max_history)
1724 idx = 0;
1725 }
1726 }
1727 1723
1728 /* copy temp_h[] to st_parm->history[] */ 1724 /* find first non-NULL temp_h[], if any */
1729 for (i = 0; i < st_parm->max_history;) { 1725 if (st_parm->cnt_history_in_file != 0) {
1730 line = temp_h[idx]; 1726 while (temp_h[idx] == NULL) {
1731 if (!line)
1732 break;
1733 idx++; 1727 idx++;
1734 if (idx == st_parm->max_history) 1728 if (idx == st_parm->max_history)
1735 idx = 0; 1729 idx = 0;
1736 line_len = strlen(line);
1737 if (line_len >= MAX_LINELEN)
1738 line[MAX_LINELEN-1] = '\0';
1739 st_parm->history[i++] = line;
1740 } 1730 }
1741 st_parm->cnt_history = i;
1742 if (ENABLE_FEATURE_EDITING_SAVE_ON_EXIT)
1743 st_parm->cnt_history_in_file = i;
1744 } 1731 }
1732
1733 /* copy temp_h[] to st_parm->history[] */
1734 for (i = 0; i < st_parm->max_history;) {
1735 line = temp_h[idx];
1736 if (!line)
1737 break;
1738 idx++;
1739 if (idx == st_parm->max_history)
1740 idx = 0;
1741 line_len = strlen(line);
1742 if (line_len >= MAX_LINELEN)
1743 line[MAX_LINELEN-1] = '\0';
1744 st_parm->history[i++] = line;
1745 }
1746 st_parm->cnt_history = i;
1747 if (ENABLE_FEATURE_EDITING_SAVE_ON_EXIT)
1748 st_parm->cnt_history_in_file = i;
1745} 1749}
1746 1750
1747# if ENABLE_FEATURE_EDITING_SAVE_ON_EXIT 1751# if ENABLE_FEATURE_EDITING_SAVE_ON_EXIT
@@ -1749,17 +1753,27 @@ void FAST_FUNC save_history(line_input_t *st)
1749{ 1753{
1750 FILE *fp; 1754 FILE *fp;
1751 1755
1752 if (!st || !st->hist_file) 1756 /* bash compat: HISTFILE="" disables history saving */
1757 if (!st || !st->hist_file || !state->hist_file[0])
1753 return; 1758 return;
1754 if (st->cnt_history <= st->cnt_history_in_file) 1759 if (st->cnt_history <= st->cnt_history_in_file)
1755 return; 1760 return; /* no new entries were added */
1761 /* note: if st->max_history is 0, we do not abort: we truncate the history to 0 lines */
1756 1762
1757 fp = fopen(st->hist_file, "a"); 1763 fp = fopen(st->hist_file, (st->max_history == 0 ? "w" : "a"));
1758 if (fp) { 1764 if (fp) {
1759 int i, fd; 1765 int i, fd;
1760 char *new_name; 1766 char *new_name;
1761 line_input_t *st_temp; 1767 line_input_t *st_temp;
1762 1768
1769 /* max_history==0 needs special-casing in general code,
1770 * just handle it in a simpler way: */
1771 if (st->max_history == 0) {
1772 /* fopen("w") already truncated it */
1773 fclose(fp);
1774 return;
1775 }
1776
1763 for (i = st->cnt_history_in_file; i < st->cnt_history; i++) 1777 for (i = st->cnt_history_in_file; i < st->cnt_history; i++)
1764 fprintf(fp, "%s\n", st->history[i]); 1778 fprintf(fp, "%s\n", st->history[i]);
1765 fclose(fp); 1779 fclose(fp);
@@ -1769,6 +1783,8 @@ void FAST_FUNC save_history(line_input_t *st)
1769 st_temp = new_line_input_t(st->flags); 1783 st_temp = new_line_input_t(st->flags);
1770 st_temp->hist_file = st->hist_file; 1784 st_temp->hist_file = st->hist_file;
1771 st_temp->max_history = st->max_history; 1785 st_temp->max_history = st->max_history;
1786 /* load no more than max_history last lines */
1787 /* (in unlikely case that file disappeared, st_temp gets empty history) */
1772 load_history(st_temp); 1788 load_history(st_temp);
1773 1789
1774 /* write out temp file and replace hist_file atomically */ 1790 /* write out temp file and replace hist_file atomically */
@@ -1792,13 +1808,13 @@ static void save_history(char *str)
1792 int fd; 1808 int fd;
1793 int len, len2; 1809 int len, len2;
1794 1810
1795 if (!state->hist_file) 1811 /* bash compat: HISTFILE="" disables history saving */
1812 if (!state->hist_file || !state->hist_file[0])
1796 return; 1813 return;
1797 1814
1798 fd = open(state->hist_file, O_WRONLY | O_CREAT | O_APPEND, 0600); 1815 fd = open(state->hist_file, O_WRONLY | O_CREAT | O_APPEND, 0600);
1799 if (fd < 0) 1816 if (fd < 0)
1800 return; 1817 return;
1801 xlseek(fd, 0, SEEK_END); /* paranoia */
1802 len = strlen(str); 1818 len = strlen(str);
1803 str[len] = '\n'; /* we (try to) do atomic write */ 1819 str[len] = '\n'; /* we (try to) do atomic write */
1804 len2 = full_write(fd, str, len + 1); 1820 len2 = full_write(fd, str, len + 1);
@@ -1853,13 +1869,10 @@ static void remember_in_history(char *str)
1853 if (str[0] == '\0') 1869 if (str[0] == '\0')
1854 return; 1870 return;
1855 i = state->cnt_history; 1871 i = state->cnt_history;
1856 /* Don't save dupes */ 1872 /* Don't save dups */
1857 if (i && strcmp(state->history[i-1], str) == 0) 1873 if (i != 0 && strcmp(state->history[i-1], str) == 0)
1858 return; 1874 return;
1859 1875
1860 free(state->history[state->max_history]); /* redundant, paranoia */
1861 state->history[state->max_history] = NULL; /* redundant, paranoia */
1862
1863 /* If history[] is full, remove the oldest command */ 1876 /* If history[] is full, remove the oldest command */
1864 /* we need to keep history[state->max_history] empty, hence >=, not > */ 1877 /* we need to keep history[state->max_history] empty, hence >=, not > */
1865 if (i >= state->max_history) { 1878 if (i >= state->max_history) {
@@ -1872,7 +1885,7 @@ static void remember_in_history(char *str)
1872 state->cnt_history_in_file--; 1885 state->cnt_history_in_file--;
1873# endif 1886# endif
1874 } 1887 }
1875 /* i <= state->max_history-1 */ 1888 /* i < state->max_history */
1876 state->history[i++] = xstrdup(str); 1889 state->history[i++] = xstrdup(str);
1877 /* i <= state->max_history */ 1890 /* i <= state->max_history */
1878 state->cur_history = i; 1891 state->cur_history = i;
@@ -2388,7 +2401,6 @@ static int lineedit_read_key(char *read_key_buffer, int timeout)
2388 errno = EINTR; 2401 errno = EINTR;
2389 return -1; 2402 return -1;
2390 } 2403 }
2391//FIXME: still races here with signals, but small window to poll() inside read_key
2392 IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 1;) 2404 IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 1;)
2393 /* errno = 0; - read_key does this itself */ 2405 /* errno = 0; - read_key does this itself */
2394 ic = read_key(STDIN_FILENO, read_key_buffer, timeout); 2406 ic = read_key(STDIN_FILENO, read_key_buffer, timeout);
diff --git a/libbb/poll_with_signals.c b/libbb/poll_with_signals.c
new file mode 100644
index 000000000..d3c005418
--- /dev/null
+++ b/libbb/poll_with_signals.c
@@ -0,0 +1,48 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Utility routines.
4 *
5 * Copyright (C) 2025 Denys Vlasenko <vda.linux@googlemail.com>
6 *
7 * Licensed under GPLv2, see file LICENSE in this source tree.
8 */
9//kbuild:lib-$(CONFIG_PLATFORM_POSIX) += poll_with_signals.o
10
11#include "libbb.h"
12
13/* Shells, for example, need their line input and "read" builtin
14 * to be interruptible, and the naive handling of it a-la:
15 * if (bb_got_signal) {
16 * errno = EINTR;
17 * return -1;
18 * }
19 * poll(pfd, 1, -1); // signal here would set EINTR
20 * is racy.
21 * This is a bit heavy-handed, but safe wrt races:
22 */
23int FAST_FUNC check_got_signal_and_poll(struct pollfd pfd[1], int timeout)
24{
25 int n;
26 struct timespec tv;
27 sigset_t orig_mask;
28
29 if (bb_got_signal) /* optimization */
30 goto eintr;
31
32 if (timeout >= 0) {
33 tv.tv_sec = timeout / 1000;
34 tv.tv_nsec = (timeout % 1000) * 1000000;
35 }
36 /* test bb_got_signal, then poll(), atomically wrt signals */
37 sigfillset(&orig_mask);
38 sigprocmask2(SIG_BLOCK, &orig_mask);
39 if (bb_got_signal) {
40 sigprocmask2(SIG_SETMASK, &orig_mask);
41 eintr:
42 errno = EINTR; /* inform the caller that we got a signal */
43 return -1;
44 }
45 n = ppoll(pfd, 1, timeout >= 0 ? &tv : NULL, &orig_mask);
46 sigprocmask2(SIG_SETMASK, &orig_mask);
47 return n;
48}
diff --git a/libbb/pw_ascii64.c b/libbb/pw_ascii64.c
new file mode 100644
index 000000000..3993932ca
--- /dev/null
+++ b/libbb/pw_ascii64.c
@@ -0,0 +1,91 @@
1/* vi: set sw=4 ts=4: */
2/*
3 * Utility routines.
4 *
5 * Copyright (C) 1999-2004 by Erik Andersen <andersen@codepoet.org>
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9
10/* Returns >=64 for invalid chars */
11int FAST_FUNC a2i64(char c)
12{
13 unsigned char ch = c;
14 if (ch >= 'a')
15 /* "a..z" to 38..63 */
16 /* anything after "z": positive int >= 64 */
17 return (ch - 'a' + 38);
18
19 if (ch > 'Z')
20 /* after "Z" but before "a": positive byte >= 64 */
21 return ch;
22
23 if (ch >= 'A')
24 /* "A..Z" to 12..37 */
25 return (ch - 'A' + 12);
26
27 if (ch > '9')
28 return 64;
29
30 /* "./0123456789" to 0,1,2..11 */
31 /* anything before "." becomes positive byte >= 64 */
32 return (unsigned char)(ch - '.');
33}
34
35/* 0..63 ->
36 * "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
37 */
38int FAST_FUNC i2a64(int i)
39{
40 i &= 0x3f;
41
42 i += '.';
43 /* the above maps 0..11 to "./0123456789":
44 * ACSII codes of "./" are ('0'-2) and ('0'-1) */
45
46 if (i > '9')
47 i += ('A' - '9' - 1);
48 if (i > 'Z')
49 i += ('a' - 'Z' - 1);
50 return i;
51}
52
53char* FAST_FUNC
54num2str64_lsb_first(char *s, unsigned v, int n)
55{
56 while (--n >= 0) {
57 *s++ = i2a64(v);
58 v >>= 6;
59 }
60 return s;
61}
62
63static void
64num2str64_4chars_msb_first(char *s, unsigned v)
65{
66 *s++ = i2a64(v >> 18); /* bits 23..18 */
67 *s++ = i2a64(v >> 12); /* bits 17..12 */
68 *s++ = i2a64(v >> 6); /* bits 11..6 */
69 *s = i2a64(v); /* bits 5..0 */
70}
71
72int FAST_FUNC crypt_make_rand64encoded(char *p, int cnt /*, int x */)
73{
74 /* was: x += ... */
75 unsigned x = getpid() + monotonic_us();
76 do {
77 /* x = (x*1664525 + 1013904223) % 2^32 generator is lame
78 * (low-order bit is not "random", etc...),
79 * but for our purposes it is good enough */
80 x = x*1664525 + 1013904223;
81 /* BTW, Park and Miller's "minimal standard generator" is
82 * x = x*16807 % ((2^31)-1)
83 * It has no problem with visibly alternating lowest bit
84 * but is also weak in cryptographic sense + needs div,
85 * which needs more code (and slower) on many CPUs */
86 *p++ = i2a64(x >> 16);
87 *p++ = i2a64(x >> 22);
88 } while (--cnt);
89 *p = '\0';
90 return x;
91}
diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c
index 3463fd95b..93653de9f 100644
--- a/libbb/pw_encrypt.c
+++ b/libbb/pw_encrypt.c
@@ -13,48 +13,11 @@
13#endif 13#endif
14#include "libbb.h" 14#include "libbb.h"
15 15
16/* static const uint8_t ascii64[] ALIGN1 = 16#include "pw_ascii64.c"
17 * "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
18 */
19
20static int i64c(int i)
21{
22 i &= 0x3f;
23 if (i == 0)
24 return '.';
25 if (i == 1)
26 return '/';
27 if (i < 12)
28 return ('0' - 2 + i);
29 if (i < 38)
30 return ('A' - 12 + i);
31 return ('a' - 38 + i);
32}
33
34int FAST_FUNC crypt_make_salt(char *p, int cnt /*, int x */)
35{
36 /* was: x += ... */
37 unsigned x = getpid() + monotonic_us();
38 do {
39 /* x = (x*1664525 + 1013904223) % 2^32 generator is lame
40 * (low-order bit is not "random", etc...),
41 * but for our purposes it is good enough */
42 x = x*1664525 + 1013904223;
43 /* BTW, Park and Miller's "minimal standard generator" is
44 * x = x*16807 % ((2^31)-1)
45 * It has no problem with visibly alternating lowest bit
46 * but is also weak in cryptographic sense + needs div,
47 * which needs more code (and slower) on many CPUs */
48 *p++ = i64c(x >> 16);
49 *p++ = i64c(x >> 22);
50 } while (--cnt);
51 *p = '\0';
52 return x;
53}
54 17
55char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) 18char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo)
56{ 19{
57 int len = 2/2; 20 int len = 2 / 2;
58 char *salt_ptr = salt; 21 char *salt_ptr = salt;
59 22
60 /* Standard chpasswd uses uppercase algos ("MD5", not "md5"). 23 /* Standard chpasswd uses uppercase algos ("MD5", not "md5").
@@ -67,28 +30,61 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo)
67 *salt_ptr++ = '$'; 30 *salt_ptr++ = '$';
68#if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_SHA 31#if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_SHA
69 if ((algo[0]|0x20) == 's') { /* sha */ 32 if ((algo[0]|0x20) == 's') { /* sha */
70 salt[1] = '5' + (strcasecmp(algo, "sha512") == 0); 33 salt[1] = '5' + (strncasecmp(algo, "sha512", 6) == 0);
71 len = 16/2; 34 len = 16 / 2;
35 }
36#endif
37#if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_YES
38 if ((algo[0]|0x20) == 'y') { /* yescrypt */
39 int rnd;
40 salt[1] = 'y';
41// The "j9T$" below is the default "yescrypt parameters" encoded by yescrypt_encode_params_r():
42//shadow-4.17.4/src/passwd.c
43// salt = crypt_make_salt(NULL, NULL);
44//shadow-4.17.4/lib/salt.c
45//const char *crypt_make_salt(const char *meth, void *arg)
46// if (streq(method, "YESCRYPT")) {
47// MAGNUM(result, 'y');
48// salt_len = YESCRYPT_SALT_SIZE; // 24
49// rounds = YESCRYPT_get_salt_cost(arg); // always Y_COST_DEFAULT == 5 for NULL arg
50// YESCRYPT_salt_cost_to_buf(result, rounds); // always "j9T$"
51// char *retval = crypt_gensalt(result, rounds, NULL, 0);
52//libxcrypt-4.4.38/lib/crypt-yescrypt.c
53//void gensalt_yescrypt_rn (unsigned long count,
54// const uint8_t *rbytes, size_t nrbytes,
55// uint8_t *output, size_t o_size)
56// yescrypt_params_t params = {
57// .flags = YESCRYPT_DEFAULTS,
58// .p = 1,
59// };
60// if (count < 3) ... else
61// params.r = 32; // N in 4KiB
62// params.N = 1ULL << (count + 7); // 3 -> 1024, 4 -> 2048, ... 11 -> 262144
63// yescrypt_encode_params_r(&params, rbytes, nrbytes, outbuf, o_size) // always "$y$j9T$<random>"
64 len = 22 / 2;
65 salt_ptr = stpcpy(salt_ptr, "j9T$");
66 /* append 2*len random chars */
67 rnd = crypt_make_rand64encoded(salt_ptr, len);
68 /* fix up last char: it must be in 0..3 range (encoded as one of "./01").
69 * IOW: salt_ptr[20..21] encode 16th random byte, must not be > 0xff.
70 * Without this, we can generate salts which are rejected
71 * by implementations with more strict salt length check.
72 */
73 salt_ptr[21] = i2a64(rnd & 3);
74 /* For "mkpasswd -m yescrypt PASS j9T$<salt>" use case,
75 * "j9T$" is considered part of salt,
76 * need to return pointer to 'j'. Without -4,
77 * we'd end up using "j9T$j9T$<salt>" as salt.
78 */
79 return salt_ptr - 4;
72 } 80 }
73#endif 81#endif
74 } 82 }
75 crypt_make_salt(salt_ptr, len); 83 crypt_make_rand64encoded(salt_ptr, len); /* appends 2*len random chars */
76 return salt_ptr; 84 return salt_ptr;
77} 85}
78 86
79#if ENABLE_USE_BB_CRYPT 87#if ENABLE_USE_BB_CRYPT
80
81static char*
82to64(char *s, unsigned v, int n)
83{
84 while (--n >= 0) {
85 /* *s++ = ascii64[v & 0x3f]; */
86 *s++ = i64c(v);
87 v >>= 6;
88 }
89 return s;
90}
91
92/* 88/*
93 * DES and MD5 crypt implementations are taken from uclibc. 89 * DES and MD5 crypt implementations are taken from uclibc.
94 * They were modified to not use static buffers. 90 * They were modified to not use static buffers.
@@ -99,6 +95,9 @@ to64(char *s, unsigned v, int n)
99#if ENABLE_USE_BB_CRYPT_SHA 95#if ENABLE_USE_BB_CRYPT_SHA
100#include "pw_encrypt_sha.c" 96#include "pw_encrypt_sha.c"
101#endif 97#endif
98#if ENABLE_USE_BB_CRYPT_YES
99#include "pw_encrypt_yes.c"
100#endif
102 101
103/* Other advanced crypt ids (TODO?): */ 102/* Other advanced crypt ids (TODO?): */
104/* $2$ or $2a$: Blowfish */ 103/* $2$ or $2a$: Blowfish */
@@ -109,7 +108,7 @@ static struct des_ctx *des_ctx;
109/* my_crypt returns malloc'ed data */ 108/* my_crypt returns malloc'ed data */
110static char *my_crypt(const char *key, const char *salt) 109static char *my_crypt(const char *key, const char *salt)
111{ 110{
112 /* MD5 or SHA? */ 111 /* "$x$...." string? */
113 if (salt[0] == '$' && salt[1] && salt[2] == '$') { 112 if (salt[0] == '$' && salt[1] && salt[2] == '$') {
114 if (salt[1] == '1') 113 if (salt[1] == '1')
115 return md5_crypt(xzalloc(MD5_OUT_BUFSIZE), (unsigned char*)key, (unsigned char*)salt); 114 return md5_crypt(xzalloc(MD5_OUT_BUFSIZE), (unsigned char*)key, (unsigned char*)salt);
@@ -117,6 +116,10 @@ static char *my_crypt(const char *key, const char *salt)
117 if (salt[1] == '5' || salt[1] == '6') 116 if (salt[1] == '5' || salt[1] == '6')
118 return sha_crypt((char*)key, (char*)salt); 117 return sha_crypt((char*)key, (char*)salt);
119#endif 118#endif
119#if ENABLE_USE_BB_CRYPT_YES
120 if (salt[1] == 'y')
121 return yes_crypt(key, salt);
122#endif
120 } 123 }
121 124
122 if (!des_cctx) 125 if (!des_cctx)
diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c
index fe8237cfe..ca8aa9bcc 100644
--- a/libbb/pw_encrypt_des.c
+++ b/libbb/pw_encrypt_des.c
@@ -186,39 +186,9 @@ static const uint8_t pbox[32] ALIGN1 = {
186 2, 8, 24, 14, 32, 27, 3, 9, 19, 13, 30, 6, 22, 11, 4, 25 186 2, 8, 24, 14, 32, 27, 3, 9, 19, 13, 30, 6, 22, 11, 4, 25
187}; 187};
188 188
189static const uint32_t bits32[32] ALIGN4 = {
190 0x80000000, 0x40000000, 0x20000000, 0x10000000,
191 0x08000000, 0x04000000, 0x02000000, 0x01000000,
192 0x00800000, 0x00400000, 0x00200000, 0x00100000,
193 0x00080000, 0x00040000, 0x00020000, 0x00010000,
194 0x00008000, 0x00004000, 0x00002000, 0x00001000,
195 0x00000800, 0x00000400, 0x00000200, 0x00000100,
196 0x00000080, 0x00000040, 0x00000020, 0x00000010,
197 0x00000008, 0x00000004, 0x00000002, 0x00000001
198};
199
200static const uint8_t bits8[8] ALIGN1 = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; 189static const uint8_t bits8[8] ALIGN1 = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };
201 190
202 191
203static int
204ascii_to_bin(char ch)
205{
206 if (ch > 'z')
207 return 0;
208 if (ch >= 'a')
209 return (ch - 'a' + 38);
210 if (ch > 'Z')
211 return 0;
212 if (ch >= 'A')
213 return (ch - 'A' + 12);
214 if (ch > '9')
215 return 0;
216 if (ch >= '.')
217 return (ch - '.');
218 return 0;
219}
220
221
222/* Static stuff that stays resident and doesn't change after 192/* Static stuff that stays resident and doesn't change after
223 * being initialized, and therefore doesn't need to be made 193 * being initialized, and therefore doesn't need to be made
224 * reentrant. */ 194 * reentrant. */
@@ -354,11 +324,18 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx)
354 int i, j, b, k, inbit, obit; 324 int i, j, b, k, inbit, obit;
355 uint32_t p; 325 uint32_t p;
356 const uint32_t *bits28, *bits24; 326 const uint32_t *bits28, *bits24;
327 uint32_t bits32[32];
357 328
358 if (!ctx) 329 if (!ctx)
359 ctx = xmalloc(sizeof(*ctx)); 330 ctx = xmalloc(sizeof(*ctx));
360 const_ctx = cctx; 331 const_ctx = cctx;
361 332
333 p = 0x80000000U;
334 for (i = 0; p; i++) {
335 bits32[i] = p;
336 p >>= 1;
337 }
338
362#if USE_REPETITIVE_SPEEDUP 339#if USE_REPETITIVE_SPEEDUP
363 old_rawkey0 = old_rawkey1 = 0; 340 old_rawkey0 = old_rawkey1 = 0;
364 old_salt = 0; 341 old_salt = 0;
@@ -694,21 +671,6 @@ do_des(struct des_ctx *ctx, /*uint32_t l_in, uint32_t r_in,*/ uint32_t *l_out, u
694 671
695#define DES_OUT_BUFSIZE 21 672#define DES_OUT_BUFSIZE 21
696 673
697static void
698to64_msb_first(char *s, unsigned v)
699{
700#if 0
701 *s++ = ascii64[(v >> 18) & 0x3f]; /* bits 23..18 */
702 *s++ = ascii64[(v >> 12) & 0x3f]; /* bits 17..12 */
703 *s++ = ascii64[(v >> 6) & 0x3f]; /* bits 11..6 */
704 *s = ascii64[v & 0x3f]; /* bits 5..0 */
705#endif
706 *s++ = i64c(v >> 18); /* bits 23..18 */
707 *s++ = i64c(v >> 12); /* bits 17..12 */
708 *s++ = i64c(v >> 6); /* bits 11..6 */
709 *s = i64c(v); /* bits 5..0 */
710}
711
712static char * 674static char *
713NOINLINE 675NOINLINE
714des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], 676des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE],
@@ -740,44 +702,28 @@ des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE],
740 */ 702 */
741 output[0] = salt_str[0]; 703 output[0] = salt_str[0];
742 output[1] = salt_str[1]; 704 output[1] = salt_str[1];
743 salt = (ascii_to_bin(salt_str[1]) << 6) 705
744 | ascii_to_bin(salt_str[0]); 706 salt = a2i64(salt_str[0]);
707 if (salt >= 64)
708 return NULL; /* bad salt char */
709 salt |= (a2i64(salt_str[1]) << 6);
710 if (salt >= (64 << 6))
711 return NULL; /* bad salt char */
745 setup_salt(ctx, salt); /* set ctx->saltbits for do_des() */ 712 setup_salt(ctx, salt); /* set ctx->saltbits for do_des() */
746 713
747 /* Do it. */ 714 /* Do it. */
748 do_des(ctx, /*0, 0,*/ &r0, &r1, 25 /* count */); 715 do_des(ctx, /*0, 0,*/ &r0, &r1, 25 /* count */);
749 716
750 /* Now encode the result. */ 717 /* Now encode the result. */
751#if 0
752{
753 uint32_t l = (r0 >> 8);
754 q = (uint8_t *)output + 2;
755 *q++ = ascii64[(l >> 18) & 0x3f]; /* bits 31..26 of r0 */
756 *q++ = ascii64[(l >> 12) & 0x3f]; /* bits 25..20 of r0 */
757 *q++ = ascii64[(l >> 6) & 0x3f]; /* bits 19..14 of r0 */
758 *q++ = ascii64[l & 0x3f]; /* bits 13..8 of r0 */
759 l = ((r0 << 16) | (r1 >> 16));
760 *q++ = ascii64[(l >> 18) & 0x3f]; /* bits 7..2 of r0 */
761 *q++ = ascii64[(l >> 12) & 0x3f]; /* bits 1..2 of r0 and 31..28 of r1 */
762 *q++ = ascii64[(l >> 6) & 0x3f]; /* bits 27..22 of r1 */
763 *q++ = ascii64[l & 0x3f]; /* bits 21..16 of r1 */
764 l = r1 << 2;
765 *q++ = ascii64[(l >> 12) & 0x3f]; /* bits 15..10 of r1 */
766 *q++ = ascii64[(l >> 6) & 0x3f]; /* bits 9..4 of r1 */
767 *q++ = ascii64[l & 0x3f]; /* bits 3..0 of r1 + 00 */
768 *q = 0;
769}
770#else
771 /* Each call takes low-order 24 bits and stores 4 chars */ 718 /* Each call takes low-order 24 bits and stores 4 chars */
772 /* bits 31..8 of r0 */ 719 /* bits 31..8 of r0 */
773 to64_msb_first(output + 2, (r0 >> 8)); 720 num2str64_4chars_msb_first(output + 2, (r0 >> 8));
774 /* bits 7..0 of r0 and 31..16 of r1 */ 721 /* bits 7..0 of r0 and 31..16 of r1 */
775 to64_msb_first(output + 6, (r0 << 16) | (r1 >> 16)); 722 num2str64_4chars_msb_first(output + 6, (r0 << 16) | (r1 >> 16));
776 /* bits 15..0 of r1 and two zero bits (plus extra zero byte) */ 723 /* bits 15..0 of r1 and two zero bits (plus extra zero byte) */
777 to64_msb_first(output + 10, (r1 << 8)); 724 num2str64_4chars_msb_first(output + 10, (r1 << 8));
778 /* extra zero byte is encoded as '.', fixing it */ 725 /* extra zero byte is encoded as '.', fixing it */
779 output[13] = '\0'; 726 output[13] = '\0';
780#endif
781 727
782 return output; 728 return output;
783} 729}
diff --git a/libbb/pw_encrypt_md5.c b/libbb/pw_encrypt_md5.c
index 1e52ecaea..92d039f96 100644
--- a/libbb/pw_encrypt_md5.c
+++ b/libbb/pw_encrypt_md5.c
@@ -149,9 +149,9 @@ md5_crypt(char result[MD5_OUT_BUFSIZE], const unsigned char *pw, const unsigned
149 final[16] = final[5]; 149 final[16] = final[5];
150 for (i = 0; i < 5; i++) { 150 for (i = 0; i < 5; i++) {
151 unsigned l = (final[i] << 16) | (final[i+6] << 8) | final[i+12]; 151 unsigned l = (final[i] << 16) | (final[i+6] << 8) | final[i+12];
152 p = to64(p, l, 4); 152 p = num2str64_lsb_first(p, l, 4);
153 } 153 }
154 p = to64(p, final[11], 2); 154 p = num2str64_lsb_first(p, final[11], 2);
155 *p = '\0'; 155 *p = '\0';
156 156
157 /* Don't leave anything around in vm they could use. */ 157 /* Don't leave anything around in vm they could use. */
diff --git a/libbb/pw_encrypt_sha.c b/libbb/pw_encrypt_sha.c
index 5457d7ab6..695a5c07f 100644
--- a/libbb/pw_encrypt_sha.c
+++ b/libbb/pw_encrypt_sha.c
@@ -84,8 +84,7 @@ sha_crypt(/*const*/ char *key_data, /*const*/ char *salt_data)
84 as a scratch space later. */ 84 as a scratch space later. */
85 salt_data = xstrndup(salt_data, salt_len); 85 salt_data = xstrndup(salt_data, salt_len);
86 /* add "salt$" to result */ 86 /* add "salt$" to result */
87 strcpy(resptr, salt_data); 87 resptr = stpcpy(resptr, salt_data);
88 resptr += salt_len;
89 *resptr++ = '$'; 88 *resptr++ = '$';
90 /* key data doesn't need much processing */ 89 /* key data doesn't need much processing */
91 key_len = strlen(key_data); 90 key_len = strlen(key_data);
@@ -198,7 +197,7 @@ sha_crypt(/*const*/ char *key_data, /*const*/ char *salt_data)
198#define b64_from_24bit(B2, B1, B0, N) \ 197#define b64_from_24bit(B2, B1, B0, N) \
199do { \ 198do { \
200 unsigned w = ((B2) << 16) | ((B1) << 8) | (B0); \ 199 unsigned w = ((B2) << 16) | ((B1) << 8) | (B0); \
201 resptr = to64(resptr, w, N); \ 200 resptr = num2str64_lsb_first(resptr, w, N); \
202} while (0) 201} while (0)
203 if (_32or64 == 32) { /* sha256 */ 202 if (_32or64 == 32) { /* sha256 */
204 unsigned i = 0; 203 unsigned i = 0;
diff --git a/libbb/pw_encrypt_yes.c b/libbb/pw_encrypt_yes.c
new file mode 100644
index 000000000..50bd06418
--- /dev/null
+++ b/libbb/pw_encrypt_yes.c
@@ -0,0 +1,24 @@
1/*
2 * Utility routines.
3 *
4 * Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com>
5 *
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 */
8#include "yescrypt/alg-yescrypt.h"
9
10static char *
11yes_crypt(const char *passwd, const char *salt_data)
12{
13 /* prefix, '$', hash, NUL */
14 char buf[YESCRYPT_PREFIX_LEN + 1 + YESCRYPT_HASH_LEN + 1];
15 char *retval;
16
17 retval = yescrypt_r(
18 (const uint8_t *)passwd, strlen(passwd),
19 (const uint8_t *)salt_data,
20 buf, sizeof(buf));
21 /* The returned value is either buf[], or NULL on error */
22
23 return xstrdup(retval);
24}
diff --git a/libbb/read_key.c b/libbb/read_key.c
index 54886cc9c..2414105ee 100644
--- a/libbb/read_key.c
+++ b/libbb/read_key.c
@@ -11,7 +11,7 @@
11 11
12int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) 12int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout)
13{ 13{
14 struct pollfd pfd; 14 struct pollfd pfd[1];
15 const char *seq; 15 const char *seq;
16 int n; 16 int n;
17 17
@@ -117,8 +117,8 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout)
117 return windows_read_key(fd, buffer, timeout); 117 return windows_read_key(fd, buffer, timeout);
118#endif 118#endif
119 119
120 pfd.fd = fd; 120 pfd->fd = fd;
121 pfd.events = POLLIN; 121 pfd->events = POLLIN;
122 122
123 buffer++; /* saved chars counter is in buffer[-1] now */ 123 buffer++; /* saved chars counter is in buffer[-1] now */
124 124
@@ -126,12 +126,16 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout)
126 errno = 0; 126 errno = 0;
127 n = (unsigned char)buffer[-1]; 127 n = (unsigned char)buffer[-1];
128 if (n == 0) { 128 if (n == 0) {
129 /* If no data, wait for input. 129 /* No data. Wait for input. */
130 * If requested, wait TIMEOUT ms. TIMEOUT = -1 is useful 130
131 * if fd can be in non-blocking mode. 131 /* timeout == -2 means "do not poll". Else: */
132 */
133 if (timeout >= -1) { 132 if (timeout >= -1) {
134 n = poll(&pfd, 1, timeout); 133 /* We must poll even if timeout == -1:
134 * we want to be interrupted if signal arrives,
135 * regardless of SA_RESTART-ness of that signal!
136 */
137 /* test bb_got_signal, then poll(), atomically wrt signals */
138 n = check_got_signal_and_poll(pfd, timeout);
135 if (n < 0 && errno == EINTR) 139 if (n < 0 && errno == EINTR)
136 return n; 140 return n;
137 if (n == 0) { 141 if (n == 0) {
@@ -140,6 +144,7 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout)
140 return -1; 144 return -1;
141 } 145 }
142 } 146 }
147
143 /* It is tempting to read more than one byte here, 148 /* It is tempting to read more than one byte here,
144 * but it breaks pasting. Example: at shell prompt, 149 * but it breaks pasting. Example: at shell prompt,
145 * user presses "c","a","t" and then pastes "\nline\n". 150 * user presses "c","a","t" and then pastes "\nline\n".
@@ -178,7 +183,7 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout)
178 * so if we block for long it's not really an escape sequence. 183 * so if we block for long it's not really an escape sequence.
179 * Timeout is needed to reconnect escape sequences 184 * Timeout is needed to reconnect escape sequences
180 * split up by transmission over a serial console. */ 185 * split up by transmission over a serial console. */
181 if (safe_poll(&pfd, 1, 50) == 0) { 186 if (safe_poll(pfd, 1, 50) == 0) {
182 /* No more data! 187 /* No more data!
183 * Array is sorted from shortest to longest, 188 * Array is sorted from shortest to longest,
184 * we can't match anything later in array - 189 * we can't match anything later in array -
@@ -227,7 +232,7 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout)
227 * n = bytes read. Try to read more until we time out. 232 * n = bytes read. Try to read more until we time out.
228 */ 233 */
229 while (n < KEYCODE_BUFFER_SIZE-1) { /* 1 for count byte at buffer[-1] */ 234 while (n < KEYCODE_BUFFER_SIZE-1) { /* 1 for count byte at buffer[-1] */
230 if (safe_poll(&pfd, 1, 50) == 0) { 235 if (safe_poll(pfd, 1, 50) == 0) {
231 /* No more data! */ 236 /* No more data! */
232 break; 237 break;
233 } 238 }
diff --git a/libbb/u_signal_names.c b/libbb/u_signal_names.c
index ef2b6f891..e233849c5 100644
--- a/libbb/u_signal_names.c
+++ b/libbb/u_signal_names.c
@@ -27,10 +27,6 @@
27 27
28#include "libbb.h" 28#include "libbb.h"
29 29
30#if ENABLE_PLATFORM_MINGW32
31# undef SIGPIPE
32#endif
33
34#if ENABLE_PLATFORM_POSIX || defined(SIGSTKFLT) || defined(SIGVTALRM) 30#if ENABLE_PLATFORM_POSIX || defined(SIGSTKFLT) || defined(SIGVTALRM)
35# define SIGLEN 7 31# define SIGLEN 7
36#elif defined(SIGWINCH) || (ENABLE_FEATURE_RTMINMAX && \ 32#elif defined(SIGWINCH) || (ENABLE_FEATURE_RTMINMAX && \
diff --git a/libbb/xfuncs.c b/libbb/xfuncs.c
index 7df1a4cd3..5609858d1 100644
--- a/libbb/xfuncs.c
+++ b/libbb/xfuncs.c
@@ -333,7 +333,7 @@ int FAST_FUNC get_termios_and_make_raw(int fd, struct termios *newterm, struct t
333 *newterm = *oldterm; 333 *newterm = *oldterm;
334 334
335#if ENABLE_PLATFORM_MINGW32 335#if ENABLE_PLATFORM_MINGW32
336 newterm->imode &= 336 newterm->w_mode &=
337 ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT); 337 ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT);
338#else 338#else
339 /* Turn off buffered input (ICANON) 339 /* Turn off buffered input (ICANON)
diff --git a/libbb/yescrypt/Kbuild.src b/libbb/yescrypt/Kbuild.src
new file mode 100644
index 000000000..a61211a29
--- /dev/null
+++ b/libbb/yescrypt/Kbuild.src
@@ -0,0 +1,9 @@
1# Makefile for busybox
2#
3# Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com>
4#
5# Licensed under GPLv2, see file LICENSE in this source tree.
6
7lib-y:=
8
9INSERT
diff --git a/libbb/yescrypt/PARAMETERS b/libbb/yescrypt/PARAMETERS
new file mode 100644
index 000000000..d9f5d24e6
--- /dev/null
+++ b/libbb/yescrypt/PARAMETERS
@@ -0,0 +1,196 @@
1 Optimal yescrypt configuration.
2
3yescrypt is very flexible, but configuring it optimally is complicated.
4Here are some guidelines to simplify near-optimal configuration. We
5start by listing the parameters and their typical values, and then give
6currently recommended parameter sets by use case.
7
8
9 Parameters and their typical values.
10
11Set flags (yescrypt flavor) to YESCRYPT_DEFAULTS to use the currently
12recommended flavor. (Other flags values exist for compatibility and for
13specialized cases where you think you know what you're doing.)
14
15Set N (block count) based on target memory usage and running time, as
16well as on the value of r (block size in 128 byte units). N must be a
17power of two.
18
19Set r (block size) to 8 (so that N is in KiB, which is convenient) or to
20another small value (if more optimal or for fine-tuning of the total
21size and/or running time). Reasonable values for r are from 8 to 96.
22
23Set p (parallelism) to 1 meaning no thread-level parallelism within one
24computation of yescrypt. (Use of thread-level parallelism within
25yescrypt makes sense for ROM initialization and for key derivation at
26high memory usage, but usually not for password hashing where
27parallelism is available through concurrent authentication attempts.
28Don't use p > 1 unnecessarily.)
29
30Set t (time) to 0 to use the optimal running time for a given memory
31usage. This will allow you to maximize the memory usage (the value of
32N*r) while staying within your running time constraints. (Non-zero t
33makes sense in specialized cases where you can't afford higher memory
34usage but can afford more time.)
35
36Set g (upgrades) to 0 because there have been no hash upgrades yet.
37
38Set NROM (block count of ROM) to 0 unless you use a ROM (see below).
39NROM must be a power of two.
40
41
42 Password hashing for user authentication, no ROM.
43
44Small and fast (memory usage 2 MiB, performance like bcrypt cost 2^5 -
45latency 2-3 ms and throughput 10,000+ per second on a 16-core server):
46
47flags = YESCRYPT_DEFAULTS, N = 2048, r = 8, p = 1, t = 0, g = 0, NROM = 0
48
49Large and slow (memory usage 16 MiB, performance like bcrypt cost 2^8 -
50latency 10-30 ms and throughput 1000+ per second on a 16-core server):
51
52flags = YESCRYPT_DEFAULTS, N = 4096, r = 32, p = 1, t = 0, g = 0, NROM = 0
53
54Of course, even heavier and slower settings are possible, if affordable.
55Simply double the value of N as many times as needed. Since N must be a
56power of two, you may use r (in the range of 8 to 32) or/and t (in the
57range of 0 to 2) for fine-tuning the running time, but first bring N to
58the maximum you can afford. If this feels too complicated, just use one
59of the two parameter sets given above (preferably the second) as-is.
60
61
62 Password hashing for user authentication, with ROM.
63
64It's similar to the above, except that you need to adjust r, set NROM,
65and initialize the ROM.
66
67First decide on a ROM size, such as making it a large portion of your
68dedicated authentication servers' RAM sizes. Since NROM (block count)
69must be a power of two, you might need to choose r (block size) based on
70how your desired ROM size corresponds to a power of two. Also tuning
71for performance on current hardware, you'll likely end up with r in the
72range from slightly below 16 to 32. For example, to use 15/16 of a
73server's 256 GiB RAM as ROM (thus, making it 240 GiB), you could use
74r=15 or r=30. To use 23/24 of a server's 384 GiB RAM as ROM (thus,
75making it 368 GiB), you'd use r=23. Then set NROM to your desired ROM
76size in KiB divided by 128*r. Note that these examples might (or might
77not) be too extreme, leaving little memory for the rest of the system.
78You could as well opt for 7/8 with r=14 or 11/12 with r=11 or r=22.
79
80Note that higher r may make placing of ROM in e.g. NVMe flash memory
81instead of in RAM more reasonable (or less unreasonable) than it would
82have been with a lower r. If this is a concern as it relates to
83possible attacks and you do not intend to ever do it defensively, you
84might want to keep r lower (e.g., prefer r=15 over r=30 in the example
85above, even if 30 performs slightly faster).
86
87Your adjustments to r, if you deviate from powers of two, will also
88result in weirder memory usage per hash. Like 1.75 MiB at r=14 instead
89of 2 MiB at r=8 that you would have used without a ROM. That's OK.
90
91For ROM initialization, which you do with yescrypt_init_shared(), use
92the same r and NROM that you'd later use for password hashing, choose p
93based on your servers' physical and/or logical CPU count (maybe
94considering eventual upgrades as you won't be able to change this later,
95but without going unnecessarily high - e.g., p=28, p=56, or p=112 make
96sense on servers that currently have 28 physical / 56 logical CPUs), and
97set the rest of the parameters to:
98
99flags = YESCRYPT_DEFAULTS, N = 0, t = 0, g = 0
100
101N is set to 0 because it isn't relevant during ROM initialization (you
102can use different values of N for hashing passwords with the same ROM).
103
104To keep the ROM in e.g. SysV shared memory and reuse it across your
105authentication service restarts, you'd need to allocate the memory and
106set the flags to "YESCRYPT_DEFAULTS | YESCRYPT_SHARED_PREALLOCATED".
107
108For actual password hashing, you'd use your chosen values for N, r,
109NROM, and set the rest of the parameters to:
110
111flags = YESCRYPT_DEFAULTS, p = 1, t = 0, g = 0
112
113Note that although you'd use a large p for ROM initialization, you
114should use p=1 for actual password hashing like you would without a ROM.
115
116Do not forget to pass the ROM into the actual password hashing (and keep
117r and NROM set accordingly).
118
119Since N must be a power of two and r is dependent on ROM size, you may
120use t (in the range of 0 to 2) for fine-tuning the running time, but
121first bring N to the maximum you can afford.
122
123If this feels too complicated, or even if it doesn't, please consider
124engaging Openwall for your yescrypt deployment. We'd be happy to help.
125
126
127 Password-based key derivation.
128
129(Or rather passphrase-based.)
130
131Use settings similar to those for password hashing without a ROM, but
132adjusted for higher memory usage and running time, and optionally with
133thread-level parallelism.
134
135Small and fast (memory usage 128 MiB, running time under 100 ms on a
136fast desktop):
137
138flags = YESCRYPT_DEFAULTS, N = 32768, r = 32, p = 1, t = 0, g = 0, NROM = 0
139
140Large and fast (memory usage 1 GiB, running time under 200 ms on a fast
141quad-core desktop not including memory allocation overhead, under 250 ms
142with the overhead included), but requires build with OpenMP support (or
143otherwise will run as slow as yet be weaker than its p=1 alternative):
144
145flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 0, g = 0, NROM = 0
146
147Large and slower (memory usage 1 GiB, running time under 300 ms on a
148fast quad-core desktop not including memory allocation overhead, under
149350 ms with the overhead included), also requires build with OpenMP
150support (or otherwise will run slower than the p=1 alternative below):
151
152flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 2, g = 0, NROM = 0
153
154Large and slow (memory usage 1 GiB, running time under 600 ms on a fast
155desktop not including memory allocation overhead, under 650 ms with the
156overhead included):
157
158flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 1, t = 0, g = 0, NROM = 0
159
160Just like with password hashing, even heavier and slower settings are
161possible, if affordable, and you achieve them by adjusting N, r, t in
162the same way and in the same preferred ranges (please see the section on
163password hashing without a ROM, above). Unlike with password hashing,
164it makes some sense to go above t=2 if you expect that your users might
165not be able to afford more memory but can afford more time. However,
166increasing the memory usage provides better protection, and we don't
167recommend forcing your users to wait for more than 1 second as they
168could as well type more characters in that time. If this feels too
169complicated, just use one of the above parameter sets as-is.
170
171
172 Amortization of memory allocation overhead.
173
174It takes a significant fraction of yescrypt's total running time to
175allocate memory from the operating system, especially considering that
176the kernel zeroizes the memory before handing it over to your program.
177
178Unless you naturally need to compute yescrypt just once per process, you
179may achieve greater efficiency by fully using advanced yescrypt APIs
180that let you preserve and reuse the memory allocation across yescrypt
181invocations. This is done by reusing the structure pointed to by the
182"yescrypt_local_t *local" argument of yescrypt_r() or yescrypt_kdf()
183without calling yescrypt_free_local() inbetween the repeated invocations
184of yescrypt.
185
186
187 YESCRYPT_DEFAULTS macro.
188
189Please note that the value of the YESCRYPT_DEFAULTS macro might change
190later, so if you use the macro like it's recommended here then for
191results reproducible across versions you might need to store its value
192somewhere along with the hashes or the encrypted data.
193
194If you use yescrypt's standard hash string encoding, then yescrypt
195already encodes and decodes this value for you, so you don't need to
196worry about this.
diff --git a/libbb/yescrypt/README b/libbb/yescrypt/README
new file mode 100644
index 000000000..c1011c56a
--- /dev/null
+++ b/libbb/yescrypt/README
@@ -0,0 +1,4 @@
1The yescrypt code in this directory is adapted from libxcrypt-4.4.38
2with minimal edits, hopefully making it easier to track
3backports by resetting the tree to the commit which created this file,
4then comparing changes in upstream libxcrypt to the tree.
diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c
new file mode 100644
index 000000000..20e8d1ee4
--- /dev/null
+++ b/libbb/yescrypt/alg-sha256.c
@@ -0,0 +1,86 @@
1/*-
2 * Copyright 2005-2016 Colin Percival
3 * Copyright 2016-2018,2021 Alexander Peslyak
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28/**
29 * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
30 * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
31 * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
32 */
33static void
34PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen,
35 const uint8_t *salt, size_t saltlen,
36 uint64_t c, uint8_t *buf, size_t dkLen)
37{
38 hmac_ctx_t Phctx, PShctx;
39 uint32_t i;
40
41 /* Compute HMAC state after processing P. */
42 hmac_begin(&Phctx, passwd, passwdlen, sha256_begin);
43
44 /* Compute HMAC state after processing P and S. */
45 PShctx = Phctx;
46 hmac_hash(&PShctx, salt, saltlen);
47
48 /* Iterate through the blocks. */
49 for (i = 0; dkLen != 0; ) {
50 uint64_t U[32 / 8];
51 uint64_t T[32 / 8];
52 uint64_t j;
53 uint32_t ivec;
54 size_t clen;
55 int k;
56
57 /* Generate INT(i). */
58 i++;
59 ivec = SWAP_BE32(i);
60
61 /* Compute U_1 = PRF(P, S || INT(i)). */
62 hmac_peek_hash(&PShctx, (void*)T, &ivec, 4, NULL);
63//TODO: the above is a vararg function, might incur some ABI pain
64//does libbb need a non-vararg version with just one (buf,len)?
65
66 if (c > 1) {
67 /* T_i = U_1 ... */
68 memcpy(U, T, 32);
69 for (j = 2; j <= c; j++) {
70 /* Compute U_j. */
71 hmac_peek_hash(&Phctx, (void*)U, U, 32, NULL);
72 /* ... xor U_j ... */
73 for (k = 0; k < 32 / 8; k++)
74 T[k] ^= U[k];
75 //TODO: xorbuf32_aligned_long(T, U);
76 }
77 }
78
79 /* Copy as many bytes as necessary into buf. */
80 clen = dkLen;
81 if (clen > 32)
82 clen = 32;
83 buf = mempcpy(buf, T, clen);
84 dkLen -= clen;
85 }
86}
diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c
new file mode 100644
index 000000000..c51823787
--- /dev/null
+++ b/libbb/yescrypt/alg-yescrypt-common.c
@@ -0,0 +1,408 @@
1/*-
2 * Copyright 2013-2018 Alexander Peslyak
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted.
7 *
8 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
10 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
11 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
12 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
13 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
14 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
15 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
16 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
17 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
18 * SUCH DAMAGE.
19 */
20
21#if RESTRICTED_PARAMS
22
23#define decode64_uint32(dst, src, min) \
24({ \
25 uint32_t d32 = a2i64(*(src)); \
26 if (d32 > 47) \
27 goto fail; \
28 *(dst) = d32 + (min); \
29 ++src; \
30})
31#define test_decode64_uint32() ((void)0)
32#define FULL_PARAMS(...)
33
34#else
35
36#define FULL_PARAMS(...) __VA_ARGS__
37
38/* Not inlining:
39 * de/encode64 functions are only used to read
40 * yescrypt_params_t field, and convert salt to binary -
41 * both of these are negligible compared to main hashing operation
42 */
43static NOINLINE const uint8_t *decode64_uint32(
44 uint32_t *dst,
45 const uint8_t *src, uint32_t val)
46{
47 uint32_t start = 0, end = 47, bits = 0;
48 uint32_t c;
49
50 if (!src) /* previous decode failed already? */
51 goto fail;
52
53 c = a2i64(*src++);
54 if (c > 63)
55 goto fail;
56
57// The encoding of number N:
58// start = 0 end = 47
59// If N < 48, it is encoded verbatim, else
60// N -= 48
61// start = end+1 = 48
62// end += (64-end)/2 = 55
63// If N < (end+1-start)<<6 = 8<<6, it is encoded as 48+(N>>6)|low6bits (that is, 48...55|<6bit>), else
64// N -= 8<<6
65// start = end+1 = 56
66// end += (64-end)/2 = 59
67// If N < (end+1-start)<<2*6 = 4<<12, it is encoded as 56+(N>>2*6)|low12bits (that is, 56...59|<6bit>|<6bit>), else
68// ...same for 60..61|<6bit>|<6bit>|<6bit>
69// .......same for 62|<6bit>|<6bit>|<6bit>|<6bit>
70// .......same for 63|<6bit>|<6bit>|<6bit>|<6bit>|<6bit>
71 dbg_dec64("c:%d val:0x%08x", (int)c, (unsigned)val);
72 while (c > end) {
73 dbg_dec64("c:%d > end:%d", (int)c, (int)end);
74 val += (end + 1 - start) << bits;
75 dbg_dec64("val+=0x%08x", (int)((end + 1 - start) << bits));
76 dbg_dec64(" val:0x%08x", (unsigned)val);
77 start = end + 1;
78 end += (64 - end) / 2;
79 bits += 6;
80 dbg_dec64("start=%d", (int)start);
81 dbg_dec64("end=%d", (int)end);
82 dbg_dec64("bits=%d", (int)bits);
83 }
84
85 val += (c - start) << bits;
86 dbg_dec64("final val+=0x%08x", (int)((c - start) << bits));
87 dbg_dec64(" val:0x%08x", (unsigned)val);
88
89 while (bits != 0) {
90 c = a2i64(*src++);
91 if (c > 63)
92 goto fail;
93 bits -= 6;
94 val += c << bits;
95 dbg_dec64("low bits val+=0x%08x", (int)(c << bits));
96 dbg_dec64(" val:0x%08x", (unsigned)val);
97 }
98 ret:
99 *dst = val;
100 return src;
101 fail:
102 val = 0;
103 src = NULL;
104 goto ret;
105}
106
107#if TEST_DECODE64
108static void test_decode64_uint32(void)
109{
110 const uint8_t *src, *end;
111 uint32_t u32;
112 int a = 48;
113 int b = 8<<6; // 0x0200
114 int c = 4<<12; // 0x04000
115 int d = 2<<18; // 0x080000
116 int e = 1<<24; // 0x1000000
117
118 src = (void*)"wzzz";
119 end = decode64_uint32(&u32, src, 0);
120 if (u32 != 0x0003ffff+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
121 if (end != src + 4) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
122 src = (void*)"xzzz";
123 end = decode64_uint32(&u32, src, 0);
124 if (u32 != 0x0007ffff+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
125 if (end != src + 4) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
126 // Note how the last representable "x---" encoding, 0x7ffff, is exactly d-1!
127 // And if we now increment it, we get:
128 src = (void*)"y....";
129 end = decode64_uint32(&u32, src, 0);
130 if (u32 != 0x00000000+d+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
131 if (end != src + 5) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
132 src = (void*)"yzzzz";
133 end = decode64_uint32(&u32, src, 0);
134 if (u32 != 0x00ffffff+d+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
135 if (end != src + 5) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
136
137 src = (void*)"zzzzzz";
138 end = decode64_uint32(&u32, src, 0);
139 if (u32 != 0x3fffffff+e+d+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
140 if (end != src + 6) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
141
142 bb_error_msg("test_decode64_uint32() OK");
143}
144#else
145# define test_decode64_uint32() ((void)0)
146#endif
147
148#endif /* !RESTRICTED_PARAMS */
149
150#if 1
151static const uint8_t *decode64(
152 uint8_t *dst, size_t *dstlen,
153 const uint8_t *src)
154{
155 unsigned dstpos = 0;
156
157 dbg_dec64("src:'%s'", src);
158 for (;;) {
159 uint32_t c, value = 0;
160 int bits = 0;
161 while (*src != '\0' && *src != '$') {
162 c = a2i64(*src);
163 if (c > 63) { /* bad ascii64 char, stop decoding at it */
164 break;
165 }
166 src++;
167 value |= c << bits;
168 bits += 6;
169 if (bits == 24) /* got 4 chars */
170 goto store;
171 }
172 /* we read entire src, or met a non-ascii64 char (such as "$") */
173 if (bits == 0)
174 break;
175 /* else: we got last, partial bit block - store it */
176 store:
177 dbg_dec64(" storing bits:%d dstpos:%u v:%08x", bits, dstpos, (int)SWAP_BE32(value)); //BE to see lsb first
178 for (;;) {
179 if ((*src == '\0' || *src == '$')
180 && value == 0 && bits < 8
181 ) {
182 /* Example: mkpasswd PWD '$y$j9T$123':
183 * the "123" is bits:18 value:03,51,00
184 * is considered to be 2 bytes, not 3!
185 *
186 * '$y$j9T$zzz' in upstream fails outright (3rd byte isn't zero).
187 * IOW: for upstream, validity of salt depends on VALUE,
188 * not just size of salt. Which is a bug.
189 * The '$y$j9T$zzz.' salt is the same
190 * (it adds 6 zero msbits) but upstream works with it,
191 * thus '$y$j9T$zzz' should work too and give the same result.
192 */
193 goto end;
194 }
195 if (dstpos >= *dstlen) {
196 dbg_dec64(" ERR: bits:%d dstpos:%u dst[] is too small", bits, dstpos);
197 goto fail;
198 }
199 *dst++ = value;
200 dstpos++;
201 value >>= 8;
202 bits -= 8;
203 if (bits <= 0) /* can get negative, if we e.g. had 6 bits */
204 break;
205 }
206 if (*src == '\0' || *src == '$')
207 break;
208 }
209 end:
210 *dstlen = dstpos;
211 dbg_dec64("dec64: OK, dst[%d]", (int)dstpos);
212 return src;
213 fail:
214 /* *dstlen = 0; - not needed, caller detects error by seeing NULL */
215 return NULL;
216}
217#else
218/* Buggy (and larger) original code */
219static const uint8_t *decode64(
220 uint8_t *dst, size_t *dstlen,
221 const uint8_t *src, size_t srclen)
222{
223 size_t dstpos = 0;
224
225 while (dstpos <= *dstlen && srclen) {
226 uint32_t value = 0, bits = 0;
227 while (srclen--) {
228 uint32_t c = a2i64(*src);
229 if (c > 63) {
230 srclen = 0;
231 break;
232 }
233 src++;
234 value |= c << bits;
235 bits += 6;
236 if (bits >= 24)
237 break;
238 }
239 if (!bits)
240 break;
241 if (bits < 12) /* must have at least one full byte */
242 goto fail;
243 dbg_dec64(" storing bits:%d v:%08x", (int)bits, (int)SWAP_BE32(value)); //BE to see lsb first
244 while (dstpos++ < *dstlen) {
245 *dst++ = value;
246 value >>= 8;
247 bits -= 8;
248 if (bits < 8) { /* 2 or 4 */
249 if (value) /* must be 0 */
250 goto fail;
251 bits = 0;
252 break;
253 }
254 }
255 if (bits)
256 goto fail;
257 }
258
259 if (!srclen && dstpos <= *dstlen) {
260 *dstlen = dstpos;
261 dbg_dec64("dec64: OK, dst[%d]", (int)dstpos);
262 return src;
263 }
264 fail:
265 /* *dstlen = 0; - not needed, caller detects error by seeing NULL */
266 return NULL;
267}
268#endif
269
270static char *encode64(
271 char *dst, size_t dstlen,
272 const uint8_t *src, size_t srclen)
273{
274 while (srclen) {
275 uint32_t value = 0, b = 0;
276 do {
277 value |= (uint32_t)(*src++ << b);
278 b += 8;
279 srclen--;
280 } while (srclen && b < 24);
281
282 b >>= 3; /* number of bits to number of bytes */
283 b++; /* 1, 2 or 3 bytes will become 2, 3 or 4 ascii64 chars */
284 dstlen -= b;
285 if ((ssize_t)dstlen <= 0)
286 return NULL;
287 dst = num2str64_lsb_first(dst, value, b);
288 }
289 *dst = '\0';
290 return dst;
291}
292
293char *yescrypt_r(
294 const uint8_t *passwd, size_t passwdlen,
295 const uint8_t *setting,
296 char *buf, size_t buflen)
297{
298 struct {
299 yescrypt_ctx_t yctx[1];
300 unsigned char hashbin32[32];
301 } u;
302#define yctx u.yctx
303#define hashbin32 u.hashbin32
304 char *dst;
305 const uint8_t *src, *saltend;
306 size_t need, prefixlen;
307 uint32_t u32;
308
309 test_decode64_uint32();
310
311 memset(yctx, 0, sizeof(yctx));
312 FULL_PARAMS(yctx->param.p = 1;)
313
314 /* we assume setting starts with "$y$" (caller must ensure this) */
315 src = setting + 3;
316
317 src = decode64_uint32(&yctx->param.flags, src, 0);
318 /* "j9T" returns: 0x2f */
319 //if (!src)
320 // goto fail;
321
322 if (yctx->param.flags < YESCRYPT_RW) {
323 dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags);
324 goto fail; // bbox: we don't support scrypt - only yescrypt
325 } else if (yctx->param.flags <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) {
326 /* "j9T" sets flags to 0xb6 */
327 yctx->param.flags = YESCRYPT_RW + ((yctx->param.flags - YESCRYPT_RW) << 2);
328 dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags);
329 dbg(" YESCRYPT_RW:%u", !!(yctx->param.flags & YESCRYPT_RW));
330 dbg((yctx->param.flags & YESCRYPT_RW_FLAVOR_MASK) ==
331 (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K)
332 ? " YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K"
333 : " flags are not standard"
334 );
335 } else {
336 goto fail;
337 }
338
339 src = decode64_uint32(&u32, src, 1);
340 if (/*!src ||*/ u32 > 63)
341 goto fail;
342 yctx->param.N = (uint64_t)1 << u32;
343 /* "j9T" sets to 4096 (1<<12) */
344 dbg("yctx->param.N=%llu (1<<%u)", (unsigned long long)yctx->param.N, (unsigned)u32);
345
346 src = decode64_uint32(&yctx->param.r, src, 1);
347 /* "j9T" sets to 32 */
348 dbg("yctx->param.r=%u", yctx->param.r);
349
350 if (!src)
351 goto fail;
352 if (*src != '$') {
353#if RESTRICTED_PARAMS
354 goto fail;
355#else
356 src = decode64_uint32(&u32, src, 1);
357 dbg("yescrypt has extended params:0x%x", (unsigned)u32);
358 if (u32 & 1)
359 src = decode64_uint32(&yctx->param.p, src, 2);
360 if (u32 & 2)
361 src = decode64_uint32(&yctx->param.t, src, 1);
362 if (u32 & 4)
363 src = decode64_uint32(&yctx->param.g, src, 1);
364 if (u32 & 8) {
365 src = decode64_uint32(&u32, src, 1);
366 if (/*!src ||*/ u32 > 63)
367 goto fail;
368 yctx->param.NROM = (uint64_t)1 << u32;
369 }
370 if (!src)
371 goto fail;
372 if (*src != '$')
373 goto fail;
374#endif
375 }
376
377 yctx->saltlen = sizeof(yctx->salt);
378 src++; /* now points to salt */
379 saltend = decode64(yctx->salt, &yctx->saltlen, src);
380 if (!saltend || (*saltend != '\0' && *saltend != '$'))
381 goto fail; /* salt[] is too small, or bad char during decode */
382 dbg_dec64("salt is %d ascii64 chars -> %d bytes (in binary)", (int)(saltend - src), (int)yctx->saltlen);
383
384 prefixlen = saltend - setting;
385 need = prefixlen + 1 + YESCRYPT_HASH_LEN + 1;
386 if (need > buflen /*overflow is quite unlikely: || need < prefixlen*/)
387 goto fail;
388
389 if (yescrypt_kdf32(yctx, passwd, passwdlen, hashbin32)) {
390 dbg("error in yescrypt_kdf32");
391 goto fail;
392 }
393
394 dst = mempcpy(buf, setting, prefixlen);
395 *dst++ = '$';
396 dst = encode64(dst, buflen - (dst - buf), hashbin32, sizeof(hashbin32));
397 if (!dst)
398 goto fail;
399 ret:
400 free_region(yctx->local);
401 explicit_bzero(&u, sizeof(u));
402 return buf;
403 fail:
404 buf = NULL;
405 goto ret;
406#undef yctx
407#undef hashbin32
408}
diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c
new file mode 100644
index 000000000..a9a1bd591
--- /dev/null
+++ b/libbb/yescrypt/alg-yescrypt-kdf.c
@@ -0,0 +1,1212 @@
1/*-
2 * Copyright 2009 Colin Percival
3 * Copyright 2012-2018 Alexander Peslyak
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * This file was originally written by Colin Percival as part of the Tarsnap
28 * online backup system.
29 */
30
31#if __STDC_VERSION__ >= 199901L
32/* Have restrict */
33#elif defined(__GNUC__)
34#define restrict __restrict
35#else
36#define restrict
37#endif
38
39#ifdef __GNUC__
40#define unlikely(exp) __builtin_expect(exp, 0)
41#else
42#define unlikely(exp) (exp)
43#endif
44
45typedef union {
46 uint32_t w[16];
47 uint64_t d[8];
48} salsa20_blk_t;
49
50static void salsa20_simd_shuffle(
51 const salsa20_blk_t *Bin,
52 salsa20_blk_t *Bout)
53{
54#define COMBINE(out, in1, in2) \
55do { \
56 Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); \
57} while (0)
58 COMBINE(0, 0, 2);
59 COMBINE(1, 5, 7);
60 COMBINE(2, 2, 4);
61 COMBINE(3, 7, 1);
62 COMBINE(4, 4, 6);
63 COMBINE(5, 1, 3);
64 COMBINE(6, 6, 0);
65 COMBINE(7, 3, 5);
66#undef COMBINE
67}
68
69static void salsa20_simd_unshuffle(
70 const salsa20_blk_t *Bin,
71 salsa20_blk_t *Bout)
72{
73#define UNCOMBINE(out, in1, in2) \
74do { \
75 Bout->w[out * 2] = Bin->d[in1]; \
76 Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; \
77} while (0)
78 UNCOMBINE(0, 0, 6);
79 UNCOMBINE(1, 5, 3);
80 UNCOMBINE(2, 2, 0);
81 UNCOMBINE(3, 7, 5);
82 UNCOMBINE(4, 4, 2);
83 UNCOMBINE(5, 1, 7);
84 UNCOMBINE(6, 6, 4);
85 UNCOMBINE(7, 3, 1);
86#undef UNCOMBINE
87}
88
89#define DECL_X \
90 salsa20_blk_t X
91#define DECL_Y \
92 salsa20_blk_t Y
93
94#if KDF_UNROLL_COPY
95#define COPY(out, in) \
96do { \
97 (out).d[0] = (in).d[0]; \
98 (out).d[1] = (in).d[1]; \
99 (out).d[2] = (in).d[2]; \
100 (out).d[3] = (in).d[3]; \
101 (out).d[4] = (in).d[4]; \
102 (out).d[5] = (in).d[5]; \
103 (out).d[6] = (in).d[6]; \
104 (out).d[7] = (in).d[7]; \
105} while (0)
106#else
107#define COPY(out, in) \
108do { \
109 memcpy((out).d, (in).d, sizeof((in).d)); \
110} while (0)
111#endif
112
113#define READ_X(in) COPY(X, in)
114#define WRITE_X(out) COPY(out, X)
115
116/**
117 * salsa20(B):
118 * Apply the Salsa20 core to the provided block.
119 */
120static void salsa20(salsa20_blk_t *restrict B,
121 salsa20_blk_t *restrict Bout,
122 uint32_t doublerounds)
123{
124 salsa20_blk_t X;
125#define x X.w
126
127 salsa20_simd_unshuffle(B, &X);
128
129 do {
130#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
131 /* Operate on columns */
132#if KDF_UNROLL_SALSA20
133 x[ 4] ^= R(x[ 0]+x[12], 7); // x[j] ^= R(x[k]+x[l], CONST)
134 x[ 8] ^= R(x[ 4]+x[ 0], 9);
135 x[12] ^= R(x[ 8]+x[ 4],13);
136 x[ 0] ^= R(x[12]+x[ 8],18);
137
138 x[ 9] ^= R(x[ 5]+x[ 1], 7);
139 x[13] ^= R(x[ 9]+x[ 5], 9);
140 x[ 1] ^= R(x[13]+x[ 9],13);
141 x[ 5] ^= R(x[ 1]+x[13],18);
142
143 x[14] ^= R(x[10]+x[ 6], 7);
144 x[ 2] ^= R(x[14]+x[10], 9);
145 x[ 6] ^= R(x[ 2]+x[14],13);
146 x[10] ^= R(x[ 6]+x[ 2],18);
147
148 x[ 3] ^= R(x[15]+x[11], 7);
149 x[ 7] ^= R(x[ 3]+x[15], 9);
150 x[11] ^= R(x[ 7]+x[ 3],13);
151 x[15] ^= R(x[11]+x[ 7],18);
152#else
153 {
154 unsigned j, k, l;
155 j = 4; k = 0; l = 12;
156 for (;;) {
157 uint32_t t;
158 x[j] ^= ({ t = x[k] + x[l]; R(t, 7); }); l = k; k = j; j = (j+4) & 0xf;
159 x[j] ^= ({ t = x[k] + x[l]; R(t, 9); }); l = k; k = j; j = (j+4) & 0xf;
160 x[j] ^= ({ t = x[k] + x[l]; R(t,13); }); l = k; k = j; j = (j+4) & 0xf;
161 x[j] ^= ({ t = x[k] + x[l]; R(t,18); });
162 if (j == 15) break;
163 l = j + 1; k = j + 5; j = (j+9) & 0xf;
164 }
165 }
166#endif
167 /* Operate on rows */
168#if KDF_UNROLL_SALSA20
169// i=0 n=0
170 x[ 1] ^= R(x[ 0]+x[ 3], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
171 x[ 2] ^= R(x[ 1]+x[ 0], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
172 x[ 3] ^= R(x[ 2]+x[ 1],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
173 x[ 0] ^= R(x[ 3]+x[ 2],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
174// i=4 n=1 ^^^j^^^ ^^^k^^^ ^^^l^^^
175 x[ 6] ^= R(x[ 5]+x[ 4], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
176 x[ 7] ^= R(x[ 6]+x[ 5], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
177 x[ 4] ^= R(x[ 7]+x[ 6],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
178 x[ 5] ^= R(x[ 4]+x[ 7],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
179// i=8 n=2
180 x[11] ^= R(x[10]+x[ 9], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
181 x[ 8] ^= R(x[11]+x[10], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
182 x[ 9] ^= R(x[ 8]+x[11],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
183 x[10] ^= R(x[ 9]+x[ 8],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
184// i=12 n=3
185 x[12] ^= R(x[15]+x[14], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
186 x[13] ^= R(x[12]+x[15], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
187 x[14] ^= R(x[13]+x[12],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
188 x[15] ^= R(x[14]+x[13],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
189#else
190 {
191 unsigned j, k, l;
192 uint32_t *xrow;
193 j = 1; k = 0; l = 3;
194 xrow = &x[0];
195 for (;;) {
196 uint32_t t;
197 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 7); }); l = k; k = j; j = (j+1) & 3;
198 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 9); }); l = k; k = j; j = (j+1) & 3;
199 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,13); }); l = k; k = j; j = (j+1) & 3;
200 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,18); });
201 if (j == 3) break;
202 l = j; k = j + 1; j = (j+2) & 3;
203 xrow += 4;
204 }
205 }
206#endif
207
208#undef R
209 } while (--doublerounds);
210#undef x
211
212 {
213 uint32_t i;
214 salsa20_simd_shuffle(&X, Bout);
215 for (i = 0; i < 16; i++) {
216 // bbox: note: was unrolled x4
217 B->w[i] = Bout->w[i] += B->w[i];
218 }
219 }
220#if 0
221 /* Too expensive */
222 explicit_bzero(&X, sizeof(X));
223#endif
224}
225
226/**
227 * Apply the Salsa20/2 core to the block provided in X.
228 */
229#define SALSA20_2(out) \
230 salsa20(&X, &out, 1)
231
232#if 0
233#define XOR(out, in1, in2) \
234do { \
235 (out).d[0] = (in1).d[0] ^ (in2).d[0]; \
236 (out).d[1] = (in1).d[1] ^ (in2).d[1]; \
237 (out).d[2] = (in1).d[2] ^ (in2).d[2]; \
238 (out).d[3] = (in1).d[3] ^ (in2).d[3]; \
239 (out).d[4] = (in1).d[4] ^ (in2).d[4]; \
240 (out).d[5] = (in1).d[5] ^ (in2).d[5]; \
241 (out).d[6] = (in1).d[6] ^ (in2).d[6]; \
242 (out).d[7] = (in1).d[7] ^ (in2).d[7]; \
243} while (0)
244#else
245#define XOR(out, in1, in2) \
246do { \
247 xorbuf64_3_aligned64(&(out).d, &(in1).d, &(in2).d); \
248} while (0)
249#endif
250
251#define XOR_X(in) XOR(X, X, in)
252#define XOR_X_2(in1, in2) XOR(X, in1, in2)
253#define XOR_X_WRITE_XOR_Y_2(out, in) \
254do { \
255 XOR(Y, out, in); \
256 COPY(out, Y); \
257 XOR(X, X, Y); \
258} while (0)
259
260/**
261 * Apply the Salsa20/8 core to the block provided in X ^ in.
262 */
263#define SALSA20_8_XOR_MEM(in, out) \
264do { \
265 XOR_X(in); \
266 salsa20(&X, &out, 4); \
267} while (0)
268
269#define INTEGERIFY ((uint32_t)X.d[0])
270
271/**
272 * blockmix_salsa8(Bin, Bout, r):
273 * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r
274 * bytes in length; the output Bout must also be the same size.
275 */
276static void blockmix_salsa8(
277 const salsa20_blk_t *restrict Bin,
278 salsa20_blk_t *restrict Bout,
279 size_t r)
280{
281 size_t i;
282 DECL_X;
283
284 READ_X(Bin[r * 2 - 1]);
285 for (i = 0; i < r; i++) {
286 SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]);
287 SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]);
288 }
289}
290
291static uint32_t blockmix_salsa8_xor(
292 const salsa20_blk_t *restrict Bin1,
293 const salsa20_blk_t *restrict Bin2,
294 salsa20_blk_t *restrict Bout,
295 size_t r)
296{
297 size_t i;
298 DECL_X;
299
300 XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]);
301 for (i = 0; i < r; i++) {
302 XOR_X(Bin1[i * 2]);
303 SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]);
304 XOR_X(Bin1[i * 2 + 1]);
305 SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]);
306 }
307
308 return INTEGERIFY;
309}
310
311/* This is tunable */
312#define Swidth 8
313
314/* Not tunable in this implementation, hard-coded in a few places */
315#define PWXsimple 2
316#define PWXgather 4
317
318/* Derived values. Not tunable except via Swidth above. */
319#define PWXbytes (PWXgather * PWXsimple * 8)
320#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8)
321#define Smask (((1 << Swidth) - 1) * PWXsimple * 8)
322#define Smask2 (((uint64_t)Smask << 32) | Smask)
323
324#define DECL_SMASK2REG do {} while (0)
325#define FORCE_REGALLOC_3 do {} while (0)
326#define MAYBE_MEMORY_BARRIER do {} while (0)
327
328#define PWXFORM_SIMD(x0, x1) \
329do { \
330 uint64_t x = x0 & Smask2; \
331 uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \
332 uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \
333 x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \
334 x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \
335} while (0)
336
337#if KDF_UNROLL_PWXFORM_ROUND
338#define PWXFORM_ROUND \
339do { \
340 PWXFORM_SIMD(X.d[0], X.d[1]); \
341 PWXFORM_SIMD(X.d[2], X.d[3]); \
342 PWXFORM_SIMD(X.d[4], X.d[5]); \
343 PWXFORM_SIMD(X.d[6], X.d[7]); \
344} while (0)
345#else
346#define PWXFORM_ROUND \
347do { \
348 for (int pwxi=0; pwxi<8; pwxi+=2) \
349 PWXFORM_SIMD(X.d[pwxi], X.d[pwxi + 1]); \
350} while (0)
351#endif
352
353/*
354 * This offset helps address the 256-byte write block via the single-byte
355 * displacements encodable in x86(-64) instructions. It is needed because the
356 * displacements are signed. Without it, we'd get 4-byte displacements for
357 * half of the writes. Setting it to 0x80 instead of 0x7c would avoid needing
358 * a displacement for one of the writes, but then the LEA instruction would
359 * need a 4-byte displacement.
360 */
361#define PWXFORM_WRITE_OFFSET 0x7c
362
363#define PWXFORM_WRITE \
364do { \
365 WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)); \
366 Sw += 64; \
367} while (0)
368
369#if KDF_UNROLL_PWXFORM
370#define PWXFORM \
371do { \
372 uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
373 FORCE_REGALLOC_3; \
374 MAYBE_MEMORY_BARRIER; \
375 PWXFORM_ROUND; \
376 PWXFORM_ROUND; PWXFORM_WRITE; \
377 PWXFORM_ROUND; PWXFORM_WRITE; \
378 PWXFORM_ROUND; PWXFORM_WRITE; \
379 PWXFORM_ROUND; PWXFORM_WRITE; \
380 PWXFORM_ROUND; \
381 w = (w + 64 * 4) & Smask2; \
382 { \
383 uint8_t *Stmp = S2; \
384 S2 = S1; \
385 S1 = S0; \
386 S0 = Stmp; \
387 } \
388} while (0)
389#else
390#define PWXFORM \
391do { \
392 uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
393 FORCE_REGALLOC_3; \
394 MAYBE_MEMORY_BARRIER; \
395 PWXFORM_ROUND; \
396 for (int pwxj=0; pwxj<4; pwxj++) {\
397 PWXFORM_ROUND; PWXFORM_WRITE; \
398 } \
399 PWXFORM_ROUND; \
400 w = (w + 64 * 4) & Smask2; \
401 { \
402 uint8_t *Stmp = S2; \
403 S2 = S1; \
404 S1 = S0; \
405 S0 = Stmp; \
406 } \
407} while (0)
408#endif
409
410typedef struct {
411 uint8_t *S0, *S1, *S2;
412 size_t w;
413} pwxform_ctx_t;
414
415#define Salloc (Sbytes + ((sizeof(pwxform_ctx_t) + 63) & ~63U))
416
417/**
418 * blockmix_pwxform(Bin, Bout, r, S):
419 * Compute Bout = BlockMix_pwxform{salsa20/2, r, S}(Bin). The input Bin must
420 * be 128r bytes in length; the output Bout must also be the same size.
421 */
422static void blockmix(
423 const salsa20_blk_t *restrict Bin,
424 salsa20_blk_t *restrict Bout,
425 size_t r,
426 pwxform_ctx_t *restrict ctx)
427{
428 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
429 size_t w = ctx->w;
430 size_t i;
431 DECL_X;
432
433 /* Convert count of 128-byte blocks to max index of 64-byte block */
434 r = r * 2 - 1;
435
436 READ_X(Bin[r]);
437
438 DECL_SMASK2REG;
439
440 i = 0;
441 for (;;) {
442 XOR_X(Bin[i]);
443 PWXFORM;
444 if (unlikely(i >= r))
445 break;
446 WRITE_X(Bout[i]);
447 i++;
448 }
449
450 ctx->S0 = S0;
451 ctx->S1 = S1;
452 ctx->S2 = S2;
453 ctx->w = w;
454
455 SALSA20_2(Bout[i]);
456}
457
458static uint32_t blockmix_xor(
459 const salsa20_blk_t *Bin1,
460 const salsa20_blk_t *restrict Bin2,
461 salsa20_blk_t *Bout,
462 size_t r,
463 pwxform_ctx_t *restrict ctx)
464{
465 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
466 size_t w = ctx->w;
467 size_t i;
468 DECL_X;
469
470 /* Convert count of 128-byte blocks to max index of 64-byte block */
471 r = r * 2 - 1;
472
473 XOR_X_2(Bin1[r], Bin2[r]);
474
475 DECL_SMASK2REG;
476
477 i = 0;
478 r--;
479 for (;;) {
480 XOR_X(Bin1[i]);
481 XOR_X(Bin2[i]);
482 PWXFORM;
483 if (unlikely(i > r))
484 break;
485 WRITE_X(Bout[i]);
486 i++;
487 }
488
489 ctx->S0 = S0;
490 ctx->S1 = S1;
491 ctx->S2 = S2;
492 ctx->w = w;
493
494 SALSA20_2(Bout[i]);
495
496 return INTEGERIFY;
497}
498
499static uint32_t blockmix_xor_save(
500 salsa20_blk_t *restrict Bin1out,
501 salsa20_blk_t *restrict Bin2,
502 size_t r,
503 pwxform_ctx_t *restrict ctx)
504{
505 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
506 size_t w = ctx->w;
507 size_t i;
508 DECL_X;
509 DECL_Y;
510
511 /* Convert count of 128-byte blocks to max index of 64-byte block */
512 r = r * 2 - 1;
513
514 XOR_X_2(Bin1out[r], Bin2[r]);
515
516 DECL_SMASK2REG;
517
518 i = 0;
519 r--;
520 for (;;) {
521 XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]);
522 PWXFORM;
523 if (unlikely(i > r))
524 break;
525 WRITE_X(Bin1out[i]);
526 i++;
527 }
528
529 ctx->S0 = S0;
530 ctx->S1 = S1;
531 ctx->S2 = S2;
532 ctx->w = w;
533
534 SALSA20_2(Bin1out[i]);
535
536 return INTEGERIFY;
537}
538
539/**
540 * integerify(B, r):
541 * Return the result of parsing B_{2r-1} as a little-endian integer.
542 */
543static inline uint32_t integerify(const salsa20_blk_t *B, size_t r)
544{
545/*
546 * Our 64-bit words are in host byte order, which is why we don't just read
547 * w[0] here (would be wrong on big-endian). Also, our 32-bit words are
548 * SIMD-shuffled (so the next 32 bits would be part of d[6]), but currently
549 * this does not matter as we only care about the least significant 32 bits.
550 */
551 return (uint32_t)B[2 * r - 1].d[0];
552}
553
554/**
555 * smix1(B, r, N, flags, V, NROM, VROM, XY, ctx):
556 * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in
557 * length; the temporary storage V must be 128rN bytes in length; the temporary
558 * storage XY must be 128r+64 bytes in length. N must be even and at least 4.
559 * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY
560 * to a multiple of at least 16 bytes.
561 */
562#if DISABLE_NROM_CODE
563#define smix1(B,r,N,flags,V,NROM,VROM,XY,ctx) \
564 smix1(B,r,N,flags,V,XY,ctx)
565#endif
566static void smix1(uint8_t *B, size_t r, uint32_t N,
567 uint32_t flags,
568 salsa20_blk_t *V,
569 uint32_t NROM, const salsa20_blk_t *VROM,
570 salsa20_blk_t *XY,
571 pwxform_ctx_t *ctx)
572{
573#if DISABLE_NROM_CODE
574 uint32_t NROM = 0;
575 const salsa20_blk_t *VROM = NULL;
576#endif
577 size_t s = 2 * r;
578 salsa20_blk_t *X = V, *Y = &V[s];
579 uint32_t i, j;
580
581 for (i = 0; i < 2 * r; i++) {
582 const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64];
583 salsa20_blk_t *tmp = Y;
584 salsa20_blk_t *dst = &X[i];
585 size_t k;
586 for (k = 0; k < 16; k++)
587 tmp->w[k] = SWAP_LE32(src->w[k]);
588 salsa20_simd_shuffle(tmp, dst);
589 }
590
591 if (VROM) {
592 uint32_t n;
593 const salsa20_blk_t *V_j;
594
595 V_j = &VROM[(NROM - 1) * s];
596 j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
597 V_j = &VROM[j * s];
598 X = Y + s;
599 j = blockmix_xor(Y, V_j, X, r, ctx);
600
601 for (n = 2; n < N; n <<= 1) {
602 uint32_t m = (n < N / 2) ? n : (N - 1 - n);
603 for (i = 1; i < m; i += 2) {
604 j &= n - 1;
605 j += i - 1;
606 V_j = &V[j * s];
607 Y = X + s;
608 j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
609 V_j = &VROM[j * s];
610 X = Y + s;
611 j = blockmix_xor(Y, V_j, X, r, ctx);
612 }
613 }
614 n >>= 1;
615
616 j &= n - 1;
617 j += N - 2 - n;
618 V_j = &V[j * s];
619 Y = X + s;
620 j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
621 V_j = &VROM[j * s];
622 blockmix_xor(Y, V_j, XY, r, ctx);
623 } else if (flags & YESCRYPT_RW) {
624//can't use flags___YESCRYPT_RW, smix1() may be called with flags = 0
625 uint32_t n;
626 salsa20_blk_t *V_j;
627
628 blockmix(X, Y, r, ctx);
629 X = Y + s;
630 blockmix(Y, X, r, ctx);
631 j = integerify(X, r);
632
633 for (n = 2; n < N; n <<= 1) {
634 uint32_t m = (n < N / 2) ? n : (N - 1 - n);
635 for (i = 1; i < m; i += 2) {
636 Y = X + s;
637 j &= n - 1;
638 j += i - 1;
639 V_j = &V[j * s];
640 j = blockmix_xor(X, V_j, Y, r, ctx);
641 j &= n - 1;
642 j += i;
643 V_j = &V[j * s];
644 X = Y + s;
645 j = blockmix_xor(Y, V_j, X, r, ctx);
646 }
647 }
648 n >>= 1;
649
650 j &= n - 1;
651 j += N - 2 - n;
652 V_j = &V[j * s];
653 Y = X + s;
654 j = blockmix_xor(X, V_j, Y, r, ctx);
655 j &= n - 1;
656 j += N - 1 - n;
657 V_j = &V[j * s];
658 blockmix_xor(Y, V_j, XY, r, ctx);
659 } else {
660 N -= 2;
661 do {
662 blockmix_salsa8(X, Y, r);
663 X = Y + s;
664 blockmix_salsa8(Y, X, r);
665 Y = X + s;
666 } while ((N -= 2));
667
668 blockmix_salsa8(X, Y, r);
669 blockmix_salsa8(Y, XY, r);
670 }
671
672 for (i = 0; i < 2 * r; i++) {
673 const salsa20_blk_t *src = &XY[i];
674 salsa20_blk_t *tmp = &XY[s];
675 salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
676 size_t k;
677 for (k = 0; k < 16; k++)
678 tmp->w[k] = SWAP_LE32(src->w[k]);
679 salsa20_simd_unshuffle(tmp, dst);
680 }
681}
682
683/**
684 * smix2(B, r, N, Nloop, flags, V, NROM, VROM, XY, ctx):
685 * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in
686 * length; the temporary storage V must be 128rN bytes in length; the temporary
687 * storage XY must be 256r bytes in length. N must be a power of 2 and at
688 * least 2. Nloop must be even. The array V must be aligned to a multiple of
689 * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes.
690 */
691#if DISABLE_NROM_CODE
692#define smix2(B,r,N,Nloop,flags,V,NROM,VROM,XY,ctx) \
693 smix2(B,r,N,Nloop,flags,V,XY,ctx)
694#endif
695static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop,
696 uint32_t flags,
697 salsa20_blk_t *V,
698 uint32_t NROM, const salsa20_blk_t *VROM,
699 salsa20_blk_t *XY,
700 pwxform_ctx_t *ctx)
701{
702#if DISABLE_NROM_CODE
703 uint32_t NROM = 0;
704 const salsa20_blk_t *VROM = NULL;
705#endif
706 size_t s = 2 * r;
707 salsa20_blk_t *X = XY, *Y = &XY[s];
708 uint32_t i, j;
709
710 if (Nloop == 0)
711 return;
712
713 for (i = 0; i < 2 * r; i++) {
714 const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64];
715 salsa20_blk_t *tmp = Y;
716 salsa20_blk_t *dst = &X[i];
717 size_t k;
718 for (k = 0; k < 16; k++)
719 tmp->w[k] = SWAP_LE32(src->w[k]);
720 salsa20_simd_shuffle(tmp, dst);
721 }
722
723 j = integerify(X, r) & (N - 1);
724
725/*
726 * Normally, VROM implies YESCRYPT_RW, but we check for these separately
727 * because our SMix resets YESCRYPT_RW for the smix2() calls operating on the
728 * entire V when p > 1.
729 */
730//and this is why bbox can't use flags___YESCRYPT_RW in this function
731 if (VROM && (flags & YESCRYPT_RW)) {
732 do {
733 salsa20_blk_t *V_j = &V[j * s];
734 const salsa20_blk_t *VROM_j;
735 j = blockmix_xor_save(X, V_j, r, ctx) & (NROM - 1);
736 VROM_j = &VROM[j * s];
737 j = blockmix_xor(X, VROM_j, X, r, ctx) & (N - 1);
738 } while (Nloop -= 2);
739 } else if (VROM) {
740 do {
741 const salsa20_blk_t *V_j = &V[j * s];
742 j = blockmix_xor(X, V_j, X, r, ctx) & (NROM - 1);
743 V_j = &VROM[j * s];
744 j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
745 } while (Nloop -= 2);
746 } else if (flags & YESCRYPT_RW) {
747 do {
748 salsa20_blk_t *V_j = &V[j * s];
749 j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1);
750 V_j = &V[j * s];
751 j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1);
752 } while (Nloop -= 2);
753 } else if (ctx) {
754 do {
755 const salsa20_blk_t *V_j = &V[j * s];
756 j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
757 V_j = &V[j * s];
758 j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
759 } while (Nloop -= 2);
760 } else {
761 do {
762 const salsa20_blk_t *V_j = &V[j * s];
763 j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1);
764 V_j = &V[j * s];
765 j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1);
766 } while (Nloop -= 2);
767 }
768
769 for (i = 0; i < 2 * r; i++) {
770 const salsa20_blk_t *src = &X[i];
771 salsa20_blk_t *tmp = Y;
772 salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
773 size_t k;
774 for (k = 0; k < 16; k++)
775 tmp->w[k] = SWAP_LE32(src->w[k]);
776 salsa20_simd_unshuffle(tmp, dst);
777 }
778}
779
780/**
781 * p2floor(x):
782 * Largest power of 2 not greater than argument.
783 */
784static uint64_t p2floor(uint64_t x)
785{
786 uint64_t y;
787 while ((y = x & (x - 1)))
788 x = y;
789 return x;
790}
791
792/**
793 * smix(B, r, N, p, t, flags, V, NROM, VROM, XY, S, passwd):
794 * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the
795 * temporary storage V must be 128rN bytes in length; the temporary storage
796 * XY must be 256r or 256rp bytes in length (the larger size is required with
797 * OpenMP-enabled builds). N must be a power of 2 and at least 4. The array V
798 * must be aligned to a multiple of 64 bytes, and arrays B and XY to a multiple
799 * of at least 16 bytes (aligning them to 64 bytes as well saves cache lines
800 * and helps avoid false sharing in OpenMP-enabled builds when p > 1, but it
801 * might also result in cache bank conflicts).
802 */
803#if DISABLE_NROM_CODE
804#define smix(B,r,N,p,t,flags,V,NROM,VROM,XY,S,passwd) \
805 smix(B,r,N,p,t,flags,V,XY,S,passwd)
806#endif
807static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t,
808 uint32_t flags,
809 salsa20_blk_t *V,
810 uint32_t NROM, const salsa20_blk_t *VROM,
811 salsa20_blk_t *XY,
812 uint8_t *S, uint8_t *passwd)
813{
814 size_t s = 2 * r;
815 uint32_t Nchunk;
816 uint64_t Nloop_all, Nloop_rw;
817 uint32_t i;
818
819 Nchunk = N / p;
820 Nloop_all = Nchunk;
821 if (flags___YESCRYPT_RW) {
822 if (t <= 1) {
823 if (t)
824 Nloop_all *= 2; /* 2/3 */
825 Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */
826 } else {
827 Nloop_all *= t - 1;
828 }
829 } else if (t) {
830 if (t == 1)
831 Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */
832 Nloop_all *= t;
833 }
834
835 Nloop_rw = 0;
836 if (flags___YESCRYPT_RW)
837 Nloop_rw = Nloop_all / p;
838
839 Nchunk &= ~(uint32_t)1; /* round down to even */
840 Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */
841 Nloop_rw++; Nloop_rw &= ~(uint64_t)1; /* round up to even */
842
843 for (i = 0; i < p; i++) {
844 uint32_t Vchunk = i * Nchunk;
845 uint32_t Np = (i < p - 1) ? Nchunk : (N - Vchunk);
846 uint8_t *Bp = &B[128 * r * i];
847 salsa20_blk_t *Vp = &V[Vchunk * s];
848 salsa20_blk_t *XYp = XY;
849 pwxform_ctx_t *ctx_i = NULL;
850 if (flags___YESCRYPT_RW) {
851 uint8_t *Si = S + i * Salloc;
852 smix1(Bp, 1, Sbytes / 128, 0 /* no flags */,
853 (salsa20_blk_t *)Si, 0, NULL, XYp, NULL);
854 ctx_i = (pwxform_ctx_t *)(Si + Sbytes);
855 ctx_i->S2 = Si;
856 ctx_i->S1 = Si + Sbytes / 3;
857 ctx_i->S0 = Si + Sbytes / 3 * 2;
858 ctx_i->w = 0;
859 if (i == 0)
860 hmac_block(
861 /* key,len: */ Bp + (128 * r - 64), 64,
862 /* hash fn: */ sha256_begin,
863 /* in,len: */ passwd, 32,
864 /* outbuf: */ passwd
865 );
866 }
867 smix1(Bp, r, Np, flags, Vp, NROM, VROM, XYp, ctx_i);
868 smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp,
869 NROM, VROM, XYp, ctx_i);
870 }
871
872 if (Nloop_all > Nloop_rw) {
873 for (i = 0; i < p; i++) {
874 uint8_t *Bp = &B[128 * r * i];
875 salsa20_blk_t *XYp = XY;
876 pwxform_ctx_t *ctx_i = NULL;
877 if (flags___YESCRYPT_RW) {
878 uint8_t *Si = S + i * Salloc;
879 ctx_i = (pwxform_ctx_t *)(Si + Sbytes);
880 }
881 smix2(Bp, r, N, Nloop_all - Nloop_rw,
882 flags & (uint32_t)~YESCRYPT_RW,
883 V, NROM, VROM, XYp, ctx_i);
884 }
885 }
886}
887
888/* Allocator code */
889
890static void alloc_region(yescrypt_region_t *region, size_t size)
891{
892 uint8_t *base;
893 int flags =
894# ifdef MAP_NOCORE /* huh? */
895 MAP_NOCORE |
896# endif
897 MAP_ANON | MAP_PRIVATE;
898
899 base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
900 if (base == MAP_FAILED)
901 bb_die_memory_exhausted();
902
903#if defined(MADV_HUGEPAGE)
904 /* Reduces mkpasswd qweRTY123@-+ '$y$jHT$123'
905 * (which allocates 4 Gbytes)
906 * run time from 10.543s to 5.635s
907 * Seen on linux-5.18.0.
908 */
909 madvise(base, size, MADV_HUGEPAGE);
910#endif
911 //region->base = base;
912 //region->base_size = size;
913 region->aligned = base;
914 region->aligned_size = size;
915}
916
917static void free_region(yescrypt_region_t *region)
918{
919 if (region->aligned)
920 munmap(region->aligned, region->aligned_size);
921 //region->base = NULL;
922 //region->base_size = 0;
923 region->aligned = NULL;
924 region->aligned_size = 0;
925}
926/**
927 * yescrypt_kdf_body(shared, local, passwd, passwdlen, salt, saltlen,
928 * flags, N, r, p, t, NROM, buf, buflen):
929 * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
930 * p, buflen), or a revision of scrypt as requested by flags and shared, and
931 * write the result into buf.
932 *
933 * shared and flags may request special modes as described in yescrypt.h.
934 *
935 * local is the thread-local data structure, allowing to preserve and reuse a
936 * memory allocation across calls, thereby reducing its overhead.
937 *
938 * t controls computation time while not affecting peak memory usage.
939 *
940 * Return 0 on success; or -1 on error.
941 *
942 * This optimized implementation currently limits N to the range from 4 to
943 * 2^31, but other implementations might not.
944 */
945static int yescrypt_kdf32_body(
946 yescrypt_ctx_t *yctx,
947 const uint8_t *passwd, size_t passwdlen,
948 uint32_t flags, uint64_t N, uint32_t t,
949 uint8_t *buf32)
950{
951#if !DISABLE_NROM_CODE
952 const salsa20_blk_t *VROM;
953#endif
954 size_t B_size, V_size, XY_size, need;
955 uint8_t *B, *S;
956 salsa20_blk_t *V, *XY;
957 struct {
958 uint8_t sha256[32];
959 uint8_t dk[32];
960 } u;
961#define sha256 u.sha256
962#define dk u.dk
963 uint8_t *dkp = buf32;
964 uint32_t r, p;
965
966 /* Sanity-check parameters */
967 switch (flags___YESCRYPT_MODE_MASK) {
968 case 0: /* classic scrypt - can't have anything non-standard */
969 if (flags || t || YCTX_param_NROM)
970 goto out_EINVAL;
971 break;
972 case YESCRYPT_WORM:
973 if (flags != YESCRYPT_WORM || YCTX_param_NROM)
974 goto out_EINVAL;
975 break;
976 case YESCRYPT_RW:
977 if (flags != (flags & YESCRYPT_KNOWN_FLAGS))
978 goto out_EINVAL;
979#if PWXsimple == 2 && PWXgather == 4 && Sbytes == 12288
980 if ((flags & YESCRYPT_RW_FLAVOR_MASK) ==
981 (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 |
982 YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K))
983 break;
984#else
985#error "Unsupported pwxform settings"
986#endif
987 /* FALLTHRU */
988 default:
989 goto out_EINVAL;
990 }
991
992 r = YCTX_param_r;
993 p = YCTX_param_p;
994 if ((uint64_t)r * (uint64_t)p >= 1 << 30) {
995 dbg("r * n >= 2^30");
996 goto out_EINVAL;
997 }
998 if (N > UINT32_MAX) {
999 dbg("N > 0x%lx", (long)UINT32_MAX);
1000 goto out_EINVAL;
1001 }
1002 if (N <= 3
1003 || r < 1
1004 || p < 1
1005 ) {
1006 dbg("bad N, r or p");
1007 goto out_EINVAL;
1008 }
1009 if (r > SIZE_MAX / 256 / p
1010 || N > SIZE_MAX / 128 / r
1011 ) {
1012 /* 32-bit testcase: mkpasswd qweRTY123@-+ '$y$jHT$123'
1013 * (works on 64-bit, needs buffer > 4Gbytes)
1014 */
1015 dbg("r > SIZE_MAX / 256 / p? %c", "NY"[r > SIZE_MAX / 256 / p]);
1016 dbg("N > SIZE_MAX / 128 / r? %c", "NY"[N > SIZE_MAX / 128 / r]);
1017 goto out_EINVAL;
1018 }
1019 if (flags___YESCRYPT_RW) {
1020 /* p cannot be greater than SIZE_MAX/Salloc on 64-bit systems,
1021 but it can on 32-bit systems. */
1022#pragma GCC diagnostic push
1023#pragma GCC diagnostic ignored "-Wtype-limits"
1024 if (N / p <= 3 || p > SIZE_MAX / Salloc) {
1025 dbg("bad p:%ld", (long)p);
1026 goto out_EINVAL;
1027 }
1028#pragma GCC diagnostic pop
1029 }
1030
1031#if !DISABLE_NROM_CODE
1032 VROM = NULL;
1033 if (YCTX_param_NROM)
1034 goto out_EINVAL;
1035#endif
1036
1037 /* Allocate memory */
1038 V = NULL;
1039 V_size = (size_t)128 * r * N;
1040 need = V_size;
1041 B_size = (size_t)128 * r * p;
1042 need += B_size;
1043 if (need < B_size) {
1044 dbg("integer overflow at += B_size(%lu)", (long)B_size);
1045 goto out_EINVAL;
1046 }
1047 XY_size = (size_t)256 * r;
1048 need += XY_size;
1049 if (need < XY_size) {
1050 dbg("integer overflow at += XY_size(%lu)", (long)XY_size);
1051 goto out_EINVAL;
1052 }
1053 if (flags___YESCRYPT_RW) {
1054 size_t S_size = (size_t)Salloc * p;
1055 need += S_size;
1056 if (need < S_size) {
1057 dbg("integer overflow at += S_size(%lu)", (long)S_size);
1058 goto out_EINVAL;
1059 }
1060 }
1061 if (yctx->local->aligned_size < need) {
1062 free_region(yctx->local);
1063 alloc_region(yctx->local, need);
1064 dbg("allocated local:%lu 0x%lx", (long)need, (long)need);
1065 /* standard "j9T" params allocate 16Mbytes here */
1066 }
1067 if (flags & YESCRYPT_ALLOC_ONLY)
1068 return -3; /* expected "failure" */
1069 B = (uint8_t *)yctx->local->aligned;
1070 V = (salsa20_blk_t *)((uint8_t *)B + B_size);
1071 XY = (salsa20_blk_t *)((uint8_t *)V + V_size);
1072 S = NULL;
1073 if (flags___YESCRYPT_RW)
1074 S = (uint8_t *)XY + XY_size;
1075
1076 if (flags) {
1077 hmac_block(
1078 /* key,len: */ (const void*)"yescrypt-prehash", (flags & YESCRYPT_PREHASH) ? 16 : 8,
1079 /* hash fn: */ sha256_begin,
1080 /* in,len: */ passwd, passwdlen,
1081 /* outbuf: */ sha256
1082 );
1083 passwd = sha256;
1084 passwdlen = sizeof(sha256);
1085 }
1086
1087 PBKDF2_SHA256(passwd, passwdlen, yctx->salt, yctx->saltlen, 1, B, B_size);
1088
1089 if (flags)
1090 memcpy(sha256, B, sizeof(sha256));
1091
1092 if (p == 1 || (flags___YESCRYPT_RW)) {
1093 smix(B, r, N, p, t, flags, V, YCTX_param_NROM, VROM, XY, S, sha256);
1094 } else {
1095 uint32_t i;
1096 for (i = 0; i < p; i++) {
1097 smix(&B[(size_t)128 * r * i], r, N, 1, t, flags, V,
1098 YCTX_param_NROM, VROM, XY, NULL, NULL);
1099 }
1100 }
1101
1102 dkp = buf32;
1103 if (flags && /*buflen:*/32 < sizeof(dk)) {
1104 PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, dk, sizeof(dk));
1105 dkp = dk;
1106 }
1107
1108 PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf32, /*buflen:*/32);
1109
1110 /*
1111 * Except when computing classic scrypt, allow all computation so far
1112 * to be performed on the client. The final steps below match those of
1113 * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so
1114 * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of
1115 * SCRAM's use of SHA-1) would be usable with yescrypt hashes.
1116 */
1117 if (flags && !(flags & YESCRYPT_PREHASH)) {
1118 /* Compute ClientKey */
1119 hmac_block(
1120 /* key,len: */ dkp, sizeof(dk),
1121 /* hash fn: */ sha256_begin,
1122 /* in,len: */ "Client Key", 10,
1123 /* outbuf: */ sha256
1124 );
1125 /* Compute StoredKey */
1126 {
1127 size_t clen = /*buflen:*/32;
1128 if (clen > sizeof(dk))
1129 clen = sizeof(dk);
1130 if (sizeof(dk) != 32) { /* not true, optimize it out */
1131 sha256_block(sha256, sizeof(sha256), dk);
1132 memcpy(buf32, dk, clen);
1133 } else {
1134 sha256_block(sha256, sizeof(sha256), buf32);
1135 }
1136 }
1137 }
1138
1139 explicit_bzero(&u, sizeof(u));
1140
1141 /* Success! */
1142 return 0;
1143
1144 out_EINVAL:
1145 //bbox does not need this: errno = EINVAL;
1146 return -1;
1147#undef sha256
1148#undef dk
1149}
1150
1151/**
1152 * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params,
1153 * buf, buflen):
1154 * Compute scrypt or its revision as requested by the parameters. The inputs
1155 * to this function are the same as those for yescrypt_kdf_body() above, with
1156 * the addition of g, which controls hash upgrades (0 for no upgrades so far).
1157 */
1158static
1159int yescrypt_kdf32(
1160 yescrypt_ctx_t *yctx,
1161 const uint8_t *passwd, size_t passwdlen,
1162 uint8_t *buf32)
1163{
1164 uint32_t flags = YCTX_param_flags;
1165 uint64_t N = YCTX_param_N;
1166 uint32_t r = YCTX_param_r;
1167 uint32_t p = YCTX_param_p;
1168 uint32_t t = YCTX_param_t;
1169 uint32_t g = YCTX_param_g;
1170 uint8_t dk32[32];
1171 int retval;
1172
1173 /* Support for hash upgrades has been temporarily removed */
1174 if (g) {
1175 //bbox does not need this: errno = EINVAL;
1176 return -1;
1177 }
1178
1179 if ((flags___YESCRYPT_RW)
1180 && p >= 1
1181 && N / p >= 0x100
1182 && N / p * r >= 0x20000
1183 ) {
1184 if (yescrypt_kdf32_body(yctx,
1185 passwd, passwdlen,
1186 flags | YESCRYPT_ALLOC_ONLY, N, t,
1187 buf32) != -3
1188 ) {
1189 dbg("yescrypt_kdf32_body: not -3");
1190 return -1;
1191 }
1192 retval = yescrypt_kdf32_body(yctx,
1193 passwd, passwdlen,
1194 flags | YESCRYPT_PREHASH, N >> 6, 0,
1195 dk32);
1196 if (retval) {
1197 dbg("yescrypt_kdf32_body(PREHASH):%d", retval);
1198 return retval;
1199 }
1200 passwd = dk32;
1201 passwdlen = sizeof(dk32);
1202 }
1203
1204 retval = yescrypt_kdf32_body(yctx,
1205 passwd, passwdlen,
1206 flags, N, t, buf32);
1207
1208 explicit_bzero(dk32, sizeof(dk32));
1209
1210 dbg("yescrypt_kdf32_body:%d", retval);
1211 return retval;
1212}
diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h
new file mode 100644
index 000000000..b69843f5d
--- /dev/null
+++ b/libbb/yescrypt/alg-yescrypt.h
@@ -0,0 +1,247 @@
1/*-
2 * Copyright 2009 Colin Percival
3 * Copyright 2013-2018 Alexander Peslyak
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * This file was originally written by Colin Percival as part of the Tarsnap
28 * online backup system.
29 */
30
31// busybox debug and size-reduction configuration
32
33#ifdef YESCRYPT_INTERNAL
34# if 1
35# define dbg(...) ((void)0)
36# else
37# define dbg(...) bb_error_msg(__VA_ARGS__)
38# endif
39# if 1
40# define dbg_dec64(...) ((void)0)
41# else
42# define dbg_dec64(...) bb_error_msg(__VA_ARGS__)
43# endif
44# define TEST_DECODE64 0
45#endif
46
47// Only accept one-char parameters in salt, and only first three?
48// Almost any reasonable yescrypt hashes in /etc/shadow should
49// only ever use "jXY" parameters which set N and r.
50// Fancy multi-byte-encoded wide integers are not needed for that.
51#define RESTRICTED_PARAMS 1
52// Note: if you enable the above, please also enable
53// YCTX_param_p, YCTX_param_t, YCTX_param_g, YCTX_param_NROM
54// optimizations, and DISABLE_NROM_CODE.
55
56#define DISABLE_NROM_CODE 1
57
58// How much we save by forcing "standard" value by commenting the next line:
59// 160 bytes
60//#define YCTX_param_flags yctx->param.flags
61// 260 bytes
62//#define flags___YESCRYPT_RW (flags & YESCRYPT_RW)
63// 140 bytes
64//#define flags___YESCRYPT_MODE_MASK (flags & YESCRYPT_MODE_MASK)
65// ^^^^ forcing the above since the code already requires (checks for) this
66// 50 bytes
67#define YCTX_param_N yctx->param.N
68// -100 bytes (negative!!!)
69#define YCTX_param_r yctx->param.r
70// 400 bytes
71//#define YCTX_param_p yctx->param.p
72// 130 bytes
73//#define YCTX_param_t yctx->param.t
74// 2 bytes
75//#define YCTX_param_g yctx->param.g
76// 1 bytes
77// ^^^^ this looks wrong, compiler should be able to constant-propagate the fact that NROM code is dead
78//#define YCTX_param_NROM yctx->param.NROM
79
80#ifndef YCTX_param_flags
81#define YCTX_param_flags (YESCRYPT_RW | YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K)
82#endif
83#ifndef flags___YESCRYPT_RW
84#define flags___YESCRYPT_RW ((void)flags, YESCRYPT_RW)
85#endif
86#ifndef flags___YESCRYPT_MODE_MASK
87#define flags___YESCRYPT_MODE_MASK ((void)flags, YESCRYPT_RW)
88#endif
89// standard ("j9T") values:
90#ifndef YCTX_param_N
91#define YCTX_param_N 4096
92#endif
93#ifndef YCTX_param_r
94#define YCTX_param_r 32
95#endif
96#ifndef YCTX_param_p
97#define YCTX_param_p 1
98#endif
99#ifndef YCTX_param_t
100#define YCTX_param_t 0
101#endif
102#ifndef YCTX_param_g
103#define YCTX_param_g 0
104#endif
105#ifndef YCTX_param_NROM
106#define YCTX_param_NROM 0
107#endif
108
109// "Faster/smaller code" knobs:
110// -941 bytes:
111#define KDF_UNROLL_COPY 0
112// -5324 bytes if 0:
113#define KDF_UNROLL_PWXFORM_ROUND 0
114// -4864 bytes if 0:
115#define KDF_UNROLL_PWXFORM 0
116// if both this ^^^^^^^^^^ and PWXFORM_ROUND set to 0: -7666 bytes
117// -464 bytes:
118#define KDF_UNROLL_SALSA20 0
119
120/**
121 * Type and possible values for the flags argument of yescrypt_kdf(),
122 * yescrypt_encode_params_r(), yescrypt_encode_params(). Most of these may be
123 * OR'ed together, except that YESCRYPT_WORM stands on its own.
124 * Please refer to the description of yescrypt_kdf() below for the meaning of
125 * these flags.
126 */
127/* yescrypt flags:
128 * bits pos: 7654321076543210
129 * ss r w
130 * sbox gg y
131 */
132/* Public */
133#define YESCRYPT_WORM 1
134#define YESCRYPT_RW 0x002
135#define YESCRYPT_ROUNDS_3 0x000 //r=0
136#define YESCRYPT_ROUNDS_6 0x004 //r=1
137#define YESCRYPT_GATHER_1 0x000 //gg=00
138#define YESCRYPT_GATHER_2 0x008 //gg=01
139#define YESCRYPT_GATHER_4 0x010 //gg=10
140#define YESCRYPT_GATHER_8 0x018 //gg=11
141#define YESCRYPT_SIMPLE_1 0x000 //ss=00
142#define YESCRYPT_SIMPLE_2 0x020 //ss=01
143#define YESCRYPT_SIMPLE_4 0x040 //ss=10
144#define YESCRYPT_SIMPLE_8 0x060 //ss=11
145#define YESCRYPT_SBOX_6K 0x000 //sbox=0000
146#define YESCRYPT_SBOX_12K 0x080 //sbox=0001
147#define YESCRYPT_SBOX_24K 0x100 //sbox=0010
148#define YESCRYPT_SBOX_48K 0x180 //sbox=0011
149#define YESCRYPT_SBOX_96K 0x200 //sbox=0100
150#define YESCRYPT_SBOX_192K 0x280 //sbox=0101
151#define YESCRYPT_SBOX_384K 0x300 //sbox=0110
152#define YESCRYPT_SBOX_768K 0x380 //sbox=0111
153
154#ifdef YESCRYPT_INTERNAL
155/* Private */
156#define YESCRYPT_MODE_MASK 0x003
157#define YESCRYPT_RW_FLAVOR_MASK 0x3fc
158#define YESCRYPT_ALLOC_ONLY 0x08000000
159#define YESCRYPT_PREHASH 0x10000000
160#endif
161
162#define YESCRYPT_RW_DEFAULTS \
163 (YESCRYPT_RW | \
164 YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | \
165 YESCRYPT_SBOX_12K)
166
167#define YESCRYPT_DEFAULTS YESCRYPT_RW_DEFAULTS
168
169#ifdef YESCRYPT_INTERNAL
170#define YESCRYPT_KNOWN_FLAGS \
171 (YESCRYPT_MODE_MASK | YESCRYPT_RW_FLAVOR_MASK | \
172 YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH)
173#endif
174
175/* How many chars base-64 encoded bytes require? */
176#define YESCRYPT_BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6)
177/* The /etc/passwd-style hash is "<prefix>$<hash><NUL>" */
178/*
179 * "$y$", up to 8 params of up to 6 chars each, '$', salt
180 * Alternatively, but that's smaller:
181 * "$7$", 3 params encoded as 1+5+5 chars, salt
182 */
183#define YESCRYPT_PREFIX_LEN (3 + 8 * 6 + 1 + YESCRYPT_BYTES2CHARS(32))
184
185#define YESCRYPT_HASH_SIZE 32
186#define YESCRYPT_HASH_LEN YESCRYPT_BYTES2CHARS(YESCRYPT_HASH_SIZE)
187
188/**
189 * Internal type used by the memory allocator. Please do not use it directly.
190 * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since
191 * they might differ from each other in a future version.
192 */
193typedef struct {
194// void *base;
195 void *aligned;
196// size_t base_size;
197 size_t aligned_size;
198} yescrypt_region_t;
199
200/**
201 * yescrypt parameters combined into one struct. N, r, p are the same as in
202 * classic scrypt, except that the meaning of p changes when YESCRYPT_RW is
203 * set. flags, t, g, NROM are special to yescrypt.
204 */
205typedef struct {
206 uint32_t flags;
207 uint32_t r;
208 uint64_t N;
209#if !RESTRICTED_PARAMS
210 uint32_t p, t, g;
211 uint64_t NROM;
212#endif
213} yescrypt_params_t;
214
215typedef struct {
216 yescrypt_params_t param;
217
218 /* salt in binary form */
219 /* stored here to cut down on the amount of function paramaters */
220 unsigned char salt[64];
221 size_t saltlen;
222
223 /* used by the memory allocator */
224 //yescrypt_region_t shared[1];
225 yescrypt_region_t local[1];
226} yescrypt_ctx_t;
227
228/**
229 * yescrypt_r(shared, local, passwd, passwdlen, setting, key, buf, buflen):
230 * Compute and encode an scrypt or enhanced scrypt hash of passwd given the
231 * parameters and salt value encoded in setting. If shared is not NULL, a ROM
232 * is used and YESCRYPT_RW is required. Otherwise, whether to compute classic
233 * scrypt, YESCRYPT_WORM (a slight deviation from classic scrypt), or
234 * YESCRYPT_RW (time-memory tradeoff discouraging modification) is determined
235 * by the setting string. shared (if not NULL) and local must be initialized
236 * as described above for yescrypt_kdf(). buf must be large enough (as
237 * indicated by buflen) to hold the encoded hash string.
238 *
239 * Return the encoded hash string on success; or NULL on error.
240 *
241 * MT-safe as long as local and buf are local to the thread.
242 */
243extern char *yescrypt_r(
244 const uint8_t *passwd, size_t passwdlen,
245 const uint8_t *setting,
246 char *buf, size_t buflen
247);
diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c
new file mode 100644
index 000000000..d5ab8903f
--- /dev/null
+++ b/libbb/yescrypt/y.c
@@ -0,0 +1,16 @@
1/*
2 * The compilation unit for yescrypt-related code.
3 *
4 * Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com>
5 *
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 */
8//kbuild:lib-$(CONFIG_USE_BB_CRYPT_YES) += y.o
9
10#include "libbb.h"
11
12#define YESCRYPT_INTERNAL
13#include "alg-yescrypt.h"
14#include "alg-sha256.c"
15#include "alg-yescrypt-kdf.c"
16#include "alg-yescrypt-common.c"