aboutsummaryrefslogtreecommitdiff
path: root/libbb/yescrypt
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--libbb/yescrypt/Kbuild.src9
-rw-r--r--libbb/yescrypt/PARAMETERS196
-rw-r--r--libbb/yescrypt/README4
-rw-r--r--libbb/yescrypt/alg-sha256.c91
-rw-r--r--libbb/yescrypt/alg-yescrypt-common.c408
-rw-r--r--libbb/yescrypt/alg-yescrypt-kdf.c1212
-rw-r--r--libbb/yescrypt/alg-yescrypt.h247
-rw-r--r--libbb/yescrypt/y.c16
8 files changed, 2183 insertions, 0 deletions
diff --git a/libbb/yescrypt/Kbuild.src b/libbb/yescrypt/Kbuild.src
new file mode 100644
index 000000000..a61211a29
--- /dev/null
+++ b/libbb/yescrypt/Kbuild.src
@@ -0,0 +1,9 @@
1# Makefile for busybox
2#
3# Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com>
4#
5# Licensed under GPLv2, see file LICENSE in this source tree.
6
7lib-y:=
8
9INSERT
diff --git a/libbb/yescrypt/PARAMETERS b/libbb/yescrypt/PARAMETERS
new file mode 100644
index 000000000..d9f5d24e6
--- /dev/null
+++ b/libbb/yescrypt/PARAMETERS
@@ -0,0 +1,196 @@
1 Optimal yescrypt configuration.
2
3yescrypt is very flexible, but configuring it optimally is complicated.
4Here are some guidelines to simplify near-optimal configuration. We
5start by listing the parameters and their typical values, and then give
6currently recommended parameter sets by use case.
7
8
9 Parameters and their typical values.
10
11Set flags (yescrypt flavor) to YESCRYPT_DEFAULTS to use the currently
12recommended flavor. (Other flags values exist for compatibility and for
13specialized cases where you think you know what you're doing.)
14
15Set N (block count) based on target memory usage and running time, as
16well as on the value of r (block size in 128 byte units). N must be a
17power of two.
18
19Set r (block size) to 8 (so that N is in KiB, which is convenient) or to
20another small value (if more optimal or for fine-tuning of the total
21size and/or running time). Reasonable values for r are from 8 to 96.
22
23Set p (parallelism) to 1 meaning no thread-level parallelism within one
24computation of yescrypt. (Use of thread-level parallelism within
25yescrypt makes sense for ROM initialization and for key derivation at
26high memory usage, but usually not for password hashing where
27parallelism is available through concurrent authentication attempts.
28Don't use p > 1 unnecessarily.)
29
30Set t (time) to 0 to use the optimal running time for a given memory
31usage. This will allow you to maximize the memory usage (the value of
32N*r) while staying within your running time constraints. (Non-zero t
33makes sense in specialized cases where you can't afford higher memory
34usage but can afford more time.)
35
36Set g (upgrades) to 0 because there have been no hash upgrades yet.
37
38Set NROM (block count of ROM) to 0 unless you use a ROM (see below).
39NROM must be a power of two.
40
41
42 Password hashing for user authentication, no ROM.
43
44Small and fast (memory usage 2 MiB, performance like bcrypt cost 2^5 -
45latency 2-3 ms and throughput 10,000+ per second on a 16-core server):
46
47flags = YESCRYPT_DEFAULTS, N = 2048, r = 8, p = 1, t = 0, g = 0, NROM = 0
48
49Large and slow (memory usage 16 MiB, performance like bcrypt cost 2^8 -
50latency 10-30 ms and throughput 1000+ per second on a 16-core server):
51
52flags = YESCRYPT_DEFAULTS, N = 4096, r = 32, p = 1, t = 0, g = 0, NROM = 0
53
54Of course, even heavier and slower settings are possible, if affordable.
55Simply double the value of N as many times as needed. Since N must be a
56power of two, you may use r (in the range of 8 to 32) or/and t (in the
57range of 0 to 2) for fine-tuning the running time, but first bring N to
58the maximum you can afford. If this feels too complicated, just use one
59of the two parameter sets given above (preferably the second) as-is.
60
61
62 Password hashing for user authentication, with ROM.
63
64It's similar to the above, except that you need to adjust r, set NROM,
65and initialize the ROM.
66
67First decide on a ROM size, such as making it a large portion of your
68dedicated authentication servers' RAM sizes. Since NROM (block count)
69must be a power of two, you might need to choose r (block size) based on
70how your desired ROM size corresponds to a power of two. Also tuning
71for performance on current hardware, you'll likely end up with r in the
72range from slightly below 16 to 32. For example, to use 15/16 of a
73server's 256 GiB RAM as ROM (thus, making it 240 GiB), you could use
74r=15 or r=30. To use 23/24 of a server's 384 GiB RAM as ROM (thus,
75making it 368 GiB), you'd use r=23. Then set NROM to your desired ROM
76size in KiB divided by 128*r. Note that these examples might (or might
77not) be too extreme, leaving little memory for the rest of the system.
78You could as well opt for 7/8 with r=14 or 11/12 with r=11 or r=22.
79
80Note that higher r may make placing of ROM in e.g. NVMe flash memory
81instead of in RAM more reasonable (or less unreasonable) than it would
82have been with a lower r. If this is a concern as it relates to
83possible attacks and you do not intend to ever do it defensively, you
84might want to keep r lower (e.g., prefer r=15 over r=30 in the example
85above, even if 30 performs slightly faster).
86
87Your adjustments to r, if you deviate from powers of two, will also
88result in weirder memory usage per hash. Like 1.75 MiB at r=14 instead
89of 2 MiB at r=8 that you would have used without a ROM. That's OK.
90
91For ROM initialization, which you do with yescrypt_init_shared(), use
92the same r and NROM that you'd later use for password hashing, choose p
93based on your servers' physical and/or logical CPU count (maybe
94considering eventual upgrades as you won't be able to change this later,
95but without going unnecessarily high - e.g., p=28, p=56, or p=112 make
96sense on servers that currently have 28 physical / 56 logical CPUs), and
97set the rest of the parameters to:
98
99flags = YESCRYPT_DEFAULTS, N = 0, t = 0, g = 0
100
101N is set to 0 because it isn't relevant during ROM initialization (you
102can use different values of N for hashing passwords with the same ROM).
103
104To keep the ROM in e.g. SysV shared memory and reuse it across your
105authentication service restarts, you'd need to allocate the memory and
106set the flags to "YESCRYPT_DEFAULTS | YESCRYPT_SHARED_PREALLOCATED".
107
108For actual password hashing, you'd use your chosen values for N, r,
109NROM, and set the rest of the parameters to:
110
111flags = YESCRYPT_DEFAULTS, p = 1, t = 0, g = 0
112
113Note that although you'd use a large p for ROM initialization, you
114should use p=1 for actual password hashing like you would without a ROM.
115
116Do not forget to pass the ROM into the actual password hashing (and keep
117r and NROM set accordingly).
118
119Since N must be a power of two and r is dependent on ROM size, you may
120use t (in the range of 0 to 2) for fine-tuning the running time, but
121first bring N to the maximum you can afford.
122
123If this feels too complicated, or even if it doesn't, please consider
124engaging Openwall for your yescrypt deployment. We'd be happy to help.
125
126
127 Password-based key derivation.
128
129(Or rather passphrase-based.)
130
131Use settings similar to those for password hashing without a ROM, but
132adjusted for higher memory usage and running time, and optionally with
133thread-level parallelism.
134
135Small and fast (memory usage 128 MiB, running time under 100 ms on a
136fast desktop):
137
138flags = YESCRYPT_DEFAULTS, N = 32768, r = 32, p = 1, t = 0, g = 0, NROM = 0
139
140Large and fast (memory usage 1 GiB, running time under 200 ms on a fast
141quad-core desktop not including memory allocation overhead, under 250 ms
142with the overhead included), but requires build with OpenMP support (or
143otherwise will run as slow as yet be weaker than its p=1 alternative):
144
145flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 0, g = 0, NROM = 0
146
147Large and slower (memory usage 1 GiB, running time under 300 ms on a
148fast quad-core desktop not including memory allocation overhead, under
149350 ms with the overhead included), also requires build with OpenMP
150support (or otherwise will run slower than the p=1 alternative below):
151
152flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 2, g = 0, NROM = 0
153
154Large and slow (memory usage 1 GiB, running time under 600 ms on a fast
155desktop not including memory allocation overhead, under 650 ms with the
156overhead included):
157
158flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 1, t = 0, g = 0, NROM = 0
159
160Just like with password hashing, even heavier and slower settings are
161possible, if affordable, and you achieve them by adjusting N, r, t in
162the same way and in the same preferred ranges (please see the section on
163password hashing without a ROM, above). Unlike with password hashing,
164it makes some sense to go above t=2 if you expect that your users might
165not be able to afford more memory but can afford more time. However,
166increasing the memory usage provides better protection, and we don't
167recommend forcing your users to wait for more than 1 second as they
168could as well type more characters in that time. If this feels too
169complicated, just use one of the above parameter sets as-is.
170
171
172 Amortization of memory allocation overhead.
173
174It takes a significant fraction of yescrypt's total running time to
175allocate memory from the operating system, especially considering that
176the kernel zeroizes the memory before handing it over to your program.
177
178Unless you naturally need to compute yescrypt just once per process, you
179may achieve greater efficiency by fully using advanced yescrypt APIs
180that let you preserve and reuse the memory allocation across yescrypt
181invocations. This is done by reusing the structure pointed to by the
182"yescrypt_local_t *local" argument of yescrypt_r() or yescrypt_kdf()
183without calling yescrypt_free_local() inbetween the repeated invocations
184of yescrypt.
185
186
187 YESCRYPT_DEFAULTS macro.
188
189Please note that the value of the YESCRYPT_DEFAULTS macro might change
190later, so if you use the macro like it's recommended here then for
191results reproducible across versions you might need to store its value
192somewhere along with the hashes or the encrypted data.
193
194If you use yescrypt's standard hash string encoding, then yescrypt
195already encodes and decodes this value for you, so you don't need to
196worry about this.
diff --git a/libbb/yescrypt/README b/libbb/yescrypt/README
new file mode 100644
index 000000000..c1011c56a
--- /dev/null
+++ b/libbb/yescrypt/README
@@ -0,0 +1,4 @@
1The yescrypt code in this directory is adapted from libxcrypt-4.4.38
2with minimal edits, hopefully making it easier to track
3backports by resetting the tree to the commit which created this file,
4then comparing changes in upstream libxcrypt to the tree.
diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c
new file mode 100644
index 000000000..dc748c968
--- /dev/null
+++ b/libbb/yescrypt/alg-sha256.c
@@ -0,0 +1,91 @@
1/*-
2 * Copyright 2005-2016 Colin Percival
3 * Copyright 2016-2018,2021 Alexander Peslyak
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28/**
29 * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
30 * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
31 * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
32 */
33static void
34PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen,
35 const uint8_t *salt, size_t saltlen,
36 uint64_t c, uint8_t *buf, size_t dkLen)
37{
38 hmac_ctx_t Phctx, PShctx;
39 uint32_t i;
40
41 /* Compute HMAC state after processing P. */
42 hmac_begin(&Phctx, passwd, passwdlen, sha256_begin);
43
44 /* Compute HMAC state after processing P and S. */
45 PShctx = Phctx;
46 hmac_hash(&PShctx, salt, saltlen);
47
48 /* Iterate through the blocks. */
49 for (i = 0; dkLen != 0; ) {
50 long U[32 / sizeof(long)];
51 long T[32 / sizeof(long)];
52// Do not make these ^^ uint64_t[]. Keep them long[].
53// Even though the XORing loop below is optimized out,
54// gcc is not smart enough to realize that 64-bit alignment of the stack
55// is no longer useful, and generates ~50 more bytes of code on i386...
56 uint32_t ivec;
57 size_t clen;
58 int k;
59
60 /* Generate INT(i). */
61 i++;
62 ivec = SWAP_BE32(i);
63
64 /* Compute U_1 = PRF(P, S || INT(i)). */
65 hmac_peek_hash(&PShctx, (void*)T, &ivec, 4, NULL);
66//TODO: the above is a vararg function, might incur some ABI pain
67//does libbb need a non-vararg version with just one (buf,len)?
68
69 if (c > 1) {
70//in yescrypt, c is always 1, so this if() branch is optimized out
71 uint64_t j;
72 /* T_i = U_1 ... */
73 memcpy(U, T, 32);
74 for (j = 2; j <= c; j++) {
75 /* Compute U_j. */
76 hmac_peek_hash(&Phctx, (void*)U, U, 32, NULL);
77 /* ... xor U_j ... */
78 for (k = 0; k < 32 / sizeof(long); k++)
79 T[k] ^= U[k];
80 //TODO: xorbuf32_aligned_long(T, U);
81 }
82 }
83
84 /* Copy as many bytes as necessary into buf. */
85 clen = dkLen;
86 if (clen > 32)
87 clen = 32;
88 buf = mempcpy(buf, T, clen);
89 dkLen -= clen;
90 }
91}
diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c
new file mode 100644
index 000000000..c51823787
--- /dev/null
+++ b/libbb/yescrypt/alg-yescrypt-common.c
@@ -0,0 +1,408 @@
1/*-
2 * Copyright 2013-2018 Alexander Peslyak
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted.
7 *
8 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
10 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
11 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
12 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
13 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
14 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
15 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
16 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
17 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
18 * SUCH DAMAGE.
19 */
20
21#if RESTRICTED_PARAMS
22
23#define decode64_uint32(dst, src, min) \
24({ \
25 uint32_t d32 = a2i64(*(src)); \
26 if (d32 > 47) \
27 goto fail; \
28 *(dst) = d32 + (min); \
29 ++src; \
30})
31#define test_decode64_uint32() ((void)0)
32#define FULL_PARAMS(...)
33
34#else
35
36#define FULL_PARAMS(...) __VA_ARGS__
37
38/* Not inlining:
39 * de/encode64 functions are only used to read
40 * yescrypt_params_t field, and convert salt to binary -
41 * both of these are negligible compared to main hashing operation
42 */
43static NOINLINE const uint8_t *decode64_uint32(
44 uint32_t *dst,
45 const uint8_t *src, uint32_t val)
46{
47 uint32_t start = 0, end = 47, bits = 0;
48 uint32_t c;
49
50 if (!src) /* previous decode failed already? */
51 goto fail;
52
53 c = a2i64(*src++);
54 if (c > 63)
55 goto fail;
56
57// The encoding of number N:
58// start = 0 end = 47
59// If N < 48, it is encoded verbatim, else
60// N -= 48
61// start = end+1 = 48
62// end += (64-end)/2 = 55
63// If N < (end+1-start)<<6 = 8<<6, it is encoded as 48+(N>>6)|low6bits (that is, 48...55|<6bit>), else
64// N -= 8<<6
65// start = end+1 = 56
66// end += (64-end)/2 = 59
67// If N < (end+1-start)<<2*6 = 4<<12, it is encoded as 56+(N>>2*6)|low12bits (that is, 56...59|<6bit>|<6bit>), else
68// ...same for 60..61|<6bit>|<6bit>|<6bit>
69// .......same for 62|<6bit>|<6bit>|<6bit>|<6bit>
70// .......same for 63|<6bit>|<6bit>|<6bit>|<6bit>|<6bit>
71 dbg_dec64("c:%d val:0x%08x", (int)c, (unsigned)val);
72 while (c > end) {
73 dbg_dec64("c:%d > end:%d", (int)c, (int)end);
74 val += (end + 1 - start) << bits;
75 dbg_dec64("val+=0x%08x", (int)((end + 1 - start) << bits));
76 dbg_dec64(" val:0x%08x", (unsigned)val);
77 start = end + 1;
78 end += (64 - end) / 2;
79 bits += 6;
80 dbg_dec64("start=%d", (int)start);
81 dbg_dec64("end=%d", (int)end);
82 dbg_dec64("bits=%d", (int)bits);
83 }
84
85 val += (c - start) << bits;
86 dbg_dec64("final val+=0x%08x", (int)((c - start) << bits));
87 dbg_dec64(" val:0x%08x", (unsigned)val);
88
89 while (bits != 0) {
90 c = a2i64(*src++);
91 if (c > 63)
92 goto fail;
93 bits -= 6;
94 val += c << bits;
95 dbg_dec64("low bits val+=0x%08x", (int)(c << bits));
96 dbg_dec64(" val:0x%08x", (unsigned)val);
97 }
98 ret:
99 *dst = val;
100 return src;
101 fail:
102 val = 0;
103 src = NULL;
104 goto ret;
105}
106
107#if TEST_DECODE64
108static void test_decode64_uint32(void)
109{
110 const uint8_t *src, *end;
111 uint32_t u32;
112 int a = 48;
113 int b = 8<<6; // 0x0200
114 int c = 4<<12; // 0x04000
115 int d = 2<<18; // 0x080000
116 int e = 1<<24; // 0x1000000
117
118 src = (void*)"wzzz";
119 end = decode64_uint32(&u32, src, 0);
120 if (u32 != 0x0003ffff+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
121 if (end != src + 4) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
122 src = (void*)"xzzz";
123 end = decode64_uint32(&u32, src, 0);
124 if (u32 != 0x0007ffff+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
125 if (end != src + 4) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
126 // Note how the last representable "x---" encoding, 0x7ffff, is exactly d-1!
127 // And if we now increment it, we get:
128 src = (void*)"y....";
129 end = decode64_uint32(&u32, src, 0);
130 if (u32 != 0x00000000+d+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
131 if (end != src + 5) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
132 src = (void*)"yzzzz";
133 end = decode64_uint32(&u32, src, 0);
134 if (u32 != 0x00ffffff+d+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
135 if (end != src + 5) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
136
137 src = (void*)"zzzzzz";
138 end = decode64_uint32(&u32, src, 0);
139 if (u32 != 0x3fffffff+e+d+c+b+a) bb_error_msg_and_die("Incorrect decode '%s':0x%08x", src, (unsigned)u32);
140 if (end != src + 6) bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", src, src, end);
141
142 bb_error_msg("test_decode64_uint32() OK");
143}
144#else
145# define test_decode64_uint32() ((void)0)
146#endif
147
148#endif /* !RESTRICTED_PARAMS */
149
150#if 1
151static const uint8_t *decode64(
152 uint8_t *dst, size_t *dstlen,
153 const uint8_t *src)
154{
155 unsigned dstpos = 0;
156
157 dbg_dec64("src:'%s'", src);
158 for (;;) {
159 uint32_t c, value = 0;
160 int bits = 0;
161 while (*src != '\0' && *src != '$') {
162 c = a2i64(*src);
163 if (c > 63) { /* bad ascii64 char, stop decoding at it */
164 break;
165 }
166 src++;
167 value |= c << bits;
168 bits += 6;
169 if (bits == 24) /* got 4 chars */
170 goto store;
171 }
172 /* we read entire src, or met a non-ascii64 char (such as "$") */
173 if (bits == 0)
174 break;
175 /* else: we got last, partial bit block - store it */
176 store:
177 dbg_dec64(" storing bits:%d dstpos:%u v:%08x", bits, dstpos, (int)SWAP_BE32(value)); //BE to see lsb first
178 for (;;) {
179 if ((*src == '\0' || *src == '$')
180 && value == 0 && bits < 8
181 ) {
182 /* Example: mkpasswd PWD '$y$j9T$123':
183 * the "123" is bits:18 value:03,51,00
184 * is considered to be 2 bytes, not 3!
185 *
186 * '$y$j9T$zzz' in upstream fails outright (3rd byte isn't zero).
187 * IOW: for upstream, validity of salt depends on VALUE,
188 * not just size of salt. Which is a bug.
189 * The '$y$j9T$zzz.' salt is the same
190 * (it adds 6 zero msbits) but upstream works with it,
191 * thus '$y$j9T$zzz' should work too and give the same result.
192 */
193 goto end;
194 }
195 if (dstpos >= *dstlen) {
196 dbg_dec64(" ERR: bits:%d dstpos:%u dst[] is too small", bits, dstpos);
197 goto fail;
198 }
199 *dst++ = value;
200 dstpos++;
201 value >>= 8;
202 bits -= 8;
203 if (bits <= 0) /* can get negative, if we e.g. had 6 bits */
204 break;
205 }
206 if (*src == '\0' || *src == '$')
207 break;
208 }
209 end:
210 *dstlen = dstpos;
211 dbg_dec64("dec64: OK, dst[%d]", (int)dstpos);
212 return src;
213 fail:
214 /* *dstlen = 0; - not needed, caller detects error by seeing NULL */
215 return NULL;
216}
217#else
218/* Buggy (and larger) original code */
219static const uint8_t *decode64(
220 uint8_t *dst, size_t *dstlen,
221 const uint8_t *src, size_t srclen)
222{
223 size_t dstpos = 0;
224
225 while (dstpos <= *dstlen && srclen) {
226 uint32_t value = 0, bits = 0;
227 while (srclen--) {
228 uint32_t c = a2i64(*src);
229 if (c > 63) {
230 srclen = 0;
231 break;
232 }
233 src++;
234 value |= c << bits;
235 bits += 6;
236 if (bits >= 24)
237 break;
238 }
239 if (!bits)
240 break;
241 if (bits < 12) /* must have at least one full byte */
242 goto fail;
243 dbg_dec64(" storing bits:%d v:%08x", (int)bits, (int)SWAP_BE32(value)); //BE to see lsb first
244 while (dstpos++ < *dstlen) {
245 *dst++ = value;
246 value >>= 8;
247 bits -= 8;
248 if (bits < 8) { /* 2 or 4 */
249 if (value) /* must be 0 */
250 goto fail;
251 bits = 0;
252 break;
253 }
254 }
255 if (bits)
256 goto fail;
257 }
258
259 if (!srclen && dstpos <= *dstlen) {
260 *dstlen = dstpos;
261 dbg_dec64("dec64: OK, dst[%d]", (int)dstpos);
262 return src;
263 }
264 fail:
265 /* *dstlen = 0; - not needed, caller detects error by seeing NULL */
266 return NULL;
267}
268#endif
269
270static char *encode64(
271 char *dst, size_t dstlen,
272 const uint8_t *src, size_t srclen)
273{
274 while (srclen) {
275 uint32_t value = 0, b = 0;
276 do {
277 value |= (uint32_t)(*src++ << b);
278 b += 8;
279 srclen--;
280 } while (srclen && b < 24);
281
282 b >>= 3; /* number of bits to number of bytes */
283 b++; /* 1, 2 or 3 bytes will become 2, 3 or 4 ascii64 chars */
284 dstlen -= b;
285 if ((ssize_t)dstlen <= 0)
286 return NULL;
287 dst = num2str64_lsb_first(dst, value, b);
288 }
289 *dst = '\0';
290 return dst;
291}
292
293char *yescrypt_r(
294 const uint8_t *passwd, size_t passwdlen,
295 const uint8_t *setting,
296 char *buf, size_t buflen)
297{
298 struct {
299 yescrypt_ctx_t yctx[1];
300 unsigned char hashbin32[32];
301 } u;
302#define yctx u.yctx
303#define hashbin32 u.hashbin32
304 char *dst;
305 const uint8_t *src, *saltend;
306 size_t need, prefixlen;
307 uint32_t u32;
308
309 test_decode64_uint32();
310
311 memset(yctx, 0, sizeof(yctx));
312 FULL_PARAMS(yctx->param.p = 1;)
313
314 /* we assume setting starts with "$y$" (caller must ensure this) */
315 src = setting + 3;
316
317 src = decode64_uint32(&yctx->param.flags, src, 0);
318 /* "j9T" returns: 0x2f */
319 //if (!src)
320 // goto fail;
321
322 if (yctx->param.flags < YESCRYPT_RW) {
323 dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags);
324 goto fail; // bbox: we don't support scrypt - only yescrypt
325 } else if (yctx->param.flags <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) {
326 /* "j9T" sets flags to 0xb6 */
327 yctx->param.flags = YESCRYPT_RW + ((yctx->param.flags - YESCRYPT_RW) << 2);
328 dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags);
329 dbg(" YESCRYPT_RW:%u", !!(yctx->param.flags & YESCRYPT_RW));
330 dbg((yctx->param.flags & YESCRYPT_RW_FLAVOR_MASK) ==
331 (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K)
332 ? " YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K"
333 : " flags are not standard"
334 );
335 } else {
336 goto fail;
337 }
338
339 src = decode64_uint32(&u32, src, 1);
340 if (/*!src ||*/ u32 > 63)
341 goto fail;
342 yctx->param.N = (uint64_t)1 << u32;
343 /* "j9T" sets to 4096 (1<<12) */
344 dbg("yctx->param.N=%llu (1<<%u)", (unsigned long long)yctx->param.N, (unsigned)u32);
345
346 src = decode64_uint32(&yctx->param.r, src, 1);
347 /* "j9T" sets to 32 */
348 dbg("yctx->param.r=%u", yctx->param.r);
349
350 if (!src)
351 goto fail;
352 if (*src != '$') {
353#if RESTRICTED_PARAMS
354 goto fail;
355#else
356 src = decode64_uint32(&u32, src, 1);
357 dbg("yescrypt has extended params:0x%x", (unsigned)u32);
358 if (u32 & 1)
359 src = decode64_uint32(&yctx->param.p, src, 2);
360 if (u32 & 2)
361 src = decode64_uint32(&yctx->param.t, src, 1);
362 if (u32 & 4)
363 src = decode64_uint32(&yctx->param.g, src, 1);
364 if (u32 & 8) {
365 src = decode64_uint32(&u32, src, 1);
366 if (/*!src ||*/ u32 > 63)
367 goto fail;
368 yctx->param.NROM = (uint64_t)1 << u32;
369 }
370 if (!src)
371 goto fail;
372 if (*src != '$')
373 goto fail;
374#endif
375 }
376
377 yctx->saltlen = sizeof(yctx->salt);
378 src++; /* now points to salt */
379 saltend = decode64(yctx->salt, &yctx->saltlen, src);
380 if (!saltend || (*saltend != '\0' && *saltend != '$'))
381 goto fail; /* salt[] is too small, or bad char during decode */
382 dbg_dec64("salt is %d ascii64 chars -> %d bytes (in binary)", (int)(saltend - src), (int)yctx->saltlen);
383
384 prefixlen = saltend - setting;
385 need = prefixlen + 1 + YESCRYPT_HASH_LEN + 1;
386 if (need > buflen /*overflow is quite unlikely: || need < prefixlen*/)
387 goto fail;
388
389 if (yescrypt_kdf32(yctx, passwd, passwdlen, hashbin32)) {
390 dbg("error in yescrypt_kdf32");
391 goto fail;
392 }
393
394 dst = mempcpy(buf, setting, prefixlen);
395 *dst++ = '$';
396 dst = encode64(dst, buflen - (dst - buf), hashbin32, sizeof(hashbin32));
397 if (!dst)
398 goto fail;
399 ret:
400 free_region(yctx->local);
401 explicit_bzero(&u, sizeof(u));
402 return buf;
403 fail:
404 buf = NULL;
405 goto ret;
406#undef yctx
407#undef hashbin32
408}
diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c
new file mode 100644
index 000000000..a9a1bd591
--- /dev/null
+++ b/libbb/yescrypt/alg-yescrypt-kdf.c
@@ -0,0 +1,1212 @@
1/*-
2 * Copyright 2009 Colin Percival
3 * Copyright 2012-2018 Alexander Peslyak
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * This file was originally written by Colin Percival as part of the Tarsnap
28 * online backup system.
29 */
30
31#if __STDC_VERSION__ >= 199901L
32/* Have restrict */
33#elif defined(__GNUC__)
34#define restrict __restrict
35#else
36#define restrict
37#endif
38
39#ifdef __GNUC__
40#define unlikely(exp) __builtin_expect(exp, 0)
41#else
42#define unlikely(exp) (exp)
43#endif
44
45typedef union {
46 uint32_t w[16];
47 uint64_t d[8];
48} salsa20_blk_t;
49
50static void salsa20_simd_shuffle(
51 const salsa20_blk_t *Bin,
52 salsa20_blk_t *Bout)
53{
54#define COMBINE(out, in1, in2) \
55do { \
56 Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); \
57} while (0)
58 COMBINE(0, 0, 2);
59 COMBINE(1, 5, 7);
60 COMBINE(2, 2, 4);
61 COMBINE(3, 7, 1);
62 COMBINE(4, 4, 6);
63 COMBINE(5, 1, 3);
64 COMBINE(6, 6, 0);
65 COMBINE(7, 3, 5);
66#undef COMBINE
67}
68
69static void salsa20_simd_unshuffle(
70 const salsa20_blk_t *Bin,
71 salsa20_blk_t *Bout)
72{
73#define UNCOMBINE(out, in1, in2) \
74do { \
75 Bout->w[out * 2] = Bin->d[in1]; \
76 Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; \
77} while (0)
78 UNCOMBINE(0, 0, 6);
79 UNCOMBINE(1, 5, 3);
80 UNCOMBINE(2, 2, 0);
81 UNCOMBINE(3, 7, 5);
82 UNCOMBINE(4, 4, 2);
83 UNCOMBINE(5, 1, 7);
84 UNCOMBINE(6, 6, 4);
85 UNCOMBINE(7, 3, 1);
86#undef UNCOMBINE
87}
88
89#define DECL_X \
90 salsa20_blk_t X
91#define DECL_Y \
92 salsa20_blk_t Y
93
94#if KDF_UNROLL_COPY
95#define COPY(out, in) \
96do { \
97 (out).d[0] = (in).d[0]; \
98 (out).d[1] = (in).d[1]; \
99 (out).d[2] = (in).d[2]; \
100 (out).d[3] = (in).d[3]; \
101 (out).d[4] = (in).d[4]; \
102 (out).d[5] = (in).d[5]; \
103 (out).d[6] = (in).d[6]; \
104 (out).d[7] = (in).d[7]; \
105} while (0)
106#else
107#define COPY(out, in) \
108do { \
109 memcpy((out).d, (in).d, sizeof((in).d)); \
110} while (0)
111#endif
112
113#define READ_X(in) COPY(X, in)
114#define WRITE_X(out) COPY(out, X)
115
116/**
117 * salsa20(B):
118 * Apply the Salsa20 core to the provided block.
119 */
120static void salsa20(salsa20_blk_t *restrict B,
121 salsa20_blk_t *restrict Bout,
122 uint32_t doublerounds)
123{
124 salsa20_blk_t X;
125#define x X.w
126
127 salsa20_simd_unshuffle(B, &X);
128
129 do {
130#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
131 /* Operate on columns */
132#if KDF_UNROLL_SALSA20
133 x[ 4] ^= R(x[ 0]+x[12], 7); // x[j] ^= R(x[k]+x[l], CONST)
134 x[ 8] ^= R(x[ 4]+x[ 0], 9);
135 x[12] ^= R(x[ 8]+x[ 4],13);
136 x[ 0] ^= R(x[12]+x[ 8],18);
137
138 x[ 9] ^= R(x[ 5]+x[ 1], 7);
139 x[13] ^= R(x[ 9]+x[ 5], 9);
140 x[ 1] ^= R(x[13]+x[ 9],13);
141 x[ 5] ^= R(x[ 1]+x[13],18);
142
143 x[14] ^= R(x[10]+x[ 6], 7);
144 x[ 2] ^= R(x[14]+x[10], 9);
145 x[ 6] ^= R(x[ 2]+x[14],13);
146 x[10] ^= R(x[ 6]+x[ 2],18);
147
148 x[ 3] ^= R(x[15]+x[11], 7);
149 x[ 7] ^= R(x[ 3]+x[15], 9);
150 x[11] ^= R(x[ 7]+x[ 3],13);
151 x[15] ^= R(x[11]+x[ 7],18);
152#else
153 {
154 unsigned j, k, l;
155 j = 4; k = 0; l = 12;
156 for (;;) {
157 uint32_t t;
158 x[j] ^= ({ t = x[k] + x[l]; R(t, 7); }); l = k; k = j; j = (j+4) & 0xf;
159 x[j] ^= ({ t = x[k] + x[l]; R(t, 9); }); l = k; k = j; j = (j+4) & 0xf;
160 x[j] ^= ({ t = x[k] + x[l]; R(t,13); }); l = k; k = j; j = (j+4) & 0xf;
161 x[j] ^= ({ t = x[k] + x[l]; R(t,18); });
162 if (j == 15) break;
163 l = j + 1; k = j + 5; j = (j+9) & 0xf;
164 }
165 }
166#endif
167 /* Operate on rows */
168#if KDF_UNROLL_SALSA20
169// i=0 n=0
170 x[ 1] ^= R(x[ 0]+x[ 3], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
171 x[ 2] ^= R(x[ 1]+x[ 0], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
172 x[ 3] ^= R(x[ 2]+x[ 1],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
173 x[ 0] ^= R(x[ 3]+x[ 2],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
174// i=4 n=1 ^^^j^^^ ^^^k^^^ ^^^l^^^
175 x[ 6] ^= R(x[ 5]+x[ 4], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
176 x[ 7] ^= R(x[ 6]+x[ 5], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
177 x[ 4] ^= R(x[ 7]+x[ 6],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
178 x[ 5] ^= R(x[ 4]+x[ 7],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
179// i=8 n=2
180 x[11] ^= R(x[10]+x[ 9], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
181 x[ 8] ^= R(x[11]+x[10], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
182 x[ 9] ^= R(x[ 8]+x[11],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
183 x[10] ^= R(x[ 9]+x[ 8],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
184// i=12 n=3
185 x[12] ^= R(x[15]+x[14], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
186 x[13] ^= R(x[12]+x[15], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
187 x[14] ^= R(x[13]+x[12],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
188 x[15] ^= R(x[14]+x[13],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
189#else
190 {
191 unsigned j, k, l;
192 uint32_t *xrow;
193 j = 1; k = 0; l = 3;
194 xrow = &x[0];
195 for (;;) {
196 uint32_t t;
197 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 7); }); l = k; k = j; j = (j+1) & 3;
198 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 9); }); l = k; k = j; j = (j+1) & 3;
199 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,13); }); l = k; k = j; j = (j+1) & 3;
200 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,18); });
201 if (j == 3) break;
202 l = j; k = j + 1; j = (j+2) & 3;
203 xrow += 4;
204 }
205 }
206#endif
207
208#undef R
209 } while (--doublerounds);
210#undef x
211
212 {
213 uint32_t i;
214 salsa20_simd_shuffle(&X, Bout);
215 for (i = 0; i < 16; i++) {
216 // bbox: note: was unrolled x4
217 B->w[i] = Bout->w[i] += B->w[i];
218 }
219 }
220#if 0
221 /* Too expensive */
222 explicit_bzero(&X, sizeof(X));
223#endif
224}
225
226/**
227 * Apply the Salsa20/2 core to the block provided in X.
228 */
229#define SALSA20_2(out) \
230 salsa20(&X, &out, 1)
231
232#if 0
233#define XOR(out, in1, in2) \
234do { \
235 (out).d[0] = (in1).d[0] ^ (in2).d[0]; \
236 (out).d[1] = (in1).d[1] ^ (in2).d[1]; \
237 (out).d[2] = (in1).d[2] ^ (in2).d[2]; \
238 (out).d[3] = (in1).d[3] ^ (in2).d[3]; \
239 (out).d[4] = (in1).d[4] ^ (in2).d[4]; \
240 (out).d[5] = (in1).d[5] ^ (in2).d[5]; \
241 (out).d[6] = (in1).d[6] ^ (in2).d[6]; \
242 (out).d[7] = (in1).d[7] ^ (in2).d[7]; \
243} while (0)
244#else
245#define XOR(out, in1, in2) \
246do { \
247 xorbuf64_3_aligned64(&(out).d, &(in1).d, &(in2).d); \
248} while (0)
249#endif
250
251#define XOR_X(in) XOR(X, X, in)
252#define XOR_X_2(in1, in2) XOR(X, in1, in2)
253#define XOR_X_WRITE_XOR_Y_2(out, in) \
254do { \
255 XOR(Y, out, in); \
256 COPY(out, Y); \
257 XOR(X, X, Y); \
258} while (0)
259
260/**
261 * Apply the Salsa20/8 core to the block provided in X ^ in.
262 */
263#define SALSA20_8_XOR_MEM(in, out) \
264do { \
265 XOR_X(in); \
266 salsa20(&X, &out, 4); \
267} while (0)
268
269#define INTEGERIFY ((uint32_t)X.d[0])
270
271/**
272 * blockmix_salsa8(Bin, Bout, r):
273 * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r
274 * bytes in length; the output Bout must also be the same size.
275 */
276static void blockmix_salsa8(
277 const salsa20_blk_t *restrict Bin,
278 salsa20_blk_t *restrict Bout,
279 size_t r)
280{
281 size_t i;
282 DECL_X;
283
284 READ_X(Bin[r * 2 - 1]);
285 for (i = 0; i < r; i++) {
286 SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]);
287 SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]);
288 }
289}
290
291static uint32_t blockmix_salsa8_xor(
292 const salsa20_blk_t *restrict Bin1,
293 const salsa20_blk_t *restrict Bin2,
294 salsa20_blk_t *restrict Bout,
295 size_t r)
296{
297 size_t i;
298 DECL_X;
299
300 XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]);
301 for (i = 0; i < r; i++) {
302 XOR_X(Bin1[i * 2]);
303 SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]);
304 XOR_X(Bin1[i * 2 + 1]);
305 SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]);
306 }
307
308 return INTEGERIFY;
309}
310
311/* This is tunable */
312#define Swidth 8
313
314/* Not tunable in this implementation, hard-coded in a few places */
315#define PWXsimple 2
316#define PWXgather 4
317
318/* Derived values. Not tunable except via Swidth above. */
319#define PWXbytes (PWXgather * PWXsimple * 8)
320#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8)
321#define Smask (((1 << Swidth) - 1) * PWXsimple * 8)
322#define Smask2 (((uint64_t)Smask << 32) | Smask)
323
324#define DECL_SMASK2REG do {} while (0)
325#define FORCE_REGALLOC_3 do {} while (0)
326#define MAYBE_MEMORY_BARRIER do {} while (0)
327
328#define PWXFORM_SIMD(x0, x1) \
329do { \
330 uint64_t x = x0 & Smask2; \
331 uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \
332 uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \
333 x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \
334 x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \
335} while (0)
336
337#if KDF_UNROLL_PWXFORM_ROUND
338#define PWXFORM_ROUND \
339do { \
340 PWXFORM_SIMD(X.d[0], X.d[1]); \
341 PWXFORM_SIMD(X.d[2], X.d[3]); \
342 PWXFORM_SIMD(X.d[4], X.d[5]); \
343 PWXFORM_SIMD(X.d[6], X.d[7]); \
344} while (0)
345#else
346#define PWXFORM_ROUND \
347do { \
348 for (int pwxi=0; pwxi<8; pwxi+=2) \
349 PWXFORM_SIMD(X.d[pwxi], X.d[pwxi + 1]); \
350} while (0)
351#endif
352
353/*
354 * This offset helps address the 256-byte write block via the single-byte
355 * displacements encodable in x86(-64) instructions. It is needed because the
356 * displacements are signed. Without it, we'd get 4-byte displacements for
357 * half of the writes. Setting it to 0x80 instead of 0x7c would avoid needing
358 * a displacement for one of the writes, but then the LEA instruction would
359 * need a 4-byte displacement.
360 */
361#define PWXFORM_WRITE_OFFSET 0x7c
362
363#define PWXFORM_WRITE \
364do { \
365 WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)); \
366 Sw += 64; \
367} while (0)
368
369#if KDF_UNROLL_PWXFORM
370#define PWXFORM \
371do { \
372 uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
373 FORCE_REGALLOC_3; \
374 MAYBE_MEMORY_BARRIER; \
375 PWXFORM_ROUND; \
376 PWXFORM_ROUND; PWXFORM_WRITE; \
377 PWXFORM_ROUND; PWXFORM_WRITE; \
378 PWXFORM_ROUND; PWXFORM_WRITE; \
379 PWXFORM_ROUND; PWXFORM_WRITE; \
380 PWXFORM_ROUND; \
381 w = (w + 64 * 4) & Smask2; \
382 { \
383 uint8_t *Stmp = S2; \
384 S2 = S1; \
385 S1 = S0; \
386 S0 = Stmp; \
387 } \
388} while (0)
389#else
390#define PWXFORM \
391do { \
392 uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
393 FORCE_REGALLOC_3; \
394 MAYBE_MEMORY_BARRIER; \
395 PWXFORM_ROUND; \
396 for (int pwxj=0; pwxj<4; pwxj++) {\
397 PWXFORM_ROUND; PWXFORM_WRITE; \
398 } \
399 PWXFORM_ROUND; \
400 w = (w + 64 * 4) & Smask2; \
401 { \
402 uint8_t *Stmp = S2; \
403 S2 = S1; \
404 S1 = S0; \
405 S0 = Stmp; \
406 } \
407} while (0)
408#endif
409
410typedef struct {
411 uint8_t *S0, *S1, *S2;
412 size_t w;
413} pwxform_ctx_t;
414
415#define Salloc (Sbytes + ((sizeof(pwxform_ctx_t) + 63) & ~63U))
416
417/**
418 * blockmix_pwxform(Bin, Bout, r, S):
419 * Compute Bout = BlockMix_pwxform{salsa20/2, r, S}(Bin). The input Bin must
420 * be 128r bytes in length; the output Bout must also be the same size.
421 */
422static void blockmix(
423 const salsa20_blk_t *restrict Bin,
424 salsa20_blk_t *restrict Bout,
425 size_t r,
426 pwxform_ctx_t *restrict ctx)
427{
428 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
429 size_t w = ctx->w;
430 size_t i;
431 DECL_X;
432
433 /* Convert count of 128-byte blocks to max index of 64-byte block */
434 r = r * 2 - 1;
435
436 READ_X(Bin[r]);
437
438 DECL_SMASK2REG;
439
440 i = 0;
441 for (;;) {
442 XOR_X(Bin[i]);
443 PWXFORM;
444 if (unlikely(i >= r))
445 break;
446 WRITE_X(Bout[i]);
447 i++;
448 }
449
450 ctx->S0 = S0;
451 ctx->S1 = S1;
452 ctx->S2 = S2;
453 ctx->w = w;
454
455 SALSA20_2(Bout[i]);
456}
457
458static uint32_t blockmix_xor(
459 const salsa20_blk_t *Bin1,
460 const salsa20_blk_t *restrict Bin2,
461 salsa20_blk_t *Bout,
462 size_t r,
463 pwxform_ctx_t *restrict ctx)
464{
465 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
466 size_t w = ctx->w;
467 size_t i;
468 DECL_X;
469
470 /* Convert count of 128-byte blocks to max index of 64-byte block */
471 r = r * 2 - 1;
472
473 XOR_X_2(Bin1[r], Bin2[r]);
474
475 DECL_SMASK2REG;
476
477 i = 0;
478 r--;
479 for (;;) {
480 XOR_X(Bin1[i]);
481 XOR_X(Bin2[i]);
482 PWXFORM;
483 if (unlikely(i > r))
484 break;
485 WRITE_X(Bout[i]);
486 i++;
487 }
488
489 ctx->S0 = S0;
490 ctx->S1 = S1;
491 ctx->S2 = S2;
492 ctx->w = w;
493
494 SALSA20_2(Bout[i]);
495
496 return INTEGERIFY;
497}
498
499static uint32_t blockmix_xor_save(
500 salsa20_blk_t *restrict Bin1out,
501 salsa20_blk_t *restrict Bin2,
502 size_t r,
503 pwxform_ctx_t *restrict ctx)
504{
505 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
506 size_t w = ctx->w;
507 size_t i;
508 DECL_X;
509 DECL_Y;
510
511 /* Convert count of 128-byte blocks to max index of 64-byte block */
512 r = r * 2 - 1;
513
514 XOR_X_2(Bin1out[r], Bin2[r]);
515
516 DECL_SMASK2REG;
517
518 i = 0;
519 r--;
520 for (;;) {
521 XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]);
522 PWXFORM;
523 if (unlikely(i > r))
524 break;
525 WRITE_X(Bin1out[i]);
526 i++;
527 }
528
529 ctx->S0 = S0;
530 ctx->S1 = S1;
531 ctx->S2 = S2;
532 ctx->w = w;
533
534 SALSA20_2(Bin1out[i]);
535
536 return INTEGERIFY;
537}
538
539/**
540 * integerify(B, r):
541 * Return the result of parsing B_{2r-1} as a little-endian integer.
542 */
543static inline uint32_t integerify(const salsa20_blk_t *B, size_t r)
544{
545/*
546 * Our 64-bit words are in host byte order, which is why we don't just read
547 * w[0] here (would be wrong on big-endian). Also, our 32-bit words are
548 * SIMD-shuffled (so the next 32 bits would be part of d[6]), but currently
549 * this does not matter as we only care about the least significant 32 bits.
550 */
551 return (uint32_t)B[2 * r - 1].d[0];
552}
553
554/**
555 * smix1(B, r, N, flags, V, NROM, VROM, XY, ctx):
556 * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in
557 * length; the temporary storage V must be 128rN bytes in length; the temporary
558 * storage XY must be 128r+64 bytes in length. N must be even and at least 4.
559 * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY
560 * to a multiple of at least 16 bytes.
561 */
562#if DISABLE_NROM_CODE
563#define smix1(B,r,N,flags,V,NROM,VROM,XY,ctx) \
564 smix1(B,r,N,flags,V,XY,ctx)
565#endif
566static void smix1(uint8_t *B, size_t r, uint32_t N,
567 uint32_t flags,
568 salsa20_blk_t *V,
569 uint32_t NROM, const salsa20_blk_t *VROM,
570 salsa20_blk_t *XY,
571 pwxform_ctx_t *ctx)
572{
573#if DISABLE_NROM_CODE
574 uint32_t NROM = 0;
575 const salsa20_blk_t *VROM = NULL;
576#endif
577 size_t s = 2 * r;
578 salsa20_blk_t *X = V, *Y = &V[s];
579 uint32_t i, j;
580
581 for (i = 0; i < 2 * r; i++) {
582 const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64];
583 salsa20_blk_t *tmp = Y;
584 salsa20_blk_t *dst = &X[i];
585 size_t k;
586 for (k = 0; k < 16; k++)
587 tmp->w[k] = SWAP_LE32(src->w[k]);
588 salsa20_simd_shuffle(tmp, dst);
589 }
590
591 if (VROM) {
592 uint32_t n;
593 const salsa20_blk_t *V_j;
594
595 V_j = &VROM[(NROM - 1) * s];
596 j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
597 V_j = &VROM[j * s];
598 X = Y + s;
599 j = blockmix_xor(Y, V_j, X, r, ctx);
600
601 for (n = 2; n < N; n <<= 1) {
602 uint32_t m = (n < N / 2) ? n : (N - 1 - n);
603 for (i = 1; i < m; i += 2) {
604 j &= n - 1;
605 j += i - 1;
606 V_j = &V[j * s];
607 Y = X + s;
608 j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
609 V_j = &VROM[j * s];
610 X = Y + s;
611 j = blockmix_xor(Y, V_j, X, r, ctx);
612 }
613 }
614 n >>= 1;
615
616 j &= n - 1;
617 j += N - 2 - n;
618 V_j = &V[j * s];
619 Y = X + s;
620 j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
621 V_j = &VROM[j * s];
622 blockmix_xor(Y, V_j, XY, r, ctx);
623 } else if (flags & YESCRYPT_RW) {
624//can't use flags___YESCRYPT_RW, smix1() may be called with flags = 0
625 uint32_t n;
626 salsa20_blk_t *V_j;
627
628 blockmix(X, Y, r, ctx);
629 X = Y + s;
630 blockmix(Y, X, r, ctx);
631 j = integerify(X, r);
632
633 for (n = 2; n < N; n <<= 1) {
634 uint32_t m = (n < N / 2) ? n : (N - 1 - n);
635 for (i = 1; i < m; i += 2) {
636 Y = X + s;
637 j &= n - 1;
638 j += i - 1;
639 V_j = &V[j * s];
640 j = blockmix_xor(X, V_j, Y, r, ctx);
641 j &= n - 1;
642 j += i;
643 V_j = &V[j * s];
644 X = Y + s;
645 j = blockmix_xor(Y, V_j, X, r, ctx);
646 }
647 }
648 n >>= 1;
649
650 j &= n - 1;
651 j += N - 2 - n;
652 V_j = &V[j * s];
653 Y = X + s;
654 j = blockmix_xor(X, V_j, Y, r, ctx);
655 j &= n - 1;
656 j += N - 1 - n;
657 V_j = &V[j * s];
658 blockmix_xor(Y, V_j, XY, r, ctx);
659 } else {
660 N -= 2;
661 do {
662 blockmix_salsa8(X, Y, r);
663 X = Y + s;
664 blockmix_salsa8(Y, X, r);
665 Y = X + s;
666 } while ((N -= 2));
667
668 blockmix_salsa8(X, Y, r);
669 blockmix_salsa8(Y, XY, r);
670 }
671
672 for (i = 0; i < 2 * r; i++) {
673 const salsa20_blk_t *src = &XY[i];
674 salsa20_blk_t *tmp = &XY[s];
675 salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
676 size_t k;
677 for (k = 0; k < 16; k++)
678 tmp->w[k] = SWAP_LE32(src->w[k]);
679 salsa20_simd_unshuffle(tmp, dst);
680 }
681}
682
683/**
684 * smix2(B, r, N, Nloop, flags, V, NROM, VROM, XY, ctx):
685 * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in
686 * length; the temporary storage V must be 128rN bytes in length; the temporary
687 * storage XY must be 256r bytes in length. N must be a power of 2 and at
688 * least 2. Nloop must be even. The array V must be aligned to a multiple of
689 * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes.
690 */
691#if DISABLE_NROM_CODE
692#define smix2(B,r,N,Nloop,flags,V,NROM,VROM,XY,ctx) \
693 smix2(B,r,N,Nloop,flags,V,XY,ctx)
694#endif
695static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop,
696 uint32_t flags,
697 salsa20_blk_t *V,
698 uint32_t NROM, const salsa20_blk_t *VROM,
699 salsa20_blk_t *XY,
700 pwxform_ctx_t *ctx)
701{
702#if DISABLE_NROM_CODE
703 uint32_t NROM = 0;
704 const salsa20_blk_t *VROM = NULL;
705#endif
706 size_t s = 2 * r;
707 salsa20_blk_t *X = XY, *Y = &XY[s];
708 uint32_t i, j;
709
710 if (Nloop == 0)
711 return;
712
713 for (i = 0; i < 2 * r; i++) {
714 const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64];
715 salsa20_blk_t *tmp = Y;
716 salsa20_blk_t *dst = &X[i];
717 size_t k;
718 for (k = 0; k < 16; k++)
719 tmp->w[k] = SWAP_LE32(src->w[k]);
720 salsa20_simd_shuffle(tmp, dst);
721 }
722
723 j = integerify(X, r) & (N - 1);
724
725/*
726 * Normally, VROM implies YESCRYPT_RW, but we check for these separately
727 * because our SMix resets YESCRYPT_RW for the smix2() calls operating on the
728 * entire V when p > 1.
729 */
730//and this is why bbox can't use flags___YESCRYPT_RW in this function
731 if (VROM && (flags & YESCRYPT_RW)) {
732 do {
733 salsa20_blk_t *V_j = &V[j * s];
734 const salsa20_blk_t *VROM_j;
735 j = blockmix_xor_save(X, V_j, r, ctx) & (NROM - 1);
736 VROM_j = &VROM[j * s];
737 j = blockmix_xor(X, VROM_j, X, r, ctx) & (N - 1);
738 } while (Nloop -= 2);
739 } else if (VROM) {
740 do {
741 const salsa20_blk_t *V_j = &V[j * s];
742 j = blockmix_xor(X, V_j, X, r, ctx) & (NROM - 1);
743 V_j = &VROM[j * s];
744 j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
745 } while (Nloop -= 2);
746 } else if (flags & YESCRYPT_RW) {
747 do {
748 salsa20_blk_t *V_j = &V[j * s];
749 j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1);
750 V_j = &V[j * s];
751 j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1);
752 } while (Nloop -= 2);
753 } else if (ctx) {
754 do {
755 const salsa20_blk_t *V_j = &V[j * s];
756 j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
757 V_j = &V[j * s];
758 j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
759 } while (Nloop -= 2);
760 } else {
761 do {
762 const salsa20_blk_t *V_j = &V[j * s];
763 j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1);
764 V_j = &V[j * s];
765 j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1);
766 } while (Nloop -= 2);
767 }
768
769 for (i = 0; i < 2 * r; i++) {
770 const salsa20_blk_t *src = &X[i];
771 salsa20_blk_t *tmp = Y;
772 salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
773 size_t k;
774 for (k = 0; k < 16; k++)
775 tmp->w[k] = SWAP_LE32(src->w[k]);
776 salsa20_simd_unshuffle(tmp, dst);
777 }
778}
779
780/**
781 * p2floor(x):
782 * Largest power of 2 not greater than argument.
783 */
784static uint64_t p2floor(uint64_t x)
785{
786 uint64_t y;
787 while ((y = x & (x - 1)))
788 x = y;
789 return x;
790}
791
792/**
793 * smix(B, r, N, p, t, flags, V, NROM, VROM, XY, S, passwd):
794 * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the
795 * temporary storage V must be 128rN bytes in length; the temporary storage
796 * XY must be 256r or 256rp bytes in length (the larger size is required with
797 * OpenMP-enabled builds). N must be a power of 2 and at least 4. The array V
798 * must be aligned to a multiple of 64 bytes, and arrays B and XY to a multiple
799 * of at least 16 bytes (aligning them to 64 bytes as well saves cache lines
800 * and helps avoid false sharing in OpenMP-enabled builds when p > 1, but it
801 * might also result in cache bank conflicts).
802 */
803#if DISABLE_NROM_CODE
804#define smix(B,r,N,p,t,flags,V,NROM,VROM,XY,S,passwd) \
805 smix(B,r,N,p,t,flags,V,XY,S,passwd)
806#endif
807static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t,
808 uint32_t flags,
809 salsa20_blk_t *V,
810 uint32_t NROM, const salsa20_blk_t *VROM,
811 salsa20_blk_t *XY,
812 uint8_t *S, uint8_t *passwd)
813{
814 size_t s = 2 * r;
815 uint32_t Nchunk;
816 uint64_t Nloop_all, Nloop_rw;
817 uint32_t i;
818
819 Nchunk = N / p;
820 Nloop_all = Nchunk;
821 if (flags___YESCRYPT_RW) {
822 if (t <= 1) {
823 if (t)
824 Nloop_all *= 2; /* 2/3 */
825 Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */
826 } else {
827 Nloop_all *= t - 1;
828 }
829 } else if (t) {
830 if (t == 1)
831 Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */
832 Nloop_all *= t;
833 }
834
835 Nloop_rw = 0;
836 if (flags___YESCRYPT_RW)
837 Nloop_rw = Nloop_all / p;
838
839 Nchunk &= ~(uint32_t)1; /* round down to even */
840 Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */
841 Nloop_rw++; Nloop_rw &= ~(uint64_t)1; /* round up to even */
842
843 for (i = 0; i < p; i++) {
844 uint32_t Vchunk = i * Nchunk;
845 uint32_t Np = (i < p - 1) ? Nchunk : (N - Vchunk);
846 uint8_t *Bp = &B[128 * r * i];
847 salsa20_blk_t *Vp = &V[Vchunk * s];
848 salsa20_blk_t *XYp = XY;
849 pwxform_ctx_t *ctx_i = NULL;
850 if (flags___YESCRYPT_RW) {
851 uint8_t *Si = S + i * Salloc;
852 smix1(Bp, 1, Sbytes / 128, 0 /* no flags */,
853 (salsa20_blk_t *)Si, 0, NULL, XYp, NULL);
854 ctx_i = (pwxform_ctx_t *)(Si + Sbytes);
855 ctx_i->S2 = Si;
856 ctx_i->S1 = Si + Sbytes / 3;
857 ctx_i->S0 = Si + Sbytes / 3 * 2;
858 ctx_i->w = 0;
859 if (i == 0)
860 hmac_block(
861 /* key,len: */ Bp + (128 * r - 64), 64,
862 /* hash fn: */ sha256_begin,
863 /* in,len: */ passwd, 32,
864 /* outbuf: */ passwd
865 );
866 }
867 smix1(Bp, r, Np, flags, Vp, NROM, VROM, XYp, ctx_i);
868 smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp,
869 NROM, VROM, XYp, ctx_i);
870 }
871
872 if (Nloop_all > Nloop_rw) {
873 for (i = 0; i < p; i++) {
874 uint8_t *Bp = &B[128 * r * i];
875 salsa20_blk_t *XYp = XY;
876 pwxform_ctx_t *ctx_i = NULL;
877 if (flags___YESCRYPT_RW) {
878 uint8_t *Si = S + i * Salloc;
879 ctx_i = (pwxform_ctx_t *)(Si + Sbytes);
880 }
881 smix2(Bp, r, N, Nloop_all - Nloop_rw,
882 flags & (uint32_t)~YESCRYPT_RW,
883 V, NROM, VROM, XYp, ctx_i);
884 }
885 }
886}
887
888/* Allocator code */
889
890static void alloc_region(yescrypt_region_t *region, size_t size)
891{
892 uint8_t *base;
893 int flags =
894# ifdef MAP_NOCORE /* huh? */
895 MAP_NOCORE |
896# endif
897 MAP_ANON | MAP_PRIVATE;
898
899 base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
900 if (base == MAP_FAILED)
901 bb_die_memory_exhausted();
902
903#if defined(MADV_HUGEPAGE)
904 /* Reduces mkpasswd qweRTY123@-+ '$y$jHT$123'
905 * (which allocates 4 Gbytes)
906 * run time from 10.543s to 5.635s
907 * Seen on linux-5.18.0.
908 */
909 madvise(base, size, MADV_HUGEPAGE);
910#endif
911 //region->base = base;
912 //region->base_size = size;
913 region->aligned = base;
914 region->aligned_size = size;
915}
916
917static void free_region(yescrypt_region_t *region)
918{
919 if (region->aligned)
920 munmap(region->aligned, region->aligned_size);
921 //region->base = NULL;
922 //region->base_size = 0;
923 region->aligned = NULL;
924 region->aligned_size = 0;
925}
926/**
927 * yescrypt_kdf_body(shared, local, passwd, passwdlen, salt, saltlen,
928 * flags, N, r, p, t, NROM, buf, buflen):
929 * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
930 * p, buflen), or a revision of scrypt as requested by flags and shared, and
931 * write the result into buf.
932 *
933 * shared and flags may request special modes as described in yescrypt.h.
934 *
935 * local is the thread-local data structure, allowing to preserve and reuse a
936 * memory allocation across calls, thereby reducing its overhead.
937 *
938 * t controls computation time while not affecting peak memory usage.
939 *
940 * Return 0 on success; or -1 on error.
941 *
942 * This optimized implementation currently limits N to the range from 4 to
943 * 2^31, but other implementations might not.
944 */
945static int yescrypt_kdf32_body(
946 yescrypt_ctx_t *yctx,
947 const uint8_t *passwd, size_t passwdlen,
948 uint32_t flags, uint64_t N, uint32_t t,
949 uint8_t *buf32)
950{
951#if !DISABLE_NROM_CODE
952 const salsa20_blk_t *VROM;
953#endif
954 size_t B_size, V_size, XY_size, need;
955 uint8_t *B, *S;
956 salsa20_blk_t *V, *XY;
957 struct {
958 uint8_t sha256[32];
959 uint8_t dk[32];
960 } u;
961#define sha256 u.sha256
962#define dk u.dk
963 uint8_t *dkp = buf32;
964 uint32_t r, p;
965
966 /* Sanity-check parameters */
967 switch (flags___YESCRYPT_MODE_MASK) {
968 case 0: /* classic scrypt - can't have anything non-standard */
969 if (flags || t || YCTX_param_NROM)
970 goto out_EINVAL;
971 break;
972 case YESCRYPT_WORM:
973 if (flags != YESCRYPT_WORM || YCTX_param_NROM)
974 goto out_EINVAL;
975 break;
976 case YESCRYPT_RW:
977 if (flags != (flags & YESCRYPT_KNOWN_FLAGS))
978 goto out_EINVAL;
979#if PWXsimple == 2 && PWXgather == 4 && Sbytes == 12288
980 if ((flags & YESCRYPT_RW_FLAVOR_MASK) ==
981 (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 |
982 YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K))
983 break;
984#else
985#error "Unsupported pwxform settings"
986#endif
987 /* FALLTHRU */
988 default:
989 goto out_EINVAL;
990 }
991
992 r = YCTX_param_r;
993 p = YCTX_param_p;
994 if ((uint64_t)r * (uint64_t)p >= 1 << 30) {
995 dbg("r * n >= 2^30");
996 goto out_EINVAL;
997 }
998 if (N > UINT32_MAX) {
999 dbg("N > 0x%lx", (long)UINT32_MAX);
1000 goto out_EINVAL;
1001 }
1002 if (N <= 3
1003 || r < 1
1004 || p < 1
1005 ) {
1006 dbg("bad N, r or p");
1007 goto out_EINVAL;
1008 }
1009 if (r > SIZE_MAX / 256 / p
1010 || N > SIZE_MAX / 128 / r
1011 ) {
1012 /* 32-bit testcase: mkpasswd qweRTY123@-+ '$y$jHT$123'
1013 * (works on 64-bit, needs buffer > 4Gbytes)
1014 */
1015 dbg("r > SIZE_MAX / 256 / p? %c", "NY"[r > SIZE_MAX / 256 / p]);
1016 dbg("N > SIZE_MAX / 128 / r? %c", "NY"[N > SIZE_MAX / 128 / r]);
1017 goto out_EINVAL;
1018 }
1019 if (flags___YESCRYPT_RW) {
1020 /* p cannot be greater than SIZE_MAX/Salloc on 64-bit systems,
1021 but it can on 32-bit systems. */
1022#pragma GCC diagnostic push
1023#pragma GCC diagnostic ignored "-Wtype-limits"
1024 if (N / p <= 3 || p > SIZE_MAX / Salloc) {
1025 dbg("bad p:%ld", (long)p);
1026 goto out_EINVAL;
1027 }
1028#pragma GCC diagnostic pop
1029 }
1030
1031#if !DISABLE_NROM_CODE
1032 VROM = NULL;
1033 if (YCTX_param_NROM)
1034 goto out_EINVAL;
1035#endif
1036
1037 /* Allocate memory */
1038 V = NULL;
1039 V_size = (size_t)128 * r * N;
1040 need = V_size;
1041 B_size = (size_t)128 * r * p;
1042 need += B_size;
1043 if (need < B_size) {
1044 dbg("integer overflow at += B_size(%lu)", (long)B_size);
1045 goto out_EINVAL;
1046 }
1047 XY_size = (size_t)256 * r;
1048 need += XY_size;
1049 if (need < XY_size) {
1050 dbg("integer overflow at += XY_size(%lu)", (long)XY_size);
1051 goto out_EINVAL;
1052 }
1053 if (flags___YESCRYPT_RW) {
1054 size_t S_size = (size_t)Salloc * p;
1055 need += S_size;
1056 if (need < S_size) {
1057 dbg("integer overflow at += S_size(%lu)", (long)S_size);
1058 goto out_EINVAL;
1059 }
1060 }
1061 if (yctx->local->aligned_size < need) {
1062 free_region(yctx->local);
1063 alloc_region(yctx->local, need);
1064 dbg("allocated local:%lu 0x%lx", (long)need, (long)need);
1065 /* standard "j9T" params allocate 16Mbytes here */
1066 }
1067 if (flags & YESCRYPT_ALLOC_ONLY)
1068 return -3; /* expected "failure" */
1069 B = (uint8_t *)yctx->local->aligned;
1070 V = (salsa20_blk_t *)((uint8_t *)B + B_size);
1071 XY = (salsa20_blk_t *)((uint8_t *)V + V_size);
1072 S = NULL;
1073 if (flags___YESCRYPT_RW)
1074 S = (uint8_t *)XY + XY_size;
1075
1076 if (flags) {
1077 hmac_block(
1078 /* key,len: */ (const void*)"yescrypt-prehash", (flags & YESCRYPT_PREHASH) ? 16 : 8,
1079 /* hash fn: */ sha256_begin,
1080 /* in,len: */ passwd, passwdlen,
1081 /* outbuf: */ sha256
1082 );
1083 passwd = sha256;
1084 passwdlen = sizeof(sha256);
1085 }
1086
1087 PBKDF2_SHA256(passwd, passwdlen, yctx->salt, yctx->saltlen, 1, B, B_size);
1088
1089 if (flags)
1090 memcpy(sha256, B, sizeof(sha256));
1091
1092 if (p == 1 || (flags___YESCRYPT_RW)) {
1093 smix(B, r, N, p, t, flags, V, YCTX_param_NROM, VROM, XY, S, sha256);
1094 } else {
1095 uint32_t i;
1096 for (i = 0; i < p; i++) {
1097 smix(&B[(size_t)128 * r * i], r, N, 1, t, flags, V,
1098 YCTX_param_NROM, VROM, XY, NULL, NULL);
1099 }
1100 }
1101
1102 dkp = buf32;
1103 if (flags && /*buflen:*/32 < sizeof(dk)) {
1104 PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, dk, sizeof(dk));
1105 dkp = dk;
1106 }
1107
1108 PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf32, /*buflen:*/32);
1109
1110 /*
1111 * Except when computing classic scrypt, allow all computation so far
1112 * to be performed on the client. The final steps below match those of
1113 * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so
1114 * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of
1115 * SCRAM's use of SHA-1) would be usable with yescrypt hashes.
1116 */
1117 if (flags && !(flags & YESCRYPT_PREHASH)) {
1118 /* Compute ClientKey */
1119 hmac_block(
1120 /* key,len: */ dkp, sizeof(dk),
1121 /* hash fn: */ sha256_begin,
1122 /* in,len: */ "Client Key", 10,
1123 /* outbuf: */ sha256
1124 );
1125 /* Compute StoredKey */
1126 {
1127 size_t clen = /*buflen:*/32;
1128 if (clen > sizeof(dk))
1129 clen = sizeof(dk);
1130 if (sizeof(dk) != 32) { /* not true, optimize it out */
1131 sha256_block(sha256, sizeof(sha256), dk);
1132 memcpy(buf32, dk, clen);
1133 } else {
1134 sha256_block(sha256, sizeof(sha256), buf32);
1135 }
1136 }
1137 }
1138
1139 explicit_bzero(&u, sizeof(u));
1140
1141 /* Success! */
1142 return 0;
1143
1144 out_EINVAL:
1145 //bbox does not need this: errno = EINVAL;
1146 return -1;
1147#undef sha256
1148#undef dk
1149}
1150
1151/**
1152 * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params,
1153 * buf, buflen):
1154 * Compute scrypt or its revision as requested by the parameters. The inputs
1155 * to this function are the same as those for yescrypt_kdf_body() above, with
1156 * the addition of g, which controls hash upgrades (0 for no upgrades so far).
1157 */
1158static
1159int yescrypt_kdf32(
1160 yescrypt_ctx_t *yctx,
1161 const uint8_t *passwd, size_t passwdlen,
1162 uint8_t *buf32)
1163{
1164 uint32_t flags = YCTX_param_flags;
1165 uint64_t N = YCTX_param_N;
1166 uint32_t r = YCTX_param_r;
1167 uint32_t p = YCTX_param_p;
1168 uint32_t t = YCTX_param_t;
1169 uint32_t g = YCTX_param_g;
1170 uint8_t dk32[32];
1171 int retval;
1172
1173 /* Support for hash upgrades has been temporarily removed */
1174 if (g) {
1175 //bbox does not need this: errno = EINVAL;
1176 return -1;
1177 }
1178
1179 if ((flags___YESCRYPT_RW)
1180 && p >= 1
1181 && N / p >= 0x100
1182 && N / p * r >= 0x20000
1183 ) {
1184 if (yescrypt_kdf32_body(yctx,
1185 passwd, passwdlen,
1186 flags | YESCRYPT_ALLOC_ONLY, N, t,
1187 buf32) != -3
1188 ) {
1189 dbg("yescrypt_kdf32_body: not -3");
1190 return -1;
1191 }
1192 retval = yescrypt_kdf32_body(yctx,
1193 passwd, passwdlen,
1194 flags | YESCRYPT_PREHASH, N >> 6, 0,
1195 dk32);
1196 if (retval) {
1197 dbg("yescrypt_kdf32_body(PREHASH):%d", retval);
1198 return retval;
1199 }
1200 passwd = dk32;
1201 passwdlen = sizeof(dk32);
1202 }
1203
1204 retval = yescrypt_kdf32_body(yctx,
1205 passwd, passwdlen,
1206 flags, N, t, buf32);
1207
1208 explicit_bzero(dk32, sizeof(dk32));
1209
1210 dbg("yescrypt_kdf32_body:%d", retval);
1211 return retval;
1212}
diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h
new file mode 100644
index 000000000..b69843f5d
--- /dev/null
+++ b/libbb/yescrypt/alg-yescrypt.h
@@ -0,0 +1,247 @@
1/*-
2 * Copyright 2009 Colin Percival
3 * Copyright 2013-2018 Alexander Peslyak
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * This file was originally written by Colin Percival as part of the Tarsnap
28 * online backup system.
29 */
30
31// busybox debug and size-reduction configuration
32
33#ifdef YESCRYPT_INTERNAL
34# if 1
35# define dbg(...) ((void)0)
36# else
37# define dbg(...) bb_error_msg(__VA_ARGS__)
38# endif
39# if 1
40# define dbg_dec64(...) ((void)0)
41# else
42# define dbg_dec64(...) bb_error_msg(__VA_ARGS__)
43# endif
44# define TEST_DECODE64 0
45#endif
46
47// Only accept one-char parameters in salt, and only first three?
48// Almost any reasonable yescrypt hashes in /etc/shadow should
49// only ever use "jXY" parameters which set N and r.
50// Fancy multi-byte-encoded wide integers are not needed for that.
51#define RESTRICTED_PARAMS 1
52// Note: if you enable the above, please also enable
53// YCTX_param_p, YCTX_param_t, YCTX_param_g, YCTX_param_NROM
54// optimizations, and DISABLE_NROM_CODE.
55
56#define DISABLE_NROM_CODE 1
57
58// How much we save by forcing "standard" value by commenting the next line:
59// 160 bytes
60//#define YCTX_param_flags yctx->param.flags
61// 260 bytes
62//#define flags___YESCRYPT_RW (flags & YESCRYPT_RW)
63// 140 bytes
64//#define flags___YESCRYPT_MODE_MASK (flags & YESCRYPT_MODE_MASK)
65// ^^^^ forcing the above since the code already requires (checks for) this
66// 50 bytes
67#define YCTX_param_N yctx->param.N
68// -100 bytes (negative!!!)
69#define YCTX_param_r yctx->param.r
70// 400 bytes
71//#define YCTX_param_p yctx->param.p
72// 130 bytes
73//#define YCTX_param_t yctx->param.t
74// 2 bytes
75//#define YCTX_param_g yctx->param.g
76// 1 bytes
77// ^^^^ this looks wrong, compiler should be able to constant-propagate the fact that NROM code is dead
78//#define YCTX_param_NROM yctx->param.NROM
79
80#ifndef YCTX_param_flags
81#define YCTX_param_flags (YESCRYPT_RW | YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K)
82#endif
83#ifndef flags___YESCRYPT_RW
84#define flags___YESCRYPT_RW ((void)flags, YESCRYPT_RW)
85#endif
86#ifndef flags___YESCRYPT_MODE_MASK
87#define flags___YESCRYPT_MODE_MASK ((void)flags, YESCRYPT_RW)
88#endif
89// standard ("j9T") values:
90#ifndef YCTX_param_N
91#define YCTX_param_N 4096
92#endif
93#ifndef YCTX_param_r
94#define YCTX_param_r 32
95#endif
96#ifndef YCTX_param_p
97#define YCTX_param_p 1
98#endif
99#ifndef YCTX_param_t
100#define YCTX_param_t 0
101#endif
102#ifndef YCTX_param_g
103#define YCTX_param_g 0
104#endif
105#ifndef YCTX_param_NROM
106#define YCTX_param_NROM 0
107#endif
108
109// "Faster/smaller code" knobs:
110// -941 bytes:
111#define KDF_UNROLL_COPY 0
112// -5324 bytes if 0:
113#define KDF_UNROLL_PWXFORM_ROUND 0
114// -4864 bytes if 0:
115#define KDF_UNROLL_PWXFORM 0
116// if both this ^^^^^^^^^^ and PWXFORM_ROUND set to 0: -7666 bytes
117// -464 bytes:
118#define KDF_UNROLL_SALSA20 0
119
120/**
121 * Type and possible values for the flags argument of yescrypt_kdf(),
122 * yescrypt_encode_params_r(), yescrypt_encode_params(). Most of these may be
123 * OR'ed together, except that YESCRYPT_WORM stands on its own.
124 * Please refer to the description of yescrypt_kdf() below for the meaning of
125 * these flags.
126 */
127/* yescrypt flags:
128 * bits pos: 7654321076543210
129 * ss r w
130 * sbox gg y
131 */
132/* Public */
133#define YESCRYPT_WORM 1
134#define YESCRYPT_RW 0x002
135#define YESCRYPT_ROUNDS_3 0x000 //r=0
136#define YESCRYPT_ROUNDS_6 0x004 //r=1
137#define YESCRYPT_GATHER_1 0x000 //gg=00
138#define YESCRYPT_GATHER_2 0x008 //gg=01
139#define YESCRYPT_GATHER_4 0x010 //gg=10
140#define YESCRYPT_GATHER_8 0x018 //gg=11
141#define YESCRYPT_SIMPLE_1 0x000 //ss=00
142#define YESCRYPT_SIMPLE_2 0x020 //ss=01
143#define YESCRYPT_SIMPLE_4 0x040 //ss=10
144#define YESCRYPT_SIMPLE_8 0x060 //ss=11
145#define YESCRYPT_SBOX_6K 0x000 //sbox=0000
146#define YESCRYPT_SBOX_12K 0x080 //sbox=0001
147#define YESCRYPT_SBOX_24K 0x100 //sbox=0010
148#define YESCRYPT_SBOX_48K 0x180 //sbox=0011
149#define YESCRYPT_SBOX_96K 0x200 //sbox=0100
150#define YESCRYPT_SBOX_192K 0x280 //sbox=0101
151#define YESCRYPT_SBOX_384K 0x300 //sbox=0110
152#define YESCRYPT_SBOX_768K 0x380 //sbox=0111
153
154#ifdef YESCRYPT_INTERNAL
155/* Private */
156#define YESCRYPT_MODE_MASK 0x003
157#define YESCRYPT_RW_FLAVOR_MASK 0x3fc
158#define YESCRYPT_ALLOC_ONLY 0x08000000
159#define YESCRYPT_PREHASH 0x10000000
160#endif
161
162#define YESCRYPT_RW_DEFAULTS \
163 (YESCRYPT_RW | \
164 YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | \
165 YESCRYPT_SBOX_12K)
166
167#define YESCRYPT_DEFAULTS YESCRYPT_RW_DEFAULTS
168
169#ifdef YESCRYPT_INTERNAL
170#define YESCRYPT_KNOWN_FLAGS \
171 (YESCRYPT_MODE_MASK | YESCRYPT_RW_FLAVOR_MASK | \
172 YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH)
173#endif
174
175/* How many chars base-64 encoded bytes require? */
176#define YESCRYPT_BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6)
177/* The /etc/passwd-style hash is "<prefix>$<hash><NUL>" */
178/*
179 * "$y$", up to 8 params of up to 6 chars each, '$', salt
180 * Alternatively, but that's smaller:
181 * "$7$", 3 params encoded as 1+5+5 chars, salt
182 */
183#define YESCRYPT_PREFIX_LEN (3 + 8 * 6 + 1 + YESCRYPT_BYTES2CHARS(32))
184
185#define YESCRYPT_HASH_SIZE 32
186#define YESCRYPT_HASH_LEN YESCRYPT_BYTES2CHARS(YESCRYPT_HASH_SIZE)
187
188/**
189 * Internal type used by the memory allocator. Please do not use it directly.
190 * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since
191 * they might differ from each other in a future version.
192 */
193typedef struct {
194// void *base;
195 void *aligned;
196// size_t base_size;
197 size_t aligned_size;
198} yescrypt_region_t;
199
200/**
201 * yescrypt parameters combined into one struct. N, r, p are the same as in
202 * classic scrypt, except that the meaning of p changes when YESCRYPT_RW is
203 * set. flags, t, g, NROM are special to yescrypt.
204 */
205typedef struct {
206 uint32_t flags;
207 uint32_t r;
208 uint64_t N;
209#if !RESTRICTED_PARAMS
210 uint32_t p, t, g;
211 uint64_t NROM;
212#endif
213} yescrypt_params_t;
214
215typedef struct {
216 yescrypt_params_t param;
217
218 /* salt in binary form */
219 /* stored here to cut down on the amount of function paramaters */
220 unsigned char salt[64];
221 size_t saltlen;
222
223 /* used by the memory allocator */
224 //yescrypt_region_t shared[1];
225 yescrypt_region_t local[1];
226} yescrypt_ctx_t;
227
228/**
229 * yescrypt_r(shared, local, passwd, passwdlen, setting, key, buf, buflen):
230 * Compute and encode an scrypt or enhanced scrypt hash of passwd given the
231 * parameters and salt value encoded in setting. If shared is not NULL, a ROM
232 * is used and YESCRYPT_RW is required. Otherwise, whether to compute classic
233 * scrypt, YESCRYPT_WORM (a slight deviation from classic scrypt), or
234 * YESCRYPT_RW (time-memory tradeoff discouraging modification) is determined
235 * by the setting string. shared (if not NULL) and local must be initialized
236 * as described above for yescrypt_kdf(). buf must be large enough (as
237 * indicated by buflen) to hold the encoded hash string.
238 *
239 * Return the encoded hash string on success; or NULL on error.
240 *
241 * MT-safe as long as local and buf are local to the thread.
242 */
243extern char *yescrypt_r(
244 const uint8_t *passwd, size_t passwdlen,
245 const uint8_t *setting,
246 char *buf, size_t buflen
247);
diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c
new file mode 100644
index 000000000..d5ab8903f
--- /dev/null
+++ b/libbb/yescrypt/y.c
@@ -0,0 +1,16 @@
1/*
2 * The compilation unit for yescrypt-related code.
3 *
4 * Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com>
5 *
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 */
8//kbuild:lib-$(CONFIG_USE_BB_CRYPT_YES) += y.o
9
10#include "libbb.h"
11
12#define YESCRYPT_INTERNAL
13#include "alg-yescrypt.h"
14#include "alg-sha256.c"
15#include "alg-yescrypt-kdf.c"
16#include "alg-yescrypt-common.c"