aboutsummaryrefslogtreecommitdiff
path: root/libbb/yescrypt/alg-yescrypt-kdf.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--libbb/yescrypt/alg-yescrypt-kdf.c1212
1 files changed, 1212 insertions, 0 deletions
diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c
new file mode 100644
index 000000000..a9a1bd591
--- /dev/null
+++ b/libbb/yescrypt/alg-yescrypt-kdf.c
@@ -0,0 +1,1212 @@
1/*-
2 * Copyright 2009 Colin Percival
3 * Copyright 2012-2018 Alexander Peslyak
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * This file was originally written by Colin Percival as part of the Tarsnap
28 * online backup system.
29 */
30
31#if __STDC_VERSION__ >= 199901L
32/* Have restrict */
33#elif defined(__GNUC__)
34#define restrict __restrict
35#else
36#define restrict
37#endif
38
39#ifdef __GNUC__
40#define unlikely(exp) __builtin_expect(exp, 0)
41#else
42#define unlikely(exp) (exp)
43#endif
44
45typedef union {
46 uint32_t w[16];
47 uint64_t d[8];
48} salsa20_blk_t;
49
50static void salsa20_simd_shuffle(
51 const salsa20_blk_t *Bin,
52 salsa20_blk_t *Bout)
53{
54#define COMBINE(out, in1, in2) \
55do { \
56 Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); \
57} while (0)
58 COMBINE(0, 0, 2);
59 COMBINE(1, 5, 7);
60 COMBINE(2, 2, 4);
61 COMBINE(3, 7, 1);
62 COMBINE(4, 4, 6);
63 COMBINE(5, 1, 3);
64 COMBINE(6, 6, 0);
65 COMBINE(7, 3, 5);
66#undef COMBINE
67}
68
69static void salsa20_simd_unshuffle(
70 const salsa20_blk_t *Bin,
71 salsa20_blk_t *Bout)
72{
73#define UNCOMBINE(out, in1, in2) \
74do { \
75 Bout->w[out * 2] = Bin->d[in1]; \
76 Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; \
77} while (0)
78 UNCOMBINE(0, 0, 6);
79 UNCOMBINE(1, 5, 3);
80 UNCOMBINE(2, 2, 0);
81 UNCOMBINE(3, 7, 5);
82 UNCOMBINE(4, 4, 2);
83 UNCOMBINE(5, 1, 7);
84 UNCOMBINE(6, 6, 4);
85 UNCOMBINE(7, 3, 1);
86#undef UNCOMBINE
87}
88
89#define DECL_X \
90 salsa20_blk_t X
91#define DECL_Y \
92 salsa20_blk_t Y
93
94#if KDF_UNROLL_COPY
95#define COPY(out, in) \
96do { \
97 (out).d[0] = (in).d[0]; \
98 (out).d[1] = (in).d[1]; \
99 (out).d[2] = (in).d[2]; \
100 (out).d[3] = (in).d[3]; \
101 (out).d[4] = (in).d[4]; \
102 (out).d[5] = (in).d[5]; \
103 (out).d[6] = (in).d[6]; \
104 (out).d[7] = (in).d[7]; \
105} while (0)
106#else
107#define COPY(out, in) \
108do { \
109 memcpy((out).d, (in).d, sizeof((in).d)); \
110} while (0)
111#endif
112
113#define READ_X(in) COPY(X, in)
114#define WRITE_X(out) COPY(out, X)
115
116/**
117 * salsa20(B):
118 * Apply the Salsa20 core to the provided block.
119 */
120static void salsa20(salsa20_blk_t *restrict B,
121 salsa20_blk_t *restrict Bout,
122 uint32_t doublerounds)
123{
124 salsa20_blk_t X;
125#define x X.w
126
127 salsa20_simd_unshuffle(B, &X);
128
129 do {
130#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
131 /* Operate on columns */
132#if KDF_UNROLL_SALSA20
133 x[ 4] ^= R(x[ 0]+x[12], 7); // x[j] ^= R(x[k]+x[l], CONST)
134 x[ 8] ^= R(x[ 4]+x[ 0], 9);
135 x[12] ^= R(x[ 8]+x[ 4],13);
136 x[ 0] ^= R(x[12]+x[ 8],18);
137
138 x[ 9] ^= R(x[ 5]+x[ 1], 7);
139 x[13] ^= R(x[ 9]+x[ 5], 9);
140 x[ 1] ^= R(x[13]+x[ 9],13);
141 x[ 5] ^= R(x[ 1]+x[13],18);
142
143 x[14] ^= R(x[10]+x[ 6], 7);
144 x[ 2] ^= R(x[14]+x[10], 9);
145 x[ 6] ^= R(x[ 2]+x[14],13);
146 x[10] ^= R(x[ 6]+x[ 2],18);
147
148 x[ 3] ^= R(x[15]+x[11], 7);
149 x[ 7] ^= R(x[ 3]+x[15], 9);
150 x[11] ^= R(x[ 7]+x[ 3],13);
151 x[15] ^= R(x[11]+x[ 7],18);
152#else
153 {
154 unsigned j, k, l;
155 j = 4; k = 0; l = 12;
156 for (;;) {
157 uint32_t t;
158 x[j] ^= ({ t = x[k] + x[l]; R(t, 7); }); l = k; k = j; j = (j+4) & 0xf;
159 x[j] ^= ({ t = x[k] + x[l]; R(t, 9); }); l = k; k = j; j = (j+4) & 0xf;
160 x[j] ^= ({ t = x[k] + x[l]; R(t,13); }); l = k; k = j; j = (j+4) & 0xf;
161 x[j] ^= ({ t = x[k] + x[l]; R(t,18); });
162 if (j == 15) break;
163 l = j + 1; k = j + 5; j = (j+9) & 0xf;
164 }
165 }
166#endif
167 /* Operate on rows */
168#if KDF_UNROLL_SALSA20
169// i=0 n=0
170 x[ 1] ^= R(x[ 0]+x[ 3], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
171 x[ 2] ^= R(x[ 1]+x[ 0], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
172 x[ 3] ^= R(x[ 2]+x[ 1],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
173 x[ 0] ^= R(x[ 3]+x[ 2],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
174// i=4 n=1 ^^^j^^^ ^^^k^^^ ^^^l^^^
175 x[ 6] ^= R(x[ 5]+x[ 4], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
176 x[ 7] ^= R(x[ 6]+x[ 5], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
177 x[ 4] ^= R(x[ 7]+x[ 6],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
178 x[ 5] ^= R(x[ 4]+x[ 7],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
179// i=8 n=2
180 x[11] ^= R(x[10]+x[ 9], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
181 x[ 8] ^= R(x[11]+x[10], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
182 x[ 9] ^= R(x[ 8]+x[11],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
183 x[10] ^= R(x[ 9]+x[ 8],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
184// i=12 n=3
185 x[12] ^= R(x[15]+x[14], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
186 x[13] ^= R(x[12]+x[15], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
187 x[14] ^= R(x[13]+x[12],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
188 x[15] ^= R(x[14]+x[13],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
189#else
190 {
191 unsigned j, k, l;
192 uint32_t *xrow;
193 j = 1; k = 0; l = 3;
194 xrow = &x[0];
195 for (;;) {
196 uint32_t t;
197 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 7); }); l = k; k = j; j = (j+1) & 3;
198 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 9); }); l = k; k = j; j = (j+1) & 3;
199 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,13); }); l = k; k = j; j = (j+1) & 3;
200 xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,18); });
201 if (j == 3) break;
202 l = j; k = j + 1; j = (j+2) & 3;
203 xrow += 4;
204 }
205 }
206#endif
207
208#undef R
209 } while (--doublerounds);
210#undef x
211
212 {
213 uint32_t i;
214 salsa20_simd_shuffle(&X, Bout);
215 for (i = 0; i < 16; i++) {
216 // bbox: note: was unrolled x4
217 B->w[i] = Bout->w[i] += B->w[i];
218 }
219 }
220#if 0
221 /* Too expensive */
222 explicit_bzero(&X, sizeof(X));
223#endif
224}
225
226/**
227 * Apply the Salsa20/2 core to the block provided in X.
228 */
229#define SALSA20_2(out) \
230 salsa20(&X, &out, 1)
231
232#if 0
233#define XOR(out, in1, in2) \
234do { \
235 (out).d[0] = (in1).d[0] ^ (in2).d[0]; \
236 (out).d[1] = (in1).d[1] ^ (in2).d[1]; \
237 (out).d[2] = (in1).d[2] ^ (in2).d[2]; \
238 (out).d[3] = (in1).d[3] ^ (in2).d[3]; \
239 (out).d[4] = (in1).d[4] ^ (in2).d[4]; \
240 (out).d[5] = (in1).d[5] ^ (in2).d[5]; \
241 (out).d[6] = (in1).d[6] ^ (in2).d[6]; \
242 (out).d[7] = (in1).d[7] ^ (in2).d[7]; \
243} while (0)
244#else
245#define XOR(out, in1, in2) \
246do { \
247 xorbuf64_3_aligned64(&(out).d, &(in1).d, &(in2).d); \
248} while (0)
249#endif
250
251#define XOR_X(in) XOR(X, X, in)
252#define XOR_X_2(in1, in2) XOR(X, in1, in2)
253#define XOR_X_WRITE_XOR_Y_2(out, in) \
254do { \
255 XOR(Y, out, in); \
256 COPY(out, Y); \
257 XOR(X, X, Y); \
258} while (0)
259
260/**
261 * Apply the Salsa20/8 core to the block provided in X ^ in.
262 */
263#define SALSA20_8_XOR_MEM(in, out) \
264do { \
265 XOR_X(in); \
266 salsa20(&X, &out, 4); \
267} while (0)
268
269#define INTEGERIFY ((uint32_t)X.d[0])
270
271/**
272 * blockmix_salsa8(Bin, Bout, r):
273 * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r
274 * bytes in length; the output Bout must also be the same size.
275 */
276static void blockmix_salsa8(
277 const salsa20_blk_t *restrict Bin,
278 salsa20_blk_t *restrict Bout,
279 size_t r)
280{
281 size_t i;
282 DECL_X;
283
284 READ_X(Bin[r * 2 - 1]);
285 for (i = 0; i < r; i++) {
286 SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]);
287 SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]);
288 }
289}
290
291static uint32_t blockmix_salsa8_xor(
292 const salsa20_blk_t *restrict Bin1,
293 const salsa20_blk_t *restrict Bin2,
294 salsa20_blk_t *restrict Bout,
295 size_t r)
296{
297 size_t i;
298 DECL_X;
299
300 XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]);
301 for (i = 0; i < r; i++) {
302 XOR_X(Bin1[i * 2]);
303 SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]);
304 XOR_X(Bin1[i * 2 + 1]);
305 SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]);
306 }
307
308 return INTEGERIFY;
309}
310
311/* This is tunable */
312#define Swidth 8
313
314/* Not tunable in this implementation, hard-coded in a few places */
315#define PWXsimple 2
316#define PWXgather 4
317
318/* Derived values. Not tunable except via Swidth above. */
319#define PWXbytes (PWXgather * PWXsimple * 8)
320#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8)
321#define Smask (((1 << Swidth) - 1) * PWXsimple * 8)
322#define Smask2 (((uint64_t)Smask << 32) | Smask)
323
324#define DECL_SMASK2REG do {} while (0)
325#define FORCE_REGALLOC_3 do {} while (0)
326#define MAYBE_MEMORY_BARRIER do {} while (0)
327
328#define PWXFORM_SIMD(x0, x1) \
329do { \
330 uint64_t x = x0 & Smask2; \
331 uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \
332 uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \
333 x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \
334 x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \
335} while (0)
336
337#if KDF_UNROLL_PWXFORM_ROUND
338#define PWXFORM_ROUND \
339do { \
340 PWXFORM_SIMD(X.d[0], X.d[1]); \
341 PWXFORM_SIMD(X.d[2], X.d[3]); \
342 PWXFORM_SIMD(X.d[4], X.d[5]); \
343 PWXFORM_SIMD(X.d[6], X.d[7]); \
344} while (0)
345#else
346#define PWXFORM_ROUND \
347do { \
348 for (int pwxi=0; pwxi<8; pwxi+=2) \
349 PWXFORM_SIMD(X.d[pwxi], X.d[pwxi + 1]); \
350} while (0)
351#endif
352
353/*
354 * This offset helps address the 256-byte write block via the single-byte
355 * displacements encodable in x86(-64) instructions. It is needed because the
356 * displacements are signed. Without it, we'd get 4-byte displacements for
357 * half of the writes. Setting it to 0x80 instead of 0x7c would avoid needing
358 * a displacement for one of the writes, but then the LEA instruction would
359 * need a 4-byte displacement.
360 */
361#define PWXFORM_WRITE_OFFSET 0x7c
362
363#define PWXFORM_WRITE \
364do { \
365 WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)); \
366 Sw += 64; \
367} while (0)
368
369#if KDF_UNROLL_PWXFORM
370#define PWXFORM \
371do { \
372 uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
373 FORCE_REGALLOC_3; \
374 MAYBE_MEMORY_BARRIER; \
375 PWXFORM_ROUND; \
376 PWXFORM_ROUND; PWXFORM_WRITE; \
377 PWXFORM_ROUND; PWXFORM_WRITE; \
378 PWXFORM_ROUND; PWXFORM_WRITE; \
379 PWXFORM_ROUND; PWXFORM_WRITE; \
380 PWXFORM_ROUND; \
381 w = (w + 64 * 4) & Smask2; \
382 { \
383 uint8_t *Stmp = S2; \
384 S2 = S1; \
385 S1 = S0; \
386 S0 = Stmp; \
387 } \
388} while (0)
389#else
390#define PWXFORM \
391do { \
392 uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
393 FORCE_REGALLOC_3; \
394 MAYBE_MEMORY_BARRIER; \
395 PWXFORM_ROUND; \
396 for (int pwxj=0; pwxj<4; pwxj++) {\
397 PWXFORM_ROUND; PWXFORM_WRITE; \
398 } \
399 PWXFORM_ROUND; \
400 w = (w + 64 * 4) & Smask2; \
401 { \
402 uint8_t *Stmp = S2; \
403 S2 = S1; \
404 S1 = S0; \
405 S0 = Stmp; \
406 } \
407} while (0)
408#endif
409
410typedef struct {
411 uint8_t *S0, *S1, *S2;
412 size_t w;
413} pwxform_ctx_t;
414
415#define Salloc (Sbytes + ((sizeof(pwxform_ctx_t) + 63) & ~63U))
416
417/**
418 * blockmix_pwxform(Bin, Bout, r, S):
419 * Compute Bout = BlockMix_pwxform{salsa20/2, r, S}(Bin). The input Bin must
420 * be 128r bytes in length; the output Bout must also be the same size.
421 */
422static void blockmix(
423 const salsa20_blk_t *restrict Bin,
424 salsa20_blk_t *restrict Bout,
425 size_t r,
426 pwxform_ctx_t *restrict ctx)
427{
428 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
429 size_t w = ctx->w;
430 size_t i;
431 DECL_X;
432
433 /* Convert count of 128-byte blocks to max index of 64-byte block */
434 r = r * 2 - 1;
435
436 READ_X(Bin[r]);
437
438 DECL_SMASK2REG;
439
440 i = 0;
441 for (;;) {
442 XOR_X(Bin[i]);
443 PWXFORM;
444 if (unlikely(i >= r))
445 break;
446 WRITE_X(Bout[i]);
447 i++;
448 }
449
450 ctx->S0 = S0;
451 ctx->S1 = S1;
452 ctx->S2 = S2;
453 ctx->w = w;
454
455 SALSA20_2(Bout[i]);
456}
457
458static uint32_t blockmix_xor(
459 const salsa20_blk_t *Bin1,
460 const salsa20_blk_t *restrict Bin2,
461 salsa20_blk_t *Bout,
462 size_t r,
463 pwxform_ctx_t *restrict ctx)
464{
465 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
466 size_t w = ctx->w;
467 size_t i;
468 DECL_X;
469
470 /* Convert count of 128-byte blocks to max index of 64-byte block */
471 r = r * 2 - 1;
472
473 XOR_X_2(Bin1[r], Bin2[r]);
474
475 DECL_SMASK2REG;
476
477 i = 0;
478 r--;
479 for (;;) {
480 XOR_X(Bin1[i]);
481 XOR_X(Bin2[i]);
482 PWXFORM;
483 if (unlikely(i > r))
484 break;
485 WRITE_X(Bout[i]);
486 i++;
487 }
488
489 ctx->S0 = S0;
490 ctx->S1 = S1;
491 ctx->S2 = S2;
492 ctx->w = w;
493
494 SALSA20_2(Bout[i]);
495
496 return INTEGERIFY;
497}
498
499static uint32_t blockmix_xor_save(
500 salsa20_blk_t *restrict Bin1out,
501 salsa20_blk_t *restrict Bin2,
502 size_t r,
503 pwxform_ctx_t *restrict ctx)
504{
505 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
506 size_t w = ctx->w;
507 size_t i;
508 DECL_X;
509 DECL_Y;
510
511 /* Convert count of 128-byte blocks to max index of 64-byte block */
512 r = r * 2 - 1;
513
514 XOR_X_2(Bin1out[r], Bin2[r]);
515
516 DECL_SMASK2REG;
517
518 i = 0;
519 r--;
520 for (;;) {
521 XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]);
522 PWXFORM;
523 if (unlikely(i > r))
524 break;
525 WRITE_X(Bin1out[i]);
526 i++;
527 }
528
529 ctx->S0 = S0;
530 ctx->S1 = S1;
531 ctx->S2 = S2;
532 ctx->w = w;
533
534 SALSA20_2(Bin1out[i]);
535
536 return INTEGERIFY;
537}
538
539/**
540 * integerify(B, r):
541 * Return the result of parsing B_{2r-1} as a little-endian integer.
542 */
543static inline uint32_t integerify(const salsa20_blk_t *B, size_t r)
544{
545/*
546 * Our 64-bit words are in host byte order, which is why we don't just read
547 * w[0] here (would be wrong on big-endian). Also, our 32-bit words are
548 * SIMD-shuffled (so the next 32 bits would be part of d[6]), but currently
549 * this does not matter as we only care about the least significant 32 bits.
550 */
551 return (uint32_t)B[2 * r - 1].d[0];
552}
553
554/**
555 * smix1(B, r, N, flags, V, NROM, VROM, XY, ctx):
556 * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in
557 * length; the temporary storage V must be 128rN bytes in length; the temporary
558 * storage XY must be 128r+64 bytes in length. N must be even and at least 4.
559 * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY
560 * to a multiple of at least 16 bytes.
561 */
562#if DISABLE_NROM_CODE
563#define smix1(B,r,N,flags,V,NROM,VROM,XY,ctx) \
564 smix1(B,r,N,flags,V,XY,ctx)
565#endif
566static void smix1(uint8_t *B, size_t r, uint32_t N,
567 uint32_t flags,
568 salsa20_blk_t *V,
569 uint32_t NROM, const salsa20_blk_t *VROM,
570 salsa20_blk_t *XY,
571 pwxform_ctx_t *ctx)
572{
573#if DISABLE_NROM_CODE
574 uint32_t NROM = 0;
575 const salsa20_blk_t *VROM = NULL;
576#endif
577 size_t s = 2 * r;
578 salsa20_blk_t *X = V, *Y = &V[s];
579 uint32_t i, j;
580
581 for (i = 0; i < 2 * r; i++) {
582 const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64];
583 salsa20_blk_t *tmp = Y;
584 salsa20_blk_t *dst = &X[i];
585 size_t k;
586 for (k = 0; k < 16; k++)
587 tmp->w[k] = SWAP_LE32(src->w[k]);
588 salsa20_simd_shuffle(tmp, dst);
589 }
590
591 if (VROM) {
592 uint32_t n;
593 const salsa20_blk_t *V_j;
594
595 V_j = &VROM[(NROM - 1) * s];
596 j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
597 V_j = &VROM[j * s];
598 X = Y + s;
599 j = blockmix_xor(Y, V_j, X, r, ctx);
600
601 for (n = 2; n < N; n <<= 1) {
602 uint32_t m = (n < N / 2) ? n : (N - 1 - n);
603 for (i = 1; i < m; i += 2) {
604 j &= n - 1;
605 j += i - 1;
606 V_j = &V[j * s];
607 Y = X + s;
608 j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
609 V_j = &VROM[j * s];
610 X = Y + s;
611 j = blockmix_xor(Y, V_j, X, r, ctx);
612 }
613 }
614 n >>= 1;
615
616 j &= n - 1;
617 j += N - 2 - n;
618 V_j = &V[j * s];
619 Y = X + s;
620 j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
621 V_j = &VROM[j * s];
622 blockmix_xor(Y, V_j, XY, r, ctx);
623 } else if (flags & YESCRYPT_RW) {
624//can't use flags___YESCRYPT_RW, smix1() may be called with flags = 0
625 uint32_t n;
626 salsa20_blk_t *V_j;
627
628 blockmix(X, Y, r, ctx);
629 X = Y + s;
630 blockmix(Y, X, r, ctx);
631 j = integerify(X, r);
632
633 for (n = 2; n < N; n <<= 1) {
634 uint32_t m = (n < N / 2) ? n : (N - 1 - n);
635 for (i = 1; i < m; i += 2) {
636 Y = X + s;
637 j &= n - 1;
638 j += i - 1;
639 V_j = &V[j * s];
640 j = blockmix_xor(X, V_j, Y, r, ctx);
641 j &= n - 1;
642 j += i;
643 V_j = &V[j * s];
644 X = Y + s;
645 j = blockmix_xor(Y, V_j, X, r, ctx);
646 }
647 }
648 n >>= 1;
649
650 j &= n - 1;
651 j += N - 2 - n;
652 V_j = &V[j * s];
653 Y = X + s;
654 j = blockmix_xor(X, V_j, Y, r, ctx);
655 j &= n - 1;
656 j += N - 1 - n;
657 V_j = &V[j * s];
658 blockmix_xor(Y, V_j, XY, r, ctx);
659 } else {
660 N -= 2;
661 do {
662 blockmix_salsa8(X, Y, r);
663 X = Y + s;
664 blockmix_salsa8(Y, X, r);
665 Y = X + s;
666 } while ((N -= 2));
667
668 blockmix_salsa8(X, Y, r);
669 blockmix_salsa8(Y, XY, r);
670 }
671
672 for (i = 0; i < 2 * r; i++) {
673 const salsa20_blk_t *src = &XY[i];
674 salsa20_blk_t *tmp = &XY[s];
675 salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
676 size_t k;
677 for (k = 0; k < 16; k++)
678 tmp->w[k] = SWAP_LE32(src->w[k]);
679 salsa20_simd_unshuffle(tmp, dst);
680 }
681}
682
683/**
684 * smix2(B, r, N, Nloop, flags, V, NROM, VROM, XY, ctx):
685 * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in
686 * length; the temporary storage V must be 128rN bytes in length; the temporary
687 * storage XY must be 256r bytes in length. N must be a power of 2 and at
688 * least 2. Nloop must be even. The array V must be aligned to a multiple of
689 * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes.
690 */
691#if DISABLE_NROM_CODE
692#define smix2(B,r,N,Nloop,flags,V,NROM,VROM,XY,ctx) \
693 smix2(B,r,N,Nloop,flags,V,XY,ctx)
694#endif
695static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop,
696 uint32_t flags,
697 salsa20_blk_t *V,
698 uint32_t NROM, const salsa20_blk_t *VROM,
699 salsa20_blk_t *XY,
700 pwxform_ctx_t *ctx)
701{
702#if DISABLE_NROM_CODE
703 uint32_t NROM = 0;
704 const salsa20_blk_t *VROM = NULL;
705#endif
706 size_t s = 2 * r;
707 salsa20_blk_t *X = XY, *Y = &XY[s];
708 uint32_t i, j;
709
710 if (Nloop == 0)
711 return;
712
713 for (i = 0; i < 2 * r; i++) {
714 const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64];
715 salsa20_blk_t *tmp = Y;
716 salsa20_blk_t *dst = &X[i];
717 size_t k;
718 for (k = 0; k < 16; k++)
719 tmp->w[k] = SWAP_LE32(src->w[k]);
720 salsa20_simd_shuffle(tmp, dst);
721 }
722
723 j = integerify(X, r) & (N - 1);
724
725/*
726 * Normally, VROM implies YESCRYPT_RW, but we check for these separately
727 * because our SMix resets YESCRYPT_RW for the smix2() calls operating on the
728 * entire V when p > 1.
729 */
730//and this is why bbox can't use flags___YESCRYPT_RW in this function
731 if (VROM && (flags & YESCRYPT_RW)) {
732 do {
733 salsa20_blk_t *V_j = &V[j * s];
734 const salsa20_blk_t *VROM_j;
735 j = blockmix_xor_save(X, V_j, r, ctx) & (NROM - 1);
736 VROM_j = &VROM[j * s];
737 j = blockmix_xor(X, VROM_j, X, r, ctx) & (N - 1);
738 } while (Nloop -= 2);
739 } else if (VROM) {
740 do {
741 const salsa20_blk_t *V_j = &V[j * s];
742 j = blockmix_xor(X, V_j, X, r, ctx) & (NROM - 1);
743 V_j = &VROM[j * s];
744 j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
745 } while (Nloop -= 2);
746 } else if (flags & YESCRYPT_RW) {
747 do {
748 salsa20_blk_t *V_j = &V[j * s];
749 j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1);
750 V_j = &V[j * s];
751 j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1);
752 } while (Nloop -= 2);
753 } else if (ctx) {
754 do {
755 const salsa20_blk_t *V_j = &V[j * s];
756 j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
757 V_j = &V[j * s];
758 j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
759 } while (Nloop -= 2);
760 } else {
761 do {
762 const salsa20_blk_t *V_j = &V[j * s];
763 j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1);
764 V_j = &V[j * s];
765 j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1);
766 } while (Nloop -= 2);
767 }
768
769 for (i = 0; i < 2 * r; i++) {
770 const salsa20_blk_t *src = &X[i];
771 salsa20_blk_t *tmp = Y;
772 salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
773 size_t k;
774 for (k = 0; k < 16; k++)
775 tmp->w[k] = SWAP_LE32(src->w[k]);
776 salsa20_simd_unshuffle(tmp, dst);
777 }
778}
779
780/**
781 * p2floor(x):
782 * Largest power of 2 not greater than argument.
783 */
784static uint64_t p2floor(uint64_t x)
785{
786 uint64_t y;
787 while ((y = x & (x - 1)))
788 x = y;
789 return x;
790}
791
792/**
793 * smix(B, r, N, p, t, flags, V, NROM, VROM, XY, S, passwd):
794 * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the
795 * temporary storage V must be 128rN bytes in length; the temporary storage
796 * XY must be 256r or 256rp bytes in length (the larger size is required with
797 * OpenMP-enabled builds). N must be a power of 2 and at least 4. The array V
798 * must be aligned to a multiple of 64 bytes, and arrays B and XY to a multiple
799 * of at least 16 bytes (aligning them to 64 bytes as well saves cache lines
800 * and helps avoid false sharing in OpenMP-enabled builds when p > 1, but it
801 * might also result in cache bank conflicts).
802 */
803#if DISABLE_NROM_CODE
804#define smix(B,r,N,p,t,flags,V,NROM,VROM,XY,S,passwd) \
805 smix(B,r,N,p,t,flags,V,XY,S,passwd)
806#endif
807static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t,
808 uint32_t flags,
809 salsa20_blk_t *V,
810 uint32_t NROM, const salsa20_blk_t *VROM,
811 salsa20_blk_t *XY,
812 uint8_t *S, uint8_t *passwd)
813{
814 size_t s = 2 * r;
815 uint32_t Nchunk;
816 uint64_t Nloop_all, Nloop_rw;
817 uint32_t i;
818
819 Nchunk = N / p;
820 Nloop_all = Nchunk;
821 if (flags___YESCRYPT_RW) {
822 if (t <= 1) {
823 if (t)
824 Nloop_all *= 2; /* 2/3 */
825 Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */
826 } else {
827 Nloop_all *= t - 1;
828 }
829 } else if (t) {
830 if (t == 1)
831 Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */
832 Nloop_all *= t;
833 }
834
835 Nloop_rw = 0;
836 if (flags___YESCRYPT_RW)
837 Nloop_rw = Nloop_all / p;
838
839 Nchunk &= ~(uint32_t)1; /* round down to even */
840 Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */
841 Nloop_rw++; Nloop_rw &= ~(uint64_t)1; /* round up to even */
842
843 for (i = 0; i < p; i++) {
844 uint32_t Vchunk = i * Nchunk;
845 uint32_t Np = (i < p - 1) ? Nchunk : (N - Vchunk);
846 uint8_t *Bp = &B[128 * r * i];
847 salsa20_blk_t *Vp = &V[Vchunk * s];
848 salsa20_blk_t *XYp = XY;
849 pwxform_ctx_t *ctx_i = NULL;
850 if (flags___YESCRYPT_RW) {
851 uint8_t *Si = S + i * Salloc;
852 smix1(Bp, 1, Sbytes / 128, 0 /* no flags */,
853 (salsa20_blk_t *)Si, 0, NULL, XYp, NULL);
854 ctx_i = (pwxform_ctx_t *)(Si + Sbytes);
855 ctx_i->S2 = Si;
856 ctx_i->S1 = Si + Sbytes / 3;
857 ctx_i->S0 = Si + Sbytes / 3 * 2;
858 ctx_i->w = 0;
859 if (i == 0)
860 hmac_block(
861 /* key,len: */ Bp + (128 * r - 64), 64,
862 /* hash fn: */ sha256_begin,
863 /* in,len: */ passwd, 32,
864 /* outbuf: */ passwd
865 );
866 }
867 smix1(Bp, r, Np, flags, Vp, NROM, VROM, XYp, ctx_i);
868 smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp,
869 NROM, VROM, XYp, ctx_i);
870 }
871
872 if (Nloop_all > Nloop_rw) {
873 for (i = 0; i < p; i++) {
874 uint8_t *Bp = &B[128 * r * i];
875 salsa20_blk_t *XYp = XY;
876 pwxform_ctx_t *ctx_i = NULL;
877 if (flags___YESCRYPT_RW) {
878 uint8_t *Si = S + i * Salloc;
879 ctx_i = (pwxform_ctx_t *)(Si + Sbytes);
880 }
881 smix2(Bp, r, N, Nloop_all - Nloop_rw,
882 flags & (uint32_t)~YESCRYPT_RW,
883 V, NROM, VROM, XYp, ctx_i);
884 }
885 }
886}
887
888/* Allocator code */
889
890static void alloc_region(yescrypt_region_t *region, size_t size)
891{
892 uint8_t *base;
893 int flags =
894# ifdef MAP_NOCORE /* huh? */
895 MAP_NOCORE |
896# endif
897 MAP_ANON | MAP_PRIVATE;
898
899 base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
900 if (base == MAP_FAILED)
901 bb_die_memory_exhausted();
902
903#if defined(MADV_HUGEPAGE)
904 /* Reduces mkpasswd qweRTY123@-+ '$y$jHT$123'
905 * (which allocates 4 Gbytes)
906 * run time from 10.543s to 5.635s
907 * Seen on linux-5.18.0.
908 */
909 madvise(base, size, MADV_HUGEPAGE);
910#endif
911 //region->base = base;
912 //region->base_size = size;
913 region->aligned = base;
914 region->aligned_size = size;
915}
916
917static void free_region(yescrypt_region_t *region)
918{
919 if (region->aligned)
920 munmap(region->aligned, region->aligned_size);
921 //region->base = NULL;
922 //region->base_size = 0;
923 region->aligned = NULL;
924 region->aligned_size = 0;
925}
926/**
927 * yescrypt_kdf_body(shared, local, passwd, passwdlen, salt, saltlen,
928 * flags, N, r, p, t, NROM, buf, buflen):
929 * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
930 * p, buflen), or a revision of scrypt as requested by flags and shared, and
931 * write the result into buf.
932 *
933 * shared and flags may request special modes as described in yescrypt.h.
934 *
935 * local is the thread-local data structure, allowing to preserve and reuse a
936 * memory allocation across calls, thereby reducing its overhead.
937 *
938 * t controls computation time while not affecting peak memory usage.
939 *
940 * Return 0 on success; or -1 on error.
941 *
942 * This optimized implementation currently limits N to the range from 4 to
943 * 2^31, but other implementations might not.
944 */
945static int yescrypt_kdf32_body(
946 yescrypt_ctx_t *yctx,
947 const uint8_t *passwd, size_t passwdlen,
948 uint32_t flags, uint64_t N, uint32_t t,
949 uint8_t *buf32)
950{
951#if !DISABLE_NROM_CODE
952 const salsa20_blk_t *VROM;
953#endif
954 size_t B_size, V_size, XY_size, need;
955 uint8_t *B, *S;
956 salsa20_blk_t *V, *XY;
957 struct {
958 uint8_t sha256[32];
959 uint8_t dk[32];
960 } u;
961#define sha256 u.sha256
962#define dk u.dk
963 uint8_t *dkp = buf32;
964 uint32_t r, p;
965
966 /* Sanity-check parameters */
967 switch (flags___YESCRYPT_MODE_MASK) {
968 case 0: /* classic scrypt - can't have anything non-standard */
969 if (flags || t || YCTX_param_NROM)
970 goto out_EINVAL;
971 break;
972 case YESCRYPT_WORM:
973 if (flags != YESCRYPT_WORM || YCTX_param_NROM)
974 goto out_EINVAL;
975 break;
976 case YESCRYPT_RW:
977 if (flags != (flags & YESCRYPT_KNOWN_FLAGS))
978 goto out_EINVAL;
979#if PWXsimple == 2 && PWXgather == 4 && Sbytes == 12288
980 if ((flags & YESCRYPT_RW_FLAVOR_MASK) ==
981 (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 |
982 YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K))
983 break;
984#else
985#error "Unsupported pwxform settings"
986#endif
987 /* FALLTHRU */
988 default:
989 goto out_EINVAL;
990 }
991
992 r = YCTX_param_r;
993 p = YCTX_param_p;
994 if ((uint64_t)r * (uint64_t)p >= 1 << 30) {
995 dbg("r * n >= 2^30");
996 goto out_EINVAL;
997 }
998 if (N > UINT32_MAX) {
999 dbg("N > 0x%lx", (long)UINT32_MAX);
1000 goto out_EINVAL;
1001 }
1002 if (N <= 3
1003 || r < 1
1004 || p < 1
1005 ) {
1006 dbg("bad N, r or p");
1007 goto out_EINVAL;
1008 }
1009 if (r > SIZE_MAX / 256 / p
1010 || N > SIZE_MAX / 128 / r
1011 ) {
1012 /* 32-bit testcase: mkpasswd qweRTY123@-+ '$y$jHT$123'
1013 * (works on 64-bit, needs buffer > 4Gbytes)
1014 */
1015 dbg("r > SIZE_MAX / 256 / p? %c", "NY"[r > SIZE_MAX / 256 / p]);
1016 dbg("N > SIZE_MAX / 128 / r? %c", "NY"[N > SIZE_MAX / 128 / r]);
1017 goto out_EINVAL;
1018 }
1019 if (flags___YESCRYPT_RW) {
1020 /* p cannot be greater than SIZE_MAX/Salloc on 64-bit systems,
1021 but it can on 32-bit systems. */
1022#pragma GCC diagnostic push
1023#pragma GCC diagnostic ignored "-Wtype-limits"
1024 if (N / p <= 3 || p > SIZE_MAX / Salloc) {
1025 dbg("bad p:%ld", (long)p);
1026 goto out_EINVAL;
1027 }
1028#pragma GCC diagnostic pop
1029 }
1030
1031#if !DISABLE_NROM_CODE
1032 VROM = NULL;
1033 if (YCTX_param_NROM)
1034 goto out_EINVAL;
1035#endif
1036
1037 /* Allocate memory */
1038 V = NULL;
1039 V_size = (size_t)128 * r * N;
1040 need = V_size;
1041 B_size = (size_t)128 * r * p;
1042 need += B_size;
1043 if (need < B_size) {
1044 dbg("integer overflow at += B_size(%lu)", (long)B_size);
1045 goto out_EINVAL;
1046 }
1047 XY_size = (size_t)256 * r;
1048 need += XY_size;
1049 if (need < XY_size) {
1050 dbg("integer overflow at += XY_size(%lu)", (long)XY_size);
1051 goto out_EINVAL;
1052 }
1053 if (flags___YESCRYPT_RW) {
1054 size_t S_size = (size_t)Salloc * p;
1055 need += S_size;
1056 if (need < S_size) {
1057 dbg("integer overflow at += S_size(%lu)", (long)S_size);
1058 goto out_EINVAL;
1059 }
1060 }
1061 if (yctx->local->aligned_size < need) {
1062 free_region(yctx->local);
1063 alloc_region(yctx->local, need);
1064 dbg("allocated local:%lu 0x%lx", (long)need, (long)need);
1065 /* standard "j9T" params allocate 16Mbytes here */
1066 }
1067 if (flags & YESCRYPT_ALLOC_ONLY)
1068 return -3; /* expected "failure" */
1069 B = (uint8_t *)yctx->local->aligned;
1070 V = (salsa20_blk_t *)((uint8_t *)B + B_size);
1071 XY = (salsa20_blk_t *)((uint8_t *)V + V_size);
1072 S = NULL;
1073 if (flags___YESCRYPT_RW)
1074 S = (uint8_t *)XY + XY_size;
1075
1076 if (flags) {
1077 hmac_block(
1078 /* key,len: */ (const void*)"yescrypt-prehash", (flags & YESCRYPT_PREHASH) ? 16 : 8,
1079 /* hash fn: */ sha256_begin,
1080 /* in,len: */ passwd, passwdlen,
1081 /* outbuf: */ sha256
1082 );
1083 passwd = sha256;
1084 passwdlen = sizeof(sha256);
1085 }
1086
1087 PBKDF2_SHA256(passwd, passwdlen, yctx->salt, yctx->saltlen, 1, B, B_size);
1088
1089 if (flags)
1090 memcpy(sha256, B, sizeof(sha256));
1091
1092 if (p == 1 || (flags___YESCRYPT_RW)) {
1093 smix(B, r, N, p, t, flags, V, YCTX_param_NROM, VROM, XY, S, sha256);
1094 } else {
1095 uint32_t i;
1096 for (i = 0; i < p; i++) {
1097 smix(&B[(size_t)128 * r * i], r, N, 1, t, flags, V,
1098 YCTX_param_NROM, VROM, XY, NULL, NULL);
1099 }
1100 }
1101
1102 dkp = buf32;
1103 if (flags && /*buflen:*/32 < sizeof(dk)) {
1104 PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, dk, sizeof(dk));
1105 dkp = dk;
1106 }
1107
1108 PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf32, /*buflen:*/32);
1109
1110 /*
1111 * Except when computing classic scrypt, allow all computation so far
1112 * to be performed on the client. The final steps below match those of
1113 * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so
1114 * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of
1115 * SCRAM's use of SHA-1) would be usable with yescrypt hashes.
1116 */
1117 if (flags && !(flags & YESCRYPT_PREHASH)) {
1118 /* Compute ClientKey */
1119 hmac_block(
1120 /* key,len: */ dkp, sizeof(dk),
1121 /* hash fn: */ sha256_begin,
1122 /* in,len: */ "Client Key", 10,
1123 /* outbuf: */ sha256
1124 );
1125 /* Compute StoredKey */
1126 {
1127 size_t clen = /*buflen:*/32;
1128 if (clen > sizeof(dk))
1129 clen = sizeof(dk);
1130 if (sizeof(dk) != 32) { /* not true, optimize it out */
1131 sha256_block(sha256, sizeof(sha256), dk);
1132 memcpy(buf32, dk, clen);
1133 } else {
1134 sha256_block(sha256, sizeof(sha256), buf32);
1135 }
1136 }
1137 }
1138
1139 explicit_bzero(&u, sizeof(u));
1140
1141 /* Success! */
1142 return 0;
1143
1144 out_EINVAL:
1145 //bbox does not need this: errno = EINVAL;
1146 return -1;
1147#undef sha256
1148#undef dk
1149}
1150
1151/**
1152 * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params,
1153 * buf, buflen):
1154 * Compute scrypt or its revision as requested by the parameters. The inputs
1155 * to this function are the same as those for yescrypt_kdf_body() above, with
1156 * the addition of g, which controls hash upgrades (0 for no upgrades so far).
1157 */
1158static
1159int yescrypt_kdf32(
1160 yescrypt_ctx_t *yctx,
1161 const uint8_t *passwd, size_t passwdlen,
1162 uint8_t *buf32)
1163{
1164 uint32_t flags = YCTX_param_flags;
1165 uint64_t N = YCTX_param_N;
1166 uint32_t r = YCTX_param_r;
1167 uint32_t p = YCTX_param_p;
1168 uint32_t t = YCTX_param_t;
1169 uint32_t g = YCTX_param_g;
1170 uint8_t dk32[32];
1171 int retval;
1172
1173 /* Support for hash upgrades has been temporarily removed */
1174 if (g) {
1175 //bbox does not need this: errno = EINVAL;
1176 return -1;
1177 }
1178
1179 if ((flags___YESCRYPT_RW)
1180 && p >= 1
1181 && N / p >= 0x100
1182 && N / p * r >= 0x20000
1183 ) {
1184 if (yescrypt_kdf32_body(yctx,
1185 passwd, passwdlen,
1186 flags | YESCRYPT_ALLOC_ONLY, N, t,
1187 buf32) != -3
1188 ) {
1189 dbg("yescrypt_kdf32_body: not -3");
1190 return -1;
1191 }
1192 retval = yescrypt_kdf32_body(yctx,
1193 passwd, passwdlen,
1194 flags | YESCRYPT_PREHASH, N >> 6, 0,
1195 dk32);
1196 if (retval) {
1197 dbg("yescrypt_kdf32_body(PREHASH):%d", retval);
1198 return retval;
1199 }
1200 passwd = dk32;
1201 passwdlen = sizeof(dk32);
1202 }
1203
1204 retval = yescrypt_kdf32_body(yctx,
1205 passwd, passwdlen,
1206 flags, N, t, buf32);
1207
1208 explicit_bzero(dk32, sizeof(dk32));
1209
1210 dbg("yescrypt_kdf32_body:%d", retval);
1211 return retval;
1212}