aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2025-07-07 22:34:31 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2025-07-07 22:34:31 +0200
commit f8e9bd30d73f2acf6818da71a2ba44748151b716 (patch)
tree cc0855615b0e876253445c900e2d96fe04bf3d50
parent e5d3a87633eac2a8a17d909b98a1e6dd21f80489 (diff)
download busybox-w32-f8e9bd30d73f2acf6818da71a2ba44748151b716.tar.gz
busybox-w32-f8e9bd30d73f2acf6818da71a2ba44748151b716.tar.bz2
busybox-w32-f8e9bd30d73f2acf6818da71a2ba44748151b716.zip
libbb/yescrypt: disable unrolling in two places
Also, make many define macros safer

function                                     old     new   delta
blockmix                                    2300     814   -1486
blockmix_xor                                4606    1543   -3063
blockmix_xor_save                           4737    1620   -3117
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-7666)         Total: -7666 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- libbb/yescrypt/alg-yescrypt-kdf.c 255
1 files changed, 159 insertions, 96 deletions
diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c
index d24b05150..ab095eae1 100644
--- a/libbb/yescrypt/alg-yescrypt-kdf.c
+++ b/libbb/yescrypt/alg-yescrypt-kdf.c
@@ -42,6 +42,15 @@
42#define unlikely(exp) (exp) 42#define unlikely(exp) (exp)
43#endif 43#endif
44 44
45// Not a size win if 0
46#define UNROLL_COPY 1
47
48// -5324 bytes if 0:
49#define UNROLL_PWXFORM_ROUND 0
50// -4864 bytes if 0:
51#define UNROLL_PWXFORM 0
52// both 0: -7666 bytes
53
45typedef union { 54typedef union {
46 uint32_t w[16]; 55 uint32_t w[16];
47 uint64_t d[8]; 56 uint64_t d[8];
@@ -52,15 +61,17 @@ static void salsa20_simd_shuffle(
52 salsa20_blk_t *Bout) 61 salsa20_blk_t *Bout)
53{ 62{
54#define COMBINE(out, in1, in2) \ 63#define COMBINE(out, in1, in2) \
55 Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); 64do { \
56 COMBINE(0, 0, 2) 65 Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); \
57 COMBINE(1, 5, 7) 66} while (0)
58 COMBINE(2, 2, 4) 67 COMBINE(0, 0, 2);
59 COMBINE(3, 7, 1) 68 COMBINE(1, 5, 7);
60 COMBINE(4, 4, 6) 69 COMBINE(2, 2, 4);
61 COMBINE(5, 1, 3) 70 COMBINE(3, 7, 1);
62 COMBINE(6, 6, 0) 71 COMBINE(4, 4, 6);
63 COMBINE(7, 3, 5) 72 COMBINE(5, 1, 3);
73 COMBINE(6, 6, 0);
74 COMBINE(7, 3, 5);
64#undef COMBINE 75#undef COMBINE
65} 76}
66 77
@@ -69,25 +80,29 @@ static void salsa20_simd_unshuffle(
69 salsa20_blk_t *Bout) 80 salsa20_blk_t *Bout)
70{ 81{
71#define UNCOMBINE(out, in1, in2) \ 82#define UNCOMBINE(out, in1, in2) \
83do { \
72 Bout->w[out * 2] = Bin->d[in1]; \ 84 Bout->w[out * 2] = Bin->d[in1]; \
73 Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; 85 Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; \
74 UNCOMBINE(0, 0, 6) 86} while (0)
75 UNCOMBINE(1, 5, 3) 87 UNCOMBINE(0, 0, 6);
76 UNCOMBINE(2, 2, 0) 88 UNCOMBINE(1, 5, 3);
77 UNCOMBINE(3, 7, 5) 89 UNCOMBINE(2, 2, 0);
78 UNCOMBINE(4, 4, 2) 90 UNCOMBINE(3, 7, 5);
79 UNCOMBINE(5, 1, 7) 91 UNCOMBINE(4, 4, 2);
80 UNCOMBINE(6, 6, 4) 92 UNCOMBINE(5, 1, 7);
81 UNCOMBINE(7, 3, 1) 93 UNCOMBINE(6, 6, 4);
94 UNCOMBINE(7, 3, 1);
82#undef UNCOMBINE 95#undef UNCOMBINE
83} 96}
84 97
85#define DECL_X \ 98#define DECL_X \
86 salsa20_blk_t X; 99 salsa20_blk_t X
87#define DECL_Y \ 100#define DECL_Y \
88 salsa20_blk_t Y; 101 salsa20_blk_t Y
89 102
103#if UNROLL_COPY
90#define COPY(out, in) \ 104#define COPY(out, in) \
105do { \
91 (out).d[0] = (in).d[0]; \ 106 (out).d[0] = (in).d[0]; \
92 (out).d[1] = (in).d[1]; \ 107 (out).d[1] = (in).d[1]; \
93 (out).d[2] = (in).d[2]; \ 108 (out).d[2] = (in).d[2]; \
@@ -95,9 +110,17 @@ static void salsa20_simd_unshuffle(
95 (out).d[4] = (in).d[4]; \ 110 (out).d[4] = (in).d[4]; \
96 (out).d[5] = (in).d[5]; \ 111 (out).d[5] = (in).d[5]; \
97 (out).d[6] = (in).d[6]; \ 112 (out).d[6] = (in).d[6]; \
98 (out).d[7] = (in).d[7]; 113 (out).d[7] = (in).d[7]; \
114} while (0)
115#else
116#define COPY(out, in) \
117do { \
118 for (int copyi=0; copyi<8; copyi++) \
119 (out).d[copyi] = (in).d[copyi]; \
120} while (0)
121#endif
99 122
100#define READ_X(in) COPY(X, in) 123#define READ_X(in) COPY(X, in)
101#define WRITE_X(out) COPY(out, X) 124#define WRITE_X(out) COPY(out, X)
102 125
103/** 126/**
@@ -154,7 +177,6 @@ static void salsa20(salsa20_blk_t *restrict B,
154 B->w[i + 3] = Bout->w[i + 3] += B->w[i + 3]; 177 B->w[i + 3] = Bout->w[i + 3] += B->w[i + 3];
155 } 178 }
156 } 179 }
157
158#if 0 180#if 0
159 /* Too expensive */ 181 /* Too expensive */
160 explicit_bzero(&X, sizeof(X)); 182 explicit_bzero(&X, sizeof(X));
@@ -165,9 +187,10 @@ static void salsa20(salsa20_blk_t *restrict B,
165 * Apply the Salsa20/2 core to the block provided in X. 187 * Apply the Salsa20/2 core to the block provided in X.
166 */ 188 */
167#define SALSA20_2(out) \ 189#define SALSA20_2(out) \
168 salsa20(&X, &out, 1); 190 salsa20(&X, &out, 1)
169 191
170#define XOR(out, in1, in2) \ 192#define XOR(out, in1, in2) \
193do { \
171 (out).d[0] = (in1).d[0] ^ (in2).d[0]; \ 194 (out).d[0] = (in1).d[0] ^ (in2).d[0]; \
172 (out).d[1] = (in1).d[1] ^ (in2).d[1]; \ 195 (out).d[1] = (in1).d[1] ^ (in2).d[1]; \
173 (out).d[2] = (in1).d[2] ^ (in2).d[2]; \ 196 (out).d[2] = (in1).d[2] ^ (in2).d[2]; \
@@ -175,23 +198,28 @@ static void salsa20(salsa20_blk_t *restrict B,
175 (out).d[4] = (in1).d[4] ^ (in2).d[4]; \ 198 (out).d[4] = (in1).d[4] ^ (in2).d[4]; \
176 (out).d[5] = (in1).d[5] ^ (in2).d[5]; \ 199 (out).d[5] = (in1).d[5] ^ (in2).d[5]; \
177 (out).d[6] = (in1).d[6] ^ (in2).d[6]; \ 200 (out).d[6] = (in1).d[6] ^ (in2).d[6]; \
178 (out).d[7] = (in1).d[7] ^ (in2).d[7]; 201 (out).d[7] = (in1).d[7] ^ (in2).d[7]; \
202} while (0)
179 203
180#define XOR_X(in) XOR(X, X, in) 204#define XOR_X(in) XOR(X, X, in)
181#define XOR_X_2(in1, in2) XOR(X, in1, in2) 205#define XOR_X_2(in1, in2) XOR(X, in1, in2)
182#define XOR_X_WRITE_XOR_Y_2(out, in) \ 206#define XOR_X_WRITE_XOR_Y_2(out, in) \
183 XOR(Y, out, in) \ 207do { \
184 COPY(out, Y) \ 208 XOR(Y, out, in); \
185 XOR(X, X, Y) 209 COPY(out, Y); \
210 XOR(X, X, Y); \
211} while (0)
186 212
187/** 213/**
188 * Apply the Salsa20/8 core to the block provided in X ^ in. 214 * Apply the Salsa20/8 core to the block provided in X ^ in.
189 */ 215 */
190#define SALSA20_8_XOR_MEM(in, out) \ 216#define SALSA20_8_XOR_MEM(in, out) \
217do { \
191 XOR_X(in); \ 218 XOR_X(in); \
192 salsa20(&X, &out, 4); 219 salsa20(&X, &out, 4); \
220} while (0)
193 221
194#define INTEGERIFY (uint32_t)X.d[0] 222#define INTEGERIFY ((uint32_t)X.d[0])
195 223
196/** 224/**
197 * blockmix_salsa8(Bin, Bout, r): 225 * blockmix_salsa8(Bin, Bout, r):
@@ -204,12 +232,12 @@ static void blockmix_salsa8(
204 size_t r) 232 size_t r)
205{ 233{
206 size_t i; 234 size_t i;
207 DECL_X 235 DECL_X;
208 236
209 READ_X(Bin[r * 2 - 1]) 237 READ_X(Bin[r * 2 - 1]);
210 for (i = 0; i < r; i++) { 238 for (i = 0; i < r; i++) {
211 SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]) 239 SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]);
212 SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]) 240 SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]);
213 } 241 }
214} 242}
215 243
@@ -220,14 +248,14 @@ static uint32_t blockmix_salsa8_xor(
220 size_t r) 248 size_t r)
221{ 249{
222 size_t i; 250 size_t i;
223 DECL_X 251 DECL_X;
224 252
225 XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]) 253 XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]);
226 for (i = 0; i < r; i++) { 254 for (i = 0; i < r; i++) {
227 XOR_X(Bin1[i * 2]) 255 XOR_X(Bin1[i * 2]);
228 SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]) 256 SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]);
229 XOR_X(Bin1[i * 2 + 1]) 257 XOR_X(Bin1[i * 2 + 1]);
230 SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]) 258 SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]);
231 } 259 }
232 260
233 return INTEGERIFY; 261 return INTEGERIFY;
@@ -242,27 +270,38 @@ static uint32_t blockmix_salsa8_xor(
242 270
243/* Derived values. Not tunable except via Swidth above. */ 271/* Derived values. Not tunable except via Swidth above. */
244#define PWXbytes (PWXgather * PWXsimple * 8) 272#define PWXbytes (PWXgather * PWXsimple * 8)
245#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8) 273#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8)
246#define Smask (((1 << Swidth) - 1) * PWXsimple * 8) 274#define Smask (((1 << Swidth) - 1) * PWXsimple * 8)
247#define Smask2 (((uint64_t)Smask << 32) | Smask) 275#define Smask2 (((uint64_t)Smask << 32) | Smask)
248 276
249#define DECL_SMASK2REG /* empty */ 277#define DECL_SMASK2REG do {} while (0)
250#define FORCE_REGALLOC_3 /* empty */ 278#define FORCE_REGALLOC_3 do {} while (0)
251#define MAYBE_MEMORY_BARRIER /* empty */ 279#define MAYBE_MEMORY_BARRIER do {} while (0)
252 280
253#define PWXFORM_SIMD(x0, x1) { \ 281#define PWXFORM_SIMD(x0, x1) \
282do { \
254 uint64_t x = x0 & Smask2; \ 283 uint64_t x = x0 & Smask2; \
255 uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \ 284 uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \
256 uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \ 285 uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \
257 x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \ 286 x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \
258 x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \ 287 x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \
259} 288} while (0)
260 289
290#if UNROLL_PWXFORM_ROUND
291#define PWXFORM_ROUND \
292do { \
293 PWXFORM_SIMD(X.d[0], X.d[1]); \
294 PWXFORM_SIMD(X.d[2], X.d[3]); \
295 PWXFORM_SIMD(X.d[4], X.d[5]); \
296 PWXFORM_SIMD(X.d[6], X.d[7]); \
297} while (0)
298#else
261#define PWXFORM_ROUND \ 299#define PWXFORM_ROUND \
262 PWXFORM_SIMD(X.d[0], X.d[1]) \ 300do { \
263 PWXFORM_SIMD(X.d[2], X.d[3]) \ 301 for (int pwxi=0; pwxi<8; pwxi+=2) \
264 PWXFORM_SIMD(X.d[4], X.d[5]) \ 302 PWXFORM_SIMD(X.d[pwxi], X.d[pwxi + 1]); \
265 PWXFORM_SIMD(X.d[6], X.d[7]) 303} while (0)
304#endif
266 305
267/* 306/*
268 * This offset helps address the 256-byte write block via the single-byte 307 * This offset helps address the 256-byte write block via the single-byte
@@ -275,19 +314,23 @@ static uint32_t blockmix_salsa8_xor(
275#define PWXFORM_WRITE_OFFSET 0x7c 314#define PWXFORM_WRITE_OFFSET 0x7c
276 315
277#define PWXFORM_WRITE \ 316#define PWXFORM_WRITE \
278 WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)) \ 317do { \
279 Sw += 64; 318 WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)); \
280 319 Sw += 64; \
281#define PWXFORM { \ 320} while (0)
321
322#if UNROLL_PWXFORM
323#define PWXFORM \
324do { \
282 uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \ 325 uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
283 FORCE_REGALLOC_3 \ 326 FORCE_REGALLOC_3; \
284 MAYBE_MEMORY_BARRIER \ 327 MAYBE_MEMORY_BARRIER; \
285 PWXFORM_ROUND \ 328 PWXFORM_ROUND; \
286 PWXFORM_ROUND PWXFORM_WRITE \ 329 PWXFORM_ROUND; PWXFORM_WRITE; \
287 PWXFORM_ROUND PWXFORM_WRITE \ 330 PWXFORM_ROUND; PWXFORM_WRITE; \
288 PWXFORM_ROUND PWXFORM_WRITE \ 331 PWXFORM_ROUND; PWXFORM_WRITE; \
289 PWXFORM_ROUND PWXFORM_WRITE \ 332 PWXFORM_ROUND; PWXFORM_WRITE; \
290 PWXFORM_ROUND \ 333 PWXFORM_ROUND; \
291 w = (w + 64 * 4) & Smask2; \ 334 w = (w + 64 * 4) & Smask2; \
292 { \ 335 { \
293 uint8_t *Stmp = S2; \ 336 uint8_t *Stmp = S2; \
@@ -295,7 +338,27 @@ static uint32_t blockmix_salsa8_xor(
295 S1 = S0; \ 338 S1 = S0; \
296 S0 = Stmp; \ 339 S0 = Stmp; \
297 } \ 340 } \
298} 341} while (0)
342#else
343#define PWXFORM \
344do { \
345 uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
346 FORCE_REGALLOC_3; \
347 MAYBE_MEMORY_BARRIER; \
348 PWXFORM_ROUND; \
349 for (int pwxj=0; pwxj<4; pwxj++) {\
350 PWXFORM_ROUND; PWXFORM_WRITE; \
351 } \
352 PWXFORM_ROUND; \
353 w = (w + 64 * 4) & Smask2; \
354 { \
355 uint8_t *Stmp = S2; \
356 S2 = S1; \
357 S1 = S0; \
358 S0 = Stmp; \
359 } \
360} while (0)
361#endif
299 362
300typedef struct { 363typedef struct {
301 uint8_t *S0, *S1, *S2; 364 uint8_t *S0, *S1, *S2;
@@ -318,29 +381,29 @@ static void blockmix(
318 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; 381 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
319 size_t w = ctx->w; 382 size_t w = ctx->w;
320 size_t i; 383 size_t i;
321 DECL_X 384 DECL_X;
322 385
323 /* Convert count of 128-byte blocks to max index of 64-byte block */ 386 /* Convert count of 128-byte blocks to max index of 64-byte block */
324 r = r * 2 - 1; 387 r = r * 2 - 1;
325 388
326 READ_X(Bin[r]) 389 READ_X(Bin[r]);
327 390
328 DECL_SMASK2REG 391 DECL_SMASK2REG;
329 392
330 i = 0; 393 i = 0;
331 do { 394 do {
332 XOR_X(Bin[i]) 395 XOR_X(Bin[i]);
333 PWXFORM 396 PWXFORM;
334 if (unlikely(i >= r)) 397 if (unlikely(i >= r))
335 break; 398 break;
336 WRITE_X(Bout[i]) 399 WRITE_X(Bout[i]);
337 i++; 400 i++;
338 } while (1); 401 } while (1);
339 402
340 ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; 403 ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2;
341 ctx->w = w; 404 ctx->w = w;
342 405
343 SALSA20_2(Bout[i]) 406 SALSA20_2(Bout[i]);
344} 407}
345 408
346static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, 409static uint32_t blockmix_xor(const salsa20_blk_t *Bin1,
@@ -352,31 +415,31 @@ static uint32_t blockmix_xor(const salsa20_blk_t *Bin1,
352 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; 415 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
353 size_t w = ctx->w; 416 size_t w = ctx->w;
354 size_t i; 417 size_t i;
355 DECL_X 418 DECL_X;
356 419
357 /* Convert count of 128-byte blocks to max index of 64-byte block */ 420 /* Convert count of 128-byte blocks to max index of 64-byte block */
358 r = r * 2 - 1; 421 r = r * 2 - 1;
359 422
360 XOR_X_2(Bin1[r], Bin2[r]) 423 XOR_X_2(Bin1[r], Bin2[r]);
361 424
362 DECL_SMASK2REG 425 DECL_SMASK2REG;
363 426
364 i = 0; 427 i = 0;
365 r--; 428 r--;
366 do { 429 do {
367 XOR_X(Bin1[i]) 430 XOR_X(Bin1[i]);
368 XOR_X(Bin2[i]) 431 XOR_X(Bin2[i]);
369 PWXFORM 432 PWXFORM;
370 WRITE_X(Bout[i]) 433 WRITE_X(Bout[i]);
371 434
372 XOR_X(Bin1[i + 1]) 435 XOR_X(Bin1[i + 1]);
373 XOR_X(Bin2[i + 1]) 436 XOR_X(Bin2[i + 1]);
374 PWXFORM 437 PWXFORM;
375 438
376 if (unlikely(i >= r)) 439 if (unlikely(i >= r))
377 break; 440 break;
378 441
379 WRITE_X(Bout[i + 1]) 442 WRITE_X(Bout[i + 1]);
380 443
381 i += 2; 444 i += 2;
382 } while (1); 445 } while (1);
@@ -385,7 +448,7 @@ static uint32_t blockmix_xor(const salsa20_blk_t *Bin1,
385 ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; 448 ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2;
386 ctx->w = w; 449 ctx->w = w;
387 450
388 SALSA20_2(Bout[i]) 451 SALSA20_2(Bout[i]);
389 452
390 return INTEGERIFY; 453 return INTEGERIFY;
391} 454}
@@ -399,30 +462,30 @@ static uint32_t blockmix_xor_save(
399 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; 462 uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
400 size_t w = ctx->w; 463 size_t w = ctx->w;
401 size_t i; 464 size_t i;
402 DECL_X 465 DECL_X;
403 DECL_Y 466 DECL_Y;
404 467
405 /* Convert count of 128-byte blocks to max index of 64-byte block */ 468 /* Convert count of 128-byte blocks to max index of 64-byte block */
406 r = r * 2 - 1; 469 r = r * 2 - 1;
407 470
408 XOR_X_2(Bin1out[r], Bin2[r]) 471 XOR_X_2(Bin1out[r], Bin2[r]);
409 472
410 DECL_SMASK2REG 473 DECL_SMASK2REG;
411 474
412 i = 0; 475 i = 0;
413 r--; 476 r--;
414 do { 477 do {
415 XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]) 478 XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]);
416 PWXFORM 479 PWXFORM;
417 WRITE_X(Bin1out[i]) 480 WRITE_X(Bin1out[i]);
418 481
419 XOR_X_WRITE_XOR_Y_2(Bin2[i + 1], Bin1out[i + 1]) 482 XOR_X_WRITE_XOR_Y_2(Bin2[i + 1], Bin1out[i + 1]);
420 PWXFORM 483 PWXFORM;
421 484
422 if (unlikely(i >= r)) 485 if (unlikely(i >= r))
423 break; 486 break;
424 487
425 WRITE_X(Bin1out[i + 1]) 488 WRITE_X(Bin1out[i + 1]);
426 489
427 i += 2; 490 i += 2;
428 } while (1); 491 } while (1);
@@ -431,7 +494,7 @@ static uint32_t blockmix_xor_save(
431 ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; 494 ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2;
432 ctx->w = w; 495 ctx->w = w;
433 496
434 SALSA20_2(Bin1out[i]) 497 SALSA20_2(Bin1out[i]);
435 498
436 return INTEGERIFY; 499 return INTEGERIFY;
437} 500}