diff options
Diffstat (limited to '')
-rw-r--r-- | libbb/Config.src | 14 | ||||
-rw-r--r-- | libbb/bitops.c | 128 | ||||
-rw-r--r-- | libbb/const_hack.c | 29 | ||||
-rw-r--r-- | libbb/dump.c | 10 | ||||
-rw-r--r-- | libbb/hash_hmac.c | 154 | ||||
-rw-r--r-- | libbb/hash_md5_sha.c | 78 | ||||
-rw-r--r-- | libbb/hash_sha256_block.c | 19 | ||||
-rw-r--r-- | libbb/hash_sha256_hwaccel_x86-32.S | 218 | ||||
-rw-r--r-- | libbb/hash_sha256_hwaccel_x86-64.S | 218 | ||||
-rw-r--r-- | libbb/lineedit.c | 112 | ||||
-rw-r--r-- | libbb/poll_with_signals.c | 48 | ||||
-rw-r--r-- | libbb/pw_ascii64.c | 91 | ||||
-rw-r--r-- | libbb/pw_encrypt.c | 113 | ||||
-rw-r--r-- | libbb/pw_encrypt_des.c | 88 | ||||
-rw-r--r-- | libbb/pw_encrypt_md5.c | 4 | ||||
-rw-r--r-- | libbb/pw_encrypt_sha.c | 5 | ||||
-rw-r--r-- | libbb/pw_encrypt_yes.c | 24 | ||||
-rw-r--r-- | libbb/read_key.c | 25 | ||||
-rw-r--r-- | libbb/u_signal_names.c | 4 | ||||
-rw-r--r-- | libbb/xfuncs.c | 2 | ||||
-rw-r--r-- | libbb/yescrypt/Kbuild.src | 9 | ||||
-rw-r--r-- | libbb/yescrypt/PARAMETERS | 196 | ||||
-rw-r--r-- | libbb/yescrypt/README | 4 | ||||
-rw-r--r-- | libbb/yescrypt/alg-sha256.c | 86 | ||||
-rw-r--r-- | libbb/yescrypt/alg-yescrypt-common.c | 408 | ||||
-rw-r--r-- | libbb/yescrypt/alg-yescrypt-kdf.c | 1212 | ||||
-rw-r--r-- | libbb/yescrypt/alg-yescrypt.h | 247 | ||||
-rw-r--r-- | libbb/yescrypt/y.c | 16 |
28 files changed, 3152 insertions, 410 deletions
diff --git a/libbb/Config.src b/libbb/Config.src index 61b4601d6..eff327c2a 100644 --- a/libbb/Config.src +++ b/libbb/Config.src | |||
@@ -37,6 +37,14 @@ config PASSWORD_MINLEN | |||
37 | help | 37 | help |
38 | Minimum allowable password length. | 38 | Minimum allowable password length. |
39 | 39 | ||
40 | config FEATURE_USE_CNG_API | ||
41 | bool "Use the Windows CNG API for checksums (Windows 10+ only)" | ||
42 | default n | ||
43 | depends on PLATFORM_MINGW32 | ||
44 | help | ||
45 | Use the in-built Windows CNG API for checksums. | ||
46 | This reduces code size, but is only supported on Windows 10+. | ||
47 | |||
40 | config MD5_SMALL | 48 | config MD5_SMALL |
41 | int "MD5: Trade bytes for speed (0:fast, 3:slow)" | 49 | int "MD5: Trade bytes for speed (0:fast, 3:slow)" |
42 | default 1 # all "fast or small" options default to small | 50 | default 1 # all "fast or small" options default to small |
@@ -67,6 +75,7 @@ config SHA1_SMALL | |||
67 | config SHA1_HWACCEL | 75 | config SHA1_HWACCEL |
68 | bool "SHA1: Use hardware accelerated instructions if possible" | 76 | bool "SHA1: Use hardware accelerated instructions if possible" |
69 | default y | 77 | default y |
78 | depends on !FEATURE_USE_CNG_API | ||
70 | help | 79 | help |
71 | On x86, this adds ~590 bytes of code. Throughput | 80 | On x86, this adds ~590 bytes of code. Throughput |
72 | is about twice as fast as fully-unrolled generic code. | 81 | is about twice as fast as fully-unrolled generic code. |
@@ -74,6 +83,7 @@ config SHA1_HWACCEL | |||
74 | config SHA256_HWACCEL | 83 | config SHA256_HWACCEL |
75 | bool "SHA256: Use hardware accelerated instructions if possible" | 84 | bool "SHA256: Use hardware accelerated instructions if possible" |
76 | default y | 85 | default y |
86 | depends on !FEATURE_USE_CNG_API | ||
77 | help | 87 | help |
78 | On x86, this adds ~1k bytes of code. | 88 | On x86, this adds ~1k bytes of code. |
79 | 89 | ||
@@ -182,8 +192,8 @@ config FEATURE_EDITING_VI | |||
182 | config FEATURE_EDITING_HISTORY | 192 | config FEATURE_EDITING_HISTORY |
183 | int "History size" | 193 | int "History size" |
184 | # Don't allow way too big values here, code uses fixed "char *history[N]" struct member | 194 | # Don't allow way too big values here, code uses fixed "char *history[N]" struct member |
185 | range 0 9999 | 195 | range 0 2000 |
186 | default 255 | 196 | default 200 |
187 | depends on FEATURE_EDITING | 197 | depends on FEATURE_EDITING |
188 | help | 198 | help |
189 | Specify command history size (0 - disable). | 199 | Specify command history size (0 - disable). |
diff --git a/libbb/bitops.c b/libbb/bitops.c new file mode 100644 index 000000000..467e1a2d9 --- /dev/null +++ b/libbb/bitops.c | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | * Utility routines. | ||
3 | * | ||
4 | * Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com> | ||
5 | * | ||
6 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
7 | */ | ||
8 | //kbuild:lib-y += bitops.o | ||
9 | |||
10 | #include "libbb.h" | ||
11 | |||
/*
 * Three-operand XOR: dst[i] = src1[i] ^ src2[i] for every i < count.
 * dst may be the same buffer as src1 (xorbuf() calls it that way).
 */
void FAST_FUNC xorbuf_3(void *dst, const void *src1, const void *src2, unsigned count)
{
	uint8_t *out = dst;
	const uint8_t *a = src1;
	const uint8_t *b = src2;

#if BB_UNALIGNED_MEMACCESS_OK
	/* Unaligned word access is allowed: process one long per step */
	for (; count >= sizeof(long); count -= sizeof(long)) {
		*(long*)out = *(long*)a ^ *(long*)b;
		out += sizeof(long);
		a += sizeof(long);
		b += sizeof(long);
	}
#endif
	/* Leftover bytes (or all of them when word access is not allowed) */
	for (; count != 0; count--)
		*out++ = *a++ ^ *b++;
}
29 | |||
30 | void FAST_FUNC xorbuf(void *dst, const void *src, unsigned count) | ||
31 | { | ||
32 | xorbuf_3(dst, dst, src, count); | ||
33 | } | ||
34 | |||
/*
 * XOR the 16 bytes at src into the 16 bytes at dst.
 * The generic path reads and writes both buffers as unsigned longs,
 * so both pointers are expected to be suitably aligned for long
 * (xorbuf16() checks this before calling). The SSE path uses
 * unaligned loads/stores.
 */
void FAST_FUNC xorbuf16_aligned_long(void *dst, const void *src)
{
#if defined(__SSE__) /* any x86_64 has it */
	__asm__ volatile(
"\n movups (%0),%%xmm0"
"\n movups (%1),%%xmm1" // can't just xorps(%1),%%xmm0:
"\n xorps %%xmm1,%%xmm0" // SSE requires 16-byte alignment
"\n movups %%xmm0,(%0)"
"\n"
	: "=r" (dst), "=r" (src)
	: "0" (dst), "1" (src)
	: "xmm0", "xmm1", "memory"
	);
#else
	unsigned long *d = dst;
	const unsigned long *s = src;
	unsigned i;

	/* 16 bytes is 16/sizeof(long) words: 4 with 32-bit long, 2 with 64-bit */
	for (i = 0; i < 16 / sizeof(long); i++)
		d[i] ^= s[i];
#endif
}
61 | // The above can be inlined in libbb.h, in a way where compiler | ||
62 | // is even free to use better addressing modes than (%reg), and | ||
63 | // to keep the result in a register | ||
64 | // (to not store it to memory after each XOR): | ||
65 | //#if defined(__SSE__) | ||
66 | //#include <xmmintrin.h> | ||
67 | //^^^ or just: typedef float __m128_u __attribute__((__vector_size__(16),__may_alias__,__aligned__(1))); | ||
68 | //static ALWAYS_INLINE void xorbuf16_aligned_long(void *dst, const void *src) | ||
69 | //{ | ||
70 | // __m128_u xmm0, xmm1; | ||
71 | // asm volatile( | ||
72 | //"\n xorps %1,%0" | ||
73 | // : "=x" (xmm0), "=x" (xmm1) | ||
74 | // : "0" (*(__m128_u*)dst), "1" (*(__m128_u*)src) | ||
75 | // ); | ||
76 | // *(__m128_u*)dst = xmm0; // this store may be optimized out! | ||
77 | //} | ||
78 | //#endif | ||
79 | // but I don't trust gcc optimizer enough to not generate some monstrosity. | ||
80 | // See GMULT() function in TLS code as an example. | ||
81 | |||
/*
 * Three-operand XOR of exactly 64 bytes: dst = src1 ^ src2.
 * The generic path accesses all three buffers as longs; the SSE path
 * uses unaligned 16-byte loads/stores, four times.
 */
void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2)
{
#if defined(__SSE__) /* any x86_64 has it */
	__asm__ volatile(
"\n movups 0*16(%1),%%xmm0"
"\n movups 0*16(%2),%%xmm1" // can't just xorps(%2),%%xmm0:
"\n xorps %%xmm1,%%xmm0" // SSE requires 16-byte alignment, we have only 8-byte
"\n movups %%xmm0,0*16(%0)"
"\n movups 1*16(%1),%%xmm0"
"\n movups 1*16(%2),%%xmm1"
"\n xorps %%xmm1,%%xmm0"
"\n movups %%xmm0,1*16(%0)"
"\n movups 2*16(%1),%%xmm0"
"\n movups 2*16(%2),%%xmm1"
"\n xorps %%xmm1,%%xmm0"
"\n movups %%xmm0,2*16(%0)"
"\n movups 3*16(%1),%%xmm0"
"\n movups 3*16(%2),%%xmm1"
"\n xorps %%xmm1,%%xmm0"
"\n movups %%xmm0,3*16(%0)"
"\n"
	: "=r" (dst), "=r" (src1), "=r" (src2)
	: "0" (dst), "1" (src1), "2" (src2)
	: "xmm0", "xmm1", "memory"
	);
#else
	long *out = dst;
	const long *a = src1;
	const long *b = src2;
	unsigned i;

	/* 64 bytes, one long at a time */
	for (i = 0; i < 64 / sizeof(long); i++)
		out[i] = a[i] ^ b[i];
#endif
}
117 | |||
118 | #if !BB_UNALIGNED_MEMACCESS_OK | ||
119 | void FAST_FUNC xorbuf16(void *dst, const void *src) | ||
120 | { | ||
121 | #define p_aligned(a) (((uintptr_t)(a) & (sizeof(long)-1)) == 0) | ||
122 | if (p_aligned(src) && p_aligned(dst)) { | ||
123 | xorbuf16_aligned_long(dst, src); | ||
124 | return; | ||
125 | } | ||
126 | xorbuf_3(dst, dst, src, 16); | ||
127 | } | ||
128 | #endif | ||
diff --git a/libbb/const_hack.c b/libbb/const_hack.c index 75163fede..1d175481b 100644 --- a/libbb/const_hack.c +++ b/libbb/const_hack.c | |||
@@ -9,18 +9,27 @@ | |||
9 | #include "libbb.h" | 9 | #include "libbb.h" |
10 | 10 | ||
11 | #if defined(__clang_major__) && __clang_major__ >= 9 | 11 | #if defined(__clang_major__) && __clang_major__ >= 9 |
12 | void FAST_FUNC XZALLOC_CONST_PTR(const void *pptr, size_t size) | 12 | /* Clang/llvm drops assignment to "constant" storage. Silently. |
13 | * Needs serious convincing to not eliminate the store. | ||
14 | */ | ||
15 | static ALWAYS_INLINE void* not_const_pp(const void *p) | ||
13 | { | 16 | { |
14 | ASSIGN_CONST_PTR(pptr, xzalloc(size)); | 17 | void *pp; |
18 | asm volatile ( | ||
19 | "# forget that p points to const" | ||
20 | : /*outputs*/ "=r" (pp) | ||
21 | : /*inputs*/ "0" (p) | ||
22 | ); | ||
23 | return pp; | ||
15 | } | 24 | } |
16 | 25 | void FAST_FUNC ASSIGN_CONST_PTR(const void *pptr, void *v) | |
17 | # if ENABLE_PLATFORM_MINGW32 | 26 | { |
18 | void FAST_FUNC ASSIGN_CONST_PTR(const void *pptr, const void *v) | 27 | *(void**)not_const_pp(pptr) = v; |
28 | barrier(); | ||
29 | } | ||
30 | void FAST_FUNC XZALLOC_CONST_PTR(const void *pptr, size_t size) | ||
19 | { | 31 | { |
20 | do { | 32 | *(void**)not_const_pp(pptr) = xzalloc(size); |
21 | *(void**)not_const_pp(pptr) = (void*)(v); | 33 | barrier(); |
22 | barrier(); | ||
23 | } while (0); | ||
24 | } | 34 | } |
25 | # endif | ||
26 | #endif | 35 | #endif |
diff --git a/libbb/dump.c b/libbb/dump.c index aa57eca8c..b2abe85af 100644 --- a/libbb/dump.c +++ b/libbb/dump.c | |||
@@ -703,15 +703,21 @@ static NOINLINE void display(priv_dumper_t* dumper) | |||
703 | conv_u(pr, bp); | 703 | conv_u(pr, bp); |
704 | break; | 704 | break; |
705 | case F_UINT: { | 705 | case F_UINT: { |
706 | union { | ||
707 | uint16_t uval16; | ||
708 | uint32_t uval32; | ||
709 | } u; | ||
706 | unsigned value = (unsigned char)*bp; | 710 | unsigned value = (unsigned char)*bp; |
707 | switch (pr->bcnt) { | 711 | switch (pr->bcnt) { |
708 | case 1: | 712 | case 1: |
709 | break; | 713 | break; |
710 | case 2: | 714 | case 2: |
711 | move_from_unaligned16(value, bp); | 715 | move_from_unaligned16(u.uval16, bp); |
716 | value = u.uval16; | ||
712 | break; | 717 | break; |
713 | case 4: | 718 | case 4: |
714 | move_from_unaligned32(value, bp); | 719 | move_from_unaligned32(u.uval32, bp); |
720 | value = u.uval32; | ||
715 | break; | 721 | break; |
716 | /* case 8: no users yet */ | 722 | /* case 8: no users yet */ |
717 | } | 723 | } |
diff --git a/libbb/hash_hmac.c b/libbb/hash_hmac.c new file mode 100644 index 000000000..b3138029f --- /dev/null +++ b/libbb/hash_hmac.c | |||
@@ -0,0 +1,154 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2025 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | //kbuild:lib-$(CONFIG_TLS) += hash_hmac.o | ||
7 | //kbuild:lib-$(CONFIG_USE_BB_CRYPT_YES) += hash_hmac.o | ||
8 | |||
9 | #include "libbb.h" | ||
10 | |||
11 | // RFC 2104: | ||
12 | // HMAC(key, text) based on a hash H (say, sha256) is: | ||
13 | // ipad = [0x36 x INSIZE] | ||
14 | // opad = [0x5c x INSIZE] | ||
15 | // HMAC(key, text) = H((key XOR opad) + H((key XOR ipad) + text)) | ||
16 | // | ||
17 | // H(key XOR opad) and H(key XOR ipad) can be precomputed | ||
18 | // if we often need HMAC with the same key. | ||
19 | // | ||
20 | // text is often given in disjoint pieces. | ||
21 | #if !ENABLE_FEATURE_USE_CNG_API | ||
22 | void FAST_FUNC hmac_begin(hmac_ctx_t *ctx, const uint8_t *key, unsigned key_size, md5sha_begin_func *begin) | ||
23 | { | ||
24 | #if HMAC_ONLY_SHA256 | ||
25 | #define begin sha256_begin | ||
26 | #endif | ||
27 | uint8_t key_xor_ipad[SHA2_INSIZE]; | ||
28 | uint8_t key_xor_opad[SHA2_INSIZE]; | ||
29 | unsigned i; | ||
30 | |||
31 | // "The authentication key can be of any length up to INSIZE, the | ||
32 | // block length of the hash function. Applications that use keys longer | ||
33 | // than INSIZE bytes will first hash the key using H and then use the | ||
34 | // resultant OUTSIZE byte string as the actual key to HMAC." | ||
35 | if (key_size > SHA2_INSIZE) { | ||
36 | uint8_t tempkey[SHA1_OUTSIZE < SHA256_OUTSIZE ? SHA256_OUTSIZE : SHA1_OUTSIZE]; | ||
37 | /* use ctx->hashed_key_xor_ipad as scratch ctx */ | ||
38 | begin(&ctx->hashed_key_xor_ipad); | ||
39 | md5sha_hash(&ctx->hashed_key_xor_ipad, key, key_size); | ||
40 | key_size = sha_end(&ctx->hashed_key_xor_ipad, tempkey); | ||
41 | key = tempkey; | ||
42 | } | ||
43 | |||
44 | for (i = 0; i < key_size; i++) { | ||
45 | key_xor_ipad[i] = key[i] ^ 0x36; | ||
46 | key_xor_opad[i] = key[i] ^ 0x5c; | ||
47 | } | ||
48 | for (; i < SHA2_INSIZE; i++) { | ||
49 | key_xor_ipad[i] = 0x36; | ||
50 | key_xor_opad[i] = 0x5c; | ||
51 | } | ||
52 | |||
53 | begin(&ctx->hashed_key_xor_ipad); | ||
54 | begin(&ctx->hashed_key_xor_opad); | ||
55 | md5sha_hash(&ctx->hashed_key_xor_ipad, key_xor_ipad, SHA2_INSIZE); | ||
56 | md5sha_hash(&ctx->hashed_key_xor_opad, key_xor_opad, SHA2_INSIZE); | ||
57 | } | ||
58 | #undef begin | ||
59 | |||
60 | unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out) | ||
61 | { | ||
62 | unsigned len = sha_end(&ctx->hashed_key_xor_ipad, out); | ||
63 | /* out = H((key XOR opad) + out) */ | ||
64 | md5sha_hash(&ctx->hashed_key_xor_opad, out, len); | ||
65 | return sha_end(&ctx->hashed_key_xor_opad, out); | ||
66 | } | ||
67 | |||
68 | unsigned FAST_FUNC hmac_block(const uint8_t *key, unsigned key_size, md5sha_begin_func *begin, const void *in, unsigned sz, uint8_t *out) | ||
69 | { | ||
70 | hmac_ctx_t ctx; | ||
71 | hmac_begin(&ctx, key, key_size, begin); | ||
72 | hmac_hash(&ctx, in, sz); | ||
73 | return hmac_end(&ctx, out); | ||
74 | } | ||
75 | |||
76 | /* TLS helpers */ | ||
77 | |||
78 | void FAST_FUNC hmac_hash_v( | ||
79 | hmac_ctx_t *ctx, | ||
80 | va_list va) | ||
81 | { | ||
82 | uint8_t *in; | ||
83 | |||
84 | /* ctx->hashed_key_xor_ipad contains unclosed "H((key XOR ipad) +" state */ | ||
85 | /* ctx->hashed_key_xor_opad contains unclosed "H((key XOR opad) +" state */ | ||
86 | |||
87 | /* calculate out = H((key XOR ipad) + text) */ | ||
88 | while ((in = va_arg(va, uint8_t*)) != NULL) { | ||
89 | unsigned size = va_arg(va, unsigned); | ||
90 | md5sha_hash(&ctx->hashed_key_xor_ipad, in, size); | ||
91 | } | ||
92 | } | ||
93 | #else | ||
94 | void _hmac_begin(hmac_ctx_t *ctx, uint8_t *key, unsigned key_size, | ||
95 | BCRYPT_ALG_HANDLE alg_handle) { | ||
96 | DWORD hash_object_length = 0; | ||
97 | ULONG _unused; | ||
98 | NTSTATUS status; | ||
99 | |||
100 | status = BCryptGetProperty(alg_handle, BCRYPT_OBJECT_LENGTH, | ||
101 | (PUCHAR)&hash_object_length, sizeof(DWORD), &_unused, 0); | ||
102 | mingw_die_if_error(status, "BCryptGetProperty"); | ||
103 | status = BCryptGetProperty(alg_handle, BCRYPT_HASH_LENGTH, | ||
104 | (PUCHAR)&ctx->output_size, sizeof(DWORD), &_unused, 0); | ||
105 | mingw_die_if_error(status, "BCryptGetProperty"); | ||
106 | |||
107 | ctx->hash_obj = xmalloc(hash_object_length); | ||
108 | |||
109 | status = BCryptCreateHash(alg_handle, &ctx->handle, ctx->hash_obj, | ||
110 | hash_object_length, key, key_size, BCRYPT_HASH_REUSABLE_FLAG); | ||
111 | mingw_die_if_error(status, "BCryptCreateHash"); | ||
112 | } | ||
113 | |||
114 | unsigned FAST_FUNC hmac_end(hmac_ctx_t *ctx, uint8_t *out) | ||
115 | { | ||
116 | NTSTATUS status; | ||
117 | |||
118 | status = BCryptFinishHash(ctx->handle, out, ctx->output_size, 0); | ||
119 | mingw_die_if_error(status, "BCryptFinishHash"); | ||
120 | |||
121 | return ctx->output_size; | ||
122 | } | ||
123 | |||
124 | void FAST_FUNC hmac_hash_v(hmac_ctx_t *ctx, va_list va) | ||
125 | { | ||
126 | uint8_t *in; | ||
127 | |||
128 | while ((in = va_arg(va, uint8_t*)) != NULL) { | ||
129 | unsigned size = va_arg(va, unsigned); | ||
130 | BCryptHashData(ctx->handle, in, size, 0); | ||
131 | } | ||
132 | } | ||
133 | |||
134 | void hmac_uninit(hmac_ctx_t *ctx) { | ||
135 | BCryptDestroyHash(ctx->handle); | ||
136 | free(ctx->hash_obj); | ||
137 | } | ||
138 | #endif | ||
139 | |||
140 | /* Using HMAC state, make a copy of it (IOW: without affecting this state!) | ||
141 | * hash in the list of (ptr,size) blocks, and finish the HMAC to out[] buffer. | ||
142 | * This function is useful for TLS PRF. | ||
143 | */ | ||
144 | unsigned hmac_peek_hash(hmac_ctx_t *ctx, uint8_t *out, ...) | ||
145 | { | ||
146 | hmac_ctx_t tmpctx = *ctx; /* struct copy */ | ||
147 | va_list va; | ||
148 | |||
149 | va_start(va, out); | ||
150 | hmac_hash_v(&tmpctx, va); | ||
151 | va_end(va); | ||
152 | |||
153 | return hmac_end(&tmpctx, out); | ||
154 | } | ||
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 75a61c32c..22dd890bf 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c | |||
@@ -13,6 +13,82 @@ | |||
13 | 13 | ||
14 | #define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA) | 14 | #define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA) |
15 | 15 | ||
16 | #if ENABLE_FEATURE_USE_CNG_API | ||
17 | # include <windows.h> | ||
18 | # include <bcrypt.h> | ||
19 | |||
20 | // these work on Windows >= 10 | ||
21 | # define BCRYPT_MD5_ALG_HANDLE ((BCRYPT_ALG_HANDLE) 0x00000021) | ||
22 | # define BCRYPT_SHA1_ALG_HANDLE ((BCRYPT_ALG_HANDLE) 0x00000031) | ||
23 | # define BCRYPT_SHA256_ALG_HANDLE ((BCRYPT_ALG_HANDLE) 0x00000041) | ||
24 | # define BCRYPT_SHA512_ALG_HANDLE ((BCRYPT_ALG_HANDLE) 0x00000061) | ||
25 | |||
26 | /* Initialize structure containing state of computation. | ||
27 | * (RFC 1321, 3.3: Step 3) | ||
28 | */ | ||
29 | |||
30 | static void generic_init(struct bcrypt_hash_ctx_t *ctx, BCRYPT_ALG_HANDLE alg_handle) { | ||
31 | DWORD hash_object_length = 0; | ||
32 | ULONG _unused; | ||
33 | NTSTATUS status; | ||
34 | |||
35 | status = BCryptGetProperty(alg_handle, BCRYPT_OBJECT_LENGTH, (PUCHAR)&hash_object_length, sizeof(DWORD), &_unused, 0); | ||
36 | mingw_die_if_error(status, "BCryptGetProperty"); | ||
37 | status = BCryptGetProperty(alg_handle, BCRYPT_HASH_LENGTH, (PUCHAR)&ctx->output_size, sizeof(DWORD), &_unused, 0); | ||
38 | mingw_die_if_error(status, "BCryptGetProperty"); | ||
39 | |||
40 | |||
41 | ctx->hash_obj = xmalloc(hash_object_length); | ||
42 | |||
43 | status = BCryptCreateHash(alg_handle, &ctx->handle, ctx->hash_obj, hash_object_length, NULL, 0, 0); | ||
44 | mingw_die_if_error(status, "BCryptCreateHash"); | ||
45 | } | ||
46 | |||
47 | void FAST_FUNC md5_begin(md5_ctx_t *ctx) | ||
48 | { | ||
49 | generic_init(ctx, BCRYPT_MD5_ALG_HANDLE); | ||
50 | } | ||
51 | |||
52 | void FAST_FUNC sha1_begin(sha1_ctx_t *ctx) | ||
53 | { | ||
54 | generic_init(ctx, BCRYPT_SHA1_ALG_HANDLE); | ||
55 | } | ||
56 | |||
57 | /* Initialize structure containing state of computation. | ||
58 | (FIPS 180-2:5.3.2) */ | ||
59 | void FAST_FUNC sha256_begin(sha256_ctx_t *ctx) | ||
60 | { | ||
61 | generic_init(ctx, BCRYPT_SHA256_ALG_HANDLE); | ||
62 | } | ||
63 | |||
#if NEED_SHA512
/* Start a SHA512 computation backed by the CNG SHA512 pseudo-handle */
void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
{
	generic_init(ctx, BCRYPT_SHA512_ALG_HANDLE);
}
#endif /* NEED_SHA512 */
72 | |||
73 | void FAST_FUNC generic_hash(struct bcrypt_hash_ctx_t *ctx, const void *buffer, size_t len) | ||
74 | { | ||
75 | /* | ||
76 | for perf, no error checking here | ||
77 | */ | ||
78 | /*NTSTATUS status = */ BCryptHashData(ctx->handle, (const PUCHAR)buffer, len, 0); | ||
79 | // mingw_die_if_error(status, "BCryptHashData"); | ||
80 | } | ||
81 | |||
82 | unsigned FAST_FUNC generic_end(struct bcrypt_hash_ctx_t *ctx, void *resbuf) | ||
83 | { | ||
84 | NTSTATUS status = BCryptFinishHash(ctx->handle, resbuf, ctx->output_size, 0); | ||
85 | mingw_die_if_error(status, "BCryptFinishHash"); | ||
86 | BCryptDestroyHash(ctx->handle); | ||
87 | free(ctx->hash_obj); | ||
88 | return ctx->output_size; | ||
89 | } | ||
90 | #endif /* ENABLE_FEATURE_USE_CNG_API */ | ||
91 | |||
16 | #if ENABLE_SHA1_HWACCEL || ENABLE_SHA256_HWACCEL | 92 | #if ENABLE_SHA1_HWACCEL || ENABLE_SHA256_HWACCEL |
17 | # if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) | 93 | # if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) |
18 | static void cpuid_eax_ebx_ecx(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) | 94 | static void cpuid_eax_ebx_ecx(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) |
@@ -80,6 +156,7 @@ static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n) | |||
80 | return (x << n) | (x >> (64 - n)); | 156 | return (x << n) | (x >> (64 - n)); |
81 | } | 157 | } |
82 | 158 | ||
159 | #if !ENABLE_FEATURE_USE_CNG_API | ||
83 | /* Process the remaining bytes in the buffer */ | 160 | /* Process the remaining bytes in the buffer */ |
84 | static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed) | 161 | static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed) |
85 | { | 162 | { |
@@ -1367,6 +1444,7 @@ unsigned FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf) | |||
1367 | return sizeof(ctx->hash); | 1444 | return sizeof(ctx->hash); |
1368 | } | 1445 | } |
1369 | #endif /* NEED_SHA512 */ | 1446 | #endif /* NEED_SHA512 */ |
1447 | #endif /* !ENABLE_FEATURE_USE_CNG_API */ | ||
1370 | 1448 | ||
1371 | 1449 | ||
1372 | /* | 1450 | /* |
diff --git a/libbb/hash_sha256_block.c b/libbb/hash_sha256_block.c new file mode 100644 index 000000000..3c4366321 --- /dev/null +++ b/libbb/hash_sha256_block.c | |||
@@ -0,0 +1,19 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | ||
3 | * Utility routines. | ||
4 | * | ||
5 | * Copyright (C) 2025 Denys Vlasenko | ||
6 | * | ||
7 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. | ||
8 | */ | ||
9 | //kbuild:lib-y += hash_sha256_block.o | ||
10 | #include "libbb.h" | ||
11 | |||
12 | void FAST_FUNC | ||
13 | sha256_block(const void *in, size_t len, uint8_t hash[32]) | ||
14 | { | ||
15 | sha256_ctx_t ctx; | ||
16 | sha256_begin(&ctx); | ||
17 | sha256_hash(&ctx, in, len); | ||
18 | sha256_end(&ctx, hash); | ||
19 | } | ||
diff --git a/libbb/hash_sha256_hwaccel_x86-32.S b/libbb/hash_sha256_hwaccel_x86-32.S index a0e4a571a..8d84055e8 100644 --- a/libbb/hash_sha256_hwaccel_x86-32.S +++ b/libbb/hash_sha256_hwaccel_x86-32.S | |||
@@ -34,21 +34,21 @@ | |||
34 | #define MSG %xmm0 | 34 | #define MSG %xmm0 |
35 | #define STATE0 %xmm1 | 35 | #define STATE0 %xmm1 |
36 | #define STATE1 %xmm2 | 36 | #define STATE1 %xmm2 |
37 | #define MSGTMP0 %xmm3 | 37 | #define MSG0 %xmm3 |
38 | #define MSGTMP1 %xmm4 | 38 | #define MSG1 %xmm4 |
39 | #define MSGTMP2 %xmm5 | 39 | #define MSG2 %xmm5 |
40 | #define MSGTMP3 %xmm6 | 40 | #define MSG3 %xmm6 |
41 | 41 | ||
42 | #define XMMTMP %xmm7 | 42 | #define XMMTMP %xmm7 |
43 | 43 | ||
44 | #define SHUF(a,b,c,d) $(a+(b<<2)+(c<<4)+(d<<6)) | 44 | #define SHUF(a,b,c,d) $((a)+((b)<<2)+((c)<<4)+((d)<<6)) |
45 | 45 | ||
46 | .balign 8 # allow decoders to fetch at least 2 first insns | 46 | .balign 8 # allow decoders to fetch at least 2 first insns |
47 | sha256_process_block64_shaNI: | 47 | sha256_process_block64_shaNI: |
48 | 48 | ||
49 | movu128 76+0*16(%eax), XMMTMP /* ABCD (little-endian dword order) */ | 49 | movu128 76+0*16(%eax), XMMTMP /* ABCD (shown least-significant-dword-first) */ |
50 | movu128 76+1*16(%eax), STATE1 /* EFGH */ | 50 | movu128 76+1*16(%eax), STATE1 /* EFGH */ |
51 | /* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ | 51 | /* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */ |
52 | mova128 STATE1, STATE0 | 52 | mova128 STATE1, STATE0 |
53 | /* --- -------------- ABCD -- EFGH */ | 53 | /* --- -------------- ABCD -- EFGH */ |
54 | shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */ | 54 | shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */ |
@@ -58,190 +58,208 @@ sha256_process_block64_shaNI: | |||
58 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP | 58 | mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP |
59 | movl $K256+8*16, SHA256CONSTANTS | 59 | movl $K256+8*16, SHA256CONSTANTS |
60 | 60 | ||
61 | // sha256rnds2 instruction uses only lower 64 bits of MSG. | ||
62 | // The code below needs to move upper 64 bits to lower 64 bits | ||
63 | // for the second sha256rnds2 invocation | ||
64 | // (what remains in upper bits does not matter). | ||
65 | // There are several ways to do it: | ||
66 | // movhlps MSG, MSG // abcd -> cdcd (3 bytes of code) | ||
67 | // shuf128_32 SHUF(2,3,n,n), MSG, MSG // abcd -> cdXX (4 bytes) | ||
68 | // punpckhqdq MSG, MSG // abcd -> cdcd (4 bytes) | ||
69 | // unpckhpd MSG, MSG // abcd -> cdcd (4 bytes) | ||
70 | // psrldq $8, MSG // abcd -> cd00 (5 bytes) | ||
71 | // palignr $8, MSG, MSG // abcd -> cdab (6 bytes, SSSE3 insn) | ||
72 | #define MOVE_UPPER64_DOWN(reg) movhlps reg, reg | ||
73 | //#define MOVE_UPPER64_DOWN(reg) shuf128_32 SHUF(2,3,0,0), reg, reg | ||
74 | //#define MOVE_UPPER64_DOWN(reg) punpckhqdq reg, reg | ||
75 | //#define MOVE_UPPER64_DOWN(reg) unpckhpd reg, reg | ||
76 | //#define MOVE_UPPER64_DOWN(reg) psrldq $8, reg | ||
77 | //#define MOVE_UPPER64_DOWN(reg) palignr $8, reg, reg | ||
78 | |||
61 | /* Rounds 0-3 */ | 79 | /* Rounds 0-3 */ |
62 | movu128 0*16(DATA_PTR), MSG | 80 | movu128 0*16(DATA_PTR), MSG |
63 | pshufb XMMTMP, MSG | 81 | pshufb XMMTMP, MSG |
64 | mova128 MSG, MSGTMP0 | 82 | mova128 MSG, MSG0 |
65 | paddd 0*16-8*16(SHA256CONSTANTS), MSG | 83 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
66 | sha256rnds2 MSG, STATE0, STATE1 | 84 | sha256rnds2 MSG, STATE0, STATE1 |
67 | shuf128_32 $0x0E, MSG, MSG | 85 | MOVE_UPPER64_DOWN(MSG) |
68 | sha256rnds2 MSG, STATE1, STATE0 | 86 | sha256rnds2 MSG, STATE1, STATE0 |
69 | 87 | ||
70 | /* Rounds 4-7 */ | 88 | /* Rounds 4-7 */ |
71 | movu128 1*16(DATA_PTR), MSG | 89 | movu128 1*16(DATA_PTR), MSG |
72 | pshufb XMMTMP, MSG | 90 | pshufb XMMTMP, MSG |
73 | mova128 MSG, MSGTMP1 | 91 | mova128 MSG, MSG1 |
74 | paddd 1*16-8*16(SHA256CONSTANTS), MSG | 92 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
75 | sha256rnds2 MSG, STATE0, STATE1 | 93 | sha256rnds2 MSG, STATE0, STATE1 |
76 | shuf128_32 $0x0E, MSG, MSG | 94 | MOVE_UPPER64_DOWN(MSG) |
77 | sha256rnds2 MSG, STATE1, STATE0 | 95 | sha256rnds2 MSG, STATE1, STATE0 |
78 | sha256msg1 MSGTMP1, MSGTMP0 | 96 | sha256msg1 MSG1, MSG0 |
79 | 97 | ||
80 | /* Rounds 8-11 */ | 98 | /* Rounds 8-11 */ |
81 | movu128 2*16(DATA_PTR), MSG | 99 | movu128 2*16(DATA_PTR), MSG |
82 | pshufb XMMTMP, MSG | 100 | pshufb XMMTMP, MSG |
83 | mova128 MSG, MSGTMP2 | 101 | mova128 MSG, MSG2 |
84 | paddd 2*16-8*16(SHA256CONSTANTS), MSG | 102 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
85 | sha256rnds2 MSG, STATE0, STATE1 | 103 | sha256rnds2 MSG, STATE0, STATE1 |
86 | shuf128_32 $0x0E, MSG, MSG | 104 | MOVE_UPPER64_DOWN(MSG) |
87 | sha256rnds2 MSG, STATE1, STATE0 | 105 | sha256rnds2 MSG, STATE1, STATE0 |
88 | sha256msg1 MSGTMP2, MSGTMP1 | 106 | sha256msg1 MSG2, MSG1 |
89 | 107 | ||
90 | /* Rounds 12-15 */ | 108 | /* Rounds 12-15 */ |
91 | movu128 3*16(DATA_PTR), MSG | 109 | movu128 3*16(DATA_PTR), MSG |
92 | pshufb XMMTMP, MSG | 110 | pshufb XMMTMP, MSG |
93 | /* ...to here */ | 111 | /* ...to here */ |
94 | mova128 MSG, MSGTMP3 | 112 | mova128 MSG, MSG3 |
95 | paddd 3*16-8*16(SHA256CONSTANTS), MSG | 113 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
96 | sha256rnds2 MSG, STATE0, STATE1 | 114 | sha256rnds2 MSG, STATE0, STATE1 |
97 | mova128 MSGTMP3, XMMTMP | 115 | mova128 MSG3, XMMTMP |
98 | palignr $4, MSGTMP2, XMMTMP | 116 | palignr $4, MSG2, XMMTMP |
99 | paddd XMMTMP, MSGTMP0 | 117 | paddd XMMTMP, MSG0 |
100 | sha256msg2 MSGTMP3, MSGTMP0 | 118 | sha256msg2 MSG3, MSG0 |
101 | shuf128_32 $0x0E, MSG, MSG | 119 | MOVE_UPPER64_DOWN(MSG) |
102 | sha256rnds2 MSG, STATE1, STATE0 | 120 | sha256rnds2 MSG, STATE1, STATE0 |
103 | sha256msg1 MSGTMP3, MSGTMP2 | 121 | sha256msg1 MSG3, MSG2 |
104 | 122 | ||
105 | /* Rounds 16-19 */ | 123 | /* Rounds 16-19 */ |
106 | mova128 MSGTMP0, MSG | 124 | mova128 MSG0, MSG |
107 | paddd 4*16-8*16(SHA256CONSTANTS), MSG | 125 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
108 | sha256rnds2 MSG, STATE0, STATE1 | 126 | sha256rnds2 MSG, STATE0, STATE1 |
109 | mova128 MSGTMP0, XMMTMP | 127 | mova128 MSG0, XMMTMP |
110 | palignr $4, MSGTMP3, XMMTMP | 128 | palignr $4, MSG3, XMMTMP |
111 | paddd XMMTMP, MSGTMP1 | 129 | paddd XMMTMP, MSG1 |
112 | sha256msg2 MSGTMP0, MSGTMP1 | 130 | sha256msg2 MSG0, MSG1 |
113 | shuf128_32 $0x0E, MSG, MSG | 131 | MOVE_UPPER64_DOWN(MSG) |
114 | sha256rnds2 MSG, STATE1, STATE0 | 132 | sha256rnds2 MSG, STATE1, STATE0 |
115 | sha256msg1 MSGTMP0, MSGTMP3 | 133 | sha256msg1 MSG0, MSG3 |
116 | 134 | ||
117 | /* Rounds 20-23 */ | 135 | /* Rounds 20-23 */ |
118 | mova128 MSGTMP1, MSG | 136 | mova128 MSG1, MSG |
119 | paddd 5*16-8*16(SHA256CONSTANTS), MSG | 137 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
120 | sha256rnds2 MSG, STATE0, STATE1 | 138 | sha256rnds2 MSG, STATE0, STATE1 |
121 | mova128 MSGTMP1, XMMTMP | 139 | mova128 MSG1, XMMTMP |
122 | palignr $4, MSGTMP0, XMMTMP | 140 | palignr $4, MSG0, XMMTMP |
123 | paddd XMMTMP, MSGTMP2 | 141 | paddd XMMTMP, MSG2 |
124 | sha256msg2 MSGTMP1, MSGTMP2 | 142 | sha256msg2 MSG1, MSG2 |
125 | shuf128_32 $0x0E, MSG, MSG | 143 | MOVE_UPPER64_DOWN(MSG) |
126 | sha256rnds2 MSG, STATE1, STATE0 | 144 | sha256rnds2 MSG, STATE1, STATE0 |
127 | sha256msg1 MSGTMP1, MSGTMP0 | 145 | sha256msg1 MSG1, MSG0 |
128 | 146 | ||
129 | /* Rounds 24-27 */ | 147 | /* Rounds 24-27 */ |
130 | mova128 MSGTMP2, MSG | 148 | mova128 MSG2, MSG |
131 | paddd 6*16-8*16(SHA256CONSTANTS), MSG | 149 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
132 | sha256rnds2 MSG, STATE0, STATE1 | 150 | sha256rnds2 MSG, STATE0, STATE1 |
133 | mova128 MSGTMP2, XMMTMP | 151 | mova128 MSG2, XMMTMP |
134 | palignr $4, MSGTMP1, XMMTMP | 152 | palignr $4, MSG1, XMMTMP |
135 | paddd XMMTMP, MSGTMP3 | 153 | paddd XMMTMP, MSG3 |
136 | sha256msg2 MSGTMP2, MSGTMP3 | 154 | sha256msg2 MSG2, MSG3 |
137 | shuf128_32 $0x0E, MSG, MSG | 155 | MOVE_UPPER64_DOWN(MSG) |
138 | sha256rnds2 MSG, STATE1, STATE0 | 156 | sha256rnds2 MSG, STATE1, STATE0 |
139 | sha256msg1 MSGTMP2, MSGTMP1 | 157 | sha256msg1 MSG2, MSG1 |
140 | 158 | ||
141 | /* Rounds 28-31 */ | 159 | /* Rounds 28-31 */ |
142 | mova128 MSGTMP3, MSG | 160 | mova128 MSG3, MSG |
143 | paddd 7*16-8*16(SHA256CONSTANTS), MSG | 161 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
144 | sha256rnds2 MSG, STATE0, STATE1 | 162 | sha256rnds2 MSG, STATE0, STATE1 |
145 | mova128 MSGTMP3, XMMTMP | 163 | mova128 MSG3, XMMTMP |
146 | palignr $4, MSGTMP2, XMMTMP | 164 | palignr $4, MSG2, XMMTMP |
147 | paddd XMMTMP, MSGTMP0 | 165 | paddd XMMTMP, MSG0 |
148 | sha256msg2 MSGTMP3, MSGTMP0 | 166 | sha256msg2 MSG3, MSG0 |
149 | shuf128_32 $0x0E, MSG, MSG | 167 | MOVE_UPPER64_DOWN(MSG) |
150 | sha256rnds2 MSG, STATE1, STATE0 | 168 | sha256rnds2 MSG, STATE1, STATE0 |
151 | sha256msg1 MSGTMP3, MSGTMP2 | 169 | sha256msg1 MSG3, MSG2 |
152 | 170 | ||
153 | /* Rounds 32-35 */ | 171 | /* Rounds 32-35 */ |
154 | mova128 MSGTMP0, MSG | 172 | mova128 MSG0, MSG |
155 | paddd 8*16-8*16(SHA256CONSTANTS), MSG | 173 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
156 | sha256rnds2 MSG, STATE0, STATE1 | 174 | sha256rnds2 MSG, STATE0, STATE1 |
157 | mova128 MSGTMP0, XMMTMP | 175 | mova128 MSG0, XMMTMP |
158 | palignr $4, MSGTMP3, XMMTMP | 176 | palignr $4, MSG3, XMMTMP |
159 | paddd XMMTMP, MSGTMP1 | 177 | paddd XMMTMP, MSG1 |
160 | sha256msg2 MSGTMP0, MSGTMP1 | 178 | sha256msg2 MSG0, MSG1 |
161 | shuf128_32 $0x0E, MSG, MSG | 179 | MOVE_UPPER64_DOWN(MSG) |
162 | sha256rnds2 MSG, STATE1, STATE0 | 180 | sha256rnds2 MSG, STATE1, STATE0 |
163 | sha256msg1 MSGTMP0, MSGTMP3 | 181 | sha256msg1 MSG0, MSG3 |
164 | 182 | ||
165 | /* Rounds 36-39 */ | 183 | /* Rounds 36-39 */ |
166 | mova128 MSGTMP1, MSG | 184 | mova128 MSG1, MSG |
167 | paddd 9*16-8*16(SHA256CONSTANTS), MSG | 185 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
168 | sha256rnds2 MSG, STATE0, STATE1 | 186 | sha256rnds2 MSG, STATE0, STATE1 |
169 | mova128 MSGTMP1, XMMTMP | 187 | mova128 MSG1, XMMTMP |
170 | palignr $4, MSGTMP0, XMMTMP | 188 | palignr $4, MSG0, XMMTMP |
171 | paddd XMMTMP, MSGTMP2 | 189 | paddd XMMTMP, MSG2 |
172 | sha256msg2 MSGTMP1, MSGTMP2 | 190 | sha256msg2 MSG1, MSG2 |
173 | shuf128_32 $0x0E, MSG, MSG | 191 | MOVE_UPPER64_DOWN(MSG) |
174 | sha256rnds2 MSG, STATE1, STATE0 | 192 | sha256rnds2 MSG, STATE1, STATE0 |
175 | sha256msg1 MSGTMP1, MSGTMP0 | 193 | sha256msg1 MSG1, MSG0 |
176 | 194 | ||
177 | /* Rounds 40-43 */ | 195 | /* Rounds 40-43 */ |
178 | mova128 MSGTMP2, MSG | 196 | mova128 MSG2, MSG |
179 | paddd 10*16-8*16(SHA256CONSTANTS), MSG | 197 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
180 | sha256rnds2 MSG, STATE0, STATE1 | 198 | sha256rnds2 MSG, STATE0, STATE1 |
181 | mova128 MSGTMP2, XMMTMP | 199 | mova128 MSG2, XMMTMP |
182 | palignr $4, MSGTMP1, XMMTMP | 200 | palignr $4, MSG1, XMMTMP |
183 | paddd XMMTMP, MSGTMP3 | 201 | paddd XMMTMP, MSG3 |
184 | sha256msg2 MSGTMP2, MSGTMP3 | 202 | sha256msg2 MSG2, MSG3 |
185 | shuf128_32 $0x0E, MSG, MSG | 203 | MOVE_UPPER64_DOWN(MSG) |
186 | sha256rnds2 MSG, STATE1, STATE0 | 204 | sha256rnds2 MSG, STATE1, STATE0 |
187 | sha256msg1 MSGTMP2, MSGTMP1 | 205 | sha256msg1 MSG2, MSG1 |
188 | 206 | ||
189 | /* Rounds 44-47 */ | 207 | /* Rounds 44-47 */ |
190 | mova128 MSGTMP3, MSG | 208 | mova128 MSG3, MSG |
191 | paddd 11*16-8*16(SHA256CONSTANTS), MSG | 209 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
192 | sha256rnds2 MSG, STATE0, STATE1 | 210 | sha256rnds2 MSG, STATE0, STATE1 |
193 | mova128 MSGTMP3, XMMTMP | 211 | mova128 MSG3, XMMTMP |
194 | palignr $4, MSGTMP2, XMMTMP | 212 | palignr $4, MSG2, XMMTMP |
195 | paddd XMMTMP, MSGTMP0 | 213 | paddd XMMTMP, MSG0 |
196 | sha256msg2 MSGTMP3, MSGTMP0 | 214 | sha256msg2 MSG3, MSG0 |
197 | shuf128_32 $0x0E, MSG, MSG | 215 | MOVE_UPPER64_DOWN(MSG) |
198 | sha256rnds2 MSG, STATE1, STATE0 | 216 | sha256rnds2 MSG, STATE1, STATE0 |
199 | sha256msg1 MSGTMP3, MSGTMP2 | 217 | sha256msg1 MSG3, MSG2 |
200 | 218 | ||
201 | /* Rounds 48-51 */ | 219 | /* Rounds 48-51 */ |
202 | mova128 MSGTMP0, MSG | 220 | mova128 MSG0, MSG |
203 | paddd 12*16-8*16(SHA256CONSTANTS), MSG | 221 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
204 | sha256rnds2 MSG, STATE0, STATE1 | 222 | sha256rnds2 MSG, STATE0, STATE1 |
205 | mova128 MSGTMP0, XMMTMP | 223 | mova128 MSG0, XMMTMP |
206 | palignr $4, MSGTMP3, XMMTMP | 224 | palignr $4, MSG3, XMMTMP |
207 | paddd XMMTMP, MSGTMP1 | 225 | paddd XMMTMP, MSG1 |
208 | sha256msg2 MSGTMP0, MSGTMP1 | 226 | sha256msg2 MSG0, MSG1 |
209 | shuf128_32 $0x0E, MSG, MSG | 227 | MOVE_UPPER64_DOWN(MSG) |
210 | sha256rnds2 MSG, STATE1, STATE0 | 228 | sha256rnds2 MSG, STATE1, STATE0 |
211 | sha256msg1 MSGTMP0, MSGTMP3 | 229 | sha256msg1 MSG0, MSG3 |
212 | 230 | ||
213 | /* Rounds 52-55 */ | 231 | /* Rounds 52-55 */ |
214 | mova128 MSGTMP1, MSG | 232 | mova128 MSG1, MSG |
215 | paddd 13*16-8*16(SHA256CONSTANTS), MSG | 233 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
216 | sha256rnds2 MSG, STATE0, STATE1 | 234 | sha256rnds2 MSG, STATE0, STATE1 |
217 | mova128 MSGTMP1, XMMTMP | 235 | mova128 MSG1, XMMTMP |
218 | palignr $4, MSGTMP0, XMMTMP | 236 | palignr $4, MSG0, XMMTMP |
219 | paddd XMMTMP, MSGTMP2 | 237 | paddd XMMTMP, MSG2 |
220 | sha256msg2 MSGTMP1, MSGTMP2 | 238 | sha256msg2 MSG1, MSG2 |
221 | shuf128_32 $0x0E, MSG, MSG | 239 | MOVE_UPPER64_DOWN(MSG) |
222 | sha256rnds2 MSG, STATE1, STATE0 | 240 | sha256rnds2 MSG, STATE1, STATE0 |
223 | 241 | ||
224 | /* Rounds 56-59 */ | 242 | /* Rounds 56-59 */ |
225 | mova128 MSGTMP2, MSG | 243 | mova128 MSG2, MSG |
226 | paddd 14*16-8*16(SHA256CONSTANTS), MSG | 244 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
227 | sha256rnds2 MSG, STATE0, STATE1 | 245 | sha256rnds2 MSG, STATE0, STATE1 |
228 | mova128 MSGTMP2, XMMTMP | 246 | mova128 MSG2, XMMTMP |
229 | palignr $4, MSGTMP1, XMMTMP | 247 | palignr $4, MSG1, XMMTMP |
230 | paddd XMMTMP, MSGTMP3 | 248 | paddd XMMTMP, MSG3 |
231 | sha256msg2 MSGTMP2, MSGTMP3 | 249 | sha256msg2 MSG2, MSG3 |
232 | shuf128_32 $0x0E, MSG, MSG | 250 | MOVE_UPPER64_DOWN(MSG) |
233 | sha256rnds2 MSG, STATE1, STATE0 | 251 | sha256rnds2 MSG, STATE1, STATE0 |
234 | 252 | ||
235 | /* Rounds 60-63 */ | 253 | /* Rounds 60-63 */ |
236 | mova128 MSGTMP3, MSG | 254 | mova128 MSG3, MSG |
237 | paddd 15*16-8*16(SHA256CONSTANTS), MSG | 255 | paddd 15*16-8*16(SHA256CONSTANTS), MSG |
238 | sha256rnds2 MSG, STATE0, STATE1 | 256 | sha256rnds2 MSG, STATE0, STATE1 |
239 | shuf128_32 $0x0E, MSG, MSG | 257 | MOVE_UPPER64_DOWN(MSG) |
240 | sha256rnds2 MSG, STATE1, STATE0 | 258 | sha256rnds2 MSG, STATE1, STATE0 |
241 | 259 | ||
242 | /* Write hash values back in the correct order */ | 260 | /* Write hash values back in the correct order */ |
243 | mova128 STATE0, XMMTMP | 261 | mova128 STATE0, XMMTMP |
244 | /* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ | 262 | /* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */ |
245 | /* --- -------------- HGDC -- FEBA */ | 263 | /* --- -------------- HGDC -- FEBA */ |
246 | shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */ | 264 | shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */ |
247 | shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */ | 265 | shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */ |
diff --git a/libbb/hash_sha256_hwaccel_x86-64.S b/libbb/hash_sha256_hwaccel_x86-64.S index 172c2eae2..ee3abbd1f 100644 --- a/libbb/hash_sha256_hwaccel_x86-64.S +++ b/libbb/hash_sha256_hwaccel_x86-64.S | |||
@@ -34,24 +34,24 @@ | |||
34 | #define MSG %xmm0 | 34 | #define MSG %xmm0 |
35 | #define STATE0 %xmm1 | 35 | #define STATE0 %xmm1 |
36 | #define STATE1 %xmm2 | 36 | #define STATE1 %xmm2 |
37 | #define MSGTMP0 %xmm3 | 37 | #define MSG0 %xmm3 |
38 | #define MSGTMP1 %xmm4 | 38 | #define MSG1 %xmm4 |
39 | #define MSGTMP2 %xmm5 | 39 | #define MSG2 %xmm5 |
40 | #define MSGTMP3 %xmm6 | 40 | #define MSG3 %xmm6 |
41 | 41 | ||
42 | #define XMMTMP %xmm7 | 42 | #define XMMTMP %xmm7 |
43 | 43 | ||
44 | #define SAVE0 %xmm8 | 44 | #define SAVE0 %xmm8 |
45 | #define SAVE1 %xmm9 | 45 | #define SAVE1 %xmm9 |
46 | 46 | ||
47 | #define SHUF(a,b,c,d) $(a+(b<<2)+(c<<4)+(d<<6)) | 47 | #define SHUF(a,b,c,d) $((a)+((b)<<2)+((c)<<4)+((d)<<6)) |
48 | 48 | ||
49 | .balign 8 # allow decoders to fetch at least 2 first insns | 49 | .balign 8 # allow decoders to fetch at least 2 first insns |
50 | sha256_process_block64_shaNI: | 50 | sha256_process_block64_shaNI: |
51 | 51 | ||
52 | movu128 80+0*16(%rdi), XMMTMP /* ABCD (little-endian dword order) */ | 52 | movu128 80+0*16(%rdi), XMMTMP /* ABCD (shown least-significant-dword-first) */ |
53 | movu128 80+1*16(%rdi), STATE1 /* EFGH */ | 53 | movu128 80+1*16(%rdi), STATE1 /* EFGH */ |
54 | /* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ | 54 | /* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */ |
55 | mova128 STATE1, STATE0 | 55 | mova128 STATE1, STATE0 |
56 | /* --- -------------- ABCD -- EFGH */ | 56 | /* --- -------------- ABCD -- EFGH */ |
57 | shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */ | 57 | shufps SHUF(1,0,1,0), XMMTMP, STATE0 /* FEBA */ |
@@ -65,185 +65,203 @@ sha256_process_block64_shaNI: | |||
65 | mova128 STATE0, SAVE0 | 65 | mova128 STATE0, SAVE0 |
66 | mova128 STATE1, SAVE1 | 66 | mova128 STATE1, SAVE1 |
67 | 67 | ||
68 | // sha256rnds2 instruction uses only lower 64 bits of MSG. | ||
69 | // The code below needs to move upper 64 bits to lower 64 bits | ||
70 | // for the second sha256rnds2 invocation | ||
71 | // (what remains in upper bits does not matter). | ||
72 | // There are several ways to do it: | ||
73 | // movhlps MSG, MSG // abcd -> cdcd (3 bytes of code) | ||
74 | // shuf128_32 SHUF(2,3,n,n), MSG, MSG // abcd -> cdXX (4 bytes) | ||
75 | // punpckhqdq MSG, MSG // abcd -> cdcd (4 bytes) | ||
76 | // unpckhpd MSG, MSG // abcd -> cdcd (4 bytes) | ||
77 | // psrldq $8, MSG // abcd -> cd00 (5 bytes) | ||
78 | // palignr $8, MSG, MSG // abcd -> cdab (6 bytes, SSSE3 insn) | ||
79 | #define MOVE_UPPER64_DOWN(reg) movhlps reg, reg | ||
80 | //#define MOVE_UPPER64_DOWN(reg) shuf128_32 SHUF(2,3,0,0), reg, reg | ||
81 | //#define MOVE_UPPER64_DOWN(reg) punpckhqdq reg, reg | ||
82 | //#define MOVE_UPPER64_DOWN(reg) unpckhpd reg, reg | ||
83 | //#define MOVE_UPPER64_DOWN(reg) psrldq $8, reg | ||
84 | //#define MOVE_UPPER64_DOWN(reg) palignr $8, reg, reg | ||
85 | |||
68 | /* Rounds 0-3 */ | 86 | /* Rounds 0-3 */ |
69 | movu128 0*16(DATA_PTR), MSG | 87 | movu128 0*16(DATA_PTR), MSG |
70 | pshufb XMMTMP, MSG | 88 | pshufb XMMTMP, MSG |
71 | mova128 MSG, MSGTMP0 | 89 | mova128 MSG, MSG0 |
72 | paddd 0*16-8*16(SHA256CONSTANTS), MSG | 90 | paddd 0*16-8*16(SHA256CONSTANTS), MSG |
73 | sha256rnds2 MSG, STATE0, STATE1 | 91 | sha256rnds2 MSG, STATE0, STATE1 |
74 | shuf128_32 $0x0E, MSG, MSG | 92 | MOVE_UPPER64_DOWN(MSG) |
75 | sha256rnds2 MSG, STATE1, STATE0 | 93 | sha256rnds2 MSG, STATE1, STATE0 |
76 | 94 | ||
77 | /* Rounds 4-7 */ | 95 | /* Rounds 4-7 */ |
78 | movu128 1*16(DATA_PTR), MSG | 96 | movu128 1*16(DATA_PTR), MSG |
79 | pshufb XMMTMP, MSG | 97 | pshufb XMMTMP, MSG |
80 | mova128 MSG, MSGTMP1 | 98 | mova128 MSG, MSG1 |
81 | paddd 1*16-8*16(SHA256CONSTANTS), MSG | 99 | paddd 1*16-8*16(SHA256CONSTANTS), MSG |
82 | sha256rnds2 MSG, STATE0, STATE1 | 100 | sha256rnds2 MSG, STATE0, STATE1 |
83 | shuf128_32 $0x0E, MSG, MSG | 101 | MOVE_UPPER64_DOWN(MSG) |
84 | sha256rnds2 MSG, STATE1, STATE0 | 102 | sha256rnds2 MSG, STATE1, STATE0 |
85 | sha256msg1 MSGTMP1, MSGTMP0 | 103 | sha256msg1 MSG1, MSG0 |
86 | 104 | ||
87 | /* Rounds 8-11 */ | 105 | /* Rounds 8-11 */ |
88 | movu128 2*16(DATA_PTR), MSG | 106 | movu128 2*16(DATA_PTR), MSG |
89 | pshufb XMMTMP, MSG | 107 | pshufb XMMTMP, MSG |
90 | mova128 MSG, MSGTMP2 | 108 | mova128 MSG, MSG2 |
91 | paddd 2*16-8*16(SHA256CONSTANTS), MSG | 109 | paddd 2*16-8*16(SHA256CONSTANTS), MSG |
92 | sha256rnds2 MSG, STATE0, STATE1 | 110 | sha256rnds2 MSG, STATE0, STATE1 |
93 | shuf128_32 $0x0E, MSG, MSG | 111 | MOVE_UPPER64_DOWN(MSG) |
94 | sha256rnds2 MSG, STATE1, STATE0 | 112 | sha256rnds2 MSG, STATE1, STATE0 |
95 | sha256msg1 MSGTMP2, MSGTMP1 | 113 | sha256msg1 MSG2, MSG1 |
96 | 114 | ||
97 | /* Rounds 12-15 */ | 115 | /* Rounds 12-15 */ |
98 | movu128 3*16(DATA_PTR), MSG | 116 | movu128 3*16(DATA_PTR), MSG |
99 | pshufb XMMTMP, MSG | 117 | pshufb XMMTMP, MSG |
100 | /* ...to here */ | 118 | /* ...to here */ |
101 | mova128 MSG, MSGTMP3 | 119 | mova128 MSG, MSG3 |
102 | paddd 3*16-8*16(SHA256CONSTANTS), MSG | 120 | paddd 3*16-8*16(SHA256CONSTANTS), MSG |
103 | sha256rnds2 MSG, STATE0, STATE1 | 121 | sha256rnds2 MSG, STATE0, STATE1 |
104 | mova128 MSGTMP3, XMMTMP | 122 | mova128 MSG3, XMMTMP |
105 | palignr $4, MSGTMP2, XMMTMP | 123 | palignr $4, MSG2, XMMTMP |
106 | paddd XMMTMP, MSGTMP0 | 124 | paddd XMMTMP, MSG0 |
107 | sha256msg2 MSGTMP3, MSGTMP0 | 125 | sha256msg2 MSG3, MSG0 |
108 | shuf128_32 $0x0E, MSG, MSG | 126 | MOVE_UPPER64_DOWN(MSG) |
109 | sha256rnds2 MSG, STATE1, STATE0 | 127 | sha256rnds2 MSG, STATE1, STATE0 |
110 | sha256msg1 MSGTMP3, MSGTMP2 | 128 | sha256msg1 MSG3, MSG2 |
111 | 129 | ||
112 | /* Rounds 16-19 */ | 130 | /* Rounds 16-19 */ |
113 | mova128 MSGTMP0, MSG | 131 | mova128 MSG0, MSG |
114 | paddd 4*16-8*16(SHA256CONSTANTS), MSG | 132 | paddd 4*16-8*16(SHA256CONSTANTS), MSG |
115 | sha256rnds2 MSG, STATE0, STATE1 | 133 | sha256rnds2 MSG, STATE0, STATE1 |
116 | mova128 MSGTMP0, XMMTMP | 134 | mova128 MSG0, XMMTMP |
117 | palignr $4, MSGTMP3, XMMTMP | 135 | palignr $4, MSG3, XMMTMP |
118 | paddd XMMTMP, MSGTMP1 | 136 | paddd XMMTMP, MSG1 |
119 | sha256msg2 MSGTMP0, MSGTMP1 | 137 | sha256msg2 MSG0, MSG1 |
120 | shuf128_32 $0x0E, MSG, MSG | 138 | MOVE_UPPER64_DOWN(MSG) |
121 | sha256rnds2 MSG, STATE1, STATE0 | 139 | sha256rnds2 MSG, STATE1, STATE0 |
122 | sha256msg1 MSGTMP0, MSGTMP3 | 140 | sha256msg1 MSG0, MSG3 |
123 | 141 | ||
124 | /* Rounds 20-23 */ | 142 | /* Rounds 20-23 */ |
125 | mova128 MSGTMP1, MSG | 143 | mova128 MSG1, MSG |
126 | paddd 5*16-8*16(SHA256CONSTANTS), MSG | 144 | paddd 5*16-8*16(SHA256CONSTANTS), MSG |
127 | sha256rnds2 MSG, STATE0, STATE1 | 145 | sha256rnds2 MSG, STATE0, STATE1 |
128 | mova128 MSGTMP1, XMMTMP | 146 | mova128 MSG1, XMMTMP |
129 | palignr $4, MSGTMP0, XMMTMP | 147 | palignr $4, MSG0, XMMTMP |
130 | paddd XMMTMP, MSGTMP2 | 148 | paddd XMMTMP, MSG2 |
131 | sha256msg2 MSGTMP1, MSGTMP2 | 149 | sha256msg2 MSG1, MSG2 |
132 | shuf128_32 $0x0E, MSG, MSG | 150 | MOVE_UPPER64_DOWN(MSG) |
133 | sha256rnds2 MSG, STATE1, STATE0 | 151 | sha256rnds2 MSG, STATE1, STATE0 |
134 | sha256msg1 MSGTMP1, MSGTMP0 | 152 | sha256msg1 MSG1, MSG0 |
135 | 153 | ||
136 | /* Rounds 24-27 */ | 154 | /* Rounds 24-27 */ |
137 | mova128 MSGTMP2, MSG | 155 | mova128 MSG2, MSG |
138 | paddd 6*16-8*16(SHA256CONSTANTS), MSG | 156 | paddd 6*16-8*16(SHA256CONSTANTS), MSG |
139 | sha256rnds2 MSG, STATE0, STATE1 | 157 | sha256rnds2 MSG, STATE0, STATE1 |
140 | mova128 MSGTMP2, XMMTMP | 158 | mova128 MSG2, XMMTMP |
141 | palignr $4, MSGTMP1, XMMTMP | 159 | palignr $4, MSG1, XMMTMP |
142 | paddd XMMTMP, MSGTMP3 | 160 | paddd XMMTMP, MSG3 |
143 | sha256msg2 MSGTMP2, MSGTMP3 | 161 | sha256msg2 MSG2, MSG3 |
144 | shuf128_32 $0x0E, MSG, MSG | 162 | MOVE_UPPER64_DOWN(MSG) |
145 | sha256rnds2 MSG, STATE1, STATE0 | 163 | sha256rnds2 MSG, STATE1, STATE0 |
146 | sha256msg1 MSGTMP2, MSGTMP1 | 164 | sha256msg1 MSG2, MSG1 |
147 | 165 | ||
148 | /* Rounds 28-31 */ | 166 | /* Rounds 28-31 */ |
149 | mova128 MSGTMP3, MSG | 167 | mova128 MSG3, MSG |
150 | paddd 7*16-8*16(SHA256CONSTANTS), MSG | 168 | paddd 7*16-8*16(SHA256CONSTANTS), MSG |
151 | sha256rnds2 MSG, STATE0, STATE1 | 169 | sha256rnds2 MSG, STATE0, STATE1 |
152 | mova128 MSGTMP3, XMMTMP | 170 | mova128 MSG3, XMMTMP |
153 | palignr $4, MSGTMP2, XMMTMP | 171 | palignr $4, MSG2, XMMTMP |
154 | paddd XMMTMP, MSGTMP0 | 172 | paddd XMMTMP, MSG0 |
155 | sha256msg2 MSGTMP3, MSGTMP0 | 173 | sha256msg2 MSG3, MSG0 |
156 | shuf128_32 $0x0E, MSG, MSG | 174 | MOVE_UPPER64_DOWN(MSG) |
157 | sha256rnds2 MSG, STATE1, STATE0 | 175 | sha256rnds2 MSG, STATE1, STATE0 |
158 | sha256msg1 MSGTMP3, MSGTMP2 | 176 | sha256msg1 MSG3, MSG2 |
159 | 177 | ||
160 | /* Rounds 32-35 */ | 178 | /* Rounds 32-35 */ |
161 | mova128 MSGTMP0, MSG | 179 | mova128 MSG0, MSG |
162 | paddd 8*16-8*16(SHA256CONSTANTS), MSG | 180 | paddd 8*16-8*16(SHA256CONSTANTS), MSG |
163 | sha256rnds2 MSG, STATE0, STATE1 | 181 | sha256rnds2 MSG, STATE0, STATE1 |
164 | mova128 MSGTMP0, XMMTMP | 182 | mova128 MSG0, XMMTMP |
165 | palignr $4, MSGTMP3, XMMTMP | 183 | palignr $4, MSG3, XMMTMP |
166 | paddd XMMTMP, MSGTMP1 | 184 | paddd XMMTMP, MSG1 |
167 | sha256msg2 MSGTMP0, MSGTMP1 | 185 | sha256msg2 MSG0, MSG1 |
168 | shuf128_32 $0x0E, MSG, MSG | 186 | MOVE_UPPER64_DOWN(MSG) |
169 | sha256rnds2 MSG, STATE1, STATE0 | 187 | sha256rnds2 MSG, STATE1, STATE0 |
170 | sha256msg1 MSGTMP0, MSGTMP3 | 188 | sha256msg1 MSG0, MSG3 |
171 | 189 | ||
172 | /* Rounds 36-39 */ | 190 | /* Rounds 36-39 */ |
173 | mova128 MSGTMP1, MSG | 191 | mova128 MSG1, MSG |
174 | paddd 9*16-8*16(SHA256CONSTANTS), MSG | 192 | paddd 9*16-8*16(SHA256CONSTANTS), MSG |
175 | sha256rnds2 MSG, STATE0, STATE1 | 193 | sha256rnds2 MSG, STATE0, STATE1 |
176 | mova128 MSGTMP1, XMMTMP | 194 | mova128 MSG1, XMMTMP |
177 | palignr $4, MSGTMP0, XMMTMP | 195 | palignr $4, MSG0, XMMTMP |
178 | paddd XMMTMP, MSGTMP2 | 196 | paddd XMMTMP, MSG2 |
179 | sha256msg2 MSGTMP1, MSGTMP2 | 197 | sha256msg2 MSG1, MSG2 |
180 | shuf128_32 $0x0E, MSG, MSG | 198 | MOVE_UPPER64_DOWN(MSG) |
181 | sha256rnds2 MSG, STATE1, STATE0 | 199 | sha256rnds2 MSG, STATE1, STATE0 |
182 | sha256msg1 MSGTMP1, MSGTMP0 | 200 | sha256msg1 MSG1, MSG0 |
183 | 201 | ||
184 | /* Rounds 40-43 */ | 202 | /* Rounds 40-43 */ |
185 | mova128 MSGTMP2, MSG | 203 | mova128 MSG2, MSG |
186 | paddd 10*16-8*16(SHA256CONSTANTS), MSG | 204 | paddd 10*16-8*16(SHA256CONSTANTS), MSG |
187 | sha256rnds2 MSG, STATE0, STATE1 | 205 | sha256rnds2 MSG, STATE0, STATE1 |
188 | mova128 MSGTMP2, XMMTMP | 206 | mova128 MSG2, XMMTMP |
189 | palignr $4, MSGTMP1, XMMTMP | 207 | palignr $4, MSG1, XMMTMP |
190 | paddd XMMTMP, MSGTMP3 | 208 | paddd XMMTMP, MSG3 |
191 | sha256msg2 MSGTMP2, MSGTMP3 | 209 | sha256msg2 MSG2, MSG3 |
192 | shuf128_32 $0x0E, MSG, MSG | 210 | MOVE_UPPER64_DOWN(MSG) |
193 | sha256rnds2 MSG, STATE1, STATE0 | 211 | sha256rnds2 MSG, STATE1, STATE0 |
194 | sha256msg1 MSGTMP2, MSGTMP1 | 212 | sha256msg1 MSG2, MSG1 |
195 | 213 | ||
196 | /* Rounds 44-47 */ | 214 | /* Rounds 44-47 */ |
197 | mova128 MSGTMP3, MSG | 215 | mova128 MSG3, MSG |
198 | paddd 11*16-8*16(SHA256CONSTANTS), MSG | 216 | paddd 11*16-8*16(SHA256CONSTANTS), MSG |
199 | sha256rnds2 MSG, STATE0, STATE1 | 217 | sha256rnds2 MSG, STATE0, STATE1 |
200 | mova128 MSGTMP3, XMMTMP | 218 | mova128 MSG3, XMMTMP |
201 | palignr $4, MSGTMP2, XMMTMP | 219 | palignr $4, MSG2, XMMTMP |
202 | paddd XMMTMP, MSGTMP0 | 220 | paddd XMMTMP, MSG0 |
203 | sha256msg2 MSGTMP3, MSGTMP0 | 221 | sha256msg2 MSG3, MSG0 |
204 | shuf128_32 $0x0E, MSG, MSG | 222 | MOVE_UPPER64_DOWN(MSG) |
205 | sha256rnds2 MSG, STATE1, STATE0 | 223 | sha256rnds2 MSG, STATE1, STATE0 |
206 | sha256msg1 MSGTMP3, MSGTMP2 | 224 | sha256msg1 MSG3, MSG2 |
207 | 225 | ||
208 | /* Rounds 48-51 */ | 226 | /* Rounds 48-51 */ |
209 | mova128 MSGTMP0, MSG | 227 | mova128 MSG0, MSG |
210 | paddd 12*16-8*16(SHA256CONSTANTS), MSG | 228 | paddd 12*16-8*16(SHA256CONSTANTS), MSG |
211 | sha256rnds2 MSG, STATE0, STATE1 | 229 | sha256rnds2 MSG, STATE0, STATE1 |
212 | mova128 MSGTMP0, XMMTMP | 230 | mova128 MSG0, XMMTMP |
213 | palignr $4, MSGTMP3, XMMTMP | 231 | palignr $4, MSG3, XMMTMP |
214 | paddd XMMTMP, MSGTMP1 | 232 | paddd XMMTMP, MSG1 |
215 | sha256msg2 MSGTMP0, MSGTMP1 | 233 | sha256msg2 MSG0, MSG1 |
216 | shuf128_32 $0x0E, MSG, MSG | 234 | MOVE_UPPER64_DOWN(MSG) |
217 | sha256rnds2 MSG, STATE1, STATE0 | 235 | sha256rnds2 MSG, STATE1, STATE0 |
218 | sha256msg1 MSGTMP0, MSGTMP3 | 236 | sha256msg1 MSG0, MSG3 |
219 | 237 | ||
220 | /* Rounds 52-55 */ | 238 | /* Rounds 52-55 */ |
221 | mova128 MSGTMP1, MSG | 239 | mova128 MSG1, MSG |
222 | paddd 13*16-8*16(SHA256CONSTANTS), MSG | 240 | paddd 13*16-8*16(SHA256CONSTANTS), MSG |
223 | sha256rnds2 MSG, STATE0, STATE1 | 241 | sha256rnds2 MSG, STATE0, STATE1 |
224 | mova128 MSGTMP1, XMMTMP | 242 | mova128 MSG1, XMMTMP |
225 | palignr $4, MSGTMP0, XMMTMP | 243 | palignr $4, MSG0, XMMTMP |
226 | paddd XMMTMP, MSGTMP2 | 244 | paddd XMMTMP, MSG2 |
227 | sha256msg2 MSGTMP1, MSGTMP2 | 245 | sha256msg2 MSG1, MSG2 |
228 | shuf128_32 $0x0E, MSG, MSG | 246 | MOVE_UPPER64_DOWN(MSG) |
229 | sha256rnds2 MSG, STATE1, STATE0 | 247 | sha256rnds2 MSG, STATE1, STATE0 |
230 | 248 | ||
231 | /* Rounds 56-59 */ | 249 | /* Rounds 56-59 */ |
232 | mova128 MSGTMP2, MSG | 250 | mova128 MSG2, MSG |
233 | paddd 14*16-8*16(SHA256CONSTANTS), MSG | 251 | paddd 14*16-8*16(SHA256CONSTANTS), MSG |
234 | sha256rnds2 MSG, STATE0, STATE1 | 252 | sha256rnds2 MSG, STATE0, STATE1 |
235 | mova128 MSGTMP2, XMMTMP | 253 | mova128 MSG2, XMMTMP |
236 | palignr $4, MSGTMP1, XMMTMP | 254 | palignr $4, MSG1, XMMTMP |
237 | paddd XMMTMP, MSGTMP3 | 255 | paddd XMMTMP, MSG3 |
238 | sha256msg2 MSGTMP2, MSGTMP3 | 256 | sha256msg2 MSG2, MSG3 |
239 | shuf128_32 $0x0E, MSG, MSG | 257 | MOVE_UPPER64_DOWN(MSG) |
240 | sha256rnds2 MSG, STATE1, STATE0 | 258 | sha256rnds2 MSG, STATE1, STATE0 |
241 | 259 | ||
242 | /* Rounds 60-63 */ | 260 | /* Rounds 60-63 */ |
243 | mova128 MSGTMP3, MSG | 261 | mova128 MSG3, MSG |
244 | paddd 15*16-8*16(SHA256CONSTANTS), MSG | 262 | paddd 15*16-8*16(SHA256CONSTANTS), MSG |
245 | sha256rnds2 MSG, STATE0, STATE1 | 263 | sha256rnds2 MSG, STATE0, STATE1 |
246 | shuf128_32 $0x0E, MSG, MSG | 264 | MOVE_UPPER64_DOWN(MSG) |
247 | sha256rnds2 MSG, STATE1, STATE0 | 265 | sha256rnds2 MSG, STATE1, STATE0 |
248 | 266 | ||
249 | /* Add current hash values with previously saved */ | 267 | /* Add current hash values with previously saved */ |
@@ -252,7 +270,7 @@ sha256_process_block64_shaNI: | |||
252 | 270 | ||
253 | /* Write hash values back in the correct order */ | 271 | /* Write hash values back in the correct order */ |
254 | mova128 STATE0, XMMTMP | 272 | mova128 STATE0, XMMTMP |
255 | /* shufps takes dwords 0,1 from *2nd* operand, and dwords 2,3 from 1st one */ | 273 | /* shufps: dwords 0,1 of the result are selected from *2nd* operand, and dwords 2,3 from 1st operand */ |
256 | /* --- -------------- HGDC -- FEBA */ | 274 | /* --- -------------- HGDC -- FEBA */ |
257 | shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */ | 275 | shufps SHUF(3,2,3,2), STATE1, STATE0 /* ABCD */ |
258 | shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */ | 276 | shufps SHUF(1,0,1,0), STATE1, XMMTMP /* EFGH */ |
diff --git a/libbb/lineedit.c b/libbb/lineedit.c index 8e2b37853..c8a0f37fe 100644 --- a/libbb/lineedit.c +++ b/libbb/lineedit.c | |||
@@ -457,7 +457,7 @@ static void put_cur_glyph_and_inc_cursor(void) | |||
457 | * have automargin (IOW: it is moving cursor to next line | 457 | * have automargin (IOW: it is moving cursor to next line |
458 | * by itself (which is wrong for VT-10x terminals)), | 458 | * by itself (which is wrong for VT-10x terminals)), |
459 | * this will break things: there will be one extra empty line */ | 459 | * this will break things: there will be one extra empty line */ |
460 | puts("\r"); /* + implicit '\n' */ | 460 | fputs("\r\n", stderr); |
461 | #else | 461 | #else |
462 | /* VT-10x terminals don't wrap cursor to next line when last char | 462 | /* VT-10x terminals don't wrap cursor to next line when last char |
463 | * on the line is printed - cursor stays "over" this char. | 463 | * on the line is printed - cursor stays "over" this char. |
@@ -1302,9 +1302,10 @@ static void showfiles(void) | |||
1302 | ); | 1302 | ); |
1303 | } | 1303 | } |
1304 | if (ENABLE_UNICODE_SUPPORT) | 1304 | if (ENABLE_UNICODE_SUPPORT) |
1305 | puts(printable_string(matches[n])); | 1305 | fputs(printable_string(matches[n]), stderr); |
1306 | else | 1306 | else |
1307 | puts(matches[n]); | 1307 | fputs(matches[n], stderr); |
1308 | bb_putchar_stderr('\n'); | ||
1308 | } | 1309 | } |
1309 | } | 1310 | } |
1310 | 1311 | ||
@@ -1595,8 +1596,8 @@ unsigned FAST_FUNC size_from_HISTFILESIZE(const char *hp) | |||
1595 | # endif | 1596 | # endif |
1596 | if (hp) { | 1597 | if (hp) { |
1597 | size = atoi(hp); | 1598 | size = atoi(hp); |
1598 | if (size <= 0) | 1599 | if (size < 0) |
1599 | return 1; | 1600 | return 0; |
1600 | if (size > MAX_HISTORY) | 1601 | if (size > MAX_HISTORY) |
1601 | return MAX_HISTORY; | 1602 | return MAX_HISTORY; |
1602 | } | 1603 | } |
@@ -1690,18 +1691,21 @@ static void load_history(line_input_t *st_parm) | |||
1690 | /* NB: do not trash old history if file can't be opened */ | 1691 | /* NB: do not trash old history if file can't be opened */ |
1691 | 1692 | ||
1692 | fp = fopen_for_read(st_parm->hist_file); | 1693 | fp = fopen_for_read(st_parm->hist_file); |
1693 | if (fp) { | 1694 | if (!fp) |
1694 | /* clean up old history */ | 1695 | return; |
1695 | for (idx = st_parm->cnt_history; idx > 0;) { | 1696 | |
1696 | idx--; | 1697 | /* clean up old history */ |
1697 | free(st_parm->history[idx]); | 1698 | for (idx = st_parm->cnt_history; idx > 0;) { |
1698 | st_parm->history[idx] = NULL; | 1699 | idx--; |
1699 | } | 1700 | free(st_parm->history[idx]); |
1701 | st_parm->history[idx] = NULL; | ||
1702 | } | ||
1700 | 1703 | ||
1701 | /* fill temp_h[], retaining only last MAX_HISTORY lines */ | 1704 | /* fill temp_h[], retaining only last max_history lines */ |
1702 | memset(temp_h, 0, sizeof(temp_h)); | 1705 | memset(temp_h, 0, sizeof(temp_h)); |
1703 | idx = 0; | 1706 | idx = 0; |
1704 | st_parm->cnt_history_in_file = 0; | 1707 | st_parm->cnt_history_in_file = 0; |
1708 | if (st_parm->max_history != 0) { | ||
1705 | while ((line = xmalloc_fgetline(fp)) != NULL) { | 1709 | while ((line = xmalloc_fgetline(fp)) != NULL) { |
1706 | if (line[0] == '\0') { | 1710 | if (line[0] == '\0') { |
1707 | free(line); | 1711 | free(line); |
@@ -1714,34 +1718,34 @@ static void load_history(line_input_t *st_parm) | |||
1714 | if (idx == st_parm->max_history) | 1718 | if (idx == st_parm->max_history) |
1715 | idx = 0; | 1719 | idx = 0; |
1716 | } | 1720 | } |
1717 | fclose(fp); | 1721 | } |
1718 | 1722 | fclose(fp); | |
1719 | /* find first non-NULL temp_h[], if any */ | ||
1720 | if (st_parm->cnt_history_in_file) { | ||
1721 | while (temp_h[idx] == NULL) { | ||
1722 | idx++; | ||
1723 | if (idx == st_parm->max_history) | ||
1724 | idx = 0; | ||
1725 | } | ||
1726 | } | ||
1727 | 1723 | ||
1728 | /* copy temp_h[] to st_parm->history[] */ | 1724 | /* find first non-NULL temp_h[], if any */ |
1729 | for (i = 0; i < st_parm->max_history;) { | 1725 | if (st_parm->cnt_history_in_file != 0) { |
1730 | line = temp_h[idx]; | 1726 | while (temp_h[idx] == NULL) { |
1731 | if (!line) | ||
1732 | break; | ||
1733 | idx++; | 1727 | idx++; |
1734 | if (idx == st_parm->max_history) | 1728 | if (idx == st_parm->max_history) |
1735 | idx = 0; | 1729 | idx = 0; |
1736 | line_len = strlen(line); | ||
1737 | if (line_len >= MAX_LINELEN) | ||
1738 | line[MAX_LINELEN-1] = '\0'; | ||
1739 | st_parm->history[i++] = line; | ||
1740 | } | 1730 | } |
1741 | st_parm->cnt_history = i; | ||
1742 | if (ENABLE_FEATURE_EDITING_SAVE_ON_EXIT) | ||
1743 | st_parm->cnt_history_in_file = i; | ||
1744 | } | 1731 | } |
1732 | |||
1733 | /* copy temp_h[] to st_parm->history[] */ | ||
1734 | for (i = 0; i < st_parm->max_history;) { | ||
1735 | line = temp_h[idx]; | ||
1736 | if (!line) | ||
1737 | break; | ||
1738 | idx++; | ||
1739 | if (idx == st_parm->max_history) | ||
1740 | idx = 0; | ||
1741 | line_len = strlen(line); | ||
1742 | if (line_len >= MAX_LINELEN) | ||
1743 | line[MAX_LINELEN-1] = '\0'; | ||
1744 | st_parm->history[i++] = line; | ||
1745 | } | ||
1746 | st_parm->cnt_history = i; | ||
1747 | if (ENABLE_FEATURE_EDITING_SAVE_ON_EXIT) | ||
1748 | st_parm->cnt_history_in_file = i; | ||
1745 | } | 1749 | } |
1746 | 1750 | ||
1747 | # if ENABLE_FEATURE_EDITING_SAVE_ON_EXIT | 1751 | # if ENABLE_FEATURE_EDITING_SAVE_ON_EXIT |
@@ -1749,17 +1753,27 @@ void FAST_FUNC save_history(line_input_t *st) | |||
1749 | { | 1753 | { |
1750 | FILE *fp; | 1754 | FILE *fp; |
1751 | 1755 | ||
1752 | if (!st || !st->hist_file) | 1756 | /* bash compat: HISTFILE="" disables history saving */ |
1757 | if (!st || !st->hist_file || !st->hist_file[0]) /* test the st argument itself, not the global editing state */ | ||
1753 | return; | 1758 | return; |
1754 | if (st->cnt_history <= st->cnt_history_in_file) | 1759 | if (st->cnt_history <= st->cnt_history_in_file) |
1755 | return; | 1760 | return; /* no new entries were added */ |
1761 | /* note: if st->max_history is 0, we do not abort: we truncate the history to 0 lines */ | ||
1756 | 1762 | ||
1757 | fp = fopen(st->hist_file, "a"); | 1763 | fp = fopen(st->hist_file, (st->max_history == 0 ? "w" : "a")); |
1758 | if (fp) { | 1764 | if (fp) { |
1759 | int i, fd; | 1765 | int i, fd; |
1760 | char *new_name; | 1766 | char *new_name; |
1761 | line_input_t *st_temp; | 1767 | line_input_t *st_temp; |
1762 | 1768 | ||
1769 | /* max_history==0 needs special-casing in general code, | ||
1770 | * just handle it in a simpler way: */ | ||
1771 | if (st->max_history == 0) { | ||
1772 | /* fopen("w") already truncated it */ | ||
1773 | fclose(fp); | ||
1774 | return; | ||
1775 | } | ||
1776 | |||
1763 | for (i = st->cnt_history_in_file; i < st->cnt_history; i++) | 1777 | for (i = st->cnt_history_in_file; i < st->cnt_history; i++) |
1764 | fprintf(fp, "%s\n", st->history[i]); | 1778 | fprintf(fp, "%s\n", st->history[i]); |
1765 | fclose(fp); | 1779 | fclose(fp); |
@@ -1769,6 +1783,8 @@ void FAST_FUNC save_history(line_input_t *st) | |||
1769 | st_temp = new_line_input_t(st->flags); | 1783 | st_temp = new_line_input_t(st->flags); |
1770 | st_temp->hist_file = st->hist_file; | 1784 | st_temp->hist_file = st->hist_file; |
1771 | st_temp->max_history = st->max_history; | 1785 | st_temp->max_history = st->max_history; |
1786 | /* load no more than max_history last lines */ | ||
1787 | /* (in unlikely case that file disappeared, st_temp gets empty history) */ | ||
1772 | load_history(st_temp); | 1788 | load_history(st_temp); |
1773 | 1789 | ||
1774 | /* write out temp file and replace hist_file atomically */ | 1790 | /* write out temp file and replace hist_file atomically */ |
@@ -1792,13 +1808,13 @@ static void save_history(char *str) | |||
1792 | int fd; | 1808 | int fd; |
1793 | int len, len2; | 1809 | int len, len2; |
1794 | 1810 | ||
1795 | if (!state->hist_file) | 1811 | /* bash compat: HISTFILE="" disables history saving */ |
1812 | if (!state->hist_file || !state->hist_file[0]) | ||
1796 | return; | 1813 | return; |
1797 | 1814 | ||
1798 | fd = open(state->hist_file, O_WRONLY | O_CREAT | O_APPEND, 0600); | 1815 | fd = open(state->hist_file, O_WRONLY | O_CREAT | O_APPEND, 0600); |
1799 | if (fd < 0) | 1816 | if (fd < 0) |
1800 | return; | 1817 | return; |
1801 | xlseek(fd, 0, SEEK_END); /* paranoia */ | ||
1802 | len = strlen(str); | 1818 | len = strlen(str); |
1803 | str[len] = '\n'; /* we (try to) do atomic write */ | 1819 | str[len] = '\n'; /* we (try to) do atomic write */ |
1804 | len2 = full_write(fd, str, len + 1); | 1820 | len2 = full_write(fd, str, len + 1); |
@@ -1853,13 +1869,10 @@ static void remember_in_history(char *str) | |||
1853 | if (str[0] == '\0') | 1869 | if (str[0] == '\0') |
1854 | return; | 1870 | return; |
1855 | i = state->cnt_history; | 1871 | i = state->cnt_history; |
1856 | /* Don't save dupes */ | 1872 | /* Don't save dups */ |
1857 | if (i && strcmp(state->history[i-1], str) == 0) | 1873 | if (i != 0 && strcmp(state->history[i-1], str) == 0) |
1858 | return; | 1874 | return; |
1859 | 1875 | ||
1860 | free(state->history[state->max_history]); /* redundant, paranoia */ | ||
1861 | state->history[state->max_history] = NULL; /* redundant, paranoia */ | ||
1862 | |||
1863 | /* If history[] is full, remove the oldest command */ | 1876 | /* If history[] is full, remove the oldest command */ |
1864 | /* we need to keep history[state->max_history] empty, hence >=, not > */ | 1877 | /* we need to keep history[state->max_history] empty, hence >=, not > */ |
1865 | if (i >= state->max_history) { | 1878 | if (i >= state->max_history) { |
@@ -1872,7 +1885,7 @@ static void remember_in_history(char *str) | |||
1872 | state->cnt_history_in_file--; | 1885 | state->cnt_history_in_file--; |
1873 | # endif | 1886 | # endif |
1874 | } | 1887 | } |
1875 | /* i <= state->max_history-1 */ | 1888 | /* i < state->max_history */ |
1876 | state->history[i++] = xstrdup(str); | 1889 | state->history[i++] = xstrdup(str); |
1877 | /* i <= state->max_history */ | 1890 | /* i <= state->max_history */ |
1878 | state->cur_history = i; | 1891 | state->cur_history = i; |
@@ -2388,7 +2401,6 @@ static int lineedit_read_key(char *read_key_buffer, int timeout) | |||
2388 | errno = EINTR; | 2401 | errno = EINTR; |
2389 | return -1; | 2402 | return -1; |
2390 | } | 2403 | } |
2391 | //FIXME: still races here with signals, but small window to poll() inside read_key | ||
2392 | IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 1;) | 2404 | IF_FEATURE_EDITING_WINCH(S.ok_to_redraw = 1;) |
2393 | /* errno = 0; - read_key does this itself */ | 2405 | /* errno = 0; - read_key does this itself */ |
2394 | ic = read_key(STDIN_FILENO, read_key_buffer, timeout); | 2406 | ic = read_key(STDIN_FILENO, read_key_buffer, timeout); |
diff --git a/libbb/poll_with_signals.c b/libbb/poll_with_signals.c new file mode 100644 index 000000000..d3c005418 --- /dev/null +++ b/libbb/poll_with_signals.c | |||
@@ -0,0 +1,48 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | ||
3 | * Utility routines. | ||
4 | * | ||
5 | * Copyright (C) 2025 Denys Vlasenko <vda.linux@googlemail.com> | ||
6 | * | ||
7 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
8 | */ | ||
9 | //kbuild:lib-$(CONFIG_PLATFORM_POSIX) += poll_with_signals.o | ||
10 | |||
11 | #include "libbb.h" | ||
12 | |||
13 | /* Shells, for example, need their line input and "read" builtin | ||
14 | * to be interruptible, and the naive handling of it a-la: | ||
15 | * if (bb_got_signal) { | ||
16 | * errno = EINTR; | ||
17 | * return -1; | ||
18 | * } | ||
19 | * poll(pfd, 1, -1); // signal here would set EINTR | ||
20 | * is racy. | ||
21 | * This is a bit heavy-handed, but safe wrt races: | ||
22 | */ | ||
23 | int FAST_FUNC check_got_signal_and_poll(struct pollfd pfd[1], int timeout) | ||
24 | { | ||
25 | int n; | ||
26 | struct timespec tv; | ||
27 | sigset_t orig_mask; | ||
28 | |||
29 | if (bb_got_signal) /* optimization */ | ||
30 | goto eintr; | ||
31 | |||
32 | if (timeout >= 0) { | ||
33 | tv.tv_sec = timeout / 1000; | ||
34 | tv.tv_nsec = (timeout % 1000) * 1000000; | ||
35 | } | ||
36 | /* test bb_got_signal, then poll(), atomically wrt signals */ | ||
37 | sigfillset(&orig_mask); | ||
38 | sigprocmask2(SIG_BLOCK, &orig_mask); | ||
39 | if (bb_got_signal) { | ||
40 | sigprocmask2(SIG_SETMASK, &orig_mask); | ||
41 | eintr: | ||
42 | errno = EINTR; /* inform the caller that we got a signal */ | ||
43 | return -1; | ||
44 | } | ||
45 | n = ppoll(pfd, 1, timeout >= 0 ? &tv : NULL, &orig_mask); | ||
46 | sigprocmask2(SIG_SETMASK, &orig_mask); | ||
47 | return n; | ||
48 | } | ||
diff --git a/libbb/pw_ascii64.c b/libbb/pw_ascii64.c new file mode 100644 index 000000000..3993932ca --- /dev/null +++ b/libbb/pw_ascii64.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /* vi: set sw=4 ts=4: */ | ||
2 | /* | ||
3 | * Utility routines. | ||
4 | * | ||
5 | * Copyright (C) 1999-2004 by Erik Andersen <andersen@codepoet.org> | ||
6 | * | ||
7 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. | ||
8 | */ | ||
9 | |||
/*
 * Decode one character of the crypt alphabet
 * "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 * into its 0..63 index.
 * Any character outside that alphabet yields a value >= 64.
 */
int FAST_FUNC a2i64(char c)
{
	unsigned char uc = c;

	if (uc < 'A') {
		if (uc <= '9') {
			/* "./0123456789" map to 0..11;
			 * bytes below '.' wrap around to a byte value >= 64 */
			return (unsigned char)(uc - '.');
		}
		/* between '9' and 'A': invalid */
		return 64;
	}
	if (uc <= 'Z') {
		/* "A..Z" map to 12..37 */
		return uc - 'A' + 12;
	}
	if (uc < 'a') {
		/* between 'Z' and 'a': the byte itself is already >= 64 */
		return uc;
	}
	/* "a..z" map to 38..63; anything past 'z' comes out >= 64 */
	return uc - 'a' + 38;
}
34 | |||
/*
 * Encode the low 6 bits of i as one character of the crypt alphabet:
 * 0..63 ->
 * "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 * Higher bits of i are ignored.
 */
int FAST_FUNC i2a64(int i)
{
	int sixbits = i & 0x3f;

	/* 0..11: "./0123456789" are consecutive ASCII codes starting at '.' */
	if (sixbits < 12)
		return '.' + sixbits;
	/* 12..37: "A..Z" */
	if (sixbits < 38)
		return 'A' + (sixbits - 12);
	/* 38..63: "a..z" */
	return 'a' + (sixbits - 38);
}
52 | |||
/*
 * Store n crypt-alphabet characters at s, encoding v six bits at a time,
 * least significant group first. No NUL terminator is written.
 * Returns the position just past the last character stored.
 */
char* FAST_FUNC
num2str64_lsb_first(char *s, unsigned v, int n)
{
	for (; n > 0; n--) {
		*s++ = i2a64(v); /* i2a64() uses only the low 6 bits */
		v >>= 6;
	}
	return s;
}
62 | |||
/*
 * Store the low 24 bits of v at s[0..3] as four crypt-alphabet
 * characters, most significant 6-bit group first:
 * bits 23..18, then 17..12, then 11..6, then 5..0.
 * No NUL terminator is written.
 */
static void
num2str64_4chars_msb_first(char *s, unsigned v)
{
	int shift;

	for (shift = 18; shift >= 0; shift -= 6)
		*s++ = i2a64(v >> shift); /* i2a64() masks to 6 bits */
}
71 | |||
/*
 * Fill p with 2*cnt pseudo-random characters of the crypt alphabet
 * and NUL-terminate the result; p must have room for 2*cnt + 1 bytes.
 * Returns the final generator state (callers may use it as extra
 * pseudo-random bits).
 *
 * cnt <= 0 stores just the terminating NUL. (A "do ... while (--cnt)"
 * loop would decrement past zero in that case and keep writing far
 * beyond the buffer.)
 *
 * The generator is seeded from the pid and a monotonic clock reading:
 * it is NOT cryptographically strong.
 */
int FAST_FUNC crypt_make_rand64encoded(char *p, int cnt /*, int x */)
{
	/* was: x += ... */
	unsigned x = getpid() + monotonic_us();

	while (cnt > 0) {
		/* x = (x*1664525 + 1013904223) % 2^32 generator is lame
		 * (low-order bit is not "random", etc...),
		 * but for our purposes it is good enough */
		x = x*1664525 + 1013904223;
		/* BTW, Park and Miller's "minimal standard generator" is
		 * x = x*16807 % ((2^31)-1)
		 * It has no problem with visibly alternating lowest bit
		 * but is also weak in cryptographic sense + needs div,
		 * which needs more code (and slower) on many CPUs */
		*p++ = i2a64(x >> 16);
		*p++ = i2a64(x >> 22);
		cnt--;
	}
	*p = '\0';
	return x;
}
diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c index 3463fd95b..93653de9f 100644 --- a/libbb/pw_encrypt.c +++ b/libbb/pw_encrypt.c | |||
@@ -13,48 +13,11 @@ | |||
13 | #endif | 13 | #endif |
14 | #include "libbb.h" | 14 | #include "libbb.h" |
15 | 15 | ||
16 | /* static const uint8_t ascii64[] ALIGN1 = | 16 | #include "pw_ascii64.c" |
17 | * "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; | ||
18 | */ | ||
19 | |||
20 | static int i64c(int i) | ||
21 | { | ||
22 | i &= 0x3f; | ||
23 | if (i == 0) | ||
24 | return '.'; | ||
25 | if (i == 1) | ||
26 | return '/'; | ||
27 | if (i < 12) | ||
28 | return ('0' - 2 + i); | ||
29 | if (i < 38) | ||
30 | return ('A' - 12 + i); | ||
31 | return ('a' - 38 + i); | ||
32 | } | ||
33 | |||
34 | int FAST_FUNC crypt_make_salt(char *p, int cnt /*, int x */) | ||
35 | { | ||
36 | /* was: x += ... */ | ||
37 | unsigned x = getpid() + monotonic_us(); | ||
38 | do { | ||
39 | /* x = (x*1664525 + 1013904223) % 2^32 generator is lame | ||
40 | * (low-order bit is not "random", etc...), | ||
41 | * but for our purposes it is good enough */ | ||
42 | x = x*1664525 + 1013904223; | ||
43 | /* BTW, Park and Miller's "minimal standard generator" is | ||
44 | * x = x*16807 % ((2^31)-1) | ||
45 | * It has no problem with visibly alternating lowest bit | ||
46 | * but is also weak in cryptographic sense + needs div, | ||
47 | * which needs more code (and slower) on many CPUs */ | ||
48 | *p++ = i64c(x >> 16); | ||
49 | *p++ = i64c(x >> 22); | ||
50 | } while (--cnt); | ||
51 | *p = '\0'; | ||
52 | return x; | ||
53 | } | ||
54 | 17 | ||
55 | char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) | 18 | char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) |
56 | { | 19 | { |
57 | int len = 2/2; | 20 | int len = 2 / 2; |
58 | char *salt_ptr = salt; | 21 | char *salt_ptr = salt; |
59 | 22 | ||
60 | /* Standard chpasswd uses uppercase algos ("MD5", not "md5"). | 23 | /* Standard chpasswd uses uppercase algos ("MD5", not "md5"). |
@@ -67,28 +30,61 @@ char* FAST_FUNC crypt_make_pw_salt(char salt[MAX_PW_SALT_LEN], const char *algo) | |||
67 | *salt_ptr++ = '$'; | 30 | *salt_ptr++ = '$'; |
68 | #if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_SHA | 31 | #if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_SHA |
69 | if ((algo[0]|0x20) == 's') { /* sha */ | 32 | if ((algo[0]|0x20) == 's') { /* sha */ |
70 | salt[1] = '5' + (strcasecmp(algo, "sha512") == 0); | 33 | salt[1] = '5' + (strncasecmp(algo, "sha512", 6) == 0); |
71 | len = 16/2; | 34 | len = 16 / 2; |
35 | } | ||
36 | #endif | ||
37 | #if !ENABLE_USE_BB_CRYPT || ENABLE_USE_BB_CRYPT_YES | ||
38 | if ((algo[0]|0x20) == 'y') { /* yescrypt */ | ||
39 | int rnd; | ||
40 | salt[1] = 'y'; | ||
41 | // The "j9T$" below is the default "yescrypt parameters" encoded by yescrypt_encode_params_r(): | ||
42 | //shadow-4.17.4/src/passwd.c | ||
43 | // salt = crypt_make_salt(NULL, NULL); | ||
44 | //shadow-4.17.4/lib/salt.c | ||
45 | //const char *crypt_make_salt(const char *meth, void *arg) | ||
46 | // if (streq(method, "YESCRYPT")) { | ||
47 | // MAGNUM(result, 'y'); | ||
48 | // salt_len = YESCRYPT_SALT_SIZE; // 24 | ||
49 | // rounds = YESCRYPT_get_salt_cost(arg); // always Y_COST_DEFAULT == 5 for NULL arg | ||
50 | // YESCRYPT_salt_cost_to_buf(result, rounds); // always "j9T$" | ||
51 | // char *retval = crypt_gensalt(result, rounds, NULL, 0); | ||
52 | //libxcrypt-4.4.38/lib/crypt-yescrypt.c | ||
53 | //void gensalt_yescrypt_rn (unsigned long count, | ||
54 | // const uint8_t *rbytes, size_t nrbytes, | ||
55 | // uint8_t *output, size_t o_size) | ||
56 | // yescrypt_params_t params = { | ||
57 | // .flags = YESCRYPT_DEFAULTS, | ||
58 | // .p = 1, | ||
59 | // }; | ||
60 | // if (count < 3) ... else | ||
61 | // params.r = 32; // N in 4KiB | ||
62 | // params.N = 1ULL << (count + 7); // 3 -> 1024, 4 -> 2048, ... 11 -> 262144 | ||
63 | // yescrypt_encode_params_r(&params, rbytes, nrbytes, outbuf, o_size) // always "$y$j9T$<random>" | ||
64 | len = 22 / 2; | ||
65 | salt_ptr = stpcpy(salt_ptr, "j9T$"); | ||
66 | /* append 2*len random chars */ | ||
67 | rnd = crypt_make_rand64encoded(salt_ptr, len); | ||
68 | /* fix up last char: it must be in 0..3 range (encoded as one of "./01"). | ||
69 | * IOW: salt_ptr[20..21] encode 16th random byte, must not be > 0xff. | ||
70 | * Without this, we can generate salts which are rejected | ||
71 | * by implementations with more strict salt length check. | ||
72 | */ | ||
73 | salt_ptr[21] = i2a64(rnd & 3); | ||
74 | /* For "mkpasswd -m yescrypt PASS j9T$<salt>" use case, | ||
75 | * "j9T$" is considered part of salt, | ||
76 | * need to return pointer to 'j'. Without -4, | ||
77 | * we'd end up using "j9T$j9T$<salt>" as salt. | ||
78 | */ | ||
79 | return salt_ptr - 4; | ||
72 | } | 80 | } |
73 | #endif | 81 | #endif |
74 | } | 82 | } |
75 | crypt_make_salt(salt_ptr, len); | 83 | crypt_make_rand64encoded(salt_ptr, len); /* appends 2*len random chars */ |
76 | return salt_ptr; | 84 | return salt_ptr; |
77 | } | 85 | } |
78 | 86 | ||
79 | #if ENABLE_USE_BB_CRYPT | 87 | #if ENABLE_USE_BB_CRYPT |
80 | |||
81 | static char* | ||
82 | to64(char *s, unsigned v, int n) | ||
83 | { | ||
84 | while (--n >= 0) { | ||
85 | /* *s++ = ascii64[v & 0x3f]; */ | ||
86 | *s++ = i64c(v); | ||
87 | v >>= 6; | ||
88 | } | ||
89 | return s; | ||
90 | } | ||
91 | |||
92 | /* | 88 | /* |
93 | * DES and MD5 crypt implementations are taken from uclibc. | 89 | * DES and MD5 crypt implementations are taken from uclibc. |
94 | * They were modified to not use static buffers. | 90 | * They were modified to not use static buffers. |
@@ -99,6 +95,9 @@ to64(char *s, unsigned v, int n) | |||
99 | #if ENABLE_USE_BB_CRYPT_SHA | 95 | #if ENABLE_USE_BB_CRYPT_SHA |
100 | #include "pw_encrypt_sha.c" | 96 | #include "pw_encrypt_sha.c" |
101 | #endif | 97 | #endif |
98 | #if ENABLE_USE_BB_CRYPT_YES | ||
99 | #include "pw_encrypt_yes.c" | ||
100 | #endif | ||
102 | 101 | ||
103 | /* Other advanced crypt ids (TODO?): */ | 102 | /* Other advanced crypt ids (TODO?): */ |
104 | /* $2$ or $2a$: Blowfish */ | 103 | /* $2$ or $2a$: Blowfish */ |
@@ -109,7 +108,7 @@ static struct des_ctx *des_ctx; | |||
109 | /* my_crypt returns malloc'ed data */ | 108 | /* my_crypt returns malloc'ed data */ |
110 | static char *my_crypt(const char *key, const char *salt) | 109 | static char *my_crypt(const char *key, const char *salt) |
111 | { | 110 | { |
112 | /* MD5 or SHA? */ | 111 | /* "$x$...." string? */ |
113 | if (salt[0] == '$' && salt[1] && salt[2] == '$') { | 112 | if (salt[0] == '$' && salt[1] && salt[2] == '$') { |
114 | if (salt[1] == '1') | 113 | if (salt[1] == '1') |
115 | return md5_crypt(xzalloc(MD5_OUT_BUFSIZE), (unsigned char*)key, (unsigned char*)salt); | 114 | return md5_crypt(xzalloc(MD5_OUT_BUFSIZE), (unsigned char*)key, (unsigned char*)salt); |
@@ -117,6 +116,10 @@ static char *my_crypt(const char *key, const char *salt) | |||
117 | if (salt[1] == '5' || salt[1] == '6') | 116 | if (salt[1] == '5' || salt[1] == '6') |
118 | return sha_crypt((char*)key, (char*)salt); | 117 | return sha_crypt((char*)key, (char*)salt); |
119 | #endif | 118 | #endif |
119 | #if ENABLE_USE_BB_CRYPT_YES | ||
120 | if (salt[1] == 'y') | ||
121 | return yes_crypt(key, salt); | ||
122 | #endif | ||
120 | } | 123 | } |
121 | 124 | ||
122 | if (!des_cctx) | 125 | if (!des_cctx) |
diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index fe8237cfe..ca8aa9bcc 100644 --- a/libbb/pw_encrypt_des.c +++ b/libbb/pw_encrypt_des.c | |||
@@ -186,39 +186,9 @@ static const uint8_t pbox[32] ALIGN1 = { | |||
186 | 2, 8, 24, 14, 32, 27, 3, 9, 19, 13, 30, 6, 22, 11, 4, 25 | 186 | 2, 8, 24, 14, 32, 27, 3, 9, 19, 13, 30, 6, 22, 11, 4, 25 |
187 | }; | 187 | }; |
188 | 188 | ||
189 | static const uint32_t bits32[32] ALIGN4 = { | ||
190 | 0x80000000, 0x40000000, 0x20000000, 0x10000000, | ||
191 | 0x08000000, 0x04000000, 0x02000000, 0x01000000, | ||
192 | 0x00800000, 0x00400000, 0x00200000, 0x00100000, | ||
193 | 0x00080000, 0x00040000, 0x00020000, 0x00010000, | ||
194 | 0x00008000, 0x00004000, 0x00002000, 0x00001000, | ||
195 | 0x00000800, 0x00000400, 0x00000200, 0x00000100, | ||
196 | 0x00000080, 0x00000040, 0x00000020, 0x00000010, | ||
197 | 0x00000008, 0x00000004, 0x00000002, 0x00000001 | ||
198 | }; | ||
199 | |||
200 | static const uint8_t bits8[8] ALIGN1 = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; | 189 | static const uint8_t bits8[8] ALIGN1 = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; |
201 | 190 | ||
202 | 191 | ||
203 | static int | ||
204 | ascii_to_bin(char ch) | ||
205 | { | ||
206 | if (ch > 'z') | ||
207 | return 0; | ||
208 | if (ch >= 'a') | ||
209 | return (ch - 'a' + 38); | ||
210 | if (ch > 'Z') | ||
211 | return 0; | ||
212 | if (ch >= 'A') | ||
213 | return (ch - 'A' + 12); | ||
214 | if (ch > '9') | ||
215 | return 0; | ||
216 | if (ch >= '.') | ||
217 | return (ch - '.'); | ||
218 | return 0; | ||
219 | } | ||
220 | |||
221 | |||
222 | /* Static stuff that stays resident and doesn't change after | 192 | /* Static stuff that stays resident and doesn't change after |
223 | * being initialized, and therefore doesn't need to be made | 193 | * being initialized, and therefore doesn't need to be made |
224 | * reentrant. */ | 194 | * reentrant. */ |
@@ -354,11 +324,18 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx) | |||
354 | int i, j, b, k, inbit, obit; | 324 | int i, j, b, k, inbit, obit; |
355 | uint32_t p; | 325 | uint32_t p; |
356 | const uint32_t *bits28, *bits24; | 326 | const uint32_t *bits28, *bits24; |
327 | uint32_t bits32[32]; | ||
357 | 328 | ||
358 | if (!ctx) | 329 | if (!ctx) |
359 | ctx = xmalloc(sizeof(*ctx)); | 330 | ctx = xmalloc(sizeof(*ctx)); |
360 | const_ctx = cctx; | 331 | const_ctx = cctx; |
361 | 332 | ||
333 | p = 0x80000000U; | ||
334 | for (i = 0; p; i++) { | ||
335 | bits32[i] = p; | ||
336 | p >>= 1; | ||
337 | } | ||
338 | |||
362 | #if USE_REPETITIVE_SPEEDUP | 339 | #if USE_REPETITIVE_SPEEDUP |
363 | old_rawkey0 = old_rawkey1 = 0; | 340 | old_rawkey0 = old_rawkey1 = 0; |
364 | old_salt = 0; | 341 | old_salt = 0; |
@@ -694,21 +671,6 @@ do_des(struct des_ctx *ctx, /*uint32_t l_in, uint32_t r_in,*/ uint32_t *l_out, u | |||
694 | 671 | ||
695 | #define DES_OUT_BUFSIZE 21 | 672 | #define DES_OUT_BUFSIZE 21 |
696 | 673 | ||
697 | static void | ||
698 | to64_msb_first(char *s, unsigned v) | ||
699 | { | ||
700 | #if 0 | ||
701 | *s++ = ascii64[(v >> 18) & 0x3f]; /* bits 23..18 */ | ||
702 | *s++ = ascii64[(v >> 12) & 0x3f]; /* bits 17..12 */ | ||
703 | *s++ = ascii64[(v >> 6) & 0x3f]; /* bits 11..6 */ | ||
704 | *s = ascii64[v & 0x3f]; /* bits 5..0 */ | ||
705 | #endif | ||
706 | *s++ = i64c(v >> 18); /* bits 23..18 */ | ||
707 | *s++ = i64c(v >> 12); /* bits 17..12 */ | ||
708 | *s++ = i64c(v >> 6); /* bits 11..6 */ | ||
709 | *s = i64c(v); /* bits 5..0 */ | ||
710 | } | ||
711 | |||
712 | static char * | 674 | static char * |
713 | NOINLINE | 675 | NOINLINE |
714 | des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], | 676 | des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], |
@@ -740,44 +702,28 @@ des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], | |||
740 | */ | 702 | */ |
741 | output[0] = salt_str[0]; | 703 | output[0] = salt_str[0]; |
742 | output[1] = salt_str[1]; | 704 | output[1] = salt_str[1]; |
743 | salt = (ascii_to_bin(salt_str[1]) << 6) | 705 | |
744 | | ascii_to_bin(salt_str[0]); | 706 | salt = a2i64(salt_str[0]); |
707 | if (salt >= 64) | ||
708 | return NULL; /* bad salt char */ | ||
709 | salt |= (a2i64(salt_str[1]) << 6); | ||
710 | if (salt >= (64 << 6)) | ||
711 | return NULL; /* bad salt char */ | ||
745 | setup_salt(ctx, salt); /* set ctx->saltbits for do_des() */ | 712 | setup_salt(ctx, salt); /* set ctx->saltbits for do_des() */ |
746 | 713 | ||
747 | /* Do it. */ | 714 | /* Do it. */ |
748 | do_des(ctx, /*0, 0,*/ &r0, &r1, 25 /* count */); | 715 | do_des(ctx, /*0, 0,*/ &r0, &r1, 25 /* count */); |
749 | 716 | ||
750 | /* Now encode the result. */ | 717 | /* Now encode the result. */ |
751 | #if 0 | ||
752 | { | ||
753 | uint32_t l = (r0 >> 8); | ||
754 | q = (uint8_t *)output + 2; | ||
755 | *q++ = ascii64[(l >> 18) & 0x3f]; /* bits 31..26 of r0 */ | ||
756 | *q++ = ascii64[(l >> 12) & 0x3f]; /* bits 25..20 of r0 */ | ||
757 | *q++ = ascii64[(l >> 6) & 0x3f]; /* bits 19..14 of r0 */ | ||
758 | *q++ = ascii64[l & 0x3f]; /* bits 13..8 of r0 */ | ||
759 | l = ((r0 << 16) | (r1 >> 16)); | ||
760 | *q++ = ascii64[(l >> 18) & 0x3f]; /* bits 7..2 of r0 */ | ||
761 | *q++ = ascii64[(l >> 12) & 0x3f]; /* bits 1..2 of r0 and 31..28 of r1 */ | ||
762 | *q++ = ascii64[(l >> 6) & 0x3f]; /* bits 27..22 of r1 */ | ||
763 | *q++ = ascii64[l & 0x3f]; /* bits 21..16 of r1 */ | ||
764 | l = r1 << 2; | ||
765 | *q++ = ascii64[(l >> 12) & 0x3f]; /* bits 15..10 of r1 */ | ||
766 | *q++ = ascii64[(l >> 6) & 0x3f]; /* bits 9..4 of r1 */ | ||
767 | *q++ = ascii64[l & 0x3f]; /* bits 3..0 of r1 + 00 */ | ||
768 | *q = 0; | ||
769 | } | ||
770 | #else | ||
771 | /* Each call takes low-order 24 bits and stores 4 chars */ | 718 | /* Each call takes low-order 24 bits and stores 4 chars */ |
772 | /* bits 31..8 of r0 */ | 719 | /* bits 31..8 of r0 */ |
773 | to64_msb_first(output + 2, (r0 >> 8)); | 720 | num2str64_4chars_msb_first(output + 2, (r0 >> 8)); |
774 | /* bits 7..0 of r0 and 31..16 of r1 */ | 721 | /* bits 7..0 of r0 and 31..16 of r1 */ |
775 | to64_msb_first(output + 6, (r0 << 16) | (r1 >> 16)); | 722 | num2str64_4chars_msb_first(output + 6, (r0 << 16) | (r1 >> 16)); |
776 | /* bits 15..0 of r1 and two zero bits (plus extra zero byte) */ | 723 | /* bits 15..0 of r1 and two zero bits (plus extra zero byte) */ |
777 | to64_msb_first(output + 10, (r1 << 8)); | 724 | num2str64_4chars_msb_first(output + 10, (r1 << 8)); |
778 | /* extra zero byte is encoded as '.', fixing it */ | 725 | /* extra zero byte is encoded as '.', fixing it */ |
779 | output[13] = '\0'; | 726 | output[13] = '\0'; |
780 | #endif | ||
781 | 727 | ||
782 | return output; | 728 | return output; |
783 | } | 729 | } |
diff --git a/libbb/pw_encrypt_md5.c b/libbb/pw_encrypt_md5.c index 1e52ecaea..92d039f96 100644 --- a/libbb/pw_encrypt_md5.c +++ b/libbb/pw_encrypt_md5.c | |||
@@ -149,9 +149,9 @@ md5_crypt(char result[MD5_OUT_BUFSIZE], const unsigned char *pw, const unsigned | |||
149 | final[16] = final[5]; | 149 | final[16] = final[5]; |
150 | for (i = 0; i < 5; i++) { | 150 | for (i = 0; i < 5; i++) { |
151 | unsigned l = (final[i] << 16) | (final[i+6] << 8) | final[i+12]; | 151 | unsigned l = (final[i] << 16) | (final[i+6] << 8) | final[i+12]; |
152 | p = to64(p, l, 4); | 152 | p = num2str64_lsb_first(p, l, 4); |
153 | } | 153 | } |
154 | p = to64(p, final[11], 2); | 154 | p = num2str64_lsb_first(p, final[11], 2); |
155 | *p = '\0'; | 155 | *p = '\0'; |
156 | 156 | ||
157 | /* Don't leave anything around in vm they could use. */ | 157 | /* Don't leave anything around in vm they could use. */ |
diff --git a/libbb/pw_encrypt_sha.c b/libbb/pw_encrypt_sha.c index 5457d7ab6..695a5c07f 100644 --- a/libbb/pw_encrypt_sha.c +++ b/libbb/pw_encrypt_sha.c | |||
@@ -84,8 +84,7 @@ sha_crypt(/*const*/ char *key_data, /*const*/ char *salt_data) | |||
84 | as a scratch space later. */ | 84 | as a scratch space later. */ |
85 | salt_data = xstrndup(salt_data, salt_len); | 85 | salt_data = xstrndup(salt_data, salt_len); |
86 | /* add "salt$" to result */ | 86 | /* add "salt$" to result */ |
87 | strcpy(resptr, salt_data); | 87 | resptr = stpcpy(resptr, salt_data); |
88 | resptr += salt_len; | ||
89 | *resptr++ = '$'; | 88 | *resptr++ = '$'; |
90 | /* key data doesn't need much processing */ | 89 | /* key data doesn't need much processing */ |
91 | key_len = strlen(key_data); | 90 | key_len = strlen(key_data); |
@@ -198,7 +197,7 @@ sha_crypt(/*const*/ char *key_data, /*const*/ char *salt_data) | |||
198 | #define b64_from_24bit(B2, B1, B0, N) \ | 197 | #define b64_from_24bit(B2, B1, B0, N) \ |
199 | do { \ | 198 | do { \ |
200 | unsigned w = ((B2) << 16) | ((B1) << 8) | (B0); \ | 199 | unsigned w = ((B2) << 16) | ((B1) << 8) | (B0); \ |
201 | resptr = to64(resptr, w, N); \ | 200 | resptr = num2str64_lsb_first(resptr, w, N); \ |
202 | } while (0) | 201 | } while (0) |
203 | if (_32or64 == 32) { /* sha256 */ | 202 | if (_32or64 == 32) { /* sha256 */ |
204 | unsigned i = 0; | 203 | unsigned i = 0; |
diff --git a/libbb/pw_encrypt_yes.c b/libbb/pw_encrypt_yes.c new file mode 100644 index 000000000..50bd06418 --- /dev/null +++ b/libbb/pw_encrypt_yes.c | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Utility routines. | ||
3 | * | ||
4 | * Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com> | ||
5 | * | ||
6 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
7 | */ | ||
8 | #include "yescrypt/alg-yescrypt.h" | ||
9 | |||
10 | static char * | ||
11 | yes_crypt(const char *passwd, const char *salt_data) | ||
12 | { | ||
13 | /* prefix, '$', hash, NUL */ | ||
14 | char buf[YESCRYPT_PREFIX_LEN + 1 + YESCRYPT_HASH_LEN + 1]; | ||
15 | char *retval; | ||
16 | |||
17 | retval = yescrypt_r( | ||
18 | (const uint8_t *)passwd, strlen(passwd), | ||
19 | (const uint8_t *)salt_data, | ||
20 | buf, sizeof(buf)); | ||
21 | /* The returned value is either buf[], or NULL on error */ | ||
22 | |||
23 | return xstrdup(retval); | ||
24 | } | ||
diff --git a/libbb/read_key.c b/libbb/read_key.c index 54886cc9c..2414105ee 100644 --- a/libbb/read_key.c +++ b/libbb/read_key.c | |||
@@ -11,7 +11,7 @@ | |||
11 | 11 | ||
12 | int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) | 12 | int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) |
13 | { | 13 | { |
14 | struct pollfd pfd; | 14 | struct pollfd pfd[1]; |
15 | const char *seq; | 15 | const char *seq; |
16 | int n; | 16 | int n; |
17 | 17 | ||
@@ -117,8 +117,8 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) | |||
117 | return windows_read_key(fd, buffer, timeout); | 117 | return windows_read_key(fd, buffer, timeout); |
118 | #endif | 118 | #endif |
119 | 119 | ||
120 | pfd.fd = fd; | 120 | pfd->fd = fd; |
121 | pfd.events = POLLIN; | 121 | pfd->events = POLLIN; |
122 | 122 | ||
123 | buffer++; /* saved chars counter is in buffer[-1] now */ | 123 | buffer++; /* saved chars counter is in buffer[-1] now */ |
124 | 124 | ||
@@ -126,12 +126,16 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) | |||
126 | errno = 0; | 126 | errno = 0; |
127 | n = (unsigned char)buffer[-1]; | 127 | n = (unsigned char)buffer[-1]; |
128 | if (n == 0) { | 128 | if (n == 0) { |
129 | /* If no data, wait for input. | 129 | /* No data. Wait for input. */ |
130 | * If requested, wait TIMEOUT ms. TIMEOUT = -1 is useful | 130 | |
131 | * if fd can be in non-blocking mode. | 131 | /* timeout == -2 means "do not poll". Else: */ |
132 | */ | ||
133 | if (timeout >= -1) { | 132 | if (timeout >= -1) { |
134 | n = poll(&pfd, 1, timeout); | 133 | /* We must poll even if timeout == -1: |
134 | * we want to be interrupted if signal arrives, | ||
135 | * regardless of SA_RESTART-ness of that signal! | ||
136 | */ | ||
137 | /* test bb_got_signal, then poll(), atomically wrt signals */ | ||
138 | n = check_got_signal_and_poll(pfd, timeout); | ||
135 | if (n < 0 && errno == EINTR) | 139 | if (n < 0 && errno == EINTR) |
136 | return n; | 140 | return n; |
137 | if (n == 0) { | 141 | if (n == 0) { |
@@ -140,6 +144,7 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) | |||
140 | return -1; | 144 | return -1; |
141 | } | 145 | } |
142 | } | 146 | } |
147 | |||
143 | /* It is tempting to read more than one byte here, | 148 | /* It is tempting to read more than one byte here, |
144 | * but it breaks pasting. Example: at shell prompt, | 149 | * but it breaks pasting. Example: at shell prompt, |
145 | * user presses "c","a","t" and then pastes "\nline\n". | 150 | * user presses "c","a","t" and then pastes "\nline\n". |
@@ -178,7 +183,7 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) | |||
178 | * so if we block for long it's not really an escape sequence. | 183 | * so if we block for long it's not really an escape sequence. |
179 | * Timeout is needed to reconnect escape sequences | 184 | * Timeout is needed to reconnect escape sequences |
180 | * split up by transmission over a serial console. */ | 185 | * split up by transmission over a serial console. */ |
181 | if (safe_poll(&pfd, 1, 50) == 0) { | 186 | if (safe_poll(pfd, 1, 50) == 0) { |
182 | /* No more data! | 187 | /* No more data! |
183 | * Array is sorted from shortest to longest, | 188 | * Array is sorted from shortest to longest, |
184 | * we can't match anything later in array - | 189 | * we can't match anything later in array - |
@@ -227,7 +232,7 @@ int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout) | |||
227 | * n = bytes read. Try to read more until we time out. | 232 | * n = bytes read. Try to read more until we time out. |
228 | */ | 233 | */ |
229 | while (n < KEYCODE_BUFFER_SIZE-1) { /* 1 for count byte at buffer[-1] */ | 234 | while (n < KEYCODE_BUFFER_SIZE-1) { /* 1 for count byte at buffer[-1] */ |
230 | if (safe_poll(&pfd, 1, 50) == 0) { | 235 | if (safe_poll(pfd, 1, 50) == 0) { |
231 | /* No more data! */ | 236 | /* No more data! */ |
232 | break; | 237 | break; |
233 | } | 238 | } |
diff --git a/libbb/u_signal_names.c b/libbb/u_signal_names.c index ef2b6f891..e233849c5 100644 --- a/libbb/u_signal_names.c +++ b/libbb/u_signal_names.c | |||
@@ -27,10 +27,6 @@ | |||
27 | 27 | ||
28 | #include "libbb.h" | 28 | #include "libbb.h" |
29 | 29 | ||
30 | #if ENABLE_PLATFORM_MINGW32 | ||
31 | # undef SIGPIPE | ||
32 | #endif | ||
33 | |||
34 | #if ENABLE_PLATFORM_POSIX || defined(SIGSTKFLT) || defined(SIGVTALRM) | 30 | #if ENABLE_PLATFORM_POSIX || defined(SIGSTKFLT) || defined(SIGVTALRM) |
35 | # define SIGLEN 7 | 31 | # define SIGLEN 7 |
36 | #elif defined(SIGWINCH) || (ENABLE_FEATURE_RTMINMAX && \ | 32 | #elif defined(SIGWINCH) || (ENABLE_FEATURE_RTMINMAX && \ |
diff --git a/libbb/xfuncs.c b/libbb/xfuncs.c index 7df1a4cd3..5609858d1 100644 --- a/libbb/xfuncs.c +++ b/libbb/xfuncs.c | |||
@@ -333,7 +333,7 @@ int FAST_FUNC get_termios_and_make_raw(int fd, struct termios *newterm, struct t | |||
333 | *newterm = *oldterm; | 333 | *newterm = *oldterm; |
334 | 334 | ||
335 | #if ENABLE_PLATFORM_MINGW32 | 335 | #if ENABLE_PLATFORM_MINGW32 |
336 | newterm->imode &= | 336 | newterm->w_mode &= |
337 | ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT); | 337 | ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT); |
338 | #else | 338 | #else |
339 | /* Turn off buffered input (ICANON) | 339 | /* Turn off buffered input (ICANON) |
diff --git a/libbb/yescrypt/Kbuild.src b/libbb/yescrypt/Kbuild.src new file mode 100644 index 000000000..a61211a29 --- /dev/null +++ b/libbb/yescrypt/Kbuild.src | |||
@@ -0,0 +1,9 @@ | |||
1 | # Makefile for busybox | ||
2 | # | ||
3 | # Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com> | ||
4 | # | ||
5 | # Licensed under GPLv2, see file LICENSE in this source tree. | ||
6 | |||
7 | lib-y:= | ||
8 | |||
9 | INSERT | ||
diff --git a/libbb/yescrypt/PARAMETERS b/libbb/yescrypt/PARAMETERS new file mode 100644 index 000000000..d9f5d24e6 --- /dev/null +++ b/libbb/yescrypt/PARAMETERS | |||
@@ -0,0 +1,196 @@ | |||
1 | Optimal yescrypt configuration. | ||
2 | |||
3 | yescrypt is very flexible, but configuring it optimally is complicated. | ||
4 | Here are some guidelines to simplify near-optimal configuration. We | ||
5 | start by listing the parameters and their typical values, and then give | ||
6 | currently recommended parameter sets by use case. | ||
7 | |||
8 | |||
9 | Parameters and their typical values. | ||
10 | |||
11 | Set flags (yescrypt flavor) to YESCRYPT_DEFAULTS to use the currently | ||
12 | recommended flavor. (Other flags values exist for compatibility and for | ||
13 | specialized cases where you think you know what you're doing.) | ||
14 | |||
15 | Set N (block count) based on target memory usage and running time, as | ||
16 | well as on the value of r (block size in 128 byte units). N must be a | ||
17 | power of two. | ||
18 | |||
19 | Set r (block size) to 8 (so that N is in KiB, which is convenient) or to | ||
20 | another small value (if more optimal or for fine-tuning of the total | ||
21 | size and/or running time). Reasonable values for r are from 8 to 96. | ||
22 | |||
23 | Set p (parallelism) to 1 meaning no thread-level parallelism within one | ||
24 | computation of yescrypt. (Use of thread-level parallelism within | ||
25 | yescrypt makes sense for ROM initialization and for key derivation at | ||
26 | high memory usage, but usually not for password hashing where | ||
27 | parallelism is available through concurrent authentication attempts. | ||
28 | Don't use p > 1 unnecessarily.) | ||
29 | |||
30 | Set t (time) to 0 to use the optimal running time for a given memory | ||
31 | usage. This will allow you to maximize the memory usage (the value of | ||
32 | N*r) while staying within your running time constraints. (Non-zero t | ||
33 | makes sense in specialized cases where you can't afford higher memory | ||
34 | usage but can afford more time.) | ||
35 | |||
36 | Set g (upgrades) to 0 because there have been no hash upgrades yet. | ||
37 | |||
38 | Set NROM (block count of ROM) to 0 unless you use a ROM (see below). | ||
39 | NROM must be a power of two. | ||
40 | |||
41 | |||
42 | Password hashing for user authentication, no ROM. | ||
43 | |||
44 | Small and fast (memory usage 2 MiB, performance like bcrypt cost 2^5 - | ||
45 | latency 2-3 ms and throughput 10,000+ per second on a 16-core server): | ||
46 | |||
47 | flags = YESCRYPT_DEFAULTS, N = 2048, r = 8, p = 1, t = 0, g = 0, NROM = 0 | ||
48 | |||
49 | Large and slow (memory usage 16 MiB, performance like bcrypt cost 2^8 - | ||
50 | latency 10-30 ms and throughput 1000+ per second on a 16-core server): | ||
51 | |||
52 | flags = YESCRYPT_DEFAULTS, N = 4096, r = 32, p = 1, t = 0, g = 0, NROM = 0 | ||
53 | |||
54 | Of course, even heavier and slower settings are possible, if affordable. | ||
55 | Simply double the value of N as many times as needed. Since N must be a | ||
56 | power of two, you may use r (in the range of 8 to 32) and/or t (in the | ||
57 | range of 0 to 2) for fine-tuning the running time, but first bring N to | ||
58 | the maximum you can afford. If this feels too complicated, just use one | ||
59 | of the two parameter sets given above (preferably the second) as-is. | ||
60 | |||
61 | |||
62 | Password hashing for user authentication, with ROM. | ||
63 | |||
64 | It's similar to the above, except that you need to adjust r, set NROM, | ||
65 | and initialize the ROM. | ||
66 | |||
67 | First decide on a ROM size, such as making it a large portion of your | ||
68 | dedicated authentication servers' RAM sizes. Since NROM (block count) | ||
69 | must be a power of two, you might need to choose r (block size) based on | ||
70 | how your desired ROM size corresponds to a power of two. Also tuning | ||
71 | for performance on current hardware, you'll likely end up with r in the | ||
72 | range from slightly below 16 to 32. For example, to use 15/16 of a | ||
73 | server's 256 GiB RAM as ROM (thus, making it 240 GiB), you could use | ||
74 | r=15 or r=30. To use 23/24 of a server's 384 GiB RAM as ROM (thus, | ||
75 | making it 368 GiB), you'd use r=23. Then set NROM to your desired ROM | ||
76 | size in KiB divided by 128*r. Note that these examples might (or might | ||
77 | not) be too extreme, leaving little memory for the rest of the system. | ||
78 | You could as well opt for 7/8 with r=14 or 11/12 with r=11 or r=22. | ||
79 | |||
80 | Note that higher r may make placing of ROM in e.g. NVMe flash memory | ||
81 | instead of in RAM more reasonable (or less unreasonable) than it would | ||
82 | have been with a lower r. If this is a concern as it relates to | ||
83 | possible attacks and you do not intend to ever do it defensively, you | ||
84 | might want to keep r lower (e.g., prefer r=15 over r=30 in the example | ||
85 | above, even if 30 performs slightly faster). | ||
86 | |||
87 | Your adjustments to r, if you deviate from powers of two, will also | ||
88 | result in weirder memory usage per hash. Like 1.75 MiB at r=14 instead | ||
89 | of 2 MiB at r=8 that you would have used without a ROM. That's OK. | ||
90 | |||
91 | For ROM initialization, which you do with yescrypt_init_shared(), use | ||
92 | the same r and NROM that you'd later use for password hashing, choose p | ||
93 | based on your servers' physical and/or logical CPU count (maybe | ||
94 | considering eventual upgrades as you won't be able to change this later, | ||
95 | but without going unnecessarily high - e.g., p=28, p=56, or p=112 make | ||
96 | sense on servers that currently have 28 physical / 56 logical CPUs), and | ||
97 | set the rest of the parameters to: | ||
98 | |||
99 | flags = YESCRYPT_DEFAULTS, N = 0, t = 0, g = 0 | ||
100 | |||
101 | N is set to 0 because it isn't relevant during ROM initialization (you | ||
102 | can use different values of N for hashing passwords with the same ROM). | ||
103 | |||
104 | To keep the ROM in e.g. SysV shared memory and reuse it across your | ||
105 | authentication service restarts, you'd need to allocate the memory and | ||
106 | set the flags to "YESCRYPT_DEFAULTS | YESCRYPT_SHARED_PREALLOCATED". | ||
107 | |||
108 | For actual password hashing, you'd use your chosen values for N, r, | ||
109 | NROM, and set the rest of the parameters to: | ||
110 | |||
111 | flags = YESCRYPT_DEFAULTS, p = 1, t = 0, g = 0 | ||
112 | |||
113 | Note that although you'd use a large p for ROM initialization, you | ||
114 | should use p=1 for actual password hashing like you would without a ROM. | ||
115 | |||
116 | Do not forget to pass the ROM into the actual password hashing (and keep | ||
117 | r and NROM set accordingly). | ||
118 | |||
119 | Since N must be a power of two and r is dependent on ROM size, you may | ||
120 | use t (in the range of 0 to 2) for fine-tuning the running time, but | ||
121 | first bring N to the maximum you can afford. | ||
122 | |||
123 | If this feels too complicated, or even if it doesn't, please consider | ||
124 | engaging Openwall for your yescrypt deployment. We'd be happy to help. | ||
125 | |||
126 | |||
127 | Password-based key derivation. | ||
128 | |||
129 | (Or rather passphrase-based.) | ||
130 | |||
131 | Use settings similar to those for password hashing without a ROM, but | ||
132 | adjusted for higher memory usage and running time, and optionally with | ||
133 | thread-level parallelism. | ||
134 | |||
135 | Small and fast (memory usage 128 MiB, running time under 100 ms on a | ||
136 | fast desktop): | ||
137 | |||
138 | flags = YESCRYPT_DEFAULTS, N = 32768, r = 32, p = 1, t = 0, g = 0, NROM = 0 | ||
139 | |||
140 | Large and fast (memory usage 1 GiB, running time under 200 ms on a fast | ||
141 | quad-core desktop not including memory allocation overhead, under 250 ms | ||
142 | with the overhead included), but requires build with OpenMP support (or | ||
143 | otherwise will run as slow as, yet be weaker than, its p=1 alternative): | ||
144 | |||
145 | flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 0, g = 0, NROM = 0 | ||
146 | |||
147 | Large and slower (memory usage 1 GiB, running time under 300 ms on a | ||
148 | fast quad-core desktop not including memory allocation overhead, under | ||
149 | 350 ms with the overhead included), also requires build with OpenMP | ||
150 | support (or otherwise will run slower than the p=1 alternative below): | ||
151 | |||
152 | flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 2, g = 0, NROM = 0 | ||
153 | |||
154 | Large and slow (memory usage 1 GiB, running time under 600 ms on a fast | ||
155 | desktop not including memory allocation overhead, under 650 ms with the | ||
156 | overhead included): | ||
157 | |||
158 | flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 1, t = 0, g = 0, NROM = 0 | ||
159 | |||
160 | Just like with password hashing, even heavier and slower settings are | ||
161 | possible, if affordable, and you achieve them by adjusting N, r, t in | ||
162 | the same way and in the same preferred ranges (please see the section on | ||
163 | password hashing without a ROM, above). Unlike with password hashing, | ||
164 | it makes some sense to go above t=2 if you expect that your users might | ||
165 | not be able to afford more memory but can afford more time. However, | ||
166 | increasing the memory usage provides better protection, and we don't | ||
167 | recommend forcing your users to wait for more than 1 second as they | ||
168 | could as well type more characters in that time. If this feels too | ||
169 | complicated, just use one of the above parameter sets as-is. | ||
170 | |||
171 | |||
172 | Amortization of memory allocation overhead. | ||
173 | |||
174 | It takes a significant fraction of yescrypt's total running time to | ||
175 | allocate memory from the operating system, especially considering that | ||
176 | the kernel zeroizes the memory before handing it over to your program. | ||
177 | |||
178 | Unless you naturally need to compute yescrypt just once per process, you | ||
179 | may achieve greater efficiency by fully using advanced yescrypt APIs | ||
180 | that let you preserve and reuse the memory allocation across yescrypt | ||
181 | invocations. This is done by reusing the structure pointed to by the | ||
182 | "yescrypt_local_t *local" argument of yescrypt_r() or yescrypt_kdf() | ||
183 | without calling yescrypt_free_local() in between the repeated invocations | ||
184 | of yescrypt. | ||
185 | |||
186 | |||
187 | YESCRYPT_DEFAULTS macro. | ||
188 | |||
189 | Please note that the value of the YESCRYPT_DEFAULTS macro might change | ||
190 | later, so if you use the macro like it's recommended here then for | ||
191 | results reproducible across versions you might need to store its value | ||
192 | somewhere along with the hashes or the encrypted data. | ||
193 | |||
194 | If you use yescrypt's standard hash string encoding, then yescrypt | ||
195 | already encodes and decodes this value for you, so you don't need to | ||
196 | worry about this. | ||
diff --git a/libbb/yescrypt/README b/libbb/yescrypt/README new file mode 100644 index 000000000..c1011c56a --- /dev/null +++ b/libbb/yescrypt/README | |||
@@ -0,0 +1,4 @@ | |||
1 | The yescrypt code in this directory is adapted from libxcrypt-4.4.38 | ||
2 | with minimal edits, hopefully making it easier to track | ||
3 | backports by resetting the tree to the commit which created this file, | ||
4 | then comparing changes in upstream libxcrypt to the tree. | ||
diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c new file mode 100644 index 000000000..20e8d1ee4 --- /dev/null +++ b/libbb/yescrypt/alg-sha256.c | |||
@@ -0,0 +1,86 @@ | |||
1 | /*- | ||
2 | * Copyright 2005-2016 Colin Percival | ||
3 | * Copyright 2016-2018,2021 Alexander Peslyak | ||
4 | * All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer. | ||
11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer in the | ||
13 | * documentation and/or other materials provided with the distribution. | ||
14 | * | ||
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | ||
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
25 | * SUCH DAMAGE. | ||
26 | */ | ||
27 | |||
28 | /** | ||
29 | * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): | ||
30 | * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and | ||
31 | * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). | ||
32 | */ | ||
33 | static void | ||
34 | PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, | ||
35 | const uint8_t *salt, size_t saltlen, | ||
36 | uint64_t c, uint8_t *buf, size_t dkLen) | ||
37 | { | ||
38 | hmac_ctx_t Phctx, PShctx; | ||
39 | uint32_t i; | ||
40 | |||
41 | /* Compute HMAC state after processing P. */ | ||
42 | hmac_begin(&Phctx, passwd, passwdlen, sha256_begin); | ||
43 | |||
44 | /* Compute HMAC state after processing P and S. */ | ||
45 | PShctx = Phctx; | ||
46 | hmac_hash(&PShctx, salt, saltlen); | ||
47 | |||
48 | /* Iterate through the blocks. */ | ||
49 | for (i = 0; dkLen != 0; ) { | ||
50 | uint64_t U[32 / 8]; | ||
51 | uint64_t T[32 / 8]; | ||
52 | uint64_t j; | ||
53 | uint32_t ivec; | ||
54 | size_t clen; | ||
55 | int k; | ||
56 | |||
57 | /* Generate INT(i). */ | ||
58 | i++; | ||
59 | ivec = SWAP_BE32(i); | ||
60 | |||
61 | /* Compute U_1 = PRF(P, S || INT(i)). */ | ||
62 | hmac_peek_hash(&PShctx, (void*)T, &ivec, 4, NULL); | ||
63 | //TODO: the above is a vararg function, might incur some ABI pain | ||
64 | //does libbb need a non-vararg version with just one (buf,len)? | ||
65 | |||
66 | if (c > 1) { | ||
67 | /* T_i = U_1 ... */ | ||
68 | memcpy(U, T, 32); | ||
69 | for (j = 2; j <= c; j++) { | ||
70 | /* Compute U_j. */ | ||
71 | hmac_peek_hash(&Phctx, (void*)U, U, 32, NULL); | ||
72 | /* ... xor U_j ... */ | ||
73 | for (k = 0; k < 32 / 8; k++) | ||
74 | T[k] ^= U[k]; | ||
75 | //TODO: xorbuf32_aligned_long(T, U); | ||
76 | } | ||
77 | } | ||
78 | |||
79 | /* Copy as many bytes as necessary into buf. */ | ||
80 | clen = dkLen; | ||
81 | if (clen > 32) | ||
82 | clen = 32; | ||
83 | buf = mempcpy(buf, T, clen); | ||
84 | dkLen -= clen; | ||
85 | } | ||
86 | } | ||
diff --git a/libbb/yescrypt/alg-yescrypt-common.c b/libbb/yescrypt/alg-yescrypt-common.c new file mode 100644 index 000000000..c51823787 --- /dev/null +++ b/libbb/yescrypt/alg-yescrypt-common.c | |||
@@ -0,0 +1,408 @@ | |||
1 | /*- | ||
2 | * Copyright 2013-2018 Alexander Peslyak | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * Redistribution and use in source and binary forms, with or without | ||
6 | * modification, are permitted. | ||
7 | * | ||
8 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | ||
9 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
10 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
11 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
12 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
13 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
14 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
15 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
16 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
17 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
18 | * SUCH DAMAGE. | ||
19 | */ | ||
20 | |||
21 | #if RESTRICTED_PARAMS | ||
22 | |||
/* Restricted-parameter build: every parameter is one ascii64 char whose
 * value must not exceed 47. Stores that value plus (min) into *(dst),
 * advances src by one char and evaluates to the advanced src.
 * Any other char jumps to the "fail" label of the enclosing function.
 */
#define decode64_uint32(dst, src, min) \
({ \
	uint32_t digit_ = a2i64(*(src)); \
	if (digit_ > 47) \
		goto fail; \
	*(dst) = (min) + digit_; \
	++src; \
})
/* No self-test, and extended-parameter code is compiled out */
#define test_decode64_uint32() ((void)0)
#define FULL_PARAMS(...)
33 | |||
34 | #else | ||
35 | |||
36 | #define FULL_PARAMS(...) __VA_ARGS__ | ||
37 | |||
38 | /* Not inlining: | ||
39 | * de/encode64 functions are only used to read | ||
40 | * yescrypt_params_t field, and convert salt to binary - | ||
41 | * both of these are negligible compared to main hashing operation | ||
42 | */ | ||
43 | static NOINLINE const uint8_t *decode64_uint32( | ||
44 | uint32_t *dst, | ||
45 | const uint8_t *src, uint32_t val) | ||
46 | { | ||
47 | uint32_t start = 0, end = 47, bits = 0; | ||
48 | uint32_t c; | ||
49 | |||
50 | if (!src) /* previous decode failed already? */ | ||
51 | goto fail; | ||
52 | |||
53 | c = a2i64(*src++); | ||
54 | if (c > 63) | ||
55 | goto fail; | ||
56 | |||
57 | // The encoding of number N: | ||
58 | // start = 0 end = 47 | ||
59 | // If N < 48, it is encoded verbatim, else | ||
60 | // N -= 48 | ||
61 | // start = end+1 = 48 | ||
62 | // end += (64-end)/2 = 55 | ||
63 | // If N < (end+1-start)<<6 = 8<<6, it is encoded as 48+(N>>6)|low6bits (that is, 48...55|<6bit>), else | ||
64 | // N -= 8<<6 | ||
65 | // start = end+1 = 56 | ||
66 | // end += (64-end)/2 = 59 | ||
67 | // If N < (end+1-start)<<2*6 = 4<<12, it is encoded as 56+(N>>2*6)|low12bits (that is, 56...59|<6bit>|<6bit>), else | ||
68 | // ...same for 60..61|<6bit>|<6bit>|<6bit> | ||
69 | // .......same for 62|<6bit>|<6bit>|<6bit>|<6bit> | ||
70 | // .......same for 63|<6bit>|<6bit>|<6bit>|<6bit>|<6bit> | ||
71 | dbg_dec64("c:%d val:0x%08x", (int)c, (unsigned)val); | ||
72 | while (c > end) { | ||
73 | dbg_dec64("c:%d > end:%d", (int)c, (int)end); | ||
74 | val += (end + 1 - start) << bits; | ||
75 | dbg_dec64("val+=0x%08x", (int)((end + 1 - start) << bits)); | ||
76 | dbg_dec64(" val:0x%08x", (unsigned)val); | ||
77 | start = end + 1; | ||
78 | end += (64 - end) / 2; | ||
79 | bits += 6; | ||
80 | dbg_dec64("start=%d", (int)start); | ||
81 | dbg_dec64("end=%d", (int)end); | ||
82 | dbg_dec64("bits=%d", (int)bits); | ||
83 | } | ||
84 | |||
85 | val += (c - start) << bits; | ||
86 | dbg_dec64("final val+=0x%08x", (int)((c - start) << bits)); | ||
87 | dbg_dec64(" val:0x%08x", (unsigned)val); | ||
88 | |||
89 | while (bits != 0) { | ||
90 | c = a2i64(*src++); | ||
91 | if (c > 63) | ||
92 | goto fail; | ||
93 | bits -= 6; | ||
94 | val += c << bits; | ||
95 | dbg_dec64("low bits val+=0x%08x", (int)(c << bits)); | ||
96 | dbg_dec64(" val:0x%08x", (unsigned)val); | ||
97 | } | ||
98 | ret: | ||
99 | *dst = val; | ||
100 | return src; | ||
101 | fail: | ||
102 | val = 0; | ||
103 | src = NULL; | ||
104 | goto ret; | ||
105 | } | ||
106 | |||
#if TEST_DECODE64
/* Decode enc with decode64_uint32() (starting value 0) and die unless
 * the result equals expected and exactly len chars were consumed.
 */
static void check_decode64_uint32(const char *enc, uint32_t expected, unsigned len)
{
	const uint8_t *src = (const void*)enc;
	const uint8_t *end;
	uint32_t u32;

	end = decode64_uint32(&u32, src, 0);
	if (u32 != expected)
		bb_error_msg_and_die("Incorrect decode '%s':0x%08x", enc, (unsigned)u32);
	/* %p requires void pointers, hence the casts */
	if (end != src + len)
		bb_error_msg_and_die("Incorrect decode '%s': %p end:%p", enc, (const void*)src, (const void*)end);
}

/* Self-test of decode64_uint32() at the boundaries between encodings
 * of different lengths. Dies on the first mismatch.
 */
static void test_decode64_uint32(void)
{
	/* How many values the 1, 2, 3, 4 and 5 char encodings can express */
	enum {
		a = 48,
		b = 8<<6,  // 0x0200
		c = 4<<12, // 0x04000
		d = 2<<18, // 0x080000
		e = 1<<24, // 0x1000000
	};

	check_decode64_uint32("wzzz", 0x0003ffff+c+b+a, 4);
	check_decode64_uint32("xzzz", 0x0007ffff+c+b+a, 4);
	// Note how the last representable "x---" encoding, 0x7ffff, is exactly d-1!
	// And if we now increment it, we get:
	check_decode64_uint32("y....", 0x00000000+d+c+b+a, 5);
	check_decode64_uint32("yzzzz", 0x00ffffff+d+c+b+a, 5);

	check_decode64_uint32("zzzzzz", 0x3fffffff+e+d+c+b+a, 6);

	bb_error_msg("test_decode64_uint32() OK");
}
#else
# define test_decode64_uint32() ((void)0)
#endif
147 | |||
148 | #endif /* !RESTRICTED_PARAMS */ | ||
149 | |||
150 | #if 1 | ||
/*
 * Decode the ascii64 text at src into bytes in dst[].
 * Each char carries 6 bits, least significant first; chars are taken in
 * groups of up to four and each group is stored as up to three bytes.
 * Decoding stops at NUL, at '$', or at the first non-ascii64 char.
 *
 * On entry *dstlen is the capacity of dst[], on success it is set to the
 * number of bytes stored. Returns the position where decoding stopped,
 * or NULL if dst[] is too small.
 */
static const uint8_t *decode64(
		uint8_t *dst, size_t *dstlen,
		const uint8_t *src)
{
	unsigned dstpos = 0;

	dbg_dec64("src:'%s'", src);
	for (;;) {
		uint32_t acc = 0;
		int nbits = 0;
		int at_end;

		/* Gather up to 4 chars (24 bits) */
		while (nbits != 24 && *src != '\0' && *src != '$') {
			uint32_t digit = a2i64(*src);
			if (digit > 63) /* bad ascii64 char, stop decoding at it */
				break;
			src++;
			acc |= digit << nbits;
			nbits += 6;
		}
		/* Nothing gathered: entire src is consumed, or we sit on a bad char */
		if (nbits == 0)
			break;

		/* src does not move while the group is stored */
		at_end = (*src == '\0' || *src == '$');

		dbg_dec64(" storing bits:%d dstpos:%u v:%08x", nbits, dstpos, (int)SWAP_BE32(acc)); //BE to see lsb first
		do {
			if (at_end && acc == 0 && nbits < 8) {
				/* Example: mkpasswd PWD '$y$j9T$123':
				 * the "123" is bits:18 value:03,51,00
				 * is considered to be 2 bytes, not 3!
				 *
				 * '$y$j9T$zzz' in upstream fails outright (3rd byte isn't zero).
				 * IOW: for upstream, validity of salt depends on VALUE,
				 * not just size of salt. Which is a bug.
				 * The '$y$j9T$zzz.' salt is the same
				 * (it adds 6 zero msbits) but upstream works with it,
				 * thus '$y$j9T$zzz' should work too and give the same result.
				 */
				goto done;
			}
			if (dstpos >= *dstlen) {
				dbg_dec64(" ERR: bits:%d dstpos:%u dst[] is too small", nbits, dstpos);
				/* *dstlen = 0; - not needed, caller detects error by seeing NULL */
				return NULL;
			}
			*dst++ = acc;
			dstpos++;
			acc >>= 8;
			nbits -= 8;
			/* nbits can get negative, if we e.g. had 6 bits */
		} while (nbits > 0);

		if (at_end)
			break;
	}
 done:
	*dstlen = dstpos;
	dbg_dec64("dec64: OK, dst[%d]", (int)dstpos);
	return src;
}
217 | #else | ||
/* Buggy (and larger) original code.
 * Kept for reference only: it sits in the #else branch of "#if 1".
 */
static const uint8_t *decode64(
		uint8_t *dst, size_t *dstlen,
		const uint8_t *src, size_t srclen)
{
	size_t pos = 0;

	while (pos <= *dstlen && srclen) {
		uint32_t acc = 0, nbits = 0;

		/* Gather up to 4 chars (24 bits) */
		while (srclen--) {
			uint32_t digit = a2i64(*src);
			if (digit > 63) {
				srclen = 0;
				break;
			}
			src++;
			acc |= digit << nbits;
			nbits += 6;
			if (nbits >= 24)
				break;
		}
		if (nbits == 0)
			break;
		if (nbits < 12) /* must have at least one full byte */
			return NULL;
		dbg_dec64(" storing bits:%d v:%08x", (int)nbits, (int)SWAP_BE32(acc)); //BE to see lsb first
		while (pos++ < *dstlen) {
			*dst++ = acc;
			acc >>= 8;
			nbits -= 8;
			if (nbits < 8) { /* 2 or 4 */
				if (acc != 0) /* must be 0 */
					return NULL;
				nbits = 0;
				break;
			}
		}
		/* Bits are left over only if dst[] filled up first */
		if (nbits != 0)
			return NULL;
	}

	if (srclen != 0 || pos > *dstlen) {
		/* *dstlen = 0; - not needed, caller detects error by seeing NULL */
		return NULL;
	}
	*dstlen = pos;
	dbg_dec64("dec64: OK, dst[%d]", (int)pos);
	return src;
}
268 | #endif | ||
269 | |||
/*
 * Encode srclen bytes at src as NUL-terminated ascii64 text in dst[],
 * which has room for dstlen chars.
 * Returns a pointer to the terminating NUL, or NULL if dst[] is too small.
 */
static char *encode64(
		char *dst, size_t dstlen,
		const uint8_t *src, size_t srclen)
{
	while (srclen != 0) {
		uint32_t group = 0;
		uint32_t nbytes = 0;
		uint32_t nchars;

		/* Pack up to 3 bytes, the first one into the lowest bits */
		while (srclen != 0 && nbytes < 3) {
			group |= (uint32_t)(*src++ << (8 * nbytes));
			nbytes++;
			srclen--;
		}

		/* 1, 2 or 3 bytes will become 2, 3 or 4 ascii64 chars */
		nchars = nbytes + 1;
		dstlen -= nchars;
		/* Room for the terminating NUL must remain as well */
		if ((ssize_t)dstlen <= 0)
			return NULL;
		dst = num2str64_lsb_first(dst, group, nchars);
	}
	*dst = '\0';
	return dst;
}
292 | |||
293 | char *yescrypt_r( | ||
294 | const uint8_t *passwd, size_t passwdlen, | ||
295 | const uint8_t *setting, | ||
296 | char *buf, size_t buflen) | ||
297 | { | ||
298 | struct { | ||
299 | yescrypt_ctx_t yctx[1]; | ||
300 | unsigned char hashbin32[32]; | ||
301 | } u; | ||
302 | #define yctx u.yctx | ||
303 | #define hashbin32 u.hashbin32 | ||
304 | char *dst; | ||
305 | const uint8_t *src, *saltend; | ||
306 | size_t need, prefixlen; | ||
307 | uint32_t u32; | ||
308 | |||
309 | test_decode64_uint32(); | ||
310 | |||
311 | memset(yctx, 0, sizeof(yctx)); | ||
312 | FULL_PARAMS(yctx->param.p = 1;) | ||
313 | |||
314 | /* we assume setting starts with "$y$" (caller must ensure this) */ | ||
315 | src = setting + 3; | ||
316 | |||
317 | src = decode64_uint32(&yctx->param.flags, src, 0); | ||
318 | /* "j9T" returns: 0x2f */ | ||
319 | //if (!src) | ||
320 | // goto fail; | ||
321 | |||
322 | if (yctx->param.flags < YESCRYPT_RW) { | ||
323 | dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags); | ||
324 | goto fail; // bbox: we don't support scrypt - only yescrypt | ||
325 | } else if (yctx->param.flags <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { | ||
326 | /* "j9T" sets flags to 0xb6 */ | ||
327 | yctx->param.flags = YESCRYPT_RW + ((yctx->param.flags - YESCRYPT_RW) << 2); | ||
328 | dbg("yctx->param.flags=0x%x", (unsigned)yctx->param.flags); | ||
329 | dbg(" YESCRYPT_RW:%u", !!(yctx->param.flags & YESCRYPT_RW)); | ||
330 | dbg((yctx->param.flags & YESCRYPT_RW_FLAVOR_MASK) == | ||
331 | (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K) | ||
332 | ? " YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K" | ||
333 | : " flags are not standard" | ||
334 | ); | ||
335 | } else { | ||
336 | goto fail; | ||
337 | } | ||
338 | |||
339 | src = decode64_uint32(&u32, src, 1); | ||
340 | if (/*!src ||*/ u32 > 63) | ||
341 | goto fail; | ||
342 | yctx->param.N = (uint64_t)1 << u32; | ||
343 | /* "j9T" sets to 4096 (1<<12) */ | ||
344 | dbg("yctx->param.N=%llu (1<<%u)", (unsigned long long)yctx->param.N, (unsigned)u32); | ||
345 | |||
346 | src = decode64_uint32(&yctx->param.r, src, 1); | ||
347 | /* "j9T" sets to 32 */ | ||
348 | dbg("yctx->param.r=%u", yctx->param.r); | ||
349 | |||
350 | if (!src) | ||
351 | goto fail; | ||
352 | if (*src != '$') { | ||
353 | #if RESTRICTED_PARAMS | ||
354 | goto fail; | ||
355 | #else | ||
356 | src = decode64_uint32(&u32, src, 1); | ||
357 | dbg("yescrypt has extended params:0x%x", (unsigned)u32); | ||
358 | if (u32 & 1) | ||
359 | src = decode64_uint32(&yctx->param.p, src, 2); | ||
360 | if (u32 & 2) | ||
361 | src = decode64_uint32(&yctx->param.t, src, 1); | ||
362 | if (u32 & 4) | ||
363 | src = decode64_uint32(&yctx->param.g, src, 1); | ||
364 | if (u32 & 8) { | ||
365 | src = decode64_uint32(&u32, src, 1); | ||
366 | if (/*!src ||*/ u32 > 63) | ||
367 | goto fail; | ||
368 | yctx->param.NROM = (uint64_t)1 << u32; | ||
369 | } | ||
370 | if (!src) | ||
371 | goto fail; | ||
372 | if (*src != '$') | ||
373 | goto fail; | ||
374 | #endif | ||
375 | } | ||
376 | |||
377 | yctx->saltlen = sizeof(yctx->salt); | ||
378 | src++; /* now points to salt */ | ||
379 | saltend = decode64(yctx->salt, &yctx->saltlen, src); | ||
380 | if (!saltend || (*saltend != '\0' && *saltend != '$')) | ||
381 | goto fail; /* salt[] is too small, or bad char during decode */ | ||
382 | dbg_dec64("salt is %d ascii64 chars -> %d bytes (in binary)", (int)(saltend - src), (int)yctx->saltlen); | ||
383 | |||
384 | prefixlen = saltend - setting; | ||
385 | need = prefixlen + 1 + YESCRYPT_HASH_LEN + 1; | ||
386 | if (need > buflen /*overflow is quite unlikely: || need < prefixlen*/) | ||
387 | goto fail; | ||
388 | |||
389 | if (yescrypt_kdf32(yctx, passwd, passwdlen, hashbin32)) { | ||
390 | dbg("error in yescrypt_kdf32"); | ||
391 | goto fail; | ||
392 | } | ||
393 | |||
394 | dst = mempcpy(buf, setting, prefixlen); | ||
395 | *dst++ = '$'; | ||
396 | dst = encode64(dst, buflen - (dst - buf), hashbin32, sizeof(hashbin32)); | ||
397 | if (!dst) | ||
398 | goto fail; | ||
399 | ret: | ||
400 | free_region(yctx->local); | ||
401 | explicit_bzero(&u, sizeof(u)); | ||
402 | return buf; | ||
403 | fail: | ||
404 | buf = NULL; | ||
405 | goto ret; | ||
406 | #undef yctx | ||
407 | #undef hashbin32 | ||
408 | } | ||
diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c new file mode 100644 index 000000000..a9a1bd591 --- /dev/null +++ b/libbb/yescrypt/alg-yescrypt-kdf.c | |||
@@ -0,0 +1,1212 @@ | |||
1 | /*- | ||
2 | * Copyright 2009 Colin Percival | ||
3 | * Copyright 2012-2018 Alexander Peslyak | ||
4 | * All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer. | ||
11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer in the | ||
13 | * documentation and/or other materials provided with the distribution. | ||
14 | * | ||
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | ||
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
25 | * SUCH DAMAGE. | ||
26 | * | ||
27 | * This file was originally written by Colin Percival as part of the Tarsnap | ||
28 | * online backup system. | ||
29 | */ | ||
30 | |||
/*
 * Portability shims: make the `restrict` qualifier and the unlikely()
 * branch hint usable whatever the compiler or language level is.
 */
#if __STDC_VERSION__ >= 199901L
/* Have restrict */
#elif defined(__GNUC__)
/* Older language level on a GNU-compatible compiler: use its own spelling */
#define restrict __restrict
#else
/* No known equivalent: make the qualifier vanish */
#define restrict
#endif

#ifdef __GNUC__
/* Tell the compiler that exp is expected to evaluate to 0 */
#define unlikely(exp) __builtin_expect(exp, 0)
#else
#define unlikely(exp) (exp)
#endif
44 | |||
/*
 * One 64-byte block, accessible either as sixteen 32-bit words (w) or as
 * eight 64-bit words (d) over the same storage.
 */
typedef union {
	uint32_t w[16];
	uint64_t d[8];
} salsa20_blk_t;
49 | |||
50 | static void salsa20_simd_shuffle( | ||
51 | const salsa20_blk_t *Bin, | ||
52 | salsa20_blk_t *Bout) | ||
53 | { | ||
54 | #define COMBINE(out, in1, in2) \ | ||
55 | do { \ | ||
56 | Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); \ | ||
57 | } while (0) | ||
58 | COMBINE(0, 0, 2); | ||
59 | COMBINE(1, 5, 7); | ||
60 | COMBINE(2, 2, 4); | ||
61 | COMBINE(3, 7, 1); | ||
62 | COMBINE(4, 4, 6); | ||
63 | COMBINE(5, 1, 3); | ||
64 | COMBINE(6, 6, 0); | ||
65 | COMBINE(7, 3, 5); | ||
66 | #undef COMBINE | ||
67 | } | ||
68 | |||
69 | static void salsa20_simd_unshuffle( | ||
70 | const salsa20_blk_t *Bin, | ||
71 | salsa20_blk_t *Bout) | ||
72 | { | ||
73 | #define UNCOMBINE(out, in1, in2) \ | ||
74 | do { \ | ||
75 | Bout->w[out * 2] = Bin->d[in1]; \ | ||
76 | Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; \ | ||
77 | } while (0) | ||
78 | UNCOMBINE(0, 0, 6); | ||
79 | UNCOMBINE(1, 5, 3); | ||
80 | UNCOMBINE(2, 2, 0); | ||
81 | UNCOMBINE(3, 7, 5); | ||
82 | UNCOMBINE(4, 4, 2); | ||
83 | UNCOMBINE(5, 1, 7); | ||
84 | UNCOMBINE(6, 6, 4); | ||
85 | UNCOMBINE(7, 3, 1); | ||
86 | #undef UNCOMBINE | ||
87 | } | ||
88 | |||
/*
 * Declare the local working blocks.  Many macros below refer to the
 * variables X and Y by these exact names.
 */
#define DECL_X \
	salsa20_blk_t X
#define DECL_Y \
	salsa20_blk_t Y

/*
 * COPY(out, in): copy the whole 64-byte block `in` to `out`, either as eight
 * explicit 64-bit assignments or as a single memcpy(), selected by
 * KDF_UNROLL_COPY.
 */
#if KDF_UNROLL_COPY
#define COPY(out, in) \
do { \
	(out).d[0] = (in).d[0]; \
	(out).d[1] = (in).d[1]; \
	(out).d[2] = (in).d[2]; \
	(out).d[3] = (in).d[3]; \
	(out).d[4] = (in).d[4]; \
	(out).d[5] = (in).d[5]; \
	(out).d[6] = (in).d[6]; \
	(out).d[7] = (in).d[7]; \
} while (0)
#else
#define COPY(out, in) \
do { \
	memcpy((out).d, (in).d, sizeof((in).d)); \
} while (0)
#endif

/* Load the local X from a block / store the local X to a block */
#define READ_X(in) COPY(X, in)
#define WRITE_X(out) COPY(out, X)
115 | |||
/**
 * salsa20(B, Bout, doublerounds):
 * Apply the Salsa20 core to the provided block.
 *
 * B is unshuffled into a local copy, `doublerounds` column+row double-rounds
 * are applied to it, and the result is shuffled into Bout.  Finally the
 * original words of B are added to Bout and the sum is stored in both B and
 * Bout.
 *
 * doublerounds must be nonzero: the loop is a do/while with a pre-decrement,
 * so 0 would wrap around instead of running zero rounds.
 */
static void salsa20(salsa20_blk_t *restrict B,
		salsa20_blk_t *restrict Bout,
		uint32_t doublerounds)
{
	salsa20_blk_t X;
#define x X.w

	salsa20_simd_unshuffle(B, &X);

	do {
/* R(a,b): rotate the 32-bit value a left by b bits */
#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
		/* Operate on columns */
#if KDF_UNROLL_SALSA20
		x[ 4] ^= R(x[ 0]+x[12], 7); // x[j] ^= R(x[k]+x[l], CONST)
		x[ 8] ^= R(x[ 4]+x[ 0], 9);
		x[12] ^= R(x[ 8]+x[ 4],13);
		x[ 0] ^= R(x[12]+x[ 8],18);

		x[ 9] ^= R(x[ 5]+x[ 1], 7);
		x[13] ^= R(x[ 9]+x[ 5], 9);
		x[ 1] ^= R(x[13]+x[ 9],13);
		x[ 5] ^= R(x[ 1]+x[13],18);

		x[14] ^= R(x[10]+x[ 6], 7);
		x[ 2] ^= R(x[14]+x[10], 9);
		x[ 6] ^= R(x[ 2]+x[14],13);
		x[10] ^= R(x[ 6]+x[ 2],18);

		x[ 3] ^= R(x[15]+x[11], 7);
		x[ 7] ^= R(x[ 3]+x[15], 9);
		x[11] ^= R(x[ 7]+x[ 3],13);
		x[15] ^= R(x[11]+x[ 7],18);
#else
		/*
		 * Rolled-up form of the sixteen statements above: (j, k, l)
		 * step through the same index triples in the same order.
		 */
		{
			unsigned j, k, l;
			j = 4; k = 0; l = 12;
			for (;;) {
				uint32_t t;
				x[j] ^= ({ t = x[k] + x[l]; R(t, 7); }); l = k; k = j; j = (j+4) & 0xf;
				x[j] ^= ({ t = x[k] + x[l]; R(t, 9); }); l = k; k = j; j = (j+4) & 0xf;
				x[j] ^= ({ t = x[k] + x[l]; R(t,13); }); l = k; k = j; j = (j+4) & 0xf;
				x[j] ^= ({ t = x[k] + x[l]; R(t,18); });
				/* x[15] is the last word updated in the column pass */
				if (j == 15) break;
				/* Move on to the next column's starting triple */
				l = j + 1; k = j + 5; j = (j+9) & 0xf;
			}
		}
#endif
		/* Operate on rows */
#if KDF_UNROLL_SALSA20
		// i=0 n=0
		x[ 1] ^= R(x[ 0]+x[ 3], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
		x[ 2] ^= R(x[ 1]+x[ 0], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
		x[ 3] ^= R(x[ 2]+x[ 1],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
		x[ 0] ^= R(x[ 3]+x[ 2],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
		// i=4 n=1                      ^^^j^^^         ^^^k^^^         ^^^l^^^
		x[ 6] ^= R(x[ 5]+x[ 4], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
		x[ 7] ^= R(x[ 6]+x[ 5], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
		x[ 4] ^= R(x[ 7]+x[ 6],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
		x[ 5] ^= R(x[ 4]+x[ 7],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
		// i=8 n=2
		x[11] ^= R(x[10]+x[ 9], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
		x[ 8] ^= R(x[11]+x[10], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
		x[ 9] ^= R(x[ 8]+x[11],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
		x[10] ^= R(x[ 9]+x[ 8],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
		// i=12 n=3
		x[12] ^= R(x[15]+x[14], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
		x[13] ^= R(x[12]+x[15], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
		x[14] ^= R(x[13]+x[12],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
		x[15] ^= R(x[14]+x[13],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
#else
		/*
		 * Rolled-up form of the row pass: xrow points at the current
		 * group of four words, (j, k, l) are indices within that group.
		 */
		{
			unsigned j, k, l;
			uint32_t *xrow;
			j = 1; k = 0; l = 3;
			xrow = &x[0];
			for (;;) {
				uint32_t t;
				xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 7); }); l = k; k = j; j = (j+1) & 3;
				xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 9); }); l = k; k = j; j = (j+1) & 3;
				xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,13); }); l = k; k = j; j = (j+1) & 3;
				xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,18); });
				/* The fourth group finishes on its word 3 (x[15]) */
				if (j == 3) break;
				l = j; k = j + 1; j = (j+2) & 3;
				xrow += 4;
			}
		}
#endif

#undef R
	} while (--doublerounds);
#undef x

	{
		uint32_t i;
		salsa20_simd_shuffle(&X, Bout);
		/* Feed-forward: add the input block; keep the sum in both B and Bout */
		for (i = 0; i < 16; i++) {
			// bbox: note: was unrolled x4
			B->w[i] = Bout->w[i] += B->w[i];
		}
	}
#if 0
	/* Too expensive */
	explicit_bzero(&X, sizeof(X));
#endif
}
225 | |||
/**
 * Apply the Salsa20/2 core to the block provided in X.
 * Expands to a single double-round salsa20() call on the local X; the
 * result ends up in both X and `out`.
 */
#define SALSA20_2(out) \
	salsa20(&X, &out, 1)

/*
 * XOR(out, in1, in2): out = in1 ^ in2 over a whole 64-byte block.
 * The open-coded variant is disabled; the active one delegates to
 * xorbuf64_3_aligned64(), which is defined elsewhere (presumably with the
 * same effect as the disabled form -- confirm against its definition).
 */
#if 0
#define XOR(out, in1, in2) \
do { \
	(out).d[0] = (in1).d[0] ^ (in2).d[0]; \
	(out).d[1] = (in1).d[1] ^ (in2).d[1]; \
	(out).d[2] = (in1).d[2] ^ (in2).d[2]; \
	(out).d[3] = (in1).d[3] ^ (in2).d[3]; \
	(out).d[4] = (in1).d[4] ^ (in2).d[4]; \
	(out).d[5] = (in1).d[5] ^ (in2).d[5]; \
	(out).d[6] = (in1).d[6] ^ (in2).d[6]; \
	(out).d[7] = (in1).d[7] ^ (in2).d[7]; \
} while (0)
#else
#define XOR(out, in1, in2) \
do { \
	xorbuf64_3_aligned64(&(out).d, &(in1).d, &(in2).d); \
} while (0)
#endif

/* X ^= in */
#define XOR_X(in) XOR(X, X, in)
/* X = in1 ^ in2 */
#define XOR_X_2(in1, in2) XOR(X, in1, in2)
/*
 * Y = out ^ in; out = Y; X ^= Y.
 * Uses the locals X and Y; `out` is overwritten with the XOR of both blocks.
 */
#define XOR_X_WRITE_XOR_Y_2(out, in) \
do { \
	XOR(Y, out, in); \
	COPY(out, Y); \
	XOR(X, X, Y); \
} while (0)

/**
 * Apply the Salsa20/8 core to the block provided in X ^ in.
 * Four double-rounds; the result ends up in both X and `out`.
 */
#define SALSA20_8_XOR_MEM(in, out) \
do { \
	XOR_X(in); \
	salsa20(&X, &out, 4); \
} while (0)

/* Low 32 bits of the first 64-bit word of the local X */
#define INTEGERIFY ((uint32_t)X.d[0])
270 | |||
271 | /** | ||
272 | * blockmix_salsa8(Bin, Bout, r): | ||
273 | * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r | ||
274 | * bytes in length; the output Bout must also be the same size. | ||
275 | */ | ||
276 | static void blockmix_salsa8( | ||
277 | const salsa20_blk_t *restrict Bin, | ||
278 | salsa20_blk_t *restrict Bout, | ||
279 | size_t r) | ||
280 | { | ||
281 | size_t i; | ||
282 | DECL_X; | ||
283 | |||
284 | READ_X(Bin[r * 2 - 1]); | ||
285 | for (i = 0; i < r; i++) { | ||
286 | SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]); | ||
287 | SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]); | ||
288 | } | ||
289 | } | ||
290 | |||
291 | static uint32_t blockmix_salsa8_xor( | ||
292 | const salsa20_blk_t *restrict Bin1, | ||
293 | const salsa20_blk_t *restrict Bin2, | ||
294 | salsa20_blk_t *restrict Bout, | ||
295 | size_t r) | ||
296 | { | ||
297 | size_t i; | ||
298 | DECL_X; | ||
299 | |||
300 | XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]); | ||
301 | for (i = 0; i < r; i++) { | ||
302 | XOR_X(Bin1[i * 2]); | ||
303 | SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]); | ||
304 | XOR_X(Bin1[i * 2 + 1]); | ||
305 | SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]); | ||
306 | } | ||
307 | |||
308 | return INTEGERIFY; | ||
309 | } | ||
310 | |||
/* This is tunable */
#define Swidth 8

/* Not tunable in this implementation, hard-coded in a few places */
#define PWXsimple 2
#define PWXgather 4

/*
 * Derived values. Not tunable except via Swidth above.
 * With the settings above: PWXbytes = 64, Sbytes = 12288 (three tables of
 * 4096 bytes each), Smask = 0xff0.  Smask2 carries Smask in both of its
 * 32-bit halves.
 */
#define PWXbytes (PWXgather * PWXsimple * 8)
#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8)
#define Smask (((1 << Swidth) - 1) * PWXsimple * 8)
#define Smask2 (((uint64_t)Smask << 32) | Smask)

/* These three hooks expand to nothing in this implementation */
#define DECL_SMASK2REG do {} while (0)
#define FORCE_REGALLOC_3 do {} while (0)
#define MAYBE_MEMORY_BARRIER do {} while (0)

/*
 * PWXFORM_SIMD(x0, x1): one S-box step on a pair of 64-bit lanes.
 * x0 masked with Smask2 yields two byte offsets: the low 32 bits index into
 * S0 and the high 32 bits index into S1.  Each lane is then replaced by
 * ((high32(lane) * low32(lane)) + S0 entry) ^ S1 entry.
 * Uses the locals S0 and S1 of the enclosing function.
 */
#define PWXFORM_SIMD(x0, x1) \
do { \
	uint64_t x = x0 & Smask2; \
	uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \
	uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \
	x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \
	x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \
} while (0)

/* PWXFORM_ROUND: apply PWXFORM_SIMD to all four lane pairs of the local X */
#if KDF_UNROLL_PWXFORM_ROUND
#define PWXFORM_ROUND \
do { \
	PWXFORM_SIMD(X.d[0], X.d[1]); \
	PWXFORM_SIMD(X.d[2], X.d[3]); \
	PWXFORM_SIMD(X.d[4], X.d[5]); \
	PWXFORM_SIMD(X.d[6], X.d[7]); \
} while (0)
#else
#define PWXFORM_ROUND \
do { \
	for (int pwxi=0; pwxi<8; pwxi+=2) \
		PWXFORM_SIMD(X.d[pwxi], X.d[pwxi + 1]); \
} while (0)
#endif

/*
 * This offset helps address the 256-byte write block via the single-byte
 * displacements encodable in x86(-64) instructions. It is needed because the
 * displacements are signed. Without it, we'd get 4-byte displacements for
 * half of the writes. Setting it to 0x80 instead of 0x7c would avoid needing
 * a displacement for one of the writes, but then the LEA instruction would
 * need a 4-byte displacement.
 */
#define PWXFORM_WRITE_OFFSET 0x7c

/*
 * PWXFORM_WRITE: store the local X at the current write position (Sw minus
 * the bias above) and advance Sw by one 64-byte block.
 */
#define PWXFORM_WRITE \
do { \
	WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)); \
	Sw += 64; \
} while (0)

/*
 * PWXFORM: the full transform of the local X.
 * Six rounds are run; after the 2nd, 3rd, 4th and 5th the state is written
 * into S2 at offset w (4 x 64 = 256 bytes in total).  Then w advances by 256
 * (masked with Smask2) and the three tables rotate:
 * S2 <- S1, S1 <- S0, S0 <- old S2.
 * Uses and updates the locals S0, S1, S2 and w of the enclosing function.
 */
#if KDF_UNROLL_PWXFORM
#define PWXFORM \
do { \
	uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
	FORCE_REGALLOC_3; \
	MAYBE_MEMORY_BARRIER; \
	PWXFORM_ROUND; \
	PWXFORM_ROUND; PWXFORM_WRITE; \
	PWXFORM_ROUND; PWXFORM_WRITE; \
	PWXFORM_ROUND; PWXFORM_WRITE; \
	PWXFORM_ROUND; PWXFORM_WRITE; \
	PWXFORM_ROUND; \
	w = (w + 64 * 4) & Smask2; \
	{ \
		uint8_t *Stmp = S2; \
		S2 = S1; \
		S1 = S0; \
		S0 = Stmp; \
	} \
} while (0)
#else
#define PWXFORM \
do { \
	uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \
	FORCE_REGALLOC_3; \
	MAYBE_MEMORY_BARRIER; \
	PWXFORM_ROUND; \
	for (int pwxj=0; pwxj<4; pwxj++) {\
		PWXFORM_ROUND; PWXFORM_WRITE; \
	} \
	PWXFORM_ROUND; \
	w = (w + 64 * 4) & Smask2; \
	{ \
		uint8_t *Stmp = S2; \
		S2 = S1; \
		S1 = S0; \
		S0 = Stmp; \
	} \
} while (0)
#endif
409 | |||
/*
 * State of the pwxform S-boxes: pointers to the three tables and the
 * current write offset w.
 */
typedef struct {
	uint8_t *S0, *S1, *S2;
	size_t w;
} pwxform_ctx_t;

/*
 * Bytes reserved per S-box set: the tables themselves plus room for one
 * pwxform_ctx_t, with the context size rounded up to a multiple of 64.
 */
#define Salloc (Sbytes + ((sizeof(pwxform_ctx_t) + 63) & ~63U))
416 | |||
/**
 * blockmix(Bin, Bout, r, ctx):
 * Compute Bout = BlockMix_pwxform{salsa20/2, r, S}(Bin). The input Bin must
 * be 128r bytes in length; the output Bout must also be the same size.
 *
 * ctx supplies the S-box pointers and write offset; their updated values
 * are stored back into ctx before returning.
 */
static void blockmix(
		const salsa20_blk_t *restrict Bin,
		salsa20_blk_t *restrict Bout,
		size_t r,
		pwxform_ctx_t *restrict ctx)
{
	/* Local copies under the names the PWXFORM macros expect */
	uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
	size_t w = ctx->w;
	size_t i;
	DECL_X;

	/* Convert count of 128-byte blocks to max index of 64-byte block */
	r = r * 2 - 1;

	/* Start from the last input block */
	READ_X(Bin[r]);

	DECL_SMASK2REG;

	i = 0;
	for (;;) {
		XOR_X(Bin[i]);
		PWXFORM;
		/* The last block is not stored here: it goes through Salsa20/2 below */
		if (unlikely(i >= r))
			break;
		WRITE_X(Bout[i]);
		i++;
	}

	/* Save the rotated table pointers and the advanced write offset */
	ctx->S0 = S0;
	ctx->S1 = S1;
	ctx->S2 = S2;
	ctx->w = w;

	/* i is the last block index here; its output is Salsa20/2 of the state */
	SALSA20_2(Bout[i]);
}
457 | |||
/*
 * Like blockmix(), but the input is the XOR of the 2r-block arrays Bin1 and
 * Bin2.  Returns the low 32 bits of the first 64-bit word of the final
 * state, which is also what gets stored in the last block of Bout.
 *
 * Bin1 and Bout are not restrict-qualified; smix2() passes the same buffer
 * for both.
 */
static uint32_t blockmix_xor(
		const salsa20_blk_t *Bin1,
		const salsa20_blk_t *restrict Bin2,
		salsa20_blk_t *Bout,
		size_t r,
		pwxform_ctx_t *restrict ctx)
{
	/* Local copies under the names the PWXFORM macros expect */
	uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
	size_t w = ctx->w;
	size_t i;
	DECL_X;

	/* Convert count of 128-byte blocks to max index of 64-byte block */
	r = r * 2 - 1;

	/* X = XOR of the two last input blocks */
	XOR_X_2(Bin1[r], Bin2[r]);

	DECL_SMASK2REG;

	i = 0;
	/* r becomes (last index - 1), so "i > r" below means "i is the last index" */
	r--;
	for (;;) {
		XOR_X(Bin1[i]);
		XOR_X(Bin2[i]);
		PWXFORM;
		if (unlikely(i > r))
			break;
		WRITE_X(Bout[i]);
		i++;
	}

	/* Save the rotated table pointers and the advanced write offset */
	ctx->S0 = S0;
	ctx->S1 = S1;
	ctx->S2 = S2;
	ctx->w = w;

	SALSA20_2(Bout[i]);

	return INTEGERIFY;
}
498 | |||
/*
 * Variant of blockmix_xor() that works in place and also updates Bin2:
 * every block Bin2[i] is overwritten with Bin1out[i] ^ Bin2[i] (the value
 * that was mixed into the state), and the BlockMix output replaces Bin1out.
 * Returns the low 32 bits of the first 64-bit word of the final state.
 */
static uint32_t blockmix_xor_save(
		salsa20_blk_t *restrict Bin1out,
		salsa20_blk_t *restrict Bin2,
		size_t r,
		pwxform_ctx_t *restrict ctx)
{
	/* Local copies under the names the PWXFORM macros expect */
	uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2;
	size_t w = ctx->w;
	size_t i;
	DECL_X;
	DECL_Y;

	/* Convert count of 128-byte blocks to max index of 64-byte block */
	r = r * 2 - 1;

	/* X = XOR of the two last input blocks */
	XOR_X_2(Bin1out[r], Bin2[r]);

	DECL_SMASK2REG;

	i = 0;
	/* r becomes (last index - 1), so "i > r" below means "i is the last index" */
	r--;
	for (;;) {
		/* Y = Bin2[i] ^ Bin1out[i]; Bin2[i] = Y; X ^= Y */
		XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]);
		PWXFORM;
		if (unlikely(i > r))
			break;
		WRITE_X(Bin1out[i]);
		i++;
	}

	/* Save the rotated table pointers and the advanced write offset */
	ctx->S0 = S0;
	ctx->S1 = S1;
	ctx->S2 = S2;
	ctx->w = w;

	SALSA20_2(Bin1out[i]);

	return INTEGERIFY;
}
538 | |||
539 | /** | ||
540 | * integerify(B, r): | ||
541 | * Return the result of parsing B_{2r-1} as a little-endian integer. | ||
542 | */ | ||
543 | static inline uint32_t integerify(const salsa20_blk_t *B, size_t r) | ||
544 | { | ||
545 | /* | ||
546 | * Our 64-bit words are in host byte order, which is why we don't just read | ||
547 | * w[0] here (would be wrong on big-endian). Also, our 32-bit words are | ||
548 | * SIMD-shuffled (so the next 32 bits would be part of d[6]), but currently | ||
549 | * this does not matter as we only care about the least significant 32 bits. | ||
550 | */ | ||
551 | return (uint32_t)B[2 * r - 1].d[0]; | ||
552 | } | ||
553 | |||
/**
 * smix1(B, r, N, flags, V, NROM, VROM, XY, ctx):
 * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in
 * length; the temporary storage V must be 128rN bytes in length; the temporary
 * storage XY must be 128r+64 bytes in length. N must be even and at least 4.
 * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY
 * to a multiple of at least 16 bytes.
 *
 * Three code paths are selected at run time: with a ROM (VROM != NULL),
 * with YESCRYPT_RW set in flags (pwxform-based blockmix), or neither
 * (Salsa20/8-based blockmix).
 */
#if DISABLE_NROM_CODE
/*
 * Without ROM support, this macro removes the NROM and VROM arguments from
 * every call and also from the parameter list of the definition below;
 * the function body then declares them as constant locals.
 */
#define smix1(B,r,N,flags,V,NROM,VROM,XY,ctx) \
	smix1(B,r,N,flags,V,XY,ctx)
#endif
static void smix1(uint8_t *B, size_t r, uint32_t N,
		uint32_t flags,
		salsa20_blk_t *V,
		uint32_t NROM, const salsa20_blk_t *VROM,
		salsa20_blk_t *XY,
		pwxform_ctx_t *ctx)
{
#if DISABLE_NROM_CODE
	uint32_t NROM = 0;
	const salsa20_blk_t *VROM = NULL;
#endif
	/* s: number of 64-byte blocks per element of V */
	size_t s = 2 * r;
	salsa20_blk_t *X = V, *Y = &V[s];
	uint32_t i, j;

	/*
	 * Import B into V[0]: run every 32-bit word through SWAP_LE32
	 * (defined elsewhere; presumably little-endian <-> host order) and
	 * SIMD-shuffle each block.  The first block of Y serves as scratch.
	 */
	for (i = 0; i < 2 * r; i++) {
		const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64];
		salsa20_blk_t *tmp = Y;
		salsa20_blk_t *dst = &X[i];
		size_t k;
		for (k = 0; k < 16; k++)
			tmp->w[k] = SWAP_LE32(src->w[k]);
		salsa20_simd_shuffle(tmp, dst);
	}

	if (VROM) {
		/* ROM path: mixing steps alternate between V and VROM lookups */
		uint32_t n;
		const salsa20_blk_t *V_j;

		V_j = &VROM[(NROM - 1) * s];
		j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
		V_j = &VROM[j * s];
		X = Y + s;
		j = blockmix_xor(Y, V_j, X, r, ctx);

		/* n doubles each pass; j is reduced to the elements written so far */
		for (n = 2; n < N; n <<= 1) {
			uint32_t m = (n < N / 2) ? n : (N - 1 - n);
			for (i = 1; i < m; i += 2) {
				j &= n - 1;
				j += i - 1;
				V_j = &V[j * s];
				Y = X + s;
				j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
				V_j = &VROM[j * s];
				X = Y + s;
				j = blockmix_xor(Y, V_j, X, r, ctx);
			}
		}
		/* Undo the last doubling: n is the largest power of 2 below N */
		n >>= 1;

		j &= n - 1;
		j += N - 2 - n;
		V_j = &V[j * s];
		Y = X + s;
		j = blockmix_xor(X, V_j, Y, r, ctx) & (NROM - 1);
		V_j = &VROM[j * s];
		/* The final element goes to XY instead of V */
		blockmix_xor(Y, V_j, XY, r, ctx);
	} else if (flags & YESCRYPT_RW) {
//can't use flags___YESCRYPT_RW, smix1() may be called with flags = 0
		uint32_t n;
		salsa20_blk_t *V_j;

		/* The first two elements are produced without any lookup */
		blockmix(X, Y, r, ctx);
		X = Y + s;
		blockmix(Y, X, r, ctx);
		j = integerify(X, r);

		/* n doubles each pass; j is reduced to the elements written so far */
		for (n = 2; n < N; n <<= 1) {
			uint32_t m = (n < N / 2) ? n : (N - 1 - n);
			for (i = 1; i < m; i += 2) {
				Y = X + s;
				j &= n - 1;
				j += i - 1;
				V_j = &V[j * s];
				j = blockmix_xor(X, V_j, Y, r, ctx);
				j &= n - 1;
				j += i;
				V_j = &V[j * s];
				X = Y + s;
				j = blockmix_xor(Y, V_j, X, r, ctx);
			}
		}
		/* Undo the last doubling: n is the largest power of 2 below N */
		n >>= 1;

		j &= n - 1;
		j += N - 2 - n;
		V_j = &V[j * s];
		Y = X + s;
		j = blockmix_xor(X, V_j, Y, r, ctx);
		j &= n - 1;
		j += N - 1 - n;
		V_j = &V[j * s];
		/* The final element goes to XY instead of V */
		blockmix_xor(Y, V_j, XY, r, ctx);
	} else {
		/*
		 * Salsa20/8 path: fill V sequentially, two elements per
		 * iteration.  N is reduced by 2 up front and the last two
		 * steps are done after the loop, the very last one into XY.
		 */
		N -= 2;
		do {
			blockmix_salsa8(X, Y, r);
			X = Y + s;
			blockmix_salsa8(Y, X, r);
			Y = X + s;
		} while ((N -= 2));

		blockmix_salsa8(X, Y, r);
		blockmix_salsa8(Y, XY, r);
	}

	/*
	 * Export XY back into B: SWAP_LE32 each word, then unshuffle.
	 * The block at XY[s] serves as scratch (hence the extra 64 bytes
	 * required of XY).
	 */
	for (i = 0; i < 2 * r; i++) {
		const salsa20_blk_t *src = &XY[i];
		salsa20_blk_t *tmp = &XY[s];
		salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
		size_t k;
		for (k = 0; k < 16; k++)
			tmp->w[k] = SWAP_LE32(src->w[k]);
		salsa20_simd_unshuffle(tmp, dst);
	}
}
682 | |||
/**
 * smix2(B, r, N, Nloop, flags, V, NROM, VROM, XY, ctx):
 * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in
 * length; the temporary storage V must be 128rN bytes in length; the temporary
 * storage XY must be 256r bytes in length. N must be a power of 2 and at
 * least 2. Nloop must be even. The array V must be aligned to a multiple of
 * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes.
 *
 * Returns immediately, leaving B untouched, when Nloop is 0.
 */
#if DISABLE_NROM_CODE
/*
 * Without ROM support, this macro removes the NROM and VROM arguments from
 * every call and also from the parameter list of the definition below;
 * the function body then declares them as constant locals.
 */
#define smix2(B,r,N,Nloop,flags,V,NROM,VROM,XY,ctx) \
	smix2(B,r,N,Nloop,flags,V,XY,ctx)
#endif
static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop,
		uint32_t flags,
		salsa20_blk_t *V,
		uint32_t NROM, const salsa20_blk_t *VROM,
		salsa20_blk_t *XY,
		pwxform_ctx_t *ctx)
{
#if DISABLE_NROM_CODE
	uint32_t NROM = 0;
	const salsa20_blk_t *VROM = NULL;
#endif
	/* s: number of 64-byte blocks per element of V */
	size_t s = 2 * r;
	salsa20_blk_t *X = XY, *Y = &XY[s];
	uint32_t i, j;

	if (Nloop == 0)
		return;

	/*
	 * Import B into X: run every 32-bit word through SWAP_LE32 (defined
	 * elsewhere; presumably little-endian <-> host order) and SIMD-shuffle
	 * each block.  The first block of Y serves as scratch.
	 */
	for (i = 0; i < 2 * r; i++) {
		const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64];
		salsa20_blk_t *tmp = Y;
		salsa20_blk_t *dst = &X[i];
		size_t k;
		for (k = 0; k < 16; k++)
			tmp->w[k] = SWAP_LE32(src->w[k]);
		salsa20_simd_shuffle(tmp, dst);
	}

	/* Index of the first element of V to mix in */
	j = integerify(X, r) & (N - 1);

/*
 * Normally, VROM implies YESCRYPT_RW, but we check for these separately
 * because our SMix resets YESCRYPT_RW for the smix2() calls operating on the
 * entire V when p > 1.
 */
//and this is why bbox can't use flags___YESCRYPT_RW in this function
	/* Every loop below performs two mixing steps per iteration, hence Nloop -= 2 */
	if (VROM && (flags & YESCRYPT_RW)) {
		/* ROM + read-write: V is updated in place, VROM is only read */
		do {
			salsa20_blk_t *V_j = &V[j * s];
			const salsa20_blk_t *VROM_j;
			j = blockmix_xor_save(X, V_j, r, ctx) & (NROM - 1);
			VROM_j = &VROM[j * s];
			j = blockmix_xor(X, VROM_j, X, r, ctx) & (N - 1);
		} while (Nloop -= 2);
	} else if (VROM) {
		/* ROM, read-only: alternate lookups in V and VROM */
		do {
			const salsa20_blk_t *V_j = &V[j * s];
			j = blockmix_xor(X, V_j, X, r, ctx) & (NROM - 1);
			V_j = &VROM[j * s];
			j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
		} while (Nloop -= 2);
	} else if (flags & YESCRYPT_RW) {
		/* Read-write, no ROM: every visited element of V is updated */
		do {
			salsa20_blk_t *V_j = &V[j * s];
			j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1);
			V_j = &V[j * s];
			j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1);
		} while (Nloop -= 2);
	} else if (ctx) {
		/* pwxform context available, but V is only read */
		do {
			const salsa20_blk_t *V_j = &V[j * s];
			j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
			V_j = &V[j * s];
			j = blockmix_xor(X, V_j, X, r, ctx) & (N - 1);
		} while (Nloop -= 2);
	} else {
		/* No pwxform context: Salsa20/8 blockmix, ping-ponging X and Y */
		do {
			const salsa20_blk_t *V_j = &V[j * s];
			j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1);
			V_j = &V[j * s];
			j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1);
		} while (Nloop -= 2);
	}

	/* Export X back into B: SWAP_LE32 each word, then unshuffle (Y is scratch) */
	for (i = 0; i < 2 * r; i++) {
		const salsa20_blk_t *src = &X[i];
		salsa20_blk_t *tmp = Y;
		salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
		size_t k;
		for (k = 0; k < 16; k++)
			tmp->w[k] = SWAP_LE32(src->w[k]);
		salsa20_simd_unshuffle(tmp, dst);
	}
}
779 | |||
/**
 * p2floor(x):
 * Largest power of 2 not greater than argument.
 * Works by clearing the lowest set bit until only one bit remains.
 * Returns 0 for x == 0.
 */
static uint64_t p2floor(uint64_t x)
{
	for (;;) {
		/* x with its lowest set bit cleared */
		uint64_t rest = x & (x - 1);

		if (rest == 0)
			return x;
		x = rest;
	}
}
791 | |||
/**
 * smix(B, r, N, p, t, flags, V, NROM, VROM, XY, S, passwd):
 * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the
 * temporary storage V must be 128rN bytes in length; the temporary storage
 * XY must be 256r or 256rp bytes in length (the larger size is required with
 * OpenMP-enabled builds). N must be a power of 2 and at least 4. The array V
 * must be aligned to a multiple of 64 bytes, and arrays B and XY to a multiple
 * of at least 16 bytes (aligning them to 64 bytes as well saves cache lines
 * and helps avoid false sharing in OpenMP-enabled builds when p > 1, but it
 * might also result in cache bank conflicts).
 *
 * In this implementation the p lanes are processed one after another and
 * all share the same XY buffer.  S provides Salloc bytes per lane for the
 * pwxform S-boxes and context.  When the read-write mode is active, the
 * first 32 bytes of passwd are replaced by an HMAC-SHA256 value computed
 * while lane 0 is being set up.
 */
#if DISABLE_NROM_CODE
/*
 * Without ROM support, this macro removes the NROM and VROM arguments from
 * every call and also from the parameter list of the definition below.
 */
#define smix(B,r,N,p,t,flags,V,NROM,VROM,XY,S,passwd) \
	smix(B,r,N,p,t,flags,V,XY,S,passwd)
#endif
static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t,
		uint32_t flags,
		salsa20_blk_t *V,
		uint32_t NROM, const salsa20_blk_t *VROM,
		salsa20_blk_t *XY,
		uint8_t *S, uint8_t *passwd)
{
	/* s: number of 64-byte blocks per element of V */
	size_t s = 2 * r;
	uint32_t Nchunk;
	uint64_t Nloop_all, Nloop_rw;
	uint32_t i;

	/*
	 * Each lane gets N / p elements of V.  Nloop_all is the total number
	 * of second-loop iterations, scaled by t.
	 * NOTE(review): flags___YESCRYPT_RW is defined elsewhere; presumably
	 * it tests YESCRYPT_RW in the local `flags` -- confirm.
	 */
	Nchunk = N / p;
	Nloop_all = Nchunk;
	if (flags___YESCRYPT_RW) {
		if (t <= 1) {
			if (t)
				Nloop_all *= 2; /* 2/3 */
			Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */
		} else {
			Nloop_all *= t - 1;
		}
	} else if (t) {
		if (t == 1)
			Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */
		Nloop_all *= t;
	}

	/* Share of the iterations done per lane on that lane's own chunk of V */
	Nloop_rw = 0;
	if (flags___YESCRYPT_RW)
		Nloop_rw = Nloop_all / p;

	Nchunk &= ~(uint32_t)1; /* round down to even */
	Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */
	Nloop_rw++; Nloop_rw &= ~(uint64_t)1; /* round up to even */

	/* First pass: each lane works on its own slice of B and of V */
	for (i = 0; i < p; i++) {
		uint32_t Vchunk = i * Nchunk;
		/* The last lane also takes whatever is left over after rounding */
		uint32_t Np = (i < p - 1) ? Nchunk : (N - Vchunk);
		uint8_t *Bp = &B[128 * r * i];
		salsa20_blk_t *Vp = &V[Vchunk * s];
		salsa20_blk_t *XYp = XY;
		pwxform_ctx_t *ctx_i = NULL;
		if (flags___YESCRYPT_RW) {
			uint8_t *Si = S + i * Salloc;
			/*
			 * Fill this lane's S-boxes by running smix1() with r = 1,
			 * no flags and no context, using the S-box area itself
			 * as V.
			 */
			smix1(Bp, 1, Sbytes / 128, 0 /* no flags */,
				(salsa20_blk_t *)Si, 0, NULL, XYp, NULL);
			/* The context lives right after the tables */
			ctx_i = (pwxform_ctx_t *)(Si + Sbytes);
			ctx_i->S2 = Si;
			ctx_i->S1 = Si + Sbytes / 3;
			ctx_i->S0 = Si + Sbytes / 3 * 2;
			ctx_i->w = 0;
			/*
			 * Lane 0 only: replace passwd[0..31] with an HMAC keyed
			 * by the last 64 bytes of this lane's slice of B.
			 */
			if (i == 0)
				hmac_block(
					/* key,len: */ Bp + (128 * r - 64), 64,
					/* hash fn: */ sha256_begin,
					/* in,len: */ passwd, 32,
					/* outbuf: */ passwd
				);
		}
		smix1(Bp, r, Np, flags, Vp, NROM, VROM, XYp, ctx_i);
		smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp,
			NROM, VROM, XYp, ctx_i);
	}

	/*
	 * Second pass: the remaining iterations run over the entire V, with
	 * YESCRYPT_RW cleared from the flags passed to smix2().
	 */
	if (Nloop_all > Nloop_rw) {
		for (i = 0; i < p; i++) {
			uint8_t *Bp = &B[128 * r * i];
			salsa20_blk_t *XYp = XY;
			pwxform_ctx_t *ctx_i = NULL;
			if (flags___YESCRYPT_RW) {
				uint8_t *Si = S + i * Salloc;
				ctx_i = (pwxform_ctx_t *)(Si + Sbytes);
			}
			smix2(Bp, r, N, Nloop_all - Nloop_rw,
				flags & (uint32_t)~YESCRYPT_RW,
				V, NROM, VROM, XYp, ctx_i);
		}
	}
}
887 | |||
888 | /* Allocator code */ | ||
889 | |||
890 | static void alloc_region(yescrypt_region_t *region, size_t size) | ||
891 | { | ||
892 | uint8_t *base; | ||
893 | int flags = | ||
894 | # ifdef MAP_NOCORE /* huh? */ | ||
895 | MAP_NOCORE | | ||
896 | # endif | ||
897 | MAP_ANON | MAP_PRIVATE; | ||
898 | |||
899 | base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); | ||
900 | if (base == MAP_FAILED) | ||
901 | bb_die_memory_exhausted(); | ||
902 | |||
903 | #if defined(MADV_HUGEPAGE) | ||
904 | /* Reduces mkpasswd qweRTY123@-+ '$y$jHT$123' | ||
905 | * (which allocates 4 Gbytes) | ||
906 | * run time from 10.543s to 5.635s | ||
907 | * Seen on linux-5.18.0. | ||
908 | */ | ||
909 | madvise(base, size, MADV_HUGEPAGE); | ||
910 | #endif | ||
911 | //region->base = base; | ||
912 | //region->base_size = size; | ||
913 | region->aligned = base; | ||
914 | region->aligned_size = size; | ||
915 | } | ||
916 | |||
917 | static void free_region(yescrypt_region_t *region) | ||
918 | { | ||
919 | if (region->aligned) | ||
920 | munmap(region->aligned, region->aligned_size); | ||
921 | //region->base = NULL; | ||
922 | //region->base_size = 0; | ||
923 | region->aligned = NULL; | ||
924 | region->aligned_size = 0; | ||
925 | } | ||
926 | /** | ||
927 | * yescrypt_kdf_body(shared, local, passwd, passwdlen, salt, saltlen, | ||
928 | * flags, N, r, p, t, NROM, buf, buflen): | ||
929 | * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, | ||
930 | * p, buflen), or a revision of scrypt as requested by flags and shared, and | ||
931 | * write the result into buf. | ||
932 | * | ||
933 | * shared and flags may request special modes as described in yescrypt.h. | ||
934 | * | ||
935 | * local is the thread-local data structure, allowing to preserve and reuse a | ||
936 | * memory allocation across calls, thereby reducing its overhead. | ||
937 | * | ||
938 | * t controls computation time while not affecting peak memory usage. | ||
939 | * | ||
940 | * Return 0 on success; or -1 on error. | ||
941 | * | ||
942 | * This optimized implementation currently limits N to the range from 4 to | ||
943 | * 2^31, but other implementations might not. | ||
944 | */ | ||
945 | static int yescrypt_kdf32_body( | ||
946 | yescrypt_ctx_t *yctx, | ||
947 | const uint8_t *passwd, size_t passwdlen, | ||
948 | uint32_t flags, uint64_t N, uint32_t t, | ||
949 | uint8_t *buf32) | ||
950 | { | ||
951 | #if !DISABLE_NROM_CODE | ||
952 | const salsa20_blk_t *VROM; | ||
953 | #endif | ||
954 | size_t B_size, V_size, XY_size, need; | ||
955 | uint8_t *B, *S; | ||
956 | salsa20_blk_t *V, *XY; | ||
957 | struct { | ||
958 | uint8_t sha256[32]; | ||
959 | uint8_t dk[32]; | ||
960 | } u; | ||
961 | #define sha256 u.sha256 | ||
962 | #define dk u.dk | ||
963 | uint8_t *dkp = buf32; | ||
964 | uint32_t r, p; | ||
965 | |||
966 | /* Sanity-check parameters */ | ||
967 | switch (flags___YESCRYPT_MODE_MASK) { | ||
968 | case 0: /* classic scrypt - can't have anything non-standard */ | ||
969 | if (flags || t || YCTX_param_NROM) | ||
970 | goto out_EINVAL; | ||
971 | break; | ||
972 | case YESCRYPT_WORM: | ||
973 | if (flags != YESCRYPT_WORM || YCTX_param_NROM) | ||
974 | goto out_EINVAL; | ||
975 | break; | ||
976 | case YESCRYPT_RW: | ||
977 | if (flags != (flags & YESCRYPT_KNOWN_FLAGS)) | ||
978 | goto out_EINVAL; | ||
979 | #if PWXsimple == 2 && PWXgather == 4 && Sbytes == 12288 | ||
980 | if ((flags & YESCRYPT_RW_FLAVOR_MASK) == | ||
981 | (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | | ||
982 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K)) | ||
983 | break; | ||
984 | #else | ||
985 | #error "Unsupported pwxform settings" | ||
986 | #endif | ||
987 | /* FALLTHRU */ | ||
988 | default: | ||
989 | goto out_EINVAL; | ||
990 | } | ||
991 | |||
992 | r = YCTX_param_r; | ||
993 | p = YCTX_param_p; | ||
994 | if ((uint64_t)r * (uint64_t)p >= 1 << 30) { | ||
995 | dbg("r * n >= 2^30"); | ||
996 | goto out_EINVAL; | ||
997 | } | ||
998 | if (N > UINT32_MAX) { | ||
999 | dbg("N > 0x%lx", (long)UINT32_MAX); | ||
1000 | goto out_EINVAL; | ||
1001 | } | ||
1002 | if (N <= 3 | ||
1003 | || r < 1 | ||
1004 | || p < 1 | ||
1005 | ) { | ||
1006 | dbg("bad N, r or p"); | ||
1007 | goto out_EINVAL; | ||
1008 | } | ||
1009 | if (r > SIZE_MAX / 256 / p | ||
1010 | || N > SIZE_MAX / 128 / r | ||
1011 | ) { | ||
1012 | /* 32-bit testcase: mkpasswd qweRTY123@-+ '$y$jHT$123' | ||
1013 | * (works on 64-bit, needs buffer > 4Gbytes) | ||
1014 | */ | ||
1015 | dbg("r > SIZE_MAX / 256 / p? %c", "NY"[r > SIZE_MAX / 256 / p]); | ||
1016 | dbg("N > SIZE_MAX / 128 / r? %c", "NY"[N > SIZE_MAX / 128 / r]); | ||
1017 | goto out_EINVAL; | ||
1018 | } | ||
1019 | if (flags___YESCRYPT_RW) { | ||
1020 | /* p cannot be greater than SIZE_MAX/Salloc on 64-bit systems, | ||
1021 | but it can on 32-bit systems. */ | ||
1022 | #pragma GCC diagnostic push | ||
1023 | #pragma GCC diagnostic ignored "-Wtype-limits" | ||
1024 | if (N / p <= 3 || p > SIZE_MAX / Salloc) { | ||
1025 | dbg("bad p:%ld", (long)p); | ||
1026 | goto out_EINVAL; | ||
1027 | } | ||
1028 | #pragma GCC diagnostic pop | ||
1029 | } | ||
1030 | |||
1031 | #if !DISABLE_NROM_CODE | ||
1032 | VROM = NULL; | ||
1033 | if (YCTX_param_NROM) | ||
1034 | goto out_EINVAL; | ||
1035 | #endif | ||
1036 | |||
1037 | /* Allocate memory */ | ||
1038 | V = NULL; | ||
1039 | V_size = (size_t)128 * r * N; | ||
1040 | need = V_size; | ||
1041 | B_size = (size_t)128 * r * p; | ||
1042 | need += B_size; | ||
1043 | if (need < B_size) { | ||
1044 | dbg("integer overflow at += B_size(%lu)", (long)B_size); | ||
1045 | goto out_EINVAL; | ||
1046 | } | ||
1047 | XY_size = (size_t)256 * r; | ||
1048 | need += XY_size; | ||
1049 | if (need < XY_size) { | ||
1050 | dbg("integer overflow at += XY_size(%lu)", (long)XY_size); | ||
1051 | goto out_EINVAL; | ||
1052 | } | ||
1053 | if (flags___YESCRYPT_RW) { | ||
1054 | size_t S_size = (size_t)Salloc * p; | ||
1055 | need += S_size; | ||
1056 | if (need < S_size) { | ||
1057 | dbg("integer overflow at += S_size(%lu)", (long)S_size); | ||
1058 | goto out_EINVAL; | ||
1059 | } | ||
1060 | } | ||
1061 | if (yctx->local->aligned_size < need) { | ||
1062 | free_region(yctx->local); | ||
1063 | alloc_region(yctx->local, need); | ||
1064 | dbg("allocated local:%lu 0x%lx", (long)need, (long)need); | ||
1065 | /* standard "j9T" params allocate 16Mbytes here */ | ||
1066 | } | ||
1067 | if (flags & YESCRYPT_ALLOC_ONLY) | ||
1068 | return -3; /* expected "failure" */ | ||
1069 | B = (uint8_t *)yctx->local->aligned; | ||
1070 | V = (salsa20_blk_t *)((uint8_t *)B + B_size); | ||
1071 | XY = (salsa20_blk_t *)((uint8_t *)V + V_size); | ||
1072 | S = NULL; | ||
1073 | if (flags___YESCRYPT_RW) | ||
1074 | S = (uint8_t *)XY + XY_size; | ||
1075 | |||
1076 | if (flags) { | ||
1077 | hmac_block( | ||
1078 | /* key,len: */ (const void*)"yescrypt-prehash", (flags & YESCRYPT_PREHASH) ? 16 : 8, | ||
1079 | /* hash fn: */ sha256_begin, | ||
1080 | /* in,len: */ passwd, passwdlen, | ||
1081 | /* outbuf: */ sha256 | ||
1082 | ); | ||
1083 | passwd = sha256; | ||
1084 | passwdlen = sizeof(sha256); | ||
1085 | } | ||
1086 | |||
1087 | PBKDF2_SHA256(passwd, passwdlen, yctx->salt, yctx->saltlen, 1, B, B_size); | ||
1088 | |||
1089 | if (flags) | ||
1090 | memcpy(sha256, B, sizeof(sha256)); | ||
1091 | |||
1092 | if (p == 1 || (flags___YESCRYPT_RW)) { | ||
1093 | smix(B, r, N, p, t, flags, V, YCTX_param_NROM, VROM, XY, S, sha256); | ||
1094 | } else { | ||
1095 | uint32_t i; | ||
1096 | for (i = 0; i < p; i++) { | ||
1097 | smix(&B[(size_t)128 * r * i], r, N, 1, t, flags, V, | ||
1098 | YCTX_param_NROM, VROM, XY, NULL, NULL); | ||
1099 | } | ||
1100 | } | ||
1101 | |||
1102 | dkp = buf32; | ||
1103 | if (flags && /*buflen:*/32 < sizeof(dk)) { | ||
1104 | PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, dk, sizeof(dk)); | ||
1105 | dkp = dk; | ||
1106 | } | ||
1107 | |||
1108 | PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf32, /*buflen:*/32); | ||
1109 | |||
1110 | /* | ||
1111 | * Except when computing classic scrypt, allow all computation so far | ||
1112 | * to be performed on the client. The final steps below match those of | ||
1113 | * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so | ||
1114 | * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of | ||
1115 | * SCRAM's use of SHA-1) would be usable with yescrypt hashes. | ||
1116 | */ | ||
1117 | if (flags && !(flags & YESCRYPT_PREHASH)) { | ||
1118 | /* Compute ClientKey */ | ||
1119 | hmac_block( | ||
1120 | /* key,len: */ dkp, sizeof(dk), | ||
1121 | /* hash fn: */ sha256_begin, | ||
1122 | /* in,len: */ "Client Key", 10, | ||
1123 | /* outbuf: */ sha256 | ||
1124 | ); | ||
1125 | /* Compute StoredKey */ | ||
1126 | { | ||
1127 | size_t clen = /*buflen:*/32; | ||
1128 | if (clen > sizeof(dk)) | ||
1129 | clen = sizeof(dk); | ||
1130 | if (sizeof(dk) != 32) { /* not true, optimize it out */ | ||
1131 | sha256_block(sha256, sizeof(sha256), dk); | ||
1132 | memcpy(buf32, dk, clen); | ||
1133 | } else { | ||
1134 | sha256_block(sha256, sizeof(sha256), buf32); | ||
1135 | } | ||
1136 | } | ||
1137 | } | ||
1138 | |||
1139 | explicit_bzero(&u, sizeof(u)); | ||
1140 | |||
1141 | /* Success! */ | ||
1142 | return 0; | ||
1143 | |||
1144 | out_EINVAL: | ||
1145 | //bbox does not need this: errno = EINVAL; | ||
1146 | return -1; | ||
1147 | #undef sha256 | ||
1148 | #undef dk | ||
1149 | } | ||
1150 | |||
1151 | /** | ||
1152 | * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params, | ||
1153 | * buf, buflen): | ||
1154 | * Compute scrypt or its revision as requested by the parameters. The inputs | ||
1155 | * to this function are the same as those for yescrypt_kdf_body() above, with | ||
1156 | * the addition of g, which controls hash upgrades (0 for no upgrades so far). | ||
1157 | */ | ||
1158 | static | ||
1159 | int yescrypt_kdf32( | ||
1160 | yescrypt_ctx_t *yctx, | ||
1161 | const uint8_t *passwd, size_t passwdlen, | ||
1162 | uint8_t *buf32) | ||
1163 | { | ||
1164 | uint32_t flags = YCTX_param_flags; | ||
1165 | uint64_t N = YCTX_param_N; | ||
1166 | uint32_t r = YCTX_param_r; | ||
1167 | uint32_t p = YCTX_param_p; | ||
1168 | uint32_t t = YCTX_param_t; | ||
1169 | uint32_t g = YCTX_param_g; | ||
1170 | uint8_t dk32[32]; | ||
1171 | int retval; | ||
1172 | |||
1173 | /* Support for hash upgrades has been temporarily removed */ | ||
1174 | if (g) { | ||
1175 | //bbox does not need this: errno = EINVAL; | ||
1176 | return -1; | ||
1177 | } | ||
1178 | |||
1179 | if ((flags___YESCRYPT_RW) | ||
1180 | && p >= 1 | ||
1181 | && N / p >= 0x100 | ||
1182 | && N / p * r >= 0x20000 | ||
1183 | ) { | ||
1184 | if (yescrypt_kdf32_body(yctx, | ||
1185 | passwd, passwdlen, | ||
1186 | flags | YESCRYPT_ALLOC_ONLY, N, t, | ||
1187 | buf32) != -3 | ||
1188 | ) { | ||
1189 | dbg("yescrypt_kdf32_body: not -3"); | ||
1190 | return -1; | ||
1191 | } | ||
1192 | retval = yescrypt_kdf32_body(yctx, | ||
1193 | passwd, passwdlen, | ||
1194 | flags | YESCRYPT_PREHASH, N >> 6, 0, | ||
1195 | dk32); | ||
1196 | if (retval) { | ||
1197 | dbg("yescrypt_kdf32_body(PREHASH):%d", retval); | ||
1198 | return retval; | ||
1199 | } | ||
1200 | passwd = dk32; | ||
1201 | passwdlen = sizeof(dk32); | ||
1202 | } | ||
1203 | |||
1204 | retval = yescrypt_kdf32_body(yctx, | ||
1205 | passwd, passwdlen, | ||
1206 | flags, N, t, buf32); | ||
1207 | |||
1208 | explicit_bzero(dk32, sizeof(dk32)); | ||
1209 | |||
1210 | dbg("yescrypt_kdf32_body:%d", retval); | ||
1211 | return retval; | ||
1212 | } | ||
diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h new file mode 100644 index 000000000..b69843f5d --- /dev/null +++ b/libbb/yescrypt/alg-yescrypt.h | |||
@@ -0,0 +1,247 @@ | |||
1 | /*- | ||
2 | * Copyright 2009 Colin Percival | ||
3 | * Copyright 2013-2018 Alexander Peslyak | ||
4 | * All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer. | ||
11 | * 2. Redistributions in binary form must reproduce the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer in the | ||
13 | * documentation and/or other materials provided with the distribution. | ||
14 | * | ||
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | ||
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
25 | * SUCH DAMAGE. | ||
26 | * | ||
27 | * This file was originally written by Colin Percival as part of the Tarsnap | ||
28 | * online backup system. | ||
29 | */ | ||
30 | |||
// busybox debug and size-reduction configuration

// The debug helpers below exist only for code that defines YESCRYPT_INTERNAL
// before including this header.
#ifdef YESCRYPT_INTERNAL
// dbg(): general debug messages. Compiled out ("#if 1" branch);
// change the condition to 0 to send them to bb_error_msg().
# if 1
# define dbg(...) ((void)0)
# else
# define dbg(...) bb_error_msg(__VA_ARGS__)
# endif
// dbg_dec64(): a separate switch with the same two choices as dbg()
// (judging by the name, for base-64 decoding traces - the users are
// outside this header).
# if 1
# define dbg_dec64(...) ((void)0)
# else
# define dbg_dec64(...) bb_error_msg(__VA_ARGS__)
# endif
// NOTE(review): presumably enables a decoder self-test when set to 1;
// the code that tests this macro is not in this header - confirm.
# define TEST_DECODE64 0
#endif
46 | |||
// Only accept one-char parameters in salt, and only first three?
// Almost any reasonable yescrypt hashes in /etc/shadow should
// only ever use "jXY" parameters which set N and r.
// Fancy multi-byte-encoded wide integers are not needed for that.
#define RESTRICTED_PARAMS 1
// Note: if you enable the above, please also enable
// YCTX_param_p, YCTX_param_t, YCTX_param_g, YCTX_param_NROM
// optimizations, and DISABLE_NROM_CODE.

#define DISABLE_NROM_CODE 1

// Each knob below is either read from the context at run time (the #define
// is active) or forced to its "standard" value (the #define is commented
// out, and the #ifndef fallbacks further down supply a constant).
// The byte count written above each knob is
// how much we save by forcing "standard" value by commenting the next line:
// 160 bytes
//#define YCTX_param_flags yctx->param.flags
// 260 bytes
//#define flags___YESCRYPT_RW (flags & YESCRYPT_RW)
// 140 bytes
//#define flags___YESCRYPT_MODE_MASK (flags & YESCRYPT_MODE_MASK)
// ^^^^ forcing the above since the code already requires (checks for) this
// 50 bytes
#define YCTX_param_N yctx->param.N
// -100 bytes (negative!!!)
#define YCTX_param_r yctx->param.r
// 400 bytes
//#define YCTX_param_p yctx->param.p
// 130 bytes
//#define YCTX_param_t yctx->param.t
// 2 bytes
//#define YCTX_param_g yctx->param.g
// 1 byte
// ^^^^ this looks wrong, compiler should be able to constant-propagate the fact that NROM code is dead
//#define YCTX_param_NROM yctx->param.NROM

// Fallbacks: compile-time constants for every knob not defined above.
#ifndef YCTX_param_flags
#define YCTX_param_flags (YESCRYPT_RW | YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K)
#endif
// The two forced forms below ignore the value of "flags" and always yield
// YESCRYPT_RW; "(void)flags" still mentions the variable
// (presumably to avoid unused-variable warnings).
#ifndef flags___YESCRYPT_RW
#define flags___YESCRYPT_RW ((void)flags, YESCRYPT_RW)
#endif
#ifndef flags___YESCRYPT_MODE_MASK
#define flags___YESCRYPT_MODE_MASK ((void)flags, YESCRYPT_RW)
#endif
// standard ("j9T") values:
#ifndef YCTX_param_N
#define YCTX_param_N 4096
#endif
#ifndef YCTX_param_r
#define YCTX_param_r 32
#endif
#ifndef YCTX_param_p
#define YCTX_param_p 1
#endif
#ifndef YCTX_param_t
#define YCTX_param_t 0
#endif
#ifndef YCTX_param_g
#define YCTX_param_g 0
#endif
#ifndef YCTX_param_NROM
#define YCTX_param_NROM 0
#endif
108 | |||
// "Faster/smaller code" knobs: each selects between an unrolled (1) and a
// compact (0) variant of a KDF inner routine. The byte counts are the code
// size change recorded by the author for the value 0.
// -941 bytes:
#define KDF_UNROLL_COPY 0
// -5324 bytes if 0:
#define KDF_UNROLL_PWXFORM_ROUND 0
// -4864 bytes if 0:
#define KDF_UNROLL_PWXFORM 0
// if both this ^^^^^^^^^^ and PWXFORM_ROUND set to 0: -7666 bytes
// -464 bytes:
#define KDF_UNROLL_SALSA20 0
119 | |||
/**
 * Type and possible values for the flags argument of yescrypt_kdf(),
 * yescrypt_encode_params_r(), yescrypt_encode_params().  Most of these may be
 * OR'ed together, except that YESCRYPT_WORM stands on its own.
 * Please refer to the description of yescrypt_kdf() below for the meaning of
 * these flags.
 */
/* yescrypt flags:
 * bits pos: 7654321076543210
 *                    ss  r w
 *                sbox  gg y
 *
 * As given by the values below: bit 0 is YESCRYPT_WORM, bit 1 is
 * YESCRYPT_RW, bit 2 selects the rounds (r), bits 3-4 the gather width (gg),
 * bits 5-6 the simple width (ss), bits 7 and up the S-box size (sbox).
 */
/* Public */
#define YESCRYPT_WORM 1
#define YESCRYPT_RW 0x002
#define YESCRYPT_ROUNDS_3 0x000 //r=0
#define YESCRYPT_ROUNDS_6 0x004 //r=1
#define YESCRYPT_GATHER_1 0x000 //gg=00
#define YESCRYPT_GATHER_2 0x008 //gg=01
#define YESCRYPT_GATHER_4 0x010 //gg=10
#define YESCRYPT_GATHER_8 0x018 //gg=11
#define YESCRYPT_SIMPLE_1 0x000 //ss=00
#define YESCRYPT_SIMPLE_2 0x020 //ss=01
#define YESCRYPT_SIMPLE_4 0x040 //ss=10
#define YESCRYPT_SIMPLE_8 0x060 //ss=11
#define YESCRYPT_SBOX_6K 0x000 //sbox=0000
#define YESCRYPT_SBOX_12K 0x080 //sbox=0001
#define YESCRYPT_SBOX_24K 0x100 //sbox=0010
#define YESCRYPT_SBOX_48K 0x180 //sbox=0011
#define YESCRYPT_SBOX_96K 0x200 //sbox=0100
#define YESCRYPT_SBOX_192K 0x280 //sbox=0101
#define YESCRYPT_SBOX_384K 0x300 //sbox=0110
#define YESCRYPT_SBOX_768K 0x380 //sbox=0111

#ifdef YESCRYPT_INTERNAL
/* Private */
/* Bits 0-1: WORM / RW selection */
#define YESCRYPT_MODE_MASK 0x003
/* Bits 2-9: rounds, gather, simple and sbox fields together */
#define YESCRYPT_RW_FLAVOR_MASK 0x3fc
/* Internal request bits, OR'ed into flags by the KDF code itself */
#define YESCRYPT_ALLOC_ONLY 0x08000000
#define YESCRYPT_PREHASH 0x10000000
#endif

/* The flag combination used as the default for YESCRYPT_RW mode */
#define YESCRYPT_RW_DEFAULTS \
	(YESCRYPT_RW | \
	YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | \
	YESCRYPT_SBOX_12K)

#define YESCRYPT_DEFAULTS YESCRYPT_RW_DEFAULTS

#ifdef YESCRYPT_INTERNAL
/* Every flag bit the implementation recognizes */
#define YESCRYPT_KNOWN_FLAGS \
	(YESCRYPT_MODE_MASK | YESCRYPT_RW_FLAVOR_MASK | \
	YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH)
#endif
174 | |||
/* How many chars are needed to base-64 encode this many bytes?
 * (6 bits per char, rounded up) */
#define YESCRYPT_BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6)
/* The /etc/passwd-style hash is "<prefix>$<hash><NUL>" */
/*
 * "$y$", up to 8 params of up to 6 chars each, '$', salt
 * Alternatively, but that's smaller:
 * "$7$", 3 params encoded as 1+5+5 chars, salt
 */
#define YESCRYPT_PREFIX_LEN (3 + 8 * 6 + 1 + YESCRYPT_BYTES2CHARS(32))

/* Size of the binary hash in bytes, and its length once base-64 encoded */
#define YESCRYPT_HASH_SIZE 32
#define YESCRYPT_HASH_LEN YESCRYPT_BYTES2CHARS(YESCRYPT_HASH_SIZE)
187 | |||
/**
 * Internal type used by the memory allocator: the address and the size in
 * bytes of one allocated area.  aligned == NULL with aligned_size == 0 means
 * nothing is allocated.
 * The base/base_size members are commented out; only the aligned pair is
 * kept.
 * NOTE(review): the earlier text of this comment pointed to
 * yescrypt_shared_t and yescrypt_local_t wrappers, which are not declared in
 * this header - confirm whether they still exist.
 */
typedef struct {
//	void *base;
	void *aligned;
//	size_t base_size;
	size_t aligned_size;
} yescrypt_region_t;
199 | |||
/**
 * yescrypt parameters combined into one struct.  N, r, p are the same as in
 * classic scrypt, except that the meaning of p changes when YESCRYPT_RW is
 * set.  flags, t, g, NROM are special to yescrypt.
 *
 * With RESTRICTED_PARAMS enabled only flags, r and N are stored; p, t, g and
 * NROM are not members of the struct at all.
 */
typedef struct {
	uint32_t flags;
	uint32_t r;
	uint64_t N;
#if !RESTRICTED_PARAMS
	uint32_t p, t, g;
	uint64_t NROM;
#endif
} yescrypt_params_t;
214 | |||
/* Everything one hash computation needs, bundled into a single context:
 * the parameters, the salt and the working-memory descriptor. */
typedef struct {
	yescrypt_params_t param;

	/* salt in binary form */
	/* stored here to cut down on the amount of function parameters */
	unsigned char salt[64];
	size_t saltlen;

	/* used by the memory allocator */
	//yescrypt_region_t shared[1];
	/* one-element array, so that "local" can be used directly as a pointer */
	yescrypt_region_t local[1];
} yescrypt_ctx_t;
227 | |||
/**
 * yescrypt_r(passwd, passwdlen, setting, buf, buflen):
 * Compute and encode an scrypt or enhanced scrypt hash of passwd given the
 * parameters and salt value encoded in setting.  Whether to compute classic
 * scrypt, YESCRYPT_WORM (a slight deviation from classic scrypt), or
 * YESCRYPT_RW (time-memory tradeoff discouraging modification) is determined
 * by the setting string.  buf must be large enough (as indicated by buflen)
 * to hold the encoded hash string.
 *
 * Return the encoded hash string on success; or NULL on error.
 *
 * NOTE(review): earlier revisions of this comment also described "shared",
 * "local" and "key" arguments and promised MT-safety as long as local and
 * buf are local to the thread.  Those arguments are not part of this
 * prototype - confirm the current thread-safety guarantee against the
 * implementation.
 */
extern char *yescrypt_r(
		const uint8_t *passwd, size_t passwdlen,
		const uint8_t *setting,
		char *buf, size_t buflen
);
diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c new file mode 100644 index 000000000..d5ab8903f --- /dev/null +++ b/libbb/yescrypt/y.c | |||
@@ -0,0 +1,16 @@ | |||
1 | /* | ||
2 | * The compilation unit for yescrypt-related code. | ||
3 | * | ||
4 | * Copyright (C) 2025 by Denys Vlasenko <vda.linux@googlemail.com> | ||
5 | * | ||
6 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
7 | */ | ||
8 | //kbuild:lib-$(CONFIG_USE_BB_CRYPT_YES) += y.o | ||
9 | |||
10 | #include "libbb.h" | ||
11 | |||
12 | #define YESCRYPT_INTERNAL | ||
13 | #include "alg-yescrypt.h" | ||
14 | #include "alg-sha256.c" | ||
15 | #include "alg-yescrypt-kdf.c" | ||
16 | #include "alg-yescrypt-common.c" | ||