diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-11-29 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-11-30 15:27:15 +0500 |
commit | e5431fa6f5505e385c6f9367260717e9c47dc2ee (patch) | |
tree | 4cd2c2c3b225b48c8e7053432c41d7b6b6a3d5f8 /C/Sha512.c | |
parent | e008ce3976c087bfd21344af8f00a23cf69d4174 (diff) | |
download | 7zip-e5431fa6f5505e385c6f9367260717e9c47dc2ee.tar.gz 7zip-e5431fa6f5505e385c6f9367260717e9c47dc2ee.tar.bz2 7zip-e5431fa6f5505e385c6f9367260717e9c47dc2ee.zip |
Diffstat (limited to 'C/Sha512.c')
-rw-r--r-- | C/Sha512.c | 618 |
1 files changed, 618 insertions, 0 deletions
diff --git a/C/Sha512.c b/C/Sha512.c new file mode 100644 index 0000000..04827d6 --- /dev/null +++ b/C/Sha512.c | |||
@@ -0,0 +1,618 @@ | |||
1 | /* Sha512.c -- SHA-512 Hash | ||
2 | : Igor Pavlov : Public domain | ||
3 | This code is based on public domain code from Wei Dai's Crypto++ library. */ | ||
4 | |||
5 | #include "Precomp.h" | ||
6 | |||
7 | #include <string.h> | ||
8 | |||
9 | #include "Sha512.h" | ||
10 | #include "RotateDefs.h" | ||
11 | #include "CpuArch.h" | ||
12 | |||
/*
  Compile-time detection of the hardware SHA-512 code path.

  Z7_COMPILER_SHA512_SUPPORTED is defined when the target CPU family and the
  compiler version pass the checks below. When it is defined, this file
  declares Sha512_UpdateBlocks_HW() and selects the block function through
  function pointers at run time; otherwise Sha512_UpdateBlocks() is always used.

  NOTE(review): the version thresholds are presumably the first compiler
  releases that can build the SHA-512 intrinsics - confirm against the file
  that implements Sha512_UpdateBlocks_HW().
*/
#ifdef MY_CPU_X86_OR_AMD64
  #if   defined(Z7_LLVM_CLANG_VERSION)  && (Z7_LLVM_CLANG_VERSION  >= 170001) \
     || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 170001) \
     || defined(Z7_GCC_VERSION)         && (Z7_GCC_VERSION         >= 140000) \
     || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 2400) && (__INTEL_COMPILER <= 9900) \
     || defined(_MSC_VER) && (_MSC_VER >= 1940)
      #define Z7_COMPILER_SHA512_SUPPORTED
  #endif
#elif defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
  /* ARM64 little-endian: accept either an explicit feature macro or a
     sufficiently new compiler. */
  #if defined(__ARM_FEATURE_SHA512)
    #define Z7_COMPILER_SHA512_SUPPORTED
  #else
    #if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 130000) \
           || defined(__GNUC__) && (__GNUC__ >= 9) \
        ) \
      || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1940) // NOTE(review): original author's "fix it" marker; the guard tests Z7_MSC_VER_ORIGINAL but the comparison uses _MSC_VER
      #define Z7_COMPILER_SHA512_SUPPORTED
    #endif
  #endif
#endif
33 | |||
34 | |||
35 | |||
36 | |||
37 | |||
38 | |||
39 | |||
40 | |||
41 | |||
42 | |||
43 | |||
44 | |||
45 | |||
46 | |||
/* Portable block-compression function, defined later in this file. */
void Z7_FASTCALL Sha512_UpdateBlocks(UInt64 state[8], const Byte *data, size_t numBlocks);

#ifdef Z7_COMPILER_SHA512_SUPPORTED
  /* Hardware block-compression function; it is not defined in this file. */
  void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);

  /* Block function used for the "default" algorithm. It starts as the
     portable function and is overwritten by Sha512Prepare(). */
  static SHA512_FUNC_UPDATE_BLOCKS g_SHA512_FUNC_UPDATE_BLOCKS = Sha512_UpdateBlocks;
  /* Hardware block function, or NULL (zero-initialized static) until
     Sha512Prepare() finds hardware support. */
  static SHA512_FUNC_UPDATE_BLOCKS g_SHA512_FUNC_UPDATE_BLOCKS_HW;

  /* With hardware support compiled in, each context carries its own pointer. */
  #define SHA512_UPDATE_BLOCKS(p) p->v.vars.func_UpdateBlocks
#else
  /* Without it, the macro ignores (p) and always names the portable function. */
  #define SHA512_UPDATE_BLOCKS(p) Sha512_UpdateBlocks
#endif
59 | |||
60 | |||
61 | BoolInt Sha512_SetFunction(CSha512 *p, unsigned algo) | ||
62 | { | ||
63 | SHA512_FUNC_UPDATE_BLOCKS func = Sha512_UpdateBlocks; | ||
64 | |||
65 | #ifdef Z7_COMPILER_SHA512_SUPPORTED | ||
66 | if (algo != SHA512_ALGO_SW) | ||
67 | { | ||
68 | if (algo == SHA512_ALGO_DEFAULT) | ||
69 | func = g_SHA512_FUNC_UPDATE_BLOCKS; | ||
70 | else | ||
71 | { | ||
72 | if (algo != SHA512_ALGO_HW) | ||
73 | return False; | ||
74 | func = g_SHA512_FUNC_UPDATE_BLOCKS_HW; | ||
75 | if (!func) | ||
76 | return False; | ||
77 | } | ||
78 | } | ||
79 | #else | ||
80 | if (algo > 1) | ||
81 | return False; | ||
82 | #endif | ||
83 | |||
84 | p->v.vars.func_UpdateBlocks = func; | ||
85 | return True; | ||
86 | } | ||
87 | |||
88 | |||
/*
  Unrolling configuration for Sha512_UpdateBlocks():
    STEP_PRE  : number of rounds emitted per loop iteration for rounds 0..15.
    STEP_MAIN : number of rounds emitted per loop iteration for rounds 16..79.
  Change "#if 0" to "#if 1" below to choose the smallest code (one round per
  iteration) instead of the faster, partially unrolled variant.
*/

#if 0 // 1 for size optimization
  #define STEP_PRE 1
  #define STEP_MAIN 1
#else
  #define STEP_PRE 2
  #define STEP_MAIN 4
  // #define Z7_SHA512_UNROLL
#endif

/* Z7_SHA512_BIG_W selects an 80-word message schedule array in
   Sha512_UpdateBlocks(); without it a 16-word array indexed with "& 15" is used. */
#undef Z7_SHA512_BIG_W
#if STEP_MAIN != 16
  #define Z7_SHA512_BIG_W
#endif


/* Short alias for the 64-bit constant macro used in the tables below. */
#define U64C(x) UINT64_CONST(x)
107 | |||
/*
  Initial hash values, one row per supported digest size.
  Sha512_InitState() picks the row with index ((digestSize >> 4) - 1):
    row 0 : digestSize 28 (SHA-512/224)
    row 1 : digestSize 32 (SHA-512/256)
    row 2 : digestSize 48 (SHA-384)
    row 3 : digestSize 64 (SHA-512)
*/
static MY_ALIGN(64) const UInt64 SHA512_INIT_ARRAYS[4][8] = {
{ U64C(0x8c3d37c819544da2), U64C(0x73e1996689dcd4d6), U64C(0x1dfab7ae32ff9c82), U64C(0x679dd514582f9fcf),
  U64C(0x0f6d2b697bd44da8), U64C(0x77e36f7304c48942), U64C(0x3f9d85a86a1d36c8), U64C(0x1112e6ad91d692a1)
},
{ U64C(0x22312194fc2bf72c), U64C(0x9f555fa3c84c64c2), U64C(0x2393b86b6f53b151), U64C(0x963877195940eabd),
  U64C(0x96283ee2a88effe3), U64C(0xbe5e1e2553863992), U64C(0x2b0199fc2c85b8aa), U64C(0x0eb72ddc81c52ca2)
},
{ U64C(0xcbbb9d5dc1059ed8), U64C(0x629a292a367cd507), U64C(0x9159015a3070dd17), U64C(0x152fecd8f70e5939),
  U64C(0x67332667ffc00b31), U64C(0x8eb44a8768581511), U64C(0xdb0c2e0d64f98fa7), U64C(0x47b5481dbefa4fa4)
},
{ U64C(0x6a09e667f3bcc908), U64C(0xbb67ae8584caa73b), U64C(0x3c6ef372fe94f82b), U64C(0xa54ff53a5f1d36f1),
  U64C(0x510e527fade682d1), U64C(0x9b05688c2b3e6c1f), U64C(0x1f83d9abfb41bd6b), U64C(0x5be0cd19137e2179)
}};
121 | |||
122 | void Sha512_InitState(CSha512 *p, unsigned digestSize) | ||
123 | { | ||
124 | p->v.vars.count = 0; | ||
125 | memcpy(p->state, SHA512_INIT_ARRAYS[(size_t)(digestSize >> 4) - 1], sizeof(p->state)); | ||
126 | } | ||
127 | |||
128 | void Sha512_Init(CSha512 *p, unsigned digestSize) | ||
129 | { | ||
130 | p->v.vars.func_UpdateBlocks = | ||
131 | #ifdef Z7_COMPILER_SHA512_SUPPORTED | ||
132 | g_SHA512_FUNC_UPDATE_BLOCKS; | ||
133 | #else | ||
134 | NULL; | ||
135 | #endif | ||
136 | Sha512_InitState(p, digestSize); | ||
137 | } | ||
138 | |||
/* SHA-512 mixing functions: S0/S1 are the "big sigma" functions applied to the
   working variables a and e; s0/s1 are the "small sigma" functions applied to
   message schedule words. */
#define S0(x) (Z7_ROTR64(x,28) ^ Z7_ROTR64(x,34) ^ Z7_ROTR64(x,39))
#define S1(x) (Z7_ROTR64(x,14) ^ Z7_ROTR64(x,18) ^ Z7_ROTR64(x,41))
#define s0(x) (Z7_ROTR64(x, 1) ^ Z7_ROTR64(x, 8) ^ (x >> 7))
#define s1(x) (Z7_ROTR64(x,19) ^ Z7_ROTR64(x,61) ^ (x >> 6))

/* Ch selects bits of y where x is 1 and bits of z where x is 0.
   Maj is the bitwise majority of x, y and z. */
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))


/* Rounds 0..15: read message word (j + i) from (data) through GetBe64(),
   store it in W[j + i], and yield the stored value.
   Uses the variables W, j and data of the enclosing function. */
#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe64(data + ((size_t)(j) + i) * 8))

/* The part of the schedule recurrence that is added to the word 16 positions back. */
#define blk2_main(j, i)  s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))

#ifdef Z7_SHA512_BIG_W
    /* 80-word schedule: every word has its own slot, so the new word is
       written to W[j + i] and the word at W[j + i - 16] is left intact. */
    // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
    #define w(j, i)     W[(size_t)(j) + i]
    #define blk2(j, i)  (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
    /* 16-word ring: slot ((j + i) & 15) still holds the word from 16 rounds
       earlier, so the new word is produced by adding to it in place. */
    #if STEP_MAIN == 16
        #define w(j, i)  W[(i) & 15]
    #else
        #define w(j, i)  W[((size_t)(j) + (i)) & 15]
    #endif
    #define blk2(j, i)  (w(j, i) += blk2_main(j, i))
#endif

/* Rounds 16..79: compute, store and yield schedule word (j + i). */
#define W_MAIN(i)  blk2(j, i)


/* One full round that also rotates the eight working variables.
   (wx) is W_PRE or W_MAIN and supplies the schedule word for round (j + i).
   Uses the variables a..h, tmp and j of the enclosing function.
   The macro body ends with a line continuation, so the empty line that
   follows it is part of the definition and must be kept. */
#define T1(wx, i) \
    tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    h = g; \
    g = f; \
    f = e; \
    e = d + tmp; \
    tmp += S0(a) + Maj(a, b, c); \
    d = c; \
    c = b; \
    b = a; \
    a = tmp; \

#define R1_PRE(i)  T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i)

/* Two consecutive main rounds; like T1, the definition ends with a line
   continuation followed by an empty line. */
#if (!defined(Z7_SHA512_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
#define R2_MAIN(i) \
    R1_MAIN(i) \
    R1_MAIN(i + 1) \

#endif
189 | |||
190 | |||
191 | |||
/*
  Unrolled round macros, compiled only with Z7_SHA512_UNROLL and STEP_MAIN >= 8.
  Instead of moving values between the eight working variables after every
  round (as T1 does), these macros keep the values in place and permute the
  variable NAMES passed to each successive round.
  Each definition ends with a line continuation, so the empty line after it
  is part of the macro and must be kept.
*/
#if defined(Z7_SHA512_UNROLL) && STEP_MAIN >= 8

/* One round: afterwards the variable passed as (h) holds the new "e" value
   and the variable passed as (d) holds the new "a" value. Uses tmp. */
#define T4( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    tmp = h; \
    h += d; \
    d = tmp + S0(a) + Maj(a, b, c); \

/* Four rounds; the argument order is permuted so that after the fourth
   round every value is back in the variable that carries its name. */
#define R4( wx, i) \
    T4 ( a,b,c,d,e,f,g,h, wx, (i  )); \
    T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
    T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
    T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \

#define R4_PRE(i)  R4( W_PRE, i)
#define R4_MAIN(i) R4( W_MAIN, i)


/* One round without a temporary: afterwards the variable passed as (d) holds
   the new "e" value and the variable passed as (h) holds the new "a" value. */
#define T8( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    d += h; \
    h += S0(a) + Maj(a, b, c); \

/* Eight rounds; the argument list is rotated by one position per round, so
   after the eighth round the names line up with the values again. */
#define R8( wx, i) \
    T8 ( a,b,c,d,e,f,g,h, wx, i  ); \
    T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
    T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
    T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
    T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
    T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
    T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
    T8 ( b,c,d,e,f,g,h,a, wx, i+7); \

#define R8_PRE(i)  R8( W_PRE, i)
#define R8_MAIN(i) R8( W_MAIN, i)

#endif
229 | |||
230 | |||
/*
  The 80 SHA-512 round constants, indexed by round number.
  The array has external linkage; the extern declaration directly before the
  definition gives it a prior declaration in this translation unit.
  NOTE(review): presumably shared with the hardware implementation file - confirm.
*/
extern
MY_ALIGN(64) const UInt64 SHA512_K_ARRAY[80];
MY_ALIGN(64) const UInt64 SHA512_K_ARRAY[80] = {
  U64C(0x428a2f98d728ae22), U64C(0x7137449123ef65cd), U64C(0xb5c0fbcfec4d3b2f), U64C(0xe9b5dba58189dbbc),
  U64C(0x3956c25bf348b538), U64C(0x59f111f1b605d019), U64C(0x923f82a4af194f9b), U64C(0xab1c5ed5da6d8118),
  U64C(0xd807aa98a3030242), U64C(0x12835b0145706fbe), U64C(0x243185be4ee4b28c), U64C(0x550c7dc3d5ffb4e2),
  U64C(0x72be5d74f27b896f), U64C(0x80deb1fe3b1696b1), U64C(0x9bdc06a725c71235), U64C(0xc19bf174cf692694),
  U64C(0xe49b69c19ef14ad2), U64C(0xefbe4786384f25e3), U64C(0x0fc19dc68b8cd5b5), U64C(0x240ca1cc77ac9c65),
  U64C(0x2de92c6f592b0275), U64C(0x4a7484aa6ea6e483), U64C(0x5cb0a9dcbd41fbd4), U64C(0x76f988da831153b5),
  U64C(0x983e5152ee66dfab), U64C(0xa831c66d2db43210), U64C(0xb00327c898fb213f), U64C(0xbf597fc7beef0ee4),
  U64C(0xc6e00bf33da88fc2), U64C(0xd5a79147930aa725), U64C(0x06ca6351e003826f), U64C(0x142929670a0e6e70),
  U64C(0x27b70a8546d22ffc), U64C(0x2e1b21385c26c926), U64C(0x4d2c6dfc5ac42aed), U64C(0x53380d139d95b3df),
  U64C(0x650a73548baf63de), U64C(0x766a0abb3c77b2a8), U64C(0x81c2c92e47edaee6), U64C(0x92722c851482353b),
  U64C(0xa2bfe8a14cf10364), U64C(0xa81a664bbc423001), U64C(0xc24b8b70d0f89791), U64C(0xc76c51a30654be30),
  U64C(0xd192e819d6ef5218), U64C(0xd69906245565a910), U64C(0xf40e35855771202a), U64C(0x106aa07032bbd1b8),
  U64C(0x19a4c116b8d2d0c8), U64C(0x1e376c085141ab53), U64C(0x2748774cdf8eeb99), U64C(0x34b0bcb5e19b48a8),
  U64C(0x391c0cb3c5c95a63), U64C(0x4ed8aa4ae3418acb), U64C(0x5b9cca4f7763e373), U64C(0x682e6ff3d6b2b8a3),
  U64C(0x748f82ee5defb2fc), U64C(0x78a5636f43172f60), U64C(0x84c87814a1f0ab72), U64C(0x8cc702081a6439ec),
  U64C(0x90befffa23631e28), U64C(0xa4506cebde82bde9), U64C(0xbef9a3f7b2c67915), U64C(0xc67178f2e372532b),
  U64C(0xca273eceea26619c), U64C(0xd186b8c721c0c207), U64C(0xeada7dd6cde0eb1e), U64C(0xf57d4f7fee6ed178),
  U64C(0x06f067aa72176fba), U64C(0x0a637dc5a2c898a6), U64C(0x113f9804bef90dae), U64C(0x1b710b35131c471b),
  U64C(0x28db77f523047d84), U64C(0x32caab7b40c72493), U64C(0x3c9ebe0a15c9bebc), U64C(0x431d67c49c100d4c),
  U64C(0x4cc5d4becb3e42b6), U64C(0x597f299cfc657e2a), U64C(0x5fcb6fab3ad6faec), U64C(0x6c44198c4a475817)
};

/* Short name used by the round macros (T1 / T4 / T8). */
#define K SHA512_K_ARRAY
257 | |||
/*
  Portable SHA-512 compression: processes (numBlocks) consecutive blocks of
  SHA512_BLOCK_SIZE bytes from (data) and updates (state) in place.

  state     : the eight chaining words; read on entry, rewritten after every block.
  data      : input bytes; advanced by SHA512_BLOCK_SIZE per block.
  numBlocks : number of blocks to process; 0 returns immediately.

  The round bodies come from the R*_PRE / R*_MAIN macros, which use the local
  variables W, j, a..h and tmp by name.
*/
Z7_NO_INLINE
void Z7_FASTCALL Sha512_UpdateBlocks(UInt64 state[8], const Byte *data, size_t numBlocks)
{
  /* message schedule: 80 words, or a 16-word ring without Z7_SHA512_BIG_W */
  UInt64 W
#ifdef Z7_SHA512_BIG_W
      [80];
#else
      [16];
#endif
  unsigned j;
  UInt64 a,b,c,d,e,f,g,h;
  /* tmp is needed by T1 and T4; only the fully T8-based configuration omits it */
#if !defined(Z7_SHA512_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
  UInt64 tmp;
#endif

  /* the do/while loop below runs at least once, so reject 0 up front */
  if (numBlocks == 0) return;

  a = state[0];
  b = state[1];
  c = state[2];
  d = state[3];
  e = state[4];
  f = state[5];
  g = state[6];
  h = state[7];

  do
  {

  /* rounds 0..15: schedule words are loaded directly from the input block */
  /* NOTE(review): R4_PRE / R8_PRE are selected on STEP_PRE > 4 alone, but they
     are only defined when Z7_SHA512_UNROLL is defined and STEP_MAIN >= 8 */
  for (j = 0; j < 16; j += STEP_PRE)
  {
    #if STEP_PRE > 4

      #if STEP_PRE < 8
      R4_PRE(0);
      #else
      R8_PRE(0);
      #if STEP_PRE == 16
      R8_PRE(8);
      #endif
      #endif

    #else

      R1_PRE(0)
      #if STEP_PRE >= 2
      R1_PRE(1)
      #if STEP_PRE >= 4
      R1_PRE(2)
      R1_PRE(3)
      #endif
      #endif

    #endif
  }

  /* rounds 16..79: schedule words are derived from earlier ones */
  for (j = 16; j < 80; j += STEP_MAIN)
  {
    #if defined(Z7_SHA512_UNROLL) && STEP_MAIN >= 8

      /* NOTE(review): the STEP_MAIN < 8 branch cannot be taken inside a
         STEP_MAIN >= 8 section */
      #if STEP_MAIN < 8
      R4_MAIN(0)
      #else
      R8_MAIN(0)
      #if STEP_MAIN == 16
      R8_MAIN(8)
      #endif
      #endif

    #else

      R1_MAIN(0)
      #if STEP_MAIN >= 2
      R1_MAIN(1)
      #if STEP_MAIN >= 4
      R2_MAIN(2)
      #if STEP_MAIN >= 8
      R2_MAIN(4)
      R2_MAIN(6)
      #if STEP_MAIN >= 16
      R2_MAIN(8)
      R2_MAIN(10)
      R2_MAIN(12)
      R2_MAIN(14)
      #endif
      #endif
      #endif
      #endif
    #endif
  }

  /* add the block result to the chaining value; a..h keep the sums and so
     already hold the starting values for the next block */
  a += state[0]; state[0] = a;
  b += state[1]; state[1] = b;
  c += state[2]; state[2] = c;
  d += state[3]; state[3] = d;
  e += state[4]; state[4] = e;
  f += state[5]; state[5] = f;
  g += state[6]; state[6] = g;
  h += state[7]; state[7] = h;

  data += SHA512_BLOCK_SIZE;
  }
  while (--numBlocks);
}
362 | |||
363 | |||
364 | #define Sha512_UpdateBlock(p) SHA512_UPDATE_BLOCKS(p)(p->state, p->buffer, 1) | ||
365 | |||
366 | void Sha512_Update(CSha512 *p, const Byte *data, size_t size) | ||
367 | { | ||
368 | if (size == 0) | ||
369 | return; | ||
370 | { | ||
371 | const unsigned pos = (unsigned)p->v.vars.count & (SHA512_BLOCK_SIZE - 1); | ||
372 | const unsigned num = SHA512_BLOCK_SIZE - pos; | ||
373 | p->v.vars.count += size; | ||
374 | if (num > size) | ||
375 | { | ||
376 | memcpy(p->buffer + pos, data, size); | ||
377 | return; | ||
378 | } | ||
379 | if (pos != 0) | ||
380 | { | ||
381 | size -= num; | ||
382 | memcpy(p->buffer + pos, data, num); | ||
383 | data += num; | ||
384 | Sha512_UpdateBlock(p); | ||
385 | } | ||
386 | } | ||
387 | { | ||
388 | const size_t numBlocks = size >> 7; | ||
389 | // if (numBlocks) | ||
390 | SHA512_UPDATE_BLOCKS(p)(p->state, data, numBlocks); | ||
391 | size &= SHA512_BLOCK_SIZE - 1; | ||
392 | if (size == 0) | ||
393 | return; | ||
394 | data += (numBlocks << 7); | ||
395 | memcpy(p->buffer, data, size); | ||
396 | } | ||
397 | } | ||
398 | |||
399 | |||
400 | void Sha512_Final(CSha512 *p, Byte *digest, unsigned digestSize) | ||
401 | { | ||
402 | unsigned pos = (unsigned)p->v.vars.count & (SHA512_BLOCK_SIZE - 1); | ||
403 | p->buffer[pos++] = 0x80; | ||
404 | if (pos > (SHA512_BLOCK_SIZE - 8 * 2)) | ||
405 | { | ||
406 | while (pos != SHA512_BLOCK_SIZE) { p->buffer[pos++] = 0; } | ||
407 | // memset(&p->buf.buffer[pos], 0, SHA512_BLOCK_SIZE - pos); | ||
408 | Sha512_UpdateBlock(p); | ||
409 | pos = 0; | ||
410 | } | ||
411 | memset(&p->buffer[pos], 0, (SHA512_BLOCK_SIZE - 8 * 2) - pos); | ||
412 | { | ||
413 | const UInt64 numBits = p->v.vars.count << 3; | ||
414 | SetBe64(p->buffer + SHA512_BLOCK_SIZE - 8 * 2, 0) // = (p->v.vars.count >> (64 - 3)); (high 64-bits) | ||
415 | SetBe64(p->buffer + SHA512_BLOCK_SIZE - 8 * 1, numBits) | ||
416 | } | ||
417 | Sha512_UpdateBlock(p); | ||
418 | #if 1 && defined(MY_CPU_BE) | ||
419 | memcpy(digest, p->state, digestSize); | ||
420 | #else | ||
421 | { | ||
422 | const unsigned numWords = digestSize >> 3; | ||
423 | unsigned i; | ||
424 | for (i = 0; i < numWords; i++) | ||
425 | { | ||
426 | const UInt64 v = p->state[i]; | ||
427 | SetBe64(digest, v) | ||
428 | digest += 8; | ||
429 | } | ||
430 | if (digestSize & 4) // digestSize == SHA512_224_DIGEST_SIZE | ||
431 | { | ||
432 | const UInt32 v = (UInt32)((p->state[numWords]) >> 32); | ||
433 | SetBe32(digest, v) | ||
434 | } | ||
435 | } | ||
436 | #endif | ||
437 | Sha512_InitState(p, digestSize); | ||
438 | } | ||
439 | |||
440 | |||
441 | |||
442 | |||
/*
  CPU_IsSupported_SHA512() for Windows on ARM64.
  This section is compiled only when _WIN32, Z7_COMPILER_SHA512_SUPPORTED and
  MY_CPU_ARM64 are all defined.
*/
#if defined(_WIN32) && defined(Z7_COMPILER_SHA512_SUPPORTED) \
    && defined(MY_CPU_ARM64)  // we can disable this check to debug in x64

#if 1  // 0 for debug

#include "7zWindows.h"
// #include <stdio.h>
#if 0 && defined(MY_CPU_X86_OR_AMD64)
#include <intrin.h> // for debug : for __ud2()
#endif

/*
  Returns True only if all checks pass:
    1) CPU_IsSupported_CRYPTO() reports support (ARM64 only);
    2) the "CP 4030" registry value exists, is a REG_QWORD, and its bits 12..15
       are equal to 2;
    3) when the probe is compiled in, a test call of Sha512_UpdateBlocks_HW()
       completes without raising an exception and produces the expected value.
  Returns False in every other case.
*/
BoolInt CPU_IsSupported_SHA512(void)
{
#if defined(MY_CPU_ARM64)
  // IsProcessorFeaturePresent() has no SHA512 flag yet, so check the general crypto flag first.
  if (!CPU_IsSupported_CRYPTO())
    return False;
#endif
  // printf("\nCPU_IsSupported_SHA512\n");
  {
    // An application can't read the ID_AA64ISAR0_EL1 register directly,
    // but that register is mapped to the "CP 4030" registry value.
    HKEY key = NULL;
    LONG res = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
        TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
        0, KEY_READ, &key);
    if (res != ERROR_SUCCESS)
      return False;
    {
      DWORD type = 0;
      DWORD count = sizeof(UInt64);
      UInt64 val = 0;
      res = RegQueryValueEx(key, TEXT("CP 4030"), NULL,
          &type, (LPBYTE)&val, &count);
      // the key is closed before the result is examined, so no return path leaks it
      RegCloseKey(key);
      if (res != ERROR_SUCCESS
          || type != REG_QWORD
          || count != sizeof(UInt64)
          || ((unsigned)(val >> 12) & 0xf) != 2)
        return False;
      // The test above parses the SHA2 field of the ID_AA64ISAR0_EL1 register:
      //   0 : no SHA2 instructions implemented
      //   1 : SHA256 implemented
      //   2 : SHA256 and SHA512 implemented
    }
  }


#if 1  // 0 for debug to disable SHA512 PROBE code

/*
----- SHA512 PROBE -----

We suppose that reading the "CP 4030" registry value is enough.
But we also run SHA512 PROBE code, because an exception can be caught here,
while exceptions are not caught when the Sha512 functions are called
from the main code.

NOTE: the arm64 PROBE code doesn't work when it is run via Wine in linux-arm64.
  The program just stops.
Also the x64 version of the PROBE code doesn't work when it is run via the
  Intel SDE emulator without SHA512 support (-skl switch).
  The program stops, and SDE prints the message:
    TID 0 SDE-ERROR: Executed instruction not valid for specified chip (SKYLAKE): vsha512msg1
  But we still want to catch that exception instead of stopping the process.
Open questions:
  Does this PROBE code work in native Windows-arm64 (with/without sha512 hw instructions)?
  Are there any ways to fix the problems with the arm64-wine and x64-SDE cases?
*/

  // printf("\n========== CPU_IsSupported_SHA512 PROBE ========\n");
  {
#ifdef __clang_major__
    // silences the clang warning for the __try / __except tokens;
    // NOTE(review): there is no matching push/pop, so the setting stays active for the rest of the file
    #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
#endif
    __try
    {
#if 0 // 1 : for debug (reduced version to detect sha512)
      const uint64x2_t a = vdupq_n_u64(1);
      const uint64x2_t b = vsha512hq_u64(a, a, a);
      if ((UInt32)vgetq_lane_u64(b, 0) == 0x11800002)
        return True;
#else
      // one buffer holds both the state words and one input block,
      // with every byte set to 0x5a
      MY_ALIGN(16)
      UInt64 temp[SHA512_NUM_DIGEST_WORDS + SHA512_NUM_BLOCK_WORDS];
      memset(temp, 0x5a, sizeof(temp));
#if 0 && defined(MY_CPU_X86_OR_AMD64)
      __ud2(); // for debug : that exception is not problem for SDE
#endif
#if 1
      Sha512_UpdateBlocks_HW(temp,
          (const Byte *)(const void *)(temp + SHA512_NUM_DIGEST_WORDS), 1);
      // printf("\n==== t = %x\n", (UInt32)temp[0]);
      // low 32 bits of the first state word expected for the 0x5a test input
      if ((UInt32)temp[0] == 0xa33cfdf7)
      {
        // printf("\n=== PROBE SHA512: SHA512 supported\n");
        return True;
      }
#endif
#endif
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
      // an exception raised inside the __try block ends up here;
      // execution then falls through to "return False" below
      // printf("\n==== CPU_IsSupported_SHA512 EXCEPTION_EXECUTE_HANDLER\n");
    }
  }
  return False;
#else
  // without SHA512 PROBE code
  return True;
#endif

}

#else

/* Debug variant: always reports that SHA512 hardware is not supported. */
BoolInt CPU_IsSupported_SHA512(void)
{
  return False;
}

#endif
#endif // WIN32 arm64
565 | |||
566 | |||
/*
  Run-time setup of the file-level function pointers.

  When the hardware path is compiled in and the CPU checks pass, both the
  default and the hardware pointer are set to Sha512_UpdateBlocks_HW.
  Otherwise the default pointer is the portable function and the hardware
  pointer is NULL. Does nothing when Z7_COMPILER_SHA512_SUPPORTED is not defined.
*/
void Sha512Prepare(void)
{
 #ifdef Z7_COMPILER_SHA512_SUPPORTED
  SHA512_FUNC_UPDATE_BLOCKS hw = NULL;

  #ifdef MY_CPU_X86_OR_AMD64
  /* on x86/x64 the hardware code is used only when AVX2 is available too */
  if (CPU_IsSupported_SHA512())
  {
    if (CPU_IsSupported_AVX2())
    {
      // printf("\n========== HW SHA512 ======== \n");
      hw = Sha512_UpdateBlocks_HW;
    }
  }
  #else
  if (CPU_IsSupported_SHA512())
  {
    // printf("\n========== HW SHA512 ======== \n");
    hw = Sha512_UpdateBlocks_HW;
  }
  #endif

  g_SHA512_FUNC_UPDATE_BLOCKS_HW = hw;
  g_SHA512_FUNC_UPDATE_BLOCKS = hw ? hw : Sha512_UpdateBlocks;
 #endif
}
588 | |||
589 | |||
/* Remove the helper macros defined in this file so that they are not visible
   to any code that follows.
   NOTE(review): presumably needed for builds that compile several source
   files as one translation unit - confirm. */
#undef K
#undef S0
#undef S1
#undef s0
#undef s1
#undef Ch
#undef Maj
#undef W_MAIN
#undef W_PRE
#undef w
#undef blk2_main
#undef blk2
#undef T1
#undef T4
#undef T8
#undef R1_PRE
#undef R1_MAIN
#undef R2_MAIN
#undef R4
#undef R4_PRE
#undef R4_MAIN
#undef R8
#undef R8_PRE
#undef R8_MAIN
#undef STEP_PRE
#undef STEP_MAIN
#undef Z7_SHA512_BIG_W
#undef Z7_SHA512_UNROLL
#undef Z7_COMPILER_SHA512_SUPPORTED