aboutsummaryrefslogtreecommitdiff
path: root/C/Sha512.c
diff options
context:
space:
mode:
Diffstat (limited to 'C/Sha512.c')
-rw-r--r--C/Sha512.c711
1 files changed, 711 insertions, 0 deletions
diff --git a/C/Sha512.c b/C/Sha512.c
new file mode 100644
index 0000000..f0787fd
--- /dev/null
+++ b/C/Sha512.c
@@ -0,0 +1,711 @@
1/* Sha512.c -- SHA-512 Hash
2: Igor Pavlov : Public domain
3This code is based on public domain code from Wei Dai's Crypto++ library. */
4
5#include "Precomp.h"
6
7#include <string.h>
8
9#include "Sha512.h"
10#include "RotateDefs.h"
11#include "CpuArch.h"
12
13#ifdef MY_CPU_X86_OR_AMD64
14 #if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 170001) \
15 || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 170001) \
16 || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 140000) \
17 || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 2400) && (__INTEL_COMPILER <= 9900) \
18 || defined(_MSC_VER) && (_MSC_VER >= 1940)
19 #define Z7_COMPILER_SHA512_SUPPORTED
20 #endif
21#elif defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
22 #if defined(__ARM_FEATURE_SHA512)
23 #define Z7_COMPILER_SHA512_SUPPORTED
24 #else
25 #if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 130000) \
26 || defined(__GNUC__) && (__GNUC__ >= 9) \
27 ) \
28 || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1940) // fix it
29 #define Z7_COMPILER_SHA512_SUPPORTED
30 #endif
31 #endif
32#endif
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47void Z7_FASTCALL Sha512_UpdateBlocks(UInt64 state[8], const Byte *data, size_t numBlocks);
48
49#ifdef Z7_COMPILER_SHA512_SUPPORTED
50 void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);
51
52 static SHA512_FUNC_UPDATE_BLOCKS g_SHA512_FUNC_UPDATE_BLOCKS = Sha512_UpdateBlocks;
53 static SHA512_FUNC_UPDATE_BLOCKS g_SHA512_FUNC_UPDATE_BLOCKS_HW;
54
55 #define SHA512_UPDATE_BLOCKS(p) p->v.vars.func_UpdateBlocks
56#else
57 #define SHA512_UPDATE_BLOCKS(p) Sha512_UpdateBlocks
58#endif
59
60
61BoolInt Sha512_SetFunction(CSha512 *p, unsigned algo)
62{
63 SHA512_FUNC_UPDATE_BLOCKS func = Sha512_UpdateBlocks;
64
65 #ifdef Z7_COMPILER_SHA512_SUPPORTED
66 if (algo != SHA512_ALGO_SW)
67 {
68 if (algo == SHA512_ALGO_DEFAULT)
69 func = g_SHA512_FUNC_UPDATE_BLOCKS;
70 else
71 {
72 if (algo != SHA512_ALGO_HW)
73 return False;
74 func = g_SHA512_FUNC_UPDATE_BLOCKS_HW;
75 if (!func)
76 return False;
77 }
78 }
79 #else
80 if (algo > 1)
81 return False;
82 #endif
83
84 p->v.vars.func_UpdateBlocks = func;
85 return True;
86}
87
88
89/* define it for speed optimization */
90
91#if 0 // 1 for size optimization
92 #define STEP_PRE 1
93 #define STEP_MAIN 1
94#else
95 #define STEP_PRE 2
96 #define STEP_MAIN 4
97 // #define Z7_SHA512_UNROLL
98#endif
99
100#undef Z7_SHA512_BIG_W
101#if STEP_MAIN != 16
102 #define Z7_SHA512_BIG_W
103#endif
104
105
106#define U64C(x) UINT64_CONST(x)
107
108static MY_ALIGN(64) const UInt64 SHA512_INIT_ARRAYS[4][8] = {
109{ U64C(0x8c3d37c819544da2), U64C(0x73e1996689dcd4d6), U64C(0x1dfab7ae32ff9c82), U64C(0x679dd514582f9fcf),
110 U64C(0x0f6d2b697bd44da8), U64C(0x77e36f7304c48942), U64C(0x3f9d85a86a1d36c8), U64C(0x1112e6ad91d692a1)
111},
112{ U64C(0x22312194fc2bf72c), U64C(0x9f555fa3c84c64c2), U64C(0x2393b86b6f53b151), U64C(0x963877195940eabd),
113 U64C(0x96283ee2a88effe3), U64C(0xbe5e1e2553863992), U64C(0x2b0199fc2c85b8aa), U64C(0x0eb72ddc81c52ca2)
114},
115{ U64C(0xcbbb9d5dc1059ed8), U64C(0x629a292a367cd507), U64C(0x9159015a3070dd17), U64C(0x152fecd8f70e5939),
116 U64C(0x67332667ffc00b31), U64C(0x8eb44a8768581511), U64C(0xdb0c2e0d64f98fa7), U64C(0x47b5481dbefa4fa4)
117},
118{ U64C(0x6a09e667f3bcc908), U64C(0xbb67ae8584caa73b), U64C(0x3c6ef372fe94f82b), U64C(0xa54ff53a5f1d36f1),
119 U64C(0x510e527fade682d1), U64C(0x9b05688c2b3e6c1f), U64C(0x1f83d9abfb41bd6b), U64C(0x5be0cd19137e2179)
120}};
121
122void Sha512_InitState(CSha512 *p, unsigned digestSize)
123{
124 p->v.vars.count = 0;
125 memcpy(p->state, SHA512_INIT_ARRAYS[(size_t)(digestSize >> 4) - 1], sizeof(p->state));
126}
127
128void Sha512_Init(CSha512 *p, unsigned digestSize)
129{
130 p->v.vars.func_UpdateBlocks =
131 #ifdef Z7_COMPILER_SHA512_SUPPORTED
132 g_SHA512_FUNC_UPDATE_BLOCKS;
133 #else
134 NULL;
135 #endif
136 Sha512_InitState(p, digestSize);
137}
138
139#define S0(x) (Z7_ROTR64(x,28) ^ Z7_ROTR64(x,34) ^ Z7_ROTR64(x,39))
140#define S1(x) (Z7_ROTR64(x,14) ^ Z7_ROTR64(x,18) ^ Z7_ROTR64(x,41))
141#define s0(x) (Z7_ROTR64(x, 1) ^ Z7_ROTR64(x, 8) ^ (x >> 7))
142#define s1(x) (Z7_ROTR64(x,19) ^ Z7_ROTR64(x,61) ^ (x >> 6))
143
144#define Ch(x,y,z) (z^(x&(y^z)))
145#define Maj(x,y,z) ((x&y)|(z&(x|y)))
146
147
148#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe64(data + ((size_t)(j) + i) * 8))
149
150#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
151
152#ifdef Z7_SHA512_BIG_W
153 // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
154 #define w(j, i) W[(size_t)(j) + i]
155 #define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
156#else
157 #if STEP_MAIN == 16
158 #define w(j, i) W[(i) & 15]
159 #else
160 #define w(j, i) W[((size_t)(j) + (i)) & 15]
161 #endif
162 #define blk2(j, i) (w(j, i) += blk2_main(j, i))
163#endif
164
165#define W_MAIN(i) blk2(j, i)
166
167
168#define T1(wx, i) \
169 tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
170 h = g; \
171 g = f; \
172 f = e; \
173 e = d + tmp; \
174 tmp += S0(a) + Maj(a, b, c); \
175 d = c; \
176 c = b; \
177 b = a; \
178 a = tmp; \
179
180#define R1_PRE(i) T1( W_PRE, i)
181#define R1_MAIN(i) T1( W_MAIN, i)
182
183#if (!defined(Z7_SHA512_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
184#define R2_MAIN(i) \
185 R1_MAIN(i) \
186 R1_MAIN(i + 1) \
187
188#endif
189
190
191
192#if defined(Z7_SHA512_UNROLL) && STEP_MAIN >= 8
193
194#define T4( a,b,c,d,e,f,g,h, wx, i) \
195 h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
196 tmp = h; \
197 h += d; \
198 d = tmp + S0(a) + Maj(a, b, c); \
199
200#define R4( wx, i) \
201 T4 ( a,b,c,d,e,f,g,h, wx, (i )); \
202 T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
203 T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
204 T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \
205
206#define R4_PRE(i) R4( W_PRE, i)
207#define R4_MAIN(i) R4( W_MAIN, i)
208
209
210#define T8( a,b,c,d,e,f,g,h, wx, i) \
211 h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
212 d += h; \
213 h += S0(a) + Maj(a, b, c); \
214
215#define R8( wx, i) \
216 T8 ( a,b,c,d,e,f,g,h, wx, i ); \
217 T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
218 T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
219 T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
220 T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
221 T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
222 T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
223 T8 ( b,c,d,e,f,g,h,a, wx, i+7); \
224
225#define R8_PRE(i) R8( W_PRE, i)
226#define R8_MAIN(i) R8( W_MAIN, i)
227
228#endif
229
230
231extern
232MY_ALIGN(64) const UInt64 SHA512_K_ARRAY[80];
233MY_ALIGN(64) const UInt64 SHA512_K_ARRAY[80] = {
234 U64C(0x428a2f98d728ae22), U64C(0x7137449123ef65cd), U64C(0xb5c0fbcfec4d3b2f), U64C(0xe9b5dba58189dbbc),
235 U64C(0x3956c25bf348b538), U64C(0x59f111f1b605d019), U64C(0x923f82a4af194f9b), U64C(0xab1c5ed5da6d8118),
236 U64C(0xd807aa98a3030242), U64C(0x12835b0145706fbe), U64C(0x243185be4ee4b28c), U64C(0x550c7dc3d5ffb4e2),
237 U64C(0x72be5d74f27b896f), U64C(0x80deb1fe3b1696b1), U64C(0x9bdc06a725c71235), U64C(0xc19bf174cf692694),
238 U64C(0xe49b69c19ef14ad2), U64C(0xefbe4786384f25e3), U64C(0x0fc19dc68b8cd5b5), U64C(0x240ca1cc77ac9c65),
239 U64C(0x2de92c6f592b0275), U64C(0x4a7484aa6ea6e483), U64C(0x5cb0a9dcbd41fbd4), U64C(0x76f988da831153b5),
240 U64C(0x983e5152ee66dfab), U64C(0xa831c66d2db43210), U64C(0xb00327c898fb213f), U64C(0xbf597fc7beef0ee4),
241 U64C(0xc6e00bf33da88fc2), U64C(0xd5a79147930aa725), U64C(0x06ca6351e003826f), U64C(0x142929670a0e6e70),
242 U64C(0x27b70a8546d22ffc), U64C(0x2e1b21385c26c926), U64C(0x4d2c6dfc5ac42aed), U64C(0x53380d139d95b3df),
243 U64C(0x650a73548baf63de), U64C(0x766a0abb3c77b2a8), U64C(0x81c2c92e47edaee6), U64C(0x92722c851482353b),
244 U64C(0xa2bfe8a14cf10364), U64C(0xa81a664bbc423001), U64C(0xc24b8b70d0f89791), U64C(0xc76c51a30654be30),
245 U64C(0xd192e819d6ef5218), U64C(0xd69906245565a910), U64C(0xf40e35855771202a), U64C(0x106aa07032bbd1b8),
246 U64C(0x19a4c116b8d2d0c8), U64C(0x1e376c085141ab53), U64C(0x2748774cdf8eeb99), U64C(0x34b0bcb5e19b48a8),
247 U64C(0x391c0cb3c5c95a63), U64C(0x4ed8aa4ae3418acb), U64C(0x5b9cca4f7763e373), U64C(0x682e6ff3d6b2b8a3),
248 U64C(0x748f82ee5defb2fc), U64C(0x78a5636f43172f60), U64C(0x84c87814a1f0ab72), U64C(0x8cc702081a6439ec),
249 U64C(0x90befffa23631e28), U64C(0xa4506cebde82bde9), U64C(0xbef9a3f7b2c67915), U64C(0xc67178f2e372532b),
250 U64C(0xca273eceea26619c), U64C(0xd186b8c721c0c207), U64C(0xeada7dd6cde0eb1e), U64C(0xf57d4f7fee6ed178),
251 U64C(0x06f067aa72176fba), U64C(0x0a637dc5a2c898a6), U64C(0x113f9804bef90dae), U64C(0x1b710b35131c471b),
252 U64C(0x28db77f523047d84), U64C(0x32caab7b40c72493), U64C(0x3c9ebe0a15c9bebc), U64C(0x431d67c49c100d4c),
253 U64C(0x4cc5d4becb3e42b6), U64C(0x597f299cfc657e2a), U64C(0x5fcb6fab3ad6faec), U64C(0x6c44198c4a475817)
254};
255
256#define K SHA512_K_ARRAY
257
258Z7_NO_INLINE
259void Z7_FASTCALL Sha512_UpdateBlocks(UInt64 state[8], const Byte *data, size_t numBlocks)
260{
261 UInt64 W
262#ifdef Z7_SHA512_BIG_W
263 [80];
264#else
265 [16];
266#endif
267 unsigned j;
268 UInt64 a,b,c,d,e,f,g,h;
269#if !defined(Z7_SHA512_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
270 UInt64 tmp;
271#endif
272
273 if (numBlocks == 0) return;
274
275 a = state[0];
276 b = state[1];
277 c = state[2];
278 d = state[3];
279 e = state[4];
280 f = state[5];
281 g = state[6];
282 h = state[7];
283
284 do
285 {
286
287 for (j = 0; j < 16; j += STEP_PRE)
288 {
289 #if STEP_PRE > 4
290
291 #if STEP_PRE < 8
292 R4_PRE(0);
293 #else
294 R8_PRE(0);
295 #if STEP_PRE == 16
296 R8_PRE(8);
297 #endif
298 #endif
299
300 #else
301
302 R1_PRE(0)
303 #if STEP_PRE >= 2
304 R1_PRE(1)
305 #if STEP_PRE >= 4
306 R1_PRE(2)
307 R1_PRE(3)
308 #endif
309 #endif
310
311 #endif
312 }
313
314 for (j = 16; j < 80; j += STEP_MAIN)
315 {
316 #if defined(Z7_SHA512_UNROLL) && STEP_MAIN >= 8
317
318 #if STEP_MAIN < 8
319 R4_MAIN(0)
320 #else
321 R8_MAIN(0)
322 #if STEP_MAIN == 16
323 R8_MAIN(8)
324 #endif
325 #endif
326
327 #else
328
329 R1_MAIN(0)
330 #if STEP_MAIN >= 2
331 R1_MAIN(1)
332 #if STEP_MAIN >= 4
333 R2_MAIN(2)
334 #if STEP_MAIN >= 8
335 R2_MAIN(4)
336 R2_MAIN(6)
337 #if STEP_MAIN >= 16
338 R2_MAIN(8)
339 R2_MAIN(10)
340 R2_MAIN(12)
341 R2_MAIN(14)
342 #endif
343 #endif
344 #endif
345 #endif
346 #endif
347 }
348
349 a += state[0]; state[0] = a;
350 b += state[1]; state[1] = b;
351 c += state[2]; state[2] = c;
352 d += state[3]; state[3] = d;
353 e += state[4]; state[4] = e;
354 f += state[5]; state[5] = f;
355 g += state[6]; state[6] = g;
356 h += state[7]; state[7] = h;
357
358 data += SHA512_BLOCK_SIZE;
359 }
360 while (--numBlocks);
361}
362
363
364#define Sha512_UpdateBlock(p) SHA512_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
365
366void Sha512_Update(CSha512 *p, const Byte *data, size_t size)
367{
368 if (size == 0)
369 return;
370 {
371 const unsigned pos = (unsigned)p->v.vars.count & (SHA512_BLOCK_SIZE - 1);
372 const unsigned num = SHA512_BLOCK_SIZE - pos;
373 p->v.vars.count += size;
374 if (num > size)
375 {
376 memcpy(p->buffer + pos, data, size);
377 return;
378 }
379 if (pos != 0)
380 {
381 size -= num;
382 memcpy(p->buffer + pos, data, num);
383 data += num;
384 Sha512_UpdateBlock(p);
385 }
386 }
387 {
388 const size_t numBlocks = size >> 7;
389 // if (numBlocks)
390 SHA512_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
391 size &= SHA512_BLOCK_SIZE - 1;
392 if (size == 0)
393 return;
394 data += (numBlocks << 7);
395 memcpy(p->buffer, data, size);
396 }
397}
398
399
400void Sha512_Final(CSha512 *p, Byte *digest, unsigned digestSize)
401{
402 unsigned pos = (unsigned)p->v.vars.count & (SHA512_BLOCK_SIZE - 1);
403 p->buffer[pos++] = 0x80;
404 if (pos > (SHA512_BLOCK_SIZE - 8 * 2))
405 {
406 while (pos != SHA512_BLOCK_SIZE) { p->buffer[pos++] = 0; }
407 // memset(&p->buf.buffer[pos], 0, SHA512_BLOCK_SIZE - pos);
408 Sha512_UpdateBlock(p);
409 pos = 0;
410 }
411 memset(&p->buffer[pos], 0, (SHA512_BLOCK_SIZE - 8 * 2) - pos);
412 {
413 const UInt64 numBits = p->v.vars.count << 3;
414 SetBe64(p->buffer + SHA512_BLOCK_SIZE - 8 * 2, 0) // = (p->v.vars.count >> (64 - 3)); (high 64-bits)
415 SetBe64(p->buffer + SHA512_BLOCK_SIZE - 8 * 1, numBits)
416 }
417 Sha512_UpdateBlock(p);
418#if 1 && defined(MY_CPU_BE)
419 memcpy(digest, p->state, digestSize);
420#else
421 {
422 const unsigned numWords = digestSize >> 3;
423 unsigned i;
424 for (i = 0; i < numWords; i++)
425 {
426 const UInt64 v = p->state[i];
427 SetBe64(digest, v)
428 digest += 8;
429 }
430 if (digestSize & 4) // digestSize == SHA512_224_DIGEST_SIZE
431 {
432 const UInt32 v = (UInt32)((p->state[numWords]) >> 32);
433 SetBe32(digest, v)
434 }
435 }
436#endif
437 Sha512_InitState(p, digestSize);
438}
439
440
441
442// #define Z7_SHA512_PROBE_DEBUG // for debug
443
444#if defined(Z7_SHA512_PROBE_DEBUG) || defined(Z7_COMPILER_SHA512_SUPPORTED)
445
446#if defined(Z7_SHA512_PROBE_DEBUG) \
447 || defined(_WIN32) && defined(MY_CPU_ARM64)
448#ifndef Z7_SHA512_USE_PROBE
449#define Z7_SHA512_USE_PROBE
450#endif
451#endif
452
453#ifdef Z7_SHA512_USE_PROBE
454
455#ifdef Z7_SHA512_PROBE_DEBUG
456#include <stdio.h>
457#define PRF(x) x
458#else
459#define PRF(x)
460#endif
461
462#if 0 || !defined(_MSC_VER) // 1 || : for debug LONGJMP mode
463// MINGW doesn't support __try. So we use signal() / longjmp().
464// Note: signal() / longjmp() probably is not thread-safe.
465// So we must call Sha512Prepare() from main thread at program start.
466#ifndef Z7_SHA512_USE_LONGJMP
467#define Z7_SHA512_USE_LONGJMP
468#endif
469#endif
470
471#ifdef Z7_SHA512_USE_LONGJMP
472#include <signal.h>
473#include <setjmp.h>
474static jmp_buf g_Sha512_jmp_buf;
475// static int g_Sha512_Unsupported;
476
477#if defined(__GNUC__) && (__GNUC__ >= 8) \
478 || defined(__clang__) && (__clang_major__ >= 3)
479 __attribute__((noreturn))
480#endif
481static void Z7_CDECL Sha512_signal_Handler(int v)
482{
483 PRF(printf("======== Sha512_signal_Handler = %x\n", (unsigned)v);)
484 // g_Sha512_Unsupported = 1;
485 longjmp(g_Sha512_jmp_buf, 1);
486}
487#endif // Z7_SHA512_USE_LONGJMP
488
489
490#if defined(_WIN32)
491#include "7zWindows.h"
492#endif
493
494#if defined(MY_CPU_ARM64)
495// #define Z7_SHA512_USE_SIMPLIFIED_PROBE // for debug
496#endif
497
498#ifdef Z7_SHA512_USE_SIMPLIFIED_PROBE
499#include <arm_neon.h>
500#if defined(__clang__)
501 __attribute__((__target__("sha3")))
502#elif !defined(_MSC_VER)
503 __attribute__((__target__("arch=armv8.2-a+sha3")))
504#endif
505#endif
506static BoolInt CPU_IsSupported_SHA512_Probe(void)
507{
508 PRF(printf("\n== CPU_IsSupported_SHA512_Probe\n");)
509#if defined(_WIN32) && defined(MY_CPU_ARM64)
510 // we have no SHA512 flag for IsProcessorFeaturePresent() still.
511 if (!CPU_IsSupported_CRYPTO())
512 return False;
513 PRF(printf("==== Registry check\n");)
514 {
515 // we can't read ID_AA64ISAR0_EL1 register from application.
516 // but ID_AA64ISAR0_EL1 register is mapped to "CP 4030" registry value.
517 HKEY key = NULL;
518 LONG res = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
519 TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
520 0, KEY_READ, &key);
521 if (res != ERROR_SUCCESS)
522 return False;
523 {
524 DWORD type = 0;
525 DWORD count = sizeof(UInt64);
526 UInt64 val = 0;
527 res = RegQueryValueEx(key, TEXT("CP 4030"), NULL,
528 &type, (LPBYTE)&val, &count);
529 RegCloseKey(key);
530 if (res != ERROR_SUCCESS
531 || type != REG_QWORD
532 || count != sizeof(UInt64)
533 || ((unsigned)(val >> 12) & 0xf) != 2)
534 return False;
535 // we parse SHA2 field of ID_AA64ISAR0_EL1 register:
536 // 0 : No SHA2 instructions implemented
537 // 1 : SHA256 implemented
538 // 2 : SHA256 and SHA512 implemented
539 }
540 }
541#endif // defined(_WIN32) && defined(MY_CPU_ARM64)
542
543
544#if 1 // 0 for debug to disable SHA512 PROBE code
545
546/*
547----- SHA512 PROBE -----
548
549We suppose that "CP 4030" registry reading is enough.
550But we use additional SHA512 PROBE code, because
551we can catch exception here, and we don't catch exceptions,
552if we call Sha512 functions from main code.
553
554NOTE: arm64 PROBE code doesn't work, if we call it via Wine in linux-arm64.
555The program just stops.
556Also x64 version of PROBE code doesn't work, if we run it via Intel SDE emulator
557without SHA512 support (-skl switch),
558The program stops, and we have message from SDE:
559 TID 0 SDE-ERROR: Executed instruction not valid for specified chip (SKYLAKE): vsha512msg1
560But we still want to catch that exception instead of process stopping.
561Does this PROBE code work in native Windows-arm64 (with/without sha512 hw instructions)?
562Are there any ways to fix the problems with arm64-wine and x64-SDE cases?
563*/
564
565 PRF(printf("==== CPU_IsSupported_SHA512 PROBE\n");)
566 {
567 BoolInt isSupported = False;
568#ifdef Z7_SHA512_USE_LONGJMP
569 void (Z7_CDECL *signal_prev)(int);
570 /*
571 if (g_Sha512_Unsupported)
572 {
573 PRF(printf("==== g_Sha512_Unsupported\n");)
574 return False;
575 }
576 */
577 printf("====== signal(SIGILL)\n");
578 signal_prev = signal(SIGILL, Sha512_signal_Handler);
579 if (signal_prev == SIG_ERR)
580 {
581 PRF(printf("====== signal fail\n");)
582 return False;
583 }
584 // PRF(printf("==== signal_prev = %p\n", (void *)signal_prev);)
585 // docs: Before the specified function is executed,
586 // the value of func is set to SIG_DFL.
587 // So we can exit if (setjmp(g_Sha512_jmp_buf) != 0).
588 PRF(printf("====== setjmp\n");)
589 if (!setjmp(g_Sha512_jmp_buf))
590#else // Z7_SHA512_USE_LONGJMP
591
592#ifdef _MSC_VER
593#ifdef __clang_major__
594 #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
595#endif
596 __try
597#endif
598#endif // Z7_SHA512_USE_LONGJMP
599
600 {
601#if defined(Z7_COMPILER_SHA512_SUPPORTED)
602#ifdef Z7_SHA512_USE_SIMPLIFIED_PROBE
603 // simplified sha512 check for arm64:
604 const uint64x2_t a = vdupq_n_u64(1);
605 const uint64x2_t b = vsha512hq_u64(a, a, a);
606 PRF(printf("======== vsha512hq_u64 probe\n");)
607 if ((UInt32)vgetq_lane_u64(b, 0) == 0x11800002)
608#else
609 MY_ALIGN(16)
610 UInt64 temp[SHA512_NUM_DIGEST_WORDS + SHA512_NUM_BLOCK_WORDS];
611 memset(temp, 0x5a, sizeof(temp));
612 PRF(printf("======== Sha512_UpdateBlocks_HW\n");)
613 Sha512_UpdateBlocks_HW(temp,
614 (const Byte *)(const void *)(temp + SHA512_NUM_DIGEST_WORDS), 1);
615 // PRF(printf("======== t = %x\n", (UInt32)temp[0]);)
616 if ((UInt32)temp[0] == 0xa33cfdf7)
617#endif
618 {
619 PRF(printf("======== PROBE SHA512: SHA512 is supported\n");)
620 isSupported = True;
621 }
622#else // Z7_COMPILER_SHA512_SUPPORTED
623 // for debug : we generate bad instrction or raise exception.
624 // __except() doesn't catch raise() calls.
625#ifdef Z7_SHA512_USE_LONGJMP
626 PRF(printf("====== raise(SIGILL)\n");)
627 raise(SIGILL);
628#else
629#if defined(_MSC_VER) && defined(MY_CPU_X86)
630 __asm ud2
631#endif
632#endif // Z7_SHA512_USE_LONGJMP
633#endif // Z7_COMPILER_SHA512_SUPPORTED
634 }
635
636#ifdef Z7_SHA512_USE_LONGJMP
637 PRF(printf("====== restore signal SIGILL\n");)
638 signal(SIGILL, signal_prev);
639#elif _MSC_VER
640 __except (EXCEPTION_EXECUTE_HANDLER)
641 {
642 PRF(printf("==== CPU_IsSupported_SHA512 __except(EXCEPTION_EXECUTE_HANDLER)\n");)
643 }
644#endif
645 PRF(printf("== return (sha512 supported) = %d\n", isSupported);)
646 return isSupported;
647 }
648#else
649 // without SHA512 PROBE code
650 return True;
651#endif
652}
653
654#endif // Z7_SHA512_USE_PROBE
655#endif // defined(Z7_SHA512_PROBE_DEBUG) || defined(Z7_COMPILER_SHA512_SUPPORTED)
656
657
658void Sha512Prepare(void)
659{
660#ifdef Z7_COMPILER_SHA512_SUPPORTED
661 SHA512_FUNC_UPDATE_BLOCKS f, f_hw;
662 f = Sha512_UpdateBlocks;
663 f_hw = NULL;
664#ifdef Z7_SHA512_USE_PROBE
665 if (CPU_IsSupported_SHA512_Probe())
666#elif defined(MY_CPU_X86_OR_AMD64)
667 if (CPU_IsSupported_SHA512() && CPU_IsSupported_AVX2())
668#else
669 if (CPU_IsSupported_SHA512())
670#endif
671 {
672 // printf("\n========== HW SHA512 ======== \n");
673 f = f_hw = Sha512_UpdateBlocks_HW;
674 }
675 g_SHA512_FUNC_UPDATE_BLOCKS = f;
676 g_SHA512_FUNC_UPDATE_BLOCKS_HW = f_hw;
677#elif defined(Z7_SHA512_PROBE_DEBUG)
678 CPU_IsSupported_SHA512_Probe(); // for debug
679#endif
680}
681
682
683#undef K
684#undef S0
685#undef S1
686#undef s0
687#undef s1
688#undef Ch
689#undef Maj
690#undef W_MAIN
691#undef W_PRE
692#undef w
693#undef blk2_main
694#undef blk2
695#undef T1
696#undef T4
697#undef T8
698#undef R1_PRE
699#undef R1_MAIN
700#undef R2_MAIN
701#undef R4
702#undef R4_PRE
703#undef R4_MAIN
704#undef R8
705#undef R8_PRE
706#undef R8_MAIN
707#undef STEP_PRE
708#undef STEP_MAIN
709#undef Z7_SHA512_BIG_W
710#undef Z7_SHA512_UNROLL
711#undef Z7_COMPILER_SHA512_SUPPORTED