diff options
Diffstat (limited to 'C/Sha256Opt.c')
-rw-r--r-- | C/Sha256Opt.c | 127 |
1 files changed, 104 insertions, 23 deletions
diff --git a/C/Sha256Opt.c b/C/Sha256Opt.c index e4465e3..eb38166 100644 --- a/C/Sha256Opt.c +++ b/C/Sha256Opt.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions | 1 | /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions |
2 | 2023-04-02 : Igor Pavlov : Public domain */ | 2 | 2024-03-01 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | #include "Compiler.h" | 5 | #include "Compiler.h" |
@@ -11,6 +11,8 @@ | |||
11 | #endif | 11 | #endif |
12 | #endif | 12 | #endif |
13 | 13 | ||
14 | // #define Z7_USE_HW_SHA_STUB // for debug | ||
15 | |||
14 | #ifdef MY_CPU_X86_OR_AMD64 | 16 | #ifdef MY_CPU_X86_OR_AMD64 |
15 | #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check | 17 | #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check |
16 | #define USE_HW_SHA | 18 | #define USE_HW_SHA |
@@ -32,9 +34,14 @@ | |||
32 | #endif | 34 | #endif |
33 | #if (_MSC_VER >= USE_VER_MIN) | 35 | #if (_MSC_VER >= USE_VER_MIN) |
34 | #define USE_HW_SHA | 36 | #define USE_HW_SHA |
37 | #else | ||
38 | #define Z7_USE_HW_SHA_STUB | ||
35 | #endif | 39 | #endif |
36 | #endif | 40 | #endif |
37 | // #endif // MY_CPU_X86_OR_AMD64 | 41 | // #endif // MY_CPU_X86_OR_AMD64 |
42 | #ifndef USE_HW_SHA | ||
43 | // #define Z7_USE_HW_SHA_STUB // for debug | ||
44 | #endif | ||
38 | 45 | ||
39 | #ifdef USE_HW_SHA | 46 | #ifdef USE_HW_SHA |
40 | 47 | ||
@@ -202,19 +209,28 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ | |||
202 | 209 | ||
203 | #endif // USE_HW_SHA | 210 | #endif // USE_HW_SHA |
204 | 211 | ||
205 | #elif defined(MY_CPU_ARM_OR_ARM64) | 212 | #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) |
206 | 213 | ||
207 | #if defined(__clang__) | 214 | #if defined(__ARM_FEATURE_SHA2) \ |
208 | #if (__clang_major__ >= 8) // fix that check | 215 | || defined(__ARM_FEATURE_CRYPTO) |
216 | #define USE_HW_SHA | ||
217 | #else | ||
218 | #if defined(MY_CPU_ARM64) \ | ||
219 | || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ | ||
220 | || defined(Z7_MSC_VER_ORIGINAL) | ||
221 | #if defined(__ARM_FP) && \ | ||
222 | ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ | ||
223 | || defined(__GNUC__) && (__GNUC__ >= 6) \ | ||
224 | ) \ | ||
225 | || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) | ||
226 | #if defined(MY_CPU_ARM64) \ | ||
227 | || !defined(Z7_CLANG_VERSION) \ | ||
228 | || defined(__ARM_NEON) && \ | ||
229 | (Z7_CLANG_VERSION < 170000 || \ | ||
230 | Z7_CLANG_VERSION > 170001) | ||
209 | #define USE_HW_SHA | 231 | #define USE_HW_SHA |
210 | #endif | 232 | #endif |
211 | #elif defined(__GNUC__) | ||
212 | #if (__GNUC__ >= 6) // fix that check | ||
213 | #define USE_HW_SHA | ||
214 | #endif | 233 | #endif |
215 | #elif defined(_MSC_VER) | ||
216 | #if _MSC_VER >= 1910 | ||
217 | #define USE_HW_SHA | ||
218 | #endif | 234 | #endif |
219 | #endif | 235 | #endif |
220 | 236 | ||
@@ -222,24 +238,88 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ | |||
222 | 238 | ||
223 | // #pragma message("=== Sha256 HW === ") | 239 | // #pragma message("=== Sha256 HW === ") |
224 | 240 | ||
241 | |||
225 | #if defined(__clang__) || defined(__GNUC__) | 242 | #if defined(__clang__) || defined(__GNUC__) |
243 | #if !defined(__ARM_FEATURE_SHA2) && \ | ||
244 | !defined(__ARM_FEATURE_CRYPTO) | ||
226 | #ifdef MY_CPU_ARM64 | 245 | #ifdef MY_CPU_ARM64 |
246 | #if defined(__clang__) | ||
247 | #define ATTRIB_SHA __attribute__((__target__("crypto"))) | ||
248 | #else | ||
227 | #define ATTRIB_SHA __attribute__((__target__("+crypto"))) | 249 | #define ATTRIB_SHA __attribute__((__target__("+crypto"))) |
250 | #endif | ||
228 | #else | 251 | #else |
252 | #if defined(__clang__) && (__clang_major__ >= 1) | ||
253 | #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2"))) | ||
254 | #else | ||
229 | #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) | 255 | #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) |
256 | #endif | ||
230 | #endif | 257 | #endif |
258 | #endif | ||
231 | #else | 259 | #else |
232 | // _MSC_VER | 260 | // _MSC_VER |
233 | // for arm32 | 261 | // for arm32 |
234 | #define _ARM_USE_NEW_NEON_INTRINSICS | 262 | #define _ARM_USE_NEW_NEON_INTRINSICS |
235 | #endif | 263 | #endif |
236 | 264 | ||
237 | #if defined(_MSC_VER) && defined(MY_CPU_ARM64) | 265 | |
266 | |||
267 | |||
268 | |||
269 | #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) | ||
238 | #include <arm64_neon.h> | 270 | #include <arm64_neon.h> |
239 | #else | 271 | #else |
272 | |||
273 | |||
274 | |||
275 | |||
276 | |||
277 | |||
278 | |||
279 | |||
280 | |||
281 | #if defined(__clang__) && __clang_major__ < 16 | ||
282 | #if !defined(__ARM_FEATURE_SHA2) && \ | ||
283 | !defined(__ARM_FEATURE_CRYPTO) | ||
284 | // #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ") | ||
285 | Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER | ||
286 | #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1 | ||
287 | // #if defined(__clang__) && __clang_major__ < 13 | ||
288 | #define __ARM_FEATURE_CRYPTO 1 | ||
289 | // #else | ||
290 | #define __ARM_FEATURE_SHA2 1 | ||
291 | // #endif | ||
292 | Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER | ||
293 | #endif | ||
294 | #endif // clang | ||
295 | |||
296 | #if defined(__clang__) | ||
297 | |||
298 | #if defined(__ARM_ARCH) && __ARM_ARCH < 8 | ||
299 | Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER | ||
300 | // #pragma message("#define __ARM_ARCH 8") | ||
301 | #undef __ARM_ARCH | ||
302 | #define __ARM_ARCH 8 | ||
303 | Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER | ||
304 | #endif | ||
305 | |||
306 | #endif // clang | ||
307 | |||
240 | #include <arm_neon.h> | 308 | #include <arm_neon.h> |
309 | |||
310 | #if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \ | ||
311 | defined(__ARM_FEATURE_CRYPTO) && \ | ||
312 | defined(__ARM_FEATURE_SHA2) | ||
313 | Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER | ||
314 | #undef __ARM_FEATURE_CRYPTO | ||
315 | #undef __ARM_FEATURE_SHA2 | ||
316 | #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET | ||
317 | Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER | ||
318 | // #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ") | ||
241 | #endif | 319 | #endif |
242 | 320 | ||
321 | #endif // Z7_MSC_VER_ORIGINAL | ||
322 | |||
243 | typedef uint32x4_t v128; | 323 | typedef uint32x4_t v128; |
244 | // typedef __n128 v128; // MSVC | 324 | // typedef __n128 v128; // MSVC |
245 | 325 | ||
@@ -316,10 +396,10 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ | |||
316 | LOAD_SHUFFLE (m2, 2) | 396 | LOAD_SHUFFLE (m2, 2) |
317 | LOAD_SHUFFLE (m3, 3) | 397 | LOAD_SHUFFLE (m3, 3) |
318 | 398 | ||
319 | R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); | 399 | R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ) |
320 | R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); | 400 | R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) |
321 | R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); | 401 | R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) |
322 | R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); | 402 | R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ) |
323 | 403 | ||
324 | state0 = vaddq_u32(state0, state0_save); | 404 | state0 = vaddq_u32(state0, state0_save); |
325 | state1 = vaddq_u32(state1, state1_save); | 405 | state1 = vaddq_u32(state1, state1_save); |
@@ -337,16 +417,17 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ | |||
337 | #endif // MY_CPU_ARM_OR_ARM64 | 417 | #endif // MY_CPU_ARM_OR_ARM64 |
338 | 418 | ||
339 | 419 | ||
340 | #ifndef USE_HW_SHA | 420 | #if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB) |
341 | |||
342 | // #error Stop_Compiling_UNSUPPORTED_SHA | 421 | // #error Stop_Compiling_UNSUPPORTED_SHA |
343 | // #include <stdlib.h> | 422 | // #include <stdlib.h> |
344 | 423 | // We can compile this file with another C compiler, | |
424 | // or we can compile asm version. | ||
425 | // So we can generate real code instead of this stub function. | ||
345 | // #include "Sha256.h" | 426 | // #include "Sha256.h" |
346 | void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); | 427 | // #if defined(_MSC_VER) |
347 | |||
348 | #pragma message("Sha256 HW-SW stub was used") | 428 | #pragma message("Sha256 HW-SW stub was used") |
349 | 429 | // #endif | |
430 | void Z7_FASTCALL Sha256_UpdateBlocks (UInt32 state[8], const Byte *data, size_t numBlocks); | ||
350 | void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); | 431 | void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); |
351 | void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) | 432 | void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) |
352 | { | 433 | { |
@@ -359,7 +440,6 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ | |||
359 | return; | 440 | return; |
360 | */ | 441 | */ |
361 | } | 442 | } |
362 | |||
363 | #endif | 443 | #endif |
364 | 444 | ||
365 | 445 | ||
@@ -384,3 +464,4 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ | |||
384 | #undef USE_HW_SHA | 464 | #undef USE_HW_SHA |
385 | #undef ATTRIB_SHA | 465 | #undef ATTRIB_SHA |
386 | #undef USE_VER_MIN | 466 | #undef USE_VER_MIN |
467 | #undef Z7_USE_HW_SHA_STUB | ||