From fc662341e6f85da78ada0e443f6116b978f79f22 Mon Sep 17 00:00:00 2001 From: Igor Pavlov <87184205+ip7z@users.noreply.github.com> Date: Tue, 14 May 2024 00:00:00 +0000 Subject: 24.05 --- C/Sha256Opt.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 104 insertions(+), 23 deletions(-) (limited to 'C/Sha256Opt.c') diff --git a/C/Sha256Opt.c b/C/Sha256Opt.c index e4465e3..eb38166 100644 --- a/C/Sha256Opt.c +++ b/C/Sha256Opt.c @@ -1,5 +1,5 @@ /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "Compiler.h" @@ -11,6 +11,8 @@ #endif #endif +// #define Z7_USE_HW_SHA_STUB // for debug + #ifdef MY_CPU_X86_OR_AMD64 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check #define USE_HW_SHA @@ -32,9 +34,14 @@ #endif #if (_MSC_VER >= USE_VER_MIN) #define USE_HW_SHA + #else + #define Z7_USE_HW_SHA_STUB #endif #endif // #endif // MY_CPU_X86_OR_AMD64 +#ifndef USE_HW_SHA + // #define Z7_USE_HW_SHA_STUB // for debug +#endif #ifdef USE_HW_SHA @@ -202,19 +209,28 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ #endif // USE_HW_SHA -#elif defined(MY_CPU_ARM_OR_ARM64) - - #if defined(__clang__) - #if (__clang_major__ >= 8) // fix that check +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__ARM_FEATURE_SHA2) \ + || defined(__ARM_FEATURE_CRYPTO) + #define USE_HW_SHA + #else + #if defined(MY_CPU_ARM64) \ + || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ + || defined(Z7_MSC_VER_ORIGINAL) + #if defined(__ARM_FP) && \ + ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) \ + ) \ + || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) && \ + (Z7_CLANG_VERSION < 170000 || \ + Z7_CLANG_VERSION > 170001) #define USE_HW_SHA #endif - #elif defined(__GNUC__) - #if (__GNUC__ >= 6) // fix that check - #define USE_HW_SHA #endif - #elif defined(_MSC_VER) - #if _MSC_VER >= 1910 - #define USE_HW_SHA #endif #endif @@ -222,24 +238,88 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ // #pragma message("=== Sha256 HW === ") + #if defined(__clang__) || defined(__GNUC__) +#if !defined(__ARM_FEATURE_SHA2) && \ + !defined(__ARM_FEATURE_CRYPTO) #ifdef MY_CPU_ARM64 +#if defined(__clang__) + #define ATTRIB_SHA __attribute__((__target__("crypto"))) +#else #define ATTRIB_SHA __attribute__((__target__("+crypto"))) +#endif #else +#if defined(__clang__) && (__clang_major__ >= 1) + #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2"))) +#else #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) +#endif #endif +#endif #else // _MSC_VER // for arm32 #define _ARM_USE_NEW_NEON_INTRINSICS #endif -#if defined(_MSC_VER) && defined(MY_CPU_ARM64) + + + + +#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) #include <arm64_neon.h> #else + + + + + + + + + +#if defined(__clang__) && __clang_major__ < 16 +#if !defined(__ARM_FEATURE_SHA2) && \ + !defined(__ARM_FEATURE_CRYPTO) +// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ") + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1 +// #if defined(__clang__) && __clang_major__ < 13 + #define __ARM_FEATURE_CRYPTO 1 +// #else + #define __ARM_FEATURE_SHA2 1 +// #endif + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif +#endif // clang + +#if defined(__clang__) + +#if defined(__ARM_ARCH) && __ARM_ARCH < 8 + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #pragma message("#define __ARM_ARCH 8") + #undef __ARM_ARCH + #define __ARM_ARCH 8 + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // clang + #include <arm_neon.h> + +#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \ + defined(__ARM_FEATURE_CRYPTO) && \ + defined(__ARM_FEATURE_SHA2) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #undef __ARM_FEATURE_CRYPTO + #undef __ARM_FEATURE_SHA2 + #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ") #endif +#endif // Z7_MSC_VER_ORIGINAL + typedef uint32x4_t v128; // typedef __n128 v128; // MSVC @@ -316,10 +396,10 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ LOAD_SHUFFLE (m2, 2) LOAD_SHUFFLE (m3, 3) - R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); - R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); - R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); - R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); + R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ) + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ) state0 = vaddq_u32(state0, state0_save); state1 = vaddq_u32(state1, state1_save); @@ -337,16 +417,17 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ #endif // MY_CPU_ARM_OR_ARM64 -#ifndef USE_HW_SHA - +#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB) // #error Stop_Compiling_UNSUPPORTED_SHA // #include <stdlib.h> - +// We can compile this file with another C compiler, +// or we can compile asm version. +// So we can generate real code instead of this stub function. // #include "Sha256.h" -void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); - +// #if defined(_MSC_VER) #pragma message("Sha256 HW-SW stub was used") - +// #endif +void Z7_FASTCALL Sha256_UpdateBlocks (UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) { @@ -359,7 +440,6 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ return; */ } - #endif @@ -384,3 +464,4 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_ #undef USE_HW_SHA #undef ATTRIB_SHA #undef USE_VER_MIN +#undef Z7_USE_HW_SHA_STUB -- cgit v1.2.3-55-g6feb