From 5b39dc76f1bc82f941d5c800ab9f34407a06b53a Mon Sep 17 00:00:00 2001 From: Igor Pavlov <87184205+ip7z@users.noreply.github.com> Date: Wed, 21 Jun 2023 00:00:00 +0000 Subject: 23.01 --- C/Sha256Opt.c | 129 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 71 insertions(+), 58 deletions(-) (limited to 'C/Sha256Opt.c') diff --git a/C/Sha256Opt.c b/C/Sha256Opt.c index decc138..e4465e3 100644 --- a/C/Sha256Opt.c +++ b/C/Sha256Opt.c @@ -1,7 +1,9 @@ /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" +#include "Compiler.h" +#include "CpuArch.h" #if defined(_MSC_VER) #if (_MSC_VER < 1900) && (_MSC_VER >= 1200) @@ -9,41 +11,26 @@ #endif #endif -#include "CpuArch.h" - #ifdef MY_CPU_X86_OR_AMD64 - #if defined(__clang__) - #if (__clang_major__ >= 8) // fix that check + #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check #define USE_HW_SHA - #ifndef __SHA__ - #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) - #if defined(_MSC_VER) - // SSSE3: for clang-cl: - #include - #define __SHA__ - #endif - #endif - - #endif - #elif defined(__GNUC__) - #if (__GNUC__ >= 8) // fix that check + #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) #define USE_HW_SHA - #ifndef __SHA__ + #if !defined(_INTEL_COMPILER) + // icc defines __GNUC__, but icc doesn't support __attribute__(__target__) + #if !defined(__SHA__) || !defined(__SSSE3__) #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) - // #pragma GCC target("sha,ssse3") #endif - #endif - #elif defined(__INTEL_COMPILER) - #if (__INTEL_COMPILER >= 1800) // fix that check - #define USE_HW_SHA - #endif + #endif #elif defined(_MSC_VER) #ifdef USE_MY_MM #define USE_VER_MIN 1300 #else - #define USE_VER_MIN 1910 + #define USE_VER_MIN 1900 #endif - #if _MSC_VER >= USE_VER_MIN + #if (_MSC_VER >= USE_VER_MIN) #define USE_HW_SHA #endif #endif @@ -52,16 +39,19 @@ #ifdef USE_HW_SHA // #pragma message("Sha256 HW") -// #include -#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +// sse/sse2/ssse3: +#include +// sha*: #include -#else -#include -#if defined(_MSC_VER) && (_MSC_VER >= 1600) -// #include -#endif +#if defined (__clang__) && defined(_MSC_VER) + // #if !defined(__SSSE3__) + // #endif + #if !defined(__SHA__) + #include + #endif +#else #ifdef USE_MY_MM #include "My_mm.h" @@ -98,9 +88,9 @@ const UInt32 SHA256_K_ARRAY[64]; #define K SHA256_K_ARRAY -#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); -#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src); -#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src); +#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); +#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src); +#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src); #define LOAD_SHUFFLE(m, k) \ @@ -112,7 +102,7 @@ const UInt32 SHA256_K_ARRAY[64]; #define SM2(g0, g1, g2, g3) \ tmp = _mm_alignr_epi8(g1, g0, 4); \ - ADD_EPI32(g2, tmp); \ + ADD_EPI32(g2, tmp) \ SHA25G_MSG2(g2, g1); \ // #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k) @@ -138,16 +128,16 @@ const UInt32 SHA256_K_ARRAY[64]; // We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2 #define R4(k, g0, g1, g2, g3, OP0, OP1) \ - RND2_0(g0, k); \ - OP0(g0, g1, g2, g3); \ - RND2_1; \ - OP1(g0, g1, g2, g3); \ + RND2_0(g0, k) \ + OP0(g0, g1, g2, g3) \ + RND2_1 \ + OP1(g0, g1, g2, g3) \ #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ - R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \ - R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \ - R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \ - R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ + R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \ + R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \ + R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \ + R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \ #define PREPARE_STATE \ tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \ @@ -157,11 +147,11 @@ const UInt32 SHA256_K_ARRAY[64]; state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \ -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); #ifdef ATTRIB_SHA ATTRIB_SHA #endif -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) { const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); __m128i tmp; @@ -192,13 +182,13 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size - R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); - R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); - R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); - R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); + R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ) + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ) - ADD_EPI32(state0, state0_save); - ADD_EPI32(state1, state1_save); + ADD_EPI32(state0, state0_save) + ADD_EPI32(state1, state1_save) data += 64; } @@ -298,11 +288,11 @@ const UInt32 SHA256_K_ARRAY[64]; R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); #ifdef ATTRIB_SHA ATTRIB_SHA #endif -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) { v128 state0, state1; @@ -353,12 +343,12 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size // #include // #include "Sha256.h" -void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); #pragma message("Sha256 HW-SW stub was used") -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); -void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) { Sha256_UpdateBlocks(state, data, numBlocks); /* @@ -371,3 +361,26 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size } #endif + + + +#undef K +#undef RND2 +#undef RND2_0 +#undef RND2_1 + +#undef MY_rev32_for_LE +#undef NNN +#undef LOAD_128 +#undef STORE_128 +#undef LOAD_SHUFFLE +#undef SM1 +#undef SM2 + +#undef NNN +#undef R4 +#undef R16 +#undef PREPARE_STATE +#undef USE_HW_SHA +#undef ATTRIB_SHA +#undef USE_VER_MIN -- cgit v1.2.3-55-g6feb