From 5b39dc76f1bc82f941d5c800ab9f34407a06b53a Mon Sep 17 00:00:00 2001 From: Igor Pavlov <87184205+ip7z@users.noreply.github.com> Date: Wed, 21 Jun 2023 00:00:00 +0000 Subject: 23.01 --- C/Sha1Opt.c | 151 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 82 insertions(+), 69 deletions(-) (limited to 'C/Sha1Opt.c') diff --git a/C/Sha1Opt.c b/C/Sha1Opt.c index 63132da..27796aa 100644 --- a/C/Sha1Opt.c +++ b/C/Sha1Opt.c @@ -1,7 +1,9 @@ /* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" +#include "Compiler.h" +#include "CpuArch.h" #if defined(_MSC_VER) #if (_MSC_VER < 1900) && (_MSC_VER >= 1200) @@ -9,41 +11,26 @@ #endif #endif -#include "CpuArch.h" - #ifdef MY_CPU_X86_OR_AMD64 - #if defined(__clang__) - #if (__clang_major__ >= 8) // fix that check + #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check #define USE_HW_SHA - #ifndef __SHA__ - #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) - #if defined(_MSC_VER) - // SSSE3: for clang-cl: - #include - #define __SHA__ - #endif - #endif - #pragma clang diagnostic ignored "-Wvector-conversion" - #endif - #elif defined(__GNUC__) - #if (__GNUC__ >= 8) // fix that check + #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) #define USE_HW_SHA - #ifndef __SHA__ + #if !defined(_INTEL_COMPILER) + // icc defines __GNUC__, but icc doesn't support __attribute__(__target__) + #if !defined(__SHA__) || !defined(__SSSE3__) #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) - // #pragma GCC target("sha,ssse3") #endif - #endif - #elif defined(__INTEL_COMPILER) - #if (__INTEL_COMPILER >= 1800) // fix that check - #define USE_HW_SHA - #endif + #endif #elif defined(_MSC_VER) #ifdef USE_MY_MM #define USE_VER_MIN 1300 #else - #define USE_VER_MIN 1910 + #define USE_VER_MIN 1900 #endif - #if _MSC_VER >= USE_VER_MIN + #if (_MSC_VER >= USE_VER_MIN) #define USE_HW_SHA #endif #endif @@ -52,16 +39,19 @@ #ifdef USE_HW_SHA // #pragma message("Sha1 HW") -// #include -#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +// sse/sse2/ssse3: +#include +// sha*: #include -#else -#include -#if defined(_MSC_VER) && (_MSC_VER >= 1600) -// #include -#endif +#if defined (__clang__) && defined(_MSC_VER) + // #if !defined(__SSSE3__) + // #endif + #if !defined(__SHA__) + #include + #endif +#else #ifdef USE_MY_MM #include "My_mm.h" @@ -87,37 +77,37 @@ SHA: _mm_sha1* */ -#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); + #define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src); #define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask); #define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask); - -#define SHA1_RND4(abcd, e0, f) abcd = _mm_sha1rnds4_epu32(abcd, e0, f); -#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m); - - - - - -#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src); -#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src); +#ifdef __clang__ +#define SHA1_RNDS4_RET_TYPE_CAST (__m128i) +#else +#define SHA1_RNDS4_RET_TYPE_CAST +#endif +#define SHA1_RND4(abcd, e0, f) abcd = SHA1_RNDS4_RET_TYPE_CAST _mm_sha1rnds4_epu32(abcd, e0, f); +#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m); +#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); +#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src); +#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src); #define LOAD_SHUFFLE(m, k) \ m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \ - SHUFFLE_EPI8(m, mask); \ + SHUFFLE_EPI8(m, mask) \ #define SM1(m0, m1, m2, m3) \ - SHA1_MSG1(m0, m1); \ + SHA1_MSG1(m0, m1) \ #define SM2(m0, m1, m2, m3) \ - XOR_SI128(m3, m1); \ - SHA1_MSG2(m3, m2); \ + XOR_SI128(m3, m1) \ + SHA1_MSG2(m3, m2) \ #define SM3(m0, m1, m2, m3) \ - XOR_SI128(m3, m1); \ + XOR_SI128(m3, m1) \ SM1(m0, m1, m2, m3) \ - SHA1_MSG2(m3, m2); \ + SHA1_MSG2(m3, m2) \ #define NNN(m0, m1, m2, m3) @@ -139,9 +129,9 @@ SHA: #define R4(k, e0, e1, m0, m1, m2, m3, OP) \ e1 = abcd; \ - SHA1_RND4(abcd, e0, (k) / 5); \ - SHA1_NEXTE(e1, m1); \ - OP(m0, m1, m2, m3); \ + SHA1_RND4(abcd, e0, (k) / 5) \ + SHA1_NEXTE(e1, m1) \ + OP(m0, m1, m2, m3) \ #define R16(k, mx, OP0, OP1, OP2, OP3) \ R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \ @@ -150,18 +140,18 @@ SHA: R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \ #define PREPARE_STATE \ - SHUFFLE_EPI32 (abcd, 0x1B); \ - SHUFFLE_EPI32 (e0, 0x1B); \ + SHUFFLE_EPI32 (abcd, 0x1B) \ + SHUFFLE_EPI32 (e0, 0x1B) \ -void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); #ifdef ATTRIB_SHA ATTRIB_SHA #endif -void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) { const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); @@ -190,15 +180,15 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t LOAD_SHUFFLE (m2, 2) LOAD_SHUFFLE (m3, 3) - ADD_EPI32(e0, m0); + ADD_EPI32(e0, m0) - R16 ( 0, m0, SM1, SM3, SM3, SM3 ); - R16 ( 1, m0, SM3, SM3, SM3, SM3 ); - R16 ( 2, m0, SM3, SM3, SM3, SM3 ); - R16 ( 3, m0, SM3, SM3, SM3, SM3 ); - R16 ( 4, e2, SM2, NNN, NNN, NNN ); + R16 ( 0, m0, SM1, SM3, SM3, SM3 ) + R16 ( 1, m0, SM3, SM3, SM3, SM3 ) + R16 ( 2, m0, SM3, SM3, SM3, SM3 ) + R16 ( 3, m0, SM3, SM3, SM3, SM3 ) + R16 ( 4, e2, SM2, NNN, NNN, NNN ) - ADD_EPI32(abcd, abcd_save); + ADD_EPI32(abcd, abcd_save) data += 64; } @@ -274,11 +264,11 @@ typedef uint32x4_t v128; #define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0)) #define T(m, c) t = vaddq_u32(m, c) -void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); #ifdef ATTRIB_SHA ATTRIB_SHA #endif -void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) { v128 abcd; v128 c0, c1, c2, c3; @@ -353,12 +343,12 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t // #include // #include "Sha1.h" -void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks); #pragma message("Sha1 HW-SW stub was used") -void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); -void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) { Sha1_UpdateBlocks(state, data, numBlocks); /* @@ -371,3 +361,26 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t } #endif + +#undef SU0 +#undef SU1 +#undef C +#undef P +#undef M +#undef H +#undef T +#undef MY_rev32_for_LE +#undef NNN +#undef LOAD_128 +#undef STORE_128 +#undef LOAD_SHUFFLE +#undef SM1 +#undef SM2 +#undef SM3 +#undef NNN +#undef R4 +#undef R16 +#undef PREPARE_STATE +#undef USE_HW_SHA +#undef ATTRIB_SHA +#undef USE_VER_MIN -- cgit v1.2.3-55-g6feb