aboutsummaryrefslogtreecommitdiff
path: root/C/Sha1Opt.c
diff options
context:
space:
mode:
Diffstat (limited to 'C/Sha1Opt.c')
-rw-r--r--C/Sha1Opt.c151
1 files changed, 82 insertions, 69 deletions
diff --git a/C/Sha1Opt.c b/C/Sha1Opt.c
index 63132da..27796aa 100644
--- a/C/Sha1Opt.c
+++ b/C/Sha1Opt.c
@@ -1,7 +1,9 @@
1/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions 1/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions
22021-04-01 : Igor Pavlov : Public domain */ 22023-04-02 : Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5#include "Compiler.h"
6#include "CpuArch.h"
5 7
6#if defined(_MSC_VER) 8#if defined(_MSC_VER)
7#if (_MSC_VER < 1900) && (_MSC_VER >= 1200) 9#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
@@ -9,41 +11,26 @@
9#endif 11#endif
10#endif 12#endif
11 13
12#include "CpuArch.h"
13
14#ifdef MY_CPU_X86_OR_AMD64 14#ifdef MY_CPU_X86_OR_AMD64
15 #if defined(__clang__) 15 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
16 #if (__clang_major__ >= 8) // fix that check
17 #define USE_HW_SHA 16 #define USE_HW_SHA
18 #ifndef __SHA__ 17 #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
19 #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) 18 || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
20 #if defined(_MSC_VER) 19 || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
21 // SSSE3: for clang-cl:
22 #include <tmmintrin.h>
23 #define __SHA__
24 #endif
25 #endif
26 #pragma clang diagnostic ignored "-Wvector-conversion"
27 #endif
28 #elif defined(__GNUC__)
29 #if (__GNUC__ >= 8) // fix that check
30 #define USE_HW_SHA 20 #define USE_HW_SHA
31 #ifndef __SHA__ 21 #if !defined(_INTEL_COMPILER)
22 // icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
23 #if !defined(__SHA__) || !defined(__SSSE3__)
32 #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) 24 #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
33 // #pragma GCC target("sha,ssse3")
34 #endif 25 #endif
35 #endif 26 #endif
36 #elif defined(__INTEL_COMPILER)
37 #if (__INTEL_COMPILER >= 1800) // fix that check
38 #define USE_HW_SHA
39 #endif
40 #elif defined(_MSC_VER) 27 #elif defined(_MSC_VER)
41 #ifdef USE_MY_MM 28 #ifdef USE_MY_MM
42 #define USE_VER_MIN 1300 29 #define USE_VER_MIN 1300
43 #else 30 #else
44 #define USE_VER_MIN 1910 31 #define USE_VER_MIN 1900
45 #endif 32 #endif
46 #if _MSC_VER >= USE_VER_MIN 33 #if (_MSC_VER >= USE_VER_MIN)
47 #define USE_HW_SHA 34 #define USE_HW_SHA
48 #endif 35 #endif
49 #endif 36 #endif
@@ -52,16 +39,19 @@
52#ifdef USE_HW_SHA 39#ifdef USE_HW_SHA
53 40
54// #pragma message("Sha1 HW") 41// #pragma message("Sha1 HW")
55// #include <wmmintrin.h>
56 42
57#if !defined(_MSC_VER) || (_MSC_VER >= 1900) 43// sse/sse2/ssse3:
44#include <tmmintrin.h>
45// sha*:
58#include <immintrin.h> 46#include <immintrin.h>
59#else
60#include <emmintrin.h>
61 47
62#if defined(_MSC_VER) && (_MSC_VER >= 1600) 48#if defined (__clang__) && defined(_MSC_VER)
63// #include <intrin.h> 49 // #if !defined(__SSSE3__)
64#endif 50 // #endif
51 #if !defined(__SHA__)
52 #include <shaintrin.h>
53 #endif
54#else
65 55
66#ifdef USE_MY_MM 56#ifdef USE_MY_MM
67#include "My_mm.h" 57#include "My_mm.h"
@@ -87,37 +77,37 @@ SHA:
87 _mm_sha1* 77 _mm_sha1*
88*/ 78*/
89 79
90#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); 80
91#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src); 81#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src);
92#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask); 82#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask);
93#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask); 83#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask);
94 84#ifdef __clang__
95#define SHA1_RND4(abcd, e0, f) abcd = _mm_sha1rnds4_epu32(abcd, e0, f); 85#define SHA1_RNDS4_RET_TYPE_CAST (__m128i)
96#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m); 86#else
97 87#define SHA1_RNDS4_RET_TYPE_CAST
98 88#endif
99 89#define SHA1_RND4(abcd, e0, f) abcd = SHA1_RNDS4_RET_TYPE_CAST _mm_sha1rnds4_epu32(abcd, e0, f);
100 90#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
101 91#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
102#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src); 92#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
103#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src); 93#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
104 94
105 95
106#define LOAD_SHUFFLE(m, k) \ 96#define LOAD_SHUFFLE(m, k) \
107 m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \ 97 m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
108 SHUFFLE_EPI8(m, mask); \ 98 SHUFFLE_EPI8(m, mask) \
109 99
110#define SM1(m0, m1, m2, m3) \ 100#define SM1(m0, m1, m2, m3) \
111 SHA1_MSG1(m0, m1); \ 101 SHA1_MSG1(m0, m1) \
112 102
113#define SM2(m0, m1, m2, m3) \ 103#define SM2(m0, m1, m2, m3) \
114 XOR_SI128(m3, m1); \ 104 XOR_SI128(m3, m1) \
115 SHA1_MSG2(m3, m2); \ 105 SHA1_MSG2(m3, m2) \
116 106
117#define SM3(m0, m1, m2, m3) \ 107#define SM3(m0, m1, m2, m3) \
118 XOR_SI128(m3, m1); \ 108 XOR_SI128(m3, m1) \
119 SM1(m0, m1, m2, m3) \ 109 SM1(m0, m1, m2, m3) \
120 SHA1_MSG2(m3, m2); \ 110 SHA1_MSG2(m3, m2) \
121 111
122#define NNN(m0, m1, m2, m3) 112#define NNN(m0, m1, m2, m3)
123 113
@@ -139,9 +129,9 @@ SHA:
139 129
140#define R4(k, e0, e1, m0, m1, m2, m3, OP) \ 130#define R4(k, e0, e1, m0, m1, m2, m3, OP) \
141 e1 = abcd; \ 131 e1 = abcd; \
142 SHA1_RND4(abcd, e0, (k) / 5); \ 132 SHA1_RND4(abcd, e0, (k) / 5) \
143 SHA1_NEXTE(e1, m1); \ 133 SHA1_NEXTE(e1, m1) \
144 OP(m0, m1, m2, m3); \ 134 OP(m0, m1, m2, m3) \
145 135
146#define R16(k, mx, OP0, OP1, OP2, OP3) \ 136#define R16(k, mx, OP0, OP1, OP2, OP3) \
147 R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \ 137 R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \
@@ -150,18 +140,18 @@ SHA:
150 R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \ 140 R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \
151 141
152#define PREPARE_STATE \ 142#define PREPARE_STATE \
153 SHUFFLE_EPI32 (abcd, 0x1B); \ 143 SHUFFLE_EPI32 (abcd, 0x1B) \
154 SHUFFLE_EPI32 (e0, 0x1B); \ 144 SHUFFLE_EPI32 (e0, 0x1B) \
155 145
156 146
157 147
158 148
159 149
160void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); 150void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
161#ifdef ATTRIB_SHA 151#ifdef ATTRIB_SHA
162ATTRIB_SHA 152ATTRIB_SHA
163#endif 153#endif
164void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) 154void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
165{ 155{
166 const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); 156 const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
167 157
@@ -190,15 +180,15 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
190 LOAD_SHUFFLE (m2, 2) 180 LOAD_SHUFFLE (m2, 2)
191 LOAD_SHUFFLE (m3, 3) 181 LOAD_SHUFFLE (m3, 3)
192 182
193 ADD_EPI32(e0, m0); 183 ADD_EPI32(e0, m0)
194 184
195 R16 ( 0, m0, SM1, SM3, SM3, SM3 ); 185 R16 ( 0, m0, SM1, SM3, SM3, SM3 )
196 R16 ( 1, m0, SM3, SM3, SM3, SM3 ); 186 R16 ( 1, m0, SM3, SM3, SM3, SM3 )
197 R16 ( 2, m0, SM3, SM3, SM3, SM3 ); 187 R16 ( 2, m0, SM3, SM3, SM3, SM3 )
198 R16 ( 3, m0, SM3, SM3, SM3, SM3 ); 188 R16 ( 3, m0, SM3, SM3, SM3, SM3 )
199 R16 ( 4, e2, SM2, NNN, NNN, NNN ); 189 R16 ( 4, e2, SM2, NNN, NNN, NNN )
200 190
201 ADD_EPI32(abcd, abcd_save); 191 ADD_EPI32(abcd, abcd_save)
202 192
203 data += 64; 193 data += 64;
204 } 194 }
@@ -274,11 +264,11 @@ typedef uint32x4_t v128;
274#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0)) 264#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0))
275#define T(m, c) t = vaddq_u32(m, c) 265#define T(m, c) t = vaddq_u32(m, c)
276 266
277void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); 267void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
278#ifdef ATTRIB_SHA 268#ifdef ATTRIB_SHA
279ATTRIB_SHA 269ATTRIB_SHA
280#endif 270#endif
281void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) 271void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
282{ 272{
283 v128 abcd; 273 v128 abcd;
284 v128 c0, c1, c2, c3; 274 v128 c0, c1, c2, c3;
@@ -353,12 +343,12 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t
353// #include <stdlib.h> 343// #include <stdlib.h>
354 344
355// #include "Sha1.h" 345// #include "Sha1.h"
356void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks); 346void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
357 347
358#pragma message("Sha1 HW-SW stub was used") 348#pragma message("Sha1 HW-SW stub was used")
359 349
360void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); 350void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
361void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) 351void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
362{ 352{
363 Sha1_UpdateBlocks(state, data, numBlocks); 353 Sha1_UpdateBlocks(state, data, numBlocks);
364 /* 354 /*
@@ -371,3 +361,26 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
371} 361}
372 362
373#endif 363#endif
364
365#undef SU0
366#undef SU1
367#undef C
368#undef P
369#undef M
370#undef H
371#undef T
372#undef MY_rev32_for_LE
373#undef NNN
374#undef LOAD_128
375#undef STORE_128
376#undef LOAD_SHUFFLE
377#undef SM1
378#undef SM2
379#undef SM3
380#undef NNN
381#undef R4
382#undef R16
383#undef PREPARE_STATE
384#undef USE_HW_SHA
385#undef ATTRIB_SHA
386#undef USE_VER_MIN