diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2023-06-21 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2023-12-17 14:59:19 +0500 |
commit | 5b39dc76f1bc82f941d5c800ab9f34407a06b53a (patch) | |
tree | fe5e17420300b715021a76328444088d32047963 /C/Sha1Opt.c | |
parent | 93be7d4abfd4233228f58ee1fbbcd76d91be66a4 (diff) | |
download | 7zip-23.01.tar.gz 7zip-23.01.tar.bz2 7zip-23.01.zip |
23.0123.01
Diffstat (limited to 'C/Sha1Opt.c')
-rw-r--r-- | C/Sha1Opt.c | 151 |
1 files changed, 82 insertions, 69 deletions
diff --git a/C/Sha1Opt.c b/C/Sha1Opt.c index 63132da..27796aa 100644 --- a/C/Sha1Opt.c +++ b/C/Sha1Opt.c | |||
@@ -1,7 +1,9 @@ | |||
1 | /* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions | 1 | /* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions |
2 | 2021-04-01 : Igor Pavlov : Public domain */ | 2 | 2023-04-02 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | #include "Compiler.h" | ||
6 | #include "CpuArch.h" | ||
5 | 7 | ||
6 | #if defined(_MSC_VER) | 8 | #if defined(_MSC_VER) |
7 | #if (_MSC_VER < 1900) && (_MSC_VER >= 1200) | 9 | #if (_MSC_VER < 1900) && (_MSC_VER >= 1200) |
@@ -9,41 +11,26 @@ | |||
9 | #endif | 11 | #endif |
10 | #endif | 12 | #endif |
11 | 13 | ||
12 | #include "CpuArch.h" | ||
13 | |||
14 | #ifdef MY_CPU_X86_OR_AMD64 | 14 | #ifdef MY_CPU_X86_OR_AMD64 |
15 | #if defined(__clang__) | 15 | #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check |
16 | #if (__clang_major__ >= 8) // fix that check | ||
17 | #define USE_HW_SHA | 16 | #define USE_HW_SHA |
18 | #ifndef __SHA__ | 17 | #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \ |
19 | #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) | 18 | || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \ |
20 | #if defined(_MSC_VER) | 19 | || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) |
21 | // SSSE3: for clang-cl: | ||
22 | #include <tmmintrin.h> | ||
23 | #define __SHA__ | ||
24 | #endif | ||
25 | #endif | ||
26 | #pragma clang diagnostic ignored "-Wvector-conversion" | ||
27 | #endif | ||
28 | #elif defined(__GNUC__) | ||
29 | #if (__GNUC__ >= 8) // fix that check | ||
30 | #define USE_HW_SHA | 20 | #define USE_HW_SHA |
31 | #ifndef __SHA__ | 21 | #if !defined(_INTEL_COMPILER) |
22 | // icc defines __GNUC__, but icc doesn't support __attribute__(__target__) | ||
23 | #if !defined(__SHA__) || !defined(__SSSE3__) | ||
32 | #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) | 24 | #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) |
33 | // #pragma GCC target("sha,ssse3") | ||
34 | #endif | 25 | #endif |
35 | #endif | 26 | #endif |
36 | #elif defined(__INTEL_COMPILER) | ||
37 | #if (__INTEL_COMPILER >= 1800) // fix that check | ||
38 | #define USE_HW_SHA | ||
39 | #endif | ||
40 | #elif defined(_MSC_VER) | 27 | #elif defined(_MSC_VER) |
41 | #ifdef USE_MY_MM | 28 | #ifdef USE_MY_MM |
42 | #define USE_VER_MIN 1300 | 29 | #define USE_VER_MIN 1300 |
43 | #else | 30 | #else |
44 | #define USE_VER_MIN 1910 | 31 | #define USE_VER_MIN 1900 |
45 | #endif | 32 | #endif |
46 | #if _MSC_VER >= USE_VER_MIN | 33 | #if (_MSC_VER >= USE_VER_MIN) |
47 | #define USE_HW_SHA | 34 | #define USE_HW_SHA |
48 | #endif | 35 | #endif |
49 | #endif | 36 | #endif |
@@ -52,16 +39,19 @@ | |||
52 | #ifdef USE_HW_SHA | 39 | #ifdef USE_HW_SHA |
53 | 40 | ||
54 | // #pragma message("Sha1 HW") | 41 | // #pragma message("Sha1 HW") |
55 | // #include <wmmintrin.h> | ||
56 | 42 | ||
57 | #if !defined(_MSC_VER) || (_MSC_VER >= 1900) | 43 | // sse/sse2/ssse3: |
44 | #include <tmmintrin.h> | ||
45 | // sha*: | ||
58 | #include <immintrin.h> | 46 | #include <immintrin.h> |
59 | #else | ||
60 | #include <emmintrin.h> | ||
61 | 47 | ||
62 | #if defined(_MSC_VER) && (_MSC_VER >= 1600) | 48 | #if defined (__clang__) && defined(_MSC_VER) |
63 | // #include <intrin.h> | 49 | // #if !defined(__SSSE3__) |
64 | #endif | 50 | // #endif |
51 | #if !defined(__SHA__) | ||
52 | #include <shaintrin.h> | ||
53 | #endif | ||
54 | #else | ||
65 | 55 | ||
66 | #ifdef USE_MY_MM | 56 | #ifdef USE_MY_MM |
67 | #include "My_mm.h" | 57 | #include "My_mm.h" |
@@ -87,37 +77,37 @@ SHA: | |||
87 | _mm_sha1* | 77 | _mm_sha1* |
88 | */ | 78 | */ |
89 | 79 | ||
90 | #define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); | 80 | |
91 | #define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src); | 81 | #define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src); |
92 | #define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask); | 82 | #define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask); |
93 | #define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask); | 83 | #define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask); |
94 | 84 | #ifdef __clang__ | |
95 | #define SHA1_RND4(abcd, e0, f) abcd = _mm_sha1rnds4_epu32(abcd, e0, f); | 85 | #define SHA1_RNDS4_RET_TYPE_CAST (__m128i) |
96 | #define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m); | 86 | #else |
97 | 87 | #define SHA1_RNDS4_RET_TYPE_CAST | |
98 | 88 | #endif | |
99 | 89 | #define SHA1_RND4(abcd, e0, f) abcd = SHA1_RNDS4_RET_TYPE_CAST _mm_sha1rnds4_epu32(abcd, e0, f); | |
100 | 90 | #define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m); | |
101 | 91 | #define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); | |
102 | #define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src); | 92 | #define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src); |
103 | #define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src); | 93 | #define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src); |
104 | 94 | ||
105 | 95 | ||
106 | #define LOAD_SHUFFLE(m, k) \ | 96 | #define LOAD_SHUFFLE(m, k) \ |
107 | m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \ | 97 | m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \ |
108 | SHUFFLE_EPI8(m, mask); \ | 98 | SHUFFLE_EPI8(m, mask) \ |
109 | 99 | ||
110 | #define SM1(m0, m1, m2, m3) \ | 100 | #define SM1(m0, m1, m2, m3) \ |
111 | SHA1_MSG1(m0, m1); \ | 101 | SHA1_MSG1(m0, m1) \ |
112 | 102 | ||
113 | #define SM2(m0, m1, m2, m3) \ | 103 | #define SM2(m0, m1, m2, m3) \ |
114 | XOR_SI128(m3, m1); \ | 104 | XOR_SI128(m3, m1) \ |
115 | SHA1_MSG2(m3, m2); \ | 105 | SHA1_MSG2(m3, m2) \ |
116 | 106 | ||
117 | #define SM3(m0, m1, m2, m3) \ | 107 | #define SM3(m0, m1, m2, m3) \ |
118 | XOR_SI128(m3, m1); \ | 108 | XOR_SI128(m3, m1) \ |
119 | SM1(m0, m1, m2, m3) \ | 109 | SM1(m0, m1, m2, m3) \ |
120 | SHA1_MSG2(m3, m2); \ | 110 | SHA1_MSG2(m3, m2) \ |
121 | 111 | ||
122 | #define NNN(m0, m1, m2, m3) | 112 | #define NNN(m0, m1, m2, m3) |
123 | 113 | ||
@@ -139,9 +129,9 @@ SHA: | |||
139 | 129 | ||
140 | #define R4(k, e0, e1, m0, m1, m2, m3, OP) \ | 130 | #define R4(k, e0, e1, m0, m1, m2, m3, OP) \ |
141 | e1 = abcd; \ | 131 | e1 = abcd; \ |
142 | SHA1_RND4(abcd, e0, (k) / 5); \ | 132 | SHA1_RND4(abcd, e0, (k) / 5) \ |
143 | SHA1_NEXTE(e1, m1); \ | 133 | SHA1_NEXTE(e1, m1) \ |
144 | OP(m0, m1, m2, m3); \ | 134 | OP(m0, m1, m2, m3) \ |
145 | 135 | ||
146 | #define R16(k, mx, OP0, OP1, OP2, OP3) \ | 136 | #define R16(k, mx, OP0, OP1, OP2, OP3) \ |
147 | R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \ | 137 | R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \ |
@@ -150,18 +140,18 @@ SHA: | |||
150 | R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \ | 140 | R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \ |
151 | 141 | ||
152 | #define PREPARE_STATE \ | 142 | #define PREPARE_STATE \ |
153 | SHUFFLE_EPI32 (abcd, 0x1B); \ | 143 | SHUFFLE_EPI32 (abcd, 0x1B) \ |
154 | SHUFFLE_EPI32 (e0, 0x1B); \ | 144 | SHUFFLE_EPI32 (e0, 0x1B) \ |
155 | 145 | ||
156 | 146 | ||
157 | 147 | ||
158 | 148 | ||
159 | 149 | ||
160 | void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); | 150 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); |
161 | #ifdef ATTRIB_SHA | 151 | #ifdef ATTRIB_SHA |
162 | ATTRIB_SHA | 152 | ATTRIB_SHA |
163 | #endif | 153 | #endif |
164 | void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) | 154 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) |
165 | { | 155 | { |
166 | const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); | 156 | const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); |
167 | 157 | ||
@@ -190,15 +180,15 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t | |||
190 | LOAD_SHUFFLE (m2, 2) | 180 | LOAD_SHUFFLE (m2, 2) |
191 | LOAD_SHUFFLE (m3, 3) | 181 | LOAD_SHUFFLE (m3, 3) |
192 | 182 | ||
193 | ADD_EPI32(e0, m0); | 183 | ADD_EPI32(e0, m0) |
194 | 184 | ||
195 | R16 ( 0, m0, SM1, SM3, SM3, SM3 ); | 185 | R16 ( 0, m0, SM1, SM3, SM3, SM3 ) |
196 | R16 ( 1, m0, SM3, SM3, SM3, SM3 ); | 186 | R16 ( 1, m0, SM3, SM3, SM3, SM3 ) |
197 | R16 ( 2, m0, SM3, SM3, SM3, SM3 ); | 187 | R16 ( 2, m0, SM3, SM3, SM3, SM3 ) |
198 | R16 ( 3, m0, SM3, SM3, SM3, SM3 ); | 188 | R16 ( 3, m0, SM3, SM3, SM3, SM3 ) |
199 | R16 ( 4, e2, SM2, NNN, NNN, NNN ); | 189 | R16 ( 4, e2, SM2, NNN, NNN, NNN ) |
200 | 190 | ||
201 | ADD_EPI32(abcd, abcd_save); | 191 | ADD_EPI32(abcd, abcd_save) |
202 | 192 | ||
203 | data += 64; | 193 | data += 64; |
204 | } | 194 | } |
@@ -274,11 +264,11 @@ typedef uint32x4_t v128; | |||
274 | #define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0)) | 264 | #define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0)) |
275 | #define T(m, c) t = vaddq_u32(m, c) | 265 | #define T(m, c) t = vaddq_u32(m, c) |
276 | 266 | ||
277 | void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); | 267 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); |
278 | #ifdef ATTRIB_SHA | 268 | #ifdef ATTRIB_SHA |
279 | ATTRIB_SHA | 269 | ATTRIB_SHA |
280 | #endif | 270 | #endif |
281 | void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) | 271 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) |
282 | { | 272 | { |
283 | v128 abcd; | 273 | v128 abcd; |
284 | v128 c0, c1, c2, c3; | 274 | v128 c0, c1, c2, c3; |
@@ -353,12 +343,12 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t | |||
353 | // #include <stdlib.h> | 343 | // #include <stdlib.h> |
354 | 344 | ||
355 | // #include "Sha1.h" | 345 | // #include "Sha1.h" |
356 | void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks); | 346 | void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks); |
357 | 347 | ||
358 | #pragma message("Sha1 HW-SW stub was used") | 348 | #pragma message("Sha1 HW-SW stub was used") |
359 | 349 | ||
360 | void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); | 350 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); |
361 | void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) | 351 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) |
362 | { | 352 | { |
363 | Sha1_UpdateBlocks(state, data, numBlocks); | 353 | Sha1_UpdateBlocks(state, data, numBlocks); |
364 | /* | 354 | /* |
@@ -371,3 +361,26 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t | |||
371 | } | 361 | } |
372 | 362 | ||
373 | #endif | 363 | #endif |
364 | |||
365 | #undef SU0 | ||
366 | #undef SU1 | ||
367 | #undef C | ||
368 | #undef P | ||
369 | #undef M | ||
370 | #undef H | ||
371 | #undef T | ||
372 | #undef MY_rev32_for_LE | ||
373 | #undef NNN | ||
374 | #undef LOAD_128 | ||
375 | #undef STORE_128 | ||
376 | #undef LOAD_SHUFFLE | ||
377 | #undef SM1 | ||
378 | #undef SM2 | ||
379 | #undef SM3 | ||
380 | #undef NNN | ||
381 | #undef R4 | ||
382 | #undef R16 | ||
383 | #undef PREPARE_STATE | ||
384 | #undef USE_HW_SHA | ||
385 | #undef ATTRIB_SHA | ||
386 | #undef USE_VER_MIN | ||