aboutsummaryrefslogtreecommitdiff
path: root/C/Sha256Opt.c
diff options
context:
space:
mode:
authorIgor Pavlov <87184205+ip7z@users.noreply.github.com>2023-06-21 00:00:00 +0000
committerIgor Pavlov <87184205+ip7z@users.noreply.github.com>2023-12-17 14:59:19 +0500
commit5b39dc76f1bc82f941d5c800ab9f34407a06b53a (patch)
treefe5e17420300b715021a76328444088d32047963 /C/Sha256Opt.c
parent93be7d4abfd4233228f58ee1fbbcd76d91be66a4 (diff)
download7zip-23.01.tar.gz
7zip-23.01.tar.bz2
7zip-23.01.zip
23.0123.01
Diffstat (limited to 'C/Sha256Opt.c')
-rw-r--r--C/Sha256Opt.c129
1 files changed, 71 insertions, 58 deletions
diff --git a/C/Sha256Opt.c b/C/Sha256Opt.c
index decc138..e4465e3 100644
--- a/C/Sha256Opt.c
+++ b/C/Sha256Opt.c
@@ -1,7 +1,9 @@
1/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions 1/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
22021-04-01 : Igor Pavlov : Public domain */ 22023-04-02 : Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5#include "Compiler.h"
6#include "CpuArch.h"
5 7
6#if defined(_MSC_VER) 8#if defined(_MSC_VER)
7#if (_MSC_VER < 1900) && (_MSC_VER >= 1200) 9#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
@@ -9,41 +11,26 @@
9#endif 11#endif
10#endif 12#endif
11 13
12#include "CpuArch.h"
13
14#ifdef MY_CPU_X86_OR_AMD64 14#ifdef MY_CPU_X86_OR_AMD64
15 #if defined(__clang__) 15 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
16 #if (__clang_major__ >= 8) // fix that check
17 #define USE_HW_SHA 16 #define USE_HW_SHA
18 #ifndef __SHA__ 17 #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
19 #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) 18 || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
20 #if defined(_MSC_VER) 19 || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
21 // SSSE3: for clang-cl:
22 #include <tmmintrin.h>
23 #define __SHA__
24 #endif
25 #endif
26
27 #endif
28 #elif defined(__GNUC__)
29 #if (__GNUC__ >= 8) // fix that check
30 #define USE_HW_SHA 20 #define USE_HW_SHA
31 #ifndef __SHA__ 21 #if !defined(_INTEL_COMPILER)
22 // icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
23 #if !defined(__SHA__) || !defined(__SSSE3__)
32 #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) 24 #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
33 // #pragma GCC target("sha,ssse3")
34 #endif 25 #endif
35 #endif 26 #endif
36 #elif defined(__INTEL_COMPILER)
37 #if (__INTEL_COMPILER >= 1800) // fix that check
38 #define USE_HW_SHA
39 #endif
40 #elif defined(_MSC_VER) 27 #elif defined(_MSC_VER)
41 #ifdef USE_MY_MM 28 #ifdef USE_MY_MM
42 #define USE_VER_MIN 1300 29 #define USE_VER_MIN 1300
43 #else 30 #else
44 #define USE_VER_MIN 1910 31 #define USE_VER_MIN 1900
45 #endif 32 #endif
46 #if _MSC_VER >= USE_VER_MIN 33 #if (_MSC_VER >= USE_VER_MIN)
47 #define USE_HW_SHA 34 #define USE_HW_SHA
48 #endif 35 #endif
49 #endif 36 #endif
@@ -52,16 +39,19 @@
52#ifdef USE_HW_SHA 39#ifdef USE_HW_SHA
53 40
54// #pragma message("Sha256 HW") 41// #pragma message("Sha256 HW")
55// #include <wmmintrin.h>
56 42
57#if !defined(_MSC_VER) || (_MSC_VER >= 1900) 43// sse/sse2/ssse3:
44#include <tmmintrin.h>
45// sha*:
58#include <immintrin.h> 46#include <immintrin.h>
59#else
60#include <emmintrin.h>
61 47
62#if defined(_MSC_VER) && (_MSC_VER >= 1600) 48#if defined (__clang__) && defined(_MSC_VER)
63// #include <intrin.h> 49 // #if !defined(__SSSE3__)
64#endif 50 // #endif
51 #if !defined(__SHA__)
52 #include <shaintrin.h>
53 #endif
54#else
65 55
66#ifdef USE_MY_MM 56#ifdef USE_MY_MM
67#include "My_mm.h" 57#include "My_mm.h"
@@ -98,9 +88,9 @@ const UInt32 SHA256_K_ARRAY[64];
98#define K SHA256_K_ARRAY 88#define K SHA256_K_ARRAY
99 89
100 90
101#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); 91#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
102#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src); 92#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
103#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src); 93#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
104 94
105 95
106#define LOAD_SHUFFLE(m, k) \ 96#define LOAD_SHUFFLE(m, k) \
@@ -112,7 +102,7 @@ const UInt32 SHA256_K_ARRAY[64];
112 102
113#define SM2(g0, g1, g2, g3) \ 103#define SM2(g0, g1, g2, g3) \
114 tmp = _mm_alignr_epi8(g1, g0, 4); \ 104 tmp = _mm_alignr_epi8(g1, g0, 4); \
115 ADD_EPI32(g2, tmp); \ 105 ADD_EPI32(g2, tmp) \
116 SHA25G_MSG2(g2, g1); \ 106 SHA25G_MSG2(g2, g1); \
117 107
118// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k) 108// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
@@ -138,16 +128,16 @@ const UInt32 SHA256_K_ARRAY[64];
138// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2 128// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2
139 129
140#define R4(k, g0, g1, g2, g3, OP0, OP1) \ 130#define R4(k, g0, g1, g2, g3, OP0, OP1) \
141 RND2_0(g0, k); \ 131 RND2_0(g0, k) \
142 OP0(g0, g1, g2, g3); \ 132 OP0(g0, g1, g2, g3) \
143 RND2_1; \ 133 RND2_1 \
144 OP1(g0, g1, g2, g3); \ 134 OP1(g0, g1, g2, g3) \
145 135
146#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ 136#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
147 R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \ 137 R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \
148 R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \ 138 R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \
149 R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \ 139 R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \
150 R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ 140 R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \
151 141
152#define PREPARE_STATE \ 142#define PREPARE_STATE \
153 tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \ 143 tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
@@ -157,11 +147,11 @@ const UInt32 SHA256_K_ARRAY[64];
157 state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \ 147 state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
158 148
159 149
160void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); 150void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
161#ifdef ATTRIB_SHA 151#ifdef ATTRIB_SHA
162ATTRIB_SHA 152ATTRIB_SHA
163#endif 153#endif
164void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) 154void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
165{ 155{
166 const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); 156 const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
167 __m128i tmp; 157 __m128i tmp;
@@ -192,13 +182,13 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
192 182
193 183
194 184
195 R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); 185 R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
196 R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); 186 R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
197 R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); 187 R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
198 R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); 188 R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
199 189
200 ADD_EPI32(state0, state0_save); 190 ADD_EPI32(state0, state0_save)
201 ADD_EPI32(state1, state1_save); 191 ADD_EPI32(state1, state1_save)
202 192
203 data += 64; 193 data += 64;
204 } 194 }
@@ -298,11 +288,11 @@ const UInt32 SHA256_K_ARRAY[64];
298 R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ 288 R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
299 289
300 290
301void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); 291void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
302#ifdef ATTRIB_SHA 292#ifdef ATTRIB_SHA
303ATTRIB_SHA 293ATTRIB_SHA
304#endif 294#endif
305void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) 295void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
306{ 296{
307 v128 state0, state1; 297 v128 state0, state1;
308 298
@@ -353,12 +343,12 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
353// #include <stdlib.h> 343// #include <stdlib.h>
354 344
355// #include "Sha256.h" 345// #include "Sha256.h"
356void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); 346void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
357 347
358#pragma message("Sha256 HW-SW stub was used") 348#pragma message("Sha256 HW-SW stub was used")
359 349
360void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); 350void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
361void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) 351void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
362{ 352{
363 Sha256_UpdateBlocks(state, data, numBlocks); 353 Sha256_UpdateBlocks(state, data, numBlocks);
364 /* 354 /*
@@ -371,3 +361,26 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
371} 361}
372 362
373#endif 363#endif
364
365
366
367#undef K
368#undef RND2
369#undef RND2_0
370#undef RND2_1
371
372#undef MY_rev32_for_LE
373#undef NNN
374#undef LOAD_128
375#undef STORE_128
376#undef LOAD_SHUFFLE
377#undef SM1
378#undef SM2
379
380#undef NNN
381#undef R4
382#undef R16
383#undef PREPARE_STATE
384#undef USE_HW_SHA
385#undef ATTRIB_SHA
386#undef USE_VER_MIN