aboutsummaryrefslogtreecommitdiff
path: root/C/Sha1Opt.c
diff options
context:
space:
mode:
Diffstat (limited to 'C/Sha1Opt.c')
-rw-r--r--C/Sha1Opt.c132
1 files changed, 109 insertions, 23 deletions
diff --git a/C/Sha1Opt.c b/C/Sha1Opt.c
index 27796aa..4e835f1 100644
--- a/C/Sha1Opt.c
+++ b/C/Sha1Opt.c
@@ -1,5 +1,5 @@
1/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions 1/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions
22023-04-02 : Igor Pavlov : Public domain */ 22024-03-01 : Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5#include "Compiler.h" 5#include "Compiler.h"
@@ -11,6 +11,8 @@
11#endif 11#endif
12#endif 12#endif
13 13
14// #define Z7_USE_HW_SHA_STUB // for debug
15
14#ifdef MY_CPU_X86_OR_AMD64 16#ifdef MY_CPU_X86_OR_AMD64
15 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check 17 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
16 #define USE_HW_SHA 18 #define USE_HW_SHA
@@ -32,9 +34,14 @@
32 #endif 34 #endif
33 #if (_MSC_VER >= USE_VER_MIN) 35 #if (_MSC_VER >= USE_VER_MIN)
34 #define USE_HW_SHA 36 #define USE_HW_SHA
37 #else
38 #define Z7_USE_HW_SHA_STUB
35 #endif 39 #endif
36 #endif 40 #endif
37// #endif // MY_CPU_X86_OR_AMD64 41// #endif // MY_CPU_X86_OR_AMD64
42#ifndef USE_HW_SHA
43 // #define Z7_USE_HW_SHA_STUB // for debug
44#endif
38 45
39#ifdef USE_HW_SHA 46#ifdef USE_HW_SHA
40 47
@@ -202,46 +209,124 @@ void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
202 209
203#endif // USE_HW_SHA 210#endif // USE_HW_SHA
204 211
205#elif defined(MY_CPU_ARM_OR_ARM64) 212#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) \
206 213 && (!defined(Z7_MSC_VER_ORIGINAL) || (_MSC_VER >= 1929) && (_MSC_FULL_VER >= 192930037))
207 #if defined(__clang__) 214 #if defined(__ARM_FEATURE_SHA2) \
208 #if (__clang_major__ >= 8) // fix that check 215 || defined(__ARM_FEATURE_CRYPTO)
216 #define USE_HW_SHA
217 #else
218 #if defined(MY_CPU_ARM64) \
219 || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
220 || defined(Z7_MSC_VER_ORIGINAL)
221 #if defined(__ARM_FP) && \
222 ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
223 || defined(__GNUC__) && (__GNUC__ >= 6) \
224 ) \
225 || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
226 #if defined(MY_CPU_ARM64) \
227 || !defined(Z7_CLANG_VERSION) \
228 || defined(__ARM_NEON) && \
229 (Z7_CLANG_VERSION < 170000 || \
230 Z7_CLANG_VERSION > 170001)
209 #define USE_HW_SHA 231 #define USE_HW_SHA
210 #endif 232 #endif
211 #elif defined(__GNUC__)
212 #if (__GNUC__ >= 6) // fix that check
213 #define USE_HW_SHA
214 #endif 233 #endif
215 #elif defined(_MSC_VER)
216 #if _MSC_VER >= 1910
217 #define USE_HW_SHA
218 #endif 234 #endif
219 #endif 235 #endif
220 236
221#ifdef USE_HW_SHA 237#ifdef USE_HW_SHA
222 238
223// #pragma message("=== Sha1 HW === ") 239// #pragma message("=== Sha1 HW === ")
240// __ARM_FEATURE_CRYPTO macro is deprecated in favor of the finer grained feature macro __ARM_FEATURE_SHA2
224 241
225#if defined(__clang__) || defined(__GNUC__) 242#if defined(__clang__) || defined(__GNUC__)
243#if !defined(__ARM_FEATURE_SHA2) && \
244 !defined(__ARM_FEATURE_CRYPTO)
226 #ifdef MY_CPU_ARM64 245 #ifdef MY_CPU_ARM64
246#if defined(__clang__)
247 #define ATTRIB_SHA __attribute__((__target__("crypto")))
248#else
227 #define ATTRIB_SHA __attribute__((__target__("+crypto"))) 249 #define ATTRIB_SHA __attribute__((__target__("+crypto")))
250#endif
228 #else 251 #else
252#if defined(__clang__) && (__clang_major__ >= 1)
253 #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2")))
254#else
229 #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) 255 #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
256#endif
230 #endif 257 #endif
258#endif
231#else 259#else
232 // _MSC_VER 260 // _MSC_VER
233 // for arm32 261 // for arm32
234 #define _ARM_USE_NEW_NEON_INTRINSICS 262 #define _ARM_USE_NEW_NEON_INTRINSICS
235#endif 263#endif
236 264
237#if defined(_MSC_VER) && defined(MY_CPU_ARM64) 265
266
267
268
269#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
238#include <arm64_neon.h> 270#include <arm64_neon.h>
239#else 271#else
272
273
274
275
276
277
278
279
280
281#if defined(__clang__) && __clang_major__ < 16
282#if !defined(__ARM_FEATURE_SHA2) && \
283 !defined(__ARM_FEATURE_CRYPTO)
284// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
285 Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
286 #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
287// #if defined(__clang__) && __clang_major__ < 13
288 #define __ARM_FEATURE_CRYPTO 1
289// #else
290 #define __ARM_FEATURE_SHA2 1
291// #endif
292 Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
293#endif
294#endif // clang
295
296#if defined(__clang__)
297
298#if defined(__ARM_ARCH) && __ARM_ARCH < 8
299 Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
300// #pragma message("#define __ARM_ARCH 8")
301 #undef __ARM_ARCH
302 #define __ARM_ARCH 8
303 Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
304#endif
305
306#endif // clang
307
240#include <arm_neon.h> 308#include <arm_neon.h>
309
310#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
311 defined(__ARM_FEATURE_CRYPTO) && \
312 defined(__ARM_FEATURE_SHA2)
313Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
314 #undef __ARM_FEATURE_CRYPTO
315 #undef __ARM_FEATURE_SHA2
316 #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
317Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
318// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
241#endif 319#endif
242 320
321#endif // Z7_MSC_VER_ORIGINAL
322
243typedef uint32x4_t v128; 323typedef uint32x4_t v128;
244// typedef __n128 v128; // MSVC 324// typedef __n128 v128; // MSVC
325// the bug in clang 3.8.1:
326// __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1);
327#if defined(__clang__) && (__clang_major__ <= 9)
328#pragma GCC diagnostic ignored "-Wvector-conversion"
329#endif
245 330
246#ifdef MY_CPU_BE 331#ifdef MY_CPU_BE
247 #define MY_rev32_for_LE(x) 332 #define MY_rev32_for_LE(x)
@@ -256,11 +341,11 @@ typedef uint32x4_t v128;
256 m = LOAD_128((data + (k) * 16)); \ 341 m = LOAD_128((data + (k) * 16)); \
257 MY_rev32_for_LE(m); \ 342 MY_rev32_for_LE(m); \
258 343
259#define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3); 344#define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3)
260#define SU1(dest, src) dest = vsha1su1q_u32(dest, src); 345#define SU1(dest, src) dest = vsha1su1q_u32(dest, src)
261#define C(e) abcd = vsha1cq_u32(abcd, e, t); 346#define C(e) abcd = vsha1cq_u32(abcd, e, t)
262#define P(e) abcd = vsha1pq_u32(abcd, e, t); 347#define P(e) abcd = vsha1pq_u32(abcd, e, t)
263#define M(e) abcd = vsha1mq_u32(abcd, e, t); 348#define M(e) abcd = vsha1mq_u32(abcd, e, t)
264#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0)) 349#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0))
265#define T(m, c) t = vaddq_u32(m, c) 350#define T(m, c) t = vaddq_u32(m, c)
266 351
@@ -337,16 +422,17 @@ void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t
337#endif // MY_CPU_ARM_OR_ARM64 422#endif // MY_CPU_ARM_OR_ARM64
338 423
339 424
340#ifndef USE_HW_SHA 425#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
341
342// #error Stop_Compiling_UNSUPPORTED_SHA 426// #error Stop_Compiling_UNSUPPORTED_SHA
343// #include <stdlib.h> 427// #include <stdlib.h>
344 428
345// #include "Sha1.h"
346void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
347 429
348#pragma message("Sha1 HW-SW stub was used")
349 430
431// #include "Sha1.h"
432// #if defined(_MSC_VER)
433#pragma message("Sha1 HW-SW stub was used")
434// #endif
435void Z7_FASTCALL Sha1_UpdateBlocks (UInt32 state[5], const Byte *data, size_t numBlocks);
350void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); 436void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
351void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) 437void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
352{ 438{
@@ -359,7 +445,6 @@ void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
359 return; 445 return;
360 */ 446 */
361} 447}
362
363#endif 448#endif
364 449
365#undef SU0 450#undef SU0
@@ -384,3 +469,4 @@ void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
384#undef USE_HW_SHA 469#undef USE_HW_SHA
385#undef ATTRIB_SHA 470#undef ATTRIB_SHA
386#undef USE_VER_MIN 471#undef USE_VER_MIN
472#undef Z7_USE_HW_SHA_STUB