diff options
Diffstat (limited to 'C/Sha1Opt.c')
-rw-r--r-- | C/Sha1Opt.c | 132 |
1 files changed, 109 insertions, 23 deletions
diff --git a/C/Sha1Opt.c b/C/Sha1Opt.c index 27796aa..4e835f1 100644 --- a/C/Sha1Opt.c +++ b/C/Sha1Opt.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions | 1 | /* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions |
2 | 2023-04-02 : Igor Pavlov : Public domain */ | 2 | 2024-03-01 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | #include "Compiler.h" | 5 | #include "Compiler.h" |
@@ -11,6 +11,8 @@ | |||
11 | #endif | 11 | #endif |
12 | #endif | 12 | #endif |
13 | 13 | ||
14 | // #define Z7_USE_HW_SHA_STUB // for debug | ||
15 | |||
14 | #ifdef MY_CPU_X86_OR_AMD64 | 16 | #ifdef MY_CPU_X86_OR_AMD64 |
15 | #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check | 17 | #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check |
16 | #define USE_HW_SHA | 18 | #define USE_HW_SHA |
@@ -32,9 +34,14 @@ | |||
32 | #endif | 34 | #endif |
33 | #if (_MSC_VER >= USE_VER_MIN) | 35 | #if (_MSC_VER >= USE_VER_MIN) |
34 | #define USE_HW_SHA | 36 | #define USE_HW_SHA |
37 | #else | ||
38 | #define Z7_USE_HW_SHA_STUB | ||
35 | #endif | 39 | #endif |
36 | #endif | 40 | #endif |
37 | // #endif // MY_CPU_X86_OR_AMD64 | 41 | // #endif // MY_CPU_X86_OR_AMD64 |
42 | #ifndef USE_HW_SHA | ||
43 | // #define Z7_USE_HW_SHA_STUB // for debug | ||
44 | #endif | ||
38 | 45 | ||
39 | #ifdef USE_HW_SHA | 46 | #ifdef USE_HW_SHA |
40 | 47 | ||
@@ -202,46 +209,124 @@ void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t | |||
202 | 209 | ||
203 | #endif // USE_HW_SHA | 210 | #endif // USE_HW_SHA |
204 | 211 | ||
205 | #elif defined(MY_CPU_ARM_OR_ARM64) | 212 | #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) \ |
206 | 213 | && (!defined(Z7_MSC_VER_ORIGINAL) || (_MSC_VER >= 1929) && (_MSC_FULL_VER >= 192930037)) | |
207 | #if defined(__clang__) | 214 | #if defined(__ARM_FEATURE_SHA2) \ |
208 | #if (__clang_major__ >= 8) // fix that check | 215 | || defined(__ARM_FEATURE_CRYPTO) |
216 | #define USE_HW_SHA | ||
217 | #else | ||
218 | #if defined(MY_CPU_ARM64) \ | ||
219 | || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ | ||
220 | || defined(Z7_MSC_VER_ORIGINAL) | ||
221 | #if defined(__ARM_FP) && \ | ||
222 | ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ | ||
223 | || defined(__GNUC__) && (__GNUC__ >= 6) \ | ||
224 | ) \ | ||
225 | || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) | ||
226 | #if defined(MY_CPU_ARM64) \ | ||
227 | || !defined(Z7_CLANG_VERSION) \ | ||
228 | || defined(__ARM_NEON) && \ | ||
229 | (Z7_CLANG_VERSION < 170000 || \ | ||
230 | Z7_CLANG_VERSION > 170001) | ||
209 | #define USE_HW_SHA | 231 | #define USE_HW_SHA |
210 | #endif | 232 | #endif |
211 | #elif defined(__GNUC__) | ||
212 | #if (__GNUC__ >= 6) // fix that check | ||
213 | #define USE_HW_SHA | ||
214 | #endif | 233 | #endif |
215 | #elif defined(_MSC_VER) | ||
216 | #if _MSC_VER >= 1910 | ||
217 | #define USE_HW_SHA | ||
218 | #endif | 234 | #endif |
219 | #endif | 235 | #endif |
220 | 236 | ||
221 | #ifdef USE_HW_SHA | 237 | #ifdef USE_HW_SHA |
222 | 238 | ||
223 | // #pragma message("=== Sha1 HW === ") | 239 | // #pragma message("=== Sha1 HW === ") |
240 | // __ARM_FEATURE_CRYPTO macro is deprecated in favor of the finer grained feature macro __ARM_FEATURE_SHA2 | ||
224 | 241 | ||
225 | #if defined(__clang__) || defined(__GNUC__) | 242 | #if defined(__clang__) || defined(__GNUC__) |
243 | #if !defined(__ARM_FEATURE_SHA2) && \ | ||
244 | !defined(__ARM_FEATURE_CRYPTO) | ||
226 | #ifdef MY_CPU_ARM64 | 245 | #ifdef MY_CPU_ARM64 |
246 | #if defined(__clang__) | ||
247 | #define ATTRIB_SHA __attribute__((__target__("crypto"))) | ||
248 | #else | ||
227 | #define ATTRIB_SHA __attribute__((__target__("+crypto"))) | 249 | #define ATTRIB_SHA __attribute__((__target__("+crypto"))) |
250 | #endif | ||
228 | #else | 251 | #else |
252 | #if defined(__clang__) && (__clang_major__ >= 1) | ||
253 | #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2"))) | ||
254 | #else | ||
229 | #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) | 255 | #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) |
256 | #endif | ||
230 | #endif | 257 | #endif |
258 | #endif | ||
231 | #else | 259 | #else |
232 | // _MSC_VER | 260 | // _MSC_VER |
233 | // for arm32 | 261 | // for arm32 |
234 | #define _ARM_USE_NEW_NEON_INTRINSICS | 262 | #define _ARM_USE_NEW_NEON_INTRINSICS |
235 | #endif | 263 | #endif |
236 | 264 | ||
237 | #if defined(_MSC_VER) && defined(MY_CPU_ARM64) | 265 | |
266 | |||
267 | |||
268 | |||
269 | #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) | ||
238 | #include <arm64_neon.h> | 270 | #include <arm64_neon.h> |
239 | #else | 271 | #else |
272 | |||
273 | |||
274 | |||
275 | |||
276 | |||
277 | |||
278 | |||
279 | |||
280 | |||
281 | #if defined(__clang__) && __clang_major__ < 16 | ||
282 | #if !defined(__ARM_FEATURE_SHA2) && \ | ||
283 | !defined(__ARM_FEATURE_CRYPTO) | ||
284 | // #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ") | ||
285 | Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER | ||
286 | #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1 | ||
287 | // #if defined(__clang__) && __clang_major__ < 13 | ||
288 | #define __ARM_FEATURE_CRYPTO 1 | ||
289 | // #else | ||
290 | #define __ARM_FEATURE_SHA2 1 | ||
291 | // #endif | ||
292 | Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER | ||
293 | #endif | ||
294 | #endif // clang | ||
295 | |||
296 | #if defined(__clang__) | ||
297 | |||
298 | #if defined(__ARM_ARCH) && __ARM_ARCH < 8 | ||
299 | Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER | ||
300 | // #pragma message("#define __ARM_ARCH 8") | ||
301 | #undef __ARM_ARCH | ||
302 | #define __ARM_ARCH 8 | ||
303 | Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER | ||
304 | #endif | ||
305 | |||
306 | #endif // clang | ||
307 | |||
240 | #include <arm_neon.h> | 308 | #include <arm_neon.h> |
309 | |||
310 | #if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \ | ||
311 | defined(__ARM_FEATURE_CRYPTO) && \ | ||
312 | defined(__ARM_FEATURE_SHA2) | ||
313 | Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER | ||
314 | #undef __ARM_FEATURE_CRYPTO | ||
315 | #undef __ARM_FEATURE_SHA2 | ||
316 | #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET | ||
317 | Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER | ||
318 | // #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ") | ||
241 | #endif | 319 | #endif |
242 | 320 | ||
321 | #endif // Z7_MSC_VER_ORIGINAL | ||
322 | |||
243 | typedef uint32x4_t v128; | 323 | typedef uint32x4_t v128; |
244 | // typedef __n128 v128; // MSVC | 324 | // typedef __n128 v128; // MSVC |
325 | // the bug in clang 3.8.1: | ||
326 | // __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1); | ||
327 | #if defined(__clang__) && (__clang_major__ <= 9) | ||
328 | #pragma GCC diagnostic ignored "-Wvector-conversion" | ||
329 | #endif | ||
245 | 330 | ||
246 | #ifdef MY_CPU_BE | 331 | #ifdef MY_CPU_BE |
247 | #define MY_rev32_for_LE(x) | 332 | #define MY_rev32_for_LE(x) |
@@ -256,11 +341,11 @@ typedef uint32x4_t v128; | |||
256 | m = LOAD_128((data + (k) * 16)); \ | 341 | m = LOAD_128((data + (k) * 16)); \ |
257 | MY_rev32_for_LE(m); \ | 342 | MY_rev32_for_LE(m); \ |
258 | 343 | ||
259 | #define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3); | 344 | #define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3) |
260 | #define SU1(dest, src) dest = vsha1su1q_u32(dest, src); | 345 | #define SU1(dest, src) dest = vsha1su1q_u32(dest, src) |
261 | #define C(e) abcd = vsha1cq_u32(abcd, e, t); | 346 | #define C(e) abcd = vsha1cq_u32(abcd, e, t) |
262 | #define P(e) abcd = vsha1pq_u32(abcd, e, t); | 347 | #define P(e) abcd = vsha1pq_u32(abcd, e, t) |
263 | #define M(e) abcd = vsha1mq_u32(abcd, e, t); | 348 | #define M(e) abcd = vsha1mq_u32(abcd, e, t) |
264 | #define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0)) | 349 | #define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0)) |
265 | #define T(m, c) t = vaddq_u32(m, c) | 350 | #define T(m, c) t = vaddq_u32(m, c) |
266 | 351 | ||
@@ -337,16 +422,17 @@ void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t | |||
337 | #endif // MY_CPU_ARM_OR_ARM64 | 422 | #endif // MY_CPU_ARM_OR_ARM64 |
338 | 423 | ||
339 | 424 | ||
340 | #ifndef USE_HW_SHA | 425 | #if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB) |
341 | |||
342 | // #error Stop_Compiling_UNSUPPORTED_SHA | 426 | // #error Stop_Compiling_UNSUPPORTED_SHA |
343 | // #include <stdlib.h> | 427 | // #include <stdlib.h> |
344 | 428 | ||
345 | // #include "Sha1.h" | ||
346 | void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks); | ||
347 | 429 | ||
348 | #pragma message("Sha1 HW-SW stub was used") | ||
349 | 430 | ||
431 | // #include "Sha1.h" | ||
432 | // #if defined(_MSC_VER) | ||
433 | #pragma message("Sha1 HW-SW stub was used") | ||
434 | // #endif | ||
435 | void Z7_FASTCALL Sha1_UpdateBlocks (UInt32 state[5], const Byte *data, size_t numBlocks); | ||
350 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); | 436 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks); |
351 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) | 437 | void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks) |
352 | { | 438 | { |
@@ -359,7 +445,6 @@ void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t | |||
359 | return; | 445 | return; |
360 | */ | 446 | */ |
361 | } | 447 | } |
362 | |||
363 | #endif | 448 | #endif |
364 | 449 | ||
365 | #undef SU0 | 450 | #undef SU0 |
@@ -384,3 +469,4 @@ void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t | |||
384 | #undef USE_HW_SHA | 469 | #undef USE_HW_SHA |
385 | #undef ATTRIB_SHA | 470 | #undef ATTRIB_SHA |
386 | #undef USE_VER_MIN | 471 | #undef USE_VER_MIN |
472 | #undef Z7_USE_HW_SHA_STUB | ||