diff options
Diffstat (limited to 'C/Blake2s.c')
-rw-r--r-- | C/Blake2s.c | 43 |
1 files changed, 35 insertions, 8 deletions
diff --git a/C/Blake2s.c b/C/Blake2s.c
index 459e76b..abb907d 100644
--- a/C/Blake2s.c
+++ b/C/Blake2s.c
@@ -1,5 +1,5 @@ | |||
1 | /* Blake2s.c -- BLAKE2sp Hash | 1 | /* Blake2s.c -- BLAKE2sp Hash |
2 | 2024-01-29 : Igor Pavlov : Public domain | 2 | 2024-05-18 : Igor Pavlov : Public domain |
3 | 2015-2019 : Samuel Neves : original code : CC0 1.0 Universal (CC0 1.0). */ | 3 | 2015-2019 : Samuel Neves : original code : CC0 1.0 Universal (CC0 1.0). */ |
4 | 4 | ||
5 | #include "Precomp.h" | 5 | #include "Precomp.h" |
@@ -12,6 +12,17 @@ | |||
12 | #include "Compiler.h" | 12 | #include "Compiler.h" |
13 | #include "CpuArch.h" | 13 | #include "CpuArch.h" |
14 | 14 | ||
15 | /* | ||
16 | if defined(__AVX512F__) && defined(__AVX512VL__) | ||
17 | { | ||
18 | we define Z7_BLAKE2S_USE_AVX512_ALWAYS, | ||
19 | but the compiler can use avx512 for any code. | ||
20 | } | ||
21 | else if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) | ||
22 | { we use avx512 only for sse* and avx* branches of code. } | ||
23 | */ | ||
24 | // #define Z7_BLAKE2S_USE_AVX512_ALWAYS // for debug | ||
25 | |||
15 | #if defined(__SSE2__) | 26 | #if defined(__SSE2__) |
16 | #define Z7_BLAKE2S_USE_VECTORS | 27 | #define Z7_BLAKE2S_USE_VECTORS |
17 | #elif defined(MY_CPU_X86_OR_AMD64) | 28 | #elif defined(MY_CPU_X86_OR_AMD64) |
@@ -59,6 +70,9 @@ | |||
59 | #endif // SSSE3 | 70 | #endif // SSSE3 |
60 | 71 | ||
61 | #if defined(__GNUC__) || defined(__clang__) | 72 | #if defined(__GNUC__) || defined(__clang__) |
73 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__)) | ||
74 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("avx512vl,avx512f"))) | ||
75 | #else | ||
62 | #if defined(Z7_BLAKE2S_USE_SSE41) | 76 | #if defined(Z7_BLAKE2S_USE_SSE41) |
63 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse4.1"))) | 77 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse4.1"))) |
64 | #elif defined(Z7_BLAKE2S_USE_SSSE3) | 78 | #elif defined(Z7_BLAKE2S_USE_SSSE3) |
@@ -67,6 +81,7 @@ | |||
67 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse2"))) | 81 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse2"))) |
68 | #endif | 82 | #endif |
69 | #endif | 83 | #endif |
84 | #endif | ||
70 | 85 | ||
71 | 86 | ||
72 | #if defined(__AVX2__) | 87 | #if defined(__AVX2__) |
@@ -77,7 +92,11 @@ | |||
77 | || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) | 92 | || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) |
78 | #define Z7_BLAKE2S_USE_AVX2 | 93 | #define Z7_BLAKE2S_USE_AVX2 |
79 | #ifdef Z7_BLAKE2S_USE_AVX2 | 94 | #ifdef Z7_BLAKE2S_USE_AVX2 |
95 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__)) | ||
96 | #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx512vl,avx512f"))) | ||
97 | #else | ||
80 | #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx2"))) | 98 | #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx2"))) |
99 | #endif | ||
81 | #endif | 100 | #endif |
82 | #elif defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ | 101 | #elif defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ |
83 | || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) | 102 | || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) |
@@ -107,7 +126,9 @@ | |||
107 | 126 | ||
108 | #if defined(__AVX512F__) && defined(__AVX512VL__) | 127 | #if defined(__AVX512F__) && defined(__AVX512VL__) |
109 | // && defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL > 1930) | 128 | // && defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL > 1930) |
129 | #ifndef Z7_BLAKE2S_USE_AVX512_ALWAYS | ||
110 | #define Z7_BLAKE2S_USE_AVX512_ALWAYS | 130 | #define Z7_BLAKE2S_USE_AVX512_ALWAYS |
131 | #endif | ||
111 | // #pragma message ("=== Blake2s AVX512") | 132 | // #pragma message ("=== Blake2s AVX512") |
112 | #endif | 133 | #endif |
113 | 134 | ||
@@ -1164,7 +1185,9 @@ Blake2sp_Final_V128_Fast(UInt32 *states) | |||
1164 | #if 1 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) | 1185 | #if 1 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
1165 | #define MM256_ROR_EPI32 _mm256_ror_epi32 | 1186 | #define MM256_ROR_EPI32 _mm256_ror_epi32 |
1166 | #define Z7_MM256_ROR_EPI32_IS_SUPPORTED | 1187 | #define Z7_MM256_ROR_EPI32_IS_SUPPORTED |
1188 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY2 | ||
1167 | #define LOAD_ROTATE_CONSTS_256 | 1189 | #define LOAD_ROTATE_CONSTS_256 |
1190 | #endif | ||
1168 | #else | 1191 | #else |
1169 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY_SLOW | 1192 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY_SLOW |
1170 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY2 | 1193 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY2 |
@@ -2549,9 +2572,11 @@ void z7_Black2sp_Prepare(void) | |||
2549 | 2572 | ||
2550 | #if defined(MY_CPU_X86_OR_AMD64) | 2573 | #if defined(MY_CPU_X86_OR_AMD64) |
2551 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) | 2574 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
2575 | // optional check | ||
2576 | #if 0 || !(defined(__AVX512F__) && defined(__AVX512VL__)) | ||
2552 | if (CPU_IsSupported_AVX512F_AVX512VL()) | 2577 | if (CPU_IsSupported_AVX512F_AVX512VL()) |
2553 | #endif | 2578 | #endif |
2554 | #if defined(Z7_BLAKE2S_USE_SSE41) | 2579 | #elif defined(Z7_BLAKE2S_USE_SSE41) |
2555 | if (CPU_IsSupported_SSE41()) | 2580 | if (CPU_IsSupported_SSE41()) |
2556 | #elif defined(Z7_BLAKE2S_USE_SSSE3) | 2581 | #elif defined(Z7_BLAKE2S_USE_SSSE3) |
2557 | if (CPU_IsSupported_SSSE3()) | 2582 | if (CPU_IsSupported_SSSE3()) |
@@ -2584,12 +2609,14 @@ void z7_Black2sp_Prepare(void) | |||
2584 | 2609 | ||
2585 | #ifdef Z7_BLAKE2S_USE_AVX2 | 2610 | #ifdef Z7_BLAKE2S_USE_AVX2 |
2586 | #if defined(MY_CPU_X86_OR_AMD64) | 2611 | #if defined(MY_CPU_X86_OR_AMD64) |
2587 | if ( | 2612 | |
2588 | #if 0 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) | 2613 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
2589 | CPU_IsSupported_AVX512F_AVX512VL() && | 2614 | #if 0 |
2615 | if (CPU_IsSupported_AVX512F_AVX512VL()) | ||
2616 | #endif | ||
2617 | #else | ||
2618 | if (CPU_IsSupported_AVX2()) | ||
2590 | #endif | 2619 | #endif |
2591 | CPU_IsSupported_AVX2() | ||
2592 | ) | ||
2593 | #endif | 2620 | #endif |
2594 | { | 2621 | { |
2595 | // #pragma message ("=== Blake2s AVX2") | 2622 | // #pragma message ("=== Blake2s AVX2") |