about | summary | refs | log | tree | commit | diff
path: root/C/Blake2s.c
diff options
context:
space:
mode:
Diffstat (limited to 'C/Blake2s.c')
-rw-r--r-- C/Blake2s.c 43
1 files changed, 35 insertions, 8 deletions
diff --git a/C/Blake2s.c b/C/Blake2s.c
index 459e76b..abb907d 100644
--- a/C/Blake2s.c
+++ b/C/Blake2s.c
@@ -1,5 +1,5 @@
1/* Blake2s.c -- BLAKE2sp Hash 1/* Blake2s.c -- BLAKE2sp Hash
22024-01-29 : Igor Pavlov : Public domain 22024-05-18 : Igor Pavlov : Public domain
32015-2019 : Samuel Neves : original code : CC0 1.0 Universal (CC0 1.0). */ 32015-2019 : Samuel Neves : original code : CC0 1.0 Universal (CC0 1.0). */
4 4
5#include "Precomp.h" 5#include "Precomp.h"
@@ -12,6 +12,17 @@
12#include "Compiler.h" 12#include "Compiler.h"
13#include "CpuArch.h" 13#include "CpuArch.h"
14 14
15/*
16 if defined(__AVX512F__) && defined(__AVX512VL__)
17 {
18 Z7_BLAKE2S_USE_AVX512_ALWAYS is defined automatically,
19 and the compiler is free to use avx512 in any code.
20 }
21 else if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS)
22 { avx512 is used only in the sse* and avx* branches of code. }
23*/
24// #define Z7_BLAKE2S_USE_AVX512_ALWAYS // for debug
25
15#if defined(__SSE2__) 26#if defined(__SSE2__)
16 #define Z7_BLAKE2S_USE_VECTORS 27 #define Z7_BLAKE2S_USE_VECTORS
17#elif defined(MY_CPU_X86_OR_AMD64) 28#elif defined(MY_CPU_X86_OR_AMD64)
@@ -59,6 +70,9 @@
59#endif // SSSE3 70#endif // SSSE3
60 71
61#if defined(__GNUC__) || defined(__clang__) 72#if defined(__GNUC__) || defined(__clang__)
73#if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__))
74 #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("avx512vl,avx512f")))
75#else
62 #if defined(Z7_BLAKE2S_USE_SSE41) 76 #if defined(Z7_BLAKE2S_USE_SSE41)
63 #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse4.1"))) 77 #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse4.1")))
64 #elif defined(Z7_BLAKE2S_USE_SSSE3) 78 #elif defined(Z7_BLAKE2S_USE_SSSE3)
@@ -67,6 +81,7 @@
67 #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse2"))) 81 #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse2")))
68 #endif 82 #endif
69#endif 83#endif
84#endif
70 85
71 86
72#if defined(__AVX2__) 87#if defined(__AVX2__)
@@ -77,7 +92,11 @@
77 || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) 92 || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100)
78 #define Z7_BLAKE2S_USE_AVX2 93 #define Z7_BLAKE2S_USE_AVX2
79 #ifdef Z7_BLAKE2S_USE_AVX2 94 #ifdef Z7_BLAKE2S_USE_AVX2
95#if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__))
96 #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx512vl,avx512f")))
97#else
80 #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx2"))) 98 #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx2")))
99#endif
81 #endif 100 #endif
82 #elif defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ 101 #elif defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
83 || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) 102 || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
@@ -107,7 +126,9 @@
107 126
108#if defined(__AVX512F__) && defined(__AVX512VL__) 127#if defined(__AVX512F__) && defined(__AVX512VL__)
109 // && defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL > 1930) 128 // && defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL > 1930)
129 #ifndef Z7_BLAKE2S_USE_AVX512_ALWAYS
110 #define Z7_BLAKE2S_USE_AVX512_ALWAYS 130 #define Z7_BLAKE2S_USE_AVX512_ALWAYS
131 #endif
111 // #pragma message ("=== Blake2s AVX512") 132 // #pragma message ("=== Blake2s AVX512")
112#endif 133#endif
113 134
@@ -1164,7 +1185,9 @@ Blake2sp_Final_V128_Fast(UInt32 *states)
1164#if 1 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) 1185#if 1 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS)
1165 #define MM256_ROR_EPI32 _mm256_ror_epi32 1186 #define MM256_ROR_EPI32 _mm256_ror_epi32
1166 #define Z7_MM256_ROR_EPI32_IS_SUPPORTED 1187 #define Z7_MM256_ROR_EPI32_IS_SUPPORTED
1188#ifdef Z7_BLAKE2S_USE_AVX2_WAY2
1167 #define LOAD_ROTATE_CONSTS_256 1189 #define LOAD_ROTATE_CONSTS_256
1190#endif
1168#else 1191#else
1169#ifdef Z7_BLAKE2S_USE_AVX2_WAY_SLOW 1192#ifdef Z7_BLAKE2S_USE_AVX2_WAY_SLOW
1170#ifdef Z7_BLAKE2S_USE_AVX2_WAY2 1193#ifdef Z7_BLAKE2S_USE_AVX2_WAY2
@@ -2549,9 +2572,11 @@ void z7_Black2sp_Prepare(void)
2549 2572
2550#if defined(MY_CPU_X86_OR_AMD64) 2573#if defined(MY_CPU_X86_OR_AMD64)
2551 #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) 2574 #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS)
2575 // optional check
2576 #if 0 || !(defined(__AVX512F__) && defined(__AVX512VL__))
2552 if (CPU_IsSupported_AVX512F_AVX512VL()) 2577 if (CPU_IsSupported_AVX512F_AVX512VL())
2553 #endif 2578 #endif
2554 #if defined(Z7_BLAKE2S_USE_SSE41) 2579 #elif defined(Z7_BLAKE2S_USE_SSE41)
2555 if (CPU_IsSupported_SSE41()) 2580 if (CPU_IsSupported_SSE41())
2556 #elif defined(Z7_BLAKE2S_USE_SSSE3) 2581 #elif defined(Z7_BLAKE2S_USE_SSSE3)
2557 if (CPU_IsSupported_SSSE3()) 2582 if (CPU_IsSupported_SSSE3())
@@ -2584,12 +2609,14 @@ void z7_Black2sp_Prepare(void)
2584 2609
2585#ifdef Z7_BLAKE2S_USE_AVX2 2610#ifdef Z7_BLAKE2S_USE_AVX2
2586#if defined(MY_CPU_X86_OR_AMD64) 2611#if defined(MY_CPU_X86_OR_AMD64)
2587 if ( 2612
2588 #if 0 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) 2613 #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS)
2589 CPU_IsSupported_AVX512F_AVX512VL() && 2614 #if 0
2615 if (CPU_IsSupported_AVX512F_AVX512VL())
2616 #endif
2617 #else
2618 if (CPU_IsSupported_AVX2())
2590 #endif 2619 #endif
2591 CPU_IsSupported_AVX2()
2592 )
2593#endif 2620#endif
2594 { 2621 {
2595 // #pragma message ("=== Blake2s AVX2") 2622 // #pragma message ("=== Blake2s AVX2")