From fc662341e6f85da78ada0e443f6116b978f79f22 Mon Sep 17 00:00:00 2001
From: Igor Pavlov <87184205+ip7z@users.noreply.github.com>
Date: Tue, 14 May 2024 00:00:00 +0000
Subject: 24.05

---
 C/Sha256Opt.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 104 insertions(+), 23 deletions(-)

(limited to 'C/Sha256Opt.c')

diff --git a/C/Sha256Opt.c b/C/Sha256Opt.c
index e4465e3..eb38166 100644
--- a/C/Sha256Opt.c
+++ b/C/Sha256Opt.c
@@ -1,5 +1,5 @@
 /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
-2023-04-02 : Igor Pavlov : Public domain */
+2024-03-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 #include "Compiler.h"
@@ -11,6 +11,8 @@
 #endif
 #endif
 
+// #define Z7_USE_HW_SHA_STUB // for debug
+
 #ifdef MY_CPU_X86_OR_AMD64
   #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
       #define USE_HW_SHA
@@ -32,9 +34,14 @@
     #endif
     #if (_MSC_VER >= USE_VER_MIN)
       #define USE_HW_SHA
+    #else
+      #define Z7_USE_HW_SHA_STUB
     #endif
   #endif
 // #endif // MY_CPU_X86_OR_AMD64
+#ifndef USE_HW_SHA
+  // #define Z7_USE_HW_SHA_STUB // for debug
+#endif
 
 #ifdef USE_HW_SHA
 
@@ -202,19 +209,28 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
 
 #endif // USE_HW_SHA
 
-#elif defined(MY_CPU_ARM_OR_ARM64)
-
-  #if defined(__clang__)
-    #if (__clang_major__ >= 8) // fix that check
+#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
+  
+  #if   defined(__ARM_FEATURE_SHA2) \
+     || defined(__ARM_FEATURE_CRYPTO)
+    #define USE_HW_SHA
+  #else
+    #if  defined(MY_CPU_ARM64) \
+      || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
+      || defined(Z7_MSC_VER_ORIGINAL)
+    #if  defined(__ARM_FP) && \
+          (   defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
+           || defined(__GNUC__) && (__GNUC__ >= 6) \
+          ) \
+      || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
+    #if  defined(MY_CPU_ARM64) \
+      || !defined(Z7_CLANG_VERSION) \
+      || defined(__ARM_NEON) && \
+          (Z7_CLANG_VERSION < 170000 || \
+           Z7_CLANG_VERSION > 170001)
       #define USE_HW_SHA
     #endif
-  #elif defined(__GNUC__)
-    #if (__GNUC__ >= 6) // fix that check
-      #define USE_HW_SHA
     #endif
-  #elif defined(_MSC_VER)
-    #if _MSC_VER >= 1910
-      #define USE_HW_SHA
     #endif
   #endif
 
@@ -222,24 +238,88 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
 
 // #pragma message("=== Sha256 HW === ")
 
+
 #if defined(__clang__) || defined(__GNUC__)
+#if !defined(__ARM_FEATURE_SHA2) && \
+    !defined(__ARM_FEATURE_CRYPTO)
   #ifdef MY_CPU_ARM64
+#if defined(__clang__)
+    #define ATTRIB_SHA __attribute__((__target__("crypto")))
+#else
     #define ATTRIB_SHA __attribute__((__target__("+crypto")))
+#endif
   #else
+#if defined(__clang__) && (__clang_major__ >= 1)
+    #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2")))
+#else
     #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+#endif
   #endif
+#endif
 #else
   // _MSC_VER
   // for arm32
   #define _ARM_USE_NEW_NEON_INTRINSICS
 #endif
 
-#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+
+
+
+
+#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
 #include <arm64_neon.h>
 #else
+
+
+
+
+
+
+
+
+
+#if defined(__clang__) && __clang_major__ < 16
+#if !defined(__ARM_FEATURE_SHA2) && \
+    !defined(__ARM_FEATURE_CRYPTO)
+//     #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
+    Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+    #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
+// #if defined(__clang__) && __clang_major__ < 13
+    #define __ARM_FEATURE_CRYPTO 1
+// #else
+    #define __ARM_FEATURE_SHA2 1
+// #endif
+    Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+#endif
+#endif // clang
+
+#if defined(__clang__)
+
+#if defined(__ARM_ARCH) && __ARM_ARCH < 8
+    Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+//    #pragma message("#define __ARM_ARCH 8")
+    #undef  __ARM_ARCH
+    #define __ARM_ARCH 8
+    Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+#endif
+
+#endif // clang
+
 #include <arm_neon.h>
+
+#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
+    defined(__ARM_FEATURE_CRYPTO) && \
+    defined(__ARM_FEATURE_SHA2)
+Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+    #undef __ARM_FEATURE_CRYPTO
+    #undef __ARM_FEATURE_SHA2
+    #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
+Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+//    #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
 #endif
 
+#endif // Z7_MSC_VER_ORIGINAL
+
 typedef uint32x4_t v128;
 // typedef __n128 v128; // MSVC
 
@@ -316,10 +396,10 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
     LOAD_SHUFFLE (m2, 2)
     LOAD_SHUFFLE (m3, 3)
 
-    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
-    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
-    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
-    R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
+    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
+    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
+    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
+    R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
     
     state0 = vaddq_u32(state0, state0_save);
     state1 = vaddq_u32(state1, state1_save);
@@ -337,16 +417,17 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
 #endif // MY_CPU_ARM_OR_ARM64
 
 
-#ifndef USE_HW_SHA
-
+#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
 // #error Stop_Compiling_UNSUPPORTED_SHA
 // #include <stdlib.h>
-
+// We can compile this file with another C compiler,
+// or we can compile asm version.
+// So we can generate real code instead of this stub function.
 // #include "Sha256.h"
-void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
-
+// #if defined(_MSC_VER)
 #pragma message("Sha256 HW-SW stub was used")
-
+// #endif
+void Z7_FASTCALL Sha256_UpdateBlocks   (UInt32 state[8], const Byte *data, size_t numBlocks);
 void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
 void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
 {
@@ -359,7 +440,6 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
   return;
   */
 }
-
 #endif
 
 
@@ -384,3 +464,4 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
 #undef USE_HW_SHA
 #undef ATTRIB_SHA
 #undef USE_VER_MIN
+#undef Z7_USE_HW_SHA_STUB
-- 
cgit v1.2.3-55-g6feb