aboutsummaryrefslogtreecommitdiff
path: root/C/CpuArch.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--C/CpuArch.c109
1 files changed, 95 insertions, 14 deletions
diff --git a/C/CpuArch.c b/C/CpuArch.c
index e792f39..6e02551 100644
--- a/C/CpuArch.c
+++ b/C/CpuArch.c
@@ -1,5 +1,5 @@
1/* CpuArch.c -- CPU specific code 1/* CpuArch.c -- CPU specific code
22024-07-04 : Igor Pavlov : Public domain */ 2Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5 5
@@ -17,7 +17,7 @@
17/* 17/*
18 cpuid instruction supports (subFunction) parameter in ECX, 18 cpuid instruction supports (subFunction) parameter in ECX,
19 that is used only with some specific (function) parameter values. 19 that is used only with some specific (function) parameter values.
20 But we always use only (subFunction==0). 20 most functions use only (subFunction==0).
21*/ 21*/
22/* 22/*
23 __cpuid(): MSVC and GCC/CLANG use same function/macro name 23 __cpuid(): MSVC and GCC/CLANG use same function/macro name
@@ -49,43 +49,49 @@
49#if defined(MY_CPU_AMD64) && defined(__PIC__) \ 49#if defined(MY_CPU_AMD64) && defined(__PIC__) \
50 && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) 50 && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
51 51
52#define x86_cpuid_MACRO(p, func) { \ 52 /* "=&r" selects free register. It can select even rbx, if that register is free.
53 "=&D" for (RDI) also works, but the code can be larger with "=&D"
54 "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */
55
56#define x86_cpuid_MACRO_2(p, func, subFunc) { \
53 __asm__ __volatile__ ( \ 57 __asm__ __volatile__ ( \
54 ASM_LN "mov %%rbx, %q1" \ 58 ASM_LN "mov %%rbx, %q1" \
55 ASM_LN "cpuid" \ 59 ASM_LN "cpuid" \
56 ASM_LN "xchg %%rbx, %q1" \ 60 ASM_LN "xchg %%rbx, %q1" \
57 : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } 61 : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
58
59 /* "=&r" selects free register. It can select even rbx, if that register is free.
60 "=&D" for (RDI) also works, but the code can be larger with "=&D"
61 "2"(0) means (subFunction = 0),
62 2 is (zero-based) index in the output constraint list "=c" (ECX). */
63 62
64#elif defined(MY_CPU_X86) && defined(__PIC__) \ 63#elif defined(MY_CPU_X86) && defined(__PIC__) \
65 && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) 64 && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
66 65
67#define x86_cpuid_MACRO(p, func) { \ 66#define x86_cpuid_MACRO_2(p, func, subFunc) { \
68 __asm__ __volatile__ ( \ 67 __asm__ __volatile__ ( \
69 ASM_LN "mov %%ebx, %k1" \ 68 ASM_LN "mov %%ebx, %k1" \
70 ASM_LN "cpuid" \ 69 ASM_LN "cpuid" \
71 ASM_LN "xchg %%ebx, %k1" \ 70 ASM_LN "xchg %%ebx, %k1" \
72 : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } 71 : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
73 72
74#else 73#else
75 74
76#define x86_cpuid_MACRO(p, func) { \ 75#define x86_cpuid_MACRO_2(p, func, subFunc) { \
77 __asm__ __volatile__ ( \ 76 __asm__ __volatile__ ( \
78 ASM_LN "cpuid" \ 77 ASM_LN "cpuid" \
79 : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } 78 : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
80 79
81#endif 80#endif
82 81
82#define x86_cpuid_MACRO(p, func) x86_cpuid_MACRO_2(p, func, 0)
83 83
84void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) 84void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
85{ 85{
86 x86_cpuid_MACRO(p, func) 86 x86_cpuid_MACRO(p, func)
87} 87}
88 88
89static
90void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
91{
92 x86_cpuid_MACRO_2(p, func, subFunc)
93}
94
89 95
90Z7_NO_INLINE 96Z7_NO_INLINE
91UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) 97UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
@@ -205,11 +211,39 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
205 __asm ret 0 211 __asm ret 0
206} 212}
207 213
214static
215void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
216{
217 UNUSED_VAR(p)
218 UNUSED_VAR(func)
219 UNUSED_VAR(subFunc)
220 __asm push ebx
221 __asm push edi
222 __asm mov edi, ecx // p
223 __asm mov eax, edx // func
224 __asm mov ecx, [esp + 12] // subFunc
225 __asm cpuid
226 __asm mov [edi ], eax
227 __asm mov [edi + 4], ebx
228 __asm mov [edi + 8], ecx
229 __asm mov [edi + 12], edx
230 __asm pop edi
231 __asm pop ebx
232 __asm ret 4
233}
234
208#else // MY_CPU_AMD64 235#else // MY_CPU_AMD64
209 236
210 #if _MSC_VER >= 1600 237 #if _MSC_VER >= 1600
211 #include <intrin.h> 238 #include <intrin.h>
212 #define MY_cpuidex __cpuidex 239 #define MY_cpuidex __cpuidex
240
241static
242void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
243{
244 __cpuidex((int *)p, func, subFunc);
245}
246
213 #else 247 #else
214/* 248/*
215 __cpuid (func == (0 or 7)) requires subfunction number in ECX. 249 __cpuid (func == (0 or 7)) requires subfunction number in ECX.
@@ -219,7 +253,7 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
219 We still can use __cpuid for low (func) values that don't require ECX, 253 We still can use __cpuid for low (func) values that don't require ECX,
220 but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). 254 but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
221 So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, 255 So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
222 where ECX value is first parameter for FASTCALL / NO_INLINE func, 256 where ECX value is first parameter for FASTCALL / NO_INLINE func.
223 So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and 257 So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
224 old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. 258 old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
225 259
@@ -233,6 +267,11 @@ Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int
233} 267}
234 #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) 268 #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
235 #pragma message("======== MY_cpuidex_HACK WAS USED ========") 269 #pragma message("======== MY_cpuidex_HACK WAS USED ========")
270static
271void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
272{
273 MY_cpuidex_HACK(subFunc, func, (Int32 *)p);
274}
236 #endif // _MSC_VER >= 1600 275 #endif // _MSC_VER >= 1600
237 276
238#if !defined(MY_CPU_AMD64) 277#if !defined(MY_CPU_AMD64)
@@ -445,6 +484,23 @@ BoolInt CPU_IsSupported_SHA(void)
445 } 484 }
446} 485}
447 486
487
488BoolInt CPU_IsSupported_SHA512(void)
489{
490 if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here
491
492 if (z7_x86_cpuid_GetMaxFunc() < 7)
493 return False;
494 {
495 UInt32 d[4];
496 z7_x86_cpuid_subFunc(d, 7, 0);
497 if (d[0] < 1) // d[0] - is max supported subleaf value
498 return False;
499 z7_x86_cpuid_subFunc(d, 7, 1);
500 return (BoolInt)(d[0]) & 1;
501 }
502}
503
448/* 504/*
449MSVC: _xgetbv() intrinsic is available since VS2010SP1. 505MSVC: _xgetbv() intrinsic is available since VS2010SP1.
450 MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in 506 MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
@@ -776,6 +832,18 @@ BoolInt CPU_IsSupported_NEON(void)
776 return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); 832 return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
777} 833}
778 834
835BoolInt CPU_IsSupported_SHA512(void)
836{
837 return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512");
838}
839
840/*
841BoolInt CPU_IsSupported_SHA3(void)
842{
843 return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3");
844}
845*/
846
779#ifdef MY_CPU_ARM64 847#ifdef MY_CPU_ARM64
780#define APPLE_CRYPTO_SUPPORT_VAL 1 848#define APPLE_CRYPTO_SUPPORT_VAL 1
781#else 849#else
@@ -860,6 +928,19 @@ MY_HWCAP_CHECK_FUNC (CRC32)
860MY_HWCAP_CHECK_FUNC (SHA1) 928MY_HWCAP_CHECK_FUNC (SHA1)
861MY_HWCAP_CHECK_FUNC (SHA2) 929MY_HWCAP_CHECK_FUNC (SHA2)
862MY_HWCAP_CHECK_FUNC (AES) 930MY_HWCAP_CHECK_FUNC (AES)
931#ifdef MY_CPU_ARM64
932// <hwcap.h> supports HWCAP_SHA512 and HWCAP_SHA3 since 2017.
933// we define them here, if they are not defined
934#ifndef HWCAP_SHA3
935// #define HWCAP_SHA3 (1 << 17)
936#endif
937#ifndef HWCAP_SHA512
938// #pragma message("=== HWCAP_SHA512 define === ")
939#define HWCAP_SHA512 (1 << 21)
940#endif
941MY_HWCAP_CHECK_FUNC (SHA512)
942// MY_HWCAP_CHECK_FUNC (SHA3)
943#endif
863 944
864#endif // __APPLE__ 945#endif // __APPLE__
865#endif // _WIN32 946#endif // _WIN32