diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-11-29 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-11-30 15:27:15 +0500 |
commit | e5431fa6f5505e385c6f9367260717e9c47dc2ee (patch) | |
tree | 4cd2c2c3b225b48c8e7053432c41d7b6b6a3d5f8 /C/CpuArch.c | |
parent | e008ce3976c087bfd21344af8f00a23cf69d4174 (diff) | |
download | 7zip-main.tar.gz 7zip-main.tar.bz2 7zip-main.zip |
Diffstat (limited to 'C/CpuArch.c')
-rw-r--r-- | C/CpuArch.c | 109 |
1 files changed, 95 insertions, 14 deletions
diff --git a/C/CpuArch.c b/C/CpuArch.c index e792f39..6e02551 100644 --- a/C/CpuArch.c +++ b/C/CpuArch.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* CpuArch.c -- CPU specific code | 1 | /* CpuArch.c -- CPU specific code |
2 | 2024-07-04 : Igor Pavlov : Public domain */ | 2 | Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
@@ -17,7 +17,7 @@ | |||
17 | /* | 17 | /* |
18 | cpuid instruction supports (subFunction) parameter in ECX, | 18 | cpuid instruction supports (subFunction) parameter in ECX, |
19 | that is used only with some specific (function) parameter values. | 19 | that is used only with some specific (function) parameter values. |
20 | But we always use only (subFunction==0). | 20 | most functions use only (subFunction==0). |
21 | */ | 21 | */ |
22 | /* | 22 | /* |
23 | __cpuid(): MSVC and GCC/CLANG use same function/macro name | 23 | __cpuid(): MSVC and GCC/CLANG use same function/macro name |
@@ -49,43 +49,49 @@ | |||
49 | #if defined(MY_CPU_AMD64) && defined(__PIC__) \ | 49 | #if defined(MY_CPU_AMD64) && defined(__PIC__) \ |
50 | && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) | 50 | && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) |
51 | 51 | ||
52 | #define x86_cpuid_MACRO(p, func) { \ | 52 | /* "=&r" selects free register. It can select even rbx, if that register is free. |
53 | "=&D" for (RDI) also works, but the code can be larger with "=&D" | ||
54 | "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */ | ||
55 | |||
56 | #define x86_cpuid_MACRO_2(p, func, subFunc) { \ | ||
53 | __asm__ __volatile__ ( \ | 57 | __asm__ __volatile__ ( \ |
54 | ASM_LN "mov %%rbx, %q1" \ | 58 | ASM_LN "mov %%rbx, %q1" \ |
55 | ASM_LN "cpuid" \ | 59 | ASM_LN "cpuid" \ |
56 | ASM_LN "xchg %%rbx, %q1" \ | 60 | ASM_LN "xchg %%rbx, %q1" \ |
57 | : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } | 61 | : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } |
58 | |||
59 | /* "=&r" selects free register. It can select even rbx, if that register is free. | ||
60 | "=&D" for (RDI) also works, but the code can be larger with "=&D" | ||
61 | "2"(0) means (subFunction = 0), | ||
62 | 2 is (zero-based) index in the output constraint list "=c" (ECX). */ | ||
63 | 62 | ||
64 | #elif defined(MY_CPU_X86) && defined(__PIC__) \ | 63 | #elif defined(MY_CPU_X86) && defined(__PIC__) \ |
65 | && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) | 64 | && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) |
66 | 65 | ||
67 | #define x86_cpuid_MACRO(p, func) { \ | 66 | #define x86_cpuid_MACRO_2(p, func, subFunc) { \ |
68 | __asm__ __volatile__ ( \ | 67 | __asm__ __volatile__ ( \ |
69 | ASM_LN "mov %%ebx, %k1" \ | 68 | ASM_LN "mov %%ebx, %k1" \ |
70 | ASM_LN "cpuid" \ | 69 | ASM_LN "cpuid" \ |
71 | ASM_LN "xchg %%ebx, %k1" \ | 70 | ASM_LN "xchg %%ebx, %k1" \ |
72 | : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } | 71 | : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } |
73 | 72 | ||
74 | #else | 73 | #else |
75 | 74 | ||
76 | #define x86_cpuid_MACRO(p, func) { \ | 75 | #define x86_cpuid_MACRO_2(p, func, subFunc) { \ |
77 | __asm__ __volatile__ ( \ | 76 | __asm__ __volatile__ ( \ |
78 | ASM_LN "cpuid" \ | 77 | ASM_LN "cpuid" \ |
79 | : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } | 78 | : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } |
80 | 79 | ||
81 | #endif | 80 | #endif |
82 | 81 | ||
82 | #define x86_cpuid_MACRO(p, func) x86_cpuid_MACRO_2(p, func, 0) | ||
83 | 83 | ||
84 | void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) | 84 | void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) |
85 | { | 85 | { |
86 | x86_cpuid_MACRO(p, func) | 86 | x86_cpuid_MACRO(p, func) |
87 | } | 87 | } |
88 | 88 | ||
89 | static | ||
90 | void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) | ||
91 | { | ||
92 | x86_cpuid_MACRO_2(p, func, subFunc) | ||
93 | } | ||
94 | |||
89 | 95 | ||
90 | Z7_NO_INLINE | 96 | Z7_NO_INLINE |
91 | UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) | 97 | UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) |
@@ -205,11 +211,39 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) | |||
205 | __asm ret 0 | 211 | __asm ret 0 |
206 | } | 212 | } |
207 | 213 | ||
214 | static | ||
215 | void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) | ||
216 | { | ||
217 | UNUSED_VAR(p) | ||
218 | UNUSED_VAR(func) | ||
219 | UNUSED_VAR(subFunc) | ||
220 | __asm push ebx | ||
221 | __asm push edi | ||
222 | __asm mov edi, ecx // p | ||
223 | __asm mov eax, edx // func | ||
224 | __asm mov ecx, [esp + 12] // subFunc | ||
225 | __asm cpuid | ||
226 | __asm mov [edi ], eax | ||
227 | __asm mov [edi + 4], ebx | ||
228 | __asm mov [edi + 8], ecx | ||
229 | __asm mov [edi + 12], edx | ||
230 | __asm pop edi | ||
231 | __asm pop ebx | ||
232 | __asm ret 4 | ||
233 | } | ||
234 | |||
208 | #else // MY_CPU_AMD64 | 235 | #else // MY_CPU_AMD64 |
209 | 236 | ||
210 | #if _MSC_VER >= 1600 | 237 | #if _MSC_VER >= 1600 |
211 | #include <intrin.h> | 238 | #include <intrin.h> |
212 | #define MY_cpuidex __cpuidex | 239 | #define MY_cpuidex __cpuidex |
240 | |||
241 | static | ||
242 | void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) | ||
243 | { | ||
244 | __cpuidex((int *)p, func, subFunc); | ||
245 | } | ||
246 | |||
213 | #else | 247 | #else |
214 | /* | 248 | /* |
215 | __cpuid (func == (0 or 7)) requires subfunction number in ECX. | 249 | __cpuid (func == (0 or 7)) requires subfunction number in ECX. |
@@ -219,7 +253,7 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) | |||
219 | We still can use __cpuid for low (func) values that don't require ECX, | 253 | We still can use __cpuid for low (func) values that don't require ECX, |
220 | but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). | 254 | but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). |
221 | So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, | 255 | So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, |
222 | where ECX value is first parameter for FASTCALL / NO_INLINE func, | 256 | where ECX value is first parameter for FASTCALL / NO_INLINE func. |
223 | So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and | 257 | So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and |
224 | old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. | 258 | old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. |
225 | 259 | ||
@@ -233,6 +267,11 @@ Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int | |||
233 | } | 267 | } |
234 | #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) | 268 | #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) |
235 | #pragma message("======== MY_cpuidex_HACK WAS USED ========") | 269 | #pragma message("======== MY_cpuidex_HACK WAS USED ========") |
270 | static | ||
271 | void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) | ||
272 | { | ||
273 | MY_cpuidex_HACK(subFunc, func, (Int32 *)p); | ||
274 | } | ||
236 | #endif // _MSC_VER >= 1600 | 275 | #endif // _MSC_VER >= 1600 |
237 | 276 | ||
238 | #if !defined(MY_CPU_AMD64) | 277 | #if !defined(MY_CPU_AMD64) |
@@ -445,6 +484,23 @@ BoolInt CPU_IsSupported_SHA(void) | |||
445 | } | 484 | } |
446 | } | 485 | } |
447 | 486 | ||
487 | |||
488 | BoolInt CPU_IsSupported_SHA512(void) | ||
489 | { | ||
490 | if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here | ||
491 | |||
492 | if (z7_x86_cpuid_GetMaxFunc() < 7) | ||
493 | return False; | ||
494 | { | ||
495 | UInt32 d[4]; | ||
496 | z7_x86_cpuid_subFunc(d, 7, 0); | ||
497 | if (d[0] < 1) // d[0] - is max supported subleaf value | ||
498 | return False; | ||
499 | z7_x86_cpuid_subFunc(d, 7, 1); | ||
500 | return (BoolInt)(d[0]) & 1; | ||
501 | } | ||
502 | } | ||
503 | |||
448 | /* | 504 | /* |
449 | MSVC: _xgetbv() intrinsic is available since VS2010SP1. | 505 | MSVC: _xgetbv() intrinsic is available since VS2010SP1. |
450 | MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in | 506 | MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in |
@@ -776,6 +832,18 @@ BoolInt CPU_IsSupported_NEON(void) | |||
776 | return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); | 832 | return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); |
777 | } | 833 | } |
778 | 834 | ||
835 | BoolInt CPU_IsSupported_SHA512(void) | ||
836 | { | ||
837 | return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512"); | ||
838 | } | ||
839 | |||
840 | /* | ||
841 | BoolInt CPU_IsSupported_SHA3(void) | ||
842 | { | ||
843 | return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3"); | ||
844 | } | ||
845 | */ | ||
846 | |||
779 | #ifdef MY_CPU_ARM64 | 847 | #ifdef MY_CPU_ARM64 |
780 | #define APPLE_CRYPTO_SUPPORT_VAL 1 | 848 | #define APPLE_CRYPTO_SUPPORT_VAL 1 |
781 | #else | 849 | #else |
@@ -860,6 +928,19 @@ MY_HWCAP_CHECK_FUNC (CRC32) | |||
860 | MY_HWCAP_CHECK_FUNC (SHA1) | 928 | MY_HWCAP_CHECK_FUNC (SHA1) |
861 | MY_HWCAP_CHECK_FUNC (SHA2) | 929 | MY_HWCAP_CHECK_FUNC (SHA2) |
862 | MY_HWCAP_CHECK_FUNC (AES) | 930 | MY_HWCAP_CHECK_FUNC (AES) |
931 | #ifdef MY_CPU_ARM64 | ||
932 | // <hwcap.h> supports HWCAP_SHA512 and HWCAP_SHA3 since 2017. | ||
933 | // we define them here, if they are not defined | ||
934 | #ifndef HWCAP_SHA3 | ||
935 | // #define HWCAP_SHA3 (1 << 17) | ||
936 | #endif | ||
937 | #ifndef HWCAP_SHA512 | ||
938 | // #pragma message("=== HWCAP_SHA512 define === ") | ||
939 | #define HWCAP_SHA512 (1 << 21) | ||
940 | #endif | ||
941 | MY_HWCAP_CHECK_FUNC (SHA512) | ||
942 | // MY_HWCAP_CHECK_FUNC (SHA3) | ||
943 | #endif | ||
863 | 944 | ||
864 | #endif // __APPLE__ | 945 | #endif // __APPLE__ |
865 | #endif // _WIN32 | 946 | #endif // _WIN32 |