Diffstat (limited to 'C/CpuArch.c')
 -rw-r--r--  C/CpuArch.c | 795
 1 file changed, 570 insertions(+), 225 deletions(-)
diff --git a/C/CpuArch.c b/C/CpuArch.c
index fa9afe3..33f8a3a 100644
--- a/C/CpuArch.c
+++ b/C/CpuArch.c
@@ -1,187 +1,318 @@
 /* CpuArch.c -- CPU specific code
-2021-07-13 : Igor Pavlov : Public domain */
+2023-05-18 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
+// #include <stdio.h>
+
 #include "CpuArch.h"
 
 #ifdef MY_CPU_X86_OR_AMD64
 
-#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
-#define USE_ASM
+#undef NEED_CHECK_FOR_CPUID
+#if !defined(MY_CPU_AMD64)
+#define NEED_CHECK_FOR_CPUID
 #endif
 
-#if !defined(USE_ASM) && _MSC_VER >= 1500
-#include <intrin.h>
+/*
+  the cpuid instruction supports a (subFunction) parameter in ECX,
+  which is used only with some specific (function) parameter values.
+  But we always use only (subFunction==0).
+*/
+/*
+  __cpuid(): MSVC and GCC/CLANG use the same function/macro name,
+  but the parameters are different.
+  We use the MSVC __cpuid() parameter style for our z7_x86_cpuid() function.
+*/
+
+#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
+    || defined(__clang__) /* && (__clang_major__ >= 10) */
+
+/* there were some CLANG/GCC compilers that had issues with
+   rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
+   The compiler's <cpuid.h> contains a __cpuid() macro that is similar to our code.
+   The history of __cpuid() changes in CLANG/GCC:
+   GCC:
+     2007: it preserved ebx for (__PIC__ && __i386__)
+     2013: it preserved rbx and ebx for __PIC__
+     2014: it doesn't preserve rbx and ebx anymore
+     we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
+   CLANG:
+     2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
+   Why does CLANG care about 64-bit mode only, and not about ebx (in 32-bit)?
+   Do we need a __PIC__ test for CLANG, or must we care about rbx even if
+   __PIC__ is not defined?
+*/
+
+#define ASM_LN "\n"
+
+#if defined(MY_CPU_AMD64) && defined(__PIC__) \
+    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
+
+#define x86_cpuid_MACRO(p, func) { \
+  __asm__ __volatile__ ( \
+    ASM_LN   "mov     %%rbx, %q1"  \
+    ASM_LN   "cpuid"               \
+    ASM_LN   "xchg    %%rbx, %q1"  \
+    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+
+  /* "=&r" selects a free register. It can select even rbx, if that register is free.
+     "=&D" for (RDI) also works, but the code can be larger with "=&D".
+     "2"(0) means (subFunction = 0);
+     2 is the (zero-based) index of "=c" (ECX) in the output constraint list. */
+
+#elif defined(MY_CPU_X86) && defined(__PIC__) \
+    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
+
+#define x86_cpuid_MACRO(p, func) { \
+  __asm__ __volatile__ ( \
+    ASM_LN   "mov     %%ebx, %k1"  \
+    ASM_LN   "cpuid"               \
+    ASM_LN   "xchg    %%ebx, %k1"  \
+    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+
+#else
+
+#define x86_cpuid_MACRO(p, func) { \
+  __asm__ __volatile__ ( \
+    ASM_LN   "cpuid"               \
+    : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+
 #endif
 
-#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
-static UInt32 CheckFlag(UInt32 flag)
+
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
 {
-  #ifdef _MSC_VER
-  __asm pushfd;
-  __asm pop EAX;
-  __asm mov EDX, EAX;
-  __asm xor EAX, flag;
-  __asm push EAX;
-  __asm popfd;
-  __asm pushfd;
-  __asm pop EAX;
-  __asm xor EAX, EDX;
-  __asm push EDX;
-  __asm popfd;
-  __asm and flag, EAX;
-  #else
+  x86_cpuid_MACRO(p, func)
+}
+
+
+Z7_NO_INLINE
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ #if defined(NEED_CHECK_FOR_CPUID)
+  #define EFALGS_CPUID_BIT 21
+  UInt32 a;
   __asm__ __volatile__ (
-    "pushf\n\t"
-    "pop  %%EAX\n\t"
-    "movl %%EAX,%%EDX\n\t"
-    "xorl %0,%%EAX\n\t"
-    "push %%EAX\n\t"
-    "popf\n\t"
-    "pushf\n\t"
-    "pop  %%EAX\n\t"
-    "xorl %%EDX,%%EAX\n\t"
-    "push %%EDX\n\t"
-    "popf\n\t"
-    "andl %%EAX, %0\n\t":
-    "=c" (flag) : "c" (flag) :
-    "%eax", "%edx");
+    ASM_LN   "pushf"
+    ASM_LN   "pushf"
+    ASM_LN   "pop     %0"
+    // ASM_LN   "movl    %0, %1"
+    // ASM_LN   "xorl    $0x200000, %0"
+    ASM_LN   "btc     %1, %0"
+    ASM_LN   "push    %0"
+    ASM_LN   "popf"
+    ASM_LN   "pushf"
+    ASM_LN   "pop     %0"
+    ASM_LN   "xorl    (%%esp), %0"
+
+    ASM_LN   "popf"
+    ASM_LN
+    : "=&r" (a) // "=a"
+    : "i" (EFALGS_CPUID_BIT)
+    );
+  if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
+    return 0;
+ #endif
+  {
+    UInt32 p[4];
+    x86_cpuid_MACRO(p, 0)
+    return p[0];
+  }
+}
+
+#undef ASM_LN
+
+#elif !defined(_MSC_VER)
+
+/*
+// for gcc/clang and others: we can try to use the __cpuid macro:
+#include <cpuid.h>
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+  __cpuid(func, p[0], p[1], p[2], p[3]);
+}
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+  return (UInt32)__get_cpuid_max(0, NULL);
+}
+*/
+// for unsupported cpuid:
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+  UNUSED_VAR(func)
+  p[0] = p[1] = p[2] = p[3] = 0;
+}
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+  return 0;
+}
+
+#else // _MSC_VER
+
+#if !defined(MY_CPU_AMD64)
+
+UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+  #if defined(NEED_CHECK_FOR_CPUID)
+  #define EFALGS_CPUID_BIT 21
+  __asm   pushfd
+  __asm   pushfd
+  /*
+  __asm   pop     eax
+  // __asm   mov     edx, eax
+  __asm   btc     eax, EFALGS_CPUID_BIT
+  __asm   push    eax
+  */
+  __asm   btc     dword ptr [esp], EFALGS_CPUID_BIT
+  __asm   popfd
+  __asm   pushfd
+  __asm   pop     eax
+  // __asm   xor     eax, edx
+  __asm   xor     eax, [esp]
+  // __asm   push    edx
+  __asm   popfd
+  __asm   and     eax, (1 shl EFALGS_CPUID_BIT)
+  __asm   jz      end_func
+  #endif
+  __asm   push    ebx
+  __asm   xor     eax, eax    // func
+  __asm   xor     ecx, ecx    // subFunction (optional) for (func == 0)
+  __asm   cpuid
+  __asm   pop     ebx
+  #if defined(NEED_CHECK_FOR_CPUID)
+  end_func:
   #endif
-  return flag;
+  __asm   ret 0
 }
-#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
-#else
-#define CHECK_CPUID_IS_SUPPORTED
-#endif
 
-#ifndef USE_ASM
-  #ifdef _MSC_VER
+void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+  UNUSED_VAR(p)
+  UNUSED_VAR(func)
+  __asm   push    ebx
+  __asm   push    edi
+  __asm   mov     edi, ecx    // p
+  __asm   mov     eax, edx    // func
+  __asm   xor     ecx, ecx    // subFunction (optional) for (func == 0)
+  __asm   cpuid
+  __asm   mov     [edi     ], eax
+  __asm   mov     [edi +  4], ebx
+  __asm   mov     [edi +  8], ecx
+  __asm   mov     [edi + 12], edx
+  __asm   pop     edi
+  __asm   pop     ebx
+  __asm   ret 0
+}
+
+#else // MY_CPU_AMD64
+
     #if _MSC_VER >= 1600
-      #define MY__cpuidex  __cpuidex
+      #include <intrin.h>
+      #define MY_cpuidex  __cpuidex
     #else
-
 /*
- __cpuid (function == 4) requires subfunction number in ECX.
+ __cpuid (func == (0 or 7)) requires the subfunction number in ECX.
  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
  __cpuid() in new MSVC clears ECX.
- __cpuid() in old MSVC (14.00) doesn't clear ECX
- We still can use __cpuid for low (function) values that don't require ECX,
- but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
+ __cpuid() in old MSVC (14.00) x64 doesn't clear ECX.
+ We still can use __cpuid for low (func) values that don't require ECX,
+ but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
 So here we use the hack for old MSVC to send (subFunction) in the ECX register to the cpuid instruction,
- where ECX value is first parameter for FAST_CALL / NO_INLINE function,
- So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and
+ where the ECX value is the first parameter of a FASTCALL / NO_INLINE function,
+ so the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
 old MSVC for __cpuid() doesn't change ECX, so the cpuid instruction gets the (subFunction) value.
 
-DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
+DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK() !!!
 */
-
 static
-MY_NO_INLINE
-void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
+Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo)
 {
-  UNUSED_VAR(subFunction);
-  __cpuid(CPUInfo, function);
+  UNUSED_VAR(subFunction)
+  __cpuid(CPUInfo, func);
 }
-
- #define MY__cpuidex(info, func, func2)  MY__cpuidex_HACK(func2, info, func)
- #pragma message("======== MY__cpuidex_HACK WAS USED ========")
-    #endif
-  #else
-     #define MY__cpuidex(info, func, func2)  __cpuid(info, func)
-     #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
-  #endif
+      #define MY_cpuidex(info, func, func2)  MY_cpuidex_HACK(func2, func, info)
+      #pragma message("======== MY_cpuidex_HACK WAS USED ========")
+    #endif // _MSC_VER >= 1600
+
+#if !defined(MY_CPU_AMD64)
+/* inlining of __cpuid() in MSVC x86 (32-bit) produces big inefficient code,
+   so we disable inlining here */
+Z7_NO_INLINE
 #endif
-
-
-
-
-void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
 {
-  #ifdef USE_ASM
-
-  #ifdef _MSC_VER
-
-  UInt32 a2, b2, c2, d2;
-  __asm xor EBX, EBX;
-  __asm xor ECX, ECX;
-  __asm xor EDX, EDX;
-  __asm mov EAX, function;
-  __asm cpuid;
-  __asm mov a2, EAX;
-  __asm mov b2, EBX;
-  __asm mov c2, ECX;
-  __asm mov d2, EDX;
-
-  *a = a2;
-  *b = b2;
-  *c = c2;
-  *d = d2;
-
-  #else
-
-  __asm__ __volatile__ (
-  #if defined(MY_CPU_AMD64) && defined(__PIC__)
-    "mov %%rbx, %%rdi;"
-    "cpuid;"
-    "xchg %%rbx, %%rdi;"
-    : "=a" (*a) ,
-      "=D" (*b) ,
-  #elif defined(MY_CPU_X86) && defined(__PIC__)
-    "mov %%ebx, %%edi;"
-    "cpuid;"
-    "xchgl %%ebx, %%edi;"
-    : "=a" (*a) ,
-      "=D" (*b) ,
-  #else
-    "cpuid"
-    : "=a" (*a) ,
-      "=b" (*b) ,
-  #endif
-      "=c" (*c) ,
-      "=d" (*d)
-    : "0" (function), "c"(0) ) ;
-
-  #endif
-
-  #else
-
-  int CPUInfo[4];
-
-  MY__cpuidex(CPUInfo, (int)function, 0);
-
-  *a = (UInt32)CPUInfo[0];
-  *b = (UInt32)CPUInfo[1];
-  *c = (UInt32)CPUInfo[2];
-  *d = (UInt32)CPUInfo[3];
+  MY_cpuidex((int *)p, (int)func, 0);
+}
+
+Z7_NO_INLINE
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+  int a[4];
+  MY_cpuidex(a, 0, 0);
+  return a[0];
+}
+
+#endif // MY_CPU_AMD64
+#endif // _MSC_VER
+
+#if defined(NEED_CHECK_FOR_CPUID)
+#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
+#else
+#define CHECK_CPUID_IS_SUPPORTED
+#endif
+#undef NEED_CHECK_FOR_CPUID
 
-  #endif
-}
 
-BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
+static
+BoolInt x86cpuid_Func_1(UInt32 *p)
 {
   CHECK_CPUID_IS_SUPPORTED
-  MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
-  MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
+  z7_x86_cpuid(p, 1);
   return True;
 }
 
-static const UInt32 kVendors[][3] =
+/*
+static const UInt32 kVendors[][1] =
 {
-  { 0x756E6547, 0x49656E69, 0x6C65746E},
-  { 0x68747541, 0x69746E65, 0x444D4163},
-  { 0x746E6543, 0x48727561, 0x736C7561}
+  { 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
+  { 0x68747541 }, // , 0x69746E65, 0x444D4163 },
+  { 0x746E6543 }  // , 0x48727561, 0x736C7561 }
 };
+*/
+
+/*
+typedef struct
+{
+  UInt32 maxFunc;
+  UInt32 vendor[3];
+  UInt32 ver;
+  UInt32 b;
+  UInt32 c;
+  UInt32 d;
+} Cx86cpuid;
+
+enum
+{
+  CPU_FIRM_INTEL,
+  CPU_FIRM_AMD,
+  CPU_FIRM_VIA
+};
+int x86cpuid_GetFirm(const Cx86cpuid *p);
+#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
+#define x86cpuid_ver_GetModel(ver)  (((ver >> 12) &  0xf0) | ((ver >> 4) & 0xf))
+#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
 
 int x86cpuid_GetFirm(const Cx86cpuid *p)
 {
   unsigned i;
-  for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)
+  for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
   {
     const UInt32 *v = kVendors[i];
-    if (v[0] == p->vendor[0] &&
-        v[1] == p->vendor[1] &&
-        v[2] == p->vendor[2])
+    if (v[0] == p->vendor[0]
+        // && v[1] == p->vendor[1]
+        // && v[2] == p->vendor[2]
+        )
       return (int)i;
   }
   return -1;
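The z7_x86_cpuid() introduced above follows MSVC's __cpuid() output convention: p[0], p[1], p[2], p[3] receive EAX, EBX, ECX, EDX, and the subfunction in ECX is always 0. A minimal standalone sketch of a caller, not part of the patch: it substitutes GCC/Clang's <cpuid.h> helper for the patch's x86_cpuid_MACRO so it builds without 7-Zip headers, and the helper name sketch_cpuid is illustrative.

/* Standalone illustration, not from the patch: reproduces the
   p[0]=EAX, p[1]=EBX, p[2]=ECX, p[3]=EDX ordering that z7_x86_cpuid()
   adopts, using GCC/Clang's <cpuid.h> instead of inline asm. */
#include <stdio.h>
#include <string.h>
#include <cpuid.h>

static void sketch_cpuid(unsigned p[4], unsigned func)
{
  /* subfunction fixed to 0, as in the patch */
  __cpuid_count(func, 0, p[0], p[1], p[2], p[3]);
}

int main(void)
{
  unsigned p[4];
  char vendor[13];
  sketch_cpuid(p, 0);
  /* func 0: EAX = max supported func; vendor string is spread over EBX, EDX, ECX */
  memcpy(vendor + 0, &p[1], 4);
  memcpy(vendor + 4, &p[3], 4);
  memcpy(vendor + 8, &p[2], 4);
  vendor[12] = 0;
  printf("max func = %u, vendor = %s\n", p[0], vendor);
  return 0;
}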
@@ -190,41 +321,55 @@ int x86cpuid_GetFirm(const Cx86cpuid *p)
 BoolInt CPU_Is_InOrder()
 {
   Cx86cpuid p;
-  int firm;
   UInt32 family, model;
   if (!x86cpuid_CheckAndRead(&p))
     return True;
 
-  family = x86cpuid_GetFamily(p.ver);
-  model = x86cpuid_GetModel(p.ver);
-
-  firm = x86cpuid_GetFirm(&p);
+  family = x86cpuid_ver_GetFamily(p.ver);
+  model = x86cpuid_ver_GetModel(p.ver);
 
-  switch (firm)
+  switch (x86cpuid_GetFirm(&p))
   {
     case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
-        /* In-Order Atom CPU */
-           model == 0x1C  /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
-        || model == 0x26  /* 45 nm, Z6xx */
-        || model == 0x27  /* 32 nm, Z2460 */
-        || model == 0x35  /* 32 nm, Z2760 */
-        || model == 0x36  /* 32 nm, N2xxx, D2xxx */
+        // In-Order Atom CPU
+           model == 0x1C  // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
+        || model == 0x26  // 45 nm, Z6xx
+        || model == 0x27  // 32 nm, Z2460
+        || model == 0x35  // 32 nm, Z2760
+        || model == 0x36  // 32 nm, N2xxx, D2xxx
         )));
     case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
     case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
   }
-  return True;
+  return False; // v23 : unknown processors are not In-Order
 }
+*/
+
+#ifdef _WIN32
+#include "7zWindows.h"
+#endif
 
 #if !defined(MY_CPU_AMD64) && defined(_WIN32)
-#include <Windows.h>
-static BoolInt CPU_Sys_Is_SSE_Supported()
+
+/* for legacy SSE ia32: there is no user-space cpu instruction to check
+   that the OS supports SSE register storing/restoring on context switches.
+   So we need some OS-specific function to check that it's safe to use SSE registers.
+*/
+
+Z7_FORCE_INLINE
+static BoolInt CPU_Sys_Is_SSE_Supported(void)
 {
-  OSVERSIONINFO vi;
-  vi.dwOSVersionInfoSize = sizeof(vi);
-  if (!GetVersionEx(&vi))
-    return False;
-  return (vi.dwMajorVersion >= 5);
+#ifdef _MSC_VER
+  #pragma warning(push)
+  #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
+#endif
+  /* the low byte is the major version of Windows.
+     We suppose that any Windows version since
+     Windows 2000 (major == 5) supports SSE registers */
+  return (Byte)GetVersion() >= 5;
+#if defined(_MSC_VER)
+  #pragma warning(pop)
+#endif
}
 #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
 #else
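The rewritten CPU_Sys_Is_SSE_Supported() above keys off the low byte of GetVersion(), which carries the Windows major version (5 = Windows 2000). A standalone Win32 sketch of that decoding, illustrative only and not part of the patch (GetVersion() is deprecated, so MSVC may warn, which is exactly what the patch's pragma works around):

/* Standalone illustration, not from the patch: how the major/minor
   Windows version is packed into the GetVersion() return value. */
#include <stdio.h>
#include <windows.h>

int main(void)
{
  const DWORD v = GetVersion();
  const unsigned major = (unsigned)(v & 0xFF);        /* low byte    */
  const unsigned minor = (unsigned)((v >> 8) & 0xFF); /* second byte */
  printf("Windows %u.%u -> SSE assumed %ssupported\n",
      major, minor, (major >= 5) ? "" : "not ");
  return 0;
}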
@@ -232,94 +377,300 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
 #endif
 
 
-static UInt32 X86_CPUID_ECX_Get_Flags()
+#if !defined(MY_CPU_AMD64)
+
+BoolInt CPU_IsSupported_CMOV(void)
 {
-  Cx86cpuid p;
+  UInt32 a[4];
+  if (!x86cpuid_Func_1(&a[0]))
+    return 0;
+  return (a[3] >> 15) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE(void)
+{
+  UInt32 a[4];
   CHECK_SYS_SSE_SUPPORT
-  if (!x86cpuid_CheckAndRead(&p))
+  if (!x86cpuid_Func_1(&a[0]))
+    return 0;
+  return (a[3] >> 25) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE2(void)
+{
+  UInt32 a[4];
+  CHECK_SYS_SSE_SUPPORT
+  if (!x86cpuid_Func_1(&a[0]))
+    return 0;
+  return (a[3] >> 26) & 1;
+}
+
+#endif
+
+
+static UInt32 x86cpuid_Func_1_ECX(void)
+{
+  UInt32 a[4];
+  CHECK_SYS_SSE_SUPPORT
+  if (!x86cpuid_Func_1(&a[0]))
     return 0;
-  return p.c;
+  return a[2];
 }
 
-BoolInt CPU_IsSupported_AES()
+BoolInt CPU_IsSupported_AES(void)
 {
-  return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
+  return (x86cpuid_Func_1_ECX() >> 25) & 1;
 }
 
-BoolInt CPU_IsSupported_SSSE3()
+BoolInt CPU_IsSupported_SSSE3(void)
 {
-  return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
+  return (x86cpuid_Func_1_ECX() >> 9) & 1;
 }
 
-BoolInt CPU_IsSupported_SSE41()
+BoolInt CPU_IsSupported_SSE41(void)
 {
-  return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
+  return (x86cpuid_Func_1_ECX() >> 19) & 1;
 }
 
-BoolInt CPU_IsSupported_SHA()
+BoolInt CPU_IsSupported_SHA(void)
 {
-  Cx86cpuid p;
   CHECK_SYS_SSE_SUPPORT
-  if (!x86cpuid_CheckAndRead(&p))
-    return False;
 
-  if (p.maxFunc < 7)
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
     return False;
   {
-    UInt32 d[4] = { 0 };
-    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+    UInt32 d[4];
+    z7_x86_cpuid(d, 7);
     return (d[1] >> 29) & 1;
   }
 }
 
-// #include <stdio.h>
+/*
+MSVC: _xgetbv() intrinsic is available since VS2010SP1.
+   MSVC also defines the (_XCR_XFEATURE_ENABLED_MASK) macro in
+   <immintrin.h> that we can use or check.
+   For any 32-bit x86 we can use asm code in MSVC,
+   but MSVC asm code is huge after compilation.
+   So _xgetbv() is better.
+
+ICC: _xgetbv() intrinsic is available (since what version of ICC?)
+   ICC defines (__GNUC__) and it supports gnu assembler;
+   also ICC supports MASM style code with the -use-msasm switch,
+   but ICC doesn't support __attribute__((__target__)).
+
+GCC/CLANG 9:
+   _xgetbv() is a macro that works via __builtin_ia32_xgetbv(),
+   and we need __attribute__((__target__("xsave"))).
+   But with __target__("xsave") the function will not be
+   inlined into a function that has no __target__("xsave") attribute.
+   If we want _xgetbv() call inlining, then we should use the asm version
+   instead of calling _xgetbv().
+   Note: the intrinsic was broken before GCC 8.2:
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
+*/
 
-#ifdef _WIN32
-#include <Windows.h>
+#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
+    || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
+    || defined(__GNUC__) && (__GNUC__ >= 9) \
+    || defined(__clang__) && (__clang_major__ >= 9)
+// we define ATTRIB_XGETBV, if we want to use the predefined _xgetbv() from the compiler
+#if defined(__INTEL_COMPILER)
+#define ATTRIB_XGETBV
+#elif defined(__GNUC__) || defined(__clang__)
+// we don't define ATTRIB_XGETBV here, because the asm version is better for inlining.
+// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
+#else
+#define ATTRIB_XGETBV
+#endif
+#endif
+
+#if defined(ATTRIB_XGETBV)
+#include <immintrin.h>
 #endif
 
-BoolInt CPU_IsSupported_AVX2()
+
+// XFEATURE_ENABLED_MASK/XCR0
+#define MY_XCR_XFEATURE_ENABLED_MASK 0
+
+#if defined(ATTRIB_XGETBV)
+ATTRIB_XGETBV
+#endif
+static UInt64 x86_xgetbv_0(UInt32 num)
 {
-  Cx86cpuid p;
-  CHECK_SYS_SSE_SUPPORT
+#if defined(ATTRIB_XGETBV)
+  {
+    return
+      #if (defined(_MSC_VER))
+        _xgetbv(num);
+      #else
+        __builtin_ia32_xgetbv(
+          #if !defined(__clang__)
+            (int)
+          #endif
+            num);
+      #endif
+  }
+
+#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
+
+  UInt32 a, d;
+ #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+  __asm__
+  (
+    "xgetbv"
+    : "=a"(a), "=d"(d) : "c"(num) : "cc"
+  );
+ #else // is old gcc
+  __asm__
+  (
+    ".byte 0x0f, 0x01, 0xd0" "\n\t"
+    : "=a"(a), "=d"(d) : "c"(num) : "cc"
+  );
+ #endif
+  return ((UInt64)d << 32) | a;
+  // return a;
+
+#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
+
+  UInt32 a, d;
+  __asm {
+    push eax
+    push edx
+    push ecx
+    mov ecx, num;
+    // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
+    _emit 0x0f
+    _emit 0x01
+    _emit 0xd0
+    mov a, eax
+    mov d, edx
+    pop ecx
+    pop edx
+    pop eax
+  }
+  return ((UInt64)d << 32) | a;
+  // return a;
+
+#else // it's an unknown compiler
+  // #error "Need xgetbv function"
+  UNUSED_VAR(num)
+  // for MSVC-X64 we could call an external function from an external file.
+  /* Actually we have checked OSXSAVE/AVX in cpuid before.
+     So it's expected that the OS supports at least AVX and below. */
+  // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
+  return
+      //  (1 << 0) |  // x87
+        (1 << 1)   // SSE
+      | (1 << 2);  // AVX
+
+#endif
+}
 
+#ifdef _WIN32
+/*
+  Windows versions do not know about new ISA extensions that
+  can be introduced later. We still can use new extensions,
+  even if Windows doesn't report support for them, but only
+  if Windows knows about the ISA extensions that change the
+  number or size of registers: SSE, AVX/XSAVE, AVX512.
+  So it's enough to check
+    MY_PF_AVX_INSTRUCTIONS_AVAILABLE
+      instead of
+    MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
+*/
+#define MY_PF_XSAVE_ENABLED 17
+// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE      36
+// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE     37
+// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE     38
+// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE        39
+// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE       40
+// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE    41
+#endif
+
+BoolInt CPU_IsSupported_AVX(void)
+{
   #ifdef _WIN32
-  #define MY__PF_XSAVE_ENABLED 17
-  if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
+  if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
     return False;
+  /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
+     some of the latest Win10 revisions. But we need AVX in older Windows also.
+     So we don't use the following check: */
+  /*
+  if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
+    return False;
+  */
   #endif
 
-  if (!x86cpuid_CheckAndRead(&p))
+  /*
+  The OS must use the new special XSAVE/XRSTOR instructions to save
+  AVX registers when that is required for context switching.
+  At OS startup:
+  the OS sets the CR4.OSXSAVE flag to signal the processor that the OS supports the XSAVE extensions.
+  Also the OS sets a bitmask in the XCR0 register that defines which
+  registers will be processed by the XSAVE instruction:
+    XCR0.X87[bit 0] - x87 registers and state
+    XCR0.SSE[bit 1] - SSE registers and state
+    XCR0.AVX[bit 2] - AVX registers and state
+  CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
+     So we can read that bit in user-space.
+     XCR0 is available for reading in user-space via the new XGETBV instruction.
+  */
+  {
+    const UInt32 c = x86cpuid_Func_1_ECX();
+    if (0 == (1
+        & (c >> 28)   // AVX instructions are supported by hardware
+        & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
+      return False;
+  }
+
+  /* also we can check
+     CPUID.1:ECX.XSAVE [bit 26] : that shows that
+         XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
+     But that check is redundant, because if the OSXSAVE bit is set, then XSAVE is also set */
+
+  /* If the OS has enabled the XSAVE extension instructions (OSXSAVE == 1),
+     in most cases we expect that the OS also supports storing/restoring
+     for AVX and SSE states at least.
+     But to be sure of that we call the user-space instruction
+     XGETBV(0) to get the XCR0 value that contains a bitmask defining
+     which exact states (registers) the OS has enabled for storing/restoring.
+  */
+
+  {
+    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
+    // printf("\n=== XGetBV=%d\n", bm);
+    return 1
+        & (bm >> 1)  // SSE state is supported (set by OS) for storing/restoring
+        & (bm >> 2); // AVX state is supported (set by OS) for storing/restoring
+  }
+  // since Win7SP1: we can use GetEnabledXStateFeatures();
+}
+
+
+BoolInt CPU_IsSupported_AVX2(void)
+{
+  if (!CPU_IsSupported_AVX())
     return False;
-  if (p.maxFunc < 7)
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
     return False;
   {
-    UInt32 d[4] = { 0 };
-    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+    UInt32 d[4];
+    z7_x86_cpuid(d, 7);
     // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
     return 1
       & (d[1] >> 5); // avx2
   }
 }
 
-BoolInt CPU_IsSupported_VAES_AVX2()
+BoolInt CPU_IsSupported_VAES_AVX2(void)
 {
-  Cx86cpuid p;
-  CHECK_SYS_SSE_SUPPORT
-
-  #ifdef _WIN32
-  #define MY__PF_XSAVE_ENABLED 17
-  if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
+  if (!CPU_IsSupported_AVX())
     return False;
-  #endif
-
-  if (!x86cpuid_CheckAndRead(&p))
-    return False;
-  if (p.maxFunc < 7)
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
     return False;
   {
-    UInt32 d[4] = { 0 };
-    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+    UInt32 d[4];
+    z7_x86_cpuid(d, 7);
     // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
     return 1
       & (d[1] >> 5) // avx2
@@ -328,20 +679,15 @@ BoolInt CPU_IsSupported_VAES_AVX2()
   }
 }
 
-BoolInt CPU_IsSupported_PageGB()
+BoolInt CPU_IsSupported_PageGB(void)
 {
-  Cx86cpuid cpuid;
-  if (!x86cpuid_CheckAndRead(&cpuid))
-    return False;
+  CHECK_CPUID_IS_SUPPORTED
   {
-    UInt32 d[4] = { 0 };
-    MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
+    UInt32 d[4];
+    z7_x86_cpuid(d, 0x80000000);
     if (d[0] < 0x80000001)
       return False;
-  }
-  {
-    UInt32 d[4] = { 0 };
-    MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
+    z7_x86_cpuid(d, 0x80000001);
     return (d[3] >> 26) & 1;
   }
 }
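Taken together, the new CPU_IsSupported_AVX() performs a three-step check: CPUID.1:ECX bit 28 (AVX implemented in hardware), CPUID.1:ECX bit 27 (OSXSAVE: the OS has set CR4.OSXSAVE), then XGETBV(0) to confirm that XCR0 bits 1 and 2 (SSE and AVX state) are enabled for save/restore. A standalone GCC/Clang sketch of the same chain, illustrative only and not part of the patch; the helper names cpuid1_ecx and xgetbv0 are invented here, and it assumes a 64-bit build and an assembler that knows the xgetbv mnemonic:

/* Standalone illustration, not from the patch: the AVX check chain
   (hardware AVX + OSXSAVE via cpuid, then OS-enabled state via xgetbv). */
#include <stdio.h>

static unsigned cpuid1_ecx(void)
{
  unsigned a, b, c, d;
  /* func = 1, subfunction = 0; "=b" lets the compiler manage ebx/rbx */
  __asm__ __volatile__ ("cpuid"
      : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(1), "2"(0));
  return c;
}

static unsigned long long xgetbv0(void)
{
  unsigned a, d;
  __asm__ ("xgetbv" : "=a"(a), "=d"(d) : "c"(0)); /* XCR0 */
  return ((unsigned long long)d << 32) | a;
}

int main(void)
{
  const unsigned c = cpuid1_ecx();
  int avx_usable = 0;
  if ((c >> 27) & (c >> 28) & 1)     /* OSXSAVE and AVX both set */
  {
    const unsigned long long xcr0 = xgetbv0();
    avx_usable = (int)((xcr0 >> 1) & (xcr0 >> 2) & 1); /* SSE + AVX state */
  }
  printf("AVX usable: %d\n", avx_usable);
  return 0;
}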
@@ -351,11 +697,11 @@ BoolInt CPU_IsSupported_PageGB()
 
 #ifdef _WIN32
 
-#include <Windows.h>
+#include "7zWindows.h"
 
-BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
-BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
-BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
 
 #else
 
@@ -378,28 +724,27 @@ static void Print_sysctlbyname(const char *name)
   }
 }
 */
+/*
+  Print_sysctlbyname("hw.pagesize");
+  Print_sysctlbyname("machdep.cpu.brand_string");
+*/
 
-static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
+static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
 {
   UInt32 val = 0;
-  if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
+  if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
     return 1;
   return 0;
 }
 
- /*
- Print_sysctlbyname("hw.pagesize");
- Print_sysctlbyname("machdep.cpu.brand_string");
- */
-
 BoolInt CPU_IsSupported_CRC32(void)
 {
-  return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
 }
 
 BoolInt CPU_IsSupported_NEON(void)
 {
-  return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
 }
 
 #ifdef MY_CPU_ARM64
@@ -461,15 +806,15 @@ MY_HWCAP_CHECK_FUNC (AES)
 
 #include <sys/sysctl.h>
 
-int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
+int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
 {
   return sysctlbyname(name, buf, bufSize, NULL, 0);
 }
 
-int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
+int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
 {
   size_t bufSize = sizeof(*val);
-  int res = My_sysctlbyname_Get(name, val, &bufSize);
+  const int res = z7_sysctlbyname_Get(name, val, &bufSize);
   if (res == 0 && bufSize != sizeof(*val))
     return EFAULT;
   return res;
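On Apple platforms the whole feature detection reduces to sysctlbyname() lookups of boolean keys, which is all the renamed z7_sysctlbyname_Get_UInt32() wraps. A standalone macOS sketch probing the same keys the patch reads, illustrative only and not part of the patch; the helper name get_u32 is invented here:

/* Standalone illustration, not from the patch: querying the Apple
   feature keys that CPU_IsSupported_CRC32()/CPU_IsSupported_NEON() use. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

static int get_u32(const char *name, unsigned *val)
{
  size_t size = sizeof(*val);
  return sysctlbyname(name, val, &size, NULL, 0); /* 0 on success */
}

int main(void)
{
  static const char *const keys[] =
    { "hw.optional.neon", "hw.optional.armv8_crc32" };
  unsigned i;
  for (i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
  {
    unsigned v = 0;
    if (get_u32(keys[i], &v) == 0)
      printf("%s = %u\n", keys[i], v);
    else
      printf("%s : not present\n", keys[i]);
  }
  return 0;
}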