diff options
Diffstat (limited to 'C/CpuArch.c')
-rw-r--r-- | C/CpuArch.c | 478 |
1 files changed, 478 insertions, 0 deletions
diff --git a/C/CpuArch.c b/C/CpuArch.c new file mode 100644 index 0000000..fa9afe3 --- /dev/null +++ b/C/CpuArch.c | |||
@@ -0,0 +1,478 @@ | |||
1 | /* CpuArch.c -- CPU specific code | ||
2 | 2021-07-13 : Igor Pavlov : Public domain */ | ||
3 | |||
4 | #include "Precomp.h" | ||
5 | |||
6 | #include "CpuArch.h" | ||
7 | |||
8 | #ifdef MY_CPU_X86_OR_AMD64 | ||
9 | |||
10 | #if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) | ||
11 | #define USE_ASM | ||
12 | #endif | ||
13 | |||
14 | #if !defined(USE_ASM) && _MSC_VER >= 1500 | ||
15 | #include <intrin.h> | ||
16 | #endif | ||
17 | |||
18 | #if defined(USE_ASM) && !defined(MY_CPU_AMD64) | ||
19 | static UInt32 CheckFlag(UInt32 flag) | ||
20 | { | ||
21 | #ifdef _MSC_VER | ||
22 | __asm pushfd; | ||
23 | __asm pop EAX; | ||
24 | __asm mov EDX, EAX; | ||
25 | __asm xor EAX, flag; | ||
26 | __asm push EAX; | ||
27 | __asm popfd; | ||
28 | __asm pushfd; | ||
29 | __asm pop EAX; | ||
30 | __asm xor EAX, EDX; | ||
31 | __asm push EDX; | ||
32 | __asm popfd; | ||
33 | __asm and flag, EAX; | ||
34 | #else | ||
35 | __asm__ __volatile__ ( | ||
36 | "pushf\n\t" | ||
37 | "pop %%EAX\n\t" | ||
38 | "movl %%EAX,%%EDX\n\t" | ||
39 | "xorl %0,%%EAX\n\t" | ||
40 | "push %%EAX\n\t" | ||
41 | "popf\n\t" | ||
42 | "pushf\n\t" | ||
43 | "pop %%EAX\n\t" | ||
44 | "xorl %%EDX,%%EAX\n\t" | ||
45 | "push %%EDX\n\t" | ||
46 | "popf\n\t" | ||
47 | "andl %%EAX, %0\n\t": | ||
48 | "=c" (flag) : "c" (flag) : | ||
49 | "%eax", "%edx"); | ||
50 | #endif | ||
51 | return flag; | ||
52 | } | ||
53 | #define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False; | ||
54 | #else | ||
55 | #define CHECK_CPUID_IS_SUPPORTED | ||
56 | #endif | ||
57 | |||
#ifndef USE_ASM

/*
  Selection of MY__cpuidex(info, func, func2) for builds without inline asm:
    - MSVC with _MSC_VER >= 1600 : the __cpuidex intrinsic is used directly.
    - older MSVC                 : MY__cpuidex_HACK() (see below).
    - any other compiler         : __cpuid(info, func); (func2) is ignored.
*/

#if defined(_MSC_VER) && (_MSC_VER >= 1600)

  #define MY__cpuidex  __cpuidex

#elif defined(_MSC_VER)

/*
  Some cpuid functions (for example function == 4) require a subfunction
  number in ECX.
  MSDN: The __cpuid intrinsic clears the ECX register before calling the
  cpuid instruction.
    __cpuid() in new MSVC clears ECX.
    __cpuid() in old MSVC (14.00) doesn't clear ECX.
  So __cpuid() is still usable for (function) values that don't need ECX,
  but in old MSVC it gives incorrect results for values that do
  (function == 4).
  The hack for old MSVC: (subFunction) is the first parameter of a
  FAST_CALL / NO_INLINE function, so the caller places it in ECX.
  Old MSVC's __cpuid() then leaves ECX untouched, and the cpuid instruction
  receives (subFunction).

  DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
  DON'T change the parameter order either: (subFunction) must stay first.
*/

static
MY_NO_INLINE
void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *info, UInt32 func)
{
  UNUSED_VAR(subFunction);
  __cpuid(info, func);
}

  #define MY__cpuidex(info, func, func2)  MY__cpuidex_HACK(func2, info, func)
  #pragma message("======== MY__cpuidex_HACK WAS USED ========")

#else

  #define MY__cpuidex(info, func, func2)  __cpuid(info, func)
  #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")

#endif

#endif
95 | |||
96 | |||
97 | |||
98 | |||
99 | void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) | ||
100 | { | ||
101 | #ifdef USE_ASM | ||
102 | |||
103 | #ifdef _MSC_VER | ||
104 | |||
105 | UInt32 a2, b2, c2, d2; | ||
106 | __asm xor EBX, EBX; | ||
107 | __asm xor ECX, ECX; | ||
108 | __asm xor EDX, EDX; | ||
109 | __asm mov EAX, function; | ||
110 | __asm cpuid; | ||
111 | __asm mov a2, EAX; | ||
112 | __asm mov b2, EBX; | ||
113 | __asm mov c2, ECX; | ||
114 | __asm mov d2, EDX; | ||
115 | |||
116 | *a = a2; | ||
117 | *b = b2; | ||
118 | *c = c2; | ||
119 | *d = d2; | ||
120 | |||
121 | #else | ||
122 | |||
123 | __asm__ __volatile__ ( | ||
124 | #if defined(MY_CPU_AMD64) && defined(__PIC__) | ||
125 | "mov %%rbx, %%rdi;" | ||
126 | "cpuid;" | ||
127 | "xchg %%rbx, %%rdi;" | ||
128 | : "=a" (*a) , | ||
129 | "=D" (*b) , | ||
130 | #elif defined(MY_CPU_X86) && defined(__PIC__) | ||
131 | "mov %%ebx, %%edi;" | ||
132 | "cpuid;" | ||
133 | "xchgl %%ebx, %%edi;" | ||
134 | : "=a" (*a) , | ||
135 | "=D" (*b) , | ||
136 | #else | ||
137 | "cpuid" | ||
138 | : "=a" (*a) , | ||
139 | "=b" (*b) , | ||
140 | #endif | ||
141 | "=c" (*c) , | ||
142 | "=d" (*d) | ||
143 | : "0" (function), "c"(0) ) ; | ||
144 | |||
145 | #endif | ||
146 | |||
147 | #else | ||
148 | |||
149 | int CPUInfo[4]; | ||
150 | |||
151 | MY__cpuidex(CPUInfo, (int)function, 0); | ||
152 | |||
153 | *a = (UInt32)CPUInfo[0]; | ||
154 | *b = (UInt32)CPUInfo[1]; | ||
155 | *c = (UInt32)CPUInfo[2]; | ||
156 | *d = (UInt32)CPUInfo[3]; | ||
157 | |||
158 | #endif | ||
159 | } | ||
160 | |||
161 | BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p) | ||
162 | { | ||
163 | CHECK_CPUID_IS_SUPPORTED | ||
164 | MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); | ||
165 | MyCPUID(1, &p->ver, &p->b, &p->c, &p->d); | ||
166 | return True; | ||
167 | } | ||
168 | |||
169 | static const UInt32 kVendors[][3] = | ||
170 | { | ||
171 | { 0x756E6547, 0x49656E69, 0x6C65746E}, | ||
172 | { 0x68747541, 0x69746E65, 0x444D4163}, | ||
173 | { 0x746E6543, 0x48727561, 0x736C7561} | ||
174 | }; | ||
175 | |||
176 | int x86cpuid_GetFirm(const Cx86cpuid *p) | ||
177 | { | ||
178 | unsigned i; | ||
179 | for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) | ||
180 | { | ||
181 | const UInt32 *v = kVendors[i]; | ||
182 | if (v[0] == p->vendor[0] && | ||
183 | v[1] == p->vendor[1] && | ||
184 | v[2] == p->vendor[2]) | ||
185 | return (int)i; | ||
186 | } | ||
187 | return -1; | ||
188 | } | ||
189 | |||
190 | BoolInt CPU_Is_InOrder() | ||
191 | { | ||
192 | Cx86cpuid p; | ||
193 | int firm; | ||
194 | UInt32 family, model; | ||
195 | if (!x86cpuid_CheckAndRead(&p)) | ||
196 | return True; | ||
197 | |||
198 | family = x86cpuid_GetFamily(p.ver); | ||
199 | model = x86cpuid_GetModel(p.ver); | ||
200 | |||
201 | firm = x86cpuid_GetFirm(&p); | ||
202 | |||
203 | switch (firm) | ||
204 | { | ||
205 | case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( | ||
206 | /* In-Order Atom CPU */ | ||
207 | model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ | ||
208 | || model == 0x26 /* 45 nm, Z6xx */ | ||
209 | || model == 0x27 /* 32 nm, Z2460 */ | ||
210 | || model == 0x35 /* 32 nm, Z2760 */ | ||
211 | || model == 0x36 /* 32 nm, N2xxx, D2xxx */ | ||
212 | ))); | ||
213 | case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); | ||
214 | case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); | ||
215 | } | ||
216 | return True; | ||
217 | } | ||
218 | |||
219 | #if !defined(MY_CPU_AMD64) && defined(_WIN32) | ||
220 | #include <Windows.h> | ||
221 | static BoolInt CPU_Sys_Is_SSE_Supported() | ||
222 | { | ||
223 | OSVERSIONINFO vi; | ||
224 | vi.dwOSVersionInfoSize = sizeof(vi); | ||
225 | if (!GetVersionEx(&vi)) | ||
226 | return False; | ||
227 | return (vi.dwMajorVersion >= 5); | ||
228 | } | ||
229 | #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; | ||
230 | #else | ||
231 | #define CHECK_SYS_SSE_SUPPORT | ||
232 | #endif | ||
233 | |||
234 | |||
235 | static UInt32 X86_CPUID_ECX_Get_Flags() | ||
236 | { | ||
237 | Cx86cpuid p; | ||
238 | CHECK_SYS_SSE_SUPPORT | ||
239 | if (!x86cpuid_CheckAndRead(&p)) | ||
240 | return 0; | ||
241 | return p.c; | ||
242 | } | ||
243 | |||
244 | BoolInt CPU_IsSupported_AES() | ||
245 | { | ||
246 | return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; | ||
247 | } | ||
248 | |||
249 | BoolInt CPU_IsSupported_SSSE3() | ||
250 | { | ||
251 | return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; | ||
252 | } | ||
253 | |||
254 | BoolInt CPU_IsSupported_SSE41() | ||
255 | { | ||
256 | return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; | ||
257 | } | ||
258 | |||
259 | BoolInt CPU_IsSupported_SHA() | ||
260 | { | ||
261 | Cx86cpuid p; | ||
262 | CHECK_SYS_SSE_SUPPORT | ||
263 | if (!x86cpuid_CheckAndRead(&p)) | ||
264 | return False; | ||
265 | |||
266 | if (p.maxFunc < 7) | ||
267 | return False; | ||
268 | { | ||
269 | UInt32 d[4] = { 0 }; | ||
270 | MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); | ||
271 | return (d[1] >> 29) & 1; | ||
272 | } | ||
273 | } | ||
274 | |||
275 | // #include <stdio.h> | ||
276 | |||
277 | #ifdef _WIN32 | ||
278 | #include <Windows.h> | ||
279 | #endif | ||
280 | |||
281 | BoolInt CPU_IsSupported_AVX2() | ||
282 | { | ||
283 | Cx86cpuid p; | ||
284 | CHECK_SYS_SSE_SUPPORT | ||
285 | |||
286 | #ifdef _WIN32 | ||
287 | #define MY__PF_XSAVE_ENABLED 17 | ||
288 | if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) | ||
289 | return False; | ||
290 | #endif | ||
291 | |||
292 | if (!x86cpuid_CheckAndRead(&p)) | ||
293 | return False; | ||
294 | if (p.maxFunc < 7) | ||
295 | return False; | ||
296 | { | ||
297 | UInt32 d[4] = { 0 }; | ||
298 | MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); | ||
299 | // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); | ||
300 | return 1 | ||
301 | & (d[1] >> 5); // avx2 | ||
302 | } | ||
303 | } | ||
304 | |||
305 | BoolInt CPU_IsSupported_VAES_AVX2() | ||
306 | { | ||
307 | Cx86cpuid p; | ||
308 | CHECK_SYS_SSE_SUPPORT | ||
309 | |||
310 | #ifdef _WIN32 | ||
311 | #define MY__PF_XSAVE_ENABLED 17 | ||
312 | if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) | ||
313 | return False; | ||
314 | #endif | ||
315 | |||
316 | if (!x86cpuid_CheckAndRead(&p)) | ||
317 | return False; | ||
318 | if (p.maxFunc < 7) | ||
319 | return False; | ||
320 | { | ||
321 | UInt32 d[4] = { 0 }; | ||
322 | MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); | ||
323 | // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); | ||
324 | return 1 | ||
325 | & (d[1] >> 5) // avx2 | ||
326 | // & (d[1] >> 31) // avx512vl | ||
327 | & (d[2] >> 9); // vaes // VEX-256/EVEX | ||
328 | } | ||
329 | } | ||
330 | |||
331 | BoolInt CPU_IsSupported_PageGB() | ||
332 | { | ||
333 | Cx86cpuid cpuid; | ||
334 | if (!x86cpuid_CheckAndRead(&cpuid)) | ||
335 | return False; | ||
336 | { | ||
337 | UInt32 d[4] = { 0 }; | ||
338 | MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); | ||
339 | if (d[0] < 0x80000001) | ||
340 | return False; | ||
341 | } | ||
342 | { | ||
343 | UInt32 d[4] = { 0 }; | ||
344 | MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]); | ||
345 | return (d[3] >> 26) & 1; | ||
346 | } | ||
347 | } | ||
348 | |||
349 | |||
350 | #elif defined(MY_CPU_ARM_OR_ARM64) | ||
351 | |||
352 | #ifdef _WIN32 | ||
353 | |||
354 | #include <Windows.h> | ||
355 | |||
356 | BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } | ||
357 | BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } | ||
358 | BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } | ||
359 | |||
360 | #else | ||
361 | |||
362 | #if defined(__APPLE__) | ||
363 | |||
364 | /* | ||
365 | #include <stdio.h> | ||
366 | #include <string.h> | ||
367 | static void Print_sysctlbyname(const char *name) | ||
368 | { | ||
369 | size_t bufSize = 256; | ||
370 | char buf[256]; | ||
371 | int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); | ||
372 | { | ||
373 | int i; | ||
374 | printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); | ||
375 | for (i = 0; i < 20; i++) | ||
376 | printf(" %2x", (unsigned)(Byte)buf[i]); | ||
377 | |||
378 | } | ||
379 | } | ||
380 | */ | ||
381 | |||
382 | static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) | ||
383 | { | ||
384 | UInt32 val = 0; | ||
385 | if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) | ||
386 | return 1; | ||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | /* | ||
391 | Print_sysctlbyname("hw.pagesize"); | ||
392 | Print_sysctlbyname("machdep.cpu.brand_string"); | ||
393 | */ | ||
394 | |||
395 | BoolInt CPU_IsSupported_CRC32(void) | ||
396 | { | ||
397 | return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); | ||
398 | } | ||
399 | |||
400 | BoolInt CPU_IsSupported_NEON(void) | ||
401 | { | ||
402 | return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); | ||
403 | } | ||
404 | |||
405 | #ifdef MY_CPU_ARM64 | ||
406 | #define APPLE_CRYPTO_SUPPORT_VAL 1 | ||
407 | #else | ||
408 | #define APPLE_CRYPTO_SUPPORT_VAL 0 | ||
409 | #endif | ||
410 | |||
411 | BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } | ||
412 | BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } | ||
413 | BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } | ||
414 | |||
415 | |||
416 | #else // __APPLE__ | ||
417 | |||
418 | #include <sys/auxv.h> | ||
419 | |||
420 | #define USE_HWCAP | ||
421 | |||
422 | #ifdef USE_HWCAP | ||
423 | |||
424 | #include <asm/hwcap.h> | ||
425 | |||
426 | #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ | ||
427 | BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } | ||
428 | |||
429 | #ifdef MY_CPU_ARM64 | ||
430 | #define MY_HWCAP_CHECK_FUNC(name) \ | ||
431 | MY_HWCAP_CHECK_FUNC_2(name, name) | ||
432 | MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) | ||
433 | // MY_HWCAP_CHECK_FUNC (ASIMD) | ||
434 | #elif defined(MY_CPU_ARM) | ||
435 | #define MY_HWCAP_CHECK_FUNC(name) \ | ||
436 | BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } | ||
437 | MY_HWCAP_CHECK_FUNC_2(NEON, NEON) | ||
438 | #endif | ||
439 | |||
440 | #else // USE_HWCAP | ||
441 | |||
442 | #define MY_HWCAP_CHECK_FUNC(name) \ | ||
443 | BoolInt CPU_IsSupported_ ## name() { return 0; } | ||
444 | MY_HWCAP_CHECK_FUNC(NEON) | ||
445 | |||
446 | #endif // USE_HWCAP | ||
447 | |||
448 | MY_HWCAP_CHECK_FUNC (CRC32) | ||
449 | MY_HWCAP_CHECK_FUNC (SHA1) | ||
450 | MY_HWCAP_CHECK_FUNC (SHA2) | ||
451 | MY_HWCAP_CHECK_FUNC (AES) | ||
452 | |||
453 | #endif // __APPLE__ | ||
454 | #endif // _WIN32 | ||
455 | |||
456 | #endif // MY_CPU_ARM_OR_ARM64 | ||
457 | |||
458 | |||
459 | |||
460 | #ifdef __APPLE__ | ||
461 | |||
462 | #include <sys/sysctl.h> | ||
463 | |||
/*
  My_sysctlbyname_Get() is a read-only wrapper for sysctlbyname():
  no new value is passed (newp = NULL, newlen = 0).
    name    : sysctl name to query
    buf     : receives the value
    bufSize : in: capacity of (buf); passed through to sysctlbyname()
  Returns: the sysctlbyname() result unchanged.
*/
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
  const int res = sysctlbyname(name, buf, bufSize, NULL, 0);
  return res;
}
468 | |||
469 | int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) | ||
470 | { | ||
471 | size_t bufSize = sizeof(*val); | ||
472 | int res = My_sysctlbyname_Get(name, val, &bufSize); | ||
473 | if (res == 0 && bufSize != sizeof(*val)) | ||
474 | return EFAULT; | ||
475 | return res; | ||
476 | } | ||
477 | |||
478 | #endif | ||