Diffstat (limited to 'C/CpuArch.h')
-rw-r--r--  C/CpuArch.h  144
1 file changed, 133 insertions, 11 deletions
diff --git a/C/CpuArch.h b/C/CpuArch.h
index 8e5d8a5..dfc68f1 100644
--- a/C/CpuArch.h
+++ b/C/CpuArch.h
@@ -1,5 +1,5 @@
 /* CpuArch.h -- CPU specific code
-2023-04-02 : Igor Pavlov : Public domain */
+2024-05-13 : Igor Pavlov : Public domain */
 
 #ifndef ZIP7_INC_CPU_ARCH_H
 #define ZIP7_INC_CPU_ARCH_H
@@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
   MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
 */
 
+#if !defined(_M_ARM64EC)
 #if defined(_M_X64) \
     || defined(_M_AMD64) \
     || defined(__x86_64__) \
@@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
   #endif
   #define MY_CPU_64BIT
 #endif
+#endif
 
 
 #if defined(_M_IX86) \
@@ -47,17 +49,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 
 
 #if defined(_M_ARM64) \
+    || defined(_M_ARM64EC) \
     || defined(__AARCH64EL__) \
     || defined(__AARCH64EB__) \
     || defined(__aarch64__)
   #define MY_CPU_ARM64
-  #ifdef __ILP32__
+#if defined(__ILP32__) \
+    || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
     #define MY_CPU_NAME "arm64-32"
     #define MY_CPU_SIZEOF_POINTER 4
-  #else
+#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
+    #define MY_CPU_NAME "arm64-128"
+    #define MY_CPU_SIZEOF_POINTER 16
+#else
+#if defined(_M_ARM64EC)
+    #define MY_CPU_NAME "arm64ec"
+#else
     #define MY_CPU_NAME "arm64"
+#endif
     #define MY_CPU_SIZEOF_POINTER 8
-  #endif
+#endif
   #define MY_CPU_64BIT
 #endif
 
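Side note on the arm64 pointer-size branches above: the header appears to pick MY_CPU_SIZEOF_POINTER to match the ABI pointer width. A minimal compile-time check a consumer could add is sketched below; it is not part of the header or the patch, and it assumes a C11 compiler and that the macro is indeed meant to equal sizeof(void *).

#include <assert.h>     /* static_assert (C11) */
#include "CpuArch.h"

#ifdef MY_CPU_SIZEOF_POINTER
  /* fails to compile if the detection above disagrees with the ABI pointer size */
  static_assert(MY_CPU_SIZEOF_POINTER == sizeof(void *),
                "MY_CPU_SIZEOF_POINTER does not match the ABI pointer size");
#endif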
@@ -133,8 +144,36 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 #endif
 
 
+#if defined(__sparc__) \
+    || defined(__sparc)
+  #define MY_CPU_SPARC
+  #if defined(__LP64__) \
+      || defined(_LP64) \
+      || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
+    #define MY_CPU_NAME "sparcv9"
+    #define MY_CPU_SIZEOF_POINTER 8
+    #define MY_CPU_64BIT
+  #elif defined(__sparc_v9__) \
+      || defined(__sparcv9)
+    #define MY_CPU_64BIT
+    #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
+      #define MY_CPU_NAME "sparcv9-32"
+    #else
+      #define MY_CPU_NAME "sparcv9m"
+    #endif
+  #elif defined(__sparc_v8__) \
+      || defined(__sparcv8)
+    #define MY_CPU_NAME "sparcv8"
+    #define MY_CPU_SIZEOF_POINTER 4
+  #else
+    #define MY_CPU_NAME "sparc"
+  #endif
+#endif
+
+
 #if defined(__riscv) \
   || defined(__riscv__)
+  #define MY_CPU_RISCV
   #if __riscv_xlen == 32
     #define MY_CPU_NAME "riscv32"
   #elif __riscv_xlen == 64
@@ -145,6 +184,39 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 #endif
 
 
+#if defined(__loongarch__)
+  #define MY_CPU_LOONGARCH
+  #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64)
+    #define MY_CPU_64BIT
+  #endif
+  #if defined(__loongarch64)
+    #define MY_CPU_NAME "loongarch64"
+    #define MY_CPU_LOONGARCH64
+  #else
+    #define MY_CPU_NAME "loongarch"
+  #endif
+#endif
+
+
+// #undef MY_CPU_NAME
+// #undef MY_CPU_SIZEOF_POINTER
+// #define __e2k__
+// #define __SIZEOF_POINTER__ 4
+#if defined(__e2k__)
+  #define MY_CPU_E2K
+  #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
+    #define MY_CPU_NAME "e2k-32"
+    #define MY_CPU_SIZEOF_POINTER 4
+  #else
+    #define MY_CPU_NAME "e2k"
+    #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
+      #define MY_CPU_SIZEOF_POINTER 8
+    #endif
+  #endif
+  #define MY_CPU_64BIT
+#endif
+
+
 #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
 #define MY_CPU_X86_OR_AMD64
 #endif
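For context on the new SPARC, RISC-V, LoongArch and E2K branches, here is a tiny probe program (my sketch, not part of the patch) that prints what the detection macros above resolved to; it only uses names defined in this header.

#include <stdio.h>
#include "CpuArch.h"

int main(void)
{
  printf("cpu name   : %s\n", MY_CPU_NAME);   /* e.g. "sparcv9", "loongarch64", "e2k" */
#ifdef MY_CPU_SIZEOF_POINTER
  printf("ptr size   : %u\n", (unsigned)MY_CPU_SIZEOF_POINTER);
#endif
#ifdef MY_CPU_64BIT
  printf("64-bit regs: yes\n");
#endif
  return 0;
}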
@@ -175,6 +247,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
     || defined(MY_CPU_ARM_LE) \
     || defined(MY_CPU_ARM64_LE) \
     || defined(MY_CPU_IA64_LE) \
+    || defined(_LITTLE_ENDIAN) \
     || defined(__LITTLE_ENDIAN__) \
     || defined(__ARMEL__) \
     || defined(__THUMBEL__) \
@@ -251,6 +324,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 
 
 #ifndef MY_CPU_NAME
+  // #define MY_CPU_IS_UNKNOWN
   #ifdef MY_CPU_LE
     #define MY_CPU_NAME "LE"
   #elif defined(MY_CPU_BE)
@@ -295,9 +369,19 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 #define Z7_BSWAP64(v) _byteswap_uint64(v)
 #define Z7_CPU_FAST_BSWAP_SUPPORTED
 
-#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
-   || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16))
+/* GCC can generate slow code that calls a function for __builtin_bswap32() for:
+    - GCC for RISCV, if the Zbb extension is not used.
+    - GCC for SPARC.
+   The code from CLANG for SPARC also is not the fastest.
+   So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases.
+*/
+#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb)) \
+   && !defined(MY_CPU_SPARC) \
+   && ( \
+        (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
+     || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \
+      )
 
 #define Z7_BSWAP16(v) __builtin_bswap16(v)
 #define Z7_BSWAP32(v) __builtin_bswap32(v)
 #define Z7_BSWAP64(v) __builtin_bswap64(v)
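Where Z7_CPU_FAST_BSWAP_SUPPORTED stays undefined (GCC on SPARC, or RISC-V without Zbb, per the comment above), a byte swap still has to come from somewhere. The shift-based routine below is an illustrative sketch of such a generic fallback; it is not taken from the header, and the function name is made up.

#include <stdint.h>

/* portable 32-bit byte swap; illustrative only, not the header's code path */
static uint32_t Swap32_generic(uint32_t v)
{
  return (uint32_t)( (v << 24)
                   | ((v & 0xFF00u) << 8)
                   | ((v >> 8) & 0xFF00u)
                   |  (v >> 24) );
}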
@@ -329,13 +413,48 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 
 #ifdef MY_CPU_LE
   #if defined(MY_CPU_X86_OR_AMD64) \
-      || defined(MY_CPU_ARM64)
+      || defined(MY_CPU_ARM64) \
+      || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \
+      || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6)
     #define MY_CPU_LE_UNALIGN
     #define MY_CPU_LE_UNALIGN_64
   #elif defined(__ARM_FEATURE_UNALIGNED)
-    /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
-       So we can't use unaligned 64-bit operations. */
-    #define MY_CPU_LE_UNALIGN
+/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions.
+   Description of problems:
+problem-1 : 32-bit ARM architecture:
+  multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM)
+  require 32-bit (WORD) alignment (by 32-bit ARM architecture).
+  So there is an "Alignment fault exception" if data is not aligned for 32-bit.
+
+problem-2 : 32-bit kernels and arm64 kernels:
+  32-bit linux kernels provide a fixup for the "Alignment fault exception" of these "paired" instructions.
+  So unaligned paired-access instructions work via the exception handler in the kernel in 32-bit linux.
+
+  But some arm64 kernels do not handle these faults in 32-bit programs.
+  So we get an unhandled exception for such instructions.
+  Probably some new arm64 kernels have fixed it, and unaligned
+  paired-access instructions work in new kernels?
+
+problem-3 : compiler for 32-bit arm:
+  Compilers use LDRD/STRD/LDM/STM for UInt64 accesses
+  and for other cases where two 32-bit accesses are fused
+  into one multi-access instruction.
+  So UInt64 variables must be aligned for 32-bit, and each
+  32-bit access must be aligned for 32-bit, if we want to
+  avoid an "Alignment fault" exception (handled or unhandled).
+
+problem-4 : performance:
+  Even if unaligned access is handled by the kernel, it will be slow.
+  So if we allow unaligned access, we can get fast unaligned
+  single-access, and slow unaligned paired-access.
+
+  We don't allow unaligned access on 32-bit arm, because the compiler
+  generates paired-access instructions that require 32-bit alignment,
+  and some arm64 kernels have no handler for these instructions.
+  Also unaligned paired-access instructions will be slow, if the kernel handles them.
+*/
+  // it must be disabled:
+  // #define MY_CPU_LE_UNALIGN
   #endif
 #endif
 
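To illustrate the safe alternative when MY_CPU_LE_UNALIGN stays undefined on 32-bit arm: a memcpy-based load is the usual compiler-friendly pattern for unaligned data, because the compiler cannot assume 8-byte alignment of the source pointer and so will not emit LDRD/LDM on it. This is a hedged sketch, not the header's own Get/Set implementation, and the function name is made up.

#include <string.h>
#include <stdint.h>

/* sketch only: reads a 64-bit little-endian value from a possibly unaligned address
   without triggering the paired-access alignment faults described above */
static uint64_t Load64_unaligned(const void *p)
{
  uint64_t v;
  memcpy(&v, p, sizeof(v));   /* lowered to byte/word accesses that tolerate misalignment */
  return v;
}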
@@ -439,6 +558,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 
 #if defined(MY_CPU_BE)
 
+#define GetBe64a(p) (*(const UInt64 *)(const void *)(p))
 #define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
 #define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
 #define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
@@ -456,6 +576,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 #define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
 #define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
 
+#define GetBe64a(p) GetBe64(p)
 #define GetBe32a(p) GetBe32(p)
 #define GetBe16a(p) GetBe16(p)
 #define SetBe32a(p, v) SetBe32(p, v)
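The two hunks above add GetBe64a, the aligned-access variant of GetBe64: on big-endian targets it is a direct UInt64 load, on little-endian targets it forwards to GetBe64. A short usage sketch follows; GetBe64a, UInt64 and Byte come from CpuArch.h / 7zTypes.h, while the helper name and the header-buffer convention are hypothetical.

#include "CpuArch.h"

/* hypothetical helper: read a big-endian 64-bit size field at offset 0 of a
   buffer that the caller guarantees to be 8-byte aligned */
static UInt64 ReadPackSize(const Byte *alignedHdr)
{
  return GetBe64a(alignedHdr);
}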
@@ -486,6 +607,7 @@ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
 BoolInt CPU_IsSupported_AES(void);
 BoolInt CPU_IsSupported_AVX(void);
 BoolInt CPU_IsSupported_AVX2(void);
+// BoolInt CPU_IsSupported_AVX512F_AVX512VL(void);
 BoolInt CPU_IsSupported_VAES_AVX2(void);
 BoolInt CPU_IsSupported_CMOV(void);
 BoolInt CPU_IsSupported_SSE(void);
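These runtime-detection declarations are typically used to pick between a SIMD and a portable code path once, then cache the answer. A hedged sketch of such a caller is below; only CPU_IsSupported_AES() and BoolInt come from the header (x86/x64 builds), the helper and its caching scheme are hypothetical.

#include "CpuArch.h"

/* hypothetical one-time probe for hardware AES support */
static BoolInt QueryHardwareAes(void)
{
  static int cached = -1;               /* -1 = not probed yet */
  if (cached < 0)
    cached = (int)CPU_IsSupported_AES();
  return (BoolInt)cached;
}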