diff options
Diffstat (limited to 'C/SwapBytes.c')
-rw-r--r-- | C/SwapBytes.c | 63 |
1 files changed, 49 insertions, 14 deletions
diff --git a/C/SwapBytes.c b/C/SwapBytes.c index 7901bba..9290592 100644 --- a/C/SwapBytes.c +++ b/C/SwapBytes.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* SwapBytes.c -- Byte Swap conversion filter | 1 | /* SwapBytes.c -- Byte Swap conversion filter |
2 | 2023-04-07 : Igor Pavlov : Public domain */ | 2 | 2024-03-01 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
@@ -305,11 +305,12 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr) | |||
305 | msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want | 305 | msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want |
306 | */ | 306 | */ |
307 | // _mm256_broadcastsi128_si256(*mask128_ptr); | 307 | // _mm256_broadcastsi128_si256(*mask128_ptr); |
308 | /* | 308 | #if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 80000) |
309 | #define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) | 309 | #define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) |
310 | MY_mm256_set_m128i | 310 | #else |
311 | */ | 311 | #define MY_mm256_set_m128i _mm256_set_m128i |
312 | _mm256_set_m128i( | 312 | #endif |
313 | MY_mm256_set_m128i( | ||
313 | *(const __m128i *)mask128_ptr, | 314 | *(const __m128i *)mask128_ptr, |
314 | *(const __m128i *)mask128_ptr); | 315 | *(const __m128i *)mask128_ptr); |
315 | #endif | 316 | #endif |
@@ -330,32 +331,59 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr) | |||
330 | 331 | ||
331 | 332 | ||
332 | // compile message "NEON intrinsics not available with the soft-float ABI" | 333 | // compile message "NEON intrinsics not available with the soft-float ABI" |
333 | #elif defined(MY_CPU_ARM_OR_ARM64) || \ | 334 | #elif defined(MY_CPU_ARM_OR_ARM64) \ |
334 | (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) | 335 | && defined(MY_CPU_LE) \ |
335 | // #elif defined(MY_CPU_ARM64) | 336 | && !defined(Z7_DISABLE_ARM_NEON) |
336 | 337 | ||
337 | #if defined(__clang__) && (__clang_major__ >= 8) \ | 338 | #if defined(__clang__) && (__clang_major__ >= 8) \ |
338 | || defined(__GNUC__) && (__GNUC__ >= 8) | 339 | || defined(__GNUC__) && (__GNUC__ >= 6) |
339 | #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) \ | 340 | #if defined(__ARM_FP) |
341 | #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 4)) \ | ||
340 | || defined(MY_CPU_ARM64) | 342 | || defined(MY_CPU_ARM64) |
343 | #if defined(MY_CPU_ARM64) \ | ||
344 | || !defined(Z7_CLANG_VERSION) \ | ||
345 | || defined(__ARM_NEON) | ||
341 | #define USE_SWAP_128 | 346 | #define USE_SWAP_128 |
342 | #endif | ||
343 | #ifdef MY_CPU_ARM64 | 347 | #ifdef MY_CPU_ARM64 |
344 | // #define SWAP_ATTRIB_NEON __attribute__((__target__(""))) | 348 | // #define SWAP_ATTRIB_NEON __attribute__((__target__(""))) |
345 | #else | 349 | #else |
346 | // #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) | 350 | #if defined(Z7_CLANG_VERSION) |
347 | #endif | 351 | // #define SWAP_ATTRIB_NEON __attribute__((__target__("neon"))) |
352 | #else | ||
353 | // #pragma message("SWAP_ATTRIB_NEON __attribute__((__target__(fpu=neon))") | ||
354 | #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=neon"))) | ||
355 | #endif | ||
356 | #endif // MY_CPU_ARM64 | ||
357 | #endif // __ARM_NEON | ||
358 | #endif // __ARM_ARCH | ||
359 | #endif // __ARM_FP | ||
360 | |||
348 | #elif defined(_MSC_VER) | 361 | #elif defined(_MSC_VER) |
349 | #if (_MSC_VER >= 1910) | 362 | #if (_MSC_VER >= 1910) |
350 | #define USE_SWAP_128 | 363 | #define USE_SWAP_128 |
351 | #endif | 364 | #endif |
352 | #endif | 365 | #endif |
353 | 366 | ||
354 | #if defined(_MSC_VER) && defined(MY_CPU_ARM64) | 367 | #ifdef USE_SWAP_128 |
368 | #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) | ||
355 | #include <arm64_neon.h> | 369 | #include <arm64_neon.h> |
356 | #else | 370 | #else |
371 | |||
372 | /* | ||
373 | #if !defined(__ARM_NEON) | ||
374 | #if defined(Z7_GCC_VERSION) && (__GNUC__ < 5) \ | ||
375 | || defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 90201) \ | ||
376 | || defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 100100) | ||
377 | Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER | ||
378 | #pragma message("#define __ARM_NEON 1") | ||
379 | // #define __ARM_NEON 1 | ||
380 | Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER | ||
381 | #endif | ||
382 | #endif | ||
383 | */ | ||
357 | #include <arm_neon.h> | 384 | #include <arm_neon.h> |
358 | #endif | 385 | #endif |
386 | #endif | ||
359 | 387 | ||
360 | #ifndef USE_SWAP_128 | 388 | #ifndef USE_SWAP_128 |
361 | #define FORCE_SWAP_MODE | 389 | #define FORCE_SWAP_MODE |
@@ -464,6 +492,13 @@ Z7_ATTRIB_NO_VECTOR \ | |||
464 | void Z7_FASTCALL | 492 | void Z7_FASTCALL |
465 | 493 | ||
466 | 494 | ||
495 | #if defined(MY_CPU_ARM_OR_ARM64) | ||
496 | #if defined(__clang__) | ||
497 | #pragma GCC diagnostic ignored "-Wlanguage-extension-token" | ||
498 | #endif | ||
499 | #endif | ||
500 | |||
501 | |||
467 | #ifdef MY_CPU_64BIT | 502 | #ifdef MY_CPU_64BIT |
468 | 503 | ||
469 | #if defined(MY_CPU_ARM64) \ | 504 | #if defined(MY_CPU_ARM64) \ |