aboutsummaryrefslogtreecommitdiff
path: root/C/SwapBytes.c
diff options
context:
space:
mode:
Diffstat (limited to 'C/SwapBytes.c')
-rw-r--r--C/SwapBytes.c63
1 files changed, 49 insertions, 14 deletions
diff --git a/C/SwapBytes.c b/C/SwapBytes.c
index 7901bba..9290592 100644
--- a/C/SwapBytes.c
+++ b/C/SwapBytes.c
@@ -1,5 +1,5 @@
1/* SwapBytes.c -- Byte Swap conversion filter 1/* SwapBytes.c -- Byte Swap conversion filter
22023-04-07 : Igor Pavlov : Public domain */ 22024-03-01 : Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5 5
@@ -305,11 +305,12 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr)
305 msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want 305 msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want
306 */ 306 */
307 // _mm256_broadcastsi128_si256(*mask128_ptr); 307 // _mm256_broadcastsi128_si256(*mask128_ptr);
308 /* 308#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 80000)
309 #define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) 309 #define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)
310 MY_mm256_set_m128i 310#else
311 */ 311 #define MY_mm256_set_m128i _mm256_set_m128i
312 _mm256_set_m128i( 312#endif
313 MY_mm256_set_m128i(
313 *(const __m128i *)mask128_ptr, 314 *(const __m128i *)mask128_ptr,
314 *(const __m128i *)mask128_ptr); 315 *(const __m128i *)mask128_ptr);
315 #endif 316 #endif
@@ -330,32 +331,59 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr)
330 331
331 332
332// compile message "NEON intrinsics not available with the soft-float ABI" 333// compile message "NEON intrinsics not available with the soft-float ABI"
333#elif defined(MY_CPU_ARM_OR_ARM64) || \ 334#elif defined(MY_CPU_ARM_OR_ARM64) \
334 (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) 335 && defined(MY_CPU_LE) \
335// #elif defined(MY_CPU_ARM64) 336 && !defined(Z7_DISABLE_ARM_NEON)
336 337
337 #if defined(__clang__) && (__clang_major__ >= 8) \ 338 #if defined(__clang__) && (__clang_major__ >= 8) \
338 || defined(__GNUC__) && (__GNUC__ >= 8) 339 || defined(__GNUC__) && (__GNUC__ >= 6)
339 #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) \ 340 #if defined(__ARM_FP)
341 #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 4)) \
340 || defined(MY_CPU_ARM64) 342 || defined(MY_CPU_ARM64)
343 #if defined(MY_CPU_ARM64) \
344 || !defined(Z7_CLANG_VERSION) \
345 || defined(__ARM_NEON)
341 #define USE_SWAP_128 346 #define USE_SWAP_128
342 #endif
343 #ifdef MY_CPU_ARM64 347 #ifdef MY_CPU_ARM64
344 // #define SWAP_ATTRIB_NEON __attribute__((__target__(""))) 348 // #define SWAP_ATTRIB_NEON __attribute__((__target__("")))
345 #else 349 #else
346 // #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) 350#if defined(Z7_CLANG_VERSION)
347 #endif 351 // #define SWAP_ATTRIB_NEON __attribute__((__target__("neon")))
352#else
353 // #pragma message("SWAP_ATTRIB_NEON __attribute__((__target__(fpu=neon))")
354 #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=neon")))
355#endif
356 #endif // MY_CPU_ARM64
357 #endif // __ARM_NEON
358 #endif // __ARM_ARCH
359 #endif // __ARM_FP
360
348 #elif defined(_MSC_VER) 361 #elif defined(_MSC_VER)
349 #if (_MSC_VER >= 1910) 362 #if (_MSC_VER >= 1910)
350 #define USE_SWAP_128 363 #define USE_SWAP_128
351 #endif 364 #endif
352 #endif 365 #endif
353 366
354 #if defined(_MSC_VER) && defined(MY_CPU_ARM64) 367 #ifdef USE_SWAP_128
368 #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
355 #include <arm64_neon.h> 369 #include <arm64_neon.h>
356 #else 370 #else
371
372/*
373#if !defined(__ARM_NEON)
374#if defined(Z7_GCC_VERSION) && (__GNUC__ < 5) \
375 || defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 90201) \
376 || defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 100100)
377Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
378#pragma message("#define __ARM_NEON 1")
379// #define __ARM_NEON 1
380Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
381#endif
382#endif
383*/
357 #include <arm_neon.h> 384 #include <arm_neon.h>
358 #endif 385 #endif
386 #endif
359 387
360#ifndef USE_SWAP_128 388#ifndef USE_SWAP_128
361 #define FORCE_SWAP_MODE 389 #define FORCE_SWAP_MODE
@@ -464,6 +492,13 @@ Z7_ATTRIB_NO_VECTOR \
464void Z7_FASTCALL 492void Z7_FASTCALL
465 493
466 494
495#if defined(MY_CPU_ARM_OR_ARM64)
496#if defined(__clang__)
497#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
498#endif
499#endif
500
501
467#ifdef MY_CPU_64BIT 502#ifdef MY_CPU_64BIT
468 503
469#if defined(MY_CPU_ARM64) \ 504#if defined(MY_CPU_ARM64) \