/* XzCrc64Opt.c -- CRC64 calculation (optimized functions) 2023-12-08 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "CpuArch.h" #if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1 // for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu // #define Z7_CRC64_DEBUG_BE #ifdef Z7_CRC64_DEBUG_BE #undef MY_CPU_LE #define MY_CPU_BE #endif #if defined(MY_CPU_64BIT) #define Z7_CRC64_USE_64BIT #endif // the value Z7_CRC64_NUM_TABLES_USE must be defined to same value as in XzCrc64.c #ifdef Z7_CRC64_NUM_TABLES #define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES #else #define Z7_CRC64_NUM_TABLES_USE 12 #endif #if Z7_CRC64_NUM_TABLES_USE % 4 || \ Z7_CRC64_NUM_TABLES_USE < 4 || \ Z7_CRC64_NUM_TABLES_USE > 4 * 4 #error Stop_Compiling_Bad_CRC64_NUM_TABLES #endif #ifndef MY_CPU_BE #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) #if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0) #define Q64LE(n, d) \ ( (table + ((n) * 8 + 7) * 0x100)[((d) ) & 0xFF] \ ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] ) #define R64(a) *((const UInt64 *)(const void *)p + (a)) #else #define Q32LE(n, d) \ ( (table + ((n) * 4 + 3) * 0x100)[((d) ) & 0xFF] \ ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \ ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \ ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] ) #define R32(a) *((const UInt32 *)(const void *)p + (a)) #endif #define CRC64_FUNC_PRE_LE2(step) \ UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table) #define CRC64_FUNC_PRE_LE(step) \ CRC64_FUNC_PRE_LE2(step); \ CRC64_FUNC_PRE_LE2(step) CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE) { const Byte *p = (const Byte *)data; const Byte *lim; for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++) v = CRC64_UPDATE_BYTE_2(v, *p); lim = p + size; if (size >= Z7_CRC64_NUM_TABLES_USE) { lim -= Z7_CRC64_NUM_TABLES_USE; do { #if Z7_CRC64_NUM_TABLES_USE == 4 const UInt32 d = (UInt32)v ^ R32(0); v = (v >> 32) ^ Q32LE(0, d); #elif Z7_CRC64_NUM_TABLES_USE == 8 #ifdef Z7_CRC64_USE_64BIT v ^= R64(0); v = Q64LE(0, v); #else UInt32 v0, v1; v0 = (UInt32)v ^ R32(0); v1 = (UInt32)(v >> 32) ^ R32(1); v = Q32LE(1, v0) ^ Q32LE(0, v1); #endif #elif Z7_CRC64_NUM_TABLES_USE == 12 UInt32 w; UInt32 v0, v1; v0 = (UInt32)v ^ R32(0); v1 = (UInt32)(v >> 32) ^ R32(1); w = R32(2); v = Q32LE(0, w); v ^= Q32LE(2, v0) ^ Q32LE(1, v1); #elif Z7_CRC64_NUM_TABLES_USE == 16 #ifdef Z7_CRC64_USE_64BIT UInt64 w; UInt64 x; w = R64(1); x = Q64LE(0, w); v ^= R64(0); v = x ^ Q64LE(1, v); #else UInt32 v0, v1; UInt32 r0, r1; v0 = (UInt32)v ^ R32(0); v1 = (UInt32)(v >> 32) ^ R32(1); r0 = R32(2); r1 = R32(3); v = Q32LE(1, r0) ^ Q32LE(0, r1); v ^= Q32LE(3, v0) ^ Q32LE(2, v1); #endif #else #error Stop_Compiling_Bad_CRC64_NUM_TABLES #endif p += Z7_CRC64_NUM_TABLES_USE; } while (p <= lim); lim += Z7_CRC64_NUM_TABLES_USE; } for (; p < lim; p++) v = CRC64_UPDATE_BYTE_2(v, *p); return v; } #undef CRC64_UPDATE_BYTE_2 #undef R32 #undef R64 #undef Q32LE #undef Q64LE #undef CRC64_FUNC_PRE_LE #undef CRC64_FUNC_PRE_LE2 #endif #ifndef MY_CPU_LE #define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8)) #if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0) #define Q64BE(n, d) \ ( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \ ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \ ^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] ) #ifdef Z7_CRC64_DEBUG_BE #define R64BE(a) GetBe64a((const UInt64 *)(const void *)p + (a)) #else #define R64BE(a) *((const UInt64 *)(const void *)p + (a)) #endif #else #define Q32BE(n, d) \ ( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \ ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] ) #ifdef Z7_CRC64_DEBUG_BE #define R32BE(a) GetBe32a((const UInt32 *)(const void *)p + (a)) #else #define R32BE(a) *((const UInt32 *)(const void *)p + (a)) #endif #endif #define CRC64_FUNC_PRE_BE2(step) \ UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table) #define CRC64_FUNC_PRE_BE(step) \ CRC64_FUNC_PRE_BE2(step); \ CRC64_FUNC_PRE_BE2(step) CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE) { const Byte *p = (const Byte *)data; const Byte *lim; v = Z7_BSWAP64(v); for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++) v = CRC64_UPDATE_BYTE_2_BE(v, *p); lim = p + size; if (size >= Z7_CRC64_NUM_TABLES_USE) { lim -= Z7_CRC64_NUM_TABLES_USE; do { #if Z7_CRC64_NUM_TABLES_USE == 4 const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0); v = (v << 32) ^ Q32BE(0, d); #elif Z7_CRC64_NUM_TABLES_USE == 12 const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); const UInt32 w = R32BE(2); v = Q32BE(0, w); v ^= Q32BE(2, d1) ^ Q32BE(1, d0); #elif Z7_CRC64_NUM_TABLES_USE == 8 #ifdef Z7_CRC64_USE_64BIT v ^= R64BE(0); v = Q64BE(0, v); #else const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); v = Q32BE(1, d1) ^ Q32BE(0, d0); #endif #elif Z7_CRC64_NUM_TABLES_USE == 16 #ifdef Z7_CRC64_USE_64BIT const UInt64 w = R64BE(1); v ^= R64BE(0); v = Q64BE(0, w) ^ Q64BE(1, v); #else const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); const UInt32 w1 = R32BE(2); const UInt32 w0 = R32BE(3); v = Q32BE(1, w1) ^ Q32BE(0, w0); v ^= Q32BE(3, d1) ^ Q32BE(2, d0); #endif #elif #error Stop_Compiling_Bad_CRC64_NUM_TABLES #endif p += Z7_CRC64_NUM_TABLES_USE; } while (p <= lim); lim += Z7_CRC64_NUM_TABLES_USE; } for (; p < lim; p++) v = CRC64_UPDATE_BYTE_2_BE(v, *p); return Z7_BSWAP64(v); } #undef CRC64_UPDATE_BYTE_2_BE #undef R32BE #undef R64BE #undef Q32BE #undef Q64BE #undef CRC64_FUNC_PRE_BE #undef CRC64_FUNC_PRE_BE2 #endif #undef Z7_CRC64_NUM_TABLES_USE #endif