about | summary | refs | log | tree | commit | diff
path: root/C/XzCrc64Opt.c
diff options
context:
space:
mode:
Diffstat (limited to 'C/XzCrc64Opt.c')
-rw-r--r-- C/XzCrc64Opt.c 254
1 file changed, 227 insertions, 27 deletions
diff --git a/C/XzCrc64Opt.c b/C/XzCrc64Opt.c
index d03374c..0c1fc2f 100644
--- a/C/XzCrc64Opt.c
+++ b/C/XzCrc64Opt.c
@@ -1,61 +1,261 @@
1/* XzCrc64Opt.c -- CRC64 calculation 1/* XzCrc64Opt.c -- CRC64 calculation (optimized functions)
22023-04-02 : Igor Pavlov : Public domain */ 22023-12-08 : Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5 5
6#include "CpuArch.h" 6#include "CpuArch.h"
7 7
8#if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1
9
10// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu
11// #define Z7_CRC64_DEBUG_BE
12#ifdef Z7_CRC64_DEBUG_BE
13#undef MY_CPU_LE
14#define MY_CPU_BE
15#endif
16
17#if defined(MY_CPU_64BIT)
18#define Z7_CRC64_USE_64BIT
19#endif
20
21// the value Z7_CRC64_NUM_TABLES_USE must be defined to same value as in XzCrc64.c
22#ifdef Z7_CRC64_NUM_TABLES
23#define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES
24#else
25#define Z7_CRC64_NUM_TABLES_USE 12
26#endif
27
28#if Z7_CRC64_NUM_TABLES_USE % 4 || \
29 Z7_CRC64_NUM_TABLES_USE < 4 || \
30 Z7_CRC64_NUM_TABLES_USE > 4 * 4
31 #error Stop_Compiling_Bad_CRC64_NUM_TABLES
32#endif
33
34
8#ifndef MY_CPU_BE 35#ifndef MY_CPU_BE
9 36
10#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) 37#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
38
39#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
11 40
12UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); 41#define Q64LE(n, d) \
13UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table) 42 ( (table + ((n) * 8 + 7) * 0x100)[((d) ) & 0xFF] \
43 ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \
44 ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \
45 ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \
46 ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \
47 ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \
48 ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \
49 ^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] )
50
51#define R64(a) *((const UInt64 *)(const void *)p + (a))
52
53#else
54
55#define Q32LE(n, d) \
56 ( (table + ((n) * 4 + 3) * 0x100)[((d) ) & 0xFF] \
57 ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
58 ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
59 ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )
60
61#define R32(a) *((const UInt32 *)(const void *)p + (a))
62
63#endif
64
65
66#define CRC64_FUNC_PRE_LE2(step) \
67UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
68
69#define CRC64_FUNC_PRE_LE(step) \
70 CRC64_FUNC_PRE_LE2(step); \
71 CRC64_FUNC_PRE_LE2(step)
72
73CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE)
14{ 74{
15 const Byte *p = (const Byte *)data; 75 const Byte *p = (const Byte *)data;
16 for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) 76 const Byte *lim;
77 for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
17 v = CRC64_UPDATE_BYTE_2(v, *p); 78 v = CRC64_UPDATE_BYTE_2(v, *p);
18 for (; size >= 4; size -= 4, p += 4) 79 lim = p + size;
80 if (size >= Z7_CRC64_NUM_TABLES_USE)
19 { 81 {
20 const UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p; 82 lim -= Z7_CRC64_NUM_TABLES_USE;
21 v = (v >> 32) 83 do
22 ^ (table + 0x300)[((d ) & 0xFF)] 84 {
23 ^ (table + 0x200)[((d >> 8) & 0xFF)] 85#if Z7_CRC64_NUM_TABLES_USE == 4
24 ^ (table + 0x100)[((d >> 16) & 0xFF)] 86 const UInt32 d = (UInt32)v ^ R32(0);
25 ^ (table + 0x000)[((d >> 24))]; 87 v = (v >> 32) ^ Q32LE(0, d);
88#elif Z7_CRC64_NUM_TABLES_USE == 8
89#ifdef Z7_CRC64_USE_64BIT
90 v ^= R64(0);
91 v = Q64LE(0, v);
92#else
93 UInt32 v0, v1;
94 v0 = (UInt32)v ^ R32(0);
95 v1 = (UInt32)(v >> 32) ^ R32(1);
96 v = Q32LE(1, v0) ^ Q32LE(0, v1);
97#endif
98#elif Z7_CRC64_NUM_TABLES_USE == 12
99 UInt32 w;
100 UInt32 v0, v1;
101 v0 = (UInt32)v ^ R32(0);
102 v1 = (UInt32)(v >> 32) ^ R32(1);
103 w = R32(2);
104 v = Q32LE(0, w);
105 v ^= Q32LE(2, v0) ^ Q32LE(1, v1);
106#elif Z7_CRC64_NUM_TABLES_USE == 16
107#ifdef Z7_CRC64_USE_64BIT
108 UInt64 w;
109 UInt64 x;
110 w = R64(1); x = Q64LE(0, w);
111 v ^= R64(0); v = x ^ Q64LE(1, v);
112#else
113 UInt32 v0, v1;
114 UInt32 r0, r1;
115 v0 = (UInt32)v ^ R32(0);
116 v1 = (UInt32)(v >> 32) ^ R32(1);
117 r0 = R32(2);
118 r1 = R32(3);
119 v = Q32LE(1, r0) ^ Q32LE(0, r1);
120 v ^= Q32LE(3, v0) ^ Q32LE(2, v1);
121#endif
122#else
123#error Stop_Compiling_Bad_CRC64_NUM_TABLES
124#endif
125 p += Z7_CRC64_NUM_TABLES_USE;
126 }
127 while (p <= lim);
128 lim += Z7_CRC64_NUM_TABLES_USE;
26 } 129 }
27 for (; size > 0; size--, p++) 130 for (; p < lim; p++)
28 v = CRC64_UPDATE_BYTE_2(v, *p); 131 v = CRC64_UPDATE_BYTE_2(v, *p);
29 return v; 132 return v;
30} 133}
31 134
135#undef CRC64_UPDATE_BYTE_2
136#undef R32
137#undef R64
138#undef Q32LE
139#undef Q64LE
140#undef CRC64_FUNC_PRE_LE
141#undef CRC64_FUNC_PRE_LE2
142
32#endif 143#endif
33 144
34 145
146
147
35#ifndef MY_CPU_LE 148#ifndef MY_CPU_LE
36 149
37#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8)) 150#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8))
151
152#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
153
154#define Q64BE(n, d) \
155 ( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \
156 ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
157 ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
158 ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \
159 ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \
160 ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \
161 ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \
162 ^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] )
163
164#ifdef Z7_CRC64_DEBUG_BE
165 #define R64BE(a) GetBe64a((const UInt64 *)(const void *)p + (a))
166#else
167 #define R64BE(a) *((const UInt64 *)(const void *)p + (a))
168#endif
169
170#else
171
172#define Q32BE(n, d) \
173 ( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \
174 ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
175 ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
176 ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )
38 177
39UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); 178#ifdef Z7_CRC64_DEBUG_BE
40UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table) 179 #define R32BE(a) GetBe32a((const UInt32 *)(const void *)p + (a))
180#else
181 #define R32BE(a) *((const UInt32 *)(const void *)p + (a))
182#endif
183
184#endif
185
186#define CRC64_FUNC_PRE_BE2(step) \
187UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
188
189#define CRC64_FUNC_PRE_BE(step) \
190 CRC64_FUNC_PRE_BE2(step); \
191 CRC64_FUNC_PRE_BE2(step)
192
193CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)
41{ 194{
42 const Byte *p = (const Byte *)data; 195 const Byte *p = (const Byte *)data;
43 table += 0x100; 196 const Byte *lim;
44 v = Z7_BSWAP64(v); 197 v = Z7_BSWAP64(v);
45 for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) 198 for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
46 v = CRC64_UPDATE_BYTE_2_BE(v, *p); 199 v = CRC64_UPDATE_BYTE_2_BE(v, *p);
47 for (; size >= 4; size -= 4, p += 4) 200 lim = p + size;
201 if (size >= Z7_CRC64_NUM_TABLES_USE)
48 { 202 {
49 const UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p; 203 lim -= Z7_CRC64_NUM_TABLES_USE;
50 v = (v << 32) 204 do
51 ^ (table + 0x000)[((d ) & 0xFF)] 205 {
52 ^ (table + 0x100)[((d >> 8) & 0xFF)] 206#if Z7_CRC64_NUM_TABLES_USE == 4
53 ^ (table + 0x200)[((d >> 16) & 0xFF)] 207 const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0);
54 ^ (table + 0x300)[((d >> 24))]; 208 v = (v << 32) ^ Q32BE(0, d);
209#elif Z7_CRC64_NUM_TABLES_USE == 12
210 const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
211 const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
212 const UInt32 w = R32BE(2);
213 v = Q32BE(0, w);
214 v ^= Q32BE(2, d1) ^ Q32BE(1, d0);
215
216#elif Z7_CRC64_NUM_TABLES_USE == 8
217 #ifdef Z7_CRC64_USE_64BIT
218 v ^= R64BE(0);
219 v = Q64BE(0, v);
220 #else
221 const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
222 const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
223 v = Q32BE(1, d1) ^ Q32BE(0, d0);
224 #endif
225#elif Z7_CRC64_NUM_TABLES_USE == 16
226 #ifdef Z7_CRC64_USE_64BIT
227 const UInt64 w = R64BE(1);
228 v ^= R64BE(0);
229 v = Q64BE(0, w) ^ Q64BE(1, v);
230 #else
231 const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
232 const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
233 const UInt32 w1 = R32BE(2);
234 const UInt32 w0 = R32BE(3);
235 v = Q32BE(1, w1) ^ Q32BE(0, w0);
236 v ^= Q32BE(3, d1) ^ Q32BE(2, d0);
237 #endif
238#elif
239#error Stop_Compiling_Bad_CRC64_NUM_TABLES
240#endif
241 p += Z7_CRC64_NUM_TABLES_USE;
242 }
243 while (p <= lim);
244 lim += Z7_CRC64_NUM_TABLES_USE;
55 } 245 }
56 for (; size > 0; size--, p++) 246 for (; p < lim; p++)
57 v = CRC64_UPDATE_BYTE_2_BE(v, *p); 247 v = CRC64_UPDATE_BYTE_2_BE(v, *p);
58 return Z7_BSWAP64(v); 248 return Z7_BSWAP64(v);
59} 249}
60 250
251#undef CRC64_UPDATE_BYTE_2_BE
252#undef R32BE
253#undef R64BE
254#undef Q32BE
255#undef Q64BE
256#undef CRC64_FUNC_PRE_BE
257#undef CRC64_FUNC_PRE_BE2
258
259#endif
260#undef Z7_CRC64_NUM_TABLES_USE
61#endif 261#endif