diff options
Diffstat (limited to 'C/XzCrc64Opt.c')
-rw-r--r-- | C/XzCrc64Opt.c | 254 |
1 files changed, 227 insertions, 27 deletions
diff --git a/C/XzCrc64Opt.c b/C/XzCrc64Opt.c index d03374c..0c1fc2f 100644 --- a/C/XzCrc64Opt.c +++ b/C/XzCrc64Opt.c | |||
@@ -1,61 +1,261 @@ | |||
1 | /* XzCrc64Opt.c -- CRC64 calculation | 1 | /* XzCrc64Opt.c -- CRC64 calculation (optimized functions) |
2 | 2023-04-02 : Igor Pavlov : Public domain */ | 2 | 2023-12-08 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
6 | #include "CpuArch.h" | 6 | #include "CpuArch.h" |
7 | 7 | ||
8 | #if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1 | ||
9 | |||
10 | // for debug only : define Z7_CRC64_DEBUG_BE to test the big-endian code on a little-endian CPU | |
11 | // #define Z7_CRC64_DEBUG_BE | ||
12 | #ifdef Z7_CRC64_DEBUG_BE | ||
13 | #undef MY_CPU_LE | ||
14 | #define MY_CPU_BE | ||
15 | #endif | ||
16 | |||
17 | #if defined(MY_CPU_64BIT) | ||
18 | #define Z7_CRC64_USE_64BIT | ||
19 | #endif | ||
20 | |||
21 | // the value Z7_CRC64_NUM_TABLES_USE must be defined to the same value as in XzCrc64.c | |
22 | #ifdef Z7_CRC64_NUM_TABLES | ||
23 | #define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES | ||
24 | #else | ||
25 | #define Z7_CRC64_NUM_TABLES_USE 12 | ||
26 | #endif | ||
27 | |||
28 | #if Z7_CRC64_NUM_TABLES_USE % 4 || \ | ||
29 | Z7_CRC64_NUM_TABLES_USE < 4 || \ | ||
30 | Z7_CRC64_NUM_TABLES_USE > 4 * 4 | ||
31 | #error Stop_Compiling_Bad_CRC64_NUM_TABLES | ||
32 | #endif | ||
33 | |||
34 | |||
8 | #ifndef MY_CPU_BE | 35 | #ifndef MY_CPU_BE |
9 | 36 | ||
10 | #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) | 37 | #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) |
38 | |||
39 | #if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0) | ||
11 | 40 | ||
12 | UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); | 41 | #define Q64LE(n, d) \ |
13 | UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table) | 42 | ( (table + ((n) * 8 + 7) * 0x100)[((d) ) & 0xFF] \ |
43 | ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \ | ||
44 | ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \ | ||
45 | ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \ | ||
46 | ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \ | ||
47 | ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \ | ||
48 | ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \ | ||
49 | ^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] ) | ||
50 | |||
51 | #define R64(a) *((const UInt64 *)(const void *)p + (a)) | ||
52 | |||
53 | #else | ||
54 | |||
55 | #define Q32LE(n, d) \ | ||
56 | ( (table + ((n) * 4 + 3) * 0x100)[((d) ) & 0xFF] \ | ||
57 | ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \ | ||
58 | ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \ | ||
59 | ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] ) | ||
60 | |||
61 | #define R32(a) *((const UInt32 *)(const void *)p + (a)) | ||
62 | |||
63 | #endif | ||
64 | |||
65 | |||
66 | #define CRC64_FUNC_PRE_LE2(step) \ | ||
67 | UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table) | ||
68 | |||
69 | #define CRC64_FUNC_PRE_LE(step) \ | ||
70 | CRC64_FUNC_PRE_LE2(step); \ | ||
71 | CRC64_FUNC_PRE_LE2(step) | ||
72 | |||
73 | CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE) | ||
14 | { | 74 | { |
15 | const Byte *p = (const Byte *)data; | 75 | const Byte *p = (const Byte *)data; |
16 | for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) | 76 | const Byte *lim; |
77 | for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++) | ||
17 | v = CRC64_UPDATE_BYTE_2(v, *p); | 78 | v = CRC64_UPDATE_BYTE_2(v, *p); |
18 | for (; size >= 4; size -= 4, p += 4) | 79 | lim = p + size; |
80 | if (size >= Z7_CRC64_NUM_TABLES_USE) | ||
19 | { | 81 | { |
20 | const UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p; | 82 | lim -= Z7_CRC64_NUM_TABLES_USE; |
21 | v = (v >> 32) | 83 | do |
22 | ^ (table + 0x300)[((d ) & 0xFF)] | 84 | { |
23 | ^ (table + 0x200)[((d >> 8) & 0xFF)] | 85 | #if Z7_CRC64_NUM_TABLES_USE == 4 |
24 | ^ (table + 0x100)[((d >> 16) & 0xFF)] | 86 | const UInt32 d = (UInt32)v ^ R32(0); |
25 | ^ (table + 0x000)[((d >> 24))]; | 87 | v = (v >> 32) ^ Q32LE(0, d); |
88 | #elif Z7_CRC64_NUM_TABLES_USE == 8 | ||
89 | #ifdef Z7_CRC64_USE_64BIT | ||
90 | v ^= R64(0); | ||
91 | v = Q64LE(0, v); | ||
92 | #else | ||
93 | UInt32 v0, v1; | ||
94 | v0 = (UInt32)v ^ R32(0); | ||
95 | v1 = (UInt32)(v >> 32) ^ R32(1); | ||
96 | v = Q32LE(1, v0) ^ Q32LE(0, v1); | ||
97 | #endif | ||
98 | #elif Z7_CRC64_NUM_TABLES_USE == 12 | ||
99 | UInt32 w; | ||
100 | UInt32 v0, v1; | ||
101 | v0 = (UInt32)v ^ R32(0); | ||
102 | v1 = (UInt32)(v >> 32) ^ R32(1); | ||
103 | w = R32(2); | ||
104 | v = Q32LE(0, w); | ||
105 | v ^= Q32LE(2, v0) ^ Q32LE(1, v1); | ||
106 | #elif Z7_CRC64_NUM_TABLES_USE == 16 | ||
107 | #ifdef Z7_CRC64_USE_64BIT | ||
108 | UInt64 w; | ||
109 | UInt64 x; | ||
110 | w = R64(1); x = Q64LE(0, w); | ||
111 | v ^= R64(0); v = x ^ Q64LE(1, v); | ||
112 | #else | ||
113 | UInt32 v0, v1; | ||
114 | UInt32 r0, r1; | ||
115 | v0 = (UInt32)v ^ R32(0); | ||
116 | v1 = (UInt32)(v >> 32) ^ R32(1); | ||
117 | r0 = R32(2); | ||
118 | r1 = R32(3); | ||
119 | v = Q32LE(1, r0) ^ Q32LE(0, r1); | ||
120 | v ^= Q32LE(3, v0) ^ Q32LE(2, v1); | ||
121 | #endif | ||
122 | #else | ||
123 | #error Stop_Compiling_Bad_CRC64_NUM_TABLES | ||
124 | #endif | ||
125 | p += Z7_CRC64_NUM_TABLES_USE; | ||
126 | } | ||
127 | while (p <= lim); | ||
128 | lim += Z7_CRC64_NUM_TABLES_USE; | ||
26 | } | 129 | } |
27 | for (; size > 0; size--, p++) | 130 | for (; p < lim; p++) |
28 | v = CRC64_UPDATE_BYTE_2(v, *p); | 131 | v = CRC64_UPDATE_BYTE_2(v, *p); |
29 | return v; | 132 | return v; |
30 | } | 133 | } |
31 | 134 | ||
135 | #undef CRC64_UPDATE_BYTE_2 | ||
136 | #undef R32 | ||
137 | #undef R64 | ||
138 | #undef Q32LE | ||
139 | #undef Q64LE | ||
140 | #undef CRC64_FUNC_PRE_LE | ||
141 | #undef CRC64_FUNC_PRE_LE2 | ||
142 | |||
32 | #endif | 143 | #endif |
33 | 144 | ||
34 | 145 | ||
146 | |||
147 | |||
35 | #ifndef MY_CPU_LE | 148 | #ifndef MY_CPU_LE |
36 | 149 | ||
37 | #define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8)) | 150 | #define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8)) |
151 | |||
152 | #if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0) | ||
153 | |||
154 | #define Q64BE(n, d) \ | ||
155 | ( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \ | ||
156 | ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ | ||
157 | ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ | ||
158 | ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \ | ||
159 | ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \ | ||
160 | ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \ | ||
161 | ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \ | ||
162 | ^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] ) | ||
163 | |||
164 | #ifdef Z7_CRC64_DEBUG_BE | ||
165 | #define R64BE(a) GetBe64a((const UInt64 *)(const void *)p + (a)) | ||
166 | #else | ||
167 | #define R64BE(a) *((const UInt64 *)(const void *)p + (a)) | ||
168 | #endif | ||
169 | |||
170 | #else | ||
171 | |||
172 | #define Q32BE(n, d) \ | ||
173 | ( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \ | ||
174 | ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ | ||
175 | ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ | ||
176 | ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] ) | ||
38 | 177 | ||
39 | UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); | 178 | #ifdef Z7_CRC64_DEBUG_BE |
40 | UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table) | 179 | #define R32BE(a) GetBe32a((const UInt32 *)(const void *)p + (a)) |
180 | #else | ||
181 | #define R32BE(a) *((const UInt32 *)(const void *)p + (a)) | ||
182 | #endif | ||
183 | |||
184 | #endif | ||
185 | |||
186 | #define CRC64_FUNC_PRE_BE2(step) \ | ||
187 | UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table) | ||
188 | |||
189 | #define CRC64_FUNC_PRE_BE(step) \ | ||
190 | CRC64_FUNC_PRE_BE2(step); \ | ||
191 | CRC64_FUNC_PRE_BE2(step) | ||
192 | |||
193 | CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE) | ||
41 | { | 194 | { |
42 | const Byte *p = (const Byte *)data; | 195 | const Byte *p = (const Byte *)data; |
43 | table += 0x100; | 196 | const Byte *lim; |
44 | v = Z7_BSWAP64(v); | 197 | v = Z7_BSWAP64(v); |
45 | for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) | 198 | for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++) |
46 | v = CRC64_UPDATE_BYTE_2_BE(v, *p); | 199 | v = CRC64_UPDATE_BYTE_2_BE(v, *p); |
47 | for (; size >= 4; size -= 4, p += 4) | 200 | lim = p + size; |
201 | if (size >= Z7_CRC64_NUM_TABLES_USE) | ||
48 | { | 202 | { |
49 | const UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p; | 203 | lim -= Z7_CRC64_NUM_TABLES_USE; |
50 | v = (v << 32) | 204 | do |
51 | ^ (table + 0x000)[((d ) & 0xFF)] | 205 | { |
52 | ^ (table + 0x100)[((d >> 8) & 0xFF)] | 206 | #if Z7_CRC64_NUM_TABLES_USE == 4 |
53 | ^ (table + 0x200)[((d >> 16) & 0xFF)] | 207 | const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0); |
54 | ^ (table + 0x300)[((d >> 24))]; | 208 | v = (v << 32) ^ Q32BE(0, d); |
209 | #elif Z7_CRC64_NUM_TABLES_USE == 12 | ||
210 | const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); | ||
211 | const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); | ||
212 | const UInt32 w = R32BE(2); | ||
213 | v = Q32BE(0, w); | ||
214 | v ^= Q32BE(2, d1) ^ Q32BE(1, d0); | ||
215 | |||
216 | #elif Z7_CRC64_NUM_TABLES_USE == 8 | ||
217 | #ifdef Z7_CRC64_USE_64BIT | ||
218 | v ^= R64BE(0); | ||
219 | v = Q64BE(0, v); | ||
220 | #else | ||
221 | const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); | ||
222 | const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); | ||
223 | v = Q32BE(1, d1) ^ Q32BE(0, d0); | ||
224 | #endif | ||
225 | #elif Z7_CRC64_NUM_TABLES_USE == 16 | ||
226 | #ifdef Z7_CRC64_USE_64BIT | ||
227 | const UInt64 w = R64BE(1); | ||
228 | v ^= R64BE(0); | ||
229 | v = Q64BE(0, w) ^ Q64BE(1, v); | ||
230 | #else | ||
231 | const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); | ||
232 | const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); | ||
233 | const UInt32 w1 = R32BE(2); | ||
234 | const UInt32 w0 = R32BE(3); | ||
235 | v = Q32BE(1, w1) ^ Q32BE(0, w0); | ||
236 | v ^= Q32BE(3, d1) ^ Q32BE(2, d0); | ||
237 | #endif | ||
238 | #else | |
239 | #error Stop_Compiling_Bad_CRC64_NUM_TABLES | ||
240 | #endif | ||
241 | p += Z7_CRC64_NUM_TABLES_USE; | ||
242 | } | ||
243 | while (p <= lim); | ||
244 | lim += Z7_CRC64_NUM_TABLES_USE; | ||
55 | } | 245 | } |
56 | for (; size > 0; size--, p++) | 246 | for (; p < lim; p++) |
57 | v = CRC64_UPDATE_BYTE_2_BE(v, *p); | 247 | v = CRC64_UPDATE_BYTE_2_BE(v, *p); |
58 | return Z7_BSWAP64(v); | 248 | return Z7_BSWAP64(v); |
59 | } | 249 | } |
60 | 250 | ||
251 | #undef CRC64_UPDATE_BYTE_2_BE | ||
252 | #undef R32BE | ||
253 | #undef R64BE | ||
254 | #undef Q32BE | ||
255 | #undef Q64BE | ||
256 | #undef CRC64_FUNC_PRE_BE | ||
257 | #undef CRC64_FUNC_PRE_BE2 | ||
258 | |||
259 | #endif | ||
260 | #undef Z7_CRC64_NUM_TABLES_USE | ||
61 | #endif | 261 | #endif |