path: root/C/Xxh64.c
Diffstat (limited to 'C/Xxh64.c')
-rw-r--r--  C/Xxh64.c  327
1 file changed, 327 insertions, 0 deletions
diff --git a/C/Xxh64.c b/C/Xxh64.c
new file mode 100644
index 0000000..dc02a02
--- /dev/null
+++ b/C/Xxh64.c
@@ -0,0 +1,327 @@
/* Xxh64.c -- XXH64 hash calculation
original code: Copyright (c) Yann Collet.
2023-08-18 : modified by Igor Pavlov.
This source code is licensed under BSD 2-Clause License.
*/

#include "Precomp.h"

#include "CpuArch.h"
#include "RotateDefs.h"
#include "Xxh64.h"

#define Z7_XXH_PRIME64_1 UINT64_CONST(0x9E3779B185EBCA87)
#define Z7_XXH_PRIME64_2 UINT64_CONST(0xC2B2AE3D27D4EB4F)
#define Z7_XXH_PRIME64_3 UINT64_CONST(0x165667B19E3779F9)
#define Z7_XXH_PRIME64_4 UINT64_CONST(0x85EBCA77C2B2AE63)
#define Z7_XXH_PRIME64_5 UINT64_CONST(0x27D4EB2F165667C5)

void Xxh64State_Init(CXxh64State *p)
{
  const UInt64 seed = 0;
  p->v[0] = seed + Z7_XXH_PRIME64_1 + Z7_XXH_PRIME64_2;
  p->v[1] = seed + Z7_XXH_PRIME64_2;
  p->v[2] = seed;
  p->v[3] = seed - Z7_XXH_PRIME64_1;
}
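/* Note (added comment): these are the standard XXH64 accumulator start values
   for a zero seed; the four lanes v[0..3] are combined later in
   Xxh64State_Digest(). */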

#if !defined(MY_CPU_64BIT) && defined(MY_CPU_X86) && defined(_MSC_VER)
  #define Z7_XXH64_USE_ASM
#endif

#if !defined(MY_CPU_64BIT) && defined(MY_CPU_X86) \
    && defined(Z7_MSC_VER_ORIGINAL) && Z7_MSC_VER_ORIGINAL > 1200
/* We try to avoid __allmul calls in MSVC for 64-bit multiplication.
   But MSVC6 still emits __allmul for our code,
   so for MSVC6 we use the default 64-bit multiply without this optimization.
*/
#define LOW32(b) ((UInt32)(b & 0xffffffff))
/* The MSVC compiler (version > 1200) can use the "mul" instruction
   without __allmul for our MY_emulu macro.
   MY_emulu is similar to the __emulu(a, b) intrinsic. */
#define MY_emulu(a, b) ((UInt64)(a) * (b))
#define MY_SET_HIGH32(a) ((UInt64)(a) << 32)
#define MY_MUL32_SET_HIGH32(a, b) MY_SET_HIGH32((UInt32)(a) * (UInt32)(b))
// /*
#define MY_MUL64(a, b) \
  ( MY_emulu((UInt32)(a), LOW32(b)) + \
    MY_SET_HIGH32( \
      (UInt32)((a) >> 32) * LOW32(b) + \
      (UInt32)(a) * (UInt32)((b) >> 32) \
    ))
// */
/*
#define MY_MUL64(a, b) \
  ( MY_emulu((UInt32)(a), LOW32(b)) \
  + MY_MUL32_SET_HIGH32((a) >> 32, LOW32(b)) + \
  + MY_MUL32_SET_HIGH32(a, (b) >> 32) \
  )
*/

#define MY_MUL_32_64(a32, b) \
  ( MY_emulu((UInt32)(a32), LOW32(b)) \
  + MY_MUL32_SET_HIGH32(a32, (b) >> 32) \
  )

#else
#define MY_MUL64(a, b) ((a) * (b))
#define MY_MUL_32_64(a32, b) ((a32) * (UInt64)(b))
#endif
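/* Illustrative note (editorial, not part of the original code): MY_MUL64 above
   builds the low 64 bits of a 64x64-bit product from 32-bit multiplies:
     (aHi*2^32 + aLo) * (bHi*2^32 + bLo)  mod 2^64
       == aLo*bLo + ((aHi*bLo + aLo*bHi) << 32)
   The aHi*bHi term is dropped because it only affects bits 64 and above.
   MY_MUL_32_64 is the same expansion with aHi == 0. */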


static
Z7_FORCE_INLINE
UInt64 Xxh64_Round(UInt64 acc, UInt64 input)
{
  acc += MY_MUL64(input, Z7_XXH_PRIME64_2);
  acc = Z7_ROTL64(acc, 31);
  return MY_MUL64(acc, Z7_XXH_PRIME64_1);
}

static UInt64 Xxh64_Merge(UInt64 acc, UInt64 val)
{
  acc ^= Xxh64_Round(0, val);
  return MY_MUL64(acc, Z7_XXH_PRIME64_1) + Z7_XXH_PRIME64_4;
}
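/* Note (added comment): Xxh64_Round() is the per-lane XXH64 step
     acc = rotl64(acc + input * PRIME64_2, 31) * PRIME64_1,
   and Xxh64_Merge() folds one lane into the combined hash in
   Xxh64State_Digest() when the total input is 32 bytes or more. */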


#ifdef Z7_XXH64_USE_ASM

#define Z7_XXH_PRIME64_1_HIGH 0x9E3779B1
#define Z7_XXH_PRIME64_1_LOW 0x85EBCA87
#define Z7_XXH_PRIME64_2_HIGH 0xC2B2AE3D
#define Z7_XXH_PRIME64_2_LOW 0x27D4EB4F
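/* Note (added comment): the inline-asm version below performs the same
   Xxh64_Round() lane update with 32-bit x86 instructions (mul, imul, shld),
   using the PRIME64 constants split into the 32-bit halves defined above. */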

void
Z7_NO_INLINE
__declspec(naked)
Z7_FASTCALL
Xxh64State_UpdateBlocks(CXxh64State *p, const void *data, const void *end)
{
  #if !defined(__clang__)
  UNUSED_VAR(p)
  UNUSED_VAR(data)
  UNUSED_VAR(end)
  #endif
  __asm push ebx
  __asm push ebp
  __asm push esi
  __asm push edi

  #define STACK_OFFSET 4 * 8
  __asm sub esp, STACK_OFFSET

#define COPY_1(n) \
  __asm mov eax, [ecx + n * 4] \
  __asm mov [esp + n * 4], eax \

#define COPY_2(n) \
  __asm mov eax, [esp + n * 4] \
  __asm mov [ecx + n * 4], eax \

  COPY_1(0)
  __asm mov edi, [ecx + 1 * 4] \
  COPY_1(2)
  COPY_1(3)
  COPY_1(4)
  COPY_1(5)
  COPY_1(6)
  COPY_1(7)

  __asm mov esi, edx \
  __asm mov [esp + 0 * 8 + 4], ecx
  __asm mov ecx, Z7_XXH_PRIME64_2_LOW \
  __asm mov ebp, Z7_XXH_PRIME64_1_LOW \

#define R(n, state1, state1_reg) \
  __asm mov eax, [esi + n * 8] \
  __asm imul ebx, eax, Z7_XXH_PRIME64_2_HIGH \
  __asm add ebx, state1 \
  __asm mul ecx \
  __asm add edx, ebx \
  __asm mov ebx, [esi + n * 8 + 4] \
  __asm imul ebx, ecx \
  __asm add eax, [esp + n * 8] \
  __asm adc edx, ebx \
  __asm mov ebx, eax \
  __asm shld eax, edx, 31 \
  __asm shld edx, ebx, 31 \
  __asm imul state1_reg, eax, Z7_XXH_PRIME64_1_HIGH \
  __asm imul edx, ebp \
  __asm add state1_reg, edx \
  __asm mul ebp \
  __asm add state1_reg, edx \
  __asm mov [esp + n * 8], eax \

#define R2(n) \
  R(n, [esp + n * 8 + 4], ebx) \
  __asm mov [esp + n * 8 + 4], ebx \

  __asm align 16
  __asm main_loop:
  R(0, edi, edi)
  R2(1)
  R2(2)
  R2(3)
  __asm add esi, 32
  __asm cmp esi, [esp + STACK_OFFSET + 4 * 4 + 4]
  __asm jne main_loop

  __asm mov ecx, [esp + 0 * 8 + 4]

  COPY_2(0)
  __asm mov [ecx + 1 * 4], edi
  COPY_2(2)
  COPY_2(3)
  COPY_2(4)
  COPY_2(5)
  COPY_2(6)
  COPY_2(7)

  __asm add esp, STACK_OFFSET
  __asm pop edi
  __asm pop esi
  __asm pop ebp
  __asm pop ebx
  __asm ret 4
}

#else

void
Z7_NO_INLINE
Z7_FASTCALL
Xxh64State_UpdateBlocks(CXxh64State *p, const void *_data, const void *end)
{
  const Byte *data = (const Byte *)_data;
  UInt64 v[4];
  v[0] = p->v[0];
  v[1] = p->v[1];
  v[2] = p->v[2];
  v[3] = p->v[3];
  do
  {
    v[0] = Xxh64_Round(v[0], GetUi64(data));  data += 8;
    v[1] = Xxh64_Round(v[1], GetUi64(data));  data += 8;
    v[2] = Xxh64_Round(v[2], GetUi64(data));  data += 8;
    v[3] = Xxh64_Round(v[3], GetUi64(data));  data += 8;
  }
  while (data != end);
  p->v[0] = v[0];
  p->v[1] = v[1];
  p->v[2] = v[2];
  p->v[3] = v[3];
}

#endif

UInt64 Xxh64State_Digest(const CXxh64State *p, const void *_data, UInt64 count)
{
  UInt64 h = p->v[2];

  if (count >= 32)
  {
    h = Z7_ROTL64(p->v[0], 1) +
        Z7_ROTL64(p->v[1], 7) +
        Z7_ROTL64(h, 12) +
        Z7_ROTL64(p->v[3], 18);
    h = Xxh64_Merge(h, p->v[0]);
    h = Xxh64_Merge(h, p->v[1]);
    h = Xxh64_Merge(h, p->v[2]);
    h = Xxh64_Merge(h, p->v[3]);
  }
  else
    h += Z7_XXH_PRIME64_5;

  h += count;

  // XXH64_finalize():
  {
    unsigned cnt = (unsigned)count & 31;
    const Byte *data = (const Byte *)_data;
    while (cnt >= 8)
    {
      h ^= Xxh64_Round(0, GetUi64(data));
      data += 8;
      h = Z7_ROTL64(h, 27);
      h = MY_MUL64(h, Z7_XXH_PRIME64_1) + Z7_XXH_PRIME64_4;
      cnt -= 8;
    }
    if (cnt >= 4)
    {
      const UInt32 v = GetUi32(data);
      data += 4;
      h ^= MY_MUL_32_64(v, Z7_XXH_PRIME64_1);
      h = Z7_ROTL64(h, 23);
      h = MY_MUL64(h, Z7_XXH_PRIME64_2) + Z7_XXH_PRIME64_3;
      cnt -= 4;
    }
    while (cnt)
    {
      const UInt32 v = *data++;
      h ^= MY_MUL_32_64(v, Z7_XXH_PRIME64_5);
      h = Z7_ROTL64(h, 11);
      h = MY_MUL64(h, Z7_XXH_PRIME64_1);
      cnt--;
    }
    // XXH64_avalanche(h):
    h ^= h >> 33;  h = MY_MUL64(h, Z7_XXH_PRIME64_2);
    h ^= h >> 29;  h = MY_MUL64(h, Z7_XXH_PRIME64_3);
    h ^= h >> 32;
    return h;
  }
}
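/* Note (added comment): the XXH64_finalize() part above consumes the remaining
   (count & 31) tail bytes in 8-, 4- and 1-byte steps, and XXH64_avalanche()
   is the final xor-shift / multiply mixing of the 64-bit result. */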


void Xxh64_Init(CXxh64 *p)
{
  Xxh64State_Init(&p->state);
  p->count = 0;
  p->buf64[0] = 0;
  p->buf64[1] = 0;
  p->buf64[2] = 0;
  p->buf64[3] = 0;
}

void Xxh64_Update(CXxh64 *p, const void *_data, size_t size)
{
  const Byte *data = (const Byte *)_data;
  unsigned cnt;
  if (size == 0)
    return;
  cnt = (unsigned)p->count;
  p->count += size;

  if (cnt &= 31)
  {
    unsigned rem = 32 - cnt;
    Byte *dest = (Byte *)p->buf64 + cnt;
    if (rem > size)
      rem = (unsigned)size;
    size -= rem;
    cnt += rem;
    // memcpy((Byte *)p->buf64 + cnt, data, rem);
    do
      *dest++ = *data++;
    while (--rem);
    if (cnt != 32)
      return;
    Xxh64State_UpdateBlocks(&p->state, p->buf64, &p->buf64[4]);
  }

  if (size &= ~(size_t)31)
  {
    Xxh64State_UpdateBlocks(&p->state, data, data + size);
    data += size;
  }

  cnt = (unsigned)p->count & 31;
  if (cnt)
  {
    // memcpy(p->buf64, data, cnt);
    Byte *dest = (Byte *)p->buf64;
    do
      *dest++ = *data++;
    while (--cnt);
  }
}
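/* Usage sketch (editorial addition, not part of the committed file): hashing a
   whole buffer with the streaming interface above. It relies only on the
   CXxh64 members used in this file (state, count, buf64), which are expected
   to be declared in Xxh64.h; the helper name Xxh64_HashBuf is hypothetical. */
static UInt64 Xxh64_HashBuf(const void *buf, size_t size)
{
  CXxh64 h;
  Xxh64_Init(&h);
  Xxh64_Update(&h, buf, size);
  // Xxh64_Update() keeps the last (count & 31) bytes buffered in h.buf64,
  // and Xxh64State_Digest() reads the tail from there together with the
  // total byte count:
  return Xxh64State_Digest(&h.state, h.buf64, h.count);
}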