author     Igor Pavlov <87184205+ip7z@users.noreply.github.com>    2024-05-14 00:00:00 +0000
committer  Igor Pavlov <87184205+ip7z@users.noreply.github.com>    2024-05-15 23:55:04 +0500
commit     fc662341e6f85da78ada0e443f6116b978f79f22 (patch)
tree       1be1cc402a7a9cbc18d4eeea6b141354c2d559e3 /C/Xxh64.c
parent     5b39dc76f1bc82f941d5c800ab9f34407a06b53a (diff)
download   7zip-fc662341e6f85da78ada0e443f6116b978f79f22.tar.gz
           7zip-fc662341e6f85da78ada0e443f6116b978f79f22.tar.bz2
           7zip-fc662341e6f85da78ada0e443f6116b978f79f22.zip
tag        24.05
Diffstat (limited to 'C/Xxh64.c')
 -rw-r--r--   C/Xxh64.c   327
 1 file changed, 327 insertions, 0 deletions
diff --git a/C/Xxh64.c b/C/Xxh64.c
new file mode 100644
index 0000000..dc02a02
--- /dev/null
+++ b/C/Xxh64.c
@@ -0,0 +1,327 @@
/* Xxh64.c -- XXH64 hash calculation
original code: Copyright (c) Yann Collet.
2023-08-18 : modified by Igor Pavlov.
This source code is licensed under BSD 2-Clause License.
*/

#include "Precomp.h"

#include "CpuArch.h"
#include "RotateDefs.h"
#include "Xxh64.h"

#define Z7_XXH_PRIME64_1 UINT64_CONST(0x9E3779B185EBCA87)
#define Z7_XXH_PRIME64_2 UINT64_CONST(0xC2B2AE3D27D4EB4F)
#define Z7_XXH_PRIME64_3 UINT64_CONST(0x165667B19E3779F9)
#define Z7_XXH_PRIME64_4 UINT64_CONST(0x85EBCA77C2B2AE63)
#define Z7_XXH_PRIME64_5 UINT64_CONST(0x27D4EB2F165667C5)
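/* These are the five standard 64-bit prime constants of XXH64
   (XXH_PRIME64_1..XXH_PRIME64_5 in the xxHash reference code),
   only renamed with the Z7_ prefix. */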

void Xxh64State_Init(CXxh64State *p)
{
  const UInt64 seed = 0;
  p->v[0] = seed + Z7_XXH_PRIME64_1 + Z7_XXH_PRIME64_2;
  p->v[1] = seed + Z7_XXH_PRIME64_2;
  p->v[2] = seed;
  p->v[3] = seed - Z7_XXH_PRIME64_1;
}

#if !defined(MY_CPU_64BIT) && defined(MY_CPU_X86) && defined(_MSC_VER)
  #define Z7_XXH64_USE_ASM
#endif
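/* The hand-written x86 assembler version of Xxh64State_UpdateBlocks() below is
   used only for 32-bit x86 builds with MSVC; all other targets use the portable
   C version further down. */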

#if !defined(MY_CPU_64BIT) && defined(MY_CPU_X86) \
    && defined(Z7_MSC_VER_ORIGINAL) && Z7_MSC_VER_ORIGINAL > 1200
/* we try to avoid __allmul calls in MSVC for 64-bit multiply.
   But MSVC6 still uses __allmul for our code.
   So for MSVC6 we use default 64-bit multiply without our optimization.
*/
#define LOW32(b) ((UInt32)(b & 0xffffffff))
/* MSVC compiler (MSVC > 1200) can use "mul" instruction
   without __allmul for our MY_emulu MACRO.
   MY_emulu is similar to __emulu(a, b) MACRO */
#define MY_emulu(a, b) ((UInt64)(a) * (b))
#define MY_SET_HIGH32(a) ((UInt64)(a) << 32)
#define MY_MUL32_SET_HIGH32(a, b) MY_SET_HIGH32((UInt32)(a) * (UInt32)(b))
// /*
#define MY_MUL64(a, b) \
  ( MY_emulu((UInt32)(a), LOW32(b)) + \
    MY_SET_HIGH32( \
      (UInt32)((a) >> 32) * LOW32(b) + \
      (UInt32)(a) * (UInt32)((b) >> 32) \
  ))
// */
/*
#define MY_MUL64(a, b) \
  ( MY_emulu((UInt32)(a), LOW32(b)) \
    + MY_MUL32_SET_HIGH32((a) >> 32, LOW32(b)) + \
    + MY_MUL32_SET_HIGH32(a, (b) >> 32) \
  )
*/

#define MY_MUL_32_64(a32, b) \
  ( MY_emulu((UInt32)(a32), LOW32(b)) \
    + MY_MUL32_SET_HIGH32(a32, (b) >> 32) \
  )

#else
#define MY_MUL64(a, b) ((a) * (b))
#define MY_MUL_32_64(a32, b) ((a32) * (UInt64)(b))
#endif
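/* Note on the MY_MUL64 decomposition above (explanatory sketch, not part of the
   original sources): writing a = aHigh*2^32 + aLow and b = bHigh*2^32 + bLow,
   the low 64 bits of the product are
     (a * b) mod 2^64 == (UInt64)aLow*bLow + (((aHigh*bLow + aLow*bHigh) mod 2^32) << 32)
   because the aHigh*bHigh term only contributes above bit 63. So one 32x32->64
   "mul" (MY_emulu) plus two truncated 32-bit "imul" products are enough, and the
   compiler does not need to call its __allmul helper. */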


static
Z7_FORCE_INLINE
UInt64 Xxh64_Round(UInt64 acc, UInt64 input)
{
  acc += MY_MUL64(input, Z7_XXH_PRIME64_2);
  acc = Z7_ROTL64(acc, 31);
  return MY_MUL64(acc, Z7_XXH_PRIME64_1);
}
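/* Xxh64_Round() is the standard XXH64 per-lane step:
     acc = rotl64(acc + input * PRIME64_2, 31) * PRIME64_1
   Xxh64State_UpdateBlocks() applies it to each of the four lanes in v[]
   once per 8 bytes of input. */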

static UInt64 Xxh64_Merge(UInt64 acc, UInt64 val)
{
  acc ^= Xxh64_Round(0, val);
  return MY_MUL64(acc, Z7_XXH_PRIME64_1) + Z7_XXH_PRIME64_4;
}
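/* Xxh64_Merge() folds one lane into the combined hash during finalization:
     h = (h ^ Xxh64_Round(0, lane)) * PRIME64_1 + PRIME64_4  */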


#ifdef Z7_XXH64_USE_ASM

#define Z7_XXH_PRIME64_1_HIGH 0x9E3779B1
#define Z7_XXH_PRIME64_1_LOW  0x85EBCA87
#define Z7_XXH_PRIME64_2_HIGH 0xC2B2AE3D
#define Z7_XXH_PRIME64_2_LOW  0x27D4EB4F

void
Z7_NO_INLINE
__declspec(naked)
Z7_FASTCALL
Xxh64State_UpdateBlocks(CXxh64State *p, const void *data, const void *end)
{
#if !defined(__clang__)
  UNUSED_VAR(p)
  UNUSED_VAR(data)
  UNUSED_VAR(end)
#endif
  __asm push ebx
  __asm push ebp
  __asm push esi
  __asm push edi

#define STACK_OFFSET 4 * 8
  __asm sub esp, STACK_OFFSET

#define COPY_1(n) \
  __asm mov eax, [ecx + n * 4] \
  __asm mov [esp + n * 4], eax \

#define COPY_2(n) \
  __asm mov eax, [esp + n * 4] \
  __asm mov [ecx + n * 4], eax \

  COPY_1(0)
  __asm mov edi, [ecx + 1 * 4] \
  COPY_1(2)
  COPY_1(3)
  COPY_1(4)
  COPY_1(5)
  COPY_1(6)
  COPY_1(7)

  __asm mov esi, edx \
  __asm mov [esp + 0 * 8 + 4], ecx
  __asm mov ecx, Z7_XXH_PRIME64_2_LOW \
  __asm mov ebp, Z7_XXH_PRIME64_1_LOW \

#define R(n, state1, state1_reg) \
  __asm mov eax, [esi + n * 8] \
  __asm imul ebx, eax, Z7_XXH_PRIME64_2_HIGH \
  __asm add ebx, state1 \
  __asm mul ecx \
  __asm add edx, ebx \
  __asm mov ebx, [esi + n * 8 + 4] \
  __asm imul ebx, ecx \
  __asm add eax, [esp + n * 8] \
  __asm adc edx, ebx \
  __asm mov ebx, eax \
  __asm shld eax, edx, 31 \
  __asm shld edx, ebx, 31 \
  __asm imul state1_reg, eax, Z7_XXH_PRIME64_1_HIGH \
  __asm imul edx, ebp \
  __asm add state1_reg, edx \
  __asm mul ebp \
  __asm add state1_reg, edx \
  __asm mov [esp + n * 8], eax \

#define R2(n) \
  R(n, [esp + n * 8 + 4], ebx) \
  __asm mov [esp + n * 8 + 4], ebx \

  __asm align 16
  __asm main_loop:
  R(0, edi, edi)
  R2(1)
  R2(2)
  R2(3)
  __asm add esi, 32
  __asm cmp esi, [esp + STACK_OFFSET + 4 * 4 + 4]
  __asm jne main_loop

  __asm mov ecx, [esp + 0 * 8 + 4]

  COPY_2(0)
  __asm mov [ecx + 1 * 4], edi
  COPY_2(2)
  COPY_2(3)
  COPY_2(4)
  COPY_2(5)
  COPY_2(6)
  COPY_2(7)

  __asm add esp, STACK_OFFSET
  __asm pop edi
  __asm pop esi
  __asm pop ebp
  __asm pop ebx
  __asm ret 4
}
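/* Interface note (inferred from the code above, a sketch rather than a spec):
   with Z7_FASTCALL the first two arguments arrive in ecx (p) and edx (data),
   the third (end) lives on the stack and is released by "ret 4". Each R()/R2()
   step performs the same operation as the C Xxh64_Round(): a 64-bit multiply
   assembled from mul/imul halves plus a 31-bit rotate done with two shld
   instructions. */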

#else

void
Z7_NO_INLINE
Z7_FASTCALL
Xxh64State_UpdateBlocks(CXxh64State *p, const void *_data, const void *end)
{
  const Byte *data = (const Byte *)_data;
  UInt64 v[4];
  v[0] = p->v[0];
  v[1] = p->v[1];
  v[2] = p->v[2];
  v[3] = p->v[3];
  do
  {
    v[0] = Xxh64_Round(v[0], GetUi64(data));  data += 8;
    v[1] = Xxh64_Round(v[1], GetUi64(data));  data += 8;
    v[2] = Xxh64_Round(v[2], GetUi64(data));  data += 8;
    v[3] = Xxh64_Round(v[3], GetUi64(data));  data += 8;
  }
  while (data != end);
  p->v[0] = v[0];
  p->v[1] = v[1];
  p->v[2] = v[2];
  p->v[3] = v[3];
}

#endif

UInt64 Xxh64State_Digest(const CXxh64State *p, const void *_data, UInt64 count)
{
  UInt64 h = p->v[2];

  if (count >= 32)
  {
    h = Z7_ROTL64(p->v[0], 1) +
        Z7_ROTL64(p->v[1], 7) +
        Z7_ROTL64(h, 12) +
        Z7_ROTL64(p->v[3], 18);
    h = Xxh64_Merge(h, p->v[0]);
    h = Xxh64_Merge(h, p->v[1]);
    h = Xxh64_Merge(h, p->v[2]);
    h = Xxh64_Merge(h, p->v[3]);
  }
  else
    h += Z7_XXH_PRIME64_5;

  h += count;

  // XXH64_finalize():
  {
    unsigned cnt = (unsigned)count & 31;
    const Byte *data = (const Byte *)_data;
    while (cnt >= 8)
    {
      h ^= Xxh64_Round(0, GetUi64(data));
      data += 8;
      h = Z7_ROTL64(h, 27);
      h = MY_MUL64(h, Z7_XXH_PRIME64_1) + Z7_XXH_PRIME64_4;
      cnt -= 8;
    }
    if (cnt >= 4)
    {
      const UInt32 v = GetUi32(data);
      data += 4;
      h ^= MY_MUL_32_64(v, Z7_XXH_PRIME64_1);
      h = Z7_ROTL64(h, 23);
      h = MY_MUL64(h, Z7_XXH_PRIME64_2) + Z7_XXH_PRIME64_3;
      cnt -= 4;
    }
    while (cnt)
    {
      const UInt32 v = *data++;
      h ^= MY_MUL_32_64(v, Z7_XXH_PRIME64_5);
      h = Z7_ROTL64(h, 11);
      h = MY_MUL64(h, Z7_XXH_PRIME64_1);
      cnt--;
    }
    // XXH64_avalanche(h):
    h ^= h >> 33;  h = MY_MUL64(h, Z7_XXH_PRIME64_2);
    h ^= h >> 29;  h = MY_MUL64(h, Z7_XXH_PRIME64_3);
    h ^= h >> 32;
    return h;
  }
}
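/* Usage sketch (illustrative, not part of the original file): one-shot hashing
   of a buffer "buf" of "size" bytes with the block-level API above.
   Xxh64State_UpdateBlocks() consumes whole 32-byte blocks (and must be given at
   least one), while Xxh64State_Digest() handles the tail and receives the total
   byte count:

     UInt64 hash;
     CXxh64State st;
     const size_t blocks = size & ~(size_t)31;
     Xxh64State_Init(&st);
     if (blocks)
       Xxh64State_UpdateBlocks(&st, buf, (const Byte *)buf + blocks);
     hash = Xxh64State_Digest(&st, (const Byte *)buf + blocks, size);
*/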


void Xxh64_Init(CXxh64 *p)
{
  Xxh64State_Init(&p->state);
  p->count = 0;
  p->buf64[0] = 0;
  p->buf64[1] = 0;
  p->buf64[2] = 0;
  p->buf64[3] = 0;
}

void Xxh64_Update(CXxh64 *p, const void *_data, size_t size)
{
  const Byte *data = (const Byte *)_data;
  unsigned cnt;
  if (size == 0)
    return;
  cnt = (unsigned)p->count;
  p->count += size;

  if (cnt &= 31)
  {
    // complete the partially filled 32-byte buffer first
    unsigned rem = 32 - cnt;
    Byte *dest = (Byte *)p->buf64 + cnt;
    if (rem > size)
      rem = (unsigned)size;
    size -= rem;
    cnt += rem;
    // memcpy((Byte *)p->buf64 + cnt, data, rem);
    do
      *dest++ = *data++;
    while (--rem);
    if (cnt != 32)
      return;
    Xxh64State_UpdateBlocks(&p->state, p->buf64, &p->buf64[4]);
  }

  // hash all remaining whole 32-byte blocks directly from the input
  if (size &= ~(size_t)31)
  {
    Xxh64State_UpdateBlocks(&p->state, data, data + size);
    data += size;
  }

  // keep the tail bytes buffered for the next update / final digest
  cnt = (unsigned)p->count & 31;
  if (cnt)
  {
    // memcpy(p->buf64, data, cnt);
    Byte *dest = (Byte *)p->buf64;
    do
      *dest++ = *data++;
    while (--cnt);
  }
}
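
/* Streaming usage sketch (not part of the original file; "Xxh64.h" is assumed
   to declare CXxh64 with the "state", "buf64" and "count" members used above).
   After the last Xxh64_Update() call, buf64 holds the (count & 31) tail bytes,
   so the final value can be produced directly with Xxh64State_Digest():

     CXxh64 x;
     UInt64 hash;
     Xxh64_Init(&x);
     Xxh64_Update(&x, part1, part1Size);
     Xxh64_Update(&x, part2, part2Size);
     hash = Xxh64State_Digest(&x.state, x.buf64, x.count);
     // "hash" now equals XXH64(part1 || part2) with seed 0
*/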