diff options

author     Igor Pavlov <87184205+ip7z@users.noreply.github.com>  2021-12-27 00:00:00 +0000
committer  Igor Pavlov <87184205+ip7z@users.noreply.github.com>  2022-03-18 15:35:13 +0500
commit     f19f813537c7aea1c20749c914e756b54a9c3cf5 (patch)
tree       816ba62ca7c0fa19f2eb46d9e9d6f7dd7c3a744d /C/LzFindMt.c
parent     98e06a519b63b81986abe76d28887f6984a7732b (diff)
download   7zip-21.07.tar.gz  7zip-21.07.tar.bz2  7zip-21.07.zip
tag        21.07

Diffstat (limited to 'C/LzFindMt.c')

-rw-r--r--   C/LzFindMt.c   1400
1 file changed, 1400 insertions, 0 deletions

diff --git a/C/LzFindMt.c b/C/LzFindMt.c
new file mode 100644
index 0000000..4e67fc3
--- /dev/null
+++ b/C/LzFindMt.c
@@ -0,0 +1,1400 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2021-12-21 : Igor Pavlov : Public domain */

#include "Precomp.h"

// #include <stdio.h>

#include "CpuArch.h"

#include "LzHash.h"
#include "LzFindMt.h"

// #define LOG_ITERS

// #define LOG_THREAD

#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif

#ifdef LOG_ITERS
#include <stdio.h>
extern UInt64 g_NumIters_Tree;
extern UInt64 g_NumIters_Loop;
extern UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif

#define kMtHashBlockSize ((UInt32)1 << 17)
#define kMtHashNumBlocks (1 << 1)

#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)

#define kMtBtBlockSize ((UInt32)1 << 16)
#define kMtBtNumBlocks (1 << 4)

#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
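
/* [editor's note] Both rings use power-of-two block counts, so
   "(i) & (num - 1)" is a cheap modulo: block index i cycles through the
   2 hash blocks and 16 BT blocks. A minimal standalone sketch of that
   indexing (illustration only, not part of the original file):

#include <stdio.h>
int main(void)
{
  unsigned i;
  for (i = 0; i < 5; i++)
    printf("hash block %u -> offset %u, bt block %u -> offset %u\n",
        i, (i & (2 - 1)) * (1u << 17),   // as in GET_HASH_BLOCK_OFFSET
        i, (i & (16 - 1)) * (1u << 16)); // as in GET_BT_BLOCK_OFFSET
  return 0;
}
*/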

/*
  HASH functions:
  We use raw 8/16 bits from a[1] and a[2],
  xored with crc(a[0]) and crc(a[3]).
  We check a[0] and a[3] only; we don't need to compare a[1] and a[2] in matches.
  Our crc() function provides one-to-one correspondence for low 8-bit values:
    (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
*/
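
/* [editor's note] The one-to-one claim above is easy to verify. Sketch,
   assuming the standard reflected CRC-32 table (polynomial 0xEDB88320)
   that 7-Zip builds in CrcGenerateTable():

#include <stdio.h>
int main(void)
{
  unsigned crc[256], seen[256] = { 0 }, i, j;
  for (i = 0; i < 256; i++)
  {
    unsigned r = i;
    for (j = 0; j < 8; j++)
      r = (r >> 1) ^ (0xEDB88320 & ~((r & 1) - 1)); // branchless CRC step
    crc[i] = r;
  }
  for (i = 0; i < 256; i++)
    seen[crc[i] & 0xFF]++;
  for (i = 0; i < 256; i++)
    if (seen[i] != 1)
      return 1; // would mean the low byte is not a permutation
  printf("(crc[0...0xFF] & 0xFF) <-> [0...0xFF] holds\n");
  return 0;
}
*/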

#define MF(mt) ((mt)->MatchFinder)
#define MF_CRC (p->crc)

// #define MF(mt) (&(mt)->MatchFinder)
// #define MF_CRC (p->MatchFinder.crc)

#define MT_HASH2_CALC \
  h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);

#define MT_HASH3_CALC { \
  UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
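
/* [editor's note] Expanded form of MT_HASH3_CALC, assuming the LzHash.h
   values kHash2Size = (1 << 10) and kHash3Size = (1 << 16) (sketch with
   hypothetical names):

#include <stdint.h>
static void mt_hash3_sketch(const uint8_t *cur, const uint32_t *crc,
    uint32_t *h2, uint32_t *h3)
{
  uint32_t temp = crc[cur[0]] ^ cur[1];         // mix byte 0 (via crc) with byte 1
  *h2 = temp & ((1u << 10) - 1);                // 10-bit index for the 2-byte hash
  *h3 = (temp ^ ((uint32_t)cur[2] << 8))
      & ((1u << 16) - 1);                       // add byte 2: 16-bit index
}
*/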

/*
#define MT_HASH3_CALC__NO_2 { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }

#define __MT_HASH4_CALC { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  temp ^= ((UInt32)cur[2] << 8); \
  h3 = temp & (kHash3Size - 1); \
  h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
  // (kHash4Size - 1);
*/


MY_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
  p->affinity = 0;
  p->wasCreated = False;
  p->csWasInitialized = False;
  p->csWasEntered = False;
  Thread_Construct(&p->thread);
  Event_Construct(&p->canStart);
  Event_Construct(&p->wasStopped);
  Semaphore_Construct(&p->freeSemaphore);
  Semaphore_Construct(&p->filledSemaphore);
}


// #define DEBUG_BUFFER_LOCK // define it to debug lock state

#ifdef DEBUG_BUFFER_LOCK
#include <stdlib.h>
#define BUFFER_MUST_BE_LOCKED(p)   if (!(p)->csWasEntered) exit(1);
#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);
#else
#define BUFFER_MUST_BE_LOCKED(p)
#define BUFFER_MUST_BE_UNLOCKED(p)
#endif

#define LOCK_BUFFER(p) { \
  BUFFER_MUST_BE_UNLOCKED(p); \
  CriticalSection_Enter(&(p)->cs); \
  (p)->csWasEntered = True; }

#define UNLOCK_BUFFER(p) { \
  BUFFER_MUST_BE_LOCKED(p); \
  CriticalSection_Leave(&(p)->cs); \
  (p)->csWasEntered = False; }


MY_NO_INLINE
static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
  UInt32 numBlocks = 0;
  if (p->needStart)
  {
    BUFFER_MUST_BE_UNLOCKED(p)
    p->numProcessedBlocks = 1;
    p->needStart = False;
    p->stopWriting = False;
    p->exit = False;
    Event_Reset(&p->wasStopped);
    Event_Set(&p->canStart);
  }
  else
  {
    UNLOCK_BUFFER(p)
    // we free the current block
    numBlocks = p->numProcessedBlocks++;
    Semaphore_Release1(&p->freeSemaphore);
  }

  // buffer is UNLOCKED here
  Semaphore_Wait(&p->filledSemaphore);
  LOCK_BUFFER(p);
  return numBlocks;
}
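
/* [editor's note] (freeSemaphore, filledSemaphore) implement a classic
   bounded-buffer handshake over the block ring: the worker waits for a free
   block, fills it, and posts "filled"; the reader waits for a filled block
   and, on its next GetNextBlock() call, posts "free" for the block it has
   finished with. A minimal POSIX sketch of the same protocol (names and the
   immediate release are this example's choices, not 7-Zip API):

#include <semaphore.h>
enum { NUM_BLOCKS = 16 };
static sem_t freeSem, filledSem;
static unsigned ring[NUM_BLOCKS];

static void ring_init(void)
{
  sem_init(&freeSem, 0, NUM_BLOCKS); // all blocks start writable
  sem_init(&filledSem, 0, 0);        // none readable yet
}

static void produce(unsigned i, unsigned value) // worker-thread side
{
  sem_wait(&freeSem);           // block until some slot is free
  ring[i % NUM_BLOCKS] = value; // fill the block
  sem_post(&filledSem);         // publish it to the reader
}

static unsigned consume(unsigned i) // LZ-thread side
{
  unsigned v;
  sem_wait(&filledSem);
  v = ring[i % NUM_BLOCKS];
  sem_post(&freeSem); // the real code defers this release to the next call
  return v;
}
*/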


/* if the Writing (Processing) thread was started, we must call MtSync_StopWriting() */

MY_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
  if (!Thread_WasCreated(&p->thread) || p->needStart)
    return;

  PRF(printf("\nMtSync_StopWriting %p\n", p));

  if (p->csWasEntered)
  {
    /* we don't use the buffer in this thread after StopWriting().
       So we UNLOCK the buffer.
       And we restore the default UNLOCKED state for the stopped thread */
    UNLOCK_BUFFER(p)
  }

  /* We send the (p->stopWriting) message and release freeSemaphore
     to free the current block.
     So the thread will see (p->stopWriting) at some
     iteration after Wait(freeSemaphore).
     The thread doesn't need to fill all available free blocks,
     so we get a fast thread stop.
  */

  p->stopWriting = True;
  Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!

  PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
  Event_Wait(&p->wasStopped);
  PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finished\n", p));

  /* 21.03 : we don't restore semaphore counters here.
     We will recreate and reinit semaphores at the next start */

  p->needStart = True;
}


MY_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
  PRF(printf("\nMtSync_Destruct %p\n", p));

  if (Thread_WasCreated(&p->thread))
  {
    /* we want the thread to be in Stopped state before sending the EXIT command.
       note: stop(btSync) will stop (hashSync) also */
    MtSync_StopWriting(p);
    /* the thread is in Stopped state here : (p->needStart == true) */
    p->exit = True;
    // if (p->needStart)  // it's (true)
    Event_Set(&p->canStart);       // we send the EXIT command to the thread
    Thread_Wait_Close(&p->thread); // we wait for the thread to finish
  }

  if (p->csWasInitialized)
  {
    CriticalSection_Delete(&p->cs);
    p->csWasInitialized = False;
  }
  p->csWasEntered = False;

  Event_Close(&p->canStart);
  Event_Close(&p->wasStopped);
  Semaphore_Close(&p->freeSemaphore);
  Semaphore_Close(&p->filledSemaphore);

  p->wasCreated = False;
}


// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
// we want to get real system error codes here instead of SZ_ERROR_THREAD
#define RINOK_THREAD(x) RINOK(x)


// call it before each new file (when a new start is required):
MY_NO_INLINE
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
{
  WRes wres;
  // BUFFER_MUST_BE_UNLOCKED(p)
  if (!p->needStart || p->csWasEntered)
    return SZ_ERROR_FAIL;
  wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
  if (wres == 0)
    wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}


static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  WRes wres;

  if (p->wasCreated)
    return SZ_OK;

  RINOK_THREAD(CriticalSection_Init(&p->cs));
  p->csWasInitialized = True;
  p->csWasEntered = False;

  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));

  p->needStart = True;
  p->exit = True;  /* p->exit is unused before the (canStart) Event.
                      But in case of some unexpected code failure we will get a fast exit from the thread */

  // return ERROR_TOO_MANY_POSTS; // for debug
  // return EINVAL; // for debug

  if (p->affinity != 0)
    wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
  else
    wres = Thread_Create(&p->thread, startAddress, obj);

  RINOK_THREAD(wres);
  p->wasCreated = True;
  return SZ_OK;
}


MY_NO_INLINE
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
  if (wres == 0)
    return 0;
  MtSync_Destruct(p);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}


// ---------- HASH THREAD ----------

#define kMtMaxValForNormalize 0xFFFFFFFF
// #define kMtMaxValForNormalize ((1 << 21)) // for debug
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses

#ifdef MY_CPU_LE_UNALIGN
#define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
#else
#define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
#endif
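
/* [editor's note] Both branches yield p[1] | (p[2] << 8) | (p[3] << 16):
   one unaligned 32-bit little-endian load shifted right by 8, or three byte
   loads (XOR equals OR here because the operands occupy disjoint bits).
   Quick standalone check:

#include <stdio.h>
int main(void)
{
  const unsigned char p[4] = { 0x11, 0x22, 0x33, 0x44 };
  unsigned a = ((unsigned)p[0] | ((unsigned)p[1] << 8)
      | ((unsigned)p[2] << 16) | ((unsigned)p[3] << 24)) >> 8; // LE-load form
  unsigned b = p[1] ^ ((unsigned)p[2] << 8) ^ ((unsigned)p[3] << 16);
  printf("%06X %06X\n", a, b); // prints "443322 443322"
  return 0;
}
*/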

#define GetHeads_DECL(name) \
  static void GetHeads ## name(const Byte *p, UInt32 pos, \
      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)

#define GetHeads_LOOP(v) \
  for (; numHeads != 0; numHeads--) { \
    const UInt32 value = (v); \
    p++; \
    *heads++ = pos - hash[value]; \
    hash[value] = pos++; }

#define DEF_GetHeads2(name, v, action) \
  GetHeads_DECL(name) { action \
  GetHeads_LOOP(v) }

#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)

DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
// BT3 is not good for crc collisions for big hashMask values.
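
/* [editor's note] For reference, DEF_GetHeads(3, ...) above expands to
   roughly this plain C (modulo exact macro text): for each position it
   stores the distance back to the previous head with the same hash value
   and makes the current position the new head.

static void GetHeads3_expanded(const Byte *p, UInt32 pos,
    UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
{
  for (; numHeads != 0; numHeads--)
  {
    const UInt32 value = (crc[p[0]] ^ GetUi16(p + 1)) & hashMask;
    p++;
    *heads++ = pos - hash[value]; // distance to the previous occurrence
    hash[value] = pos++;          // current position becomes the new head
  }
}
*/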

/*
GetHeads_DECL(3b)
{
  UNUSED_VAR(hashMask);
  UNUSED_VAR(crc);
  {
    const Byte *pLim = p + numHeads;
    if (numHeads == 0)
      return;
    pLim--;
    while (p < pLim)
    {
      UInt32 v1 = GetUi32(p);
      UInt32 v0 = v1 & 0xFFFFFF;
      UInt32 h0, h1;
      p += 2;
      v1 >>= 8;
      h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
      h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
      heads += 2;
    }
    if (p == pLim)
    {
      UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
      *heads = pos - hash[v0];
      hash[v0] = pos;
    }
  }
}
*/

/*
GetHeads_DECL(4)
{
  unsigned sh = 0;
  UNUSED_VAR(crc)
  while ((hashMask & 0x80000000) == 0)
  {
    hashMask <<= 1;
    sh++;
  }
  GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
}
#define GetHeads4b GetHeads4
*/

#define USE_GetHeads_LOCAL_CRC

#ifdef USE_GetHeads_LOCAL_CRC

GetHeads_DECL(4)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      // crc1[i] = rotlFixed(v, 8) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p + 1))
}

GetHeads_DECL(4b)
{
  UInt32 crc0[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
      crc0[i] = crc[i] & hashMask;
  }
  GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
}

GetHeads_DECL(5)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  UInt32 crc2[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p + 1))
}

GetHeads_DECL(5b)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
}

#else

DEF_GetHeads(4,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p + 1)) & hashMask)
DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
DEF_GetHeads(5,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)

#endif
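
/* [editor's note] All GetHeads* variants share one signature, so
   HashThreadFunc() below can call the variant selected in
   MatchFinderMt_CreateVTable() through mt->GetHeadsFunc. Usage shape
   (sketch; the table sizes here are hypothetical):

// UInt32 hash[1 << 16] = { 0 }; // head table, one slot per hash value
// UInt32 heads[256];            // out: distance to previous head, per position
// GetHeads4(data, 1, hash, (1 << 16) - 1, heads, 256, crcTable);
*/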


static void HashThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->hashSync;
  PRF(printf("\nHashThreadFunc\n"));

  for (;;)
  {
    UInt32 blockIndex = 0;
    PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
    Event_Wait(&p->canStart);
    PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
    if (p->exit)
    {
      PRF(printf("\nHashThreadFunc : exit \n"));
      return;
    }

    MatchFinder_Init_HighHash(MF(mt));

    for (;;)
    {
      PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));

      {
        CMatchFinder *mf = MF(mt);
        if (MatchFinder_NeedMove(mf))
        {
          CriticalSection_Enter(&mt->btSync.cs);
          CriticalSection_Enter(&mt->hashSync.cs);
          {
            const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
            ptrdiff_t offset;
            MatchFinder_MoveBlock(mf);
            offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
            mt->pointerToCurPos -= offset;
            mt->buffer -= offset;
          }
          CriticalSection_Leave(&mt->hashSync.cs);
          CriticalSection_Leave(&mt->btSync.cs);
          continue;
        }

        Semaphore_Wait(&p->freeSemaphore);

        if (p->exit) // exit is unexpected here. But we check it here for some failure case
          return;

        // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
        if (p->stopWriting)
          break;

        MatchFinder_ReadIfRequired(mf);
        {
          UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
          UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
          heads[0] = 2;
          heads[1] = num;

          /* heads[1] contains the number of avail bytes:
             if (avail < mf->numHashBytes) :
             {
               it means that the stream was finished.
               HASH_THREAD and BT_THREAD must move the position by heads[1] (avail) bytes.
               HASH_THREAD doesn't stop;
               HASH_THREAD fills only the header (2 numbers) for all next blocks:
                 {2, NumHashBytes - 1}, {2, 0}, {2, 0}, ... , {2, 0}
             }
             else
             {
               HASH_THREAD and BT_THREAD must move the position by (heads[0] - 2) bytes;
             }
          */

          if (num >= mf->numHashBytes)
          {
            num = num - mf->numHashBytes + 1;
            if (num > kMtHashBlockSize - 2)
              num = kMtHashBlockSize - 2;

            if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
            {
              const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
              Inline_MatchFinder_ReduceOffsets(mf, subValue);
              MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
            }

            heads[0] = 2 + num;
            mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
          }

          mf->pos += num; // wrap over zero is allowed at the end of stream
          mf->buffer += num;
        }
      }

      Semaphore_Release1(&p->filledSemaphore);
    } // for() processing end

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  } // for() thread end
}
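
/* [editor's note] MatchFinder_Normalize3() (from LzFind.h) is what keeps the
   32-bit positions above from overflowing: every table entry is shifted down
   by subValue, with entries that would go below zero clamped to 0 (the
   empty-slot value). A behavioral reference sketch (the real implementation
   may be unrolled differently):

static void Normalize3_sketch(UInt32 subValue, CLzRef *items, size_t numItems)
{
  size_t k;
  for (k = 0; k < numItems; k++)
  {
    const UInt32 v = items[k];
    items[k] = (v <= subValue) ? 0 : (v - subValue);
  }
}
*/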



// ---------- BT THREAD ----------

/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
   here we define a fixed offset of (p->pos) from (p->cyclicBufferPos) */
#define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug

#define MFMT_GM_INLINE

#ifdef MFMT_GM_INLINE

/*
  we use size_t for (pos) instead of UInt32
  to eliminate the "movsx" BUG in the old MSVC x64 compiler.
*/


UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
    UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
    UInt32 *posRes);

#endif


static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
  UInt32 numProcessed = 0;
  UInt32 curPos = 2;

  /* GetMatchesSpec() functions don't create (len = 1)
     in [len, dist] match pairs, if (p->numHashBytes >= 2).
     Also we suppose here that (matchMaxLen >= 2).
     So the following code for (reserve) is not required:
       UInt32 reserve = (p->matchMaxLen * 2);
       const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
       if (reserve < kNumHashBytes_Max - 1)
         reserve = kNumHashBytes_Max - 1;
       const UInt32 limit = kMtBtBlockSize - (reserve);
  */

  const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);

  d[1] = p->hashNumAvail;

  if (p->failure_BT)
  {
    // printf("\n == 1 BtGetMatches() p->failure_BT\n");
    d[0] = 0;
    // d[1] = 0;
    return;
  }

  while (curPos < limit)
  {
    if (p->hashBufPos == p->hashBufPosLimit)
    {
      // MatchFinderMt_GetNextBlock_Hash(p);
      UInt32 avail;
      {
        const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
        const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
        const UInt32 *h = p->hashBuf + k;
        avail = h[1];
        p->hashBufPosLimit = k + h[0];
        p->hashNumAvail = avail;
        p->hashBufPos = k + 2;
      }

      {
        /* we must prevent UInt32 overflow for the total avail value,
           if avail was increased with a new hash block */
        UInt32 availSum = numProcessed + avail;
        if (availSum < numProcessed)
          availSum = (UInt32)(Int32)-1;
        d[1] = availSum;
      }

      if (avail >= p->numHashBytes)
        continue;

      // if (p->hashBufPos != p->hashBufPosLimit) exit(1);

      /* (avail < p->numHashBytes)
         It means that the stream was finished.
         And (avail) is the number of remaining bytes;
         we fill (d) for (avail) bytes for LZ_THREAD (receiver),
         but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */

      /* here we suppose that we have enough space:
         (kMtBtBlockSize - curPos >= p->hashNumAvail) */
      p->hashNumAvail = 0;
      d[0] = curPos + avail;
      d += curPos;
      for (; avail != 0; avail--)
        *d++ = 0;
      return;
    }
    {
      UInt32 size = p->hashBufPosLimit - p->hashBufPos;
      UInt32 pos = p->pos;
      UInt32 cyclicBufferPos = p->cyclicBufferPos;
      UInt32 lenLimit = p->matchMaxLen;
      if (lenLimit >= p->hashNumAvail)
        lenLimit = p->hashNumAvail;
      {
        UInt32 size2 = p->hashNumAvail - lenLimit + 1;
        if (size2 < size)
          size = size2;
        size2 = p->cyclicBufferSize - cyclicBufferPos;
        if (size2 < size)
          size = size2;
      }

      if (pos > (UInt32)kMtMaxValForNormalize - size)
      {
        const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
        pos -= subValue;
        p->pos = pos;
        MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
      }

      #ifndef MFMT_GM_INLINE
      while (curPos < limit && size-- != 0)
      {
        UInt32 *startDistances = d + curPos;
        UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
            pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
            startDistances + 1, p->numHashBytes - 1) - startDistances);
        *startDistances = num - 1;
        curPos += num;
        cyclicBufferPos++;
        pos++;
        p->buffer++;
      }
      #else
      {
        UInt32 posRes = pos;
        const UInt32 *d_end;
        {
          d_end = GetMatchesSpecN_2(
              p->buffer + lenLimit - 1,
              pos, p->buffer, p->son, p->cutValue, d + curPos,
              p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
              d + limit, p->hashBuf + p->hashBufPos + size,
              cyclicBufferPos, p->cyclicBufferSize,
              &posRes);
        }
        {
          if (!d_end)
          {
            // printf("\n == 2 BtGetMatches() p->failure_BT\n");
            // internal data failure
            p->failure_BT = True;
            d[0] = 0;
            // d[1] = 0;
            return;
          }
        }
        curPos = (UInt32)(d_end - d);
        {
          const UInt32 processed = posRes - pos;
          pos = posRes;
          p->hashBufPos += processed;
          cyclicBufferPos += processed;
          p->buffer += processed;
        }
      }
      #endif

      {
        const UInt32 processed = pos - p->pos;
        numProcessed += processed;
        p->hashNumAvail -= processed;
        p->pos = pos;
      }
      if (cyclicBufferPos == p->cyclicBufferSize)
        cyclicBufferPos = 0;
      p->cyclicBufferPos = cyclicBufferPos;
    }
  }

  d[0] = curPos;
}
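
/* [editor's note] Resulting layout of one BT block, as consumed by
   MatchFinderMt_GetMatches() below: d[0] = number of UInt32 items used in
   the block, d[1] = available bytes; then one record per input position:
   { n, len1, dist1, len2, dist2, ... }, where n counts the following UInt32
   values (two per match pair, distances in the dist-minus-1 convention;
   n == 0 means no tree match for that position). Decoding sketch:

static void DecodeBtBlock_sketch(const UInt32 *block)
{
  const UInt32 *q = block + 2;
  const UInt32 *lim = block + block[0];
  while (q < lim)
  {
    UInt32 n = *q++;          // 2 * number of (len, dist) pairs
    const UInt32 *end = q + n;
    for (; q != end; q += 2)
    {
      // q[0] = match length, q[1] = distance - 1
    }
  }
}
*/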


static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
  CMtSync *sync = &p->hashSync;

  BUFFER_MUST_BE_UNLOCKED(sync)

  if (!sync->needStart)
  {
    LOCK_BUFFER(sync)
  }

  BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));

  /* We suppose that we have called GetNextBlock() from the start.
     So the buffer is LOCKED */

  UNLOCK_BUFFER(sync)
}


MY_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->btSync;
  for (;;)
  {
    UInt32 blockIndex = 0;
    Event_Wait(&p->canStart);

    for (;;)
    {
      PRF(printf("  BT thread block = %d  pos = %d\n", (unsigned)blockIndex, mt->pos));
      /* (p->exit == true) is possible after (p->canStart) at first loop iteration
         and is unexpected after more Wait(freeSemaphore) iterations */
      if (p->exit)
        return;

      Semaphore_Wait(&p->freeSemaphore);

      // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
      if (p->stopWriting)
        break;

      BtFillBlock(mt, blockIndex++);

      Semaphore_Release1(&p->filledSemaphore);
    }

    // we stop HASH_THREAD here
    MtSync_StopWriting(&mt->hashSync);

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  }
}


void MatchFinderMt_Construct(CMatchFinderMt *p)
{
  p->hashBuf = NULL;
  MtSync_Construct(&p->hashSync);
  MtSync_Construct(&p->btSync);
}

static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  ISzAlloc_Free(alloc, p->hashBuf);
  p->hashBuf = NULL;
}

void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  /*
     HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
     So we must be sure that HASH_THREAD will not use CriticalSection(s)
     after we delete the CriticalSection(s) here.

     we call ReleaseStream(p)
       that calls StopWriting(btSync)
         that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
     after StopWriting() it's safe to destruct MtSync(s) in any order */

  MatchFinderMt_ReleaseStream(p);

  MtSync_Destruct(&p->btSync);
  MtSync_Destruct(&p->hashSync);

  LOG_ITER(
      printf("\nTree %9d * %7d iter = %9d = sum  :  bytes = %9d\n",
          (UInt32)(g_NumIters_Tree / 1000),
          (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
          (UInt32)(g_NumIters_Loop / 1000),
          (UInt32)(g_NumIters_Bytes / 1000)
      ));

  MatchFinderMt_FreeMem(p, alloc);
}


#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)


static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p);  return 0; }

static THREAD_FUNC_DECL BtThreadFunc2(void *p)
{
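  /* [editor's note] The dummy buffer below is, as far as this editor can
     tell, a stack-offset trick: it shifts this thread's frame so the worker
     threads don't run at identical stack offsets, and the stores plus the
     dependent branch keep the compiler from optimizing the buffer away. */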
  Byte allocaDummy[0x180];
  unsigned i = 0;
  for (i = 0; i < 16; i++)
    allocaDummy[i] = (Byte)0;
  if (allocaDummy[0] == 0)
    BtThreadFunc((CMatchFinderMt *)p);
  return 0;
}


SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
  CMatchFinder *mf = MF(p);
  p->historySize = historySize;
  if (kMtBtBlockSize <= matchMaxLen * 4)
    return SZ_ERROR_PARAM;
  if (!p->hashBuf)
  {
    p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
    if (!p->hashBuf)
      return SZ_ERROR_MEM;
    p->btBuf = p->hashBuf + kHashBufferSize;
  }
  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
  keepAddBufferAfter += kMtHashBlockSize;
  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
    return SZ_ERROR_MEM;

  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p));
  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p));
  return SZ_OK;
}


SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
{
  RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks));
  return MtSync_Init(&p->btSync, kMtBtNumBlocks);
}


static void MatchFinderMt_Init(CMatchFinderMt *p)
{
  CMatchFinder *mf = MF(p);

  p->btBufPos =
  p->btBufPosLimit = NULL;
  p->hashBufPos =
  p->hashBufPosLimit = 0;
  p->hashNumAvail = 0; // 21.03

  p->failure_BT = False;

  /* Init without data reading. We don't want to read data in this thread */
  MatchFinder_Init_4(mf);

  MatchFinder_Init_LowHash(mf);

  p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
  p->btNumAvailBytes = 0;
  p->failure_LZ_BT = False;
  // p->failure_LZ_LZ = False;

  p->lzPos =
      1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  p->hash = mf->hash;
  p->fixedHashSize = mf->fixedHashSize;
  // p->hash4Mask = mf->hash4Mask;
  p->crc = mf->crc;
  // memcpy(p->crc, mf->crc, sizeof(mf->crc));

  p->son = mf->son;
  p->matchMaxLen = mf->matchMaxLen;
  p->numHashBytes = mf->numHashBytes;

  /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
  // mf->streamPos = mf->pos = 1; // optimal smallest value
  //    0; // for debug: ignores match to start
  //    kNormalizeAlign; // for debug

  /* we must init (p->pos = mf->pos) for BT, because
     BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
  p->pos = mf->pos; // do not change it

  p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
  p->cyclicBufferSize = mf->cyclicBufferSize;
  p->buffer = mf->buffer;
  p->cutValue = mf->cutValue;
  // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}


/* ReleaseStream is required to finish multithreading */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
  // Sleep(1); // for debug
  MtSync_StopWriting(&p->btSync);
  // Sleep(200); // for debug
  /* p->MatchFinder->ReleaseStream(); */
}


MY_NO_INLINE
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
  if (p->failure_LZ_BT)
    p->btBufPos = p->failureBuf;
  else
  {
    const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
    const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
    {
      const UInt32 numItems = bt[0];
      p->btBufPosLimit = bt + numItems;
      p->btNumAvailBytes = bt[1];
      p->btBufPos = bt + 2;
      if (numItems < 2 || numItems > kMtBtBlockSize)
      {
        p->failureBuf[0] = 0;
        p->btBufPos = p->failureBuf;
        p->btBufPosLimit = p->failureBuf + 1;
        p->failure_LZ_BT = True;
        // p->btNumAvailBytes = 0;
        /* we don't want to decrease AvailBytes that was loaded before;
           that can be unexpected for code that has loaded another value before */
      }
    }

    if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
    {
      /* we don't check (lzPos) against the exact available bytes in (btBuf).
         (fixedHashSize) is small, so normalization is fast */
      const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
      p->lzPos -= subValue;
      MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
    }
  }
  return p->btNumAvailBytes;
}



static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
{
  return p->pointerToCurPos;
}


#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);


static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
{
  if (p->btBufPos != p->btBufPosLimit)
    return p->btNumAvailBytes;
  return MatchFinderMt_GetNextBlock_Bt(p);
}


// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }
#define CHECK_FAILURE_LZ(_match_, _pos_)

static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, c2;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH2_CALC

  c2 = hash[h2];
  hash[h2] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 2;
      *d++ = m - c2 - 1;
    }
  }

  return d;
}

static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, c2, c3;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC

  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      d[1] = m - c2 - 1;
      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
      {
        d[0] = 3;
        return d + 2;
      }
      d[0] = 2;
      d += 2;
    }
  }

  if (c3 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c3, m)
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 3;
      *d++ = m - c3 - 1;
    }
  }

  return d;
}


#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;

/*
static
UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  UInt32 matchMinPos;
  UInt32 avail = p->btNumAvailBytes - 1;
  p->btBufPos = btLim;

  {
    p->btNumAvailBytes = avail;

#define BT_HASH_BYTES_MAX 5

    matchMinPos = p->lzPos;

    if (len != 0)
      matchMinPos -= bt[1];
    else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
    {
      INCREASE_LZ_POS
      return d;
    }
    else
    {
      const UInt32 hs = p->historySize;
      if (matchMinPos > hs)
        matchMinPos -= hs;
      else
        matchMinPos = 1;
    }
  }

  for (;;)
  {
    UInt32 h2, h3, c2, c3;
    UInt32 *hash = p->hash;
    const Byte *cur = p->pointerToCurPos;
    UInt32 m = p->lzPos;
    MT_HASH3_CALC

    c2 = hash[h2];
    c3 = (hash + kFix3HashSize)[h3];

    hash[h2] = m;
    (hash + kFix3HashSize)[h3] = m;

    if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      d[1] = m - c2 - 1;
      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
      {
        d[0] = 3;
        d += 2;
        break;
      }
      // else
      {
        d[0] = 2;
        d += 2;
      }
    }
    if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 3;
      *d++ = m - c3 - 1;
    }
    break;
  }

  if (len != 0)
  {
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (bt != btLim);
  }
  INCREASE_LZ_POS
  return d;
}
*/


static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC
  // MT_HASH4_CALC
  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];
  // c4 = (hash + kFix4HashSize)[h4];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;
  // (hash + kFix4HashSize)[h4] = m;

#define _USE_H2

#ifdef _USE_H2
  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c2 - 1;
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
    {
      // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
      // return d + 2;

      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
      {
        d[0] = 4;
        return d + 2;
      }
      d[0] = 3;
      d += 2;

#ifdef _USE_H4
      if (c4 >= matchMinPos)
        if (
            cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
            cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
            )
        {
          *d++ = 4;
          *d++ = m - c4 - 1;
        }
#endif
      return d;
    }
    d[0] = 2;
    d += 2;
  }
#endif

  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c3 - 1;
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
    {
      d[0] = 4;
      return d + 2;
    }
    d[0] = 3;
    d += 2;
  }

#ifdef _USE_H4
  if (c4 >= matchMinPos)
    if (
        cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
        cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
        )
    {
      *d++ = 4;
      *d++ = m - c4 - 1;
    }
#endif

  return d;
}


static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  p->btBufPos = btLim;
  p->btNumAvailBytes--;
  INCREASE_LZ_POS
  {
    while (bt != btLim)
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
  }
  return d;
}



static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  UInt32 len = *bt++;
  const UInt32 avail = p->btNumAvailBytes - 1;
  p->btNumAvailBytes = avail;
  p->btBufPos = bt + len;
  if (len == 0)
  {
#define BT_HASH_BYTES_MAX 5
    if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
    {
      UInt32 m = p->lzPos;
      if (m > p->historySize)
        m -= p->historySize;
      else
        m = 1;
      d = p->MixMatchesFunc(p, m, d);
    }
  }
  else
  {
    /*
      first match pair from BinTree: (match_len, match_dist),
      (match_len >= numHashBytes).
      MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
    */
    d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
    // if (d) // check for failure
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (len -= 2);
  }
  INCREASE_LZ_POS
  return d;
}
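
/* [editor's note] Shape of a consumer loop over the (len, dist) pairs
   returned above (sketch; the array size is hypothetical but must cover the
   worst case, roughly matchMaxLen * 2 slots plus headroom):

static void ConsumeMatches_sketch(CMatchFinderMt *mt)
{
  UInt32 matches[1 << 9];
  UInt32 *q = matches;
  UInt32 *end = MatchFinderMt_GetMatches(mt, q);
  for (; q != end; q += 2)
  {
    const UInt32 len = q[0];  // match length (>= 2)
    const UInt32 dist = q[1]; // distance - 1 (LZMA convention)
    (void)len; (void)dist;    // pick the best pair here
  }
}
*/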

#define SKIP_HEADER2_MT  do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);

static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER2_MT { p->btNumAvailBytes--;
  SKIP_FOOTER_MT
}

static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(2)
      UInt32 h2;
      MT_HASH2_CALC
      hash[h2] = p->lzPos;
  SKIP_FOOTER_MT
}

static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(3)
      UInt32 h2, h3;
      MT_HASH3_CALC
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}

/*
// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
// The difference is that MatchFinderMt3_Skip() updates the hash for the last 3 bytes of the stream.

static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(4)
      UInt32 h2, h3; // h4
      MT_HASH3_CALC
      // MT_HASH4_CALC
      // (hash + kFix4HashSize)[h4] =
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}
*/

void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
  vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
  vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;

  switch (MF(p)->numHashBytes)
  {
    case 2:
      p->GetHeadsFunc = GetHeads2;
      p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
      vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
      break;
    case 3:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
      break;
    case 4:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;

      // it's the fast inline version of GetMatches()
      // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;

      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
      break;
    default:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
      vTable->Skip =
          (Mf_Skip_Func)MatchFinderMt3_Skip;
          // (Mf_Skip_Func)MatchFinderMt4_Skip;
      break;
  }
}
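
/* [editor's note] Typical wiring, in the shape LzmaEnc.c uses it (sketch;
   error handling elided, and the exact per-stream call order may differ):

// CMatchFinderMt mf;
// IMatchFinder2 vt;
// MatchFinderMt_Construct(&mf);
// if (MatchFinderMt_Create(&mf, historySize, keepBefore, matchMaxLen,
//     keepAfter, alloc) == SZ_OK)
// {
//   MatchFinderMt_CreateVTable(&mf, &vt);
//   MatchFinderMt_InitMt(&mf); // arm both worker threads for a new stream
//   vt.Init(&mf);              // then drive it through the IMatchFinder2 calls
// }
// ...
// MatchFinderMt_Destruct(&mf, alloc); // stops the threads and frees buffers
*/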