diff options
Diffstat (limited to 'C/LzFindOpt.c')
-rw-r--r-- | C/LzFindOpt.c | 578 |
1 files changed, 578 insertions, 0 deletions
diff --git a/C/LzFindOpt.c b/C/LzFindOpt.c new file mode 100644 index 0000000..8ff006e --- /dev/null +++ b/C/LzFindOpt.c | |||
@@ -0,0 +1,578 @@ | |||
1 | /* LzFindOpt.c -- multithreaded Match finder for LZ algorithms | ||
2 | 2021-07-13 : Igor Pavlov : Public domain */ | ||
3 | |||
4 | #include "Precomp.h" | ||
5 | |||
6 | #include "CpuArch.h" | ||
7 | #include "LzFind.h" | ||
8 | |||
9 | // #include "LzFindMt.h" | ||
10 | |||
11 | // #define LOG_ITERS | ||
12 | |||
13 | // #define LOG_THREAD | ||
14 | |||
15 | #ifdef LOG_THREAD | ||
16 | #include <stdio.h> | ||
17 | #define PRF(x) x | ||
18 | #else | ||
19 | // #define PRF(x) | ||
20 | #endif | ||
21 | |||
22 | #ifdef LOG_ITERS | ||
23 | #include <stdio.h> | ||
24 | UInt64 g_NumIters_Tree; | ||
25 | UInt64 g_NumIters_Loop; | ||
26 | UInt64 g_NumIters_Bytes; | ||
27 | #define LOG_ITER(x) x | ||
28 | #else | ||
29 | #define LOG_ITER(x) | ||
30 | #endif | ||
31 | |||
32 | // ---------- BT THREAD ---------- | ||
33 | |||
34 | #define USE_SON_PREFETCH | ||
35 | #define USE_LONG_MATCH_OPT | ||
36 | |||
37 | #define kEmptyHashValue 0 | ||
38 | |||
39 | // #define CYC_TO_POS_OFFSET 0 | ||
40 | |||
41 | // #define CYC_TO_POS_OFFSET 1 // for debug | ||
42 | |||
43 | /* | ||
44 | MY_NO_INLINE | ||
45 | UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, | ||
46 | UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes) | ||
47 | { | ||
48 | do | ||
49 | { | ||
50 | UInt32 delta; | ||
51 | if (hash == size) | ||
52 | break; | ||
53 | delta = *hash++; | ||
54 | |||
55 | if (delta == 0 || delta > (UInt32)pos) | ||
56 | return NULL; | ||
57 | |||
58 | lenLimit++; | ||
59 | |||
60 | if (delta == (UInt32)pos) | ||
61 | { | ||
62 | CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2; | ||
63 | *d++ = 0; | ||
64 | ptr1[0] = kEmptyHashValue; | ||
65 | ptr1[1] = kEmptyHashValue; | ||
66 | } | ||
67 | else | ||
68 | { | ||
69 | UInt32 *_distances = ++d; | ||
70 | |||
71 | CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1; | ||
72 | CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; | ||
73 | |||
74 | const Byte *len0 = cur, *len1 = cur; | ||
75 | UInt32 cutValue = _cutValue; | ||
76 | const Byte *maxLen = cur + _maxLen; | ||
77 | |||
78 | for (LOG_ITER(g_NumIters_Tree++);;) | ||
79 | { | ||
80 | LOG_ITER(g_NumIters_Loop++); | ||
81 | { | ||
82 | const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; | ||
83 | CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1); | ||
84 | const Byte *len = (len0 < len1 ? len0 : len1); | ||
85 | |||
86 | #ifdef USE_SON_PREFETCH | ||
87 | const UInt32 pair0 = *pair; | ||
88 | #endif | ||
89 | |||
90 | if (len[diff] == len[0]) | ||
91 | { | ||
92 | if (++len != lenLimit && len[diff] == len[0]) | ||
93 | while (++len != lenLimit) | ||
94 | { | ||
95 | LOG_ITER(g_NumIters_Bytes++); | ||
96 | if (len[diff] != len[0]) | ||
97 | break; | ||
98 | } | ||
99 | if (maxLen < len) | ||
100 | { | ||
101 | maxLen = len; | ||
102 | *d++ = (UInt32)(len - cur); | ||
103 | *d++ = delta - 1; | ||
104 | |||
105 | if (len == lenLimit) | ||
106 | { | ||
107 | const UInt32 pair1 = pair[1]; | ||
108 | *ptr1 = | ||
109 | #ifdef USE_SON_PREFETCH | ||
110 | pair0; | ||
111 | #else | ||
112 | pair[0]; | ||
113 | #endif | ||
114 | *ptr0 = pair1; | ||
115 | |||
116 | _distances[-1] = (UInt32)(d - _distances); | ||
117 | |||
118 | #ifdef USE_LONG_MATCH_OPT | ||
119 | |||
120 | if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) | ||
121 | break; | ||
122 | |||
123 | { | ||
124 | for (;;) | ||
125 | { | ||
126 | hash++; | ||
127 | pos++; | ||
128 | cur++; | ||
129 | lenLimit++; | ||
130 | { | ||
131 | CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; | ||
132 | #if 0 | ||
133 | *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff]; | ||
134 | #else | ||
135 | const UInt32 p0 = ptr[0 + (diff * 2)]; | ||
136 | const UInt32 p1 = ptr[1 + (diff * 2)]; | ||
137 | ptr[0] = p0; | ||
138 | ptr[1] = p1; | ||
139 | // ptr[0] = ptr[0 + (diff * 2)]; | ||
140 | // ptr[1] = ptr[1 + (diff * 2)]; | ||
141 | #endif | ||
142 | } | ||
143 | // PrintSon(son + 2, pos - 1); | ||
144 | // printf("\npos = %x delta = %x\n", pos, delta); | ||
145 | len++; | ||
146 | *d++ = 2; | ||
147 | *d++ = (UInt32)(len - cur); | ||
148 | *d++ = delta - 1; | ||
149 | if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) | ||
150 | break; | ||
151 | } | ||
152 | } | ||
153 | #endif | ||
154 | |||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | } | ||
159 | |||
160 | { | ||
161 | const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); | ||
162 | if (len[diff] < len[0]) | ||
163 | { | ||
164 | delta = pair[1]; | ||
165 | if (delta >= curMatch) | ||
166 | return NULL; | ||
167 | *ptr1 = curMatch; | ||
168 | ptr1 = pair + 1; | ||
169 | len1 = len; | ||
170 | } | ||
171 | else | ||
172 | { | ||
173 | delta = *pair; | ||
174 | if (delta >= curMatch) | ||
175 | return NULL; | ||
176 | *ptr0 = curMatch; | ||
177 | ptr0 = pair; | ||
178 | len0 = len; | ||
179 | } | ||
180 | |||
181 | delta = (UInt32)pos - delta; | ||
182 | |||
183 | if (--cutValue == 0 || delta >= pos) | ||
184 | { | ||
185 | *ptr0 = *ptr1 = kEmptyHashValue; | ||
186 | _distances[-1] = (UInt32)(d - _distances); | ||
187 | break; | ||
188 | } | ||
189 | } | ||
190 | } | ||
191 | } // for (tree iterations) | ||
192 | } | ||
193 | pos++; | ||
194 | cur++; | ||
195 | } | ||
196 | while (d < limit); | ||
197 | *posRes = (UInt32)pos; | ||
198 | return d; | ||
199 | } | ||
200 | */ | ||
201 | |||
202 | /* define cbs if you use 2 functions. | ||
203 | GetMatchesSpecN_1() : (pos < _cyclicBufferSize) | ||
204 | GetMatchesSpecN_2() : (pos >= _cyclicBufferSize) | ||
205 | |||
206 | do not define cbs if you use 1 function: | ||
207 | GetMatchesSpecN_2() | ||
208 | */ | ||
209 | |||
210 | // #define cbs _cyclicBufferSize | ||
211 | |||
212 | /* | ||
213 | we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32 | ||
214 | to eliminate "movsx" BUG in old MSVC x64 compiler. | ||
215 | */ | ||
216 | |||
217 | UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, | ||
218 | UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, | ||
219 | size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, | ||
220 | UInt32 *posRes); | ||
221 | |||
222 | MY_NO_INLINE | ||
223 | UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, | ||
224 | UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, | ||
225 | size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, | ||
226 | UInt32 *posRes) | ||
227 | { | ||
228 | do // while (hash != size) | ||
229 | { | ||
230 | UInt32 delta; | ||
231 | |||
232 | #ifndef cbs | ||
233 | UInt32 cbs; | ||
234 | #endif | ||
235 | |||
236 | if (hash == size) | ||
237 | break; | ||
238 | |||
239 | delta = *hash++; | ||
240 | |||
241 | if (delta == 0) | ||
242 | return NULL; | ||
243 | |||
244 | lenLimit++; | ||
245 | |||
246 | #ifndef cbs | ||
247 | cbs = _cyclicBufferSize; | ||
248 | if ((UInt32)pos < cbs) | ||
249 | { | ||
250 | if (delta > (UInt32)pos) | ||
251 | return NULL; | ||
252 | cbs = (UInt32)pos; | ||
253 | } | ||
254 | #endif | ||
255 | |||
256 | if (delta >= cbs) | ||
257 | { | ||
258 | CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); | ||
259 | *d++ = 0; | ||
260 | ptr1[0] = kEmptyHashValue; | ||
261 | ptr1[1] = kEmptyHashValue; | ||
262 | } | ||
263 | else | ||
264 | { | ||
265 | UInt32 *_distances = ++d; | ||
266 | |||
267 | CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; | ||
268 | CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); | ||
269 | |||
270 | UInt32 cutValue = _cutValue; | ||
271 | const Byte *len0 = cur, *len1 = cur; | ||
272 | const Byte *maxLen = cur + _maxLen; | ||
273 | |||
274 | // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else | ||
275 | for (LOG_ITER(g_NumIters_Tree++);;) | ||
276 | { | ||
277 | LOG_ITER(g_NumIters_Loop++); | ||
278 | { | ||
279 | // SPEC code | ||
280 | CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta | ||
281 | + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) | ||
282 | ) << 1); | ||
283 | |||
284 | const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; | ||
285 | const Byte *len = (len0 < len1 ? len0 : len1); | ||
286 | |||
287 | #ifdef USE_SON_PREFETCH | ||
288 | const UInt32 pair0 = *pair; | ||
289 | #endif | ||
290 | |||
291 | if (len[diff] == len[0]) | ||
292 | { | ||
293 | if (++len != lenLimit && len[diff] == len[0]) | ||
294 | while (++len != lenLimit) | ||
295 | { | ||
296 | LOG_ITER(g_NumIters_Bytes++); | ||
297 | if (len[diff] != len[0]) | ||
298 | break; | ||
299 | } | ||
300 | if (maxLen < len) | ||
301 | { | ||
302 | maxLen = len; | ||
303 | *d++ = (UInt32)(len - cur); | ||
304 | *d++ = delta - 1; | ||
305 | |||
306 | if (len == lenLimit) | ||
307 | { | ||
308 | const UInt32 pair1 = pair[1]; | ||
309 | *ptr1 = | ||
310 | #ifdef USE_SON_PREFETCH | ||
311 | pair0; | ||
312 | #else | ||
313 | pair[0]; | ||
314 | #endif | ||
315 | *ptr0 = pair1; | ||
316 | |||
317 | _distances[-1] = (UInt32)(d - _distances); | ||
318 | |||
319 | #ifdef USE_LONG_MATCH_OPT | ||
320 | |||
321 | if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) | ||
322 | break; | ||
323 | |||
324 | { | ||
325 | for (;;) | ||
326 | { | ||
327 | *d++ = 2; | ||
328 | *d++ = (UInt32)(lenLimit - cur); | ||
329 | *d++ = delta - 1; | ||
330 | cur++; | ||
331 | lenLimit++; | ||
332 | // SPEC | ||
333 | _cyclicBufferPos++; | ||
334 | { | ||
335 | // SPEC code | ||
336 | CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1); | ||
337 | const CLzRef *src = dest + ((diff | ||
338 | + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1); | ||
339 | // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; | ||
340 | #if 0 | ||
341 | *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); | ||
342 | #else | ||
343 | const UInt32 p0 = src[0]; | ||
344 | const UInt32 p1 = src[1]; | ||
345 | dest[0] = p0; | ||
346 | dest[1] = p1; | ||
347 | #endif | ||
348 | } | ||
349 | pos++; | ||
350 | hash++; | ||
351 | if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) | ||
352 | break; | ||
353 | } // for() end for long matches | ||
354 | } | ||
355 | #endif | ||
356 | |||
357 | break; // break from TREE iterations | ||
358 | } | ||
359 | } | ||
360 | } | ||
361 | { | ||
362 | const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); | ||
363 | if (len[diff] < len[0]) | ||
364 | { | ||
365 | delta = pair[1]; | ||
366 | *ptr1 = curMatch; | ||
367 | ptr1 = pair + 1; | ||
368 | len1 = len; | ||
369 | if (delta >= curMatch) | ||
370 | return NULL; | ||
371 | } | ||
372 | else | ||
373 | { | ||
374 | delta = *pair; | ||
375 | *ptr0 = curMatch; | ||
376 | ptr0 = pair; | ||
377 | len0 = len; | ||
378 | if (delta >= curMatch) | ||
379 | return NULL; | ||
380 | } | ||
381 | delta = (UInt32)pos - delta; | ||
382 | |||
383 | if (--cutValue == 0 || delta >= cbs) | ||
384 | { | ||
385 | *ptr0 = *ptr1 = kEmptyHashValue; | ||
386 | _distances[-1] = (UInt32)(d - _distances); | ||
387 | break; | ||
388 | } | ||
389 | } | ||
390 | } | ||
391 | } // for (tree iterations) | ||
392 | } | ||
393 | pos++; | ||
394 | _cyclicBufferPos++; | ||
395 | cur++; | ||
396 | } | ||
397 | while (d < limit); | ||
398 | *posRes = (UInt32)pos; | ||
399 | return d; | ||
400 | } | ||
401 | |||
402 | |||
403 | |||
404 | /* | ||
405 | typedef UInt32 uint32plus; // size_t | ||
406 | |||
407 | UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, | ||
408 | UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, | ||
409 | size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, | ||
410 | UInt32 *posRes) | ||
411 | { | ||
412 | do // while (hash != size) | ||
413 | { | ||
414 | UInt32 delta; | ||
415 | |||
416 | #ifndef cbs | ||
417 | UInt32 cbs; | ||
418 | #endif | ||
419 | |||
420 | if (hash == size) | ||
421 | break; | ||
422 | |||
423 | delta = *hash++; | ||
424 | |||
425 | if (delta == 0) | ||
426 | return NULL; | ||
427 | |||
428 | #ifndef cbs | ||
429 | cbs = _cyclicBufferSize; | ||
430 | if ((UInt32)pos < cbs) | ||
431 | { | ||
432 | if (delta > (UInt32)pos) | ||
433 | return NULL; | ||
434 | cbs = (UInt32)pos; | ||
435 | } | ||
436 | #endif | ||
437 | |||
438 | if (delta >= cbs) | ||
439 | { | ||
440 | CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); | ||
441 | *d++ = 0; | ||
442 | ptr1[0] = kEmptyHashValue; | ||
443 | ptr1[1] = kEmptyHashValue; | ||
444 | } | ||
445 | else | ||
446 | { | ||
447 | CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; | ||
448 | CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); | ||
449 | UInt32 *_distances = ++d; | ||
450 | uint32plus len0 = 0, len1 = 0; | ||
451 | UInt32 cutValue = _cutValue; | ||
452 | uint32plus maxLen = _maxLen; | ||
453 | // lenLimit++; // const Byte *lenLimit = cur + _lenLimit; | ||
454 | |||
455 | for (LOG_ITER(g_NumIters_Tree++);;) | ||
456 | { | ||
457 | LOG_ITER(g_NumIters_Loop++); | ||
458 | { | ||
459 | // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; | ||
460 | CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta | ||
461 | + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) | ||
462 | ) << 1); | ||
463 | const Byte *pb = cur - delta; | ||
464 | uint32plus len = (len0 < len1 ? len0 : len1); | ||
465 | |||
466 | #ifdef USE_SON_PREFETCH | ||
467 | const UInt32 pair0 = *pair; | ||
468 | #endif | ||
469 | |||
470 | if (pb[len] == cur[len]) | ||
471 | { | ||
472 | if (++len != lenLimit && pb[len] == cur[len]) | ||
473 | while (++len != lenLimit) | ||
474 | if (pb[len] != cur[len]) | ||
475 | break; | ||
476 | if (maxLen < len) | ||
477 | { | ||
478 | maxLen = len; | ||
479 | *d++ = (UInt32)len; | ||
480 | *d++ = delta - 1; | ||
481 | if (len == lenLimit) | ||
482 | { | ||
483 | { | ||
484 | const UInt32 pair1 = pair[1]; | ||
485 | *ptr0 = pair1; | ||
486 | *ptr1 = | ||
487 | #ifdef USE_SON_PREFETCH | ||
488 | pair0; | ||
489 | #else | ||
490 | pair[0]; | ||
491 | #endif | ||
492 | } | ||
493 | |||
494 | _distances[-1] = (UInt32)(d - _distances); | ||
495 | |||
496 | #ifdef USE_LONG_MATCH_OPT | ||
497 | |||
498 | if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) | ||
499 | break; | ||
500 | |||
501 | { | ||
502 | const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; | ||
503 | for (;;) | ||
504 | { | ||
505 | *d++ = 2; | ||
506 | *d++ = (UInt32)lenLimit; | ||
507 | *d++ = delta - 1; | ||
508 | _cyclicBufferPos++; | ||
509 | { | ||
510 | CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1); | ||
511 | const CLzRef *src = dest + ((diff + | ||
512 | (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1); | ||
513 | #if 0 | ||
514 | *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); | ||
515 | #else | ||
516 | const UInt32 p0 = src[0]; | ||
517 | const UInt32 p1 = src[1]; | ||
518 | dest[0] = p0; | ||
519 | dest[1] = p1; | ||
520 | #endif | ||
521 | } | ||
522 | hash++; | ||
523 | pos++; | ||
524 | cur++; | ||
525 | pb++; | ||
526 | if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) | ||
527 | break; | ||
528 | } | ||
529 | } | ||
530 | #endif | ||
531 | |||
532 | break; | ||
533 | } | ||
534 | } | ||
535 | } | ||
536 | { | ||
537 | const UInt32 curMatch = (UInt32)pos - delta; | ||
538 | if (pb[len] < cur[len]) | ||
539 | { | ||
540 | delta = pair[1]; | ||
541 | *ptr1 = curMatch; | ||
542 | ptr1 = pair + 1; | ||
543 | len1 = len; | ||
544 | } | ||
545 | else | ||
546 | { | ||
547 | delta = *pair; | ||
548 | *ptr0 = curMatch; | ||
549 | ptr0 = pair; | ||
550 | len0 = len; | ||
551 | } | ||
552 | |||
553 | { | ||
554 | if (delta >= curMatch) | ||
555 | return NULL; | ||
556 | delta = (UInt32)pos - delta; | ||
557 | if (delta >= cbs | ||
558 | // delta >= _cyclicBufferSize || delta >= pos | ||
559 | || --cutValue == 0) | ||
560 | { | ||
561 | *ptr0 = *ptr1 = kEmptyHashValue; | ||
562 | _distances[-1] = (UInt32)(d - _distances); | ||
563 | break; | ||
564 | } | ||
565 | } | ||
566 | } | ||
567 | } | ||
568 | } // for (tree iterations) | ||
569 | } | ||
570 | pos++; | ||
571 | _cyclicBufferPos++; | ||
572 | cur++; | ||
573 | } | ||
574 | while (d < limit); | ||
575 | *posRes = (UInt32)pos; | ||
576 | return d; | ||
577 | } | ||
578 | */ | ||