aboutsummaryrefslogtreecommitdiff
path: root/C/LzFindOpt.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--C/LzFindOpt.c578
1 files changed, 578 insertions, 0 deletions
diff --git a/C/LzFindOpt.c b/C/LzFindOpt.c
new file mode 100644
index 0000000..8ff006e
--- /dev/null
+++ b/C/LzFindOpt.c
@@ -0,0 +1,578 @@
1/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
22021-07-13 : Igor Pavlov : Public domain */
3
4#include "Precomp.h"
5
6#include "CpuArch.h"
7#include "LzFind.h"
8
9// #include "LzFindMt.h"
10
11// #define LOG_ITERS
12
13// #define LOG_THREAD
14
15#ifdef LOG_THREAD
16#include <stdio.h>
17#define PRF(x) x
18#else
19// #define PRF(x)
20#endif
21
22#ifdef LOG_ITERS
23#include <stdio.h>
24UInt64 g_NumIters_Tree;
25UInt64 g_NumIters_Loop;
26UInt64 g_NumIters_Bytes;
27#define LOG_ITER(x) x
28#else
29#define LOG_ITER(x)
30#endif
31
32// ---------- BT THREAD ----------
33
34#define USE_SON_PREFETCH
35#define USE_LONG_MATCH_OPT
36
37#define kEmptyHashValue 0
38
39// #define CYC_TO_POS_OFFSET 0
40
41// #define CYC_TO_POS_OFFSET 1 // for debug
42
43/*
44MY_NO_INLINE
45UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
46 UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
47{
48 do
49 {
50 UInt32 delta;
51 if (hash == size)
52 break;
53 delta = *hash++;
54
55 if (delta == 0 || delta > (UInt32)pos)
56 return NULL;
57
58 lenLimit++;
59
60 if (delta == (UInt32)pos)
61 {
62 CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
63 *d++ = 0;
64 ptr1[0] = kEmptyHashValue;
65 ptr1[1] = kEmptyHashValue;
66 }
67else
68{
69 UInt32 *_distances = ++d;
70
71 CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
72 CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
73
74 const Byte *len0 = cur, *len1 = cur;
75 UInt32 cutValue = _cutValue;
76 const Byte *maxLen = cur + _maxLen;
77
78 for (LOG_ITER(g_NumIters_Tree++);;)
79 {
80 LOG_ITER(g_NumIters_Loop++);
81 {
82 const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
83 CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
84 const Byte *len = (len0 < len1 ? len0 : len1);
85
86 #ifdef USE_SON_PREFETCH
87 const UInt32 pair0 = *pair;
88 #endif
89
90 if (len[diff] == len[0])
91 {
92 if (++len != lenLimit && len[diff] == len[0])
93 while (++len != lenLimit)
94 {
95 LOG_ITER(g_NumIters_Bytes++);
96 if (len[diff] != len[0])
97 break;
98 }
99 if (maxLen < len)
100 {
101 maxLen = len;
102 *d++ = (UInt32)(len - cur);
103 *d++ = delta - 1;
104
105 if (len == lenLimit)
106 {
107 const UInt32 pair1 = pair[1];
108 *ptr1 =
109 #ifdef USE_SON_PREFETCH
110 pair0;
111 #else
112 pair[0];
113 #endif
114 *ptr0 = pair1;
115
116 _distances[-1] = (UInt32)(d - _distances);
117
118 #ifdef USE_LONG_MATCH_OPT
119
120 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
121 break;
122
123 {
124 for (;;)
125 {
126 hash++;
127 pos++;
128 cur++;
129 lenLimit++;
130 {
131 CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
132 #if 0
133 *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
134 #else
135 const UInt32 p0 = ptr[0 + (diff * 2)];
136 const UInt32 p1 = ptr[1 + (diff * 2)];
137 ptr[0] = p0;
138 ptr[1] = p1;
139 // ptr[0] = ptr[0 + (diff * 2)];
140 // ptr[1] = ptr[1 + (diff * 2)];
141 #endif
142 }
143 // PrintSon(son + 2, pos - 1);
144 // printf("\npos = %x delta = %x\n", pos, delta);
145 len++;
146 *d++ = 2;
147 *d++ = (UInt32)(len - cur);
148 *d++ = delta - 1;
149 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
150 break;
151 }
152 }
153 #endif
154
155 break;
156 }
157 }
158 }
159
160 {
161 const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
162 if (len[diff] < len[0])
163 {
164 delta = pair[1];
165 if (delta >= curMatch)
166 return NULL;
167 *ptr1 = curMatch;
168 ptr1 = pair + 1;
169 len1 = len;
170 }
171 else
172 {
173 delta = *pair;
174 if (delta >= curMatch)
175 return NULL;
176 *ptr0 = curMatch;
177 ptr0 = pair;
178 len0 = len;
179 }
180
181 delta = (UInt32)pos - delta;
182
183 if (--cutValue == 0 || delta >= pos)
184 {
185 *ptr0 = *ptr1 = kEmptyHashValue;
186 _distances[-1] = (UInt32)(d - _distances);
187 break;
188 }
189 }
190 }
191 } // for (tree iterations)
192}
193 pos++;
194 cur++;
195 }
196 while (d < limit);
197 *posRes = (UInt32)pos;
198 return d;
199}
200*/
201
202/* define cbs if you use 2 functions.
203 GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
204 GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
205
206 do not define cbs if you use 1 function:
207 GetMatchesSpecN_2()
208*/
209
210// #define cbs _cyclicBufferSize
211
212/*
213 we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
214 to eliminate "movsx" BUG in old MSVC x64 compiler.
215*/
216
217UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
218 UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
219 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
220 UInt32 *posRes);
221
222MY_NO_INLINE
223UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
224 UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
225 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
226 UInt32 *posRes)
227{
228 do // while (hash != size)
229 {
230 UInt32 delta;
231
232 #ifndef cbs
233 UInt32 cbs;
234 #endif
235
236 if (hash == size)
237 break;
238
239 delta = *hash++;
240
241 if (delta == 0)
242 return NULL;
243
244 lenLimit++;
245
246 #ifndef cbs
247 cbs = _cyclicBufferSize;
248 if ((UInt32)pos < cbs)
249 {
250 if (delta > (UInt32)pos)
251 return NULL;
252 cbs = (UInt32)pos;
253 }
254 #endif
255
256 if (delta >= cbs)
257 {
258 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
259 *d++ = 0;
260 ptr1[0] = kEmptyHashValue;
261 ptr1[1] = kEmptyHashValue;
262 }
263else
264{
265 UInt32 *_distances = ++d;
266
267 CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
268 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
269
270 UInt32 cutValue = _cutValue;
271 const Byte *len0 = cur, *len1 = cur;
272 const Byte *maxLen = cur + _maxLen;
273
274 // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
275 for (LOG_ITER(g_NumIters_Tree++);;)
276 {
277 LOG_ITER(g_NumIters_Loop++);
278 {
279 // SPEC code
280 CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
281 + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
282 ) << 1);
283
284 const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
285 const Byte *len = (len0 < len1 ? len0 : len1);
286
287 #ifdef USE_SON_PREFETCH
288 const UInt32 pair0 = *pair;
289 #endif
290
291 if (len[diff] == len[0])
292 {
293 if (++len != lenLimit && len[diff] == len[0])
294 while (++len != lenLimit)
295 {
296 LOG_ITER(g_NumIters_Bytes++);
297 if (len[diff] != len[0])
298 break;
299 }
300 if (maxLen < len)
301 {
302 maxLen = len;
303 *d++ = (UInt32)(len - cur);
304 *d++ = delta - 1;
305
306 if (len == lenLimit)
307 {
308 const UInt32 pair1 = pair[1];
309 *ptr1 =
310 #ifdef USE_SON_PREFETCH
311 pair0;
312 #else
313 pair[0];
314 #endif
315 *ptr0 = pair1;
316
317 _distances[-1] = (UInt32)(d - _distances);
318
319 #ifdef USE_LONG_MATCH_OPT
320
321 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
322 break;
323
324 {
325 for (;;)
326 {
327 *d++ = 2;
328 *d++ = (UInt32)(lenLimit - cur);
329 *d++ = delta - 1;
330 cur++;
331 lenLimit++;
332 // SPEC
333 _cyclicBufferPos++;
334 {
335 // SPEC code
336 CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
337 const CLzRef *src = dest + ((diff
338 + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
339 // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
340 #if 0
341 *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
342 #else
343 const UInt32 p0 = src[0];
344 const UInt32 p1 = src[1];
345 dest[0] = p0;
346 dest[1] = p1;
347 #endif
348 }
349 pos++;
350 hash++;
351 if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
352 break;
353 } // for() end for long matches
354 }
355 #endif
356
357 break; // break from TREE iterations
358 }
359 }
360 }
361 {
362 const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
363 if (len[diff] < len[0])
364 {
365 delta = pair[1];
366 *ptr1 = curMatch;
367 ptr1 = pair + 1;
368 len1 = len;
369 if (delta >= curMatch)
370 return NULL;
371 }
372 else
373 {
374 delta = *pair;
375 *ptr0 = curMatch;
376 ptr0 = pair;
377 len0 = len;
378 if (delta >= curMatch)
379 return NULL;
380 }
381 delta = (UInt32)pos - delta;
382
383 if (--cutValue == 0 || delta >= cbs)
384 {
385 *ptr0 = *ptr1 = kEmptyHashValue;
386 _distances[-1] = (UInt32)(d - _distances);
387 break;
388 }
389 }
390 }
391 } // for (tree iterations)
392}
393 pos++;
394 _cyclicBufferPos++;
395 cur++;
396 }
397 while (d < limit);
398 *posRes = (UInt32)pos;
399 return d;
400}
401
402
403
404/*
405typedef UInt32 uint32plus; // size_t
406
407UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
408 UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
409 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
410 UInt32 *posRes)
411{
412 do // while (hash != size)
413 {
414 UInt32 delta;
415
416 #ifndef cbs
417 UInt32 cbs;
418 #endif
419
420 if (hash == size)
421 break;
422
423 delta = *hash++;
424
425 if (delta == 0)
426 return NULL;
427
428 #ifndef cbs
429 cbs = _cyclicBufferSize;
430 if ((UInt32)pos < cbs)
431 {
432 if (delta > (UInt32)pos)
433 return NULL;
434 cbs = (UInt32)pos;
435 }
436 #endif
437
438 if (delta >= cbs)
439 {
440 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
441 *d++ = 0;
442 ptr1[0] = kEmptyHashValue;
443 ptr1[1] = kEmptyHashValue;
444 }
445else
446{
447 CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
448 CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
449 UInt32 *_distances = ++d;
450 uint32plus len0 = 0, len1 = 0;
451 UInt32 cutValue = _cutValue;
452 uint32plus maxLen = _maxLen;
453 // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
454
455 for (LOG_ITER(g_NumIters_Tree++);;)
456 {
457 LOG_ITER(g_NumIters_Loop++);
458 {
459 // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
460 CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
461 + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
462 ) << 1);
463 const Byte *pb = cur - delta;
464 uint32plus len = (len0 < len1 ? len0 : len1);
465
466 #ifdef USE_SON_PREFETCH
467 const UInt32 pair0 = *pair;
468 #endif
469
470 if (pb[len] == cur[len])
471 {
472 if (++len != lenLimit && pb[len] == cur[len])
473 while (++len != lenLimit)
474 if (pb[len] != cur[len])
475 break;
476 if (maxLen < len)
477 {
478 maxLen = len;
479 *d++ = (UInt32)len;
480 *d++ = delta - 1;
481 if (len == lenLimit)
482 {
483 {
484 const UInt32 pair1 = pair[1];
485 *ptr0 = pair1;
486 *ptr1 =
487 #ifdef USE_SON_PREFETCH
488 pair0;
489 #else
490 pair[0];
491 #endif
492 }
493
494 _distances[-1] = (UInt32)(d - _distances);
495
496 #ifdef USE_LONG_MATCH_OPT
497
498 if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
499 break;
500
501 {
502 const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
503 for (;;)
504 {
505 *d++ = 2;
506 *d++ = (UInt32)lenLimit;
507 *d++ = delta - 1;
508 _cyclicBufferPos++;
509 {
510 CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
511 const CLzRef *src = dest + ((diff +
512 (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
513 #if 0
514 *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
515 #else
516 const UInt32 p0 = src[0];
517 const UInt32 p1 = src[1];
518 dest[0] = p0;
519 dest[1] = p1;
520 #endif
521 }
522 hash++;
523 pos++;
524 cur++;
525 pb++;
526 if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
527 break;
528 }
529 }
530 #endif
531
532 break;
533 }
534 }
535 }
536 {
537 const UInt32 curMatch = (UInt32)pos - delta;
538 if (pb[len] < cur[len])
539 {
540 delta = pair[1];
541 *ptr1 = curMatch;
542 ptr1 = pair + 1;
543 len1 = len;
544 }
545 else
546 {
547 delta = *pair;
548 *ptr0 = curMatch;
549 ptr0 = pair;
550 len0 = len;
551 }
552
553 {
554 if (delta >= curMatch)
555 return NULL;
556 delta = (UInt32)pos - delta;
557 if (delta >= cbs
558 // delta >= _cyclicBufferSize || delta >= pos
559 || --cutValue == 0)
560 {
561 *ptr0 = *ptr1 = kEmptyHashValue;
562 _distances[-1] = (UInt32)(d - _distances);
563 break;
564 }
565 }
566 }
567 }
568 } // for (tree iterations)
569}
570 pos++;
571 _cyclicBufferPos++;
572 cur++;
573 }
574 while (d < limit);
575 *posRes = (UInt32)pos;
576 return d;
577}
578*/