diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2021-12-27 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2022-03-18 15:35:13 +0500 |
commit | f19f813537c7aea1c20749c914e756b54a9c3cf5 (patch) | |
tree | 816ba62ca7c0fa19f2eb46d9e9d6f7dd7c3a744d /C/LzFind.c | |
parent | 98e06a519b63b81986abe76d28887f6984a7732b (diff) | |
download | 7zip-f19f813537c7aea1c20749c914e756b54a9c3cf5.tar.gz 7zip-f19f813537c7aea1c20749c914e756b54a9c3cf5.tar.bz2 7zip-f19f813537c7aea1c20749c914e756b54a9c3cf5.zip |
'21.07'21.07
Diffstat (limited to 'C/LzFind.c')
-rw-r--r-- | C/LzFind.c | 1628 |
1 files changed, 1628 insertions, 0 deletions
diff --git a/C/LzFind.c b/C/LzFind.c new file mode 100644 index 0000000..1b73c28 --- /dev/null +++ b/C/LzFind.c | |||
@@ -0,0 +1,1628 @@ | |||
1 | /* LzFind.c -- Match finder for LZ algorithms | ||
2 | 2021-11-29 : Igor Pavlov : Public domain */ | ||
3 | |||
4 | #include "Precomp.h" | ||
5 | |||
6 | #include <string.h> | ||
7 | // #include <stdio.h> | ||
8 | |||
9 | #include "CpuArch.h" | ||
10 | #include "LzFind.h" | ||
11 | #include "LzHash.h" | ||
12 | |||
13 | #define kBlockMoveAlign (1 << 7) // alignment for memmove() | ||
14 | #define kBlockSizeAlign (1 << 16) // alignment for block allocation | ||
15 | #define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary | ||
16 | |||
17 | #define kEmptyHashValue 0 | ||
18 | |||
19 | #define kMaxValForNormalize ((UInt32)0) | ||
20 | // #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug | ||
21 | |||
22 | // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses | ||
23 | |||
24 | #define GET_AVAIL_BYTES(p) \ | ||
25 | Inline_MatchFinder_GetNumAvailableBytes(p) | ||
26 | |||
27 | |||
28 | // #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) | ||
29 | #define kFix5HashSize kFix4HashSize | ||
30 | |||
31 | /* | ||
32 | HASH2_CALC: | ||
33 | if (hv) match, then cur[0] and cur[1] also match | ||
34 | */ | ||
35 | #define HASH2_CALC hv = GetUi16(cur); | ||
36 | |||
37 | // (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255] | ||
38 | |||
39 | /* | ||
40 | HASH3_CALC: | ||
41 | if (cur[0]) and (h2) match, then cur[1] also match | ||
42 | if (cur[0]) and (hv) match, then cur[1] and cur[2] also match | ||
43 | */ | ||
44 | #define HASH3_CALC { \ | ||
45 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ | ||
46 | h2 = temp & (kHash2Size - 1); \ | ||
47 | hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } | ||
48 | |||
49 | #define HASH4_CALC { \ | ||
50 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ | ||
51 | h2 = temp & (kHash2Size - 1); \ | ||
52 | temp ^= ((UInt32)cur[2] << 8); \ | ||
53 | h3 = temp & (kHash3Size - 1); \ | ||
54 | hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; } | ||
55 | |||
56 | #define HASH5_CALC { \ | ||
57 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ | ||
58 | h2 = temp & (kHash2Size - 1); \ | ||
59 | temp ^= ((UInt32)cur[2] << 8); \ | ||
60 | h3 = temp & (kHash3Size - 1); \ | ||
61 | temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \ | ||
62 | /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \ | ||
63 | hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; } | ||
64 | |||
65 | #define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; | ||
66 | |||
67 | |||
68 | static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) | ||
69 | { | ||
70 | if (!p->directInput) | ||
71 | { | ||
72 | ISzAlloc_Free(alloc, p->bufferBase); | ||
73 | p->bufferBase = NULL; | ||
74 | } | ||
75 | } | ||
76 | |||
77 | |||
78 | static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc) | ||
79 | { | ||
80 | if (blockSize == 0) | ||
81 | return 0; | ||
82 | if (!p->bufferBase || p->blockSize != blockSize) | ||
83 | { | ||
84 | // size_t blockSizeT; | ||
85 | LzInWindow_Free(p, alloc); | ||
86 | p->blockSize = blockSize; | ||
87 | // blockSizeT = blockSize; | ||
88 | |||
89 | // printf("\nblockSize = 0x%x\n", blockSize); | ||
90 | /* | ||
91 | #if defined _WIN64 | ||
92 | // we can allocate 4GiB, but still use UInt32 for (p->blockSize) | ||
93 | // we use UInt32 type for (p->blockSize), because | ||
94 | // we don't want to wrap over 4 GiB, | ||
95 | // when we use (p->streamPos - p->pos) that is UInt32. | ||
96 | if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign) | ||
97 | { | ||
98 | blockSizeT = ((size_t)1 << 32); | ||
99 | printf("\nchanged to blockSizeT = 4GiB\n"); | ||
100 | } | ||
101 | #endif | ||
102 | */ | ||
103 | |||
104 | p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); | ||
105 | // printf("\nbufferBase = %p\n", p->bufferBase); | ||
106 | // return 0; // for debug | ||
107 | } | ||
108 | return (p->bufferBase != NULL); | ||
109 | } | ||
110 | |||
111 | static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } | ||
112 | |||
113 | static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } | ||
114 | |||
115 | |||
116 | MY_NO_INLINE | ||
117 | static void MatchFinder_ReadBlock(CMatchFinder *p) | ||
118 | { | ||
119 | if (p->streamEndWasReached || p->result != SZ_OK) | ||
120 | return; | ||
121 | |||
122 | /* We use (p->streamPos - p->pos) value. | ||
123 | (p->streamPos < p->pos) is allowed. */ | ||
124 | |||
125 | if (p->directInput) | ||
126 | { | ||
127 | UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); | ||
128 | if (curSize > p->directInputRem) | ||
129 | curSize = (UInt32)p->directInputRem; | ||
130 | p->directInputRem -= curSize; | ||
131 | p->streamPos += curSize; | ||
132 | if (p->directInputRem == 0) | ||
133 | p->streamEndWasReached = 1; | ||
134 | return; | ||
135 | } | ||
136 | |||
137 | for (;;) | ||
138 | { | ||
139 | Byte *dest = p->buffer + GET_AVAIL_BYTES(p); | ||
140 | size_t size = (size_t)(p->bufferBase + p->blockSize - dest); | ||
141 | if (size == 0) | ||
142 | { | ||
143 | /* we call ReadBlock() after NeedMove() and MoveBlock(). | ||
144 | NeedMove() and MoveBlock() povide more than (keepSizeAfter) | ||
145 | to the end of (blockSize). | ||
146 | So we don't execute this branch in normal code flow. | ||
147 | We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock(). | ||
148 | */ | ||
149 | // p->result = SZ_ERROR_FAIL; // we can show error here | ||
150 | return; | ||
151 | } | ||
152 | |||
153 | // #define kRead 3 | ||
154 | // if (size > kRead) size = kRead; // for debug | ||
155 | |||
156 | p->result = ISeqInStream_Read(p->stream, dest, &size); | ||
157 | if (p->result != SZ_OK) | ||
158 | return; | ||
159 | if (size == 0) | ||
160 | { | ||
161 | p->streamEndWasReached = 1; | ||
162 | return; | ||
163 | } | ||
164 | p->streamPos += (UInt32)size; | ||
165 | if (GET_AVAIL_BYTES(p) > p->keepSizeAfter) | ||
166 | return; | ||
167 | /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function | ||
168 | (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */ | ||
169 | } | ||
170 | |||
171 | // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter) | ||
172 | } | ||
173 | |||
174 | |||
175 | |||
176 | MY_NO_INLINE | ||
177 | void MatchFinder_MoveBlock(CMatchFinder *p) | ||
178 | { | ||
179 | const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; | ||
180 | const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; | ||
181 | p->buffer = p->bufferBase + keepBefore; | ||
182 | memmove(p->bufferBase, | ||
183 | p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), | ||
184 | keepBefore + (size_t)GET_AVAIL_BYTES(p)); | ||
185 | } | ||
186 | |||
187 | /* We call MoveBlock() before ReadBlock(). | ||
188 | So MoveBlock() can be wasteful operation, if the whole input data | ||
189 | can fit in current block even without calling MoveBlock(). | ||
190 | in important case where (dataSize <= historySize) | ||
191 | condition (p->blockSize > dataSize + p->keepSizeAfter) is met | ||
192 | So there is no MoveBlock() in that case case. | ||
193 | */ | ||
194 | |||
195 | int MatchFinder_NeedMove(CMatchFinder *p) | ||
196 | { | ||
197 | if (p->directInput) | ||
198 | return 0; | ||
199 | if (p->streamEndWasReached || p->result != SZ_OK) | ||
200 | return 0; | ||
201 | return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); | ||
202 | } | ||
203 | |||
204 | void MatchFinder_ReadIfRequired(CMatchFinder *p) | ||
205 | { | ||
206 | if (p->keepSizeAfter >= GET_AVAIL_BYTES(p)) | ||
207 | MatchFinder_ReadBlock(p); | ||
208 | } | ||
209 | |||
210 | |||
211 | |||
212 | static void MatchFinder_SetDefaultSettings(CMatchFinder *p) | ||
213 | { | ||
214 | p->cutValue = 32; | ||
215 | p->btMode = 1; | ||
216 | p->numHashBytes = 4; | ||
217 | p->bigHash = 0; | ||
218 | } | ||
219 | |||
220 | #define kCrcPoly 0xEDB88320 | ||
221 | |||
222 | void MatchFinder_Construct(CMatchFinder *p) | ||
223 | { | ||
224 | unsigned i; | ||
225 | p->bufferBase = NULL; | ||
226 | p->directInput = 0; | ||
227 | p->hash = NULL; | ||
228 | p->expectedDataSize = (UInt64)(Int64)-1; | ||
229 | MatchFinder_SetDefaultSettings(p); | ||
230 | |||
231 | for (i = 0; i < 256; i++) | ||
232 | { | ||
233 | UInt32 r = (UInt32)i; | ||
234 | unsigned j; | ||
235 | for (j = 0; j < 8; j++) | ||
236 | r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); | ||
237 | p->crc[i] = r; | ||
238 | } | ||
239 | } | ||
240 | |||
241 | static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) | ||
242 | { | ||
243 | ISzAlloc_Free(alloc, p->hash); | ||
244 | p->hash = NULL; | ||
245 | } | ||
246 | |||
247 | void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) | ||
248 | { | ||
249 | MatchFinder_FreeThisClassMemory(p, alloc); | ||
250 | LzInWindow_Free(p, alloc); | ||
251 | } | ||
252 | |||
253 | static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) | ||
254 | { | ||
255 | size_t sizeInBytes = (size_t)num * sizeof(CLzRef); | ||
256 | if (sizeInBytes / sizeof(CLzRef) != num) | ||
257 | return NULL; | ||
258 | return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); | ||
259 | } | ||
260 | |||
261 | #if (kBlockSizeReserveMin < kBlockSizeAlign * 2) | ||
262 | #error Stop_Compiling_Bad_Reserve | ||
263 | #endif | ||
264 | |||
265 | |||
266 | |||
267 | static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize) | ||
268 | { | ||
269 | UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter); | ||
270 | /* | ||
271 | if (historySize > kMaxHistorySize) | ||
272 | return 0; | ||
273 | */ | ||
274 | // printf("\nhistorySize == 0x%x\n", historySize); | ||
275 | |||
276 | if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow | ||
277 | return 0; | ||
278 | |||
279 | { | ||
280 | const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign; | ||
281 | const UInt32 rem = kBlockSizeMax - blockSize; | ||
282 | const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2)) | ||
283 | + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here | ||
284 | if (blockSize >= kBlockSizeMax | ||
285 | || rem < kBlockSizeReserveMin) // we reject settings that will be slow | ||
286 | return 0; | ||
287 | if (reserve >= rem) | ||
288 | blockSize = kBlockSizeMax; | ||
289 | else | ||
290 | { | ||
291 | blockSize += reserve; | ||
292 | blockSize &= ~(UInt32)(kBlockSizeAlign - 1); | ||
293 | } | ||
294 | } | ||
295 | // printf("\n LzFind_blockSize = %x\n", blockSize); | ||
296 | // printf("\n LzFind_blockSize = %d\n", blockSize >> 20); | ||
297 | return blockSize; | ||
298 | } | ||
299 | |||
300 | |||
301 | int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, | ||
302 | UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, | ||
303 | ISzAllocPtr alloc) | ||
304 | { | ||
305 | /* we need one additional byte in (p->keepSizeBefore), | ||
306 | since we use MoveBlock() after (p->pos++) and before dictionary using */ | ||
307 | // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug | ||
308 | p->keepSizeBefore = historySize + keepAddBufferBefore + 1; | ||
309 | |||
310 | keepAddBufferAfter += matchMaxLen; | ||
311 | /* we need (p->keepSizeAfter >= p->numHashBytes) */ | ||
312 | if (keepAddBufferAfter < p->numHashBytes) | ||
313 | keepAddBufferAfter = p->numHashBytes; | ||
314 | // keepAddBufferAfter -= 2; // for debug | ||
315 | p->keepSizeAfter = keepAddBufferAfter; | ||
316 | |||
317 | if (p->directInput) | ||
318 | p->blockSize = 0; | ||
319 | if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) | ||
320 | { | ||
321 | const UInt32 newCyclicBufferSize = historySize + 1; // do not change it | ||
322 | UInt32 hs; | ||
323 | p->matchMaxLen = matchMaxLen; | ||
324 | { | ||
325 | // UInt32 hs4; | ||
326 | p->fixedHashSize = 0; | ||
327 | hs = (1 << 16) - 1; | ||
328 | if (p->numHashBytes != 2) | ||
329 | { | ||
330 | hs = historySize; | ||
331 | if (hs > p->expectedDataSize) | ||
332 | hs = (UInt32)p->expectedDataSize; | ||
333 | if (hs != 0) | ||
334 | hs--; | ||
335 | hs |= (hs >> 1); | ||
336 | hs |= (hs >> 2); | ||
337 | hs |= (hs >> 4); | ||
338 | hs |= (hs >> 8); | ||
339 | // we propagated 16 bits in (hs). Low 16 bits must be set later | ||
340 | hs >>= 1; | ||
341 | if (hs >= (1 << 24)) | ||
342 | { | ||
343 | if (p->numHashBytes == 3) | ||
344 | hs = (1 << 24) - 1; | ||
345 | else | ||
346 | hs >>= 1; | ||
347 | /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ | ||
348 | } | ||
349 | |||
350 | // hs = ((UInt32)1 << 25) - 1; // for test | ||
351 | |||
352 | // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) | ||
353 | hs |= (1 << 16) - 1; /* don't change it! */ | ||
354 | |||
355 | // bt5: we adjust the size with recommended minimum size | ||
356 | if (p->numHashBytes >= 5) | ||
357 | hs |= (256 << kLzHash_CrcShift_2) - 1; | ||
358 | } | ||
359 | p->hashMask = hs; | ||
360 | hs++; | ||
361 | |||
362 | /* | ||
363 | hs4 = (1 << 20); | ||
364 | if (hs4 > hs) | ||
365 | hs4 = hs; | ||
366 | // hs4 = (1 << 16); // for test | ||
367 | p->hash4Mask = hs4 - 1; | ||
368 | */ | ||
369 | |||
370 | if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; | ||
371 | if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; | ||
372 | // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; | ||
373 | hs += p->fixedHashSize; | ||
374 | } | ||
375 | |||
376 | { | ||
377 | size_t newSize; | ||
378 | size_t numSons; | ||
379 | p->historySize = historySize; | ||
380 | p->hashSizeSum = hs; | ||
381 | p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) | ||
382 | |||
383 | numSons = newCyclicBufferSize; | ||
384 | if (p->btMode) | ||
385 | numSons <<= 1; | ||
386 | newSize = hs + numSons; | ||
387 | |||
388 | // aligned size is not required here, but it can be better for some loops | ||
389 | #define NUM_REFS_ALIGN_MASK 0xF | ||
390 | newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; | ||
391 | |||
392 | if (p->hash && p->numRefs == newSize) | ||
393 | return 1; | ||
394 | |||
395 | MatchFinder_FreeThisClassMemory(p, alloc); | ||
396 | p->numRefs = newSize; | ||
397 | p->hash = AllocRefs(newSize, alloc); | ||
398 | |||
399 | if (p->hash) | ||
400 | { | ||
401 | p->son = p->hash + p->hashSizeSum; | ||
402 | return 1; | ||
403 | } | ||
404 | } | ||
405 | } | ||
406 | |||
407 | MatchFinder_Free(p, alloc); | ||
408 | return 0; | ||
409 | } | ||
410 | |||
411 | |||
412 | static void MatchFinder_SetLimits(CMatchFinder *p) | ||
413 | { | ||
414 | UInt32 k; | ||
415 | UInt32 n = kMaxValForNormalize - p->pos; | ||
416 | if (n == 0) | ||
417 | n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0) | ||
418 | |||
419 | k = p->cyclicBufferSize - p->cyclicBufferPos; | ||
420 | if (k < n) | ||
421 | n = k; | ||
422 | |||
423 | k = GET_AVAIL_BYTES(p); | ||
424 | { | ||
425 | const UInt32 ksa = p->keepSizeAfter; | ||
426 | UInt32 mm = p->matchMaxLen; | ||
427 | if (k > ksa) | ||
428 | k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock | ||
429 | else if (k >= mm) | ||
430 | { | ||
431 | // the limitation for (p->lenLimit) update | ||
432 | k -= mm; // optimization : to reduce the number of checks | ||
433 | k++; | ||
434 | // k = 1; // non-optimized version : for debug | ||
435 | } | ||
436 | else | ||
437 | { | ||
438 | mm = k; | ||
439 | if (k != 0) | ||
440 | k = 1; | ||
441 | } | ||
442 | p->lenLimit = mm; | ||
443 | } | ||
444 | if (k < n) | ||
445 | n = k; | ||
446 | |||
447 | p->posLimit = p->pos + n; | ||
448 | } | ||
449 | |||
450 | |||
451 | void MatchFinder_Init_LowHash(CMatchFinder *p) | ||
452 | { | ||
453 | size_t i; | ||
454 | CLzRef *items = p->hash; | ||
455 | const size_t numItems = p->fixedHashSize; | ||
456 | for (i = 0; i < numItems; i++) | ||
457 | items[i] = kEmptyHashValue; | ||
458 | } | ||
459 | |||
460 | |||
461 | void MatchFinder_Init_HighHash(CMatchFinder *p) | ||
462 | { | ||
463 | size_t i; | ||
464 | CLzRef *items = p->hash + p->fixedHashSize; | ||
465 | const size_t numItems = (size_t)p->hashMask + 1; | ||
466 | for (i = 0; i < numItems; i++) | ||
467 | items[i] = kEmptyHashValue; | ||
468 | } | ||
469 | |||
470 | |||
471 | void MatchFinder_Init_4(CMatchFinder *p) | ||
472 | { | ||
473 | p->buffer = p->bufferBase; | ||
474 | { | ||
475 | /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. | ||
476 | the code in CMatchFinderMt expects (pos = 1) */ | ||
477 | p->pos = | ||
478 | p->streamPos = | ||
479 | 1; // it's smallest optimal value. do not change it | ||
480 | // 0; // for debug | ||
481 | } | ||
482 | p->result = SZ_OK; | ||
483 | p->streamEndWasReached = 0; | ||
484 | } | ||
485 | |||
486 | |||
487 | // (CYC_TO_POS_OFFSET == 0) is expected by some optimized code | ||
488 | #define CYC_TO_POS_OFFSET 0 | ||
489 | // #define CYC_TO_POS_OFFSET 1 // for debug | ||
490 | |||
491 | void MatchFinder_Init(CMatchFinder *p) | ||
492 | { | ||
493 | MatchFinder_Init_HighHash(p); | ||
494 | MatchFinder_Init_LowHash(p); | ||
495 | MatchFinder_Init_4(p); | ||
496 | // if (readData) | ||
497 | MatchFinder_ReadBlock(p); | ||
498 | |||
499 | /* if we init (cyclicBufferPos = pos), then we can use one variable | ||
500 | instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */ | ||
501 | p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos) | ||
502 | // p->cyclicBufferPos = 0; // smallest value | ||
503 | // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses. | ||
504 | MatchFinder_SetLimits(p); | ||
505 | } | ||
506 | |||
507 | |||
508 | |||
509 | #ifdef MY_CPU_X86_OR_AMD64 | ||
510 | #if defined(__clang__) && (__clang_major__ >= 8) \ | ||
511 | || defined(__GNUC__) && (__GNUC__ >= 8) \ | ||
512 | || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) | ||
513 | #define USE_SATUR_SUB_128 | ||
514 | #define USE_AVX2 | ||
515 | #define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) | ||
516 | #define ATTRIB_AVX2 __attribute__((__target__("avx2"))) | ||
517 | #elif defined(_MSC_VER) | ||
518 | #if (_MSC_VER >= 1600) | ||
519 | #define USE_SATUR_SUB_128 | ||
520 | #if (_MSC_VER >= 1900) | ||
521 | #define USE_AVX2 | ||
522 | #include <immintrin.h> // avx | ||
523 | #endif | ||
524 | #endif | ||
525 | #endif | ||
526 | |||
527 | // #elif defined(MY_CPU_ARM_OR_ARM64) | ||
528 | #elif defined(MY_CPU_ARM64) | ||
529 | |||
530 | #if defined(__clang__) && (__clang_major__ >= 8) \ | ||
531 | || defined(__GNUC__) && (__GNUC__ >= 8) | ||
532 | #define USE_SATUR_SUB_128 | ||
533 | #ifdef MY_CPU_ARM64 | ||
534 | // #define ATTRIB_SSE41 __attribute__((__target__(""))) | ||
535 | #else | ||
536 | // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) | ||
537 | #endif | ||
538 | |||
539 | #elif defined(_MSC_VER) | ||
540 | #if (_MSC_VER >= 1910) | ||
541 | #define USE_SATUR_SUB_128 | ||
542 | #endif | ||
543 | #endif | ||
544 | |||
545 | #if defined(_MSC_VER) && defined(MY_CPU_ARM64) | ||
546 | #include <arm64_neon.h> | ||
547 | #else | ||
548 | #include <arm_neon.h> | ||
549 | #endif | ||
550 | |||
551 | #endif | ||
552 | |||
553 | /* | ||
554 | #ifndef ATTRIB_SSE41 | ||
555 | #define ATTRIB_SSE41 | ||
556 | #endif | ||
557 | #ifndef ATTRIB_AVX2 | ||
558 | #define ATTRIB_AVX2 | ||
559 | #endif | ||
560 | */ | ||
561 | |||
562 | #ifdef USE_SATUR_SUB_128 | ||
563 | |||
564 | // #define _SHOW_HW_STATUS | ||
565 | |||
566 | #ifdef _SHOW_HW_STATUS | ||
567 | #include <stdio.h> | ||
568 | #define _PRF(x) x | ||
569 | _PRF(;) | ||
570 | #else | ||
571 | #define _PRF(x) | ||
572 | #endif | ||
573 | |||
574 | #ifdef MY_CPU_ARM_OR_ARM64 | ||
575 | |||
576 | #ifdef MY_CPU_ARM64 | ||
577 | // #define FORCE_SATUR_SUB_128 | ||
578 | #endif | ||
579 | |||
580 | typedef uint32x4_t v128; | ||
581 | #define SASUB_128(i) \ | ||
582 | *(v128 *)(void *)(items + (i) * 4) = \ | ||
583 | vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); | ||
584 | |||
585 | #else | ||
586 | |||
587 | #include <smmintrin.h> // sse4.1 | ||
588 | |||
589 | typedef __m128i v128; | ||
590 | #define SASUB_128(i) \ | ||
591 | *(v128 *)(void *)(items + (i) * 4) = \ | ||
592 | _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 | ||
593 | |||
594 | #endif | ||
595 | |||
596 | |||
597 | |||
598 | MY_NO_INLINE | ||
599 | static | ||
600 | #ifdef ATTRIB_SSE41 | ||
601 | ATTRIB_SSE41 | ||
602 | #endif | ||
603 | void | ||
604 | MY_FAST_CALL | ||
605 | LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) | ||
606 | { | ||
607 | v128 sub2 = | ||
608 | #ifdef MY_CPU_ARM_OR_ARM64 | ||
609 | vdupq_n_u32(subValue); | ||
610 | #else | ||
611 | _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); | ||
612 | #endif | ||
613 | do | ||
614 | { | ||
615 | SASUB_128(0) | ||
616 | SASUB_128(1) | ||
617 | SASUB_128(2) | ||
618 | SASUB_128(3) | ||
619 | items += 4 * 4; | ||
620 | } | ||
621 | while (items != lim); | ||
622 | } | ||
623 | |||
624 | |||
625 | |||
626 | #ifdef USE_AVX2 | ||
627 | |||
628 | #include <immintrin.h> // avx | ||
629 | |||
630 | #define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 | ||
631 | |||
632 | MY_NO_INLINE | ||
633 | static | ||
634 | #ifdef ATTRIB_AVX2 | ||
635 | ATTRIB_AVX2 | ||
636 | #endif | ||
637 | void | ||
638 | MY_FAST_CALL | ||
639 | LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) | ||
640 | { | ||
641 | __m256i sub2 = _mm256_set_epi32( | ||
642 | (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, | ||
643 | (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); | ||
644 | do | ||
645 | { | ||
646 | SASUB_256(0) | ||
647 | SASUB_256(1) | ||
648 | items += 2 * 8; | ||
649 | } | ||
650 | while (items != lim); | ||
651 | } | ||
652 | #endif // USE_AVX2 | ||
653 | |||
654 | #ifndef FORCE_SATUR_SUB_128 | ||
655 | typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( | ||
656 | UInt32 subValue, CLzRef *items, const CLzRef *lim); | ||
657 | static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; | ||
658 | #endif // FORCE_SATUR_SUB_128 | ||
659 | |||
660 | #endif // USE_SATUR_SUB_128 | ||
661 | |||
662 | |||
663 | // kEmptyHashValue must be zero | ||
664 | // #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; | ||
665 | #define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; | ||
666 | |||
667 | #ifdef FORCE_SATUR_SUB_128 | ||
668 | |||
669 | #define DEFAULT_SaturSub LzFind_SaturSub_128 | ||
670 | |||
671 | #else | ||
672 | |||
673 | #define DEFAULT_SaturSub LzFind_SaturSub_32 | ||
674 | |||
675 | MY_NO_INLINE | ||
676 | static | ||
677 | void | ||
678 | MY_FAST_CALL | ||
679 | LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) | ||
680 | { | ||
681 | do | ||
682 | { | ||
683 | UInt32 v; | ||
684 | SASUB_32(0) | ||
685 | SASUB_32(1) | ||
686 | SASUB_32(2) | ||
687 | SASUB_32(3) | ||
688 | SASUB_32(4) | ||
689 | SASUB_32(5) | ||
690 | SASUB_32(6) | ||
691 | SASUB_32(7) | ||
692 | items += 8; | ||
693 | } | ||
694 | while (items != lim); | ||
695 | } | ||
696 | |||
697 | #endif | ||
698 | |||
699 | |||
700 | MY_NO_INLINE | ||
701 | void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) | ||
702 | { | ||
703 | #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) | ||
704 | |||
705 | CLzRef *lim; | ||
706 | |||
707 | for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) | ||
708 | { | ||
709 | UInt32 v; | ||
710 | SASUB_32(0); | ||
711 | items++; | ||
712 | } | ||
713 | |||
714 | { | ||
715 | #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) | ||
716 | lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); | ||
717 | numItems &= K_NORM_ALIGN_MASK; | ||
718 | if (items != lim) | ||
719 | { | ||
720 | #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) | ||
721 | if (g_LzFind_SaturSub) | ||
722 | g_LzFind_SaturSub(subValue, items, lim); | ||
723 | else | ||
724 | #endif | ||
725 | DEFAULT_SaturSub(subValue, items, lim); | ||
726 | } | ||
727 | items = lim; | ||
728 | } | ||
729 | |||
730 | |||
731 | for (; numItems != 0; numItems--) | ||
732 | { | ||
733 | UInt32 v; | ||
734 | SASUB_32(0); | ||
735 | items++; | ||
736 | } | ||
737 | } | ||
738 | |||
739 | |||
740 | |||
741 | // call MatchFinder_CheckLimits() only after (p->pos++) update | ||
742 | |||
743 | MY_NO_INLINE | ||
744 | static void MatchFinder_CheckLimits(CMatchFinder *p) | ||
745 | { | ||
746 | if (// !p->streamEndWasReached && p->result == SZ_OK && | ||
747 | p->keepSizeAfter == GET_AVAIL_BYTES(p)) | ||
748 | { | ||
749 | // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p)) | ||
750 | if (MatchFinder_NeedMove(p)) | ||
751 | MatchFinder_MoveBlock(p); | ||
752 | MatchFinder_ReadBlock(p); | ||
753 | } | ||
754 | |||
755 | if (p->pos == kMaxValForNormalize) | ||
756 | if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data. | ||
757 | /* | ||
758 | if we disable normalization for last bytes of data, and | ||
759 | if (data_size == 4 GiB), we don't call wastfull normalization, | ||
760 | but (pos) will be wrapped over Zero (0) in that case. | ||
761 | And we cannot resume later to normal operation | ||
762 | */ | ||
763 | { | ||
764 | // MatchFinder_Normalize(p); | ||
765 | /* after normalization we need (p->pos >= p->historySize + 1); */ | ||
766 | /* we can reduce subValue to aligned value, if want to keep alignment | ||
767 | of (p->pos) and (p->buffer) for speculated accesses. */ | ||
768 | const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; | ||
769 | // const UInt32 subValue = (1 << 15); // for debug | ||
770 | // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); | ||
771 | size_t numSonRefs = p->cyclicBufferSize; | ||
772 | if (p->btMode) | ||
773 | numSonRefs <<= 1; | ||
774 | Inline_MatchFinder_ReduceOffsets(p, subValue); | ||
775 | MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); | ||
776 | } | ||
777 | |||
778 | if (p->cyclicBufferPos == p->cyclicBufferSize) | ||
779 | p->cyclicBufferPos = 0; | ||
780 | |||
781 | MatchFinder_SetLimits(p); | ||
782 | } | ||
783 | |||
784 | |||
785 | /* | ||
786 | (lenLimit > maxLen) | ||
787 | */ | ||
788 | MY_FORCE_INLINE | ||
789 | static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, | ||
790 | size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, | ||
791 | UInt32 *d, unsigned maxLen) | ||
792 | { | ||
793 | /* | ||
794 | son[_cyclicBufferPos] = curMatch; | ||
795 | for (;;) | ||
796 | { | ||
797 | UInt32 delta = pos - curMatch; | ||
798 | if (cutValue-- == 0 || delta >= _cyclicBufferSize) | ||
799 | return d; | ||
800 | { | ||
801 | const Byte *pb = cur - delta; | ||
802 | curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; | ||
803 | if (pb[maxLen] == cur[maxLen] && *pb == *cur) | ||
804 | { | ||
805 | UInt32 len = 0; | ||
806 | while (++len != lenLimit) | ||
807 | if (pb[len] != cur[len]) | ||
808 | break; | ||
809 | if (maxLen < len) | ||
810 | { | ||
811 | maxLen = len; | ||
812 | *d++ = len; | ||
813 | *d++ = delta - 1; | ||
814 | if (len == lenLimit) | ||
815 | return d; | ||
816 | } | ||
817 | } | ||
818 | } | ||
819 | } | ||
820 | */ | ||
821 | |||
822 | const Byte *lim = cur + lenLimit; | ||
823 | son[_cyclicBufferPos] = curMatch; | ||
824 | |||
825 | do | ||
826 | { | ||
827 | UInt32 delta; | ||
828 | |||
829 | if (curMatch == 0) | ||
830 | break; | ||
831 | // if (curMatch2 >= curMatch) return NULL; | ||
832 | delta = pos - curMatch; | ||
833 | if (delta >= _cyclicBufferSize) | ||
834 | break; | ||
835 | { | ||
836 | ptrdiff_t diff; | ||
837 | curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; | ||
838 | diff = (ptrdiff_t)0 - (ptrdiff_t)delta; | ||
839 | if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) | ||
840 | { | ||
841 | const Byte *c = cur; | ||
842 | while (*c == c[diff]) | ||
843 | { | ||
844 | if (++c == lim) | ||
845 | { | ||
846 | d[0] = (UInt32)(lim - cur); | ||
847 | d[1] = delta - 1; | ||
848 | return d + 2; | ||
849 | } | ||
850 | } | ||
851 | { | ||
852 | const unsigned len = (unsigned)(c - cur); | ||
853 | if (maxLen < len) | ||
854 | { | ||
855 | maxLen = len; | ||
856 | d[0] = (UInt32)len; | ||
857 | d[1] = delta - 1; | ||
858 | d += 2; | ||
859 | } | ||
860 | } | ||
861 | } | ||
862 | } | ||
863 | } | ||
864 | while (--cutValue); | ||
865 | |||
866 | return d; | ||
867 | } | ||
868 | |||
869 | |||
870 | MY_FORCE_INLINE | ||
871 | UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, | ||
872 | size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, | ||
873 | UInt32 *d, UInt32 maxLen) | ||
874 | { | ||
875 | CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; | ||
876 | CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); | ||
877 | unsigned len0 = 0, len1 = 0; | ||
878 | |||
879 | UInt32 cmCheck; | ||
880 | |||
881 | // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } | ||
882 | |||
883 | cmCheck = (UInt32)(pos - _cyclicBufferSize); | ||
884 | if ((UInt32)pos <= _cyclicBufferSize) | ||
885 | cmCheck = 0; | ||
886 | |||
887 | if (cmCheck < curMatch) | ||
888 | do | ||
889 | { | ||
890 | const UInt32 delta = pos - curMatch; | ||
891 | { | ||
892 | CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); | ||
893 | const Byte *pb = cur - delta; | ||
894 | unsigned len = (len0 < len1 ? len0 : len1); | ||
895 | const UInt32 pair0 = pair[0]; | ||
896 | if (pb[len] == cur[len]) | ||
897 | { | ||
898 | if (++len != lenLimit && pb[len] == cur[len]) | ||
899 | while (++len != lenLimit) | ||
900 | if (pb[len] != cur[len]) | ||
901 | break; | ||
902 | if (maxLen < len) | ||
903 | { | ||
904 | maxLen = (UInt32)len; | ||
905 | *d++ = (UInt32)len; | ||
906 | *d++ = delta - 1; | ||
907 | if (len == lenLimit) | ||
908 | { | ||
909 | *ptr1 = pair0; | ||
910 | *ptr0 = pair[1]; | ||
911 | return d; | ||
912 | } | ||
913 | } | ||
914 | } | ||
915 | if (pb[len] < cur[len]) | ||
916 | { | ||
917 | *ptr1 = curMatch; | ||
918 | // const UInt32 curMatch2 = pair[1]; | ||
919 | // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } | ||
920 | // curMatch = curMatch2; | ||
921 | curMatch = pair[1]; | ||
922 | ptr1 = pair + 1; | ||
923 | len1 = len; | ||
924 | } | ||
925 | else | ||
926 | { | ||
927 | *ptr0 = curMatch; | ||
928 | curMatch = pair[0]; | ||
929 | ptr0 = pair; | ||
930 | len0 = len; | ||
931 | } | ||
932 | } | ||
933 | } | ||
934 | while(--cutValue && cmCheck < curMatch); | ||
935 | |||
936 | *ptr0 = *ptr1 = kEmptyHashValue; | ||
937 | return d; | ||
938 | } | ||
939 | |||
940 | |||
941 | static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, | ||
942 | size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) | ||
943 | { | ||
944 | CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; | ||
945 | CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); | ||
946 | unsigned len0 = 0, len1 = 0; | ||
947 | |||
948 | UInt32 cmCheck; | ||
949 | |||
950 | cmCheck = (UInt32)(pos - _cyclicBufferSize); | ||
951 | if ((UInt32)pos <= _cyclicBufferSize) | ||
952 | cmCheck = 0; | ||
953 | |||
954 | if (// curMatch >= pos || // failure | ||
955 | cmCheck < curMatch) | ||
956 | do | ||
957 | { | ||
958 | const UInt32 delta = pos - curMatch; | ||
959 | { | ||
960 | CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); | ||
961 | const Byte *pb = cur - delta; | ||
962 | unsigned len = (len0 < len1 ? len0 : len1); | ||
963 | if (pb[len] == cur[len]) | ||
964 | { | ||
965 | while (++len != lenLimit) | ||
966 | if (pb[len] != cur[len]) | ||
967 | break; | ||
968 | { | ||
969 | if (len == lenLimit) | ||
970 | { | ||
971 | *ptr1 = pair[0]; | ||
972 | *ptr0 = pair[1]; | ||
973 | return; | ||
974 | } | ||
975 | } | ||
976 | } | ||
977 | if (pb[len] < cur[len]) | ||
978 | { | ||
979 | *ptr1 = curMatch; | ||
980 | curMatch = pair[1]; | ||
981 | ptr1 = pair + 1; | ||
982 | len1 = len; | ||
983 | } | ||
984 | else | ||
985 | { | ||
986 | *ptr0 = curMatch; | ||
987 | curMatch = pair[0]; | ||
988 | ptr0 = pair; | ||
989 | len0 = len; | ||
990 | } | ||
991 | } | ||
992 | } | ||
993 | while(--cutValue && cmCheck < curMatch); | ||
994 | |||
995 | *ptr0 = *ptr1 = kEmptyHashValue; | ||
996 | return; | ||
997 | } | ||
998 | |||
999 | |||
1000 | #define MOVE_POS \ | ||
1001 | ++p->cyclicBufferPos; \ | ||
1002 | p->buffer++; \ | ||
1003 | { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } | ||
1004 | |||
1005 | #define MOVE_POS_RET MOVE_POS return distances; | ||
1006 | |||
1007 | MY_NO_INLINE | ||
1008 | static void MatchFinder_MovePos(CMatchFinder *p) | ||
1009 | { | ||
1010 | /* we go here at the end of stream data, when (avail < num_hash_bytes) | ||
1011 | We don't update sons[cyclicBufferPos << btMode]. | ||
1012 | So (sons) record will contain junk. And we cannot resume match searching | ||
1013 | to normal operation, even if we will provide more input data in buffer. | ||
1014 | p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue | ||
1015 | if (p->btMode) | ||
1016 | p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue | ||
1017 | */ | ||
1018 | MOVE_POS; | ||
1019 | } | ||
1020 | |||
1021 | #define GET_MATCHES_HEADER2(minLen, ret_op) \ | ||
1022 | unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ | ||
1023 | lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ | ||
1024 | cur = p->buffer; | ||
1025 | |||
1026 | #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) | ||
1027 | #define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) | ||
1028 | |||
1029 | #define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue | ||
1030 | |||
1031 | #define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); | ||
1032 | |||
1033 | #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ | ||
1034 | distances = func(MF_PARAMS(p), \ | ||
1035 | distances, (UInt32)_maxLen_); MOVE_POS_RET; | ||
1036 | |||
1037 | #define GET_MATCHES_FOOTER_BT(_maxLen_) \ | ||
1038 | GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) | ||
1039 | |||
1040 | #define GET_MATCHES_FOOTER_HC(_maxLen_) \ | ||
1041 | GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec) | ||
1042 | |||
1043 | |||
1044 | |||
1045 | #define UPDATE_maxLen { \ | ||
1046 | const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \ | ||
1047 | const Byte *c = cur + maxLen; \ | ||
1048 | const Byte *lim = cur + lenLimit; \ | ||
1049 | for (; c != lim; c++) if (*(c + diff) != *c) break; \ | ||
1050 | maxLen = (unsigned)(c - cur); } | ||
1051 | |||
1052 | static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | ||
1053 | { | ||
1054 | GET_MATCHES_HEADER(2) | ||
1055 | HASH2_CALC; | ||
1056 | curMatch = p->hash[hv]; | ||
1057 | p->hash[hv] = p->pos; | ||
1058 | GET_MATCHES_FOOTER_BT(1) | ||
1059 | } | ||
1060 | |||
1061 | UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | ||
1062 | { | ||
1063 | GET_MATCHES_HEADER(3) | ||
1064 | HASH_ZIP_CALC; | ||
1065 | curMatch = p->hash[hv]; | ||
1066 | p->hash[hv] = p->pos; | ||
1067 | GET_MATCHES_FOOTER_BT(2) | ||
1068 | } | ||
1069 | |||
1070 | |||
1071 | #define SET_mmm \ | ||
1072 | mmm = p->cyclicBufferSize; \ | ||
1073 | if (pos < mmm) \ | ||
1074 | mmm = pos; | ||
1075 | |||
1076 | |||
1077 | static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | ||
1078 | { | ||
1079 | UInt32 mmm; | ||
1080 | UInt32 h2, d2, pos; | ||
1081 | unsigned maxLen; | ||
1082 | UInt32 *hash; | ||
1083 | GET_MATCHES_HEADER(3) | ||
1084 | |||
1085 | HASH3_CALC; | ||
1086 | |||
1087 | hash = p->hash; | ||
1088 | pos = p->pos; | ||
1089 | |||
1090 | d2 = pos - hash[h2]; | ||
1091 | |||
1092 | curMatch = (hash + kFix3HashSize)[hv]; | ||
1093 | |||
1094 | hash[h2] = pos; | ||
1095 | (hash + kFix3HashSize)[hv] = pos; | ||
1096 | |||
1097 | SET_mmm | ||
1098 | |||
1099 | maxLen = 2; | ||
1100 | |||
1101 | if (d2 < mmm && *(cur - d2) == *cur) | ||
1102 | { | ||
1103 | UPDATE_maxLen | ||
1104 | distances[0] = (UInt32)maxLen; | ||
1105 | distances[1] = d2 - 1; | ||
1106 | distances += 2; | ||
1107 | if (maxLen == lenLimit) | ||
1108 | { | ||
1109 | SkipMatchesSpec(MF_PARAMS(p)); | ||
1110 | MOVE_POS_RET; | ||
1111 | } | ||
1112 | } | ||
1113 | |||
1114 | GET_MATCHES_FOOTER_BT(maxLen) | ||
1115 | } | ||
1116 | |||
1117 | |||
1118 | static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | ||
1119 | { | ||
1120 | UInt32 mmm; | ||
1121 | UInt32 h2, h3, d2, d3, pos; | ||
1122 | unsigned maxLen; | ||
1123 | UInt32 *hash; | ||
1124 | GET_MATCHES_HEADER(4) | ||
1125 | |||
1126 | HASH4_CALC; | ||
1127 | |||
1128 | hash = p->hash; | ||
1129 | pos = p->pos; | ||
1130 | |||
1131 | d2 = pos - hash [h2]; | ||
1132 | d3 = pos - (hash + kFix3HashSize)[h3]; | ||
1133 | curMatch = (hash + kFix4HashSize)[hv]; | ||
1134 | |||
1135 | hash [h2] = pos; | ||
1136 | (hash + kFix3HashSize)[h3] = pos; | ||
1137 | (hash + kFix4HashSize)[hv] = pos; | ||
1138 | |||
1139 | SET_mmm | ||
1140 | |||
1141 | maxLen = 3; | ||
1142 | |||
1143 | for (;;) | ||
1144 | { | ||
1145 | if (d2 < mmm && *(cur - d2) == *cur) | ||
1146 | { | ||
1147 | distances[0] = 2; | ||
1148 | distances[1] = d2 - 1; | ||
1149 | distances += 2; | ||
1150 | if (*(cur - d2 + 2) == cur[2]) | ||
1151 | { | ||
1152 | // distances[-2] = 3; | ||
1153 | } | ||
1154 | else if (d3 < mmm && *(cur - d3) == *cur) | ||
1155 | { | ||
1156 | d2 = d3; | ||
1157 | distances[1] = d3 - 1; | ||
1158 | distances += 2; | ||
1159 | } | ||
1160 | else | ||
1161 | break; | ||
1162 | } | ||
1163 | else if (d3 < mmm && *(cur - d3) == *cur) | ||
1164 | { | ||
1165 | d2 = d3; | ||
1166 | distances[1] = d3 - 1; | ||
1167 | distances += 2; | ||
1168 | } | ||
1169 | else | ||
1170 | break; | ||
1171 | |||
1172 | UPDATE_maxLen | ||
1173 | distances[-2] = (UInt32)maxLen; | ||
1174 | if (maxLen == lenLimit) | ||
1175 | { | ||
1176 | SkipMatchesSpec(MF_PARAMS(p)); | ||
1177 | MOVE_POS_RET | ||
1178 | } | ||
1179 | break; | ||
1180 | } | ||
1181 | |||
1182 | GET_MATCHES_FOOTER_BT(maxLen) | ||
1183 | } | ||
1184 | |||
1185 | |||
1186 | static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | ||
1187 | { | ||
1188 | UInt32 mmm; | ||
1189 | UInt32 h2, h3, d2, d3, maxLen, pos; | ||
1190 | UInt32 *hash; | ||
1191 | GET_MATCHES_HEADER(5) | ||
1192 | |||
1193 | HASH5_CALC; | ||
1194 | |||
1195 | hash = p->hash; | ||
1196 | pos = p->pos; | ||
1197 | |||
1198 | d2 = pos - hash [h2]; | ||
1199 | d3 = pos - (hash + kFix3HashSize)[h3]; | ||
1200 | // d4 = pos - (hash + kFix4HashSize)[h4]; | ||
1201 | |||
1202 | curMatch = (hash + kFix5HashSize)[hv]; | ||
1203 | |||
1204 | hash [h2] = pos; | ||
1205 | (hash + kFix3HashSize)[h3] = pos; | ||
1206 | // (hash + kFix4HashSize)[h4] = pos; | ||
1207 | (hash + kFix5HashSize)[hv] = pos; | ||
1208 | |||
1209 | SET_mmm | ||
1210 | |||
1211 | maxLen = 4; | ||
1212 | |||
1213 | for (;;) | ||
1214 | { | ||
1215 | if (d2 < mmm && *(cur - d2) == *cur) | ||
1216 | { | ||
1217 | distances[0] = 2; | ||
1218 | distances[1] = d2 - 1; | ||
1219 | distances += 2; | ||
1220 | if (*(cur - d2 + 2) == cur[2]) | ||
1221 | { | ||
1222 | } | ||
1223 | else if (d3 < mmm && *(cur - d3) == *cur) | ||
1224 | { | ||
1225 | distances[1] = d3 - 1; | ||
1226 | distances += 2; | ||
1227 | d2 = d3; | ||
1228 | } | ||
1229 | else | ||
1230 | break; | ||
1231 | } | ||
1232 | else if (d3 < mmm && *(cur - d3) == *cur) | ||
1233 | { | ||
1234 | distances[1] = d3 - 1; | ||
1235 | distances += 2; | ||
1236 | d2 = d3; | ||
1237 | } | ||
1238 | else | ||
1239 | break; | ||
1240 | |||
1241 | distances[-2] = 3; | ||
1242 | if (*(cur - d2 + 3) != cur[3]) | ||
1243 | break; | ||
1244 | UPDATE_maxLen | ||
1245 | distances[-2] = (UInt32)maxLen; | ||
1246 | if (maxLen == lenLimit) | ||
1247 | { | ||
1248 | SkipMatchesSpec(MF_PARAMS(p)); | ||
1249 | MOVE_POS_RET; | ||
1250 | } | ||
1251 | break; | ||
1252 | } | ||
1253 | |||
1254 | GET_MATCHES_FOOTER_BT(maxLen) | ||
1255 | } | ||
1256 | |||
1257 | |||
1258 | static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | ||
1259 | { | ||
1260 | UInt32 mmm; | ||
1261 | UInt32 h2, h3, d2, d3, pos; | ||
1262 | unsigned maxLen; | ||
1263 | UInt32 *hash; | ||
1264 | GET_MATCHES_HEADER(4) | ||
1265 | |||
1266 | HASH4_CALC; | ||
1267 | |||
1268 | hash = p->hash; | ||
1269 | pos = p->pos; | ||
1270 | |||
1271 | d2 = pos - hash [h2]; | ||
1272 | d3 = pos - (hash + kFix3HashSize)[h3]; | ||
1273 | curMatch = (hash + kFix4HashSize)[hv]; | ||
1274 | |||
1275 | hash [h2] = pos; | ||
1276 | (hash + kFix3HashSize)[h3] = pos; | ||
1277 | (hash + kFix4HashSize)[hv] = pos; | ||
1278 | |||
1279 | SET_mmm | ||
1280 | |||
1281 | maxLen = 3; | ||
1282 | |||
1283 | for (;;) | ||
1284 | { | ||
1285 | if (d2 < mmm && *(cur - d2) == *cur) | ||
1286 | { | ||
1287 | distances[0] = 2; | ||
1288 | distances[1] = d2 - 1; | ||
1289 | distances += 2; | ||
1290 | if (*(cur - d2 + 2) == cur[2]) | ||
1291 | { | ||
1292 | // distances[-2] = 3; | ||
1293 | } | ||
1294 | else if (d3 < mmm && *(cur - d3) == *cur) | ||
1295 | { | ||
1296 | d2 = d3; | ||
1297 | distances[1] = d3 - 1; | ||
1298 | distances += 2; | ||
1299 | } | ||
1300 | else | ||
1301 | break; | ||
1302 | } | ||
1303 | else if (d3 < mmm && *(cur - d3) == *cur) | ||
1304 | { | ||
1305 | d2 = d3; | ||
1306 | distances[1] = d3 - 1; | ||
1307 | distances += 2; | ||
1308 | } | ||
1309 | else | ||
1310 | break; | ||
1311 | |||
1312 | UPDATE_maxLen | ||
1313 | distances[-2] = (UInt32)maxLen; | ||
1314 | if (maxLen == lenLimit) | ||
1315 | { | ||
1316 | p->son[p->cyclicBufferPos] = curMatch; | ||
1317 | MOVE_POS_RET; | ||
1318 | } | ||
1319 | break; | ||
1320 | } | ||
1321 | |||
1322 | GET_MATCHES_FOOTER_HC(maxLen); | ||
1323 | } | ||
1324 | |||
1325 | |||
1326 | static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | ||
1327 | { | ||
1328 | UInt32 mmm; | ||
1329 | UInt32 h2, h3, d2, d3, maxLen, pos; | ||
1330 | UInt32 *hash; | ||
1331 | GET_MATCHES_HEADER(5) | ||
1332 | |||
1333 | HASH5_CALC; | ||
1334 | |||
1335 | hash = p->hash; | ||
1336 | pos = p->pos; | ||
1337 | |||
1338 | d2 = pos - hash [h2]; | ||
1339 | d3 = pos - (hash + kFix3HashSize)[h3]; | ||
1340 | // d4 = pos - (hash + kFix4HashSize)[h4]; | ||
1341 | |||
1342 | curMatch = (hash + kFix5HashSize)[hv]; | ||
1343 | |||
1344 | hash [h2] = pos; | ||
1345 | (hash + kFix3HashSize)[h3] = pos; | ||
1346 | // (hash + kFix4HashSize)[h4] = pos; | ||
1347 | (hash + kFix5HashSize)[hv] = pos; | ||
1348 | |||
1349 | SET_mmm | ||
1350 | |||
1351 | maxLen = 4; | ||
1352 | |||
1353 | for (;;) | ||
1354 | { | ||
1355 | if (d2 < mmm && *(cur - d2) == *cur) | ||
1356 | { | ||
1357 | distances[0] = 2; | ||
1358 | distances[1] = d2 - 1; | ||
1359 | distances += 2; | ||
1360 | if (*(cur - d2 + 2) == cur[2]) | ||
1361 | { | ||
1362 | } | ||
1363 | else if (d3 < mmm && *(cur - d3) == *cur) | ||
1364 | { | ||
1365 | distances[1] = d3 - 1; | ||
1366 | distances += 2; | ||
1367 | d2 = d3; | ||
1368 | } | ||
1369 | else | ||
1370 | break; | ||
1371 | } | ||
1372 | else if (d3 < mmm && *(cur - d3) == *cur) | ||
1373 | { | ||
1374 | distances[1] = d3 - 1; | ||
1375 | distances += 2; | ||
1376 | d2 = d3; | ||
1377 | } | ||
1378 | else | ||
1379 | break; | ||
1380 | |||
1381 | distances[-2] = 3; | ||
1382 | if (*(cur - d2 + 3) != cur[3]) | ||
1383 | break; | ||
1384 | UPDATE_maxLen | ||
1385 | distances[-2] = maxLen; | ||
1386 | if (maxLen == lenLimit) | ||
1387 | { | ||
1388 | p->son[p->cyclicBufferPos] = curMatch; | ||
1389 | MOVE_POS_RET; | ||
1390 | } | ||
1391 | break; | ||
1392 | } | ||
1393 | |||
1394 | GET_MATCHES_FOOTER_HC(maxLen); | ||
1395 | } | ||
1396 | |||
1397 | |||
1398 | UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) | ||
1399 | { | ||
1400 | GET_MATCHES_HEADER(3) | ||
1401 | HASH_ZIP_CALC; | ||
1402 | curMatch = p->hash[hv]; | ||
1403 | p->hash[hv] = p->pos; | ||
1404 | GET_MATCHES_FOOTER_HC(2) | ||
1405 | } | ||
1406 | |||
1407 | |||
1408 | static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | ||
1409 | { | ||
1410 | SKIP_HEADER(2) | ||
1411 | { | ||
1412 | HASH2_CALC; | ||
1413 | curMatch = p->hash[hv]; | ||
1414 | p->hash[hv] = p->pos; | ||
1415 | } | ||
1416 | SKIP_FOOTER | ||
1417 | } | ||
1418 | |||
1419 | void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | ||
1420 | { | ||
1421 | SKIP_HEADER(3) | ||
1422 | { | ||
1423 | HASH_ZIP_CALC; | ||
1424 | curMatch = p->hash[hv]; | ||
1425 | p->hash[hv] = p->pos; | ||
1426 | } | ||
1427 | SKIP_FOOTER | ||
1428 | } | ||
1429 | |||
1430 | static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | ||
1431 | { | ||
1432 | SKIP_HEADER(3) | ||
1433 | { | ||
1434 | UInt32 h2; | ||
1435 | UInt32 *hash; | ||
1436 | HASH3_CALC; | ||
1437 | hash = p->hash; | ||
1438 | curMatch = (hash + kFix3HashSize)[hv]; | ||
1439 | hash[h2] = | ||
1440 | (hash + kFix3HashSize)[hv] = p->pos; | ||
1441 | } | ||
1442 | SKIP_FOOTER | ||
1443 | } | ||
1444 | |||
1445 | static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | ||
1446 | { | ||
1447 | SKIP_HEADER(4) | ||
1448 | { | ||
1449 | UInt32 h2, h3; | ||
1450 | UInt32 *hash; | ||
1451 | HASH4_CALC; | ||
1452 | hash = p->hash; | ||
1453 | curMatch = (hash + kFix4HashSize)[hv]; | ||
1454 | hash [h2] = | ||
1455 | (hash + kFix3HashSize)[h3] = | ||
1456 | (hash + kFix4HashSize)[hv] = p->pos; | ||
1457 | } | ||
1458 | SKIP_FOOTER | ||
1459 | } | ||
1460 | |||
1461 | static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | ||
1462 | { | ||
1463 | SKIP_HEADER(5) | ||
1464 | { | ||
1465 | UInt32 h2, h3; | ||
1466 | UInt32 *hash; | ||
1467 | HASH5_CALC; | ||
1468 | hash = p->hash; | ||
1469 | curMatch = (hash + kFix5HashSize)[hv]; | ||
1470 | hash [h2] = | ||
1471 | (hash + kFix3HashSize)[h3] = | ||
1472 | // (hash + kFix4HashSize)[h4] = | ||
1473 | (hash + kFix5HashSize)[hv] = p->pos; | ||
1474 | } | ||
1475 | SKIP_FOOTER | ||
1476 | } | ||
1477 | |||
1478 | |||
1479 | #define HC_SKIP_HEADER(minLen) \ | ||
1480 | do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ | ||
1481 | Byte *cur; \ | ||
1482 | UInt32 *hash; \ | ||
1483 | UInt32 *son; \ | ||
1484 | UInt32 pos = p->pos; \ | ||
1485 | UInt32 num2 = num; \ | ||
1486 | /* (p->pos == p->posLimit) is not allowed here !!! */ \ | ||
1487 | { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \ | ||
1488 | num -= num2; \ | ||
1489 | { const UInt32 cycPos = p->cyclicBufferPos; \ | ||
1490 | son = p->son + cycPos; \ | ||
1491 | p->cyclicBufferPos = cycPos + num2; } \ | ||
1492 | cur = p->buffer; \ | ||
1493 | hash = p->hash; \ | ||
1494 | do { \ | ||
1495 | UInt32 curMatch; \ | ||
1496 | UInt32 hv; | ||
1497 | |||
1498 | |||
1499 | #define HC_SKIP_FOOTER \ | ||
1500 | cur++; pos++; *son++ = curMatch; \ | ||
1501 | } while (--num2); \ | ||
1502 | p->buffer = cur; \ | ||
1503 | p->pos = pos; \ | ||
1504 | if (pos == p->posLimit) MatchFinder_CheckLimits(p); \ | ||
1505 | }} while(num); \ | ||
1506 | |||
1507 | |||
1508 | static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | ||
1509 | { | ||
1510 | HC_SKIP_HEADER(4) | ||
1511 | |||
1512 | UInt32 h2, h3; | ||
1513 | HASH4_CALC; | ||
1514 | curMatch = (hash + kFix4HashSize)[hv]; | ||
1515 | hash [h2] = | ||
1516 | (hash + kFix3HashSize)[h3] = | ||
1517 | (hash + kFix4HashSize)[hv] = pos; | ||
1518 | |||
1519 | HC_SKIP_FOOTER | ||
1520 | } | ||
1521 | |||
1522 | |||
1523 | static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | ||
1524 | { | ||
1525 | HC_SKIP_HEADER(5) | ||
1526 | |||
1527 | UInt32 h2, h3; | ||
1528 | HASH5_CALC | ||
1529 | curMatch = (hash + kFix5HashSize)[hv]; | ||
1530 | hash [h2] = | ||
1531 | (hash + kFix3HashSize)[h3] = | ||
1532 | // (hash + kFix4HashSize)[h4] = | ||
1533 | (hash + kFix5HashSize)[hv] = pos; | ||
1534 | |||
1535 | HC_SKIP_FOOTER | ||
1536 | } | ||
1537 | |||
1538 | |||
1539 | void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) | ||
1540 | { | ||
1541 | HC_SKIP_HEADER(3) | ||
1542 | |||
1543 | HASH_ZIP_CALC; | ||
1544 | curMatch = hash[hv]; | ||
1545 | hash[hv] = pos; | ||
1546 | |||
1547 | HC_SKIP_FOOTER | ||
1548 | } | ||
1549 | |||
1550 | |||
1551 | void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) | ||
1552 | { | ||
1553 | vTable->Init = (Mf_Init_Func)MatchFinder_Init; | ||
1554 | vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; | ||
1555 | vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; | ||
1556 | if (!p->btMode) | ||
1557 | { | ||
1558 | if (p->numHashBytes <= 4) | ||
1559 | { | ||
1560 | vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; | ||
1561 | vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; | ||
1562 | } | ||
1563 | else | ||
1564 | { | ||
1565 | vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; | ||
1566 | vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; | ||
1567 | } | ||
1568 | } | ||
1569 | else if (p->numHashBytes == 2) | ||
1570 | { | ||
1571 | vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; | ||
1572 | vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; | ||
1573 | } | ||
1574 | else if (p->numHashBytes == 3) | ||
1575 | { | ||
1576 | vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; | ||
1577 | vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; | ||
1578 | } | ||
1579 | else if (p->numHashBytes == 4) | ||
1580 | { | ||
1581 | vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; | ||
1582 | vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; | ||
1583 | } | ||
1584 | else | ||
1585 | { | ||
1586 | vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; | ||
1587 | vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; | ||
1588 | } | ||
1589 | } | ||
1590 | |||
1591 | |||
1592 | |||
1593 | void LzFindPrepare() | ||
1594 | { | ||
1595 | #ifndef FORCE_SATUR_SUB_128 | ||
1596 | #ifdef USE_SATUR_SUB_128 | ||
1597 | LZFIND_SATUR_SUB_CODE_FUNC f = NULL; | ||
1598 | #ifdef MY_CPU_ARM_OR_ARM64 | ||
1599 | { | ||
1600 | if (CPU_IsSupported_NEON()) | ||
1601 | { | ||
1602 | // #pragma message ("=== LzFind NEON") | ||
1603 | _PRF(printf("\n=== LzFind NEON\n")); | ||
1604 | f = LzFind_SaturSub_128; | ||
1605 | } | ||
1606 | // f = 0; // for debug | ||
1607 | } | ||
1608 | #else // MY_CPU_ARM_OR_ARM64 | ||
1609 | if (CPU_IsSupported_SSE41()) | ||
1610 | { | ||
1611 | // #pragma message ("=== LzFind SSE41") | ||
1612 | _PRF(printf("\n=== LzFind SSE41\n")); | ||
1613 | f = LzFind_SaturSub_128; | ||
1614 | |||
1615 | #ifdef USE_AVX2 | ||
1616 | if (CPU_IsSupported_AVX2()) | ||
1617 | { | ||
1618 | // #pragma message ("=== LzFind AVX2") | ||
1619 | _PRF(printf("\n=== LzFind AVX2\n")); | ||
1620 | f = LzFind_SaturSub_256; | ||
1621 | } | ||
1622 | #endif | ||
1623 | } | ||
1624 | #endif // MY_CPU_ARM_OR_ARM64 | ||
1625 | g_LzFind_SaturSub = f; | ||
1626 | #endif // USE_SATUR_SUB_128 | ||
1627 | #endif // FORCE_SATUR_SUB_128 | ||
1628 | } | ||