aboutsummaryrefslogtreecommitdiff
path: root/C/LzFind.c
diff options
context:
space:
mode:
authorIgor Pavlov <87184205+ip7z@users.noreply.github.com>2023-06-21 00:00:00 +0000
committerIgor Pavlov <87184205+ip7z@users.noreply.github.com>2023-12-17 14:59:19 +0500
commit5b39dc76f1bc82f941d5c800ab9f34407a06b53a (patch)
treefe5e17420300b715021a76328444088d32047963 /C/LzFind.c
parent93be7d4abfd4233228f58ee1fbbcd76d91be66a4 (diff)
download7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.gz
7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.bz2
7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.zip
23.0123.01
Diffstat (limited to 'C/LzFind.c')
-rw-r--r--C/LzFind.c519
1 files changed, 304 insertions, 215 deletions
diff --git a/C/LzFind.c b/C/LzFind.c
index 1b73c28..0fbd5aa 100644
--- a/C/LzFind.c
+++ b/C/LzFind.c
@@ -1,5 +1,5 @@
1/* LzFind.c -- Match finder for LZ algorithms 1/* LzFind.c -- Match finder for LZ algorithms
22021-11-29 : Igor Pavlov : Public domain */ 22023-03-14 : Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5 5
@@ -17,7 +17,7 @@
17#define kEmptyHashValue 0 17#define kEmptyHashValue 0
18 18
19#define kMaxValForNormalize ((UInt32)0) 19#define kMaxValForNormalize ((UInt32)0)
20// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug 20// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
21 21
22// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses 22// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
23 23
@@ -67,10 +67,10 @@
67 67
68static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) 68static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
69{ 69{
70 if (!p->directInput) 70 // if (!p->directInput)
71 { 71 {
72 ISzAlloc_Free(alloc, p->bufferBase); 72 ISzAlloc_Free(alloc, p->bufBase);
73 p->bufferBase = NULL; 73 p->bufBase = NULL;
74 } 74 }
75} 75}
76 76
@@ -79,7 +79,7 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
79{ 79{
80 if (blockSize == 0) 80 if (blockSize == 0)
81 return 0; 81 return 0;
82 if (!p->bufferBase || p->blockSize != blockSize) 82 if (!p->bufBase || p->blockSize != blockSize)
83 { 83 {
84 // size_t blockSizeT; 84 // size_t blockSizeT;
85 LzInWindow_Free(p, alloc); 85 LzInWindow_Free(p, alloc);
@@ -101,11 +101,11 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
101 #endif 101 #endif
102 */ 102 */
103 103
104 p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); 104 p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
105 // printf("\nbufferBase = %p\n", p->bufferBase); 105 // printf("\nbufferBase = %p\n", p->bufBase);
106 // return 0; // for debug 106 // return 0; // for debug
107 } 107 }
108 return (p->bufferBase != NULL); 108 return (p->bufBase != NULL);
109} 109}
110 110
111static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } 111static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
@@ -113,7 +113,7 @@ static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return
113static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } 113static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
114 114
115 115
116MY_NO_INLINE 116Z7_NO_INLINE
117static void MatchFinder_ReadBlock(CMatchFinder *p) 117static void MatchFinder_ReadBlock(CMatchFinder *p)
118{ 118{
119 if (p->streamEndWasReached || p->result != SZ_OK) 119 if (p->streamEndWasReached || p->result != SZ_OK)
@@ -127,8 +127,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
127 UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); 127 UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
128 if (curSize > p->directInputRem) 128 if (curSize > p->directInputRem)
129 curSize = (UInt32)p->directInputRem; 129 curSize = (UInt32)p->directInputRem;
130 p->directInputRem -= curSize;
131 p->streamPos += curSize; 130 p->streamPos += curSize;
131 p->directInputRem -= curSize;
132 if (p->directInputRem == 0) 132 if (p->directInputRem == 0)
133 p->streamEndWasReached = 1; 133 p->streamEndWasReached = 1;
134 return; 134 return;
@@ -136,8 +136,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
136 136
137 for (;;) 137 for (;;)
138 { 138 {
139 Byte *dest = p->buffer + GET_AVAIL_BYTES(p); 139 const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
140 size_t size = (size_t)(p->bufferBase + p->blockSize - dest); 140 size_t size = (size_t)(p->bufBase + p->blockSize - dest);
141 if (size == 0) 141 if (size == 0)
142 { 142 {
143 /* we call ReadBlock() after NeedMove() and MoveBlock(). 143 /* we call ReadBlock() after NeedMove() and MoveBlock().
@@ -153,7 +153,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
153 // #define kRead 3 153 // #define kRead 3
154 // if (size > kRead) size = kRead; // for debug 154 // if (size > kRead) size = kRead; // for debug
155 155
156 p->result = ISeqInStream_Read(p->stream, dest, &size); 156 /*
157 // we need cast (Byte *)dest.
158 #ifdef __clang__
159 #pragma GCC diagnostic ignored "-Wcast-qual"
160 #endif
161 */
162 p->result = ISeqInStream_Read(p->stream,
163 p->bufBase + (dest - p->bufBase), &size);
157 if (p->result != SZ_OK) 164 if (p->result != SZ_OK)
158 return; 165 return;
159 if (size == 0) 166 if (size == 0)
@@ -173,14 +180,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
173 180
174 181
175 182
176MY_NO_INLINE 183Z7_NO_INLINE
177void MatchFinder_MoveBlock(CMatchFinder *p) 184void MatchFinder_MoveBlock(CMatchFinder *p)
178{ 185{
179 const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; 186 const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
180 const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; 187 const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
181 p->buffer = p->bufferBase + keepBefore; 188 p->buffer = p->bufBase + keepBefore;
182 memmove(p->bufferBase, 189 memmove(p->bufBase,
183 p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), 190 p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
184 keepBefore + (size_t)GET_AVAIL_BYTES(p)); 191 keepBefore + (size_t)GET_AVAIL_BYTES(p));
185} 192}
186 193
@@ -198,7 +205,7 @@ int MatchFinder_NeedMove(CMatchFinder *p)
198 return 0; 205 return 0;
199 if (p->streamEndWasReached || p->result != SZ_OK) 206 if (p->streamEndWasReached || p->result != SZ_OK)
200 return 0; 207 return 0;
201 return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); 208 return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
202} 209}
203 210
204void MatchFinder_ReadIfRequired(CMatchFinder *p) 211void MatchFinder_ReadIfRequired(CMatchFinder *p)
@@ -214,6 +221,8 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
214 p->cutValue = 32; 221 p->cutValue = 32;
215 p->btMode = 1; 222 p->btMode = 1;
216 p->numHashBytes = 4; 223 p->numHashBytes = 4;
224 p->numHashBytes_Min = 2;
225 p->numHashOutBits = 0;
217 p->bigHash = 0; 226 p->bigHash = 0;
218} 227}
219 228
@@ -222,8 +231,10 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
222void MatchFinder_Construct(CMatchFinder *p) 231void MatchFinder_Construct(CMatchFinder *p)
223{ 232{
224 unsigned i; 233 unsigned i;
225 p->bufferBase = NULL; 234 p->buffer = NULL;
235 p->bufBase = NULL;
226 p->directInput = 0; 236 p->directInput = 0;
237 p->stream = NULL;
227 p->hash = NULL; 238 p->hash = NULL;
228 p->expectedDataSize = (UInt64)(Int64)-1; 239 p->expectedDataSize = (UInt64)(Int64)-1;
229 MatchFinder_SetDefaultSettings(p); 240 MatchFinder_SetDefaultSettings(p);
@@ -238,6 +249,8 @@ void MatchFinder_Construct(CMatchFinder *p)
238 } 249 }
239} 250}
240 251
252#undef kCrcPoly
253
241static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) 254static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
242{ 255{
243 ISzAlloc_Free(alloc, p->hash); 256 ISzAlloc_Free(alloc, p->hash);
@@ -252,7 +265,7 @@ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
252 265
253static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) 266static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
254{ 267{
255 size_t sizeInBytes = (size_t)num * sizeof(CLzRef); 268 const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
256 if (sizeInBytes / sizeof(CLzRef) != num) 269 if (sizeInBytes / sizeof(CLzRef) != num)
257 return NULL; 270 return NULL;
258 return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); 271 return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
@@ -298,6 +311,62 @@ static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
298} 311}
299 312
300 313
314// input is historySize
315static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
316{
317 if (p->numHashBytes == 2)
318 return (1 << 16) - 1;
319 if (hs != 0)
320 hs--;
321 hs |= (hs >> 1);
322 hs |= (hs >> 2);
323 hs |= (hs >> 4);
324 hs |= (hs >> 8);
325 // we propagated 16 bits in (hs). Low 16 bits must be set later
326 if (hs >= (1 << 24))
327 {
328 if (p->numHashBytes == 3)
329 hs = (1 << 24) - 1;
330 /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
331 }
332 // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
333 hs |= (1 << 16) - 1; /* don't change it! */
334 // bt5: we adjust the size with recommended minimum size
335 if (p->numHashBytes >= 5)
336 hs |= (256 << kLzHash_CrcShift_2) - 1;
337 return hs;
338}
339
340// input is historySize
341static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
342{
343 if (p->numHashBytes == 2)
344 return (1 << 16) - 1;
345 if (hs != 0)
346 hs--;
347 hs |= (hs >> 1);
348 hs |= (hs >> 2);
349 hs |= (hs >> 4);
350 hs |= (hs >> 8);
351 // we propagated 16 bits in (hs). Low 16 bits must be set later
352 hs >>= 1;
353 if (hs >= (1 << 24))
354 {
355 if (p->numHashBytes == 3)
356 hs = (1 << 24) - 1;
357 else
358 hs >>= 1;
359 /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
360 }
361 // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
362 hs |= (1 << 16) - 1; /* don't change it! */
363 // bt5: we adjust the size with recommended minimum size
364 if (p->numHashBytes >= 5)
365 hs |= (256 << kLzHash_CrcShift_2) - 1;
366 return hs;
367}
368
369
301int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, 370int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
302 UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, 371 UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
303 ISzAllocPtr alloc) 372 ISzAllocPtr alloc)
@@ -318,78 +387,91 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
318 p->blockSize = 0; 387 p->blockSize = 0;
319 if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) 388 if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
320 { 389 {
321 const UInt32 newCyclicBufferSize = historySize + 1; // do not change it 390 size_t hashSizeSum;
322 UInt32 hs;
323 p->matchMaxLen = matchMaxLen;
324 { 391 {
325 // UInt32 hs4; 392 UInt32 hs;
326 p->fixedHashSize = 0; 393 UInt32 hsCur;
327 hs = (1 << 16) - 1; 394
328 if (p->numHashBytes != 2) 395 if (p->numHashOutBits != 0)
329 { 396 {
330 hs = historySize; 397 unsigned numBits = p->numHashOutBits;
331 if (hs > p->expectedDataSize) 398 const unsigned nbMax =
332 hs = (UInt32)p->expectedDataSize; 399 (p->numHashBytes == 2 ? 16 :
333 if (hs != 0) 400 (p->numHashBytes == 3 ? 24 : 32));
334 hs--; 401 if (numBits > nbMax)
335 hs |= (hs >> 1); 402 numBits = nbMax;
336 hs |= (hs >> 2); 403 if (numBits >= 32)
337 hs |= (hs >> 4); 404 hs = (UInt32)0 - 1;
338 hs |= (hs >> 8); 405 else
339 // we propagated 16 bits in (hs). Low 16 bits must be set later 406 hs = ((UInt32)1 << numBits) - 1;
340 hs >>= 1;
341 if (hs >= (1 << 24))
342 {
343 if (p->numHashBytes == 3)
344 hs = (1 << 24) - 1;
345 else
346 hs >>= 1;
347 /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
348 }
349
350 // hs = ((UInt32)1 << 25) - 1; // for test
351
352 // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) 407 // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
353 hs |= (1 << 16) - 1; /* don't change it! */ 408 hs |= (1 << 16) - 1; /* don't change it! */
354
355 // bt5: we adjust the size with recommended minimum size
356 if (p->numHashBytes >= 5) 409 if (p->numHashBytes >= 5)
357 hs |= (256 << kLzHash_CrcShift_2) - 1; 410 hs |= (256 << kLzHash_CrcShift_2) - 1;
411 {
412 const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
413 if (hs > hs2)
414 hs = hs2;
415 }
416 hsCur = hs;
417 if (p->expectedDataSize < historySize)
418 {
419 const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
420 if (hsCur > hs2)
421 hsCur = hs2;
422 }
358 } 423 }
359 p->hashMask = hs; 424 else
360 hs++; 425 {
361 426 hs = MatchFinder_GetHashMask(p, historySize);
362 /* 427 hsCur = hs;
363 hs4 = (1 << 20); 428 if (p->expectedDataSize < historySize)
364 if (hs4 > hs) 429 {
365 hs4 = hs; 430 hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
366 // hs4 = (1 << 16); // for test 431 if (hsCur > hs) // is it possible?
367 p->hash4Mask = hs4 - 1; 432 hsCur = hs;
368 */ 433 }
434 }
435
436 p->hashMask = hsCur;
369 437
370 if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; 438 hashSizeSum = hs;
371 if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; 439 hashSizeSum++;
372 // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; 440 if (hashSizeSum < hs)
373 hs += p->fixedHashSize; 441 return 0;
442 {
443 UInt32 fixedHashSize = 0;
444 if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
445 if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
446 // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
447 hashSizeSum += fixedHashSize;
448 p->fixedHashSize = fixedHashSize;
449 }
374 } 450 }
375 451
452 p->matchMaxLen = matchMaxLen;
453
376 { 454 {
377 size_t newSize; 455 size_t newSize;
378 size_t numSons; 456 size_t numSons;
457 const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
379 p->historySize = historySize; 458 p->historySize = historySize;
380 p->hashSizeSum = hs;
381 p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) 459 p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
382 460
383 numSons = newCyclicBufferSize; 461 numSons = newCyclicBufferSize;
384 if (p->btMode) 462 if (p->btMode)
385 numSons <<= 1; 463 numSons <<= 1;
386 newSize = hs + numSons; 464 newSize = hashSizeSum + numSons;
465
466 if (numSons < newCyclicBufferSize || newSize < numSons)
467 return 0;
387 468
388 // aligned size is not required here, but it can be better for some loops 469 // aligned size is not required here, but it can be better for some loops
389 #define NUM_REFS_ALIGN_MASK 0xF 470 #define NUM_REFS_ALIGN_MASK 0xF
390 newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; 471 newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
391 472
392 if (p->hash && p->numRefs == newSize) 473 // 22.02: we don't reallocate buffer, if old size is enough
474 if (p->hash && p->numRefs >= newSize)
393 return 1; 475 return 1;
394 476
395 MatchFinder_FreeThisClassMemory(p, alloc); 477 MatchFinder_FreeThisClassMemory(p, alloc);
@@ -398,7 +480,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
398 480
399 if (p->hash) 481 if (p->hash)
400 { 482 {
401 p->son = p->hash + p->hashSizeSum; 483 p->son = p->hash + hashSizeSum;
402 return 1; 484 return 1;
403 } 485 }
404 } 486 }
@@ -470,7 +552,8 @@ void MatchFinder_Init_HighHash(CMatchFinder *p)
470 552
471void MatchFinder_Init_4(CMatchFinder *p) 553void MatchFinder_Init_4(CMatchFinder *p)
472{ 554{
473 p->buffer = p->bufferBase; 555 if (!p->directInput)
556 p->buffer = p->bufBase;
474 { 557 {
475 /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. 558 /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
476 the code in CMatchFinderMt expects (pos = 1) */ 559 the code in CMatchFinderMt expects (pos = 1) */
@@ -507,20 +590,20 @@ void MatchFinder_Init(CMatchFinder *p)
507 590
508 591
509#ifdef MY_CPU_X86_OR_AMD64 592#ifdef MY_CPU_X86_OR_AMD64
510 #if defined(__clang__) && (__clang_major__ >= 8) \ 593 #if defined(__clang__) && (__clang_major__ >= 4) \
511 || defined(__GNUC__) && (__GNUC__ >= 8) \ 594 || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
512 || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) 595 // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
513 #define USE_SATUR_SUB_128 596
514 #define USE_AVX2 597 #define USE_LZFIND_SATUR_SUB_128
515 #define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) 598 #define USE_LZFIND_SATUR_SUB_256
516 #define ATTRIB_AVX2 __attribute__((__target__("avx2"))) 599 #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
600 #define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
517 #elif defined(_MSC_VER) 601 #elif defined(_MSC_VER)
518 #if (_MSC_VER >= 1600) 602 #if (_MSC_VER >= 1600)
519 #define USE_SATUR_SUB_128 603 #define USE_LZFIND_SATUR_SUB_128
520 #if (_MSC_VER >= 1900) 604 #endif
521 #define USE_AVX2 605 #if (_MSC_VER >= 1900)
522 #include <immintrin.h> // avx 606 #define USE_LZFIND_SATUR_SUB_256
523 #endif
524 #endif 607 #endif
525 #endif 608 #endif
526 609
@@ -529,16 +612,16 @@ void MatchFinder_Init(CMatchFinder *p)
529 612
530 #if defined(__clang__) && (__clang_major__ >= 8) \ 613 #if defined(__clang__) && (__clang_major__ >= 8) \
531 || defined(__GNUC__) && (__GNUC__ >= 8) 614 || defined(__GNUC__) && (__GNUC__ >= 8)
532 #define USE_SATUR_SUB_128 615 #define USE_LZFIND_SATUR_SUB_128
533 #ifdef MY_CPU_ARM64 616 #ifdef MY_CPU_ARM64
534 // #define ATTRIB_SSE41 __attribute__((__target__(""))) 617 // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
535 #else 618 #else
536 // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) 619 // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
537 #endif 620 #endif
538 621
539 #elif defined(_MSC_VER) 622 #elif defined(_MSC_VER)
540 #if (_MSC_VER >= 1910) 623 #if (_MSC_VER >= 1910)
541 #define USE_SATUR_SUB_128 624 #define USE_LZFIND_SATUR_SUB_128
542 #endif 625 #endif
543 #endif 626 #endif
544 627
@@ -550,121 +633,130 @@ void MatchFinder_Init(CMatchFinder *p)
550 633
551#endif 634#endif
552 635
553/*
554#ifndef ATTRIB_SSE41
555 #define ATTRIB_SSE41
556#endif
557#ifndef ATTRIB_AVX2
558 #define ATTRIB_AVX2
559#endif
560*/
561 636
562#ifdef USE_SATUR_SUB_128 637#ifdef USE_LZFIND_SATUR_SUB_128
563 638
564// #define _SHOW_HW_STATUS 639// #define Z7_SHOW_HW_STATUS
565 640
566#ifdef _SHOW_HW_STATUS 641#ifdef Z7_SHOW_HW_STATUS
567#include <stdio.h> 642#include <stdio.h>
568#define _PRF(x) x 643#define PRF(x) x
569_PRF(;) 644PRF(;)
570#else 645#else
571#define _PRF(x) 646#define PRF(x)
572#endif 647#endif
573 648
649
574#ifdef MY_CPU_ARM_OR_ARM64 650#ifdef MY_CPU_ARM_OR_ARM64
575 651
576#ifdef MY_CPU_ARM64 652#ifdef MY_CPU_ARM64
577// #define FORCE_SATUR_SUB_128 653// #define FORCE_LZFIND_SATUR_SUB_128
578#endif 654#endif
655typedef uint32x4_t LzFind_v128;
656#define SASUB_128_V(v, s) \
657 vsubq_u32(vmaxq_u32(v, s), s)
579 658
580typedef uint32x4_t v128; 659#else // MY_CPU_ARM_OR_ARM64
581#define SASUB_128(i) \
582 *(v128 *)(void *)(items + (i) * 4) = \
583 vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
584
585#else
586 660
587#include <smmintrin.h> // sse4.1 661#include <smmintrin.h> // sse4.1
588 662
589typedef __m128i v128; 663typedef __m128i LzFind_v128;
590#define SASUB_128(i) \ 664// SSE 4.1
591 *(v128 *)(void *)(items + (i) * 4) = \ 665#define SASUB_128_V(v, s) \
592 _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 666 _mm_sub_epi32(_mm_max_epu32(v, s), s)
593 667
594#endif 668#endif // MY_CPU_ARM_OR_ARM64
595 669
596 670
671#define SASUB_128(i) \
672 *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
673 *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
674
597 675
598MY_NO_INLINE 676Z7_NO_INLINE
599static 677static
600#ifdef ATTRIB_SSE41 678#ifdef LZFIND_ATTRIB_SSE41
601ATTRIB_SSE41 679LZFIND_ATTRIB_SSE41
602#endif 680#endif
603void 681void
604MY_FAST_CALL 682Z7_FASTCALL
605LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) 683LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
606{ 684{
607 v128 sub2 = 685 const LzFind_v128 sub2 =
608 #ifdef MY_CPU_ARM_OR_ARM64 686 #ifdef MY_CPU_ARM_OR_ARM64
609 vdupq_n_u32(subValue); 687 vdupq_n_u32(subValue);
610 #else 688 #else
611 _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); 689 _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
612 #endif 690 #endif
691 Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
613 do 692 do
614 { 693 {
615 SASUB_128(0) 694 SASUB_128(0) SASUB_128(1) items += 2 * 4;
616 SASUB_128(1) 695 SASUB_128(0) SASUB_128(1) items += 2 * 4;
617 SASUB_128(2)
618 SASUB_128(3)
619 items += 4 * 4;
620 } 696 }
621 while (items != lim); 697 while (items != lim);
622} 698}
623 699
624 700
625 701
626#ifdef USE_AVX2 702#ifdef USE_LZFIND_SATUR_SUB_256
627 703
628#include <immintrin.h> // avx 704#include <immintrin.h> // avx
705/*
706clang :immintrin.h uses
707#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
708 defined(__AVX2__)
709#include <avx2intrin.h>
710#endif
711so we need <avxintrin.h> for clang-cl */
629 712
630#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 713#if defined(__clang__)
714#include <avxintrin.h>
715#include <avx2intrin.h>
716#endif
631 717
632MY_NO_INLINE 718// AVX2:
719#define SASUB_256(i) \
720 *( __m256i *)( void *)(items + (i) * 8) = \
721 _mm256_sub_epi32(_mm256_max_epu32( \
722 *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
723
724Z7_NO_INLINE
633static 725static
634#ifdef ATTRIB_AVX2 726#ifdef LZFIND_ATTRIB_AVX2
635ATTRIB_AVX2 727LZFIND_ATTRIB_AVX2
636#endif 728#endif
637void 729void
638MY_FAST_CALL 730Z7_FASTCALL
639LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) 731LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
640{ 732{
641 __m256i sub2 = _mm256_set_epi32( 733 const __m256i sub2 = _mm256_set_epi32(
642 (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, 734 (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
643 (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); 735 (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
736 Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
644 do 737 do
645 { 738 {
646 SASUB_256(0) 739 SASUB_256(0) SASUB_256(1) items += 2 * 8;
647 SASUB_256(1) 740 SASUB_256(0) SASUB_256(1) items += 2 * 8;
648 items += 2 * 8;
649 } 741 }
650 while (items != lim); 742 while (items != lim);
651} 743}
652#endif // USE_AVX2 744#endif // USE_LZFIND_SATUR_SUB_256
653 745
654#ifndef FORCE_SATUR_SUB_128 746#ifndef FORCE_LZFIND_SATUR_SUB_128
655typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( 747typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
656 UInt32 subValue, CLzRef *items, const CLzRef *lim); 748 UInt32 subValue, CLzRef *items, const CLzRef *lim);
657static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; 749static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
658#endif // FORCE_SATUR_SUB_128 750#endif // FORCE_LZFIND_SATUR_SUB_128
659 751
660#endif // USE_SATUR_SUB_128 752#endif // USE_LZFIND_SATUR_SUB_128
661 753
662 754
663// kEmptyHashValue must be zero 755// kEmptyHashValue must be zero
664// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; 756// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
665#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; 757#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
666 758
667#ifdef FORCE_SATUR_SUB_128 759#ifdef FORCE_LZFIND_SATUR_SUB_128
668 760
669#define DEFAULT_SaturSub LzFind_SaturSub_128 761#define DEFAULT_SaturSub LzFind_SaturSub_128
670 762
@@ -672,24 +764,19 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
672 764
673#define DEFAULT_SaturSub LzFind_SaturSub_32 765#define DEFAULT_SaturSub LzFind_SaturSub_32
674 766
675MY_NO_INLINE 767Z7_NO_INLINE
676static 768static
677void 769void
678MY_FAST_CALL 770Z7_FASTCALL
679LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) 771LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
680{ 772{
773 Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
681 do 774 do
682 { 775 {
683 UInt32 v; 776 SASUB_32(0) SASUB_32(1) items += 2;
684 SASUB_32(0) 777 SASUB_32(0) SASUB_32(1) items += 2;
685 SASUB_32(1) 778 SASUB_32(0) SASUB_32(1) items += 2;
686 SASUB_32(2) 779 SASUB_32(0) SASUB_32(1) items += 2;
687 SASUB_32(3)
688 SASUB_32(4)
689 SASUB_32(5)
690 SASUB_32(6)
691 SASUB_32(7)
692 items += 8;
693 } 780 }
694 while (items != lim); 781 while (items != lim);
695} 782}
@@ -697,27 +784,23 @@ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
697#endif 784#endif
698 785
699 786
700MY_NO_INLINE 787Z7_NO_INLINE
701void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) 788void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
702{ 789{
703 #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) 790 #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
704 791 Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
705 CLzRef *lim; 792 for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
706
707 for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
708 { 793 {
709 UInt32 v; 794 SASUB_32(0)
710 SASUB_32(0);
711 items++; 795 items++;
712 } 796 }
713
714 { 797 {
715 #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) 798 const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
716 lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); 799 CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
717 numItems &= K_NORM_ALIGN_MASK; 800 numItems &= k_Align_Mask;
718 if (items != lim) 801 if (items != lim)
719 { 802 {
720 #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) 803 #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
721 if (g_LzFind_SaturSub) 804 if (g_LzFind_SaturSub)
722 g_LzFind_SaturSub(subValue, items, lim); 805 g_LzFind_SaturSub(subValue, items, lim);
723 else 806 else
@@ -726,12 +809,10 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
726 } 809 }
727 items = lim; 810 items = lim;
728 } 811 }
729 812 Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
730
731 for (; numItems != 0; numItems--) 813 for (; numItems != 0; numItems--)
732 { 814 {
733 UInt32 v; 815 SASUB_32(0)
734 SASUB_32(0);
735 items++; 816 items++;
736 } 817 }
737} 818}
@@ -740,7 +821,7 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
740 821
741// call MatchFinder_CheckLimits() only after (p->pos++) update 822// call MatchFinder_CheckLimits() only after (p->pos++) update
742 823
743MY_NO_INLINE 824Z7_NO_INLINE
744static void MatchFinder_CheckLimits(CMatchFinder *p) 825static void MatchFinder_CheckLimits(CMatchFinder *p)
745{ 826{
746 if (// !p->streamEndWasReached && p->result == SZ_OK && 827 if (// !p->streamEndWasReached && p->result == SZ_OK &&
@@ -768,11 +849,14 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
768 const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; 849 const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
769 // const UInt32 subValue = (1 << 15); // for debug 850 // const UInt32 subValue = (1 << 15); // for debug
770 // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); 851 // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
771 size_t numSonRefs = p->cyclicBufferSize; 852 MatchFinder_REDUCE_OFFSETS(p, subValue)
772 if (p->btMode) 853 MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
773 numSonRefs <<= 1; 854 {
774 Inline_MatchFinder_ReduceOffsets(p, subValue); 855 size_t numSonRefs = p->cyclicBufferSize;
775 MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); 856 if (p->btMode)
857 numSonRefs <<= 1;
858 MatchFinder_Normalize3(subValue, p->son, numSonRefs);
859 }
776 } 860 }
777 861
778 if (p->cyclicBufferPos == p->cyclicBufferSize) 862 if (p->cyclicBufferPos == p->cyclicBufferSize)
@@ -785,7 +869,7 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
785/* 869/*
786 (lenLimit > maxLen) 870 (lenLimit > maxLen)
787*/ 871*/
788MY_FORCE_INLINE 872Z7_FORCE_INLINE
789static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, 873static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
790 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, 874 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
791 UInt32 *d, unsigned maxLen) 875 UInt32 *d, unsigned maxLen)
@@ -867,7 +951,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
867} 951}
868 952
869 953
870MY_FORCE_INLINE 954Z7_FORCE_INLINE
871UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, 955UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
872 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, 956 size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
873 UInt32 *d, UInt32 maxLen) 957 UInt32 *d, UInt32 maxLen)
@@ -1004,7 +1088,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
1004 1088
1005#define MOVE_POS_RET MOVE_POS return distances; 1089#define MOVE_POS_RET MOVE_POS return distances;
1006 1090
1007MY_NO_INLINE 1091Z7_NO_INLINE
1008static void MatchFinder_MovePos(CMatchFinder *p) 1092static void MatchFinder_MovePos(CMatchFinder *p)
1009{ 1093{
1010 /* we go here at the end of stream data, when (avail < num_hash_bytes) 1094 /* we go here at the end of stream data, when (avail < num_hash_bytes)
@@ -1015,11 +1099,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
1015 if (p->btMode) 1099 if (p->btMode)
1016 p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue 1100 p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
1017 */ 1101 */
1018 MOVE_POS; 1102 MOVE_POS
1019} 1103}
1020 1104
1021#define GET_MATCHES_HEADER2(minLen, ret_op) \ 1105#define GET_MATCHES_HEADER2(minLen, ret_op) \
1022 unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ 1106 unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
1023 lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ 1107 lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
1024 cur = p->buffer; 1108 cur = p->buffer;
1025 1109
@@ -1028,11 +1112,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
1028 1112
1029#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue 1113#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
1030 1114
1031#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); 1115#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num);
1032 1116
1033#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ 1117#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
1034 distances = func(MF_PARAMS(p), \ 1118 distances = func(MF_PARAMS(p), \
1035 distances, (UInt32)_maxLen_); MOVE_POS_RET; 1119 distances, (UInt32)_maxLen_); MOVE_POS_RET
1036 1120
1037#define GET_MATCHES_FOOTER_BT(_maxLen_) \ 1121#define GET_MATCHES_FOOTER_BT(_maxLen_) \
1038 GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) 1122 GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
@@ -1052,7 +1136,7 @@ static void MatchFinder_MovePos(CMatchFinder *p)
1052static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) 1136static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1053{ 1137{
1054 GET_MATCHES_HEADER(2) 1138 GET_MATCHES_HEADER(2)
1055 HASH2_CALC; 1139 HASH2_CALC
1056 curMatch = p->hash[hv]; 1140 curMatch = p->hash[hv];
1057 p->hash[hv] = p->pos; 1141 p->hash[hv] = p->pos;
1058 GET_MATCHES_FOOTER_BT(1) 1142 GET_MATCHES_FOOTER_BT(1)
@@ -1061,7 +1145,7 @@ static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1061UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) 1145UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1062{ 1146{
1063 GET_MATCHES_HEADER(3) 1147 GET_MATCHES_HEADER(3)
1064 HASH_ZIP_CALC; 1148 HASH_ZIP_CALC
1065 curMatch = p->hash[hv]; 1149 curMatch = p->hash[hv];
1066 p->hash[hv] = p->pos; 1150 p->hash[hv] = p->pos;
1067 GET_MATCHES_FOOTER_BT(2) 1151 GET_MATCHES_FOOTER_BT(2)
@@ -1082,7 +1166,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1082 UInt32 *hash; 1166 UInt32 *hash;
1083 GET_MATCHES_HEADER(3) 1167 GET_MATCHES_HEADER(3)
1084 1168
1085 HASH3_CALC; 1169 HASH3_CALC
1086 1170
1087 hash = p->hash; 1171 hash = p->hash;
1088 pos = p->pos; 1172 pos = p->pos;
@@ -1107,7 +1191,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1107 if (maxLen == lenLimit) 1191 if (maxLen == lenLimit)
1108 { 1192 {
1109 SkipMatchesSpec(MF_PARAMS(p)); 1193 SkipMatchesSpec(MF_PARAMS(p));
1110 MOVE_POS_RET; 1194 MOVE_POS_RET
1111 } 1195 }
1112 } 1196 }
1113 1197
@@ -1123,7 +1207,7 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1123 UInt32 *hash; 1207 UInt32 *hash;
1124 GET_MATCHES_HEADER(4) 1208 GET_MATCHES_HEADER(4)
1125 1209
1126 HASH4_CALC; 1210 HASH4_CALC
1127 1211
1128 hash = p->hash; 1212 hash = p->hash;
1129 pos = p->pos; 1213 pos = p->pos;
@@ -1190,7 +1274,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1190 UInt32 *hash; 1274 UInt32 *hash;
1191 GET_MATCHES_HEADER(5) 1275 GET_MATCHES_HEADER(5)
1192 1276
1193 HASH5_CALC; 1277 HASH5_CALC
1194 1278
1195 hash = p->hash; 1279 hash = p->hash;
1196 pos = p->pos; 1280 pos = p->pos;
@@ -1246,7 +1330,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1246 if (maxLen == lenLimit) 1330 if (maxLen == lenLimit)
1247 { 1331 {
1248 SkipMatchesSpec(MF_PARAMS(p)); 1332 SkipMatchesSpec(MF_PARAMS(p));
1249 MOVE_POS_RET; 1333 MOVE_POS_RET
1250 } 1334 }
1251 break; 1335 break;
1252 } 1336 }
@@ -1263,7 +1347,7 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1263 UInt32 *hash; 1347 UInt32 *hash;
1264 GET_MATCHES_HEADER(4) 1348 GET_MATCHES_HEADER(4)
1265 1349
1266 HASH4_CALC; 1350 HASH4_CALC
1267 1351
1268 hash = p->hash; 1352 hash = p->hash;
1269 pos = p->pos; 1353 pos = p->pos;
@@ -1314,12 +1398,12 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1314 if (maxLen == lenLimit) 1398 if (maxLen == lenLimit)
1315 { 1399 {
1316 p->son[p->cyclicBufferPos] = curMatch; 1400 p->son[p->cyclicBufferPos] = curMatch;
1317 MOVE_POS_RET; 1401 MOVE_POS_RET
1318 } 1402 }
1319 break; 1403 break;
1320 } 1404 }
1321 1405
1322 GET_MATCHES_FOOTER_HC(maxLen); 1406 GET_MATCHES_FOOTER_HC(maxLen)
1323} 1407}
1324 1408
1325 1409
@@ -1330,7 +1414,7 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1330 UInt32 *hash; 1414 UInt32 *hash;
1331 GET_MATCHES_HEADER(5) 1415 GET_MATCHES_HEADER(5)
1332 1416
1333 HASH5_CALC; 1417 HASH5_CALC
1334 1418
1335 hash = p->hash; 1419 hash = p->hash;
1336 pos = p->pos; 1420 pos = p->pos;
@@ -1386,19 +1470,19 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1386 if (maxLen == lenLimit) 1470 if (maxLen == lenLimit)
1387 { 1471 {
1388 p->son[p->cyclicBufferPos] = curMatch; 1472 p->son[p->cyclicBufferPos] = curMatch;
1389 MOVE_POS_RET; 1473 MOVE_POS_RET
1390 } 1474 }
1391 break; 1475 break;
1392 } 1476 }
1393 1477
1394 GET_MATCHES_FOOTER_HC(maxLen); 1478 GET_MATCHES_FOOTER_HC(maxLen)
1395} 1479}
1396 1480
1397 1481
1398UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) 1482UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
1399{ 1483{
1400 GET_MATCHES_HEADER(3) 1484 GET_MATCHES_HEADER(3)
1401 HASH_ZIP_CALC; 1485 HASH_ZIP_CALC
1402 curMatch = p->hash[hv]; 1486 curMatch = p->hash[hv];
1403 p->hash[hv] = p->pos; 1487 p->hash[hv] = p->pos;
1404 GET_MATCHES_FOOTER_HC(2) 1488 GET_MATCHES_FOOTER_HC(2)
@@ -1409,7 +1493,7 @@ static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
1409{ 1493{
1410 SKIP_HEADER(2) 1494 SKIP_HEADER(2)
1411 { 1495 {
1412 HASH2_CALC; 1496 HASH2_CALC
1413 curMatch = p->hash[hv]; 1497 curMatch = p->hash[hv];
1414 p->hash[hv] = p->pos; 1498 p->hash[hv] = p->pos;
1415 } 1499 }
@@ -1420,7 +1504,7 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
1420{ 1504{
1421 SKIP_HEADER(3) 1505 SKIP_HEADER(3)
1422 { 1506 {
1423 HASH_ZIP_CALC; 1507 HASH_ZIP_CALC
1424 curMatch = p->hash[hv]; 1508 curMatch = p->hash[hv];
1425 p->hash[hv] = p->pos; 1509 p->hash[hv] = p->pos;
1426 } 1510 }
@@ -1433,7 +1517,7 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
1433 { 1517 {
1434 UInt32 h2; 1518 UInt32 h2;
1435 UInt32 *hash; 1519 UInt32 *hash;
1436 HASH3_CALC; 1520 HASH3_CALC
1437 hash = p->hash; 1521 hash = p->hash;
1438 curMatch = (hash + kFix3HashSize)[hv]; 1522 curMatch = (hash + kFix3HashSize)[hv];
1439 hash[h2] = 1523 hash[h2] =
@@ -1448,7 +1532,7 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
1448 { 1532 {
1449 UInt32 h2, h3; 1533 UInt32 h2, h3;
1450 UInt32 *hash; 1534 UInt32 *hash;
1451 HASH4_CALC; 1535 HASH4_CALC
1452 hash = p->hash; 1536 hash = p->hash;
1453 curMatch = (hash + kFix4HashSize)[hv]; 1537 curMatch = (hash + kFix4HashSize)[hv];
1454 hash [h2] = 1538 hash [h2] =
@@ -1464,7 +1548,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
1464 { 1548 {
1465 UInt32 h2, h3; 1549 UInt32 h2, h3;
1466 UInt32 *hash; 1550 UInt32 *hash;
1467 HASH5_CALC; 1551 HASH5_CALC
1468 hash = p->hash; 1552 hash = p->hash;
1469 curMatch = (hash + kFix5HashSize)[hv]; 1553 curMatch = (hash + kFix5HashSize)[hv];
1470 hash [h2] = 1554 hash [h2] =
@@ -1478,7 +1562,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
1478 1562
1479#define HC_SKIP_HEADER(minLen) \ 1563#define HC_SKIP_HEADER(minLen) \
1480 do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ 1564 do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
1481 Byte *cur; \ 1565 const Byte *cur; \
1482 UInt32 *hash; \ 1566 UInt32 *hash; \
1483 UInt32 *son; \ 1567 UInt32 *son; \
1484 UInt32 pos = p->pos; \ 1568 UInt32 pos = p->pos; \
@@ -1510,7 +1594,7 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
1510 HC_SKIP_HEADER(4) 1594 HC_SKIP_HEADER(4)
1511 1595
1512 UInt32 h2, h3; 1596 UInt32 h2, h3;
1513 HASH4_CALC; 1597 HASH4_CALC
1514 curMatch = (hash + kFix4HashSize)[hv]; 1598 curMatch = (hash + kFix4HashSize)[hv];
1515 hash [h2] = 1599 hash [h2] =
1516 (hash + kFix3HashSize)[h3] = 1600 (hash + kFix3HashSize)[h3] =
@@ -1540,7 +1624,7 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
1540{ 1624{
1541 HC_SKIP_HEADER(3) 1625 HC_SKIP_HEADER(3)
1542 1626
1543 HASH_ZIP_CALC; 1627 HASH_ZIP_CALC
1544 curMatch = hash[hv]; 1628 curMatch = hash[hv];
1545 hash[hv] = pos; 1629 hash[hv] = pos;
1546 1630
@@ -1590,17 +1674,17 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
1590 1674
1591 1675
1592 1676
1593void LzFindPrepare() 1677void LzFindPrepare(void)
1594{ 1678{
1595 #ifndef FORCE_SATUR_SUB_128 1679 #ifndef FORCE_LZFIND_SATUR_SUB_128
1596 #ifdef USE_SATUR_SUB_128 1680 #ifdef USE_LZFIND_SATUR_SUB_128
1597 LZFIND_SATUR_SUB_CODE_FUNC f = NULL; 1681 LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
1598 #ifdef MY_CPU_ARM_OR_ARM64 1682 #ifdef MY_CPU_ARM_OR_ARM64
1599 { 1683 {
1600 if (CPU_IsSupported_NEON()) 1684 if (CPU_IsSupported_NEON())
1601 { 1685 {
1602 // #pragma message ("=== LzFind NEON") 1686 // #pragma message ("=== LzFind NEON")
1603 _PRF(printf("\n=== LzFind NEON\n")); 1687 PRF(printf("\n=== LzFind NEON\n"));
1604 f = LzFind_SaturSub_128; 1688 f = LzFind_SaturSub_128;
1605 } 1689 }
1606 // f = 0; // for debug 1690 // f = 0; // for debug
@@ -1609,20 +1693,25 @@ void LzFindPrepare()
1609 if (CPU_IsSupported_SSE41()) 1693 if (CPU_IsSupported_SSE41())
1610 { 1694 {
1611 // #pragma message ("=== LzFind SSE41") 1695 // #pragma message ("=== LzFind SSE41")
1612 _PRF(printf("\n=== LzFind SSE41\n")); 1696 PRF(printf("\n=== LzFind SSE41\n"));
1613 f = LzFind_SaturSub_128; 1697 f = LzFind_SaturSub_128;
1614 1698
1615 #ifdef USE_AVX2 1699 #ifdef USE_LZFIND_SATUR_SUB_256
1616 if (CPU_IsSupported_AVX2()) 1700 if (CPU_IsSupported_AVX2())
1617 { 1701 {
1618 // #pragma message ("=== LzFind AVX2") 1702 // #pragma message ("=== LzFind AVX2")
1619 _PRF(printf("\n=== LzFind AVX2\n")); 1703 PRF(printf("\n=== LzFind AVX2\n"));
1620 f = LzFind_SaturSub_256; 1704 f = LzFind_SaturSub_256;
1621 } 1705 }
1622 #endif 1706 #endif
1623 } 1707 }
1624 #endif // MY_CPU_ARM_OR_ARM64 1708 #endif // MY_CPU_ARM_OR_ARM64
1625 g_LzFind_SaturSub = f; 1709 g_LzFind_SaturSub = f;
1626 #endif // USE_SATUR_SUB_128 1710 #endif // USE_LZFIND_SATUR_SUB_128
1627 #endif // FORCE_SATUR_SUB_128 1711 #endif // FORCE_LZFIND_SATUR_SUB_128
1628} 1712}
1713
1714
1715#undef MOVE_POS
1716#undef MOVE_POS_RET
1717#undef PRF