Diffstat (limited to 'C/ZstdDec.c')
-rw-r--r--  C/ZstdDec.c  4064
1 files changed, 4064 insertions, 0 deletions
diff --git a/C/ZstdDec.c b/C/ZstdDec.c
new file mode 100644
index 0000000..ecf6d22
--- /dev/null
+++ b/C/ZstdDec.c
@@ -0,0 +1,4064 @@
/* ZstdDec.c -- Zstd Decoder
2024-01-21 : the code was developed by Igor Pavlov, using Zstandard format
  specification and original zstd decoder code as reference code.
original zstd decoder code: Copyright (c) Facebook, Inc. All rights reserved.
This source code is licensed under BSD 3-Clause License.
*/

#include "Precomp.h"

#include <string.h>
#include <stdlib.h>
// #include <stdio.h>

#include "Alloc.h"
#include "Xxh64.h"
#include "ZstdDec.h"
#include "CpuArch.h"

#if defined(MY_CPU_ARM64)
#include <arm_neon.h>
#endif

/* original-zstd still doesn't support window larger than 2 GiB.
   So we also limit our decoder for 2 GiB window: */
#if defined(MY_CPU_64BIT) && 0 == 1
  #define MAX_WINDOW_SIZE_LOG 41
#else
  #define MAX_WINDOW_SIZE_LOG 31
#endif

typedef
  #if MAX_WINDOW_SIZE_LOG < 32
    UInt32
  #else
    size_t
  #endif
  CZstdDecOffset;

// for debug: simpler and smaller code but slow:
// #define Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS

// #define SHOW_STAT
#ifdef SHOW_STAT
#include <stdio.h>
static unsigned g_Num_Blocks_Compressed = 0;
static unsigned g_Num_Blocks_memcpy = 0;
static unsigned g_Num_Wrap_memmove_Num = 0;
static unsigned g_Num_Wrap_memmove_Bytes = 0;
static unsigned g_NumSeqs_total = 0;
// static unsigned g_NumCopy = 0;
static unsigned g_NumOver = 0;
static unsigned g_NumOver2 = 0;
static unsigned g_Num_Match = 0;
static unsigned g_Num_Lits = 0;
static unsigned g_Num_LitsBig = 0;
static unsigned g_Num_Lit0 = 0;
static unsigned g_Num_Rep0 = 0;
static unsigned g_Num_Rep1 = 0;
static unsigned g_Num_Rep2 = 0;
static unsigned g_Num_Rep3 = 0;
static unsigned g_Num_Threshold_0 = 0;
static unsigned g_Num_Threshold_1 = 0;
static unsigned g_Num_Threshold_0sum = 0;
static unsigned g_Num_Threshold_1sum = 0;
#define STAT_UPDATE(v) v
#else
#define STAT_UPDATE(v)
#endif
#define STAT_INC(v) STAT_UPDATE(v++;)


typedef struct
{
  const Byte *ptr;
  size_t len;
}
CInBufPair;


#if defined(MY_CPU_ARM_OR_ARM64) || defined(MY_CPU_X86_OR_AMD64)
  #if (defined(__clang__) && (__clang_major__ >= 6)) \
      || (defined(__GNUC__) && (__GNUC__ >= 6))
    // disable for debug:
    #define Z7_ZSTD_DEC_USE_BSR
  #elif defined(_MSC_VER) && (_MSC_VER >= 1300)
    // #if defined(MY_CPU_ARM_OR_ARM64)
    #if (_MSC_VER >= 1600)
      #include <intrin.h>
    #endif
    // disable for debug:
    #define Z7_ZSTD_DEC_USE_BSR
  #endif
#endif

#ifdef Z7_ZSTD_DEC_USE_BSR
  #if defined(__clang__) || defined(__GNUC__)
    #define MY_clz(x) ((unsigned)__builtin_clz((UInt32)x))
  #else // #if defined(_MSC_VER)
    #ifdef MY_CPU_ARM_OR_ARM64
      #define MY_clz _CountLeadingZeros
    #endif // MY_CPU_ARM_OR_ARM64
  #endif // _MSC_VER
#elif !defined(Z7_ZSTD_DEC_USE_LOG_TABLE)
  #define Z7_ZSTD_DEC_USE_LOG_TABLE
#endif


static
Z7_FORCE_INLINE
unsigned GetHighestSetBit_32_nonzero_big(UInt32 num)
{
  // (num != 0)
  #ifdef MY_clz
    return 31 - MY_clz(num);
  #elif defined(Z7_ZSTD_DEC_USE_BSR)
  {
    unsigned long zz;
    _BitScanReverse(&zz, num);
    return zz;
  }
  #else
  {
    int i = -1;
    for (;;)
    {
      i++;
      num >>= 1;
      if (num == 0)
        return (unsigned)i;
    }
  }
  #endif
}

#ifdef Z7_ZSTD_DEC_USE_LOG_TABLE

#define R1(a) a, a
#define R2(a) R1(a), R1(a)
#define R3(a) R2(a), R2(a)
#define R4(a) R3(a), R3(a)
#define R5(a) R4(a), R4(a)
#define R6(a) R5(a), R5(a)
#define R7(a) R6(a), R6(a)
#define R8(a) R7(a), R7(a)
#define R9(a) R8(a), R8(a)

#define Z7_ZSTD_FSE_MAX_ACCURACY 9
// states[] values in FSE_Generate() can use (Z7_ZSTD_FSE_MAX_ACCURACY + 1) bits.
static const Byte k_zstd_LogTable[2 << Z7_ZSTD_FSE_MAX_ACCURACY] =
{
  R1(0), R1(1), R2(2), R3(3), R4(4), R5(5), R6(6), R7(7), R8(8), R9(9)
};

#define GetHighestSetBit_32_nonzero_small(num) (k_zstd_LogTable[num])
#else
#define GetHighestSetBit_32_nonzero_small GetHighestSetBit_32_nonzero_big
#endif


#ifdef MY_clz
  #define UPDATE_BIT_OFFSET_FOR_PADDING(b, bitOffset) \
    bitOffset -= (CBitCtr)(MY_clz(b) - 23);
#elif defined(Z7_ZSTD_DEC_USE_BSR)
  #define UPDATE_BIT_OFFSET_FOR_PADDING(b, bitOffset) \
    { unsigned long zz; _BitScanReverse(&zz, b); bitOffset -= 8; bitOffset += zz; }
#else
  #define UPDATE_BIT_OFFSET_FOR_PADDING(b, bitOffset) \
    for (;;) { bitOffset--; if (b & 0x80) { break; } b <<= 1; }
#endif

#define SET_bitOffset_TO_PAD(bitOffset, src, srcLen) \
{ \
  unsigned lastByte = (src)[(size_t)(srcLen) - 1]; \
  if (lastByte == 0) return SZ_ERROR_DATA; \
  bitOffset = (CBitCtr)((srcLen) * 8); \
  UPDATE_BIT_OFFSET_FOR_PADDING(lastByte, bitOffset) \
}
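
/* Editor's note, illustrative only (not part of the original decoder):
   a zstd bitstream is read backward, and its last byte carries a 1-bit
   end marker above the payload bits. For example, with srcLen == 2 and
   lastByte == 0x44 (binary 0100 0100), the marker is bit 6, and a
   32-bit MY_clz(0x44) == 25, so the macros compute:
     bitOffset = 2 * 8;       // 16
     bitOffset -= 25 - 23;    // 14 payload bits remain below the marker
   A zero last byte has no marker, so it is rejected as SZ_ERROR_DATA. */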

#ifndef Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS

#define SET_bitOffset_TO_PAD_and_SET_BIT_SIZE(bitOffset, src, srcLen_res) \
{ \
  unsigned lastByte = (src)[(size_t)(srcLen_res) - 1]; \
  if (lastByte == 0) return SZ_ERROR_DATA; \
  srcLen_res *= 8; \
  bitOffset = (CBitCtr)srcLen_res; \
  UPDATE_BIT_OFFSET_FOR_PADDING(lastByte, bitOffset) \
}

#endif

/*
typedef Int32 CBitCtr_signed;
typedef Int32 CBitCtr;
*/
// /*
typedef ptrdiff_t CBitCtr_signed;
typedef ptrdiff_t CBitCtr;
// */


#define MATCH_LEN_MIN 3
#define kBlockSizeMax (1u << 17)

// #define Z7_ZSTD_DEC_PRINT_TABLE

#ifdef Z7_ZSTD_DEC_PRINT_TABLE
#define NUM_OFFSET_SYMBOLS_PREDEF 29
#endif
#define NUM_OFFSET_SYMBOLS_MAX (MAX_WINDOW_SIZE_LOG + 1) // 32
#define NUM_LL_SYMBOLS 36
#define NUM_ML_SYMBOLS 53
#define FSE_NUM_SYMBOLS_MAX 53 // NUM_ML_SYMBOLS

// /*
#if !defined(MY_CPU_X86) || defined(__PIC__) || defined(MY_CPU_64BIT)
#define Z7_ZSTD_DEC_USE_BASES_IN_OBJECT
#endif
// */
// for debug:
// #define Z7_ZSTD_DEC_USE_BASES_LOCAL
// #define Z7_ZSTD_DEC_USE_BASES_IN_OBJECT

#define GLOBAL_TABLE(n) k_ ## n

#if defined(Z7_ZSTD_DEC_USE_BASES_LOCAL)
  #define BASES_TABLE(n) a_ ## n
#elif defined(Z7_ZSTD_DEC_USE_BASES_IN_OBJECT)
  #define BASES_TABLE(n) p->m_ ## n
#else
  #define BASES_TABLE(n) GLOBAL_TABLE(n)
#endif

#define Z7_ZSTD_DEC_USE_ML_PLUS3

#if defined(Z7_ZSTD_DEC_USE_BASES_LOCAL) || \
    defined(Z7_ZSTD_DEC_USE_BASES_IN_OBJECT)

#define SEQ_EXTRA_TABLES(n) \
  Byte   n ## SEQ_LL_EXTRA [NUM_LL_SYMBOLS]; \
  Byte   n ## SEQ_ML_EXTRA [NUM_ML_SYMBOLS]; \
  UInt32 n ## SEQ_LL_BASES [NUM_LL_SYMBOLS]; \
  UInt32 n ## SEQ_ML_BASES [NUM_ML_SYMBOLS]; \

#define Z7_ZSTD_DEC_USE_BASES_CALC

#ifdef Z7_ZSTD_DEC_USE_BASES_CALC

  #define FILL_LOC_BASES(n, startSum) \
    { unsigned i; UInt32 sum = startSum; \
      for (i = 0; i != Z7_ARRAY_SIZE(GLOBAL_TABLE(n ## _EXTRA)); i++) \
      { const unsigned a = GLOBAL_TABLE(n ## _EXTRA)[i]; \
        BASES_TABLE(n ## _BASES)[i] = sum; \
        /* if (sum != GLOBAL_TABLE(n ## _BASES)[i]) exit(1); */ \
        sum += (UInt32)1 << a; \
        BASES_TABLE(n ## _EXTRA)[i] = (Byte)a; }}

  #define FILL_LOC_BASES_ALL \
    FILL_LOC_BASES (SEQ_LL, 0) \
    FILL_LOC_BASES (SEQ_ML, MATCH_LEN_MIN) \

#else
  #define COPY_GLOBAL_ARR(n) \
    memcpy(BASES_TABLE(n), GLOBAL_TABLE(n), sizeof(GLOBAL_TABLE(n)));
  #define FILL_LOC_BASES_ALL \
    COPY_GLOBAL_ARR (SEQ_LL_EXTRA) \
    COPY_GLOBAL_ARR (SEQ_ML_EXTRA) \
    COPY_GLOBAL_ARR (SEQ_LL_BASES) \
    COPY_GLOBAL_ARR (SEQ_ML_BASES) \

#endif

#endif



/// The sequence decoding baseline and number of additional bits to read/add
#if !defined(Z7_ZSTD_DEC_USE_BASES_CALC)
static const UInt32 GLOBAL_TABLE(SEQ_LL_BASES) [NUM_LL_SYMBOLS] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
  0x2000, 0x4000, 0x8000, 0x10000
};
#endif

static const Byte GLOBAL_TABLE(SEQ_LL_EXTRA) [NUM_LL_SYMBOLS] =
{
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12,
  13, 14, 15, 16
};

#if !defined(Z7_ZSTD_DEC_USE_BASES_CALC)
static const UInt32 GLOBAL_TABLE(SEQ_ML_BASES) [NUM_ML_SYMBOLS] =
{
  3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
  35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
  0x1003, 0x2003, 0x4003, 0x8003, 0x10003
};
#endif

static const Byte GLOBAL_TABLE(SEQ_ML_EXTRA) [NUM_ML_SYMBOLS] =
{
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11,
  12, 13, 14, 15, 16
};


#ifdef Z7_ZSTD_DEC_PRINT_TABLE

static const Int16 SEQ_LL_PREDEF_DIST [NUM_LL_SYMBOLS] =
{
  4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
  -1,-1,-1,-1
};
static const Int16 SEQ_OFFSET_PREDEF_DIST [NUM_OFFSET_SYMBOLS_PREDEF] =
{
  1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1
};
static const Int16 SEQ_ML_PREDEF_DIST [NUM_ML_SYMBOLS] =
{
  1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
  -1,-1,-1,-1,-1
};

#endif

// typedef int FastInt;
// typedef Int32 FastInt32;
typedef unsigned FastInt;
typedef UInt32 FastInt32;
typedef FastInt32 CFseRecord;


#define FSE_REC_LEN_OFFSET 8
#define FSE_REC_STATE_OFFSET 16
#define GET_FSE_REC_SYM(st)   ((Byte)(st))
#define GET_FSE_REC_LEN(st)   ((Byte)((st) >> FSE_REC_LEN_OFFSET))
#define GET_FSE_REC_STATE(st) ((st) >> FSE_REC_STATE_OFFSET)

// #define FSE_REC_SYM_MASK (0xff)
// #define GET_FSE_REC_SYM(st) (st & FSE_REC_SYM_MASK)

#define W_BASE(state, len, sym) \
  (((UInt32)state << (4 + FSE_REC_STATE_OFFSET)) + \
   (len << FSE_REC_LEN_OFFSET) + (sym))
#define W(state, len, sym) W_BASE(state, len, sym)
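
/* Editor's note, illustrative only (not part of the original decoder):
   W() packs one predefined FSE cell into a single CFseRecord.
   For example, W(2,5,25) builds
     (2 << (4 + 16)) + (5 << 8) + 25 == 0x200519
   so GET_FSE_REC_SYM() == 25, GET_FSE_REC_LEN() == 5, and
   GET_FSE_REC_STATE() == 0x20 (baseline state 2, pre-shifted by 4). */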
static const CFseRecord k_PredefRecords_LL[1 << 6] = {
W(0,4, 0),W(1,4, 0),W(2,5, 1),W(0,5, 3),W(0,5, 4),W(0,5, 6),W(0,5, 7),W(0,5, 9),
W(0,5,10),W(0,5,12),W(0,6,14),W(0,5,16),W(0,5,18),W(0,5,19),W(0,5,21),W(0,5,22),
W(0,5,24),W(2,5,25),W(0,5,26),W(0,6,27),W(0,6,29),W(0,6,31),W(2,4, 0),W(0,4, 1),
W(0,5, 2),W(2,5, 4),W(0,5, 5),W(2,5, 7),W(0,5, 8),W(2,5,10),W(0,5,11),W(0,6,13),
W(2,5,16),W(0,5,17),W(2,5,19),W(0,5,20),W(2,5,22),W(0,5,23),W(0,4,25),W(1,4,25),
W(2,5,26),W(0,6,28),W(0,6,30),W(3,4, 0),W(1,4, 1),W(2,5, 2),W(2,5, 3),W(2,5, 5),
W(2,5, 6),W(2,5, 8),W(2,5, 9),W(2,5,11),W(2,5,12),W(0,6,15),W(2,5,17),W(2,5,18),
W(2,5,20),W(2,5,21),W(2,5,23),W(2,5,24),W(0,6,35),W(0,6,34),W(0,6,33),W(0,6,32)
};
static const CFseRecord k_PredefRecords_OF[1 << 5] = {
W(0,5, 0),W(0,4, 6),W(0,5, 9),W(0,5,15),W(0,5,21),W(0,5, 3),W(0,4, 7),W(0,5,12),
W(0,5,18),W(0,5,23),W(0,5, 5),W(0,4, 8),W(0,5,14),W(0,5,20),W(0,5, 2),W(1,4, 7),
W(0,5,11),W(0,5,17),W(0,5,22),W(0,5, 4),W(1,4, 8),W(0,5,13),W(0,5,19),W(0,5, 1),
W(1,4, 6),W(0,5,10),W(0,5,16),W(0,5,28),W(0,5,27),W(0,5,26),W(0,5,25),W(0,5,24)
};
#if defined(Z7_ZSTD_DEC_USE_ML_PLUS3)
#undef W
#define W(state, len, sym) W_BASE(state, len, (sym + MATCH_LEN_MIN))
#endif
static const CFseRecord k_PredefRecords_ML[1 << 6] = {
W(0,6, 0),W(0,4, 1),W(2,5, 2),W(0,5, 3),W(0,5, 5),W(0,5, 6),W(0,5, 8),W(0,6,10),
W(0,6,13),W(0,6,16),W(0,6,19),W(0,6,22),W(0,6,25),W(0,6,28),W(0,6,31),W(0,6,33),
W(0,6,35),W(0,6,37),W(0,6,39),W(0,6,41),W(0,6,43),W(0,6,45),W(1,4, 1),W(0,4, 2),
W(2,5, 3),W(0,5, 4),W(2,5, 6),W(0,5, 7),W(0,6, 9),W(0,6,12),W(0,6,15),W(0,6,18),
W(0,6,21),W(0,6,24),W(0,6,27),W(0,6,30),W(0,6,32),W(0,6,34),W(0,6,36),W(0,6,38),
W(0,6,40),W(0,6,42),W(0,6,44),W(2,4, 1),W(3,4, 1),W(1,4, 2),W(2,5, 4),W(2,5, 5),
W(2,5, 7),W(2,5, 8),W(0,6,11),W(0,6,14),W(0,6,17),W(0,6,20),W(0,6,23),W(0,6,26),
W(0,6,29),W(0,6,52),W(0,6,51),W(0,6,50),W(0,6,49),W(0,6,48),W(0,6,47),W(0,6,46)
};


// sum of freqs[] must be correct
// (numSyms != 0)
// (accuracy >= 5)
static
Z7_NO_INLINE
// Z7_FORCE_INLINE
void FSE_Generate(CFseRecord *table,
    const Int16 *const freqs, const size_t numSyms,
    const unsigned accuracy, UInt32 delta)
{
  size_t size = (size_t)1 << accuracy;
  // max value in states[x] is ((1 << accuracy) * 2)
  UInt16 states[FSE_NUM_SYMBOLS_MAX];
  {
    /* Symbols with "less than 1" probability get a single cell,
       starting from the end of the table.
       These symbols define a full state reset, reading (accuracy) bits. */
    size_t threshold = size;
    {
      size_t s = 0;
      do
        if (freqs[s] == -1)
        {
          table[--threshold] = (CFseRecord)s;
          states[s] = 1;
        }
      while (++s != numSyms);
    }

    #ifdef SHOW_STAT
    if (threshold == size)
    {
      STAT_INC(g_Num_Threshold_0)
      STAT_UPDATE(g_Num_Threshold_0sum += (unsigned)size;)
    }
    else
    {
      STAT_INC(g_Num_Threshold_1)
      STAT_UPDATE(g_Num_Threshold_1sum += (unsigned)size;)
    }
    #endif

    // { unsigned uuu; for (uuu = 0; uuu < 400; uuu++)
    {
      // Each (symbol) gets freqs[symbol] cells.
      // Cell allocation is spread, not linear.
      const size_t step = (size >> 1) + (size >> 3) + 3;
      size_t pos = 0;
      // const unsigned mask = size - 1;
      /*
      if (threshold == size)
      {
        size_t s = 0;
        size--;
        do
        {
          int freq = freqs[s];
          if (freq <= 0)
            continue;
          states[s] = (UInt16)freq;
          do
          {
            table[pos] = (CFseRecord)s;
            pos = (pos + step) & size; // & mask;
          }
          while (--freq);
        }
        while (++s != numSyms);
      }
      else
      */
      {
        size_t s = 0;
        size--;
        do
        {
          int freq = freqs[s];
          if (freq <= 0)
            continue;
          states[s] = (UInt16)freq;
          do
          {
            table[pos] = (CFseRecord)s;
            // we skip position, if it's already occupied by a "less than 1" probability symbol.
            // (step) is coprime to table size, so the cycle will visit each position exactly once
            do
              pos = (pos + step) & size; // & mask;
            while (pos >= threshold);
          }
          while (--freq);
        }
        while (++s != numSyms);
      }
      size++;
      // (pos != 0) is an unexpected case that means freqs[] are not correct,
      // i.e., a failure in the code (for example, an incorrect predefined freq[] table).
      // if (pos != 0) return SZ_ERROR_FAIL;
    }
    // }
  }
  {
    const CFseRecord * const limit = table + size;
    delta = ((UInt32)size << FSE_REC_STATE_OFFSET) - delta;
    /* State increases by symbol over time, decreasing number of bits.
       Baseline increases until the bit threshold is passed, at which point it resets to 0 */
    do
    {
      #define TABLE_ITER(a) \
      { \
        const FastInt sym = (FastInt)table[a]; \
        const unsigned nextState = states[sym]; \
        unsigned nb; \
        states[sym] = (UInt16)(nextState + 1); \
        nb = accuracy - GetHighestSetBit_32_nonzero_small(nextState); \
        table[a] = (CFseRecord)(sym - delta \
            + ((UInt32)nb << FSE_REC_LEN_OFFSET) \
            + ((UInt32)nextState << FSE_REC_STATE_OFFSET << nb)); \
      }
      TABLE_ITER(0)
      TABLE_ITER(1)
      table += 2;
    }
    while (table != limit);
  }
}
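
/* Editor's note, illustrative only (not part of the original decoder):
   the spread step (size >> 1) + (size >> 3) + 3 is odd, hence coprime
   to the power-of-two table size, so pos = (pos + step) & (size - 1)
   visits every cell exactly once (cells at or above threshold, already
   taken by "less than 1" symbols, are skipped). For accuracy == 5:
     size == 32, step == 16 + 4 + 3 == 23
     pos:  0, 23, 14, 5, 28, 19, 10, 1, ...   (all 32 cells, no repeats) */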


#ifdef Z7_ZSTD_DEC_PRINT_TABLE

static void Print_Predef(unsigned predefAccuracy,
    const unsigned numSymsPredef,
    const Int16 * const predefFreqs,
    const CFseRecord *checkTable)
{
  CFseRecord table[1 << 6];
  unsigned i;
  FSE_Generate(table, predefFreqs, numSymsPredef, predefAccuracy,
      #if defined(Z7_ZSTD_DEC_USE_ML_PLUS3)
        numSymsPredef == NUM_ML_SYMBOLS ? MATCH_LEN_MIN :
      #endif
        0
      );
  if (memcmp(table, checkTable, sizeof(UInt32) << predefAccuracy) != 0)
    exit(1);
  for (i = 0; i < (1u << predefAccuracy); i++)
  {
    const UInt32 v = table[i];
    const unsigned state = (unsigned)(GET_FSE_REC_STATE(v));
    if (state & 0xf)
      exit(1);
    if (i != 0)
    {
      printf(",");
      if (i % 8 == 0)
        printf("\n");
    }
    printf("W(%d,%d,%2d)",
        (unsigned)(state >> 4),
        (unsigned)((v >> FSE_REC_LEN_OFFSET) & 0xff),
        (unsigned)GET_FSE_REC_SYM(v));
  }
  printf("\n\n");
}

#endif


#define GET16(dest, p) { const Byte *ptr = p; dest = GetUi16(ptr); }
#define GET32(dest, p) { const Byte *ptr = p; dest = GetUi32(ptr); }

// (1 <= numBits <= 9)
#define FORWARD_READ_BITS(destVal, numBits, mask) \
  { const CBitCtr_signed bos3 = (bitOffset) >> 3; \
    if (bos3 >= 0) return SZ_ERROR_DATA; \
    GET16(destVal, src + bos3) \
    destVal >>= (bitOffset) & 7; \
    bitOffset += (CBitCtr_signed)(numBits); \
    mask = (1u << (numBits)) - 1; \
    destVal &= mask; \
  }

#define FORWARD_READ_1BIT(destVal) \
  { const CBitCtr_signed bos3 = (bitOffset) >> 3; \
    if (bos3 >= 0) return SZ_ERROR_DATA; \
    destVal = *(src + bos3); \
    destVal >>= (bitOffset) & 7; \
    (bitOffset)++; \
    destVal &= 1; \
  }


// in: (accuracyMax <= 9)
// at least 2 bytes will be processed from (in) stream.
// at return: (in->len > 0)
static
Z7_NO_INLINE
SRes FSE_DecodeHeader(CFseRecord *const table,
    CInBufPair *const in,
    const unsigned accuracyMax,
    Byte *const accuracyRes,
    unsigned numSymbolsMax)
{
  unsigned accuracy;
  unsigned remain1;
  unsigned syms;
  Int16 freqs[FSE_NUM_SYMBOLS_MAX + 3]; // +3 for overwrite (repeat)
  const Byte *src = in->ptr;
  CBitCtr_signed bitOffset = (CBitCtr_signed)in->len - 1;
  if (bitOffset <= 0)
    return SZ_ERROR_DATA;
  accuracy = *src & 0xf;
  accuracy += 5;
  if (accuracy > accuracyMax)
    return SZ_ERROR_DATA;
  *accuracyRes = (Byte)accuracy;
  remain1 = (1u << accuracy) + 1; // (it's remain_freqs_sum + 1)
  syms = 0;
  src += bitOffset; // src points to last byte
  bitOffset = 4 - (bitOffset << 3);

  for (;;)
  {
    // (2 <= remain1)
    const unsigned bits = GetHighestSetBit_32_nonzero_small((unsigned)remain1);
    // (1 <= bits <= accuracy)
    unsigned val; // it must be unsigned or int
    unsigned mask;
    FORWARD_READ_BITS(val, bits, mask)
    {
      const unsigned val2 = remain1 + val - mask;
      if (val2 > mask)
      {
        unsigned bit;
        FORWARD_READ_1BIT(bit)
        if (bit)
          val = val2;
      }
    }
    {
      // (remain1 >= 2)
      // (0 <= (int)val <= remain1)
      val = (unsigned)((int)val - 1);
      // val now is "probability" of symbol
      // (probability == -1) means "less than 1" frequency.
      // (-1 <= (int)val <= remain1 - 1)
      freqs[syms++] = (Int16)(int)val;
      if (val != 0)
      {
        remain1 -= (int)val < 0 ? 1u : (unsigned)val;
        // remain1 -= val;
        // val >>= (sizeof(val) * 8 - 2);
        // remain1 -= val & 2;
        // freqs[syms++] = (Int16)(int)val;
        // syms++;
        if (remain1 == 1)
          break;
        if (syms >= FSE_NUM_SYMBOLS_MAX)
          return SZ_ERROR_DATA;
      }
      else // if (val == 0)
      {
        // freqs[syms++] = 0;
        // syms++;
        for (;;)
        {
          unsigned repeat;
          FORWARD_READ_BITS(repeat, 2, mask)
          freqs[syms    ] = 0;
          freqs[syms + 1] = 0;
          freqs[syms + 2] = 0;
          syms += repeat;
          if (syms >= FSE_NUM_SYMBOLS_MAX)
            return SZ_ERROR_DATA;
          if (repeat != 3)
            break;
        }
      }
    }
  }

  if (syms > numSymbolsMax)
    return SZ_ERROR_DATA;
  bitOffset += 7;
  bitOffset >>= 3;
  if (bitOffset > 0)
    return SZ_ERROR_DATA;
  in->ptr = src + bitOffset;
  in->len = (size_t)(1 - bitOffset);
  {
    // unsigned uuu; for (uuu = 0; uuu < 50; uuu++)
    FSE_Generate(table, freqs, syms, accuracy,
        #if defined(Z7_ZSTD_DEC_USE_ML_PLUS3)
          numSymbolsMax == NUM_ML_SYMBOLS ? MATCH_LEN_MIN :
        #endif
          0
        );
  }
  return SZ_OK;
}
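
/* Editor's note, illustrative worked example (not part of the original
   decoder): each frequency is read with a bit count derived from the
   remaining probability budget. With accuracy == 5, remain1 starts at
   (1 << 5) + 1 == 33, so bits == GetHighestSetBit(33) == 5 and
   mask == 31; large values are disambiguated with one extra bit.
   If the first symbol takes probability 7, remain1 drops to 26 and the
   next read needs only GetHighestSetBit(26) == 4 bits. Decoding stops
   when remain1 reaches 1. */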


// ---------- HUFFMAN ----------

#define HUF_MAX_BITS 12
#define HUF_MAX_SYMBS 256
#define HUF_DUMMY_SIZE (128 + 8 * 2) // it must be a multiple of 8
// #define HUF_DUMMY_SIZE 0
#define HUF_TABLE_SIZE ((2 << HUF_MAX_BITS) + HUF_DUMMY_SIZE)
#define HUF_GET_SYMBOLS(table) ((table) + (1 << HUF_MAX_BITS) + HUF_DUMMY_SIZE)
// #define HUF_GET_LENS(table) (table)

typedef struct
{
  // Byte table[HUF_TABLE_SIZE];
  UInt64 table64[HUF_TABLE_SIZE / sizeof(UInt64)];
}
CZstdDecHufTable;

/*
Input:
  numSyms != 0
  (bits) array size must be aligned for 2
  if (numSyms & 1), then bits[numSyms] == 0,
  Huffman tree must be correct before Huf_Build() call:
    (sum (1/2^bits[i]) == 1).
    && (bits[i] <= HUF_MAX_BITS)
*/
static
Z7_FORCE_INLINE
void Huf_Build(Byte * const table,
    const Byte *bits, const unsigned numSyms)
{
  unsigned counts0[HUF_MAX_BITS + 1];
  unsigned counts1[HUF_MAX_BITS + 1];
  const Byte * const bitsEnd = bits + numSyms;
  // /*
  {
    unsigned t;
    for (t = 0; t < Z7_ARRAY_SIZE(counts0); t++) counts0[t] = 0;
    for (t = 0; t < Z7_ARRAY_SIZE(counts1); t++) counts1[t] = 0;
  }
  // */
  // memset(counts0, 0, sizeof(counts0));
  // memset(counts1, 0, sizeof(counts1));
  {
    const Byte *bits2 = bits;
    // we access additional bits[symbol] if (numSyms & 1)
    do
    {
      counts0[bits2[0]]++;
      counts1[bits2[1]]++;
    }
    while ((bits2 += 2) < bitsEnd);
  }
  {
    unsigned r = 0;
    unsigned i = HUF_MAX_BITS;
    // Byte *lens = HUF_GET_LENS(symbols);
    do
    {
      const unsigned num = (counts0[i] + counts1[i]) << (HUF_MAX_BITS - i);
      counts0[i] = r;
      if (num)
      {
        Byte *lens = &table[r];
        r += num;
        memset(lens, (int)i, num);
      }
    }
    while (--i);
    counts0[0] = 0; // for speculated loads
    // no need for check:
    // if (r != (UInt32)1 << HUF_MAX_BITS) exit(0);
  }
  {
    #ifdef MY_CPU_64BIT
      UInt64
    #else
      UInt32
    #endif
      v = 0;
    Byte *symbols = HUF_GET_SYMBOLS(table);
    do
    {
      const unsigned nb = *bits++;
      if (nb)
      {
        const unsigned code = counts0[nb];
        const unsigned num = (1u << HUF_MAX_BITS) >> nb;
        counts0[nb] = code + num;
        // memset(&symbols[code], i, num);
        // /*
        {
          Byte *s2 = &symbols[code];
          if (num <= 2)
          {
            s2[0] = (Byte)v;
            s2[(size_t)num - 1] = (Byte)v;
          }
          else if (num <= 8)
          {
            *(UInt32 *)(void *)s2 = (UInt32)v;
            *(UInt32 *)(void *)(s2 + (size_t)num - 4) = (UInt32)v;
          }
          else
          {
            #ifdef MY_CPU_64BIT
              UInt64 *s = (UInt64 *)(void *)s2;
              const UInt64 *lim = (UInt64 *)(void *)(s2 + num);
              do
              {
                s[0] = v; s[1] = v; s += 2;
              }
              while (s != lim);
            #else
              UInt32 *s = (UInt32 *)(void *)s2;
              const UInt32 *lim = (const UInt32 *)(const void *)(s2 + num);
              do
              {
                s[0] = v; s[1] = v; s += 2;
                s[0] = v; s[1] = v; s += 2;
              }
              while (s != lim);
            #endif
          }
        }
        // */
      }
      v +=
        #ifdef MY_CPU_64BIT
          0x0101010101010101;
        #else
          0x01010101;
        #endif
    }
    while (bits != bitsEnd);
  }
}
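
/* Editor's note, illustrative only (not part of the original decoder):
   Huf_Build() fills a flat lookup table indexed by the top HUF_MAX_BITS
   (12) bits of the stream:
     table[0 .. (1 << 12) - 1]           : code length to consume
     HUF_GET_SYMBOLS(table)[same index]  : decoded symbol
   A symbol with code length nb owns ((1 << 12) >> nb) consecutive cells,
   so a 4-bit code covers 256 cells and a 12-bit code covers one cell. */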



// how many bytes (src) was moved back from original value.
// we need (HUF_SRC_OFFSET == 3) for optimized 32-bit memory access
#define HUF_SRC_OFFSET 3

// v <<= 8 - (bitOffset & 7) + numBits;
// v >>= 32 - HUF_MAX_BITS;
#define HUF_GET_STATE(v, bitOffset, numBits) \
  GET32(v, src + (HUF_SRC_OFFSET - 3) + ((CBitCtr_signed)bitOffset >> 3)) \
  v >>= 32 - HUF_MAX_BITS - 8 + ((unsigned)bitOffset & 7) - numBits; \
  v &= (1u << HUF_MAX_BITS) - 1; \

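
/* Editor's note, illustrative only (not part of the original decoder),
   assuming HUF_MAX_BITS == 12: with (bitOffset & 7) == 3 and
   numBits == 0, the shift is 32 - 12 - 8 + 3 - 0 == 15, so the GET32
   load plus this shift leaves exactly the next 12 bits of the
   backward-read stream in the low bits of v after the final mask. */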

#ifndef Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS
#if defined(MY_CPU_AMD64) && defined(_MSC_VER) && _MSC_VER == 1400 \
    || !defined(MY_CPU_X86_OR_AMD64) \
    // || 1 == 1 /* for debug : to force STREAM4_PRELOAD mode */
  // we need big number (>=16) of registers for PRELOAD4
  #define Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD4
  // #define Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD2 // for debug
#endif
#endif

// for debug: simpler and smaller code but slow:
// #define Z7_ZSTD_DEC_USE_HUF_STREAM1_SIMPLE

#if defined(Z7_ZSTD_DEC_USE_HUF_STREAM1_SIMPLE) || \
   !defined(Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS)

#define HUF_DECODE(bitOffset, dest) \
{ \
  UInt32 v; \
  HUF_GET_STATE(v, bitOffset, 0) \
  bitOffset -= table[v]; \
  *(dest) = symbols[v]; \
  if ((CBitCtr_signed)bitOffset < 0) return SZ_ERROR_DATA; \
}

#endif

#if !defined(Z7_ZSTD_DEC_USE_HUF_STREAM1_SIMPLE) || \
    defined(Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD4) || \
    defined(Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD2) \

#define HUF_DECODE_2_INIT(v, bitOffset) \
  HUF_GET_STATE(v, bitOffset, 0)

#define HUF_DECODE_2(v, bitOffset, dest) \
{ \
  unsigned numBits; \
  numBits = table[v]; \
  *(dest) = symbols[v]; \
  HUF_GET_STATE(v, bitOffset, numBits) \
  bitOffset -= (CBitCtr)numBits; \
  if ((CBitCtr_signed)bitOffset < 0) return SZ_ERROR_DATA; \
}

#endif


// src == ptr - HUF_SRC_OFFSET
// we are allowed to access 3 bytes before start of input buffer
static
Z7_NO_INLINE
SRes Huf_Decompress_1stream(const Byte * const table,
    const Byte *src, const size_t srcLen,
    Byte *dest, const size_t destLen)
{
  CBitCtr bitOffset;
  if (srcLen == 0)
    return SZ_ERROR_DATA;
  SET_bitOffset_TO_PAD (bitOffset, src + HUF_SRC_OFFSET, srcLen)
  if (destLen)
  {
    const Byte *symbols = HUF_GET_SYMBOLS(table);
    const Byte *destLim = dest + destLen;
    #ifdef Z7_ZSTD_DEC_USE_HUF_STREAM1_SIMPLE
    {
      do
      {
        HUF_DECODE (bitOffset, dest)
      }
      while (++dest != destLim);
    }
    #else
    {
      UInt32 v;
      HUF_DECODE_2_INIT (v, bitOffset)
      do
      {
        HUF_DECODE_2 (v, bitOffset, dest)
      }
      while (++dest != destLim);
    }
    #endif
  }
  return bitOffset == 0 ? SZ_OK : SZ_ERROR_DATA;
}

// for debug : it reduces register pressure : but array copy can be slow :
// #define Z7_ZSTD_DEC_USE_HUF_LOCAL

// src == ptr + (6 - HUF_SRC_OFFSET)
// srcLen >= 10
// we are allowed to access 3 bytes before start of input buffer
static
Z7_NO_INLINE
SRes Huf_Decompress_4stream(const Byte * const
    #ifdef Z7_ZSTD_DEC_USE_HUF_LOCAL
      table2,
    #else
      table,
    #endif
    const Byte *src, size_t srcLen,
    Byte *dest, size_t destLen)
{
  #ifdef Z7_ZSTD_DEC_USE_HUF_LOCAL
  Byte table[HUF_TABLE_SIZE];
  #endif
  UInt32 sizes[3];
  const size_t delta = (destLen + 3) / 4;
  if ((sizes[0] = GetUi16(src + (0 + HUF_SRC_OFFSET - 6))) == 0) return SZ_ERROR_DATA;
  if ((sizes[1] = GetUi16(src + (2 + HUF_SRC_OFFSET - 6))) == 0) return SZ_ERROR_DATA;
  sizes[1] += sizes[0];
  if ((sizes[2] = GetUi16(src + (4 + HUF_SRC_OFFSET - 6))) == 0) return SZ_ERROR_DATA;
  sizes[2] += sizes[1];
  srcLen -= 6;
  if (srcLen <= sizes[2])
    return SZ_ERROR_DATA;

  #ifdef Z7_ZSTD_DEC_USE_HUF_LOCAL
  {
    // unsigned i = 0; for(; i < 1000; i++)
    memcpy(table, table2, HUF_TABLE_SIZE);
  }
  #endif

  #ifndef Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS
  {
    CBitCtr bitOffset_0,
            bitOffset_1,
            bitOffset_2,
            bitOffset_3;
    {
      SET_bitOffset_TO_PAD_and_SET_BIT_SIZE (bitOffset_0, src + HUF_SRC_OFFSET, sizes[0])
      SET_bitOffset_TO_PAD_and_SET_BIT_SIZE (bitOffset_1, src + HUF_SRC_OFFSET, sizes[1])
      SET_bitOffset_TO_PAD_and_SET_BIT_SIZE (bitOffset_2, src + HUF_SRC_OFFSET, sizes[2])
      SET_bitOffset_TO_PAD (bitOffset_3, src + HUF_SRC_OFFSET, srcLen)
    }
    {
      const Byte * const symbols = HUF_GET_SYMBOLS(table);
      Byte *destLim = dest + destLen - delta * 3;

      if (dest != destLim)
      #ifdef Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD4
      {
        UInt32 v_0, v_1, v_2, v_3;
        HUF_DECODE_2_INIT (v_0, bitOffset_0)
        HUF_DECODE_2_INIT (v_1, bitOffset_1)
        HUF_DECODE_2_INIT (v_2, bitOffset_2)
        HUF_DECODE_2_INIT (v_3, bitOffset_3)
        // #define HUF_DELTA (1 << 17) / 4
        do
        {
          HUF_DECODE_2 (v_3, bitOffset_3, dest + delta * 3)
          HUF_DECODE_2 (v_2, bitOffset_2, dest + delta * 2)
          HUF_DECODE_2 (v_1, bitOffset_1, dest + delta)
          HUF_DECODE_2 (v_0, bitOffset_0, dest)
        }
        while (++dest != destLim);
        /*
        {// unsigned y = 0; for (;y < 1; y++)
        {
          const size_t num = destLen - delta * 3;
          Byte *orig = dest - num;
          memmove (orig + delta,     orig + HUF_DELTA,     num);
          memmove (orig + delta * 2, orig + HUF_DELTA * 2, num);
          memmove (orig + delta * 3, orig + HUF_DELTA * 3, num);
        }}
        */
      }
      #elif defined(Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD2)
      {
        UInt32 v_0, v_1, v_2, v_3;
        HUF_DECODE_2_INIT (v_0, bitOffset_0)
        HUF_DECODE_2_INIT (v_1, bitOffset_1)
        do
        {
          HUF_DECODE_2 (v_0, bitOffset_0, dest)
          HUF_DECODE_2 (v_1, bitOffset_1, dest + delta)
        }
        while (++dest != destLim);
        dest = destLim - (destLen - delta * 3);
        dest += delta * 2;
        destLim += delta * 2;
        HUF_DECODE_2_INIT (v_2, bitOffset_2)
        HUF_DECODE_2_INIT (v_3, bitOffset_3)
        do
        {
          HUF_DECODE_2 (v_2, bitOffset_2, dest)
          HUF_DECODE_2 (v_3, bitOffset_3, dest + delta)
        }
        while (++dest != destLim);
        dest -= delta * 2;
        destLim -= delta * 2;
      }
      #else
      {
        do
        {
          HUF_DECODE (bitOffset_3, dest + delta * 3)
          HUF_DECODE (bitOffset_2, dest + delta * 2)
          HUF_DECODE (bitOffset_1, dest + delta)
          HUF_DECODE (bitOffset_0, dest)
        }
        while (++dest != destLim);
      }
      #endif

      if (bitOffset_3 != (CBitCtr)sizes[2])
        return SZ_ERROR_DATA;
      if (destLen &= 3)
      {
        destLim = dest + 4 - destLen;
        do
        {
          HUF_DECODE (bitOffset_2, dest + delta * 2)
          HUF_DECODE (bitOffset_1, dest + delta)
          HUF_DECODE (bitOffset_0, dest)
        }
        while (++dest != destLim);
      }
      if (   bitOffset_0 != 0
          || bitOffset_1 != (CBitCtr)sizes[0]
          || bitOffset_2 != (CBitCtr)sizes[1])
        return SZ_ERROR_DATA;
    }
  }
  #else // Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS
  {
    unsigned i;
    for (i = 0; i < 4; i++)
    {
      size_t d = destLen;
      size_t size = srcLen;
      if (i != 3)
      {
        d = delta;
        size = sizes[i];
      }
      if (i != 0)
        size -= sizes[i - 1];
      destLen -= d;
      RINOK(Huf_Decompress_1stream(table, src, size, dest, d))
      dest += d;
      src += size;
    }
  }
  #endif

  return SZ_OK;
}
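
/* Editor's note, illustrative only (not part of the original decoder):
   a 4-stream literal section starts with a 6-byte jump table of three
   little-endian UInt16 stream sizes; the 4th stream size is implied by
   the remaining input. For example, stream sizes 100, 200, 300 are
   stored as bytes 64 00 C8 00 2C 01, and after the cumulative sums
   above, sizes[] == { 100, 300, 600 }. */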


// (in->len != 0)
// we are allowed to access in->ptr[-3]
// at least 2 bytes in (in->ptr) will be processed
static SRes Huf_DecodeTable(CZstdDecHufTable *const p, CInBufPair *const in)
{
  Byte weights[HUF_MAX_SYMBS + 1]; // +1 for extra write for loop unroll
  unsigned numSyms;
  const unsigned header = *(in->ptr)++;
  in->len--;
  // memset(weights, 0, sizeof(weights));
  if (header >= 128)
  {
    // direct representation: 4 bits field (0-15) per weight
    numSyms = header - 127;
    // numSyms != 0
    {
      const size_t numBytes = (numSyms + 1) / 2;
      const Byte *const ws = in->ptr;
      size_t i = 0;
      if (in->len < numBytes)
        return SZ_ERROR_DATA;
      in->ptr += numBytes;
      in->len -= numBytes;
      do
      {
        const unsigned b = ws[i];
        weights[i * 2    ] = (Byte)(b >> 4);
        weights[i * 2 + 1] = (Byte)(b & 0xf);
      }
      while (++i != numBytes);
      /* 7ZIP: we can restore correct zero value for weights[numSyms],
         if we want to use zero values starting from numSyms in code below. */
      // weights[numSyms] = 0;
    }
  }
  else
  {
    #define MAX_ACCURACY_LOG_FOR_WEIGHTS 6
    CFseRecord table[1 << MAX_ACCURACY_LOG_FOR_WEIGHTS];

    Byte accuracy;
    const Byte *src;
    size_t srcLen;
    if (in->len < header)
      return SZ_ERROR_DATA;
    {
      CInBufPair fse_stream;
      fse_stream.len = header;
      fse_stream.ptr = in->ptr;
      in->ptr += header;
      in->len -= header;
      RINOK(FSE_DecodeHeader(table, &fse_stream,
          MAX_ACCURACY_LOG_FOR_WEIGHTS,
          &accuracy,
          16 // num weight symbols max (max-symbol is 15)
          ))
      // at least 2 bytes were processed in fse_stream.
      // (srcLen > 0) after FSE_DecodeHeader()
      // if (srcLen == 0) return SZ_ERROR_DATA;
      src = fse_stream.ptr;
      srcLen = fse_stream.len;
    }
    // we are allowed to access src[-5]
    {
      // unsigned yyy = 200; do {
      CBitCtr bitOffset;
      FastInt32 state1, state2;
      SET_bitOffset_TO_PAD (bitOffset, src, srcLen)
      state1 = accuracy;
      src -= state1 >> 2; // src -= 1; // for GET16() optimization
      state1 <<= FSE_REC_LEN_OFFSET;
      state2 = state1;
      numSyms = 0;
      for (;;)
      {
        #define FSE_WEIGHT_DECODE(st) \
        { \
          const unsigned bits = GET_FSE_REC_LEN(st); \
          FastInt r; \
          GET16(r, src + (bitOffset >> 3)) \
          r >>= (unsigned)bitOffset & 7; \
          if ((CBitCtr_signed)(bitOffset -= (CBitCtr)bits) < 0) \
          { if (bitOffset + (CBitCtr)bits != 0) \
              return SZ_ERROR_DATA; \
            break; } \
          r &= 0xff; \
          r >>= 8 - bits; \
          st = table[GET_FSE_REC_STATE(st) + r]; \
          weights[numSyms++] = (Byte)GET_FSE_REC_SYM(st); \
        }
        FSE_WEIGHT_DECODE (state1)
        FSE_WEIGHT_DECODE (state2)
        if (numSyms == HUF_MAX_SYMBS)
          return SZ_ERROR_DATA;
      }
      // src += (unsigned)accuracy >> 2; } while (--yyy);
    }
  }

  // Build using weights:
  {
    UInt32 sum = 0;
    {
      // numSyms >= 1
      unsigned i = 0;
      weights[numSyms] = 0;
      do
      {
        sum += ((UInt32)1 << weights[i    ]) & ~(UInt32)1;
        sum += ((UInt32)1 << weights[i + 1]) & ~(UInt32)1;
        i += 2;
      }
      while (i < numSyms);
      if (sum == 0)
        return SZ_ERROR_DATA;
    }
    {
      const unsigned maxBits = GetHighestSetBit_32_nonzero_big(sum) + 1;
      {
        const UInt32 left = ((UInt32)1 << maxBits) - sum;
        // (left != 0)
        // (left) must be power of 2 in correct stream
        if (left & (left - 1))
          return SZ_ERROR_DATA;
        weights[numSyms++] = (Byte)GetHighestSetBit_32_nonzero_big(left);
      }
      // if (numSyms & 1)
      weights[numSyms] = 0; // for loop unroll
      // numSyms >= 2
      {
        unsigned i = 0;
        do
        {
          /*
          #define WEIGHT_ITER(a) \
            { unsigned w = weights[i + (a)]; \
              const unsigned t = maxBits - w; \
              w = w ? t : w; \
              if (w > HUF_MAX_BITS) return SZ_ERROR_DATA; \
              weights[i + (a)] = (Byte)w; }
          */
          // /*
          #define WEIGHT_ITER(a) \
            { unsigned w = weights[i + (a)]; \
              if (w) { \
                w = maxBits - w; \
                if (w > HUF_MAX_BITS) return SZ_ERROR_DATA; \
                weights[i + (a)] = (Byte)w; }}
          // */
          WEIGHT_ITER(0)
          // WEIGHT_ITER(1)
          // i += 2;
        }
        while (++i != numSyms);
      }
    }
  }
  {
    // unsigned yyy; for (yyy = 0; yyy < 100; yyy++)
    Huf_Build((Byte *)(void *)p->table64, weights, numSyms);
  }
  return SZ_OK;
}
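
/* Editor's note, illustrative worked example (not part of the original
   decoder): for weights { 4, 3, 2, 0, 1 },
     sum == 16 + 8 + 4 + 0 + 2 == 30, so maxBits == 4 + 1 == 5,
     left == (1 << 5) - 30 == 2, a power of 2, giving the implied last
     weight GetHighestSetBit(2) == 1.
   Code lengths are then (maxBits - weight) for nonzero weights:
     { 1, 2, 3, 0, 4, 4 }, a complete Huffman tree
     (1/2 + 1/4 + 1/8 + 1/16 + 1/16 == 1). */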


typedef enum
{
  k_SeqMode_Predef = 0,
  k_SeqMode_RLE = 1,
  k_SeqMode_FSE = 2,
  k_SeqMode_Repeat = 3
}
z7_zstd_enum_SeqMode;

// predefAccuracy == 5 for OFFSET symbols
// predefAccuracy == 6 for MATCH/LIT LEN symbols
static
SRes
Z7_NO_INLINE
// Z7_FORCE_INLINE
FSE_Decode_SeqTable(CFseRecord * const table,
    CInBufPair * const in,
    unsigned predefAccuracy,
    Byte * const accuracyRes,
    unsigned numSymbolsMax,
    const CFseRecord * const predefs,
    const unsigned seqMode)
{
  // UNUSED_VAR(numSymsPredef)
  // UNUSED_VAR(predefFreqs)
  if (seqMode == k_SeqMode_FSE)
  {
    // unsigned y = 50; CInBufPair in2 = *in; do { *in = in2; RINOK(
    return
      FSE_DecodeHeader(table, in,
          predefAccuracy + 3, // accuracyMax
          accuracyRes,
          numSymbolsMax)
      ;
    // )} while (--y); return SZ_OK;
  }
  // numSymsMax = numSymsPredef + ((predefAccuracy & 1) * (32 - 29))); // numSymsMax
  // numSymsMax == 32 for offsets

  if (seqMode == k_SeqMode_Predef)
  {
    *accuracyRes = (Byte)predefAccuracy;
    memcpy(table, predefs, sizeof(UInt32) << predefAccuracy);
    return SZ_OK;
  }

  // (seqMode == k_SeqMode_RLE)
  if (in->len == 0)
    return SZ_ERROR_DATA;
  in->len--;
  {
    const Byte *ptr = in->ptr;
    const Byte sym = ptr[0];
    in->ptr = ptr + 1;
    table[0] = (FastInt32)sym
        #if defined(Z7_ZSTD_DEC_USE_ML_PLUS3)
          + (numSymbolsMax == NUM_ML_SYMBOLS ? MATCH_LEN_MIN : 0)
        #endif
        ;
    *accuracyRes = 0;
  }
  return SZ_OK;
}


typedef struct
{
  CFseRecord of[1 << 8];
  CFseRecord ll[1 << 9];
  CFseRecord ml[1 << 9];
}
CZstdDecFseTables;


typedef struct
{
  Byte *win;
  SizeT cycSize;
  /*
    if (outBuf_fromCaller) : cycSize = outBufSize_fromCaller
    else {
      if ( isCyclicMode) : cycSize = cyclic_buffer_size = (winSize + extra_space)
      if (!isCyclicMode) : cycSize = ContentSize,
        (isCyclicMode == true) if (ContentSize >= winSize) or ContentSize is unknown
    }
  */
  SizeT winPos;

  CZstdDecOffset reps[3];

  Byte ll_accuracy;
  Byte of_accuracy;
  Byte ml_accuracy;
  // Byte seqTables_wereSet;
  Byte litHuf_wasSet;

  Byte *literalsBase;

  size_t winSize;       // from header
  size_t totalOutCheck; // totalOutCheck <= winSize

  #ifdef Z7_ZSTD_DEC_USE_BASES_IN_OBJECT
  SEQ_EXTRA_TABLES(m_)
  #endif
  // UInt64 _pad_Alignment; // is not required now
  CZstdDecFseTables fse;
  CZstdDecHufTable huf;
}
CZstdDec1;

#define ZstdDec1_GET_BLOCK_SIZE_LIMIT(p) \
    ((p)->winSize < kBlockSizeMax ? (UInt32)(p)->winSize : kBlockSizeMax)

#define SEQ_TABLES_WERE_NOT_SET_ml_accuracy 1 // accuracy=1 is not used by zstd
#define IS_SEQ_TABLES_WERE_SET(p) (((p)->ml_accuracy != SEQ_TABLES_WERE_NOT_SET_ml_accuracy))
// #define IS_SEQ_TABLES_WERE_SET(p) ((p)->seqTables_wereSet)


static void ZstdDec1_Construct(CZstdDec1 *p)
{
  #ifdef Z7_ZSTD_DEC_PRINT_TABLE
  Print_Predef(6, NUM_LL_SYMBOLS, SEQ_LL_PREDEF_DIST, k_PredefRecords_LL);
  Print_Predef(5, NUM_OFFSET_SYMBOLS_PREDEF, SEQ_OFFSET_PREDEF_DIST, k_PredefRecords_OF);
  Print_Predef(6, NUM_ML_SYMBOLS, SEQ_ML_PREDEF_DIST, k_PredefRecords_ML);
  #endif

  p->win = NULL;
  p->cycSize = 0;
  p->literalsBase = NULL;
  #ifdef Z7_ZSTD_DEC_USE_BASES_IN_OBJECT
  FILL_LOC_BASES_ALL
  #endif
}


static void ZstdDec1_Init(CZstdDec1 *p)
{
  p->reps[0] = 1;
  p->reps[1] = 4;
  p->reps[2] = 8;
  // p->seqTables_wereSet = False;
  p->ml_accuracy = SEQ_TABLES_WERE_NOT_SET_ml_accuracy;
  p->litHuf_wasSet = False;
  p->totalOutCheck = 0;
}



#ifdef MY_CPU_LE_UNALIGN
  #define Z7_ZSTD_DEC_USE_UNALIGNED_COPY
#endif

#ifdef Z7_ZSTD_DEC_USE_UNALIGNED_COPY

  #define COPY_CHUNK_SIZE 16

  #define COPY_CHUNK_4_2(dest, src) \
  { \
    ((UInt32 *)(void *)dest)[0] = ((const UInt32 *)(const void *)src)[0]; \
    ((UInt32 *)(void *)dest)[1] = ((const UInt32 *)(const void *)src)[1]; \
    src += 4 * 2; \
    dest += 4 * 2; \
  }

  /* sse2 doesn't help here in GCC and CLANG.
     so we disabled sse2 here */
  /*
  #if defined(MY_CPU_AMD64)
    #define Z7_ZSTD_DEC_USE_SSE2
  #elif defined(MY_CPU_X86)
    #if defined(_MSC_VER) && _MSC_VER >= 1300 && defined(_M_IX86_FP) && (_M_IX86_FP >= 2) \
        || defined(__SSE2__) \
        // || 1 == 1 // for debug only
      #define Z7_ZSTD_DEC_USE_SSE2
    #endif
  #endif
  */

  #if defined(MY_CPU_ARM64)
    #define COPY_OFFSET_MIN 16
    #define COPY_CHUNK1(dest, src) \
    { \
      vst1q_u8((uint8_t *)(void *)dest, \
      vld1q_u8((const uint8_t *)(const void *)src)); \
      src += 16; \
      dest += 16; \
    }

    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK1(dest, src) \
      if ((len -= COPY_CHUNK_SIZE) == 0) break; \
      COPY_CHUNK1(dest, src) \
    }

  #elif defined(Z7_ZSTD_DEC_USE_SSE2)
    #include <emmintrin.h> // sse2
    #define COPY_OFFSET_MIN 16

    #define COPY_CHUNK1(dest, src) \
    { \
      _mm_storeu_si128((__m128i *)(void *)dest, \
      _mm_loadu_si128((const __m128i *)(const void *)src)); \
      src += 16; \
      dest += 16; \
    }

    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK1(dest, src) \
      if ((len -= COPY_CHUNK_SIZE) == 0) break; \
      COPY_CHUNK1(dest, src) \
    }

  #elif defined(MY_CPU_64BIT)
    #define COPY_OFFSET_MIN 8

    #define COPY_CHUNK(dest, src) \
    { \
      ((UInt64 *)(void *)dest)[0] = ((const UInt64 *)(const void *)src)[0]; \
      ((UInt64 *)(void *)dest)[1] = ((const UInt64 *)(const void *)src)[1]; \
      src += 8 * 2; \
      dest += 8 * 2; \
    }

  #else
    #define COPY_OFFSET_MIN 4

    #define COPY_CHUNK(dest, src) \
    { \
      COPY_CHUNK_4_2(dest, src); \
      COPY_CHUNK_4_2(dest, src); \
    }

  #endif
#endif


#ifndef COPY_CHUNK_SIZE
  #define COPY_OFFSET_MIN 4
  #define COPY_CHUNK_SIZE 8
  #define COPY_CHUNK_2(dest, src) \
  { \
    const Byte a0 = src[0]; \
    const Byte a1 = src[1]; \
    dest[0] = a0; \
    dest[1] = a1; \
    src += 2; \
    dest += 2; \
  }
  #define COPY_CHUNK(dest, src) \
  { \
    COPY_CHUNK_2(dest, src) \
    COPY_CHUNK_2(dest, src) \
    COPY_CHUNK_2(dest, src) \
    COPY_CHUNK_2(dest, src) \
  }
#endif


#define COPY_PREPARE \
  len += (COPY_CHUNK_SIZE - 1); \
  len &= ~(size_t)(COPY_CHUNK_SIZE - 1); \
  { if (len > rem) \
    { len = rem; \
      rem &= (COPY_CHUNK_SIZE - 1); \
      if (rem) { \
        len -= rem; \
        Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
        do *dest++ = *src++; while (--rem); \
        if (len == 0) return; }}}

#define COPY_CHUNKS \
{ \
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
  do { COPY_CHUNK(dest, src) } \
  while (len -= COPY_CHUNK_SIZE); \
}
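
/* Editor's note, illustrative only (not part of the original decoder):
   COPY_PREPARE rounds (len) up to a COPY_CHUNK_SIZE multiple, which may
   write up to COPY_CHUNK_SIZE - 1 bytes beyond (len); that is safe only
   while the rounded (len) does not exceed (rem), the remaining room.
   For example, with COPY_CHUNK_SIZE == 16, len == 21, rem == 25:
     len rounds to 32 > rem, so len = 25, then rem = 25 & 15 == 9
     bytes are copied byte by byte first, and the remaining 16 bytes
     go through COPY_CHUNKS. */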

// (len != 0)
// (len <= rem)
static
Z7_FORCE_INLINE
// Z7_ATTRIB_NO_VECTOR
void CopyLiterals(Byte *dest, Byte const *src, size_t len, size_t rem)
{
  COPY_PREPARE
  COPY_CHUNKS
}


/* we can define Z7_STD_DEC_USE_AFTER_CYC_BUF, if we want to use additional
   space after cycSize that can be used to reduce the code in CopyMatch(): */
// for debug:
// #define Z7_STD_DEC_USE_AFTER_CYC_BUF

/*
CopyMatch()
if wrap (offset > winPos)
{
  then we have at least (COPY_CHUNK_SIZE) avail in (dest) before we will overwrite (src):
    (cycSize >= offset + COPY_CHUNK_SIZE)
  if defined(Z7_STD_DEC_USE_AFTER_CYC_BUF)
    we are allowed to read win[cycSize + COPY_CHUNK_SIZE - 1],
}
(len != 0)
*/
static
Z7_FORCE_INLINE
// Z7_ATTRIB_NO_VECTOR
void CopyMatch(size_t offset, size_t len,
    Byte *win, size_t winPos, size_t rem, const size_t cycSize)
{
  Byte *dest = win + winPos;
  const Byte *src;
  // STAT_INC(g_NumCopy)

  if (offset > winPos)
  {
    size_t back = offset - winPos;
    // src = win + cycSize - back;
    // cycSize -= offset;
    STAT_INC(g_NumOver)
    src = dest + (cycSize - offset);
    // (src >= dest) here
    #ifdef Z7_STD_DEC_USE_AFTER_CYC_BUF
    if (back < len)
    {
    #else
    if (back < len + (COPY_CHUNK_SIZE - 1))
    {
      if (back >= len)
      {
        Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
        do
          *dest++ = *src++;
        while (--len);
        return;
      }
    #endif
      // back < len
      STAT_INC(g_NumOver2)
      len -= back;
      rem -= back;
      Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
      do
        *dest++ = *src++;
      while (--back);
      src = dest - offset;
      // src = win;
      // we go to MAIN-COPY
    }
  }
  else
    src = dest - offset;

  // len != 0
  // do *dest++ = *src++; while (--len); return;

  // --- MAIN COPY ---
  // if (src >= dest), then ((size_t)(src - dest) >= COPY_CHUNK_SIZE)
  // so we have at least COPY_CHUNK_SIZE space before overlap for writing.
  COPY_PREPARE

  /* now (len == COPY_CHUNK_SIZE * x)
     so we can unroll for aligned copy */
  {
    // const unsigned b0 = src[0];
    // (COPY_OFFSET_MIN >= 4)

    if (offset >= COPY_OFFSET_MIN)
    {
      COPY_CHUNKS
      // return;
    }
    else
    #if (COPY_OFFSET_MIN > 4)
      #if COPY_CHUNK_SIZE < 8
        #error Stop_Compiling_Bad_COPY_CHUNK_SIZE
      #endif
      if (offset >= 4)
      {
        Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
        do
        {
          COPY_CHUNK_4_2(dest, src)
          #if COPY_CHUNK_SIZE != 16
            if (len == 8) break;
          #endif
          COPY_CHUNK_4_2(dest, src)
        }
        while (len -= 16);
        // return;
      }
      else
    #endif
    {
      // (offset < 4)
      const unsigned b0 = src[0];
      if (offset < 2)
      {
        #if defined(Z7_ZSTD_DEC_USE_UNALIGNED_COPY) && (COPY_CHUNK_SIZE == 16)
          #if defined(MY_CPU_64BIT)
          {
            const UInt64 v64 = (UInt64)b0 * 0x0101010101010101;
            Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
            do
            {
              ((UInt64 *)(void *)dest)[0] = v64;
              ((UInt64 *)(void *)dest)[1] = v64;
              dest += 16;
            }
            while (len -= 16);
          }
          #else
          {
            UInt32 v = b0;
            v |= v << 8;
            v |= v << 16;
            do
            {
              ((UInt32 *)(void *)dest)[0] = v;
              ((UInt32 *)(void *)dest)[1] = v;
              dest += 8;
              ((UInt32 *)(void *)dest)[0] = v;
              ((UInt32 *)(void *)dest)[1] = v;
              dest += 8;
            }
            while (len -= 16);
          }
          #endif
        #else
          do
          {
            dest[0] = (Byte)b0;
            dest[1] = (Byte)b0;
            dest += 2;
            dest[0] = (Byte)b0;
            dest[1] = (Byte)b0;
            dest += 2;
          }
          while (len -= 4);
        #endif
      }
      else if (offset == 2)
      {
        const Byte b1 = src[1];
        {
          do
          {
            dest[0] = (Byte)b0;
            dest[1] = b1;
            dest += 2;
          }
          while (len -= 2);
        }
      }
      else // (offset == 3)
      {
        const Byte *lim = dest + len - 2;
        const Byte b1 = src[1];
        const Byte b2 = src[2];
        do
        {
          dest[0] = (Byte)b0;
          dest[1] = b1;
          dest[2] = b2;
          dest += 3;
        }
        while (dest < lim);
        lim++; // points to last byte that must be written
        if (dest <= lim)
        {
          *dest = (Byte)b0;
          if (dest != lim)
            dest[1] = b1;
        }
      }
    }
  }
}
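
/* Editor's note, illustrative only (not part of the original decoder):
   for offsets smaller than COPY_OFFSET_MIN the chunk copy would read
   bytes it has just written, so the code switches to pattern fills.
   For example, offset == 1 is a byte run: src[0] is broadcast (via the
   0x0101010101010101 multiplier on 64-bit targets), while offset == 3
   writes a 3-byte pattern with a tail fix-up, because 3 does not
   divide the chunk-rounded length. */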



#define UPDATE_TOTAL_OUT(p, size) \
{ \
  size_t _toc = (p)->totalOutCheck + (size); \
  const size_t _ws = (p)->winSize; \
  if (_toc >= _ws) _toc = _ws; \
  (p)->totalOutCheck = _toc; \
}


#if defined(MY_CPU_64BIT) && defined(MY_CPU_LE_UNALIGN)
// we can disable it for debug:
#define Z7_ZSTD_DEC_USE_64BIT_LOADS
#endif
// #define Z7_ZSTD_DEC_USE_64BIT_LOADS // for debug : slow in 32-bit

// SEQ_SRC_OFFSET: how many bytes (src) (seqSrc) was moved back from original value.
// we need (SEQ_SRC_OFFSET != 0) for optimized memory access
#ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS
  #define SEQ_SRC_OFFSET 7
#else
  #define SEQ_SRC_OFFSET 3
#endif
#define SRC_PLUS_FOR_4BYTES(bitOffset) (SEQ_SRC_OFFSET - 3) + ((CBitCtr_signed)(bitOffset) >> 3)
#define BIT_OFFSET_7BITS(bitOffset) ((unsigned)(bitOffset) & 7)
/*
  if (BIT_OFFSET_DELTA_BITS == 0) : bitOffset == number_of_unprocessed_bits
  if (BIT_OFFSET_DELTA_BITS == 1) : bitOffset == number_of_unprocessed_bits - 1
    and we can read 1 bit more in that mode : (8 * n + 1).
*/
// #define BIT_OFFSET_DELTA_BITS 0
#define BIT_OFFSET_DELTA_BITS 1
#if BIT_OFFSET_DELTA_BITS == 1
  #define GET_SHIFT_FROM_BOFFS7(boff7) (7 ^ (boff7))
#else
  #define GET_SHIFT_FROM_BOFFS7(boff7) (8 - BIT_OFFSET_DELTA_BITS - (boff7))
#endif

#define UPDATE_BIT_OFFSET(bitOffset, numBits) \
    (bitOffset) -= (CBitCtr)(numBits);

#define GET_SHIFT(bitOffset) GET_SHIFT_FROM_BOFFS7(BIT_OFFSET_7BITS(bitOffset))


#if defined(Z7_ZSTD_DEC_USE_64BIT_LOADS)
  #if (NUM_OFFSET_SYMBOLS_MAX - BIT_OFFSET_DELTA_BITS < 32)
    /* if (NUM_OFFSET_SYMBOLS_MAX == 32 && BIT_OFFSET_DELTA_BITS == 1),
       we have depth 31 + 9 + 9 + 8 = 57 bits that can be read with a single read. */
    #define Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF
  #endif
  #ifndef Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF
    #if (BIT_OFFSET_DELTA_BITS == 1)
      /* if (winLimit - winPos <= (kBlockSizeMax = (1 << 17)))
         {
           the case (16 bits literal extra + 16 match extra) is not possible
           in correct stream. So error will be detected for (16 + 16) case.
           And longest correct sequence after offset reading is (31 + 9 + 9 + 8 = 57 bits).
           So we can use just one 64-bit load here in that case.
         }
      */
      #define Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML
    #endif
  #endif
#endif


#if !defined(Z7_ZSTD_DEC_USE_64BIT_LOADS) || \
    (!defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF) && \
     !defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML))
// in : (0 < bits <= (24 or 25)):
#define STREAM_READ_BITS(dest, bits) \
{ \
  GET32(dest, src + SRC_PLUS_FOR_4BYTES(bitOffset)) \
  dest <<= GET_SHIFT(bitOffset); \
  UPDATE_BIT_OFFSET(bitOffset, bits) \
  dest >>= 32 - bits; \
}
#endif


#define FSE_Peek_1(table, state) table[state]

#define STATE_VAR(name) state_ ## name

// in : (0 <= accuracy <= (24 or 25))
#define FSE_INIT_STATE(name, cond) \
{ \
  UInt32 r; \
  const unsigned bits = p->name ## _accuracy; \
  GET32(r, src + SRC_PLUS_FOR_4BYTES(bitOffset)) \
  r <<= GET_SHIFT(bitOffset); \
  r >>= 1; \
  r >>= 31 ^ bits; \
  UPDATE_BIT_OFFSET(bitOffset, bits) \
  cond \
  STATE_VAR(name) = FSE_Peek_1(FSE_TABLE(name), r); \
  /* STATE_VAR(name) = dest << 16; */ \
}


#define FSE_Peek_Plus(name, r) \
  STATE_VAR(name) = FSE_Peek_1(FSE_TABLE(name), \
      GET_FSE_REC_STATE(STATE_VAR(name)) + r);

#define LZ_LOOP_ERROR_EXIT { return SZ_ERROR_DATA; }

#define BO_OVERFLOW_CHECK \
  { if ((CBitCtr_signed)bitOffset < 0) LZ_LOOP_ERROR_EXIT }


#ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS

#define GET64(dest, p) { const Byte *ptr = p; dest = GetUi64(ptr); }

#define FSE_PRELOAD \
{ \
  GET64(v, src - 4 + SRC_PLUS_FOR_4BYTES(bitOffset)) \
  v <<= GET_SHIFT(bitOffset); \
}

#define FSE_UPDATE_STATE_2(name, cond) \
{ \
  const unsigned bits = GET_FSE_REC_LEN(STATE_VAR(name)); \
  UInt64 r = v; \
  v <<= bits; \
  r >>= 1; \
  UPDATE_BIT_OFFSET(bitOffset, bits) \
  cond \
  r >>= 63 ^ bits; \
  FSE_Peek_Plus(name, r); \
}

#define FSE_UPDATE_STATES \
  FSE_UPDATE_STATE_2 (ll, {} ) \
  FSE_UPDATE_STATE_2 (ml, {} ) \
  FSE_UPDATE_STATE_2 (of, BO_OVERFLOW_CHECK) \

#else // Z7_ZSTD_DEC_USE_64BIT_LOADS

// it supports 8 bits accuracy for any code
// it supports 9 bits accuracy, if (BIT_OFFSET_DELTA_BITS == 1)
#define FSE_UPDATE_STATE_0(name, cond) \
{ \
  UInt32 r; \
  const unsigned bits = GET_FSE_REC_LEN(STATE_VAR(name)); \
  GET16(r, src + 2 + SRC_PLUS_FOR_4BYTES(bitOffset)) \
  r >>= (bitOffset & 7); \
  r &= (1 << (8 + BIT_OFFSET_DELTA_BITS)) - 1; \
  UPDATE_BIT_OFFSET(bitOffset, bits) \
  cond \
  r >>= (8 + BIT_OFFSET_DELTA_BITS) - bits; \
  FSE_Peek_Plus(name, r); \
}

// for debug (slow):
// #define Z7_ZSTD_DEC_USE_FSE_FUSION_FORCE
#if BIT_OFFSET_DELTA_BITS == 0 || defined(Z7_ZSTD_DEC_USE_FSE_FUSION_FORCE)
  #define Z7_ZSTD_DEC_USE_FSE_FUSION
#endif

#ifdef Z7_ZSTD_DEC_USE_FSE_FUSION
#define FSE_UPDATE_STATE_1(name) \
{ UInt32 rest2; \
{ \
  UInt32 r; \
  unsigned bits; \
  GET32(r, src + SRC_PLUS_FOR_4BYTES(bitOffset)) \
  bits = GET_FSE_REC_LEN(STATE_VAR(name)); \
  r <<= GET_SHIFT(bitOffset); \
  rest2 = r << bits; \
  r >>= 1; \
  UPDATE_BIT_OFFSET(bitOffset, bits) \
  r >>= 31 ^ bits; \
  FSE_Peek_Plus(name, r); \
}

#define FSE_UPDATE_STATE_3(name) \
{ \
  const unsigned bits = GET_FSE_REC_LEN(STATE_VAR(name)); \
  rest2 >>= 1; \
  UPDATE_BIT_OFFSET(bitOffset, bits) \
  rest2 >>= 31 ^ bits; \
  FSE_Peek_Plus(name, rest2); \
}}

#define FSE_UPDATE_STATES \
  FSE_UPDATE_STATE_1 (ll) \
  FSE_UPDATE_STATE_3 (ml) \
  FSE_UPDATE_STATE_0 (of, BO_OVERFLOW_CHECK) \
#else // Z7_ZSTD_DEC_USE_FSE_FUSION
1933
1934#define FSE_UPDATE_STATES \
1935 FSE_UPDATE_STATE_0 (ll, {} ) \
1936 FSE_UPDATE_STATE_0 (ml, {} ) \
1937 FSE_UPDATE_STATE_0 (of, BO_OVERFLOW_CHECK) \
1938
1939#endif // Z7_ZSTD_DEC_USE_FSE_FUSION
1940#endif // Z7_ZSTD_DEC_USE_64BIT_LOADS
1941
1942
1943
1944typedef struct
1945{
1946 UInt32 numSeqs;
1947 UInt32 literalsLen;
1948 const Byte *literals;
1949}
1950CZstdDec1_Vars;
1951
1952
1953// if (BIT_OFFSET_DELTA_BITS != 0), we need (BIT_OFFSET_DELTA_BYTES > 0)
1954#define BIT_OFFSET_DELTA_BYTES BIT_OFFSET_DELTA_BITS
1955
1956/* if (NUM_OFFSET_SYMBOLS_MAX == 32)
1957 max_seq_bit_length = (31) + 16 + 16 + 9 + 8 + 9 = 89 bits
1958 if defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF) we have longest backward
1959 lookahead offset, and we read UInt64 after literal_len reading.
1960 if (BIT_OFFSET_DELTA_BITS == 1 && NUM_OFFSET_SYMBOLS_MAX == 32)
1961 MAX_BACKWARD_DEPTH = 16 bytes
1962*/
1963#define MAX_BACKWARD_DEPTH \
1964 ((NUM_OFFSET_SYMBOLS_MAX - 1 + 16 + 16 + 7) / 8 + 7 + BIT_OFFSET_DELTA_BYTES)
1965
1966/* srcLen != 0
1967 src == real_data_ptr - SEQ_SRC_OFFSET - BIT_OFFSET_DELTA_BYTES
1968 if defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML) then
1969 (winLimit - p->winPos <= (1 << 17)) is required
1970*/
1971static
1972Z7_NO_INLINE
1973// Z7_ATTRIB_NO_VECTOR
1974SRes Decompress_Sequences(CZstdDec1 * const p,
1975 const Byte *src, const size_t srcLen,
1976 const size_t winLimit,
1977 const CZstdDec1_Vars * const vars)
1978{
1979#ifdef Z7_ZSTD_DEC_USE_BASES_LOCAL
1980 SEQ_EXTRA_TABLES(a_)
1981#endif
1982
1983 // for debug:
1984 // #define Z7_ZSTD_DEC_USE_LOCAL_FSE_TABLES
1985#ifdef Z7_ZSTD_DEC_USE_LOCAL_FSE_TABLES
1986 #define FSE_TABLE(n) fse. n
1987 const CZstdDecFseTables fse = p->fse;
1988 /*
1989 CZstdDecFseTables fse;
1990 #define COPY_FSE_TABLE(n) \
1991 memcpy(fse. n, p->fse. n, (size_t)4 << p-> n ## _accuracy);
1992 COPY_FSE_TABLE(of)
1993 COPY_FSE_TABLE(ll)
1994 COPY_FSE_TABLE(ml)
1995 */
1996#else
1997 #define FSE_TABLE(n) (p->fse. n)
1998#endif
1999
2000#ifdef Z7_ZSTD_DEC_USE_BASES_LOCAL
2001 FILL_LOC_BASES_ALL
2002#endif
2003
2004 {
2005 unsigned numSeqs = vars->numSeqs;
2006 const Byte *literals = vars->literals;
2007 ptrdiff_t literalsLen = (ptrdiff_t)vars->literalsLen;
2008 Byte * const win = p->win;
2009 size_t winPos = p->winPos;
2010 const size_t cycSize = p->cycSize;
2011 size_t totalOutCheck = p->totalOutCheck;
2012 const size_t winSize = p->winSize;
2013 size_t reps_0 = p->reps[0];
2014 size_t reps_1 = p->reps[1];
2015 size_t reps_2 = p->reps[2];
2016 UInt32 STATE_VAR(ll), STATE_VAR(of), STATE_VAR(ml);
2017 CBitCtr bitOffset;
2018
2019 SET_bitOffset_TO_PAD (bitOffset, src + SEQ_SRC_OFFSET, srcLen + BIT_OFFSET_DELTA_BYTES)
2020
2021 bitOffset -= BIT_OFFSET_DELTA_BITS;
2022
2023 FSE_INIT_STATE(ll, {} )
2024 FSE_INIT_STATE(of, {} )
2025 FSE_INIT_STATE(ml, BO_OVERFLOW_CHECK)
2026
2027 for (;;)
2028 {
2029 size_t matchLen;
2030 #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS
2031 UInt64 v;
2032 #endif
2033
2034 #ifdef Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF
2035 FSE_PRELOAD
2036 #endif
2037
2038 // if (of_code == 0)
2039 if ((Byte)STATE_VAR(of) == 0)
2040 {
2041 if (GET_FSE_REC_SYM(STATE_VAR(ll)) == 0)
2042 {
2043 const size_t offset = reps_1;
2044 reps_1 = reps_0;
2045 reps_0 = offset;
2046 STAT_INC(g_Num_Rep1)
2047 }
2048 STAT_UPDATE(else g_Num_Rep0++;)
2049 }
2050 else
2051 {
2052 const unsigned of_code = (Byte)STATE_VAR(of);
2053
2054 #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS
2055 #if !defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF)
2056 FSE_PRELOAD
2057 #endif
2058 #else
2059 UInt32 v;
2060 {
2061 const Byte *src4 = src + SRC_PLUS_FOR_4BYTES(bitOffset);
2062 const unsigned skip = GET_SHIFT(bitOffset);
2063 GET32(v, src4)
2064 v <<= skip;
2065 v |= (UInt32)src4[-1] >> (8 - skip);
2066 }
2067 #endif
2068
2069 UPDATE_BIT_OFFSET(bitOffset, of_code)
2070
2071 if (of_code == 1)
2072 {
2073 // read 1 bit
2074 #if defined(Z7_MSC_VER_ORIGINAL) || defined(MY_CPU_X86_OR_AMD64)
2075 #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS
2076 #define CHECK_HIGH_BIT_64(a) ((Int64)(UInt64)(a) < 0)
2077 #else
2078 #define CHECK_HIGH_BIT_32(a) ((Int32)(UInt32)(a) < 0)
2079 #endif
2080 #else
2081 #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS
2082 #define CHECK_HIGH_BIT_64(a) ((UInt64)(a) & ((UInt64)1 << 63))
2083 #else
2084 #define CHECK_HIGH_BIT_32(a) ((UInt32)(a) & ((UInt32)1 << 31))
2085 #endif
2086 #endif
2087
2088 if
2089 #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS
2090 CHECK_HIGH_BIT_64 (((UInt64)GET_FSE_REC_SYM(STATE_VAR(ll)) - 1) ^ v)
2091 #else
2092 CHECK_HIGH_BIT_32 (((UInt32)GET_FSE_REC_SYM(STATE_VAR(ll)) - 1) ^ v)
2093 #endif
2094 {
2095 v <<= 1;
2096 {
2097 const size_t offset = reps_2;
2098 reps_2 = reps_1;
2099 reps_1 = reps_0;
2100 reps_0 = offset;
2101 STAT_INC(g_Num_Rep2)
2102 }
2103 }
2104 else
2105 {
2106 if (GET_FSE_REC_SYM(STATE_VAR(ll)) == 0)
2107 {
2108 // litLen == 0 && bit == 1
2109 STAT_INC(g_Num_Rep3)
2110 v <<= 1;
2111 reps_2 = reps_1;
2112 reps_1 = reps_0;
2113 if (--reps_0 == 0)
2114 {
2115 // LZ_LOOP_ERROR_EXIT
2116 // original-zstd decoder : input is corrupted; force offset to 1
2117 // reps_0 = 1;
2118 reps_0++;
2119 }
2120 }
2121 else
2122 {
2123 // litLen != 0 && bit == 0
2124 v <<= 1;
2125 {
2126 const size_t offset = reps_1;
2127 reps_1 = reps_0;
2128 reps_0 = offset;
2129 STAT_INC(g_Num_Rep1)
2130 }
2131 }
2132 }
2133 }
2134 else
2135 {
2136 // (2 <= of_code)
2137 // if (of_code >= 32) LZ_LOOP_ERROR_EXIT // optional check
2138 // (of_code >= 32) cases are already excluded by other code
2139 reps_2 = reps_1;
2140 reps_1 = reps_0;
2141 reps_0 = ((size_t)1 << of_code) - 3 + (size_t)
2142 #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS
2143 (v >> (64 - of_code));
2144 v <<= of_code;
2145 #else
2146 (v >> (32 - of_code));
2147 #endif
2148 }
2149 }
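/* summary of the repeat-offset update rules implemented by the branches above
   (as in the Zstandard format specification, RFC 8878; a reference summary):
     of_code == 0 : litLen != 0 : offset = reps[0] (no change)
                    litLen == 0 : offset = reps[1] (reps[0] and reps[1] swap)
     of_code == 1 (one extra bit b):
                    litLen != 0 && b == 0 : offset = reps[1]
                    litLen != 0 && b == 1 : offset = reps[2]
                    litLen == 0 && b == 0 : offset = reps[2]
                    litLen == 0 && b == 1 : offset = reps[0] - 1 (forced to 1, if it becomes 0)
     of_code >= 2 : offset = ((size_t)1 << of_code) - 3 + (of_code extra bits) */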
2150
2151 #ifdef Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML
2152 FSE_PRELOAD
2153 #endif
2154
2155 matchLen = (size_t)GET_FSE_REC_SYM(STATE_VAR(ml))
2156 #ifndef Z7_ZSTD_DEC_USE_ML_PLUS3
2157 + MATCH_LEN_MIN
2158 #endif
2159 ;
2160 {
2161 {
2162 if (matchLen >= 32 + MATCH_LEN_MIN) // if (state_ml & 0x20)
2163 {
2164 const unsigned extra = BASES_TABLE(SEQ_ML_EXTRA) [(size_t)matchLen - MATCH_LEN_MIN];
2165 matchLen = BASES_TABLE(SEQ_ML_BASES) [(size_t)matchLen - MATCH_LEN_MIN];
2166 #if defined(Z7_ZSTD_DEC_USE_64BIT_LOADS) && \
2167 (defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML) || \
2168 defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF))
2169 {
2170 UPDATE_BIT_OFFSET(bitOffset, extra)
2171 matchLen += (size_t)(v >> (64 - extra));
2172 #if defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF)
2173 FSE_PRELOAD
2174 #else
2175 v <<= extra;
2176 #endif
2177 }
2178 #else
2179 {
2180 UInt32 v32;
2181 STREAM_READ_BITS(v32, extra)
2182 matchLen += v32;
2183 }
2184 #endif
2185 STAT_INC(g_Num_Match)
2186 }
2187 }
2188 }
2189
2190 #if defined(Z7_ZSTD_DEC_USE_64BIT_LOADS) && \
2191 !defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF) && \
2192 !defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML)
2193 FSE_PRELOAD
2194 #endif
2195
2196 {
2197 size_t litLen = GET_FSE_REC_SYM(STATE_VAR(ll));
2198 if (litLen)
2199 {
2200 // if (STATE_VAR(ll) & 0x70)
2201 if (litLen >= 16)
2202 {
2203 const unsigned extra = BASES_TABLE(SEQ_LL_EXTRA) [litLen];
2204 litLen = BASES_TABLE(SEQ_LL_BASES) [litLen];
2205 #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS
2206 {
2207 UPDATE_BIT_OFFSET(bitOffset, extra)
2208 litLen += (size_t)(v >> (64 - extra));
2209 #if defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF)
2210 FSE_PRELOAD
2211 #else
2212 v <<= extra;
2213 #endif
2214 }
2215 #else
2216 {
2217 UInt32 v32;
2218 STREAM_READ_BITS(v32, extra)
2219 litLen += v32;
2220 }
2221 #endif
2222 STAT_INC(g_Num_LitsBig)
2223 }
2224
2225 if ((literalsLen -= (ptrdiff_t)litLen) < 0)
2226 LZ_LOOP_ERROR_EXIT
2227 totalOutCheck += litLen;
2228 {
2229 const size_t rem = winLimit - winPos;
2230 if (litLen > rem)
2231 LZ_LOOP_ERROR_EXIT
2232 {
2233 const Byte *literals_temp = literals;
2234 Byte *d = win + winPos;
2235 literals += litLen;
2236 winPos += litLen;
2237 CopyLiterals(d, literals_temp, litLen, rem);
2238 }
2239 }
2240 }
2241 STAT_UPDATE(else g_Num_Lit0++;)
2242 }
2243
2244 #define COPY_MATCH \
2245 { if (reps_0 > winSize || reps_0 > totalOutCheck) LZ_LOOP_ERROR_EXIT \
2246 totalOutCheck += matchLen; \
2247 { const size_t rem = winLimit - winPos; \
2248 if (matchLen > rem) LZ_LOOP_ERROR_EXIT \
2249 { const size_t winPos_temp = winPos; \
2250 winPos += matchLen; \
2251 CopyMatch(reps_0, matchLen, win, winPos_temp, rem, cycSize); }}}
2252
2253 if (--numSeqs == 0)
2254 {
2255 COPY_MATCH
2256 break;
2257 }
2258 FSE_UPDATE_STATES
2259 COPY_MATCH
2260 } // for
2261
2262 if ((CBitCtr_signed)bitOffset != BIT_OFFSET_DELTA_BYTES * 8 - BIT_OFFSET_DELTA_BITS)
2263 return SZ_ERROR_DATA;
2264
2265 if (literalsLen)
2266 {
2267 const size_t rem = winLimit - winPos;
2268 if ((size_t)literalsLen > rem)
2269 return SZ_ERROR_DATA;
2270 {
2271 Byte *d = win + winPos;
2272 winPos += (size_t)literalsLen;
2273 totalOutCheck += (size_t)literalsLen;
2274 CopyLiterals
2275 // memcpy
2276 (d, literals, (size_t)literalsLen, rem);
2277 }
2278 }
2279 if (totalOutCheck >= winSize)
2280 totalOutCheck = winSize;
2281 p->totalOutCheck = totalOutCheck;
2282 p->winPos = winPos;
2283 p->reps[0] = (CZstdDecOffset)reps_0;
2284 p->reps[1] = (CZstdDecOffset)reps_1;
2285 p->reps[2] = (CZstdDecOffset)reps_2;
2286 }
2287 return SZ_OK;
2288}
2289
2290
2291// for debug: define to check that ZstdDec1_NeedTempBufferForInput() works correctly:
2292// #define Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP // define it for debug only
2293#ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP
2294static unsigned g_numSeqs;
2295#endif
2296
2297
2298#define k_LitBlockType_Flag_RLE_or_Treeless 1
2299#define k_LitBlockType_Flag_Compressed 2
2300
2301// outLimit : is strong limit
2302// outLimit <= ZstdDec1_GET_BLOCK_SIZE_LIMIT(p)
2303// inSize != 0
2304static
2305Z7_NO_INLINE
2306SRes ZstdDec1_DecodeBlock(CZstdDec1 *p,
2307 const Byte *src, SizeT inSize, SizeT afterAvail,
2308 const size_t outLimit)
2309{
2310 CZstdDec1_Vars vars;
2311 vars.literals = p->literalsBase;
2312 {
2313 const unsigned b0 = *src++;
2314 UInt32 numLits, compressedSize;
2315 const Byte *litStream;
2316 Byte *literalsDest;
2317 inSize--;
2318
2319 if ((b0 & k_LitBlockType_Flag_Compressed) == 0)
2320 {
2321 // we need at least one additional byte for (numSeqs).
2322 // so we check for that additional byte in conditions.
2323 numLits = b0 >> 3;
2324 if (b0 & 4)
2325 {
2326 UInt32 v;
2327 if (inSize < 1 + 1) // we need at least 1 byte here and 1 byte for (numSeqs).
2328 return SZ_ERROR_DATA;
2329 numLits >>= 1;
2330 v = GetUi16(src);
2331 src += 2;
2332 inSize -= 2;
2333 if ((b0 & 8) == 0)
2334 {
2335 src--;
2336 inSize++;
2337 v = (Byte)v;
2338 }
2339 numLits += v << 4;
2340 }
2341 compressedSize = 1;
2342 if ((b0 & k_LitBlockType_Flag_RLE_or_Treeless) == 0)
2343 compressedSize = numLits;
2344 }
2345 else if (inSize < 4)
2346 return SZ_ERROR_DATA;
2347 else
2348 {
2349 const unsigned mode4Streams = b0 & 0xc;
2350 const unsigned numBytes = (3 * mode4Streams + 32) >> 4;
2351 const unsigned numBits = 4 * numBytes - 2;
2352 const UInt32 mask = ((UInt32)16 << numBits) - 1;
2353 compressedSize = GetUi32(src);
2354 numLits = ((
2355 #ifdef MY_CPU_LE_UNALIGN
2356 GetUi32(src - 1)
2357 #else
2358 ((compressedSize << 8) + b0)
2359 #endif
2360 ) >> 4) & mask;
2361 src += numBytes;
2362 inSize -= numBytes;
2363 compressedSize >>= numBits;
2364 compressedSize &= mask;
2365 /*
2366 if (numLits != 0) printf("inSize = %7u num_lits=%7u compressed=%7u ratio = %u ratio2 = %u\n",
2367 i1, numLits, (unsigned)compressedSize * 1, (unsigned)compressedSize * 100 / numLits,
2368 (unsigned)numLits * 100 / (unsigned)inSize);
2369 }
2370 */
2371 if (compressedSize == 0)
2372 return SZ_ERROR_DATA; // (compressedSize == 0) is not allowed
2373 }
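/* layout of the literals section header parsed above (from the Zstandard
   format specification; a reference summary, not used by the code):
     b0 bits [1:0] : Literals_Block_Type : Raw=0, RLE=1, Compressed=2, Treeless=3
     Raw/RLE    : (b0 & 4) == 0     : 1-byte header, numLits = b0 >> 3 (5 bits)
                  (b0 & 0xc) == 4   : 2-byte header, numLits uses 12 bits
                  (b0 & 0xc) == 0xc : 3-byte header, numLits uses 20 bits
     Compressed/Treeless : (b0 & 0xc) selects a 3, 3, 4 or 5 byte header with
       10, 10, 14 or 18 bits for each of (numLits) and (compressedSize), and
       ((b0 & 0xc) == 0) selects 1 huffman stream instead of 4; for example,
       ((b0 & 0xc) == 0xc) gives numBytes == 4, numBits == 14, mask == 0x3ffff. */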
2374
2375 STAT_UPDATE(g_Num_Lits += numLits;)
2376
2377 vars.literalsLen = numLits;
2378
2379 if (compressedSize >= inSize)
2380 return SZ_ERROR_DATA;
2381 litStream = src;
2382 src += compressedSize;
2383 inSize -= compressedSize;
2384 // inSize != 0
2385 {
2386 UInt32 numSeqs = *src++;
2387 inSize--;
2388 if (numSeqs > 127)
2389 {
2390 UInt32 b1;
2391 if (inSize == 0)
2392 return SZ_ERROR_DATA;
2393 numSeqs -= 128;
2394 b1 = *src++;
2395 inSize--;
2396 if (numSeqs == 127)
2397 {
2398 if (inSize == 0)
2399 return SZ_ERROR_DATA;
2400 numSeqs = (UInt32)(*src++) + 127;
2401 inSize--;
2402 }
2403 numSeqs = (numSeqs << 8) + b1;
2404 }
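/* worked examples for the (numSeqs) encoding decoded above (illustration only):
     byte 0x53            : numSeqs == 0x53 == 83
     bytes 0x85 0x20      : numSeqs == ((0x85 - 128) << 8) + 0x20 == 0x520
     bytes 0xff 0x34 0x12 : numSeqs == ((0x12 + 127) << 8) + 0x34
                                    == 0x1234 + 0x7f00 == 0x9134 */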
2405 if (numSeqs * MATCH_LEN_MIN + numLits > outLimit)
2406 return SZ_ERROR_DATA;
2407 vars.numSeqs = numSeqs;
2408
2409 STAT_UPDATE(g_NumSeqs_total += numSeqs;)
2410 /*
2411 #ifdef SHOW_STAT
2412 printf("\n %5u : %8u, %8u : %5u", (int)g_Num_Blocks_Compressed, (int)numSeqs, (int)g_NumSeqs_total,
2413 (int)g_NumSeqs_total / g_Num_Blocks_Compressed);
2414 #endif
2415 // printf("\nnumSeqs2 = %d", numSeqs);
2416 */
2417 #ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP
2418 if (numSeqs != g_numSeqs) return SZ_ERROR_DATA; // for debug
2419 #endif
2420 if (numSeqs == 0)
2421 {
2422 if (inSize != 0)
2423 return SZ_ERROR_DATA;
2424 literalsDest = p->win + p->winPos;
2425 }
2426 else
2427 literalsDest = p->literalsBase;
2428 }
2429
2430 if ((b0 & k_LitBlockType_Flag_Compressed) == 0)
2431 {
2432 if (b0 & k_LitBlockType_Flag_RLE_or_Treeless)
2433 {
2434 memset(literalsDest, litStream[0], numLits);
2435 if (vars.numSeqs)
2436 {
2437 // literalsDest == p->literalsBase == vars.literals
2438 #if COPY_CHUNK_SIZE > 1
2439 memset(p->literalsBase + numLits, 0, COPY_CHUNK_SIZE);
2440 #endif
2441 }
2442 }
2443 else
2444 {
2445 // unsigned y;
2446 // for (y = 0; y < 10000; y++)
2447 memcpy(literalsDest, litStream, numLits);
2448 if (vars.numSeqs)
2449 {
2450 /* we need up to (15 == COPY_CHUNK_SIZE - 1) bytes of space for optimized CopyLiterals().
2451 If we have additional space in the input stream after the literals stream,
2452 we copy the raw literals directly from the input stream */
2453 if ((size_t)(src + inSize - litStream) - numLits + afterAvail >= (COPY_CHUNK_SIZE - 1))
2454 vars.literals = litStream;
2455 else
2456 {
2457 // literalsDest == p->literalsBase == vars.literals
2458 #if COPY_CHUNK_SIZE > 1
2459 /* CopyLiterals():
2460 1) we don't want reading non-initialized data
2461 2) we will copy only zero byte after literals buffer */
2462 memset(p->literalsBase + numLits, 0, COPY_CHUNK_SIZE);
2463 #endif
2464 }
2465 }
2466 }
2467 }
2468 else
2469 {
2470 CInBufPair hufStream;
2471 hufStream.ptr = litStream;
2472 hufStream.len = compressedSize;
2473
2474 if ((b0 & k_LitBlockType_Flag_RLE_or_Treeless) == 0)
2475 {
2476 // unsigned y = 100; CInBufPair hs2 = hufStream; do { hufStream = hs2;
2477 RINOK(Huf_DecodeTable(&p->huf, &hufStream))
2478 p->litHuf_wasSet = True;
2479 // } while (--y);
2480 }
2481 else if (!p->litHuf_wasSet)
2482 return SZ_ERROR_DATA;
2483
2484 {
2485 // int yyy; for (yyy = 0; yyy < 34; yyy++) {
2486 SRes sres;
2487 if ((b0 & 0xc) == 0) // mode4Streams
2488 sres = Huf_Decompress_1stream((const Byte *)(const void *)p->huf.table64,
2489 hufStream.ptr - HUF_SRC_OFFSET, hufStream.len, literalsDest, numLits);
2490 else
2491 {
2492 // 6 bytes for the jump table + 4x1 end-padding bytes
2493 if (hufStream.len < 6 + 4)
2494 return SZ_ERROR_DATA;
2495 // the condition from original-zstd decoder:
2496 #define Z7_ZSTD_MIN_LITERALS_FOR_4_STREAMS 6
2497 if (numLits < Z7_ZSTD_MIN_LITERALS_FOR_4_STREAMS)
2498 return SZ_ERROR_DATA;
2499 sres = Huf_Decompress_4stream((const Byte *)(const void *)p->huf.table64,
2500 hufStream.ptr + (6 - HUF_SRC_OFFSET), hufStream.len, literalsDest, numLits);
2501 }
2502 RINOK(sres)
2503 // }
2504 }
2505 }
2506
2507 if (vars.numSeqs == 0)
2508 {
2509 p->winPos += numLits;
2510 return SZ_OK;
2511 }
2512 }
2513 {
2514 CInBufPair in;
2515 unsigned mode;
2516 unsigned seqMode;
2517
2518 in.ptr = src;
2519 in.len = inSize;
2520 if (in.len == 0)
2521 return SZ_ERROR_DATA;
2522 in.len--;
2523 mode = *in.ptr++;
2524 if (mode & 3) // Reserved bits
2525 return SZ_ERROR_DATA;
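/* layout of the (mode) byte parsed below (the Symbol_Compression_Modes byte
   from the Zstandard format specification; a reference summary):
     bits [7:6] : literal lengths (ll) mode
     bits [5:4] : offsets (of) mode
     bits [3:2] : match lengths (ml) mode
     bits [1:0] : reserved, must be zero (checked above)
   each 2-bit mode : 0 = Predefined, 1 = RLE, 2 = FSE_Compressed,
   3 = Repeat (k_SeqMode_Repeat) */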
2526
2527 seqMode = (mode >> 6);
2528 if (seqMode == k_SeqMode_Repeat)
2529 { if (!IS_SEQ_TABLES_WERE_SET(p)) return SZ_ERROR_DATA; }
2530 else RINOK(FSE_Decode_SeqTable(
2531 p->fse.ll,
2532 &in,
2533 6, // predefAccuracy
2534 &p->ll_accuracy,
2535 NUM_LL_SYMBOLS,
2536 k_PredefRecords_LL,
2537 seqMode))
2538
2539 seqMode = (mode >> 4) & 3;
2540 if (seqMode == k_SeqMode_Repeat)
2541 { if (!IS_SEQ_TABLES_WERE_SET(p)) return SZ_ERROR_DATA; }
2542 else RINOK(FSE_Decode_SeqTable(
2543 p->fse.of,
2544 &in,
2545 5, // predefAccuracy
2546 &p->of_accuracy,
2547 NUM_OFFSET_SYMBOLS_MAX,
2548 k_PredefRecords_OF,
2549 seqMode))
2550
2551 seqMode = (mode >> 2) & 3;
2552 if (seqMode == k_SeqMode_Repeat)
2553 { if (!IS_SEQ_TABLES_WERE_SET(p)) return SZ_ERROR_DATA; }
2554 else
2555 {
2556 RINOK(FSE_Decode_SeqTable(
2557 p->fse.ml,
2558 &in,
2559 6, // predefAccuracy
2560 &p->ml_accuracy,
2561 NUM_ML_SYMBOLS,
2562 k_PredefRecords_ML,
2563 seqMode))
2564 /*
2565 #if defined(Z7_ZSTD_DEC_USE_ML_PLUS3)
2566 // { unsigned y = 1 << 10; do
2567 {
2568 const unsigned accuracy = p->ml_accuracy;
2569 if (accuracy == 0)
2570 p->fse.ml[0] += 3;
2571 else
2572 #ifdef MY_CPU_64BIT
2573 {
2574 // alignment (UInt64 _pad_Alignment) in fse.ml is required for that code
2575 UInt64 *table = (UInt64 *)(void *)p->fse.ml;
2576 const UInt64 *end = (const UInt64 *)(const void *)
2577 ((const Byte *)(const void *)table + ((size_t)sizeof(CFseRecord) << accuracy));
2578 do
2579 {
2580 table[0] += ((UInt64)MATCH_LEN_MIN << 32) + MATCH_LEN_MIN;
2581 table[1] += ((UInt64)MATCH_LEN_MIN << 32) + MATCH_LEN_MIN;
2582 table += 2;
2583 }
2584 while (table != end);
2585 }
2586 #else
2587 {
2588 UInt32 *table = p->fse.ml;
2589 const UInt32 *end = (const UInt32 *)(const void *)
2590 ((const Byte *)(const void *)table + ((size_t)sizeof(CFseRecord) << accuracy));
2591 do
2592 {
2593 table[0] += MATCH_LEN_MIN;
2594 table[1] += MATCH_LEN_MIN;
2595 table += 2;
2596 table[0] += MATCH_LEN_MIN;
2597 table[1] += MATCH_LEN_MIN;
2598 table += 2;
2599 }
2600 while (table != end);
2601 }
2602 #endif
2603 }
2604 // while (--y); }
2605 #endif
2606 */
2607 }
2608
2609 // p->seqTables_wereSet = True;
2610 if (in.len == 0)
2611 return SZ_ERROR_DATA;
2612 return Decompress_Sequences(p,
2613 in.ptr - SEQ_SRC_OFFSET - BIT_OFFSET_DELTA_BYTES, in.len,
2614 p->winPos + outLimit, &vars);
2615 }
2616}
2617
2618
2619
2620
2621// inSize != 0
2622// it must parse the block headers in the same way as ZstdDec1_DecodeBlock()
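/* background (a descriptive note, not used by the code): Decompress_Sequences()
   reads its bit stream backward and may read up to MAX_BACKWARD_DEPTH bytes
   before the start of the sequences data. This function estimates whether at
   least that many bytes precede the sequences data in the caller's buffer;
   if not, the caller copies the block into (inTemp), where
   (kTempBuffer_PreSize) zeroed bytes are reserved before the data
   (see ZstdDec_DecodeBlock()). */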
2623static size_t ZstdDec1_NeedTempBufferForInput(
2624 const SizeT beforeSize, const Byte * const src, const SizeT inSize)
2625{
2626 unsigned b0;
2627 UInt32 pos;
2628
2629 #ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP
2630 g_numSeqs = 1 << 24;
2631 #else
2632 // we have at least 3 bytes before seq data: litBlockType, numSeqs, seqMode
2633 #define MIN_BLOCK_LZ_HEADERS_SIZE 3
2634 if (beforeSize >= MAX_BACKWARD_DEPTH - MIN_BLOCK_LZ_HEADERS_SIZE)
2635 return 0;
2636 #endif
2637
2638 b0 = src[0];
2639
2640 if ((b0 & k_LitBlockType_Flag_Compressed) == 0)
2641 {
2642 UInt32 numLits = b0 >> 3;
2643 pos = 1;
2644 if (b0 & 4)
2645 {
2646 UInt32 v;
2647 if (inSize < 3)
2648 return 0;
2649 numLits >>= 1;
2650 v = GetUi16(src + 1);
2651 pos = 3;
2652 if ((b0 & 8) == 0)
2653 {
2654 pos = 2;
2655 v = (Byte)v;
2656 }
2657 numLits += v << 4;
2658 }
2659 if (b0 & k_LitBlockType_Flag_RLE_or_Treeless)
2660 numLits = 1;
2661 pos += numLits;
2662 }
2663 else if (inSize < 5)
2664 return 0;
2665 else
2666 {
2667 const unsigned mode4Streams = b0 & 0xc;
2668 const unsigned numBytes = (3 * mode4Streams + 48) >> 4;
2669 const unsigned numBits = 4 * numBytes - 6;
2670 UInt32 cs = GetUi32(src + 1);
2671 cs >>= numBits;
2672 cs &= ((UInt32)16 << numBits) - 1;
2673 if (cs == 0)
2674 return 0;
2675 pos = numBytes + cs;
2676 }
2677
2678 if (pos >= inSize)
2679 return 0;
2680 {
2681 UInt32 numSeqs = src[pos++];
2682 if (numSeqs > 127)
2683 {
2684 UInt32 b1;
2685 if (pos >= inSize)
2686 return 0;
2687 numSeqs -= 128;
2688 b1 = src[pos++];
2689 if (numSeqs == 127)
2690 {
2691 if (pos >= inSize)
2692 return 0;
2693 numSeqs = (UInt32)(src[pos++]) + 127;
2694 }
2695 numSeqs = (numSeqs << 8) + b1;
2696 }
2697 #ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP
2698 g_numSeqs = numSeqs; // for debug
2699 #endif
2700 if (numSeqs == 0)
2701 return 0;
2702 }
2703 /*
2704 if (pos >= inSize)
2705 return 0;
2706 pos++;
2707 */
2708 // we will have one additional byte for seqMode:
2709 if (beforeSize + pos >= MAX_BACKWARD_DEPTH - 1)
2710 return 0;
2711 return 1;
2712}
2713
2714
2715
2716// ---------- ZSTD FRAME ----------
2717
2718#define kBlockType_Raw 0
2719#define kBlockType_RLE 1
2720#define kBlockType_Compressed 2
2721#define kBlockType_Reserved 3
2722
2723typedef enum
2724{
2725 // begin: states that require 4 bytes:
2726 ZSTD2_STATE_SIGNATURE,
2727 ZSTD2_STATE_HASH,
2728 ZSTD2_STATE_SKIP_HEADER,
2729 // end of states that require 4 bytes
2730
2731 ZSTD2_STATE_SKIP_DATA,
2732 ZSTD2_STATE_FRAME_HEADER,
2733 ZSTD2_STATE_AFTER_HEADER,
2734 ZSTD2_STATE_BLOCK,
2735 ZSTD2_STATE_DATA,
2736 ZSTD2_STATE_FINISHED
2737} EZstd2State;
2738
2739
2740struct CZstdDec
2741{
2742 EZstd2State frameState;
2743 unsigned tempSize;
2744
2745 Byte temp[14]; // 14 is required
2746
2747 Byte descriptor;
2748 Byte windowDescriptor;
2749 Byte isLastBlock;
2750 Byte blockType;
2751 Byte isErrorState;
2752 Byte hashError;
2753 Byte disableHash;
2754 Byte isCyclicMode;
2755
2756 UInt32 blockSize;
2757 UInt32 dictionaryId;
2758 UInt32 curBlockUnpackRem; // for compressed blocks only
2759 UInt32 inTempPos;
2760
2761 UInt64 contentSize;
2762 UInt64 contentProcessed;
2763 CXxh64State xxh64;
2764
2765 Byte *inTemp;
2766 SizeT winBufSize_Allocated;
2767 Byte *win_Base;
2768
2769 ISzAllocPtr alloc_Small;
2770 ISzAllocPtr alloc_Big;
2771
2772 CZstdDec1 decoder;
2773};
2774
2775#define ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p) \
2776 ((unsigned)(p)->contentProcessed & (Z7_XXH64_BLOCK_SIZE - 1))
2777
2778#define ZSTD_DEC_IS_LAST_BLOCK(p) ((p)->isLastBlock)
2779
2780
2781static void ZstdDec_FreeWindow(CZstdDec * const p)
2782{
2783 if (p->win_Base)
2784 {
2785 ISzAlloc_Free(p->alloc_Big, p->win_Base);
2786 p->win_Base = NULL;
2787 // p->decoder.win = NULL;
2788 p->winBufSize_Allocated = 0;
2789 }
2790}
2791
2792
2793CZstdDecHandle ZstdDec_Create(ISzAllocPtr alloc_Small, ISzAllocPtr alloc_Big)
2794{
2795 CZstdDec *p = (CZstdDec *)ISzAlloc_Alloc(alloc_Small, sizeof(CZstdDec));
2796 if (!p)
2797 return NULL;
2798 p->alloc_Small = alloc_Small;
2799 p->alloc_Big = alloc_Big;
2800 // ZstdDec_CONSTRUCT(p)
2801 p->inTemp = NULL;
2802 p->win_Base = NULL;
2803 p->winBufSize_Allocated = 0;
2804 p->disableHash = False;
2805 ZstdDec1_Construct(&p->decoder);
2806 return p;
2807}
2808
2809void ZstdDec_Destroy(CZstdDecHandle p)
2810{
2811 #ifdef SHOW_STAT
2812 #define PRINT_STAT1(name, v) \
2813 printf("\n%25s = %9u", name, v);
2814 PRINT_STAT1("g_Num_Blocks_Compressed", g_Num_Blocks_Compressed)
2815 PRINT_STAT1("g_Num_Blocks_memcpy", g_Num_Blocks_memcpy)
2816 PRINT_STAT1("g_Num_Wrap_memmove_Num", g_Num_Wrap_memmove_Num)
2817 PRINT_STAT1("g_Num_Wrap_memmove_Bytes", g_Num_Wrap_memmove_Bytes)
2818 if (g_Num_Blocks_Compressed)
2819 {
2820 #define PRINT_STAT(name, v) \
2821 printf("\n%17s = %9u, per_block = %8u", name, v, v / g_Num_Blocks_Compressed);
2822 PRINT_STAT("g_NumSeqs", g_NumSeqs_total)
2823 // PRINT_STAT("g_NumCopy", g_NumCopy)
2824 PRINT_STAT("g_NumOver", g_NumOver)
2825 PRINT_STAT("g_NumOver2", g_NumOver2)
2826 PRINT_STAT("g_Num_Match", g_Num_Match)
2827 PRINT_STAT("g_Num_Lits", g_Num_Lits)
2828 PRINT_STAT("g_Num_LitsBig", g_Num_LitsBig)
2829 PRINT_STAT("g_Num_Lit0", g_Num_Lit0)
2830 PRINT_STAT("g_Num_Rep_0", g_Num_Rep0)
2831 PRINT_STAT("g_Num_Rep_1", g_Num_Rep1)
2832 PRINT_STAT("g_Num_Rep_2", g_Num_Rep2)
2833 PRINT_STAT("g_Num_Rep_3", g_Num_Rep3)
2834 PRINT_STAT("g_Num_Threshold_0", g_Num_Threshold_0)
2835 PRINT_STAT("g_Num_Threshold_1", g_Num_Threshold_1)
2836 PRINT_STAT("g_Num_Threshold_0sum", g_Num_Threshold_0sum)
2837 PRINT_STAT("g_Num_Threshold_1sum", g_Num_Threshold_1sum)
2838 }
2839 printf("\n");
2840 #endif
2841
2842 ISzAlloc_Free(p->alloc_Small, p->decoder.literalsBase);
2843 // p->decoder.literalsBase = NULL;
2844 ISzAlloc_Free(p->alloc_Small, p->inTemp);
2845 // p->inTemp = NULL;
2846 ZstdDec_FreeWindow(p);
2847 ISzAlloc_Free(p->alloc_Small, p);
2848}
2849
2850
2851
2852#define kTempBuffer_PreSize (1u << 6)
2853#if kTempBuffer_PreSize < MAX_BACKWARD_DEPTH
2854 #error Stop_Compiling_Bad_kTempBuffer_PreSize
2855#endif
2856
2857static SRes ZstdDec_AllocateMisc(CZstdDec *p)
2858{
2859 #define k_Lit_AfterAvail (1u << 6)
2860 #if k_Lit_AfterAvail < (COPY_CHUNK_SIZE - 1)
2861 #error Stop_Compiling_Bad_k_Lit_AfterAvail
2862 #endif
2863 // return ZstdDec1_Allocate(&p->decoder, p->alloc_Small);
2864 if (!p->decoder.literalsBase)
2865 {
2866 p->decoder.literalsBase = (Byte *)ISzAlloc_Alloc(p->alloc_Small,
2867 kBlockSizeMax + k_Lit_AfterAvail);
2868 if (!p->decoder.literalsBase)
2869 return SZ_ERROR_MEM;
2870 }
2871 if (!p->inTemp)
2872 {
2873 // we need k_Lit_AfterAvail here to allow overread from the raw literals stream
2874 p->inTemp = (Byte *)ISzAlloc_Alloc(p->alloc_Small,
2875 kBlockSizeMax + kTempBuffer_PreSize + k_Lit_AfterAvail);
2876 if (!p->inTemp)
2877 return SZ_ERROR_MEM;
2878 }
2879 return SZ_OK;
2880}
2881
2882
2883static void ZstdDec_Init_ForNewFrame(CZstdDec *p)
2884{
2885 p->frameState = ZSTD2_STATE_SIGNATURE;
2886 p->tempSize = 0;
2887
2888 p->isErrorState = False;
2889 p->hashError = False;
2890 p->isCyclicMode = False;
2891 p->contentProcessed = 0;
2892 Xxh64State_Init(&p->xxh64);
2893 ZstdDec1_Init(&p->decoder);
2894}
2895
2896
2897void ZstdDec_Init(CZstdDec *p)
2898{
2899 ZstdDec_Init_ForNewFrame(p);
2900 p->decoder.winPos = 0;
2901 memset(p->temp, 0, sizeof(p->temp));
2902}
2903
2904
2905#define DESCRIPTOR_Get_DictionaryId_Flag(d) ((d) & 3)
2906#define DESCRIPTOR_FLAG_CHECKSUM (1 << 2)
2907#define DESCRIPTOR_FLAG_RESERVED (1 << 3)
2908// #define DESCRIPTOR_FLAG_UNUSED (1 << 4)
2909#define DESCRIPTOR_FLAG_SINGLE (1 << 5)
2910#define DESCRIPTOR_Get_ContentSize_Flag3(d) ((d) >> 5)
2911#define DESCRIPTOR_Is_ContentSize_Defined(d) (((d) & 0xe0) != 0)
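/* frame header descriptor bit layout behind the macros above (the
   Frame_Header_Descriptor from the Zstandard format specification):
     bits [1:0] : Dictionary_ID_Flag (dictionary id field: 0, 1, 2 or 4 bytes)
     bit 2      : Content_Checksum_Flag
     bit 3      : Reserved (must be zero)
     bit 4      : Unused
     bit 5      : Single_Segment_Flag (if set, there is no window descriptor byte)
     bits [7:6] : Frame_Content_Size_Flag (content size field: 0 or 1, 2, 4 or 8 bytes) */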
2912
2913
2914static EZstd2State ZstdDec_UpdateState(CZstdDec * const p, const Byte b, CZstdDecInfo * const info)
2915{
2916 unsigned tempSize = p->tempSize;
2917 p->temp[tempSize++] = b;
2918 p->tempSize = tempSize;
2919
2920 if (p->frameState == ZSTD2_STATE_BLOCK)
2921 {
2922 if (tempSize < 3)
2923 return ZSTD2_STATE_BLOCK;
2924 {
2925 UInt32 b0 = GetUi32(p->temp);
2926 const unsigned type = ((unsigned)b0 >> 1) & 3;
2927 if (type == kBlockType_RLE && tempSize == 3)
2928 return ZSTD2_STATE_BLOCK;
2929 // info->num_Blocks_forType[type]++;
2930 info->num_Blocks++;
2931 if (type == kBlockType_Reserved)
2932 {
2933 p->isErrorState = True; // SZ_ERROR_UNSUPPORTED
2934 return ZSTD2_STATE_BLOCK;
2935 }
2936 p->blockType = (Byte)type;
2937 p->isLastBlock = (Byte)(b0 & 1);
2938 p->inTempPos = 0;
2939 p->tempSize = 0;
2940 b0 >>= 3;
2941 b0 &= 0x1fffff;
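/* block header layout decoded above (3 bytes, little-endian, per the
   Zstandard format specification; a reference summary):
     bit 0       : Last_Block
     bits [2:1]  : Block_Type (the kBlockType_* values above)
     bits [23:3] : Block_Size (hence the 0x1fffff mask); for RLE blocks this
       is the regenerated size, while the stored block content is 1 byte */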
2942 // info->num_BlockBytes_forType[type] += b0;
2943 if (b0 == 0)
2944 {
2945 // empty RAW/RLE blocks are allowed in original-zstd decoder
2946 if (type == kBlockType_Compressed)
2947 {
2948 p->isErrorState = True;
2949 return ZSTD2_STATE_BLOCK;
2950 }
2951 if (!ZSTD_DEC_IS_LAST_BLOCK(p))
2952 return ZSTD2_STATE_BLOCK;
2953 if (p->descriptor & DESCRIPTOR_FLAG_CHECKSUM)
2954 return ZSTD2_STATE_HASH;
2955 return ZSTD2_STATE_FINISHED;
2956 }
2957 p->blockSize = b0;
2958 {
2959 UInt32 blockLim = ZstdDec1_GET_BLOCK_SIZE_LIMIT(&p->decoder);
2960 // compressed and uncompressed block sizes cannot be larger than min(kBlockSizeMax, window_size)
2961 if (b0 > blockLim)
2962 {
2963 p->isErrorState = True; // SZ_ERROR_UNSUPPORTED;
2964 return ZSTD2_STATE_BLOCK;
2965 }
2966 if (DESCRIPTOR_Is_ContentSize_Defined(p->descriptor))
2967 {
2968 const UInt64 rem = p->contentSize - p->contentProcessed;
2969 if (blockLim > rem)
2970 blockLim = (UInt32)rem;
2971 }
2972 p->curBlockUnpackRem = blockLim;
2973 // uncompressed block size cannot be larger than the remaining data size:
2974 if (type != kBlockType_Compressed)
2975 {
2976 if (b0 > blockLim)
2977 {
2978 p->isErrorState = True; // SZ_ERROR_UNSUPPORTED;
2979 return ZSTD2_STATE_BLOCK;
2980 }
2981 }
2982 }
2983 }
2984 return ZSTD2_STATE_DATA;
2985 }
2986
2987 if ((unsigned)p->frameState < ZSTD2_STATE_SKIP_DATA)
2988 {
2989 UInt32 v;
2990 if (tempSize != 4)
2991 return p->frameState;
2992 v = GetUi32(p->temp);
2993 if ((unsigned)p->frameState < ZSTD2_STATE_HASH) // == ZSTD2_STATE_SIGNATURE
2994 {
2995 if (v == 0xfd2fb528)
2996 {
2997 p->tempSize = 0;
2998 info->num_DataFrames++;
2999 return ZSTD2_STATE_FRAME_HEADER;
3000 }
3001 if ((v & 0xfffffff0) == 0x184d2a50)
3002 {
3003 p->tempSize = 0;
3004 info->num_SkipFrames++;
3005 return ZSTD2_STATE_SKIP_HEADER;
3006 }
3007 p->isErrorState = True;
3008 return ZSTD2_STATE_SIGNATURE;
3009 // return ZSTD2_STATE_ERROR; // is not ZSTD stream
3010 }
3011 if (p->frameState == ZSTD2_STATE_HASH)
3012 {
3013 info->checksum_Defined = True;
3014 info->checksum = v;
3015 // #ifndef DISABLE_XXH_CHECK
3016 if (!p->disableHash)
3017 {
3018 if (p->decoder.winPos < ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p))
3019 {
3020 // unexpected code failure
3021 p->isErrorState = True;
3022 // SZ_ERROR_FAIL;
3023 }
3024 else
3025 if ((UInt32)Xxh64State_Digest(&p->xxh64,
3026 p->decoder.win + (p->decoder.winPos - ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p)),
3027 p->contentProcessed) != v)
3028 {
3029 p->hashError = True;
3030 // return ZSTD2_STATE_ERROR; // hash error
3031 }
3032 }
3033 // #endif
3034 return ZSTD2_STATE_FINISHED;
3035 }
3036 // (p->frameState == ZSTD2_STATE_SKIP_HEADER)
3037 {
3038 p->blockSize = v;
3039 info->skipFrames_Size += v;
3040 p->tempSize = 0;
3041 /* we want the caller to be able to know that a frame was
3042 finished. So we allow the case where
3043 we have the ZSTD2_STATE_SKIP_DATA state with (blockSize == 0).
3044 */
3045 // if (v == 0) return ZSTD2_STATE_SIGNATURE;
3046 return ZSTD2_STATE_SKIP_DATA;
3047 }
3048 }
3049
3050 // if (p->frameState == ZSTD2_STATE_FRAME_HEADER)
3051 {
3052 unsigned descriptor;
3053 const Byte *h;
3054 descriptor = p->temp[0];
3055 p->descriptor = (Byte)descriptor;
3056 if (descriptor & DESCRIPTOR_FLAG_RESERVED) // reserved bit
3057 {
3058 p->isErrorState = True;
3059 return ZSTD2_STATE_FRAME_HEADER;
3060 // return ZSTD2_STATE_ERROR;
3061 }
3062 {
3063 const unsigned n = DESCRIPTOR_Get_ContentSize_Flag3(descriptor);
3064 // tempSize -= 1 + ((1u << (n >> 1)) | ((n + 1) & 1));
3065 tempSize -= (0x9a563422u >> (n * 4)) & 0xf;
3066 }
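/* worked example for the nibble table above (illustration only):
   for (n == 6) (Frame_Content_Size_Flag == 3, Single_Segment_Flag == 0):
     (0x9a563422u >> 24) & 0xf == 0xa == 10
     == 1 (descriptor) + 1 (window descriptor) + 8 (content size field) bytes,
   the same value as 1 + ((1u << (6 >> 1)) | ((6 + 1) & 1)) == 1 + (8 | 1). */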
3067 if (tempSize != (4u >> (3 - DESCRIPTOR_Get_DictionaryId_Flag(descriptor))))
3068 return ZSTD2_STATE_FRAME_HEADER;
3069
3070 info->descriptor_OR = (Byte)(info->descriptor_OR | descriptor);
3071 info->descriptor_NOT_OR = (Byte)(info->descriptor_NOT_OR | ~descriptor);
3072
3073 h = &p->temp[1];
3074 {
3075 Byte w = 0;
3076 if ((descriptor & DESCRIPTOR_FLAG_SINGLE) == 0)
3077 {
3078 w = *h++;
3079 if (info->windowDescriptor_MAX < w)
3080 info->windowDescriptor_MAX = w;
3081 // info->are_WindowDescriptors = True;
3082 // info->num_WindowDescriptors++;
3083 }
3084 else
3085 {
3086 // info->are_SingleSegments = True;
3087 // info->num_SingleSegments++;
3088 }
3089 p->windowDescriptor = w;
3090 }
3091 {
3092 unsigned n = DESCRIPTOR_Get_DictionaryId_Flag(descriptor);
3093 UInt32 d = 0;
3094 if (n)
3095 {
3096 n = 1u << (n - 1);
3097 d = GetUi32(h) & ((UInt32)(Int32)-1 >> (32 - 8u * n));
3098 h += n;
3099 }
3100 p->dictionaryId = d;
3101 // info->dictionaryId_Cur = d;
3102 if (d != 0)
3103 {
3104 if (info->dictionaryId == 0)
3105 info->dictionaryId = d;
3106 else if (info->dictionaryId != d)
3107 info->are_DictionaryId_Different = True;
3108 }
3109 }
3110 {
3111 unsigned n = DESCRIPTOR_Get_ContentSize_Flag3(descriptor);
3112 UInt64 v = 0;
3113 if (n)
3114 {
3115 n >>= 1;
3116 if (n == 1)
3117 v = 256;
3118 v += GetUi64(h) & ((UInt64)(Int64)-1 >> (64 - (8u << n)));
3119 // info->are_ContentSize_Known = True;
3120 // info->num_Frames_with_ContentSize++;
3121 if (info->contentSize_MAX < v)
3122 info->contentSize_MAX = v;
3123 info->contentSize_Total += v;
3124 }
3125 else
3126 {
3127 info->are_ContentSize_Unknown = True;
3128 // info->num_Frames_without_ContentSize++;
3129 }
3130 p->contentSize = v;
3131 }
3132 // if ((size_t)(h - p->temp) != headerSize) return ZSTD2_STATE_ERROR; // it's unexpected internal code failure
3133 p->tempSize = 0;
3134
3135 info->checksum_Defined = False;
3136 /*
3137 if (descriptor & DESCRIPTOR_FLAG_CHECKSUM)
3138 info->are_Checksums = True;
3139 else
3140 info->are_Non_Checksums = True;
3141 */
3142
3143 return ZSTD2_STATE_AFTER_HEADER; // ZSTD2_STATE_BLOCK;
3144 }
3145}
3146
3147
3148static void ZstdDec_Update_XXH(CZstdDec * const p, size_t xxh64_winPos)
3149{
3150 /*
3151 #ifdef DISABLE_XXH_CHECK
3152 UNUSED_VAR(data)
3153 #else
3154 */
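/* note on the masking below: Xxh64State_UpdateBlocks() consumes only whole
   32-byte blocks (assuming Z7_XXH64_BLOCK_SIZE == 32, as the 31-mask below
   implies), so we hash the largest 32-byte multiple and keep the unhashed
   tail in the window; the tail is located later with
   ZstdDec_GET_UNPROCESSED_XXH64_SIZE() and passed to Xxh64State_Digest(). */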
3155 if (!p->disableHash && (p->descriptor & DESCRIPTOR_FLAG_CHECKSUM))
3156 {
3157 // const size_t pos = p->xxh64_winPos;
3158 const size_t size = (p->decoder.winPos - xxh64_winPos) & ~(size_t)31;
3159 if (size)
3160 {
3161 // p->xxh64_winPos = pos + size;
3162 Xxh64State_UpdateBlocks(&p->xxh64,
3163 p->decoder.win + xxh64_winPos,
3164 p->decoder.win + xxh64_winPos + size);
3165 }
3166 }
3167}
3168
3169
3170/*
3171in:
3172 (winLimit) : is a relaxed limit, where this function is allowed to stop writing decoded data (if possible).
3173 - this function uses (winLimit) for RAW/RLE blocks only,
3174 because this function can decode a single RAW/RLE block in several different calls.
3175 - this function DOESN'T use (winLimit) for Compressed blocks,
3176 because this function decodes full compressed block in single call.
3177 (CZstdDec1::winPos <= winLimit)
3178 (winLimit <= CZstdDec1::cycSize).
3179 Note: if (ds->outBuf_fromCaller) mode is used, then
3180 {
3181 (strong_limit) is stored in CZstdDec1::cycSize.
3182 So (winLimit) is more strong than (strong_limit).
3183 }
3184
3185exit:
3186 Note: (CZstdDecState::winPos) will be set by caller after exit of this function.
3187
3188 This function can exit for any of these conditions:
3189 - (frameState == ZSTD2_STATE_AFTER_HEADER)
3190 - (frameState == ZSTD2_STATE_FINISHED) : frame was finished : (status == ZSTD_STATUS_FINISHED_FRAME) is set
3191 - finished non-empty non-last block. So (CZstdDec1::winPos_atExit != winPos_atFuncStart).
3192 - ZSTD_STATUS_NEEDS_MORE_INPUT in src
3193 - (CZstdDec1::winPos) have reached (winLimit) in non-finished RAW/RLE block
3194
3195 This function decodes no more than one non-empty block.
3196 So it fulfills the condition at exit:
3197 (CZstdDec1::winPos_atExit - winPos_atFuncStart <= block_size_max)
3198 Note: (winPos_atExit > winLimit) is possible in some cases after compressed block decoding.
3199
3200 if (ds->outBuf_fromCaller) mode (useAdditionalWinLimit mode)
3201 {
3202 then this function uses additional strong limit from (CZstdDec1::cycSize).
3203 So this function will not write any data after (CZstdDec1::cycSize)
3204 And it fulfills the condition at exit:
3205 (CZstdDec1::winPos_atExit <= CZstdDec1::cycSize)
3206 }
3207*/
3208static SRes ZstdDec_DecodeBlock(CZstdDec * const p, CZstdDecState * const ds,
3209 SizeT winLimitAdd)
3210{
3211 const Byte *src = ds->inBuf;
3212 SizeT * const srcLen = &ds->inPos;
3213 const SizeT inSize = ds->inLim;
3214 // const int useAdditionalWinLimit = ds->outBuf_fromCaller ? 1 : 0;
3215 enum_ZstdStatus * const status = &ds->status;
3216 CZstdDecInfo * const info = &ds->info;
3217 SizeT winLimit;
3218
3219 const SizeT winPos_atFuncStart = p->decoder.winPos;
3220 src += *srcLen;
3221 *status = ZSTD_STATUS_NOT_SPECIFIED;
3222
3223 // finishMode = ZSTD_FINISH_ANY;
3224 if (ds->outSize_Defined)
3225 {
3226 if (ds->outSize < ds->outProcessed)
3227 {
3228 // p->isAfterSizeMode = 2; // we have extra bytes already
3229 *status = ZSTD_STATUS_OUT_REACHED;
3230 return SZ_OK;
3231 // size = 0;
3232 }
3233 else
3234 {
3235 // p->outSize >= p->outProcessed
3236 const UInt64 rem = ds->outSize - ds->outProcessed;
3237 /*
3238 if (rem == 0)
3239 p->isAfterSizeMode = 1; // we have reached exact required size
3240 */
3241 if (winLimitAdd >= rem)
3242 {
3243 winLimitAdd = (SizeT)rem;
3244 // if (p->finishMode) finishMode = ZSTD_FINISH_END;
3245 }
3246 }
3247 }
3248
3249 winLimit = p->decoder.winPos + winLimitAdd;
3250 // (p->decoder.winPos <= winLimit)
3251
3252 // while (p->frameState != ZSTD2_STATE_ERROR)
3253 while (!p->isErrorState)
3254 {
3255 SizeT inCur = inSize - *srcLen;
3256
3257 if (p->frameState == ZSTD2_STATE_DATA)
3258 {
3259 /* (p->decoder.winPos == winPos_atFuncStart) is expected,
3260 because this function doesn't start a new block,
3261 if it has finished some non-empty block in this call. */
3262 if (p->decoder.winPos != winPos_atFuncStart)
3263 return SZ_ERROR_FAIL; // it's unexpected
3264
3265 /*
3266 if (p->decoder.winPos > winLimit)
3267 {
3268 // we can be here, if in this function call
3269 // - we have extracted non-empty compressed block, and (winPos > winLimit) after that.
3270 // - we have started new block decoding after that.
3271 // It's unexpected case, because we exit after non-empty non-last block.
3272 *status = (inSize == *srcLen) ?
3273 ZSTD_STATUS_NEEDS_MORE_INPUT :
3274 ZSTD_STATUS_NOT_FINISHED;
3275 return SZ_OK;
3276 }
3277 */
3278 // p->decoder.winPos <= winLimit
3279
3280 if (p->blockType != kBlockType_Compressed)
3281 {
3282 // it's RLE or RAW block.
3283 // p->blockSize != 0
3284 // winLimit <= p->decoder.cycSize
3285 /* So here we use more strong (winLimit), even for
3286 (ds->outBuf_fromCaller) mode. */
3287 SizeT outCur = winLimit - p->decoder.winPos;
3288 {
3289 const UInt32 rem = p->blockSize;
3290 if (outCur > rem)
3291 outCur = rem;
3292 }
3293 if (p->blockType == kBlockType_Raw)
3294 {
3295 if (outCur > inCur)
3296 outCur = inCur;
3297 /* the output buffer is better aligned for the XXH code.
3298 So we hash the data in the output buffer */
3299 // ZstdDec_Update_XXH(p, src, outCur); // for debug:
3300 memcpy(p->decoder.win + p->decoder.winPos, src, outCur);
3301 src += outCur;
3302 *srcLen += outCur;
3303 }
3304 else // kBlockType_RLE
3305 {
3306 #define RLE_BYTE_INDEX_IN_temp 3
3307 memset(p->decoder.win + p->decoder.winPos,
3308 p->temp[RLE_BYTE_INDEX_IN_temp], outCur);
3309 }
3310 {
3311 const SizeT xxh64_winPos = p->decoder.winPos - ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p);
3312 p->decoder.winPos += outCur;
3313 p->contentProcessed += outCur;
3314 ZstdDec_Update_XXH(p, xxh64_winPos);
3315 }
3316 // ds->winPos = p->decoder.winPos; // the caller does it instead. for debug:
3317 UPDATE_TOTAL_OUT(&p->decoder, outCur)
3318 ds->outProcessed += outCur;
3319 if (p->blockSize -= (UInt32)outCur)
3320 {
3321 /*
3322 if (ds->outSize_Defined)
3323 {
3324 if (ds->outSize <= ds->outProcessed) ds->isAfterSizeMode = (enum_ZstdStatus)
3325 (ds->outSize == ds->outProcessed ? 1u: 2u);
3326 }
3327 */
3328 *status = (enum_ZstdStatus)
3329 (ds->outSize_Defined && ds->outSize <= ds->outProcessed ?
3330 ZSTD_STATUS_OUT_REACHED : (p->blockType == kBlockType_Raw && inSize == *srcLen) ?
3331 ZSTD_STATUS_NEEDS_MORE_INPUT :
3332 ZSTD_STATUS_NOT_FINISHED);
3333 return SZ_OK;
3334 }
3335 }
3336 else // kBlockType_Compressed
3337 {
3338 // p->blockSize != 0
3339 // (uncompressed_size_of_block == 0) is allowed
3340 // (p->curBlockUnpackRem == 0) is allowed
3341 /*
3342 if (p->decoder.winPos >= winLimit)
3343 {
3344 if (p->decoder.winPos != winPos_atFuncStart)
3345 {
3346 // it's unexpected case
3347 // We already have some data in finished blocks in this function call.
3348 // So we don't decompress new block after (>=winLimit),
3349 // even if it's empty block.
3350 *status = (inSize == *srcLen) ?
3351 ZSTD_STATUS_NEEDS_MORE_INPUT :
3352 ZSTD_STATUS_NOT_FINISHED;
3353 return SZ_OK;
3354 }
3355 // (p->decoder.winPos == winLimit == winPos_atFuncStart)
3356 // we will decode current block, because that current
3357 // block can be empty block and we want to make some visible
3358 // change of (src) stream after function start.
3359 }
3360 */
3361 /*
3362 if (ds->outSize_Defined && ds->outSize < ds->outProcessed)
3363 {
3364 // we don't want to start new block, if we have more extra decoded bytes already
3365 *status = ZSTD_STATUS_OUT_REACHED;
3366 return SZ_OK;
3367 }
3368 */
3369 {
3370 const Byte *comprStream;
3371 size_t afterAvail;
3372 UInt32 inTempPos = p->inTempPos;
3373 const UInt32 rem = p->blockSize - inTempPos;
3374 // rem != 0
3375 if (inTempPos != 0 // (inTemp) buffer already contains some input data
3376 || inCur < rem // available input data size is smaller than compressed block size
3377 || ZstdDec1_NeedTempBufferForInput(*srcLen, src, rem))
3378 {
3379 if (inCur > rem)
3380 inCur = rem;
3381 if (inCur)
3382 {
3383 STAT_INC(g_Num_Blocks_memcpy)
3384 // we clear data for backward lookahead reading
3385 if (inTempPos == 0)
3386 memset(p->inTemp + kTempBuffer_PreSize - MAX_BACKWARD_DEPTH, 0, MAX_BACKWARD_DEPTH);
3387 // { unsigned y = 0; for(;y < 1000; y++)
3388 memcpy(p->inTemp + inTempPos + kTempBuffer_PreSize, src, inCur);
3389 // }
3390 src += inCur;
3391 *srcLen += inCur;
3392 inTempPos += (UInt32)inCur;
3393 p->inTempPos = inTempPos;
3394 }
3395 if (inTempPos != p->blockSize)
3396 {
3397 *status = ZSTD_STATUS_NEEDS_MORE_INPUT;
3398 return SZ_OK;
3399 }
3400 #if COPY_CHUNK_SIZE > 1
3401 memset(p->inTemp + kTempBuffer_PreSize + inTempPos, 0, COPY_CHUNK_SIZE);
3402 #endif
3403 comprStream = p->inTemp + kTempBuffer_PreSize;
3404 afterAvail = k_Lit_AfterAvail;
3405 // we don't want to read non-initialized data or junk in CopyMatch():
3406 }
3407 else
3408 {
3409 // inCur >= rem
3410 // we use direct decoding from (src) buffer:
3411 afterAvail = inCur - rem;
3412 comprStream = src;
3413 src += rem;
3414 *srcLen += rem;
3415 }
3416
3417 #ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP
3418 ZstdDec1_NeedTempBufferForInput(*srcLen, comprStream, p->blockSize);
3419 #endif
3420 // printf("\nblockSize=%u", p->blockSize);
3421 // printf("%x\n", (unsigned)p->contentProcessed);
3422 STAT_INC(g_Num_Blocks_Compressed)
3423 {
3424 SRes sres;
3425 const size_t winPos = p->decoder.winPos;
3426 /*
3427 if (useAdditionalWinLimit), we use a strong unpack limit: the smallest of
3428 - the limit from the stream : (curBlockUnpackRem)
3429 - the limit from the caller : (cycSize - winPos)
3430 if (!useAdditionalWinLimit), we use only the relaxed limit:
3431 - the limit from the stream : (curBlockUnpackRem)
3432 */
3433 SizeT outLimit = p->curBlockUnpackRem;
3434 if (ds->outBuf_fromCaller)
3435 // if (useAdditionalWinLimit)
3436 {
3437 const size_t limit = p->decoder.cycSize - winPos;
3438 if (outLimit > limit)
3439 outLimit = limit;
3440 }
3441 sres = ZstdDec1_DecodeBlock(&p->decoder,
3442 comprStream, p->blockSize, afterAvail, outLimit);
3443 // ds->winPos = p->decoder.winPos; // the caller does it instead. for debug:
3444 if (sres)
3445 {
3446 p->isErrorState = True;
3447 return sres;
3448 }
3449 {
3450 const SizeT xxh64_winPos = winPos - ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p);
3451 const size_t num = p->decoder.winPos - winPos;
3452 ds->outProcessed += num;
3453 p->contentProcessed += num;
3454 ZstdDec_Update_XXH(p, xxh64_winPos);
3455 }
3456 }
3457 // printf("\nwinPos=%x", (int)(unsigned)p->decoder.winPos);
3458 }
3459 }
3460
3461 /*
3462 if (ds->outSize_Defined)
3463 {
3464 if (ds->outSize <= ds->outProcessed) ds->isAfterSizeMode = (enum_ZstdStatus)
3465 (ds->outSize == ds->outProcessed ? 1u: 2u);
3466 }
3467 */
3468
3469 if (!ZSTD_DEC_IS_LAST_BLOCK(p))
3470 {
3471 p->frameState = ZSTD2_STATE_BLOCK;
3472 if (ds->outSize_Defined && ds->outSize < ds->outProcessed)
3473 {
3474 *status = ZSTD_STATUS_OUT_REACHED;
3475 return SZ_OK;
3476 }
3477 // we exit only if (winPos) was changed in this function call:
3478 if (p->decoder.winPos != winPos_atFuncStart)
3479 {
3480 // decoded block was not empty. So we exit:
3481 *status = (enum_ZstdStatus)(
3482 (inSize == *srcLen) ?
3483 ZSTD_STATUS_NEEDS_MORE_INPUT :
3484 ZSTD_STATUS_NOT_FINISHED);
3485 return SZ_OK;
3486 }
3487 // (p->decoder.winPos == winPos_atFuncStart)
3488 // so current decoded block was empty.
3489 // we will try to decode more blocks in this function.
3490 continue;
3491 }
3492
3493 // decoded block was last in frame
3494 if (p->descriptor & DESCRIPTOR_FLAG_CHECKSUM)
3495 {
3496 p->frameState = ZSTD2_STATE_HASH;
3497 if (ds->outSize_Defined && ds->outSize < ds->outProcessed)
3498 {
3499 *status = ZSTD_STATUS_OUT_REACHED;
3500 return SZ_OK; // disable if want to
3501 /* We want to get the same return codes for any input buffer size.
3502 We want to reach the ZSTD_STATUS_OUT_REACHED status faster.
3503 So we exit with ZSTD_STATUS_OUT_REACHED here,
3504 instead of ZSTD2_STATE_HASH and ZSTD2_STATE_FINISHED processing,
3505 which depends on the input buffer size and can set
3506 ZSTD_STATUS_NEEDS_MORE_INPUT or return SZ_ERROR_DATA or SZ_ERROR_CRC.
3507 */
3508 }
3509 }
3510 else
3511 {
3512 /* ZSTD2_STATE_FINISHED processing doesn't depend on the input buffer */
3513 p->frameState = ZSTD2_STATE_FINISHED;
3514 }
3515 /*
3516 p->frameState = (p->descriptor & DESCRIPTOR_FLAG_CHECKSUM) ?
3517 ZSTD2_STATE_HASH :
3518 ZSTD2_STATE_FINISHED;
3519 */
3520 /* it's required to process the ZSTD2_STATE_FINISHED state in this function call,
3521 because we must check contentSize and hashError in the ZSTD2_STATE_FINISHED code,
3522 while the caller can reinit the full state for ZSTD2_STATE_FINISHED.
3523 So we can't exit from the function here. */
3524 continue;
3525 }
3526
3527 if (p->frameState == ZSTD2_STATE_FINISHED)
3528 {
3529 *status = ZSTD_STATUS_FINISHED_FRAME;
3530 if (DESCRIPTOR_Is_ContentSize_Defined(p->descriptor)
3531 && p->contentSize != p->contentProcessed)
3532 return SZ_ERROR_DATA;
3533 if (p->hashError) // for debug
3534 return SZ_ERROR_CRC;
3535 return SZ_OK;
3536 // p->frameState = ZSTD2_STATE_SIGNATURE;
3537 // continue;
3538 }
3539
3540 if (p->frameState == ZSTD2_STATE_AFTER_HEADER)
3541 return SZ_OK; // we need memory allocation for that state
3542
3543 if (p->frameState == ZSTD2_STATE_SKIP_DATA)
3544 {
3545 UInt32 blockSize = p->blockSize;
3546 // (blockSize == 0) is possible
3547 if (inCur > blockSize)
3548 inCur = blockSize;
3549 src += inCur;
3550 *srcLen += inCur;
3551 blockSize -= (UInt32)inCur;
3552 p->blockSize = blockSize;
3553 if (blockSize == 0)
3554 {
3555 p->frameState = ZSTD2_STATE_SIGNATURE;
3556 // continue; // for debug: we can continue without return to caller.
3557 // we notify the caller that skip frame was finished:
3558 *status = ZSTD_STATUS_FINISHED_FRAME;
3559 return SZ_OK;
3560 }
3561 // blockSize != 0
3562 // (inCur) was smaller than previous value of p->blockSize.
3563 // (inSize == *srcLen) now
3564 *status = ZSTD_STATUS_NEEDS_MORE_INPUT;
3565 return SZ_OK;
3566 }
3567
3568 if (inCur == 0)
3569 {
3570 *status = ZSTD_STATUS_NEEDS_MORE_INPUT;
3571 return SZ_OK;
3572 }
3573
3574 {
3575 (*srcLen)++;
3576 p->frameState = ZstdDec_UpdateState(p, *src++, info);
3577 }
3578 }
3579
3580 *status = ZSTD_STATUS_NOT_SPECIFIED;
3581 p->isErrorState = True;
3582 // p->frameState = ZSTD2_STATE_ERROR;
3583 // if (p->frameState = ZSTD2_STATE_SIGNATURE) return SZ_ERROR_NO_ARCHIVE
3584 return SZ_ERROR_DATA;
3585}
3586
3587
3588
3589
3590SRes ZstdDec_Decode(CZstdDecHandle dec, CZstdDecState *p)
3591{
3592 p->needWrite_Size = 0;
3593 p->status = ZSTD_STATUS_NOT_SPECIFIED;
3594 dec->disableHash = p->disableHash;
3595
3596 if (p->outBuf_fromCaller)
3597 {
3598 dec->decoder.win = p->outBuf_fromCaller;
3599 dec->decoder.cycSize = p->outBufSize_fromCaller;
3600 }
3601
3602 // p->winPos = dec->decoder.winPos;
3603
3604 for (;;)
3605 {
3606 SizeT winPos, size;
3607 // SizeT outProcessed;
3608 SRes res;
3609
3610 if (p->wrPos > dec->decoder.winPos)
3611 return SZ_ERROR_FAIL;
3612
3613 if (dec->frameState == ZSTD2_STATE_FINISHED)
3614 {
3615 if (!p->outBuf_fromCaller)
3616 {
3617 // we need to set positions to zero for new frame.
3618 if (p->wrPos != dec->decoder.winPos)
3619 {
3620 /* We have already asked the caller to flush all data
3621 with (p->needWrite_Size) and (ZSTD_STATUS_FINISHED_FRAME) status.
3622 So it's unexpected case */
3623 // p->winPos = dec->decoder.winPos;
3624 // p->needWrite_Size = dec->decoder.winPos - p->wrPos; // flush size asking
3625 // return SZ_OK; // ask to flush again
3626 return SZ_ERROR_FAIL;
3627 }
3628 // (p->wrPos == dec->decoder.winPos), and we wrap to zero:
3629 dec->decoder.winPos = 0;
3630 p->winPos = 0;
3631 p->wrPos = 0;
3632 }
3633 ZstdDec_Init_ForNewFrame(dec);
3634 // continue;
3635 }
3636
3637 winPos = dec->decoder.winPos;
3638 {
3639 SizeT next = dec->decoder.cycSize;
3640 /* cycSize == 0, if no buffer has been allocated yet,
3641 or in (outBuf_fromCaller) mode with (outBufSize_fromCaller == 0) */
3642 if (!p->outBuf_fromCaller
3643 && next
3644 && next <= winPos
3645 && dec->isCyclicMode)
3646 {
3647 // (0 < decoder.cycSize <= winPos) in isCyclicMode.
3648 // so we need to wrap (winPos) and (wrPos) over (cycSize).
3649 const size_t delta = next;
3650 // (delta) is how many bytes we remove from buffer.
3651 /*
3652 // we don't need data older than last (cycSize) bytes.
3653 size_t delta = winPos - next; // num bytes after (cycSize)
3654 if (delta <= next) // it's expected case
3655 delta = next;
3656 // delta == Max(cycSize, winPos - cycSize)
3657 */
3658 if (p->wrPos < delta)
3659 {
3660 // (wrPos < decoder.cycSize)
3661 // We have asked already the caller to flush required data
3662 // p->status = ZSTD_STATUS_NOT_SPECIFIED;
3663 // p->winPos = winPos;
3664 // p->needWrite_Size = delta - p->wrPos; // flush size asking
3665 // return SZ_OK; // ask to flush again
3666 return SZ_ERROR_FAIL;
3667 }
3668 // p->wrPos >= decoder.cycSize
3669 // we move extra data after (decoder.cycSize) to start of cyclic buffer:
3670 winPos -= delta;
3671 if (winPos)
3672 {
3673 if (winPos >= delta)
3674 return SZ_ERROR_FAIL;
3675 memmove(dec->decoder.win, dec->decoder.win + delta, winPos);
3676 // printf("\nmemmove processed=%8x winPos=%8x\n", (unsigned)p->outProcessed, (unsigned)dec->decoder.winPos);
3677 STAT_INC(g_Num_Wrap_memmove_Num)
3678 STAT_UPDATE(g_Num_Wrap_memmove_Bytes += (unsigned)winPos;)
3679 }
3680 dec->decoder.winPos = winPos;
3681 p->winPos = winPos;
3682 p->wrPos -= delta;
3683 // dec->xxh64_winPos -= delta;
3684
3685 // (winPos < delta)
3686 #ifdef Z7_STD_DEC_USE_AFTER_CYC_BUF
3687 /* we clear the data after cycSize, because
3688 we don't want to read non-initialized data or junk in CopyMatch(). */
3689 memset(dec->decoder.win + next, 0, COPY_CHUNK_SIZE);
3690 #endif
3691
3692 /*
3693 if (winPos == next)
3694 {
3695 if (winPos != p->wrPos)
3696 {
3697 // we already requested before to flush full data for that case.
3698 // but we give the caller a second chance to flush data:
3699 p->needWrite_Size = winPos - p->wrPos;
3700 return SZ_OK;
3701 }
3702 // (decoder.cycSize == winPos == p->wrPos)
3703 // so we do second wrapping to zero:
3704 winPos = 0;
3705 dec->decoder.winPos = 0;
3706 p->winPos = 0;
3707 p->wrPos = 0;
3708 }
3709 */
3710 // (winPos < next)
3711 }
3712
3713 if (winPos > next)
3714 return SZ_ERROR_FAIL; // it's unexpected case
3715 /*
3716 if (!outBuf_fromCaller && isCyclicMode && cycSize != 0)
3717 then (winPos < cycSize)
3718 else (winPos <= cycSize)
3719 */
3720 if (!p->outBuf_fromCaller)
3721 {
3722 // that code is optional. We try to optimize write chunk sizes.
3723 /* (next2) is expected next write position in the caller,
3724 if the caller writes by kBlockSizeMax chunks.
3725 */
3726 /*
3727 const size_t next2 = (winPos + kBlockSizeMax) & (kBlockSizeMax - 1);
3728 if (winPos < next2 && next2 < next)
3729 next = next2;
3730 */
3731 }
3732 size = next - winPos;
3733 }
3734
3735 // note: ZstdDec_DecodeBlock() uses (winLimit = winPos + size) only for RLE and RAW blocks
3736 res = ZstdDec_DecodeBlock(dec, p, size);
3737 /*
3738 after one block decoding:
3739 if (!outBuf_fromCaller && isCyclicMode && cycSize != 0)
3740 then (winPos < cycSize + max_block_size)
3741 else (winPos <= cycSize)
3742 */
3743
3744 if (!p->outBuf_fromCaller)
3745 p->win = dec->decoder.win;
3746 p->winPos = dec->decoder.winPos;
3747
3748 // outProcessed = dec->decoder.winPos - winPos;
3749 // p->outProcessed += outProcessed;
3750
3751 if (res != SZ_OK)
3752 return res;
3753
3754 if (dec->frameState != ZSTD2_STATE_AFTER_HEADER)
3755 {
3756 if (p->outBuf_fromCaller)
3757 return SZ_OK;
3758 {
3759 // !p->outBuf_fromCaller
3760 /*
3761 if (ZSTD_STATUS_FINISHED_FRAME), we request full flushing here because
3762 1) it's simpler to work with allocation and extracting of next frame,
3763 2) it's better to start writing to next new frame with aligned memory
3764 for faster xxh 64-bit reads.
3765 */
3766 size_t end = dec->decoder.winPos; // end pos for all data flushing
3767 if (p->status != ZSTD_STATUS_FINISHED_FRAME)
3768 {
3769 // we will request a flush here only for cases where a wrap in the cyclic buffer can be required in the next call.
3770 if (!dec->isCyclicMode)
3771 return SZ_OK;
3772 // isCyclicMode
3773 {
3774 const size_t delta = dec->decoder.cycSize;
3775 if (end < delta)
3776 return SZ_OK; // (winPos < cycSize). no need for flush
3777 // cycSize <= winPos
3778 // So we ask the caller to flush (cycSize - wrPos) bytes,
3779 // and then we will wrap the cyclic buffer in the next call
3780 end = delta;
3781 }
3782 }
3783 p->needWrite_Size = end - p->wrPos;
3784 }
3785 return SZ_OK;
3786 }
3787
3788 // ZSTD2_STATE_AFTER_HEADER
3789 {
3790 BoolInt useCyclic = False;
3791 size_t cycSize;
3792
3793 // p->status = ZSTD_STATUS_NOT_FINISHED;
3794 if (dec->dictionaryId != 0)
3795 {
3796 /* actually we can try to decode some data,
3797 because it's possible that some data doesn't use dictionary */
3798 // p->status = ZSTD_STATUS_NOT_SPECIFIED;
3799 return SZ_ERROR_UNSUPPORTED;
3800 }
3801
3802 {
3803 UInt64 winSize = dec->contentSize;
3804 UInt64 winSize_Allocate = winSize;
3805 const unsigned descriptor = dec->descriptor;
3806
3807 if ((descriptor & DESCRIPTOR_FLAG_SINGLE) == 0)
3808 {
3809 const Byte wd = dec->windowDescriptor;
3810 winSize = (UInt64)(8 + (wd & 7)) << ((wd >> 3) + 10 - 3);
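/* the statement above implements the Window_Size formula from the
   Zstandard format specification:
     windowLog = 10 + (wd >> 3), mantissa = (wd & 7),
     Window_Size = (1 << windowLog) + ((1 << windowLog) / 8) * mantissa
                 = (8 + mantissa) << (windowLog - 3)
   worked example: (wd == 0) gives 8 << 7 == 1024 bytes (the minimum). */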
3811 if (!DESCRIPTOR_Is_ContentSize_Defined(descriptor)
3812 || winSize_Allocate > winSize)
3813 {
3814 winSize_Allocate = winSize;
3815 useCyclic = True;
3816 }
3817 }
3818 /*
3819 else
3820 {
3821 if (p->info.singleSegment_ContentSize_MAX < winSize)
3822 p->info.singleSegment_ContentSize_MAX = winSize;
3823 // p->info.num_SingleSegments++;
3824 }
3825 */
3826 if (p->info.windowSize_MAX < winSize)
3827 p->info.windowSize_MAX = winSize;
3828 if (p->info.windowSize_Allocate_MAX < winSize_Allocate)
3829 p->info.windowSize_Allocate_MAX = winSize_Allocate;
3830 /*
3831 winSize_Allocate is MIN(content_size, window_size_from_descriptor).
3832 Even if (content_size < window_size_from_descriptor),
3833 original-zstd still uses (window_size_from_descriptor) to check that decoding is allowed.
3834 We try to follow original-zstd, and here we check (winSize) instead of (winSize_Allocate).
3835 */
3836 if (
3837 // winSize_Allocate // it's relaxed check
3838 winSize // it's more strict check to be compatible with original-zstd
3839 > ((UInt64)1 << MAX_WINDOW_SIZE_LOG))
3840 return SZ_ERROR_UNSUPPORTED; // SZ_ERROR_MEM
3841 cycSize = (size_t)winSize_Allocate;
3842 if (cycSize != winSize_Allocate)
3843 return SZ_ERROR_MEM;
3844 // cycSize <= winSize
3845 /* later we will use (CZstdDec1::winSize) to check match offsets and check block sizes.
3846 if (there is window descriptor)
3847 {
3848 We will check block size with (window_size_from_descriptor) instead of (winSize_Allocate).
3849 Does original-zstd do it that way also?
3850 }
3851 Here we must reduce the full real 64-bit (winSize) to size_t for (CZstdDec1::winSize).
3852 Also we don't want too big values for (CZstdDec1::winSize).
3853 Our (CZstdDec1::winSize) will meet the condition:
3854 (CZstdDec1::winSize < kBlockSizeMax || CZstdDec1::winSize <= cycSize).
3855 */
3856 dec->decoder.winSize = (winSize < kBlockSizeMax) ? (size_t)winSize: cycSize;
3857 // note: (CZstdDec1::winSize > cycSize) is possible, if (!useCyclic)
3858 }
3859
3860 RINOK(ZstdDec_AllocateMisc(dec))
3861
3862 if (p->outBuf_fromCaller)
3863 dec->isCyclicMode = False;
3864 else
3865 {
3866 size_t d = cycSize;
3867
3868 if (dec->decoder.winPos != p->wrPos)
3869 return SZ_ERROR_FAIL;
3870
3871 dec->decoder.winPos = 0;
3872 p->wrPos = 0;
3873 p->winPos = dec->decoder.winPos;
3874
3875 /*
3876 const size_t needWrite = dec->decoder.winPos - p->wrPos;
3877 if (!needWrite)
3878 {
3879 dec->decoder.winPos = 0;
3880 p->wrPos = 0;
3881 p->winPos = dec->decoder.winPos;
3882 }
3883 */
3884 /* if (!useCyclic), we allocate only cycSize = ContentSize.
3885 But if we want to support the case where a new frame starts with (winPos != 0),
3886 then we will wrap over zero, and we still need
3887 to set (useCyclic) and allocate additional buffer space.
3888 Now we don't allow a new frame to start with (winPos != 0),
3889 so (dec->decoder.winPos == 0), and we
3890 can use (!useCyclic) with reduced buffer sizes.
3891 */
3892 /*
3893 if (dec->decoder.winPos != 0)
3894 useCyclic = True;
3895 */
3896
3897 if (useCyclic)
3898 {
3899 /* the cyclic buffer size must be at least (COPY_CHUNK_SIZE - 1) bytes
3900 larger than the window size, because CopyMatch() can write additional
3901 (COPY_CHUNK_SIZE - 1) bytes and overwrite the oldest data in the cyclic buffer.
3902 But for performance reasons we align (cycSize) to (kBlockSizeMax).
3903 Also we must provide (cycSize >= max_decoded_data_after_cycSize),
3904 because after the data move that wraps over zero we must provide (winPos < cycSize).
3905 */
3906 const size_t alignSize = kBlockSizeMax;
3907 /* here we add (1 << 7) instead of (COPY_CHUNK_SIZE - 1), because
3908 we want to get the same (cycSize) for different COPY_CHUNK_SIZE values. */
3909 // cycSize += (COPY_CHUNK_SIZE - 1) + (alignSize - 1); // for debug : we can get smallest (cycSize)
3910 cycSize += (1 << 7) + alignSize;
3911 cycSize &= ~(size_t)(alignSize - 1);
3912 // cycSize must be aligned for 32, because xxh requires 32-bytes blocks.
3913 // cycSize += 12345; // for debug
3914 // cycSize += 1 << 10; // for debug
3915 // cycSize += 32; // for debug
3916 // cycSize += kBlockSizeMax; // for debug
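/* worked example of the rounding above (illustration only, assuming
   kBlockSizeMax == (1 << 17), the zstd block size limit):
     winSize_Allocate == (1 << 23) (an 8 MiB window) gives
     cycSize == ((1 << 23) + (1 << 7) + (1 << 17)) & ~(size_t)((1 << 17) - 1)
             == (1 << 23) + (1 << 17). */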
3917 if (cycSize < d)
3918 return SZ_ERROR_MEM;
3919 /*
3920 in cyclic buffer mode we allow decoding of one additional block
3921 that exceeds (cycSize).
3922 So we must allocate additional (kBlockSizeMax) bytes after (cycSize).
3923 if defined(Z7_STD_DEC_USE_AFTER_CYC_BUF)
3924 {
3925 we can read (COPY_CHUNK_SIZE - 1) bytes after (cycSize),
3926 but we already allocate an additional kBlockSizeMax that
3927 is larger than COPY_CHUNK_SIZE.
3928 So we don't need additional space of COPY_CHUNK_SIZE after (cycSize).
3929 }
3930 */
3931 /*
3932 #ifdef Z7_STD_DEC_USE_AFTER_CYC_BUF
3933 d = cycSize + (1 << 7); // we must add at least (COPY_CHUNK_SIZE - 1)
3934 #endif
3935 */
3936 d = cycSize + kBlockSizeMax;
3937 if (d < cycSize)
3938 return SZ_ERROR_MEM;
3939 }
3940
3941 {
3942 const size_t kMinWinAllocSize = 1 << 12;
3943 if (d < kMinWinAllocSize)
3944 d = kMinWinAllocSize;
3945 }
3946
3947 if (d > dec->winBufSize_Allocated)
3948 {
3949 /*
3950 if (needWrite)
3951 {
3952 p->needWrite_Size = needWrite;
3953 return SZ_OK;
3954 // return SZ_ERROR_FAIL;
3955 }
3956 */
3957
3958 if (dec->winBufSize_Allocated != 0)
3959 {
3960 const size_t k_extra = (useCyclic || d >= (1u << 20)) ?
3961 2 * kBlockSizeMax : 0;
3962 unsigned i = useCyclic ? 17 : 12;
3963 for (; i < sizeof(size_t) * 8; i++)
3964 {
3965 const size_t d2 = ((size_t)1 << i) + k_extra;
3966 if (d2 >= d)
3967 {
3968 d = d2;
3969 break;
3970 }
3971 }
3972 }
3973 // RINOK(ZstdDec_AllocateWindow(dec, d))
3974 ZstdDec_FreeWindow(dec);
3975 dec->win_Base = (Byte *)ISzAlloc_Alloc(dec->alloc_Big, d);
3976 if (!dec->win_Base)
3977 return SZ_ERROR_MEM;
3978 dec->decoder.win = dec->win_Base;
3979 dec->winBufSize_Allocated = d;
3980 }
3981 /*
3982 else
3983 {
3984 // for non-cyclic mode we want to flush data, and set winPos = 0
3985 if (needWrite)
3986 {
3987 if (!useCyclic || dec->decoder.winPos >= cycSize)
3988 {
3989 p->needWrite_Size = needWrite;
3990 return SZ_OK;
3991 // return SZ_ERROR_FAIL;
3992 }
3993 }
3994 }
3995 */
3996
3997 dec->decoder.cycSize = cycSize;
3998 p->win = dec->decoder.win;
3999 // p->cycSize = dec->decoder.cycSize;
4000 dec->isCyclicMode = (Byte)useCyclic;
4001 } // (!p->outBuf_fromCaller) end
4002
4003 // p->winPos = dec->decoder.winPos;
4004 dec->frameState = ZSTD2_STATE_BLOCK;
4005 // continue;
4006 } // ZSTD2_STATE_AFTER_HEADER end
4007 }
4008}
4009
4010
4011void ZstdDec_GetResInfo(const CZstdDec *dec,
4012 const CZstdDecState *p,
4013 SRes res,
4014 CZstdDecResInfo *stat)
4015{
4016 // ZstdDecInfo_CLEAR(stat);
4017 stat->extraSize = 0;
4018 stat->is_NonFinishedFrame = False;
4019 if (dec->frameState != ZSTD2_STATE_FINISHED)
4020 {
4021 if (dec->frameState == ZSTD2_STATE_SIGNATURE)
4022 {
4023 stat->extraSize = (Byte)dec->tempSize;
4024 if (ZstdDecInfo_GET_NUM_FRAMES(&p->info) == 0)
4025 res = SZ_ERROR_NO_ARCHIVE;
4026 }
4027 else
4028 {
4029 stat->is_NonFinishedFrame = True;
4030 if (res == SZ_OK && p->status == ZSTD_STATUS_NEEDS_MORE_INPUT)
4031 res = SZ_ERROR_INPUT_EOF;
4032 }
4033 }
4034 stat->decode_SRes = res;
4035}
4036
4037
4038size_t ZstdDec_ReadUnusedFromInBuf(
4039 CZstdDecHandle dec,
4040 size_t afterDecoding_tempPos,
4041 void *data, size_t size)
4042{
4043 size_t processed = 0;
4044 if (dec->frameState == ZSTD2_STATE_SIGNATURE)
4045 {
4046 Byte *dest = (Byte *)data;
4047 const size_t tempSize = dec->tempSize;
4048 while (afterDecoding_tempPos < tempSize)
4049 {
4050 if (size == 0)
4051 break;
4052 size--;
4053 *dest++ = dec->temp[afterDecoding_tempPos++];
4054 processed++;
4055 }
4056 }
4057 return processed;
4058}
4059
4060
4061void ZstdDecState_Clear(CZstdDecState *p)
4062{
4063 memset(p, 0 , sizeof(*p));
4064}